1*61046927SAndroid Build Coastguard Worker /*
2*61046927SAndroid Build Coastguard Worker * Copyright © 2010 Intel Corporation
3*61046927SAndroid Build Coastguard Worker *
4*61046927SAndroid Build Coastguard Worker * Permission is hereby granted, free of charge, to any person obtaining a
5*61046927SAndroid Build Coastguard Worker * copy of this software and associated documentation files (the "Software"),
6*61046927SAndroid Build Coastguard Worker * to deal in the Software without restriction, including without limitation
7*61046927SAndroid Build Coastguard Worker * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8*61046927SAndroid Build Coastguard Worker * and/or sell copies of the Software, and to permit persons to whom the
9*61046927SAndroid Build Coastguard Worker * Software is furnished to do so, subject to the following conditions:
10*61046927SAndroid Build Coastguard Worker *
11*61046927SAndroid Build Coastguard Worker * The above copyright notice and this permission notice (including the next
12*61046927SAndroid Build Coastguard Worker * paragraph) shall be included in all copies or substantial portions of the
13*61046927SAndroid Build Coastguard Worker * Software.
14*61046927SAndroid Build Coastguard Worker *
15*61046927SAndroid Build Coastguard Worker * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16*61046927SAndroid Build Coastguard Worker * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17*61046927SAndroid Build Coastguard Worker * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18*61046927SAndroid Build Coastguard Worker * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19*61046927SAndroid Build Coastguard Worker * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20*61046927SAndroid Build Coastguard Worker * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21*61046927SAndroid Build Coastguard Worker * IN THE SOFTWARE.
22*61046927SAndroid Build Coastguard Worker */
23*61046927SAndroid Build Coastguard Worker
24*61046927SAndroid Build Coastguard Worker /** @file
25*61046927SAndroid Build Coastguard Worker *
26*61046927SAndroid Build Coastguard Worker * This file drives the GLSL IR -> LIR translation, contains the
27*61046927SAndroid Build Coastguard Worker * optimizations on the LIR, and drives the generation of native code
28*61046927SAndroid Build Coastguard Worker * from the LIR.
29*61046927SAndroid Build Coastguard Worker */
30*61046927SAndroid Build Coastguard Worker
31*61046927SAndroid Build Coastguard Worker #include "brw_eu.h"
32*61046927SAndroid Build Coastguard Worker #include "brw_fs.h"
33*61046927SAndroid Build Coastguard Worker #include "brw_fs_builder.h"
34*61046927SAndroid Build Coastguard Worker #include "brw_fs_live_variables.h"
35*61046927SAndroid Build Coastguard Worker #include "brw_nir.h"
36*61046927SAndroid Build Coastguard Worker #include "brw_cfg.h"
37*61046927SAndroid Build Coastguard Worker #include "brw_private.h"
38*61046927SAndroid Build Coastguard Worker #include "intel_nir.h"
39*61046927SAndroid Build Coastguard Worker #include "shader_enums.h"
40*61046927SAndroid Build Coastguard Worker #include "dev/intel_debug.h"
41*61046927SAndroid Build Coastguard Worker #include "dev/intel_wa.h"
42*61046927SAndroid Build Coastguard Worker #include "compiler/glsl_types.h"
43*61046927SAndroid Build Coastguard Worker #include "compiler/nir/nir_builder.h"
44*61046927SAndroid Build Coastguard Worker #include "util/u_math.h"
45*61046927SAndroid Build Coastguard Worker
46*61046927SAndroid Build Coastguard Worker using namespace brw;
47*61046927SAndroid Build Coastguard Worker
48*61046927SAndroid Build Coastguard Worker static void
49*61046927SAndroid Build Coastguard Worker initialize_sources(fs_inst *inst, const brw_reg src[], uint8_t num_sources);
50*61046927SAndroid Build Coastguard Worker
51*61046927SAndroid Build Coastguard Worker void
init(enum opcode opcode,uint8_t exec_size,const brw_reg & dst,const brw_reg * src,unsigned sources)52*61046927SAndroid Build Coastguard Worker fs_inst::init(enum opcode opcode, uint8_t exec_size, const brw_reg &dst,
53*61046927SAndroid Build Coastguard Worker const brw_reg *src, unsigned sources)
54*61046927SAndroid Build Coastguard Worker {
55*61046927SAndroid Build Coastguard Worker memset((void*)this, 0, sizeof(*this));
56*61046927SAndroid Build Coastguard Worker
57*61046927SAndroid Build Coastguard Worker initialize_sources(this, src, sources);
58*61046927SAndroid Build Coastguard Worker
59*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < sources; i++)
60*61046927SAndroid Build Coastguard Worker this->src[i] = src[i];
61*61046927SAndroid Build Coastguard Worker
62*61046927SAndroid Build Coastguard Worker this->opcode = opcode;
63*61046927SAndroid Build Coastguard Worker this->dst = dst;
64*61046927SAndroid Build Coastguard Worker this->exec_size = exec_size;
65*61046927SAndroid Build Coastguard Worker
66*61046927SAndroid Build Coastguard Worker assert(dst.file != IMM && dst.file != UNIFORM);
67*61046927SAndroid Build Coastguard Worker
68*61046927SAndroid Build Coastguard Worker assert(this->exec_size != 0);
69*61046927SAndroid Build Coastguard Worker
70*61046927SAndroid Build Coastguard Worker this->conditional_mod = BRW_CONDITIONAL_NONE;
71*61046927SAndroid Build Coastguard Worker
72*61046927SAndroid Build Coastguard Worker /* This will be the case for almost all instructions. */
73*61046927SAndroid Build Coastguard Worker switch (dst.file) {
74*61046927SAndroid Build Coastguard Worker case VGRF:
75*61046927SAndroid Build Coastguard Worker case ARF:
76*61046927SAndroid Build Coastguard Worker case FIXED_GRF:
77*61046927SAndroid Build Coastguard Worker case ATTR:
78*61046927SAndroid Build Coastguard Worker this->size_written = dst.component_size(exec_size);
79*61046927SAndroid Build Coastguard Worker break;
80*61046927SAndroid Build Coastguard Worker case BAD_FILE:
81*61046927SAndroid Build Coastguard Worker this->size_written = 0;
82*61046927SAndroid Build Coastguard Worker break;
83*61046927SAndroid Build Coastguard Worker case IMM:
84*61046927SAndroid Build Coastguard Worker case UNIFORM:
85*61046927SAndroid Build Coastguard Worker unreachable("Invalid destination register file");
86*61046927SAndroid Build Coastguard Worker }
87*61046927SAndroid Build Coastguard Worker
88*61046927SAndroid Build Coastguard Worker this->writes_accumulator = false;
89*61046927SAndroid Build Coastguard Worker }
90*61046927SAndroid Build Coastguard Worker
fs_inst()91*61046927SAndroid Build Coastguard Worker fs_inst::fs_inst()
92*61046927SAndroid Build Coastguard Worker {
93*61046927SAndroid Build Coastguard Worker init(BRW_OPCODE_NOP, 8, dst, NULL, 0);
94*61046927SAndroid Build Coastguard Worker }
95*61046927SAndroid Build Coastguard Worker
fs_inst(enum opcode opcode,uint8_t exec_size)96*61046927SAndroid Build Coastguard Worker fs_inst::fs_inst(enum opcode opcode, uint8_t exec_size)
97*61046927SAndroid Build Coastguard Worker {
98*61046927SAndroid Build Coastguard Worker init(opcode, exec_size, reg_undef, NULL, 0);
99*61046927SAndroid Build Coastguard Worker }
100*61046927SAndroid Build Coastguard Worker
fs_inst(enum opcode opcode,uint8_t exec_size,const brw_reg & dst)101*61046927SAndroid Build Coastguard Worker fs_inst::fs_inst(enum opcode opcode, uint8_t exec_size, const brw_reg &dst)
102*61046927SAndroid Build Coastguard Worker {
103*61046927SAndroid Build Coastguard Worker init(opcode, exec_size, dst, NULL, 0);
104*61046927SAndroid Build Coastguard Worker }
105*61046927SAndroid Build Coastguard Worker
fs_inst(enum opcode opcode,uint8_t exec_size,const brw_reg & dst,const brw_reg & src0)106*61046927SAndroid Build Coastguard Worker fs_inst::fs_inst(enum opcode opcode, uint8_t exec_size, const brw_reg &dst,
107*61046927SAndroid Build Coastguard Worker const brw_reg &src0)
108*61046927SAndroid Build Coastguard Worker {
109*61046927SAndroid Build Coastguard Worker const brw_reg src[1] = { src0 };
110*61046927SAndroid Build Coastguard Worker init(opcode, exec_size, dst, src, 1);
111*61046927SAndroid Build Coastguard Worker }
112*61046927SAndroid Build Coastguard Worker
fs_inst(enum opcode opcode,uint8_t exec_size,const brw_reg & dst,const brw_reg & src0,const brw_reg & src1)113*61046927SAndroid Build Coastguard Worker fs_inst::fs_inst(enum opcode opcode, uint8_t exec_size, const brw_reg &dst,
114*61046927SAndroid Build Coastguard Worker const brw_reg &src0, const brw_reg &src1)
115*61046927SAndroid Build Coastguard Worker {
116*61046927SAndroid Build Coastguard Worker const brw_reg src[2] = { src0, src1 };
117*61046927SAndroid Build Coastguard Worker init(opcode, exec_size, dst, src, 2);
118*61046927SAndroid Build Coastguard Worker }
119*61046927SAndroid Build Coastguard Worker
fs_inst(enum opcode opcode,uint8_t exec_size,const brw_reg & dst,const brw_reg & src0,const brw_reg & src1,const brw_reg & src2)120*61046927SAndroid Build Coastguard Worker fs_inst::fs_inst(enum opcode opcode, uint8_t exec_size, const brw_reg &dst,
121*61046927SAndroid Build Coastguard Worker const brw_reg &src0, const brw_reg &src1, const brw_reg &src2)
122*61046927SAndroid Build Coastguard Worker {
123*61046927SAndroid Build Coastguard Worker const brw_reg src[3] = { src0, src1, src2 };
124*61046927SAndroid Build Coastguard Worker init(opcode, exec_size, dst, src, 3);
125*61046927SAndroid Build Coastguard Worker }
126*61046927SAndroid Build Coastguard Worker
fs_inst(enum opcode opcode,uint8_t exec_width,const brw_reg & dst,const brw_reg src[],unsigned sources)127*61046927SAndroid Build Coastguard Worker fs_inst::fs_inst(enum opcode opcode, uint8_t exec_width, const brw_reg &dst,
128*61046927SAndroid Build Coastguard Worker const brw_reg src[], unsigned sources)
129*61046927SAndroid Build Coastguard Worker {
130*61046927SAndroid Build Coastguard Worker init(opcode, exec_width, dst, src, sources);
131*61046927SAndroid Build Coastguard Worker }
132*61046927SAndroid Build Coastguard Worker
fs_inst(const fs_inst & that)133*61046927SAndroid Build Coastguard Worker fs_inst::fs_inst(const fs_inst &that)
134*61046927SAndroid Build Coastguard Worker {
135*61046927SAndroid Build Coastguard Worker memcpy((void*)this, &that, sizeof(that));
136*61046927SAndroid Build Coastguard Worker initialize_sources(this, that.src, that.sources);
137*61046927SAndroid Build Coastguard Worker }
138*61046927SAndroid Build Coastguard Worker
~fs_inst()139*61046927SAndroid Build Coastguard Worker fs_inst::~fs_inst()
140*61046927SAndroid Build Coastguard Worker {
141*61046927SAndroid Build Coastguard Worker if (this->src != this->builtin_src)
142*61046927SAndroid Build Coastguard Worker delete[] this->src;
143*61046927SAndroid Build Coastguard Worker }
144*61046927SAndroid Build Coastguard Worker
145*61046927SAndroid Build Coastguard Worker static void
initialize_sources(fs_inst * inst,const brw_reg src[],uint8_t num_sources)146*61046927SAndroid Build Coastguard Worker initialize_sources(fs_inst *inst, const brw_reg src[], uint8_t num_sources)
147*61046927SAndroid Build Coastguard Worker {
148*61046927SAndroid Build Coastguard Worker if (num_sources > ARRAY_SIZE(inst->builtin_src))
149*61046927SAndroid Build Coastguard Worker inst->src = new brw_reg[num_sources];
150*61046927SAndroid Build Coastguard Worker else
151*61046927SAndroid Build Coastguard Worker inst->src = inst->builtin_src;
152*61046927SAndroid Build Coastguard Worker
153*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < num_sources; i++)
154*61046927SAndroid Build Coastguard Worker inst->src[i] = src[i];
155*61046927SAndroid Build Coastguard Worker
156*61046927SAndroid Build Coastguard Worker inst->sources = num_sources;
157*61046927SAndroid Build Coastguard Worker }
158*61046927SAndroid Build Coastguard Worker
159*61046927SAndroid Build Coastguard Worker void
resize_sources(uint8_t num_sources)160*61046927SAndroid Build Coastguard Worker fs_inst::resize_sources(uint8_t num_sources)
161*61046927SAndroid Build Coastguard Worker {
162*61046927SAndroid Build Coastguard Worker if (this->sources == num_sources)
163*61046927SAndroid Build Coastguard Worker return;
164*61046927SAndroid Build Coastguard Worker
165*61046927SAndroid Build Coastguard Worker brw_reg *old_src = this->src;
166*61046927SAndroid Build Coastguard Worker brw_reg *new_src;
167*61046927SAndroid Build Coastguard Worker
168*61046927SAndroid Build Coastguard Worker const unsigned builtin_size = ARRAY_SIZE(this->builtin_src);
169*61046927SAndroid Build Coastguard Worker
170*61046927SAndroid Build Coastguard Worker if (old_src == this->builtin_src) {
171*61046927SAndroid Build Coastguard Worker if (num_sources > builtin_size) {
172*61046927SAndroid Build Coastguard Worker new_src = new brw_reg[num_sources];
173*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < this->sources; i++)
174*61046927SAndroid Build Coastguard Worker new_src[i] = old_src[i];
175*61046927SAndroid Build Coastguard Worker
176*61046927SAndroid Build Coastguard Worker } else {
177*61046927SAndroid Build Coastguard Worker new_src = old_src;
178*61046927SAndroid Build Coastguard Worker }
179*61046927SAndroid Build Coastguard Worker } else {
180*61046927SAndroid Build Coastguard Worker if (num_sources <= builtin_size) {
181*61046927SAndroid Build Coastguard Worker new_src = this->builtin_src;
182*61046927SAndroid Build Coastguard Worker assert(this->sources > num_sources);
183*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < num_sources; i++)
184*61046927SAndroid Build Coastguard Worker new_src[i] = old_src[i];
185*61046927SAndroid Build Coastguard Worker
186*61046927SAndroid Build Coastguard Worker } else if (num_sources < this->sources) {
187*61046927SAndroid Build Coastguard Worker new_src = old_src;
188*61046927SAndroid Build Coastguard Worker
189*61046927SAndroid Build Coastguard Worker } else {
190*61046927SAndroid Build Coastguard Worker new_src = new brw_reg[num_sources];
191*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < num_sources; i++)
192*61046927SAndroid Build Coastguard Worker new_src[i] = old_src[i];
193*61046927SAndroid Build Coastguard Worker }
194*61046927SAndroid Build Coastguard Worker
195*61046927SAndroid Build Coastguard Worker if (old_src != new_src)
196*61046927SAndroid Build Coastguard Worker delete[] old_src;
197*61046927SAndroid Build Coastguard Worker }
198*61046927SAndroid Build Coastguard Worker
199*61046927SAndroid Build Coastguard Worker this->sources = num_sources;
200*61046927SAndroid Build Coastguard Worker this->src = new_src;
201*61046927SAndroid Build Coastguard Worker }
202*61046927SAndroid Build Coastguard Worker
203*61046927SAndroid Build Coastguard Worker bool
is_send_from_grf() const204*61046927SAndroid Build Coastguard Worker fs_inst::is_send_from_grf() const
205*61046927SAndroid Build Coastguard Worker {
206*61046927SAndroid Build Coastguard Worker switch (opcode) {
207*61046927SAndroid Build Coastguard Worker case SHADER_OPCODE_SEND:
208*61046927SAndroid Build Coastguard Worker case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
209*61046927SAndroid Build Coastguard Worker case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
210*61046927SAndroid Build Coastguard Worker case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
211*61046927SAndroid Build Coastguard Worker case SHADER_OPCODE_INTERLOCK:
212*61046927SAndroid Build Coastguard Worker case SHADER_OPCODE_MEMORY_FENCE:
213*61046927SAndroid Build Coastguard Worker case SHADER_OPCODE_BARRIER:
214*61046927SAndroid Build Coastguard Worker return true;
215*61046927SAndroid Build Coastguard Worker case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
216*61046927SAndroid Build Coastguard Worker return src[1].file == VGRF;
217*61046927SAndroid Build Coastguard Worker default:
218*61046927SAndroid Build Coastguard Worker return false;
219*61046927SAndroid Build Coastguard Worker }
220*61046927SAndroid Build Coastguard Worker }
221*61046927SAndroid Build Coastguard Worker
222*61046927SAndroid Build Coastguard Worker bool
is_control_source(unsigned arg) const223*61046927SAndroid Build Coastguard Worker fs_inst::is_control_source(unsigned arg) const
224*61046927SAndroid Build Coastguard Worker {
225*61046927SAndroid Build Coastguard Worker switch (opcode) {
226*61046927SAndroid Build Coastguard Worker case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
227*61046927SAndroid Build Coastguard Worker return arg == 0;
228*61046927SAndroid Build Coastguard Worker
229*61046927SAndroid Build Coastguard Worker case SHADER_OPCODE_BROADCAST:
230*61046927SAndroid Build Coastguard Worker case SHADER_OPCODE_SHUFFLE:
231*61046927SAndroid Build Coastguard Worker case SHADER_OPCODE_QUAD_SWIZZLE:
232*61046927SAndroid Build Coastguard Worker case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
233*61046927SAndroid Build Coastguard Worker case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
234*61046927SAndroid Build Coastguard Worker case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
235*61046927SAndroid Build Coastguard Worker return arg == 1;
236*61046927SAndroid Build Coastguard Worker
237*61046927SAndroid Build Coastguard Worker case SHADER_OPCODE_MOV_INDIRECT:
238*61046927SAndroid Build Coastguard Worker case SHADER_OPCODE_CLUSTER_BROADCAST:
239*61046927SAndroid Build Coastguard Worker return arg == 1 || arg == 2;
240*61046927SAndroid Build Coastguard Worker
241*61046927SAndroid Build Coastguard Worker case SHADER_OPCODE_SEND:
242*61046927SAndroid Build Coastguard Worker return arg == 0 || arg == 1;
243*61046927SAndroid Build Coastguard Worker
244*61046927SAndroid Build Coastguard Worker case SHADER_OPCODE_MEMORY_LOAD_LOGICAL:
245*61046927SAndroid Build Coastguard Worker case SHADER_OPCODE_MEMORY_STORE_LOGICAL:
246*61046927SAndroid Build Coastguard Worker case SHADER_OPCODE_MEMORY_ATOMIC_LOGICAL:
247*61046927SAndroid Build Coastguard Worker return arg != MEMORY_LOGICAL_BINDING &&
248*61046927SAndroid Build Coastguard Worker arg != MEMORY_LOGICAL_ADDRESS &&
249*61046927SAndroid Build Coastguard Worker arg != MEMORY_LOGICAL_DATA0 &&
250*61046927SAndroid Build Coastguard Worker arg != MEMORY_LOGICAL_DATA1;
251*61046927SAndroid Build Coastguard Worker
252*61046927SAndroid Build Coastguard Worker default:
253*61046927SAndroid Build Coastguard Worker return false;
254*61046927SAndroid Build Coastguard Worker }
255*61046927SAndroid Build Coastguard Worker }
256*61046927SAndroid Build Coastguard Worker
257*61046927SAndroid Build Coastguard Worker bool
is_payload(unsigned arg) const258*61046927SAndroid Build Coastguard Worker fs_inst::is_payload(unsigned arg) const
259*61046927SAndroid Build Coastguard Worker {
260*61046927SAndroid Build Coastguard Worker switch (opcode) {
261*61046927SAndroid Build Coastguard Worker case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
262*61046927SAndroid Build Coastguard Worker case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
263*61046927SAndroid Build Coastguard Worker case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
264*61046927SAndroid Build Coastguard Worker case SHADER_OPCODE_INTERLOCK:
265*61046927SAndroid Build Coastguard Worker case SHADER_OPCODE_MEMORY_FENCE:
266*61046927SAndroid Build Coastguard Worker case SHADER_OPCODE_BARRIER:
267*61046927SAndroid Build Coastguard Worker return arg == 0;
268*61046927SAndroid Build Coastguard Worker
269*61046927SAndroid Build Coastguard Worker case SHADER_OPCODE_SEND:
270*61046927SAndroid Build Coastguard Worker return arg == 2 || arg == 3;
271*61046927SAndroid Build Coastguard Worker
272*61046927SAndroid Build Coastguard Worker default:
273*61046927SAndroid Build Coastguard Worker return false;
274*61046927SAndroid Build Coastguard Worker }
275*61046927SAndroid Build Coastguard Worker }
276*61046927SAndroid Build Coastguard Worker
277*61046927SAndroid Build Coastguard Worker /**
278*61046927SAndroid Build Coastguard Worker * Returns true if this instruction's sources and destinations cannot
279*61046927SAndroid Build Coastguard Worker * safely be the same register.
280*61046927SAndroid Build Coastguard Worker *
281*61046927SAndroid Build Coastguard Worker * In most cases, a register can be written over safely by the same
282*61046927SAndroid Build Coastguard Worker * instruction that is its last use. For a single instruction, the
283*61046927SAndroid Build Coastguard Worker * sources are dereferenced before writing of the destination starts
284*61046927SAndroid Build Coastguard Worker * (naturally).
285*61046927SAndroid Build Coastguard Worker *
286*61046927SAndroid Build Coastguard Worker * However, there are a few cases where this can be problematic:
287*61046927SAndroid Build Coastguard Worker *
288*61046927SAndroid Build Coastguard Worker * - Virtual opcodes that translate to multiple instructions in the
289*61046927SAndroid Build Coastguard Worker * code generator: if src == dst and one instruction writes the
290*61046927SAndroid Build Coastguard Worker * destination before a later instruction reads the source, then
291*61046927SAndroid Build Coastguard Worker * src will have been clobbered.
292*61046927SAndroid Build Coastguard Worker *
293*61046927SAndroid Build Coastguard Worker * - SIMD16 compressed instructions with certain regioning (see below).
294*61046927SAndroid Build Coastguard Worker *
295*61046927SAndroid Build Coastguard Worker * The register allocator uses this information to set up conflicts between
296*61046927SAndroid Build Coastguard Worker * GRF sources and the destination.
297*61046927SAndroid Build Coastguard Worker */
298*61046927SAndroid Build Coastguard Worker bool
has_source_and_destination_hazard() const299*61046927SAndroid Build Coastguard Worker fs_inst::has_source_and_destination_hazard() const
300*61046927SAndroid Build Coastguard Worker {
301*61046927SAndroid Build Coastguard Worker switch (opcode) {
302*61046927SAndroid Build Coastguard Worker case FS_OPCODE_PACK_HALF_2x16_SPLIT:
303*61046927SAndroid Build Coastguard Worker /* Multiple partial writes to the destination */
304*61046927SAndroid Build Coastguard Worker return true;
305*61046927SAndroid Build Coastguard Worker case SHADER_OPCODE_SHUFFLE:
306*61046927SAndroid Build Coastguard Worker /* This instruction returns an arbitrary channel from the source and
307*61046927SAndroid Build Coastguard Worker * gets split into smaller instructions in the generator. It's possible
308*61046927SAndroid Build Coastguard Worker * that one of the instructions will read from a channel corresponding
309*61046927SAndroid Build Coastguard Worker * to an earlier instruction.
310*61046927SAndroid Build Coastguard Worker */
311*61046927SAndroid Build Coastguard Worker case SHADER_OPCODE_SEL_EXEC:
312*61046927SAndroid Build Coastguard Worker /* This is implemented as
313*61046927SAndroid Build Coastguard Worker *
314*61046927SAndroid Build Coastguard Worker * mov(16) g4<1>D 0D { align1 WE_all 1H };
315*61046927SAndroid Build Coastguard Worker * mov(16) g4<1>D g5<8,8,1>D { align1 1H }
316*61046927SAndroid Build Coastguard Worker *
317*61046927SAndroid Build Coastguard Worker * Because the source is only read in the second instruction, the first
318*61046927SAndroid Build Coastguard Worker * may stomp all over it.
319*61046927SAndroid Build Coastguard Worker */
320*61046927SAndroid Build Coastguard Worker return true;
321*61046927SAndroid Build Coastguard Worker case SHADER_OPCODE_QUAD_SWIZZLE:
322*61046927SAndroid Build Coastguard Worker switch (src[1].ud) {
323*61046927SAndroid Build Coastguard Worker case BRW_SWIZZLE_XXXX:
324*61046927SAndroid Build Coastguard Worker case BRW_SWIZZLE_YYYY:
325*61046927SAndroid Build Coastguard Worker case BRW_SWIZZLE_ZZZZ:
326*61046927SAndroid Build Coastguard Worker case BRW_SWIZZLE_WWWW:
327*61046927SAndroid Build Coastguard Worker case BRW_SWIZZLE_XXZZ:
328*61046927SAndroid Build Coastguard Worker case BRW_SWIZZLE_YYWW:
329*61046927SAndroid Build Coastguard Worker case BRW_SWIZZLE_XYXY:
330*61046927SAndroid Build Coastguard Worker case BRW_SWIZZLE_ZWZW:
331*61046927SAndroid Build Coastguard Worker /* These can be implemented as a single Align1 region on all
332*61046927SAndroid Build Coastguard Worker * platforms, so there's never a hazard between source and
333*61046927SAndroid Build Coastguard Worker * destination. C.f. fs_generator::generate_quad_swizzle().
334*61046927SAndroid Build Coastguard Worker */
335*61046927SAndroid Build Coastguard Worker return false;
336*61046927SAndroid Build Coastguard Worker default:
337*61046927SAndroid Build Coastguard Worker return !is_uniform(src[0]);
338*61046927SAndroid Build Coastguard Worker }
339*61046927SAndroid Build Coastguard Worker case BRW_OPCODE_DPAS:
340*61046927SAndroid Build Coastguard Worker /* This is overly conservative. The actual hazard is more complicated to
341*61046927SAndroid Build Coastguard Worker * describe. When the repeat count is N, the single instruction behaves
342*61046927SAndroid Build Coastguard Worker * like N instructions with a repeat count of one, but the destination
343*61046927SAndroid Build Coastguard Worker * and source registers are incremented (in somewhat complex ways) for
344*61046927SAndroid Build Coastguard Worker * each instruction.
345*61046927SAndroid Build Coastguard Worker *
346*61046927SAndroid Build Coastguard Worker * This means the source and destination register is actually a range of
347*61046927SAndroid Build Coastguard Worker * registers. The hazard exists of an earlier iteration would write a
348*61046927SAndroid Build Coastguard Worker * register that should be read by a later iteration.
349*61046927SAndroid Build Coastguard Worker *
350*61046927SAndroid Build Coastguard Worker * There may be some advantage to properly modeling this, but for now,
351*61046927SAndroid Build Coastguard Worker * be overly conservative.
352*61046927SAndroid Build Coastguard Worker */
353*61046927SAndroid Build Coastguard Worker return rcount > 1;
354*61046927SAndroid Build Coastguard Worker default:
355*61046927SAndroid Build Coastguard Worker /* The SIMD16 compressed instruction
356*61046927SAndroid Build Coastguard Worker *
357*61046927SAndroid Build Coastguard Worker * add(16) g4<1>F g4<8,8,1>F g6<8,8,1>F
358*61046927SAndroid Build Coastguard Worker *
359*61046927SAndroid Build Coastguard Worker * is actually decoded in hardware as:
360*61046927SAndroid Build Coastguard Worker *
361*61046927SAndroid Build Coastguard Worker * add(8) g4<1>F g4<8,8,1>F g6<8,8,1>F
362*61046927SAndroid Build Coastguard Worker * add(8) g5<1>F g5<8,8,1>F g7<8,8,1>F
363*61046927SAndroid Build Coastguard Worker *
364*61046927SAndroid Build Coastguard Worker * Which is safe. However, if we have uniform accesses
365*61046927SAndroid Build Coastguard Worker * happening, we get into trouble:
366*61046927SAndroid Build Coastguard Worker *
367*61046927SAndroid Build Coastguard Worker * add(8) g4<1>F g4<0,1,0>F g6<8,8,1>F
368*61046927SAndroid Build Coastguard Worker * add(8) g5<1>F g4<0,1,0>F g7<8,8,1>F
369*61046927SAndroid Build Coastguard Worker *
370*61046927SAndroid Build Coastguard Worker * Now our destination for the first instruction overwrote the
371*61046927SAndroid Build Coastguard Worker * second instruction's src0, and we get garbage for those 8
372*61046927SAndroid Build Coastguard Worker * pixels. There's a similar issue for the pre-gfx6
373*61046927SAndroid Build Coastguard Worker * pixel_x/pixel_y, which are registers of 16-bit values and thus
374*61046927SAndroid Build Coastguard Worker * would get stomped by the first decode as well.
375*61046927SAndroid Build Coastguard Worker */
376*61046927SAndroid Build Coastguard Worker if (exec_size == 16) {
377*61046927SAndroid Build Coastguard Worker for (int i = 0; i < sources; i++) {
378*61046927SAndroid Build Coastguard Worker if (src[i].file == VGRF && (src[i].stride == 0 ||
379*61046927SAndroid Build Coastguard Worker src[i].type == BRW_TYPE_UW ||
380*61046927SAndroid Build Coastguard Worker src[i].type == BRW_TYPE_W ||
381*61046927SAndroid Build Coastguard Worker src[i].type == BRW_TYPE_UB ||
382*61046927SAndroid Build Coastguard Worker src[i].type == BRW_TYPE_B)) {
383*61046927SAndroid Build Coastguard Worker return true;
384*61046927SAndroid Build Coastguard Worker }
385*61046927SAndroid Build Coastguard Worker }
386*61046927SAndroid Build Coastguard Worker }
387*61046927SAndroid Build Coastguard Worker return false;
388*61046927SAndroid Build Coastguard Worker }
389*61046927SAndroid Build Coastguard Worker }
390*61046927SAndroid Build Coastguard Worker
391*61046927SAndroid Build Coastguard Worker bool
can_do_source_mods(const struct intel_device_info * devinfo) const392*61046927SAndroid Build Coastguard Worker fs_inst::can_do_source_mods(const struct intel_device_info *devinfo) const
393*61046927SAndroid Build Coastguard Worker {
394*61046927SAndroid Build Coastguard Worker if (is_send_from_grf())
395*61046927SAndroid Build Coastguard Worker return false;
396*61046927SAndroid Build Coastguard Worker
397*61046927SAndroid Build Coastguard Worker /* From TGL PRM Vol 2a Pg. 1053 and Pg. 1069 MAD and MUL Instructions:
398*61046927SAndroid Build Coastguard Worker *
399*61046927SAndroid Build Coastguard Worker * "When multiplying a DW and any lower precision integer, source modifier
400*61046927SAndroid Build Coastguard Worker * is not supported."
401*61046927SAndroid Build Coastguard Worker */
402*61046927SAndroid Build Coastguard Worker if (devinfo->ver >= 12 && (opcode == BRW_OPCODE_MUL ||
403*61046927SAndroid Build Coastguard Worker opcode == BRW_OPCODE_MAD)) {
404*61046927SAndroid Build Coastguard Worker const brw_reg_type exec_type = get_exec_type(this);
405*61046927SAndroid Build Coastguard Worker const unsigned min_brw_type_size_bytes = opcode == BRW_OPCODE_MAD ?
406*61046927SAndroid Build Coastguard Worker MIN2(brw_type_size_bytes(src[1].type), brw_type_size_bytes(src[2].type)) :
407*61046927SAndroid Build Coastguard Worker MIN2(brw_type_size_bytes(src[0].type), brw_type_size_bytes(src[1].type));
408*61046927SAndroid Build Coastguard Worker
409*61046927SAndroid Build Coastguard Worker if (brw_type_is_int(exec_type) &&
410*61046927SAndroid Build Coastguard Worker brw_type_size_bytes(exec_type) >= 4 &&
411*61046927SAndroid Build Coastguard Worker brw_type_size_bytes(exec_type) != min_brw_type_size_bytes)
412*61046927SAndroid Build Coastguard Worker return false;
413*61046927SAndroid Build Coastguard Worker }
414*61046927SAndroid Build Coastguard Worker
415*61046927SAndroid Build Coastguard Worker switch (opcode) {
416*61046927SAndroid Build Coastguard Worker case BRW_OPCODE_ADDC:
417*61046927SAndroid Build Coastguard Worker case BRW_OPCODE_BFE:
418*61046927SAndroid Build Coastguard Worker case BRW_OPCODE_BFI1:
419*61046927SAndroid Build Coastguard Worker case BRW_OPCODE_BFI2:
420*61046927SAndroid Build Coastguard Worker case BRW_OPCODE_BFREV:
421*61046927SAndroid Build Coastguard Worker case BRW_OPCODE_CBIT:
422*61046927SAndroid Build Coastguard Worker case BRW_OPCODE_FBH:
423*61046927SAndroid Build Coastguard Worker case BRW_OPCODE_FBL:
424*61046927SAndroid Build Coastguard Worker case BRW_OPCODE_ROL:
425*61046927SAndroid Build Coastguard Worker case BRW_OPCODE_ROR:
426*61046927SAndroid Build Coastguard Worker case BRW_OPCODE_SUBB:
427*61046927SAndroid Build Coastguard Worker case BRW_OPCODE_DP4A:
428*61046927SAndroid Build Coastguard Worker case BRW_OPCODE_DPAS:
429*61046927SAndroid Build Coastguard Worker case SHADER_OPCODE_BROADCAST:
430*61046927SAndroid Build Coastguard Worker case SHADER_OPCODE_CLUSTER_BROADCAST:
431*61046927SAndroid Build Coastguard Worker case SHADER_OPCODE_MOV_INDIRECT:
432*61046927SAndroid Build Coastguard Worker case SHADER_OPCODE_SHUFFLE:
433*61046927SAndroid Build Coastguard Worker case SHADER_OPCODE_INT_QUOTIENT:
434*61046927SAndroid Build Coastguard Worker case SHADER_OPCODE_INT_REMAINDER:
435*61046927SAndroid Build Coastguard Worker return false;
436*61046927SAndroid Build Coastguard Worker default:
437*61046927SAndroid Build Coastguard Worker return true;
438*61046927SAndroid Build Coastguard Worker }
439*61046927SAndroid Build Coastguard Worker }
440*61046927SAndroid Build Coastguard Worker
441*61046927SAndroid Build Coastguard Worker bool
can_do_cmod() const442*61046927SAndroid Build Coastguard Worker fs_inst::can_do_cmod() const
443*61046927SAndroid Build Coastguard Worker {
444*61046927SAndroid Build Coastguard Worker switch (opcode) {
445*61046927SAndroid Build Coastguard Worker case BRW_OPCODE_ADD:
446*61046927SAndroid Build Coastguard Worker case BRW_OPCODE_ADD3:
447*61046927SAndroid Build Coastguard Worker case BRW_OPCODE_ADDC:
448*61046927SAndroid Build Coastguard Worker case BRW_OPCODE_AND:
449*61046927SAndroid Build Coastguard Worker case BRW_OPCODE_ASR:
450*61046927SAndroid Build Coastguard Worker case BRW_OPCODE_AVG:
451*61046927SAndroid Build Coastguard Worker case BRW_OPCODE_CMP:
452*61046927SAndroid Build Coastguard Worker case BRW_OPCODE_CMPN:
453*61046927SAndroid Build Coastguard Worker case BRW_OPCODE_DP2:
454*61046927SAndroid Build Coastguard Worker case BRW_OPCODE_DP3:
455*61046927SAndroid Build Coastguard Worker case BRW_OPCODE_DP4:
456*61046927SAndroid Build Coastguard Worker case BRW_OPCODE_DPH:
457*61046927SAndroid Build Coastguard Worker case BRW_OPCODE_FRC:
458*61046927SAndroid Build Coastguard Worker case BRW_OPCODE_LINE:
459*61046927SAndroid Build Coastguard Worker case BRW_OPCODE_LRP:
460*61046927SAndroid Build Coastguard Worker case BRW_OPCODE_LZD:
461*61046927SAndroid Build Coastguard Worker case BRW_OPCODE_MAC:
462*61046927SAndroid Build Coastguard Worker case BRW_OPCODE_MACH:
463*61046927SAndroid Build Coastguard Worker case BRW_OPCODE_MAD:
464*61046927SAndroid Build Coastguard Worker case BRW_OPCODE_MOV:
465*61046927SAndroid Build Coastguard Worker case BRW_OPCODE_MUL:
466*61046927SAndroid Build Coastguard Worker case BRW_OPCODE_NOT:
467*61046927SAndroid Build Coastguard Worker case BRW_OPCODE_OR:
468*61046927SAndroid Build Coastguard Worker case BRW_OPCODE_PLN:
469*61046927SAndroid Build Coastguard Worker case BRW_OPCODE_RNDD:
470*61046927SAndroid Build Coastguard Worker case BRW_OPCODE_RNDE:
471*61046927SAndroid Build Coastguard Worker case BRW_OPCODE_RNDU:
472*61046927SAndroid Build Coastguard Worker case BRW_OPCODE_RNDZ:
473*61046927SAndroid Build Coastguard Worker case BRW_OPCODE_SHL:
474*61046927SAndroid Build Coastguard Worker case BRW_OPCODE_SHR:
475*61046927SAndroid Build Coastguard Worker case BRW_OPCODE_SUBB:
476*61046927SAndroid Build Coastguard Worker case BRW_OPCODE_XOR:
477*61046927SAndroid Build Coastguard Worker break;
478*61046927SAndroid Build Coastguard Worker default:
479*61046927SAndroid Build Coastguard Worker return false;
480*61046927SAndroid Build Coastguard Worker }
481*61046927SAndroid Build Coastguard Worker
482*61046927SAndroid Build Coastguard Worker /* The accumulator result appears to get used for the conditional modifier
483*61046927SAndroid Build Coastguard Worker * generation. When negating a UD value, there is a 33rd bit generated for
484*61046927SAndroid Build Coastguard Worker * the sign in the accumulator value, so now you can't check, for example,
485*61046927SAndroid Build Coastguard Worker * equality with a 32-bit value. See piglit fs-op-neg-uvec4.
486*61046927SAndroid Build Coastguard Worker */
487*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < sources; i++) {
488*61046927SAndroid Build Coastguard Worker if (brw_type_is_uint(src[i].type) && src[i].negate)
489*61046927SAndroid Build Coastguard Worker return false;
490*61046927SAndroid Build Coastguard Worker }
491*61046927SAndroid Build Coastguard Worker
492*61046927SAndroid Build Coastguard Worker return true;
493*61046927SAndroid Build Coastguard Worker }
494*61046927SAndroid Build Coastguard Worker
495*61046927SAndroid Build Coastguard Worker bool
can_change_types() const496*61046927SAndroid Build Coastguard Worker fs_inst::can_change_types() const
497*61046927SAndroid Build Coastguard Worker {
498*61046927SAndroid Build Coastguard Worker return dst.type == src[0].type &&
499*61046927SAndroid Build Coastguard Worker !src[0].abs && !src[0].negate && !saturate && src[0].file != ATTR &&
500*61046927SAndroid Build Coastguard Worker (opcode == BRW_OPCODE_MOV ||
501*61046927SAndroid Build Coastguard Worker (opcode == SHADER_OPCODE_LOAD_PAYLOAD && sources == 1) ||
502*61046927SAndroid Build Coastguard Worker (opcode == BRW_OPCODE_SEL &&
503*61046927SAndroid Build Coastguard Worker dst.type == src[1].type &&
504*61046927SAndroid Build Coastguard Worker predicate != BRW_PREDICATE_NONE &&
505*61046927SAndroid Build Coastguard Worker !src[1].abs && !src[1].negate && src[1].file != ATTR));
506*61046927SAndroid Build Coastguard Worker }
507*61046927SAndroid Build Coastguard Worker
508*61046927SAndroid Build Coastguard Worker bool
equals(const brw_reg & r) const509*61046927SAndroid Build Coastguard Worker brw_reg::equals(const brw_reg &r) const
510*61046927SAndroid Build Coastguard Worker {
511*61046927SAndroid Build Coastguard Worker return brw_regs_equal(this, &r);
512*61046927SAndroid Build Coastguard Worker }
513*61046927SAndroid Build Coastguard Worker
514*61046927SAndroid Build Coastguard Worker bool
negative_equals(const brw_reg & r) const515*61046927SAndroid Build Coastguard Worker brw_reg::negative_equals(const brw_reg &r) const
516*61046927SAndroid Build Coastguard Worker {
517*61046927SAndroid Build Coastguard Worker return brw_regs_negative_equal(this, &r);
518*61046927SAndroid Build Coastguard Worker }
519*61046927SAndroid Build Coastguard Worker
520*61046927SAndroid Build Coastguard Worker bool
is_contiguous() const521*61046927SAndroid Build Coastguard Worker brw_reg::is_contiguous() const
522*61046927SAndroid Build Coastguard Worker {
523*61046927SAndroid Build Coastguard Worker switch (file) {
524*61046927SAndroid Build Coastguard Worker case ARF:
525*61046927SAndroid Build Coastguard Worker case FIXED_GRF:
526*61046927SAndroid Build Coastguard Worker return hstride == BRW_HORIZONTAL_STRIDE_1 &&
527*61046927SAndroid Build Coastguard Worker vstride == width + hstride;
528*61046927SAndroid Build Coastguard Worker case VGRF:
529*61046927SAndroid Build Coastguard Worker case ATTR:
530*61046927SAndroid Build Coastguard Worker return stride == 1;
531*61046927SAndroid Build Coastguard Worker case UNIFORM:
532*61046927SAndroid Build Coastguard Worker case IMM:
533*61046927SAndroid Build Coastguard Worker case BAD_FILE:
534*61046927SAndroid Build Coastguard Worker return true;
535*61046927SAndroid Build Coastguard Worker }
536*61046927SAndroid Build Coastguard Worker
537*61046927SAndroid Build Coastguard Worker unreachable("Invalid register file");
538*61046927SAndroid Build Coastguard Worker }
539*61046927SAndroid Build Coastguard Worker
540*61046927SAndroid Build Coastguard Worker unsigned
component_size(unsigned width) const541*61046927SAndroid Build Coastguard Worker brw_reg::component_size(unsigned width) const
542*61046927SAndroid Build Coastguard Worker {
543*61046927SAndroid Build Coastguard Worker if (file == ARF || file == FIXED_GRF) {
544*61046927SAndroid Build Coastguard Worker const unsigned w = MIN2(width, 1u << this->width);
545*61046927SAndroid Build Coastguard Worker const unsigned h = width >> this->width;
546*61046927SAndroid Build Coastguard Worker const unsigned vs = vstride ? 1 << (vstride - 1) : 0;
547*61046927SAndroid Build Coastguard Worker const unsigned hs = hstride ? 1 << (hstride - 1) : 0;
548*61046927SAndroid Build Coastguard Worker assert(w > 0);
549*61046927SAndroid Build Coastguard Worker /* Note this rounds up to next horizontal stride to be consistent with
550*61046927SAndroid Build Coastguard Worker * the VGRF case below.
551*61046927SAndroid Build Coastguard Worker */
552*61046927SAndroid Build Coastguard Worker return ((MAX2(1, h) - 1) * vs + MAX2(w * hs, 1)) * brw_type_size_bytes(type);
553*61046927SAndroid Build Coastguard Worker } else {
554*61046927SAndroid Build Coastguard Worker return MAX2(width * stride, 1) * brw_type_size_bytes(type);
555*61046927SAndroid Build Coastguard Worker }
556*61046927SAndroid Build Coastguard Worker }
557*61046927SAndroid Build Coastguard Worker
558*61046927SAndroid Build Coastguard Worker void
vfail(const char * format,va_list va)559*61046927SAndroid Build Coastguard Worker fs_visitor::vfail(const char *format, va_list va)
560*61046927SAndroid Build Coastguard Worker {
561*61046927SAndroid Build Coastguard Worker char *msg;
562*61046927SAndroid Build Coastguard Worker
563*61046927SAndroid Build Coastguard Worker if (failed)
564*61046927SAndroid Build Coastguard Worker return;
565*61046927SAndroid Build Coastguard Worker
566*61046927SAndroid Build Coastguard Worker failed = true;
567*61046927SAndroid Build Coastguard Worker
568*61046927SAndroid Build Coastguard Worker msg = ralloc_vasprintf(mem_ctx, format, va);
569*61046927SAndroid Build Coastguard Worker msg = ralloc_asprintf(mem_ctx, "SIMD%d %s compile failed: %s\n",
570*61046927SAndroid Build Coastguard Worker dispatch_width, _mesa_shader_stage_to_abbrev(stage), msg);
571*61046927SAndroid Build Coastguard Worker
572*61046927SAndroid Build Coastguard Worker this->fail_msg = msg;
573*61046927SAndroid Build Coastguard Worker
574*61046927SAndroid Build Coastguard Worker if (unlikely(debug_enabled)) {
575*61046927SAndroid Build Coastguard Worker fprintf(stderr, "%s", msg);
576*61046927SAndroid Build Coastguard Worker }
577*61046927SAndroid Build Coastguard Worker }
578*61046927SAndroid Build Coastguard Worker
579*61046927SAndroid Build Coastguard Worker void
fail(const char * format,...)580*61046927SAndroid Build Coastguard Worker fs_visitor::fail(const char *format, ...)
581*61046927SAndroid Build Coastguard Worker {
582*61046927SAndroid Build Coastguard Worker va_list va;
583*61046927SAndroid Build Coastguard Worker
584*61046927SAndroid Build Coastguard Worker va_start(va, format);
585*61046927SAndroid Build Coastguard Worker vfail(format, va);
586*61046927SAndroid Build Coastguard Worker va_end(va);
587*61046927SAndroid Build Coastguard Worker }
588*61046927SAndroid Build Coastguard Worker
589*61046927SAndroid Build Coastguard Worker /**
590*61046927SAndroid Build Coastguard Worker * Mark this program as impossible to compile with dispatch width greater
591*61046927SAndroid Build Coastguard Worker * than n.
592*61046927SAndroid Build Coastguard Worker *
593*61046927SAndroid Build Coastguard Worker * During the SIMD8 compile (which happens first), we can detect and flag
594*61046927SAndroid Build Coastguard Worker * things that are unsupported in SIMD16+ mode, so the compiler can skip the
595*61046927SAndroid Build Coastguard Worker * SIMD16+ compile altogether.
596*61046927SAndroid Build Coastguard Worker *
597*61046927SAndroid Build Coastguard Worker * During a compile of dispatch width greater than n (if one happens anyway),
598*61046927SAndroid Build Coastguard Worker * this just calls fail().
599*61046927SAndroid Build Coastguard Worker */
600*61046927SAndroid Build Coastguard Worker void
limit_dispatch_width(unsigned n,const char * msg)601*61046927SAndroid Build Coastguard Worker fs_visitor::limit_dispatch_width(unsigned n, const char *msg)
602*61046927SAndroid Build Coastguard Worker {
603*61046927SAndroid Build Coastguard Worker if (dispatch_width > n) {
604*61046927SAndroid Build Coastguard Worker fail("%s", msg);
605*61046927SAndroid Build Coastguard Worker } else {
606*61046927SAndroid Build Coastguard Worker max_dispatch_width = MIN2(max_dispatch_width, n);
607*61046927SAndroid Build Coastguard Worker brw_shader_perf_log(compiler, log_data,
608*61046927SAndroid Build Coastguard Worker "Shader dispatch width limited to SIMD%d: %s\n",
609*61046927SAndroid Build Coastguard Worker n, msg);
610*61046927SAndroid Build Coastguard Worker }
611*61046927SAndroid Build Coastguard Worker }
612*61046927SAndroid Build Coastguard Worker
613*61046927SAndroid Build Coastguard Worker /**
614*61046927SAndroid Build Coastguard Worker * Returns true if the instruction has a flag that means it won't
615*61046927SAndroid Build Coastguard Worker * update an entire destination register.
616*61046927SAndroid Build Coastguard Worker *
617*61046927SAndroid Build Coastguard Worker * For example, dead code elimination and live variable analysis want to know
618*61046927SAndroid Build Coastguard Worker * when a write to a variable screens off any preceding values that were in
619*61046927SAndroid Build Coastguard Worker * it.
620*61046927SAndroid Build Coastguard Worker */
621*61046927SAndroid Build Coastguard Worker bool
is_partial_write() const622*61046927SAndroid Build Coastguard Worker fs_inst::is_partial_write() const
623*61046927SAndroid Build Coastguard Worker {
624*61046927SAndroid Build Coastguard Worker if (this->predicate && !this->predicate_trivial &&
625*61046927SAndroid Build Coastguard Worker this->opcode != BRW_OPCODE_SEL)
626*61046927SAndroid Build Coastguard Worker return true;
627*61046927SAndroid Build Coastguard Worker
628*61046927SAndroid Build Coastguard Worker if (!this->dst.is_contiguous())
629*61046927SAndroid Build Coastguard Worker return true;
630*61046927SAndroid Build Coastguard Worker
631*61046927SAndroid Build Coastguard Worker if (this->dst.offset % REG_SIZE != 0)
632*61046927SAndroid Build Coastguard Worker return true;
633*61046927SAndroid Build Coastguard Worker
634*61046927SAndroid Build Coastguard Worker return this->size_written % REG_SIZE != 0;
635*61046927SAndroid Build Coastguard Worker }
636*61046927SAndroid Build Coastguard Worker
637*61046927SAndroid Build Coastguard Worker unsigned
components_read(unsigned i) const638*61046927SAndroid Build Coastguard Worker fs_inst::components_read(unsigned i) const
639*61046927SAndroid Build Coastguard Worker {
640*61046927SAndroid Build Coastguard Worker /* Return zero if the source is not present. */
641*61046927SAndroid Build Coastguard Worker if (src[i].file == BAD_FILE)
642*61046927SAndroid Build Coastguard Worker return 0;
643*61046927SAndroid Build Coastguard Worker
644*61046927SAndroid Build Coastguard Worker switch (opcode) {
645*61046927SAndroid Build Coastguard Worker case BRW_OPCODE_PLN:
646*61046927SAndroid Build Coastguard Worker return i == 0 ? 1 : 2;
647*61046927SAndroid Build Coastguard Worker
648*61046927SAndroid Build Coastguard Worker case FS_OPCODE_PIXEL_X:
649*61046927SAndroid Build Coastguard Worker case FS_OPCODE_PIXEL_Y:
650*61046927SAndroid Build Coastguard Worker assert(i < 2);
651*61046927SAndroid Build Coastguard Worker if (i == 0)
652*61046927SAndroid Build Coastguard Worker return 2;
653*61046927SAndroid Build Coastguard Worker else
654*61046927SAndroid Build Coastguard Worker return 1;
655*61046927SAndroid Build Coastguard Worker
656*61046927SAndroid Build Coastguard Worker case FS_OPCODE_FB_WRITE_LOGICAL:
657*61046927SAndroid Build Coastguard Worker assert(src[FB_WRITE_LOGICAL_SRC_COMPONENTS].file == IMM);
658*61046927SAndroid Build Coastguard Worker /* First/second FB write color. */
659*61046927SAndroid Build Coastguard Worker if (i < 2)
660*61046927SAndroid Build Coastguard Worker return src[FB_WRITE_LOGICAL_SRC_COMPONENTS].ud;
661*61046927SAndroid Build Coastguard Worker else
662*61046927SAndroid Build Coastguard Worker return 1;
663*61046927SAndroid Build Coastguard Worker
664*61046927SAndroid Build Coastguard Worker case SHADER_OPCODE_TEX_LOGICAL:
665*61046927SAndroid Build Coastguard Worker case SHADER_OPCODE_TXD_LOGICAL:
666*61046927SAndroid Build Coastguard Worker case SHADER_OPCODE_TXF_LOGICAL:
667*61046927SAndroid Build Coastguard Worker case SHADER_OPCODE_TXL_LOGICAL:
668*61046927SAndroid Build Coastguard Worker case SHADER_OPCODE_TXS_LOGICAL:
669*61046927SAndroid Build Coastguard Worker case SHADER_OPCODE_IMAGE_SIZE_LOGICAL:
670*61046927SAndroid Build Coastguard Worker case FS_OPCODE_TXB_LOGICAL:
671*61046927SAndroid Build Coastguard Worker case SHADER_OPCODE_TXF_CMS_W_LOGICAL:
672*61046927SAndroid Build Coastguard Worker case SHADER_OPCODE_TXF_CMS_W_GFX12_LOGICAL:
673*61046927SAndroid Build Coastguard Worker case SHADER_OPCODE_TXF_MCS_LOGICAL:
674*61046927SAndroid Build Coastguard Worker case SHADER_OPCODE_LOD_LOGICAL:
675*61046927SAndroid Build Coastguard Worker case SHADER_OPCODE_TG4_LOGICAL:
676*61046927SAndroid Build Coastguard Worker case SHADER_OPCODE_TG4_OFFSET_LOGICAL:
677*61046927SAndroid Build Coastguard Worker case SHADER_OPCODE_TG4_BIAS_LOGICAL:
678*61046927SAndroid Build Coastguard Worker case SHADER_OPCODE_TG4_EXPLICIT_LOD_LOGICAL:
679*61046927SAndroid Build Coastguard Worker case SHADER_OPCODE_TG4_IMPLICIT_LOD_LOGICAL:
680*61046927SAndroid Build Coastguard Worker case SHADER_OPCODE_TG4_OFFSET_LOD_LOGICAL:
681*61046927SAndroid Build Coastguard Worker case SHADER_OPCODE_TG4_OFFSET_BIAS_LOGICAL:
682*61046927SAndroid Build Coastguard Worker case SHADER_OPCODE_SAMPLEINFO_LOGICAL:
683*61046927SAndroid Build Coastguard Worker assert(src[TEX_LOGICAL_SRC_COORD_COMPONENTS].file == IMM &&
684*61046927SAndroid Build Coastguard Worker src[TEX_LOGICAL_SRC_GRAD_COMPONENTS].file == IMM &&
685*61046927SAndroid Build Coastguard Worker src[TEX_LOGICAL_SRC_RESIDENCY].file == IMM);
686*61046927SAndroid Build Coastguard Worker /* Texture coordinates. */
687*61046927SAndroid Build Coastguard Worker if (i == TEX_LOGICAL_SRC_COORDINATE)
688*61046927SAndroid Build Coastguard Worker return src[TEX_LOGICAL_SRC_COORD_COMPONENTS].ud;
689*61046927SAndroid Build Coastguard Worker /* Texture derivatives. */
690*61046927SAndroid Build Coastguard Worker else if ((i == TEX_LOGICAL_SRC_LOD || i == TEX_LOGICAL_SRC_LOD2) &&
691*61046927SAndroid Build Coastguard Worker opcode == SHADER_OPCODE_TXD_LOGICAL)
692*61046927SAndroid Build Coastguard Worker return src[TEX_LOGICAL_SRC_GRAD_COMPONENTS].ud;
693*61046927SAndroid Build Coastguard Worker /* Texture offset. */
694*61046927SAndroid Build Coastguard Worker else if (i == TEX_LOGICAL_SRC_TG4_OFFSET)
695*61046927SAndroid Build Coastguard Worker return 2;
696*61046927SAndroid Build Coastguard Worker /* MCS */
697*61046927SAndroid Build Coastguard Worker else if (i == TEX_LOGICAL_SRC_MCS) {
698*61046927SAndroid Build Coastguard Worker if (opcode == SHADER_OPCODE_TXF_CMS_W_LOGICAL)
699*61046927SAndroid Build Coastguard Worker return 2;
700*61046927SAndroid Build Coastguard Worker else if (opcode == SHADER_OPCODE_TXF_CMS_W_GFX12_LOGICAL)
701*61046927SAndroid Build Coastguard Worker return 4;
702*61046927SAndroid Build Coastguard Worker else
703*61046927SAndroid Build Coastguard Worker return 1;
704*61046927SAndroid Build Coastguard Worker } else
705*61046927SAndroid Build Coastguard Worker return 1;
706*61046927SAndroid Build Coastguard Worker
707*61046927SAndroid Build Coastguard Worker case SHADER_OPCODE_MEMORY_LOAD_LOGICAL:
708*61046927SAndroid Build Coastguard Worker if (i == MEMORY_LOGICAL_DATA0 || i == MEMORY_LOGICAL_DATA0)
709*61046927SAndroid Build Coastguard Worker return 0;
710*61046927SAndroid Build Coastguard Worker /* fallthrough */
711*61046927SAndroid Build Coastguard Worker case SHADER_OPCODE_MEMORY_STORE_LOGICAL:
712*61046927SAndroid Build Coastguard Worker if (i == MEMORY_LOGICAL_DATA1)
713*61046927SAndroid Build Coastguard Worker return 0;
714*61046927SAndroid Build Coastguard Worker /* fallthrough */
715*61046927SAndroid Build Coastguard Worker case SHADER_OPCODE_MEMORY_ATOMIC_LOGICAL:
716*61046927SAndroid Build Coastguard Worker if (i == MEMORY_LOGICAL_DATA0 || i == MEMORY_LOGICAL_DATA1)
717*61046927SAndroid Build Coastguard Worker return src[MEMORY_LOGICAL_COMPONENTS].ud;
718*61046927SAndroid Build Coastguard Worker else if (i == MEMORY_LOGICAL_ADDRESS)
719*61046927SAndroid Build Coastguard Worker return src[MEMORY_LOGICAL_COORD_COMPONENTS].ud;
720*61046927SAndroid Build Coastguard Worker else
721*61046927SAndroid Build Coastguard Worker return 1;
722*61046927SAndroid Build Coastguard Worker
723*61046927SAndroid Build Coastguard Worker case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
724*61046927SAndroid Build Coastguard Worker return (i == 0 ? 2 : 1);
725*61046927SAndroid Build Coastguard Worker
726*61046927SAndroid Build Coastguard Worker case SHADER_OPCODE_URB_WRITE_LOGICAL:
727*61046927SAndroid Build Coastguard Worker assert(src[URB_LOGICAL_SRC_COMPONENTS].file == IMM);
728*61046927SAndroid Build Coastguard Worker
729*61046927SAndroid Build Coastguard Worker if (i == URB_LOGICAL_SRC_DATA)
730*61046927SAndroid Build Coastguard Worker return src[URB_LOGICAL_SRC_COMPONENTS].ud;
731*61046927SAndroid Build Coastguard Worker else
732*61046927SAndroid Build Coastguard Worker return 1;
733*61046927SAndroid Build Coastguard Worker
734*61046927SAndroid Build Coastguard Worker case BRW_OPCODE_DPAS:
735*61046927SAndroid Build Coastguard Worker unreachable("Do not use components_read() for DPAS.");
736*61046927SAndroid Build Coastguard Worker
737*61046927SAndroid Build Coastguard Worker default:
738*61046927SAndroid Build Coastguard Worker return 1;
739*61046927SAndroid Build Coastguard Worker }
740*61046927SAndroid Build Coastguard Worker }
741*61046927SAndroid Build Coastguard Worker
742*61046927SAndroid Build Coastguard Worker unsigned
size_read(int arg) const743*61046927SAndroid Build Coastguard Worker fs_inst::size_read(int arg) const
744*61046927SAndroid Build Coastguard Worker {
745*61046927SAndroid Build Coastguard Worker switch (opcode) {
746*61046927SAndroid Build Coastguard Worker case SHADER_OPCODE_SEND:
747*61046927SAndroid Build Coastguard Worker if (arg == 2) {
748*61046927SAndroid Build Coastguard Worker return mlen * REG_SIZE;
749*61046927SAndroid Build Coastguard Worker } else if (arg == 3) {
750*61046927SAndroid Build Coastguard Worker return ex_mlen * REG_SIZE;
751*61046927SAndroid Build Coastguard Worker }
752*61046927SAndroid Build Coastguard Worker break;
753*61046927SAndroid Build Coastguard Worker
754*61046927SAndroid Build Coastguard Worker case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
755*61046927SAndroid Build Coastguard Worker case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
756*61046927SAndroid Build Coastguard Worker if (arg == 0)
757*61046927SAndroid Build Coastguard Worker return mlen * REG_SIZE;
758*61046927SAndroid Build Coastguard Worker break;
759*61046927SAndroid Build Coastguard Worker
760*61046927SAndroid Build Coastguard Worker case BRW_OPCODE_PLN:
761*61046927SAndroid Build Coastguard Worker if (arg == 0)
762*61046927SAndroid Build Coastguard Worker return 16;
763*61046927SAndroid Build Coastguard Worker break;
764*61046927SAndroid Build Coastguard Worker
765*61046927SAndroid Build Coastguard Worker case SHADER_OPCODE_LOAD_PAYLOAD:
766*61046927SAndroid Build Coastguard Worker if (arg < this->header_size)
767*61046927SAndroid Build Coastguard Worker return retype(src[arg], BRW_TYPE_UD).component_size(8);
768*61046927SAndroid Build Coastguard Worker break;
769*61046927SAndroid Build Coastguard Worker
770*61046927SAndroid Build Coastguard Worker case SHADER_OPCODE_BARRIER:
771*61046927SAndroid Build Coastguard Worker return REG_SIZE;
772*61046927SAndroid Build Coastguard Worker
773*61046927SAndroid Build Coastguard Worker case SHADER_OPCODE_MOV_INDIRECT:
774*61046927SAndroid Build Coastguard Worker if (arg == 0) {
775*61046927SAndroid Build Coastguard Worker assert(src[2].file == IMM);
776*61046927SAndroid Build Coastguard Worker return src[2].ud;
777*61046927SAndroid Build Coastguard Worker }
778*61046927SAndroid Build Coastguard Worker break;
779*61046927SAndroid Build Coastguard Worker
780*61046927SAndroid Build Coastguard Worker case BRW_OPCODE_DPAS: {
781*61046927SAndroid Build Coastguard Worker /* This is a little bit sketchy. There's no way to get at devinfo from
782*61046927SAndroid Build Coastguard Worker * here, so the regular reg_unit() cannot be used. However, on
783*61046927SAndroid Build Coastguard Worker * reg_unit() == 1 platforms, DPAS exec_size must be 8, and on known
784*61046927SAndroid Build Coastguard Worker * reg_unit() == 2 platforms, DPAS exec_size must be 16. This is not a
785*61046927SAndroid Build Coastguard Worker * coincidence, so this isn't so bad.
786*61046927SAndroid Build Coastguard Worker */
787*61046927SAndroid Build Coastguard Worker const unsigned reg_unit = this->exec_size / 8;
788*61046927SAndroid Build Coastguard Worker
789*61046927SAndroid Build Coastguard Worker switch (arg) {
790*61046927SAndroid Build Coastguard Worker case 0:
791*61046927SAndroid Build Coastguard Worker if (src[0].type == BRW_TYPE_HF) {
792*61046927SAndroid Build Coastguard Worker return rcount * reg_unit * REG_SIZE / 2;
793*61046927SAndroid Build Coastguard Worker } else {
794*61046927SAndroid Build Coastguard Worker return rcount * reg_unit * REG_SIZE;
795*61046927SAndroid Build Coastguard Worker }
796*61046927SAndroid Build Coastguard Worker case 1:
797*61046927SAndroid Build Coastguard Worker return sdepth * reg_unit * REG_SIZE;
798*61046927SAndroid Build Coastguard Worker case 2:
799*61046927SAndroid Build Coastguard Worker /* This is simpler than the formula described in the Bspec, but it
800*61046927SAndroid Build Coastguard Worker * covers all of the cases that we support. Each inner sdepth
801*61046927SAndroid Build Coastguard Worker * iteration of the DPAS consumes a single dword for int8, uint8, or
802*61046927SAndroid Build Coastguard Worker * float16 types. These are the one source types currently
803*61046927SAndroid Build Coastguard Worker * supportable through Vulkan. This is independent of reg_unit.
804*61046927SAndroid Build Coastguard Worker */
805*61046927SAndroid Build Coastguard Worker return rcount * sdepth * 4;
806*61046927SAndroid Build Coastguard Worker default:
807*61046927SAndroid Build Coastguard Worker unreachable("Invalid source number.");
808*61046927SAndroid Build Coastguard Worker }
809*61046927SAndroid Build Coastguard Worker break;
810*61046927SAndroid Build Coastguard Worker }
811*61046927SAndroid Build Coastguard Worker
812*61046927SAndroid Build Coastguard Worker default:
813*61046927SAndroid Build Coastguard Worker break;
814*61046927SAndroid Build Coastguard Worker }
815*61046927SAndroid Build Coastguard Worker
816*61046927SAndroid Build Coastguard Worker switch (src[arg].file) {
817*61046927SAndroid Build Coastguard Worker case UNIFORM:
818*61046927SAndroid Build Coastguard Worker case IMM:
819*61046927SAndroid Build Coastguard Worker return components_read(arg) * brw_type_size_bytes(src[arg].type);
820*61046927SAndroid Build Coastguard Worker case BAD_FILE:
821*61046927SAndroid Build Coastguard Worker case ARF:
822*61046927SAndroid Build Coastguard Worker case FIXED_GRF:
823*61046927SAndroid Build Coastguard Worker case VGRF:
824*61046927SAndroid Build Coastguard Worker case ATTR:
825*61046927SAndroid Build Coastguard Worker return components_read(arg) * src[arg].component_size(exec_size);
826*61046927SAndroid Build Coastguard Worker }
827*61046927SAndroid Build Coastguard Worker return 0;
828*61046927SAndroid Build Coastguard Worker }
829*61046927SAndroid Build Coastguard Worker
830*61046927SAndroid Build Coastguard Worker namespace {
831*61046927SAndroid Build Coastguard Worker unsigned
predicate_width(const intel_device_info * devinfo,brw_predicate predicate)832*61046927SAndroid Build Coastguard Worker predicate_width(const intel_device_info *devinfo, brw_predicate predicate)
833*61046927SAndroid Build Coastguard Worker {
834*61046927SAndroid Build Coastguard Worker if (devinfo->ver >= 20) {
835*61046927SAndroid Build Coastguard Worker return 1;
836*61046927SAndroid Build Coastguard Worker } else {
837*61046927SAndroid Build Coastguard Worker switch (predicate) {
838*61046927SAndroid Build Coastguard Worker case BRW_PREDICATE_NONE: return 1;
839*61046927SAndroid Build Coastguard Worker case BRW_PREDICATE_NORMAL: return 1;
840*61046927SAndroid Build Coastguard Worker case BRW_PREDICATE_ALIGN1_ANY2H: return 2;
841*61046927SAndroid Build Coastguard Worker case BRW_PREDICATE_ALIGN1_ALL2H: return 2;
842*61046927SAndroid Build Coastguard Worker case BRW_PREDICATE_ALIGN1_ANY4H: return 4;
843*61046927SAndroid Build Coastguard Worker case BRW_PREDICATE_ALIGN1_ALL4H: return 4;
844*61046927SAndroid Build Coastguard Worker case BRW_PREDICATE_ALIGN1_ANY8H: return 8;
845*61046927SAndroid Build Coastguard Worker case BRW_PREDICATE_ALIGN1_ALL8H: return 8;
846*61046927SAndroid Build Coastguard Worker case BRW_PREDICATE_ALIGN1_ANY16H: return 16;
847*61046927SAndroid Build Coastguard Worker case BRW_PREDICATE_ALIGN1_ALL16H: return 16;
848*61046927SAndroid Build Coastguard Worker case BRW_PREDICATE_ALIGN1_ANY32H: return 32;
849*61046927SAndroid Build Coastguard Worker case BRW_PREDICATE_ALIGN1_ALL32H: return 32;
850*61046927SAndroid Build Coastguard Worker default: unreachable("Unsupported predicate");
851*61046927SAndroid Build Coastguard Worker }
852*61046927SAndroid Build Coastguard Worker }
853*61046927SAndroid Build Coastguard Worker }
854*61046927SAndroid Build Coastguard Worker }
855*61046927SAndroid Build Coastguard Worker
856*61046927SAndroid Build Coastguard Worker unsigned
flags_read(const intel_device_info * devinfo) const857*61046927SAndroid Build Coastguard Worker fs_inst::flags_read(const intel_device_info *devinfo) const
858*61046927SAndroid Build Coastguard Worker {
859*61046927SAndroid Build Coastguard Worker if (devinfo->ver < 20 && (predicate == BRW_PREDICATE_ALIGN1_ANYV ||
860*61046927SAndroid Build Coastguard Worker predicate == BRW_PREDICATE_ALIGN1_ALLV)) {
861*61046927SAndroid Build Coastguard Worker /* The vertical predication modes combine corresponding bits from
862*61046927SAndroid Build Coastguard Worker * f0.0 and f1.0 on Gfx7+.
863*61046927SAndroid Build Coastguard Worker */
864*61046927SAndroid Build Coastguard Worker const unsigned shift = 4;
865*61046927SAndroid Build Coastguard Worker return brw_fs_flag_mask(this, 1) << shift | brw_fs_flag_mask(this, 1);
866*61046927SAndroid Build Coastguard Worker } else if (predicate) {
867*61046927SAndroid Build Coastguard Worker return brw_fs_flag_mask(this, predicate_width(devinfo, predicate));
868*61046927SAndroid Build Coastguard Worker } else {
869*61046927SAndroid Build Coastguard Worker unsigned mask = 0;
870*61046927SAndroid Build Coastguard Worker for (int i = 0; i < sources; i++) {
871*61046927SAndroid Build Coastguard Worker mask |= brw_fs_flag_mask(src[i], size_read(i));
872*61046927SAndroid Build Coastguard Worker }
873*61046927SAndroid Build Coastguard Worker return mask;
874*61046927SAndroid Build Coastguard Worker }
875*61046927SAndroid Build Coastguard Worker }
876*61046927SAndroid Build Coastguard Worker
877*61046927SAndroid Build Coastguard Worker unsigned
flags_written(const intel_device_info * devinfo) const878*61046927SAndroid Build Coastguard Worker fs_inst::flags_written(const intel_device_info *devinfo) const
879*61046927SAndroid Build Coastguard Worker {
880*61046927SAndroid Build Coastguard Worker if (conditional_mod && (opcode != BRW_OPCODE_SEL &&
881*61046927SAndroid Build Coastguard Worker opcode != BRW_OPCODE_CSEL &&
882*61046927SAndroid Build Coastguard Worker opcode != BRW_OPCODE_IF &&
883*61046927SAndroid Build Coastguard Worker opcode != BRW_OPCODE_WHILE)) {
884*61046927SAndroid Build Coastguard Worker return brw_fs_flag_mask(this, 1);
885*61046927SAndroid Build Coastguard Worker } else if (opcode == FS_OPCODE_LOAD_LIVE_CHANNELS) {
886*61046927SAndroid Build Coastguard Worker return brw_fs_flag_mask(this, 32);
887*61046927SAndroid Build Coastguard Worker } else {
888*61046927SAndroid Build Coastguard Worker return brw_fs_flag_mask(dst, size_written);
889*61046927SAndroid Build Coastguard Worker }
890*61046927SAndroid Build Coastguard Worker }
891*61046927SAndroid Build Coastguard Worker
892*61046927SAndroid Build Coastguard Worker bool
has_sampler_residency() const893*61046927SAndroid Build Coastguard Worker fs_inst::has_sampler_residency() const
894*61046927SAndroid Build Coastguard Worker {
895*61046927SAndroid Build Coastguard Worker switch (opcode) {
896*61046927SAndroid Build Coastguard Worker case SHADER_OPCODE_TEX_LOGICAL:
897*61046927SAndroid Build Coastguard Worker case FS_OPCODE_TXB_LOGICAL:
898*61046927SAndroid Build Coastguard Worker case SHADER_OPCODE_TXL_LOGICAL:
899*61046927SAndroid Build Coastguard Worker case SHADER_OPCODE_TXD_LOGICAL:
900*61046927SAndroid Build Coastguard Worker case SHADER_OPCODE_TXF_LOGICAL:
901*61046927SAndroid Build Coastguard Worker case SHADER_OPCODE_TXF_CMS_W_GFX12_LOGICAL:
902*61046927SAndroid Build Coastguard Worker case SHADER_OPCODE_TXF_CMS_W_LOGICAL:
903*61046927SAndroid Build Coastguard Worker case SHADER_OPCODE_TXS_LOGICAL:
904*61046927SAndroid Build Coastguard Worker case SHADER_OPCODE_TG4_OFFSET_LOGICAL:
905*61046927SAndroid Build Coastguard Worker case SHADER_OPCODE_TG4_LOGICAL:
906*61046927SAndroid Build Coastguard Worker case SHADER_OPCODE_TG4_BIAS_LOGICAL:
907*61046927SAndroid Build Coastguard Worker case SHADER_OPCODE_TG4_EXPLICIT_LOD_LOGICAL:
908*61046927SAndroid Build Coastguard Worker case SHADER_OPCODE_TG4_IMPLICIT_LOD_LOGICAL:
909*61046927SAndroid Build Coastguard Worker case SHADER_OPCODE_TG4_OFFSET_LOD_LOGICAL:
910*61046927SAndroid Build Coastguard Worker case SHADER_OPCODE_TG4_OFFSET_BIAS_LOGICAL:
911*61046927SAndroid Build Coastguard Worker assert(src[TEX_LOGICAL_SRC_RESIDENCY].file == IMM);
912*61046927SAndroid Build Coastguard Worker return src[TEX_LOGICAL_SRC_RESIDENCY].ud != 0;
913*61046927SAndroid Build Coastguard Worker default:
914*61046927SAndroid Build Coastguard Worker return false;
915*61046927SAndroid Build Coastguard Worker }
916*61046927SAndroid Build Coastguard Worker }
917*61046927SAndroid Build Coastguard Worker
918*61046927SAndroid Build Coastguard Worker /* \sa inst_is_raw_move in brw_eu_validate. */
919*61046927SAndroid Build Coastguard Worker bool
is_raw_move() const920*61046927SAndroid Build Coastguard Worker fs_inst::is_raw_move() const
921*61046927SAndroid Build Coastguard Worker {
922*61046927SAndroid Build Coastguard Worker if (opcode != BRW_OPCODE_MOV)
923*61046927SAndroid Build Coastguard Worker return false;
924*61046927SAndroid Build Coastguard Worker
925*61046927SAndroid Build Coastguard Worker if (src[0].file == IMM) {
926*61046927SAndroid Build Coastguard Worker if (brw_type_is_vector_imm(src[0].type))
927*61046927SAndroid Build Coastguard Worker return false;
928*61046927SAndroid Build Coastguard Worker } else if (src[0].negate || src[0].abs) {
929*61046927SAndroid Build Coastguard Worker return false;
930*61046927SAndroid Build Coastguard Worker }
931*61046927SAndroid Build Coastguard Worker
932*61046927SAndroid Build Coastguard Worker if (saturate)
933*61046927SAndroid Build Coastguard Worker return false;
934*61046927SAndroid Build Coastguard Worker
935*61046927SAndroid Build Coastguard Worker return src[0].type == dst.type ||
936*61046927SAndroid Build Coastguard Worker (brw_type_is_int(src[0].type) &&
937*61046927SAndroid Build Coastguard Worker brw_type_is_int(dst.type) &&
938*61046927SAndroid Build Coastguard Worker brw_type_size_bits(src[0].type) == brw_type_size_bits(dst.type));
939*61046927SAndroid Build Coastguard Worker }
940*61046927SAndroid Build Coastguard Worker
941*61046927SAndroid Build Coastguard Worker /* For SIMD16, we need to follow from the uniform setup of SIMD8 dispatch.
942*61046927SAndroid Build Coastguard Worker * This brings in those uniform definitions
943*61046927SAndroid Build Coastguard Worker */
944*61046927SAndroid Build Coastguard Worker void
import_uniforms(fs_visitor * v)945*61046927SAndroid Build Coastguard Worker fs_visitor::import_uniforms(fs_visitor *v)
946*61046927SAndroid Build Coastguard Worker {
947*61046927SAndroid Build Coastguard Worker this->push_constant_loc = v->push_constant_loc;
948*61046927SAndroid Build Coastguard Worker this->uniforms = v->uniforms;
949*61046927SAndroid Build Coastguard Worker }
950*61046927SAndroid Build Coastguard Worker
951*61046927SAndroid Build Coastguard Worker enum brw_barycentric_mode
brw_barycentric_mode(const struct brw_wm_prog_key * key,nir_intrinsic_instr * intr)952*61046927SAndroid Build Coastguard Worker brw_barycentric_mode(const struct brw_wm_prog_key *key,
953*61046927SAndroid Build Coastguard Worker nir_intrinsic_instr *intr)
954*61046927SAndroid Build Coastguard Worker {
955*61046927SAndroid Build Coastguard Worker const glsl_interp_mode mode =
956*61046927SAndroid Build Coastguard Worker (enum glsl_interp_mode) nir_intrinsic_interp_mode(intr);
957*61046927SAndroid Build Coastguard Worker
958*61046927SAndroid Build Coastguard Worker /* Barycentric modes don't make sense for flat inputs. */
959*61046927SAndroid Build Coastguard Worker assert(mode != INTERP_MODE_FLAT);
960*61046927SAndroid Build Coastguard Worker
961*61046927SAndroid Build Coastguard Worker unsigned bary;
962*61046927SAndroid Build Coastguard Worker switch (intr->intrinsic) {
963*61046927SAndroid Build Coastguard Worker case nir_intrinsic_load_barycentric_pixel:
964*61046927SAndroid Build Coastguard Worker case nir_intrinsic_load_barycentric_at_offset:
965*61046927SAndroid Build Coastguard Worker /* When per sample interpolation is dynamic, assume sample
966*61046927SAndroid Build Coastguard Worker * interpolation. We'll dynamically remap things so that the FS thread
967*61046927SAndroid Build Coastguard Worker * payload is not affected.
968*61046927SAndroid Build Coastguard Worker */
969*61046927SAndroid Build Coastguard Worker bary = key->persample_interp == BRW_SOMETIMES ?
970*61046927SAndroid Build Coastguard Worker BRW_BARYCENTRIC_PERSPECTIVE_SAMPLE :
971*61046927SAndroid Build Coastguard Worker BRW_BARYCENTRIC_PERSPECTIVE_PIXEL;
972*61046927SAndroid Build Coastguard Worker break;
973*61046927SAndroid Build Coastguard Worker case nir_intrinsic_load_barycentric_centroid:
974*61046927SAndroid Build Coastguard Worker bary = BRW_BARYCENTRIC_PERSPECTIVE_CENTROID;
975*61046927SAndroid Build Coastguard Worker break;
976*61046927SAndroid Build Coastguard Worker case nir_intrinsic_load_barycentric_sample:
977*61046927SAndroid Build Coastguard Worker case nir_intrinsic_load_barycentric_at_sample:
978*61046927SAndroid Build Coastguard Worker bary = BRW_BARYCENTRIC_PERSPECTIVE_SAMPLE;
979*61046927SAndroid Build Coastguard Worker break;
980*61046927SAndroid Build Coastguard Worker default:
981*61046927SAndroid Build Coastguard Worker unreachable("invalid intrinsic");
982*61046927SAndroid Build Coastguard Worker }
983*61046927SAndroid Build Coastguard Worker
984*61046927SAndroid Build Coastguard Worker if (mode == INTERP_MODE_NOPERSPECTIVE)
985*61046927SAndroid Build Coastguard Worker bary += 3;
986*61046927SAndroid Build Coastguard Worker
987*61046927SAndroid Build Coastguard Worker return (enum brw_barycentric_mode) bary;
988*61046927SAndroid Build Coastguard Worker }
989*61046927SAndroid Build Coastguard Worker
990*61046927SAndroid Build Coastguard Worker /**
991*61046927SAndroid Build Coastguard Worker * Walk backwards from the end of the program looking for a URB write that
992*61046927SAndroid Build Coastguard Worker * isn't in control flow, and mark it with EOT.
993*61046927SAndroid Build Coastguard Worker *
994*61046927SAndroid Build Coastguard Worker * Return true if successful or false if a separate EOT write is needed.
995*61046927SAndroid Build Coastguard Worker */
996*61046927SAndroid Build Coastguard Worker bool
mark_last_urb_write_with_eot()997*61046927SAndroid Build Coastguard Worker fs_visitor::mark_last_urb_write_with_eot()
998*61046927SAndroid Build Coastguard Worker {
999*61046927SAndroid Build Coastguard Worker foreach_in_list_reverse(fs_inst, prev, &this->instructions) {
1000*61046927SAndroid Build Coastguard Worker if (prev->opcode == SHADER_OPCODE_URB_WRITE_LOGICAL) {
1001*61046927SAndroid Build Coastguard Worker prev->eot = true;
1002*61046927SAndroid Build Coastguard Worker
1003*61046927SAndroid Build Coastguard Worker /* Delete now dead instructions. */
1004*61046927SAndroid Build Coastguard Worker foreach_in_list_reverse_safe(exec_node, dead, &this->instructions) {
1005*61046927SAndroid Build Coastguard Worker if (dead == prev)
1006*61046927SAndroid Build Coastguard Worker break;
1007*61046927SAndroid Build Coastguard Worker dead->remove();
1008*61046927SAndroid Build Coastguard Worker }
1009*61046927SAndroid Build Coastguard Worker return true;
1010*61046927SAndroid Build Coastguard Worker } else if (prev->is_control_flow() || prev->has_side_effects()) {
1011*61046927SAndroid Build Coastguard Worker break;
1012*61046927SAndroid Build Coastguard Worker }
1013*61046927SAndroid Build Coastguard Worker }
1014*61046927SAndroid Build Coastguard Worker
1015*61046927SAndroid Build Coastguard Worker return false;
1016*61046927SAndroid Build Coastguard Worker }
1017*61046927SAndroid Build Coastguard Worker
1018*61046927SAndroid Build Coastguard Worker static unsigned
round_components_to_whole_registers(const intel_device_info * devinfo,unsigned c)1019*61046927SAndroid Build Coastguard Worker round_components_to_whole_registers(const intel_device_info *devinfo,
1020*61046927SAndroid Build Coastguard Worker unsigned c)
1021*61046927SAndroid Build Coastguard Worker {
1022*61046927SAndroid Build Coastguard Worker return DIV_ROUND_UP(c, 8 * reg_unit(devinfo)) * reg_unit(devinfo);
1023*61046927SAndroid Build Coastguard Worker }
1024*61046927SAndroid Build Coastguard Worker
1025*61046927SAndroid Build Coastguard Worker void
assign_curb_setup()1026*61046927SAndroid Build Coastguard Worker fs_visitor::assign_curb_setup()
1027*61046927SAndroid Build Coastguard Worker {
1028*61046927SAndroid Build Coastguard Worker unsigned uniform_push_length =
1029*61046927SAndroid Build Coastguard Worker round_components_to_whole_registers(devinfo, prog_data->nr_params);
1030*61046927SAndroid Build Coastguard Worker
1031*61046927SAndroid Build Coastguard Worker unsigned ubo_push_length = 0;
1032*61046927SAndroid Build Coastguard Worker unsigned ubo_push_start[4];
1033*61046927SAndroid Build Coastguard Worker for (int i = 0; i < 4; i++) {
1034*61046927SAndroid Build Coastguard Worker ubo_push_start[i] = 8 * (ubo_push_length + uniform_push_length);
1035*61046927SAndroid Build Coastguard Worker ubo_push_length += prog_data->ubo_ranges[i].length;
1036*61046927SAndroid Build Coastguard Worker
1037*61046927SAndroid Build Coastguard Worker assert(ubo_push_start[i] % (8 * reg_unit(devinfo)) == 0);
1038*61046927SAndroid Build Coastguard Worker assert(ubo_push_length % (1 * reg_unit(devinfo)) == 0);
1039*61046927SAndroid Build Coastguard Worker }
1040*61046927SAndroid Build Coastguard Worker
1041*61046927SAndroid Build Coastguard Worker prog_data->curb_read_length = uniform_push_length + ubo_push_length;
1042*61046927SAndroid Build Coastguard Worker if (stage == MESA_SHADER_FRAGMENT &&
1043*61046927SAndroid Build Coastguard Worker ((struct brw_wm_prog_key *)key)->null_push_constant_tbimr_workaround)
1044*61046927SAndroid Build Coastguard Worker prog_data->curb_read_length = MAX2(1, prog_data->curb_read_length);
1045*61046927SAndroid Build Coastguard Worker
1046*61046927SAndroid Build Coastguard Worker uint64_t used = 0;
1047*61046927SAndroid Build Coastguard Worker bool is_compute = gl_shader_stage_is_compute(stage);
1048*61046927SAndroid Build Coastguard Worker
1049*61046927SAndroid Build Coastguard Worker if (is_compute && brw_cs_prog_data(prog_data)->uses_inline_data) {
1050*61046927SAndroid Build Coastguard Worker /* With COMPUTE_WALKER, we can push up to one register worth of data via
1051*61046927SAndroid Build Coastguard Worker * the inline data parameter in the COMPUTE_WALKER command itself.
1052*61046927SAndroid Build Coastguard Worker *
1053*61046927SAndroid Build Coastguard Worker * TODO: Support inline data and push at the same time.
1054*61046927SAndroid Build Coastguard Worker */
1055*61046927SAndroid Build Coastguard Worker assert(devinfo->verx10 >= 125);
1056*61046927SAndroid Build Coastguard Worker assert(uniform_push_length <= reg_unit(devinfo));
1057*61046927SAndroid Build Coastguard Worker } else if (is_compute && devinfo->verx10 >= 125 && uniform_push_length > 0) {
1058*61046927SAndroid Build Coastguard Worker assert(devinfo->has_lsc);
1059*61046927SAndroid Build Coastguard Worker fs_builder ubld = fs_builder(this, 1).exec_all().at(
1060*61046927SAndroid Build Coastguard Worker cfg->first_block(), cfg->first_block()->start());
1061*61046927SAndroid Build Coastguard Worker
1062*61046927SAndroid Build Coastguard Worker /* The base offset for our push data is passed in as R0.0[31:6]. We have
1063*61046927SAndroid Build Coastguard Worker * to mask off the bottom 6 bits.
1064*61046927SAndroid Build Coastguard Worker */
1065*61046927SAndroid Build Coastguard Worker brw_reg base_addr =
1066*61046927SAndroid Build Coastguard Worker ubld.AND(retype(brw_vec1_grf(0, 0), BRW_TYPE_UD),
1067*61046927SAndroid Build Coastguard Worker brw_imm_ud(INTEL_MASK(31, 6)));
1068*61046927SAndroid Build Coastguard Worker
1069*61046927SAndroid Build Coastguard Worker /* On Gfx12-HP we load constants at the start of the program using A32
1070*61046927SAndroid Build Coastguard Worker * stateless messages.
1071*61046927SAndroid Build Coastguard Worker */
1072*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < uniform_push_length;) {
1073*61046927SAndroid Build Coastguard Worker /* Limit ourselves to LSC HW limit of 8 GRFs (256bytes D32V64). */
1074*61046927SAndroid Build Coastguard Worker unsigned num_regs = MIN2(uniform_push_length - i, 8);
1075*61046927SAndroid Build Coastguard Worker assert(num_regs > 0);
1076*61046927SAndroid Build Coastguard Worker num_regs = 1 << util_logbase2(num_regs);
1077*61046927SAndroid Build Coastguard Worker
1078*61046927SAndroid Build Coastguard Worker /* This pass occurs after all of the optimization passes, so don't
1079*61046927SAndroid Build Coastguard Worker * emit an 'ADD addr, base_addr, 0' instruction.
1080*61046927SAndroid Build Coastguard Worker */
1081*61046927SAndroid Build Coastguard Worker brw_reg addr = i == 0 ? base_addr :
1082*61046927SAndroid Build Coastguard Worker ubld.ADD(base_addr, brw_imm_ud(i * REG_SIZE));
1083*61046927SAndroid Build Coastguard Worker
1084*61046927SAndroid Build Coastguard Worker brw_reg srcs[4] = {
1085*61046927SAndroid Build Coastguard Worker brw_imm_ud(0), /* desc */
1086*61046927SAndroid Build Coastguard Worker brw_imm_ud(0), /* ex_desc */
1087*61046927SAndroid Build Coastguard Worker addr, /* payload */
1088*61046927SAndroid Build Coastguard Worker brw_reg(), /* payload2 */
1089*61046927SAndroid Build Coastguard Worker };
1090*61046927SAndroid Build Coastguard Worker
1091*61046927SAndroid Build Coastguard Worker brw_reg dest = retype(brw_vec8_grf(payload().num_regs + i, 0),
1092*61046927SAndroid Build Coastguard Worker BRW_TYPE_UD);
1093*61046927SAndroid Build Coastguard Worker fs_inst *send = ubld.emit(SHADER_OPCODE_SEND, dest, srcs, 4);
1094*61046927SAndroid Build Coastguard Worker
1095*61046927SAndroid Build Coastguard Worker send->sfid = GFX12_SFID_UGM;
1096*61046927SAndroid Build Coastguard Worker send->desc = lsc_msg_desc(devinfo, LSC_OP_LOAD,
1097*61046927SAndroid Build Coastguard Worker LSC_ADDR_SURFTYPE_FLAT,
1098*61046927SAndroid Build Coastguard Worker LSC_ADDR_SIZE_A32,
1099*61046927SAndroid Build Coastguard Worker LSC_DATA_SIZE_D32,
1100*61046927SAndroid Build Coastguard Worker num_regs * 8 /* num_channels */,
1101*61046927SAndroid Build Coastguard Worker true /* transpose */,
1102*61046927SAndroid Build Coastguard Worker LSC_CACHE(devinfo, LOAD, L1STATE_L3MOCS));
1103*61046927SAndroid Build Coastguard Worker send->header_size = 0;
1104*61046927SAndroid Build Coastguard Worker send->mlen = lsc_msg_addr_len(devinfo, LSC_ADDR_SIZE_A32, 1);
1105*61046927SAndroid Build Coastguard Worker send->size_written =
1106*61046927SAndroid Build Coastguard Worker lsc_msg_dest_len(devinfo, LSC_DATA_SIZE_D32, num_regs * 8) * REG_SIZE;
1107*61046927SAndroid Build Coastguard Worker send->send_is_volatile = true;
1108*61046927SAndroid Build Coastguard Worker
1109*61046927SAndroid Build Coastguard Worker i += num_regs;
1110*61046927SAndroid Build Coastguard Worker }
1111*61046927SAndroid Build Coastguard Worker
1112*61046927SAndroid Build Coastguard Worker invalidate_analysis(DEPENDENCY_INSTRUCTIONS);
1113*61046927SAndroid Build Coastguard Worker }
1114*61046927SAndroid Build Coastguard Worker
1115*61046927SAndroid Build Coastguard Worker /* Map the offsets in the UNIFORM file to fixed HW regs. */
1116*61046927SAndroid Build Coastguard Worker foreach_block_and_inst(block, fs_inst, inst, cfg) {
1117*61046927SAndroid Build Coastguard Worker for (unsigned int i = 0; i < inst->sources; i++) {
1118*61046927SAndroid Build Coastguard Worker if (inst->src[i].file == UNIFORM) {
1119*61046927SAndroid Build Coastguard Worker int uniform_nr = inst->src[i].nr + inst->src[i].offset / 4;
1120*61046927SAndroid Build Coastguard Worker int constant_nr;
1121*61046927SAndroid Build Coastguard Worker if (inst->src[i].nr >= UBO_START) {
1122*61046927SAndroid Build Coastguard Worker /* constant_nr is in 32-bit units, the rest are in bytes */
1123*61046927SAndroid Build Coastguard Worker constant_nr = ubo_push_start[inst->src[i].nr - UBO_START] +
1124*61046927SAndroid Build Coastguard Worker inst->src[i].offset / 4;
1125*61046927SAndroid Build Coastguard Worker } else if (uniform_nr >= 0 && uniform_nr < (int) uniforms) {
1126*61046927SAndroid Build Coastguard Worker constant_nr = push_constant_loc[uniform_nr];
1127*61046927SAndroid Build Coastguard Worker } else {
1128*61046927SAndroid Build Coastguard Worker /* Section 5.11 of the OpenGL 4.1 spec says:
1129*61046927SAndroid Build Coastguard Worker * "Out-of-bounds reads return undefined values, which include
1130*61046927SAndroid Build Coastguard Worker * values from other variables of the active program or zero."
1131*61046927SAndroid Build Coastguard Worker * Just return the first push constant.
1132*61046927SAndroid Build Coastguard Worker */
1133*61046927SAndroid Build Coastguard Worker constant_nr = 0;
1134*61046927SAndroid Build Coastguard Worker }
1135*61046927SAndroid Build Coastguard Worker
1136*61046927SAndroid Build Coastguard Worker assert(constant_nr / 8 < 64);
1137*61046927SAndroid Build Coastguard Worker used |= BITFIELD64_BIT(constant_nr / 8);
1138*61046927SAndroid Build Coastguard Worker
1139*61046927SAndroid Build Coastguard Worker struct brw_reg brw_reg = brw_vec1_grf(payload().num_regs +
1140*61046927SAndroid Build Coastguard Worker constant_nr / 8,
1141*61046927SAndroid Build Coastguard Worker constant_nr % 8);
1142*61046927SAndroid Build Coastguard Worker brw_reg.abs = inst->src[i].abs;
1143*61046927SAndroid Build Coastguard Worker brw_reg.negate = inst->src[i].negate;
1144*61046927SAndroid Build Coastguard Worker
1145*61046927SAndroid Build Coastguard Worker assert(inst->src[i].stride == 0);
1146*61046927SAndroid Build Coastguard Worker inst->src[i] = byte_offset(
1147*61046927SAndroid Build Coastguard Worker retype(brw_reg, inst->src[i].type),
1148*61046927SAndroid Build Coastguard Worker inst->src[i].offset % 4);
1149*61046927SAndroid Build Coastguard Worker }
1150*61046927SAndroid Build Coastguard Worker }
1151*61046927SAndroid Build Coastguard Worker }
1152*61046927SAndroid Build Coastguard Worker
1153*61046927SAndroid Build Coastguard Worker uint64_t want_zero = used & prog_data->zero_push_reg;
1154*61046927SAndroid Build Coastguard Worker if (want_zero) {
1155*61046927SAndroid Build Coastguard Worker fs_builder ubld = fs_builder(this, 8).exec_all().at(
1156*61046927SAndroid Build Coastguard Worker cfg->first_block(), cfg->first_block()->start());
1157*61046927SAndroid Build Coastguard Worker
1158*61046927SAndroid Build Coastguard Worker /* push_reg_mask_param is in 32-bit units */
1159*61046927SAndroid Build Coastguard Worker unsigned mask_param = prog_data->push_reg_mask_param;
1160*61046927SAndroid Build Coastguard Worker struct brw_reg mask = brw_vec1_grf(payload().num_regs + mask_param / 8,
1161*61046927SAndroid Build Coastguard Worker mask_param % 8);
1162*61046927SAndroid Build Coastguard Worker
1163*61046927SAndroid Build Coastguard Worker brw_reg b32;
1164*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < 64; i++) {
1165*61046927SAndroid Build Coastguard Worker if (i % 16 == 0 && (want_zero & BITFIELD64_RANGE(i, 16))) {
1166*61046927SAndroid Build Coastguard Worker brw_reg shifted = ubld.vgrf(BRW_TYPE_W, 2);
1167*61046927SAndroid Build Coastguard Worker ubld.SHL(horiz_offset(shifted, 8),
1168*61046927SAndroid Build Coastguard Worker byte_offset(retype(mask, BRW_TYPE_W), i / 8),
1169*61046927SAndroid Build Coastguard Worker brw_imm_v(0x01234567));
1170*61046927SAndroid Build Coastguard Worker ubld.SHL(shifted, horiz_offset(shifted, 8), brw_imm_w(8));
1171*61046927SAndroid Build Coastguard Worker
1172*61046927SAndroid Build Coastguard Worker fs_builder ubld16 = ubld.group(16, 0);
1173*61046927SAndroid Build Coastguard Worker b32 = ubld16.vgrf(BRW_TYPE_D);
1174*61046927SAndroid Build Coastguard Worker ubld16.group(16, 0).ASR(b32, shifted, brw_imm_w(15));
1175*61046927SAndroid Build Coastguard Worker }
1176*61046927SAndroid Build Coastguard Worker
1177*61046927SAndroid Build Coastguard Worker if (want_zero & BITFIELD64_BIT(i)) {
1178*61046927SAndroid Build Coastguard Worker assert(i < prog_data->curb_read_length);
1179*61046927SAndroid Build Coastguard Worker struct brw_reg push_reg =
1180*61046927SAndroid Build Coastguard Worker retype(brw_vec8_grf(payload().num_regs + i, 0), BRW_TYPE_D);
1181*61046927SAndroid Build Coastguard Worker
1182*61046927SAndroid Build Coastguard Worker ubld.AND(push_reg, push_reg, component(b32, i % 16));
1183*61046927SAndroid Build Coastguard Worker }
1184*61046927SAndroid Build Coastguard Worker }
1185*61046927SAndroid Build Coastguard Worker
1186*61046927SAndroid Build Coastguard Worker invalidate_analysis(DEPENDENCY_INSTRUCTIONS);
1187*61046927SAndroid Build Coastguard Worker }
1188*61046927SAndroid Build Coastguard Worker
1189*61046927SAndroid Build Coastguard Worker /* This may be updated in assign_urb_setup or assign_vs_urb_setup. */
1190*61046927SAndroid Build Coastguard Worker this->first_non_payload_grf = payload().num_regs + prog_data->curb_read_length;
1191*61046927SAndroid Build Coastguard Worker }
1192*61046927SAndroid Build Coastguard Worker
1193*61046927SAndroid Build Coastguard Worker /*
1194*61046927SAndroid Build Coastguard Worker * Build up an array of indices into the urb_setup array that
1195*61046927SAndroid Build Coastguard Worker * references the active entries of the urb_setup array.
1196*61046927SAndroid Build Coastguard Worker * Used to accelerate walking the active entries of the urb_setup array
1197*61046927SAndroid Build Coastguard Worker * on each upload.
1198*61046927SAndroid Build Coastguard Worker */
1199*61046927SAndroid Build Coastguard Worker void
brw_compute_urb_setup_index(struct brw_wm_prog_data * wm_prog_data)1200*61046927SAndroid Build Coastguard Worker brw_compute_urb_setup_index(struct brw_wm_prog_data *wm_prog_data)
1201*61046927SAndroid Build Coastguard Worker {
1202*61046927SAndroid Build Coastguard Worker /* TODO(mesh): Review usage of this in the context of Mesh, we may want to
1203*61046927SAndroid Build Coastguard Worker * skip per-primitive attributes here.
1204*61046927SAndroid Build Coastguard Worker */
1205*61046927SAndroid Build Coastguard Worker
1206*61046927SAndroid Build Coastguard Worker /* Make sure uint8_t is sufficient */
1207*61046927SAndroid Build Coastguard Worker STATIC_ASSERT(VARYING_SLOT_MAX <= 0xff);
1208*61046927SAndroid Build Coastguard Worker uint8_t index = 0;
1209*61046927SAndroid Build Coastguard Worker for (uint8_t attr = 0; attr < VARYING_SLOT_MAX; attr++) {
1210*61046927SAndroid Build Coastguard Worker if (wm_prog_data->urb_setup[attr] >= 0) {
1211*61046927SAndroid Build Coastguard Worker wm_prog_data->urb_setup_attribs[index++] = attr;
1212*61046927SAndroid Build Coastguard Worker }
1213*61046927SAndroid Build Coastguard Worker }
1214*61046927SAndroid Build Coastguard Worker wm_prog_data->urb_setup_attribs_count = index;
1215*61046927SAndroid Build Coastguard Worker }
1216*61046927SAndroid Build Coastguard Worker
1217*61046927SAndroid Build Coastguard Worker void
convert_attr_sources_to_hw_regs(fs_inst * inst)1218*61046927SAndroid Build Coastguard Worker fs_visitor::convert_attr_sources_to_hw_regs(fs_inst *inst)
1219*61046927SAndroid Build Coastguard Worker {
1220*61046927SAndroid Build Coastguard Worker for (int i = 0; i < inst->sources; i++) {
1221*61046927SAndroid Build Coastguard Worker if (inst->src[i].file == ATTR) {
1222*61046927SAndroid Build Coastguard Worker assert(inst->src[i].nr == 0);
1223*61046927SAndroid Build Coastguard Worker int grf = payload().num_regs +
1224*61046927SAndroid Build Coastguard Worker prog_data->curb_read_length +
1225*61046927SAndroid Build Coastguard Worker inst->src[i].offset / REG_SIZE;
1226*61046927SAndroid Build Coastguard Worker
1227*61046927SAndroid Build Coastguard Worker /* As explained at brw_reg_from_fs_reg, From the Haswell PRM:
1228*61046927SAndroid Build Coastguard Worker *
1229*61046927SAndroid Build Coastguard Worker * VertStride must be used to cross GRF register boundaries. This
1230*61046927SAndroid Build Coastguard Worker * rule implies that elements within a 'Width' cannot cross GRF
1231*61046927SAndroid Build Coastguard Worker * boundaries.
1232*61046927SAndroid Build Coastguard Worker *
1233*61046927SAndroid Build Coastguard Worker * So, for registers that are large enough, we have to split the exec
1234*61046927SAndroid Build Coastguard Worker * size in two and trust the compression state to sort it out.
1235*61046927SAndroid Build Coastguard Worker */
1236*61046927SAndroid Build Coastguard Worker unsigned total_size = inst->exec_size *
1237*61046927SAndroid Build Coastguard Worker inst->src[i].stride *
1238*61046927SAndroid Build Coastguard Worker brw_type_size_bytes(inst->src[i].type);
1239*61046927SAndroid Build Coastguard Worker
1240*61046927SAndroid Build Coastguard Worker assert(total_size <= 2 * REG_SIZE);
1241*61046927SAndroid Build Coastguard Worker const unsigned exec_size =
1242*61046927SAndroid Build Coastguard Worker (total_size <= REG_SIZE) ? inst->exec_size : inst->exec_size / 2;
1243*61046927SAndroid Build Coastguard Worker
1244*61046927SAndroid Build Coastguard Worker unsigned width = inst->src[i].stride == 0 ? 1 : exec_size;
1245*61046927SAndroid Build Coastguard Worker struct brw_reg reg =
1246*61046927SAndroid Build Coastguard Worker stride(byte_offset(retype(brw_vec8_grf(grf, 0), inst->src[i].type),
1247*61046927SAndroid Build Coastguard Worker inst->src[i].offset % REG_SIZE),
1248*61046927SAndroid Build Coastguard Worker exec_size * inst->src[i].stride,
1249*61046927SAndroid Build Coastguard Worker width, inst->src[i].stride);
1250*61046927SAndroid Build Coastguard Worker reg.abs = inst->src[i].abs;
1251*61046927SAndroid Build Coastguard Worker reg.negate = inst->src[i].negate;
1252*61046927SAndroid Build Coastguard Worker
1253*61046927SAndroid Build Coastguard Worker inst->src[i] = reg;
1254*61046927SAndroid Build Coastguard Worker }
1255*61046927SAndroid Build Coastguard Worker }
1256*61046927SAndroid Build Coastguard Worker }
1257*61046927SAndroid Build Coastguard Worker
1258*61046927SAndroid Build Coastguard Worker int
brw_get_subgroup_id_param_index(const intel_device_info * devinfo,const brw_stage_prog_data * prog_data)1259*61046927SAndroid Build Coastguard Worker brw_get_subgroup_id_param_index(const intel_device_info *devinfo,
1260*61046927SAndroid Build Coastguard Worker const brw_stage_prog_data *prog_data)
1261*61046927SAndroid Build Coastguard Worker {
1262*61046927SAndroid Build Coastguard Worker if (prog_data->nr_params == 0)
1263*61046927SAndroid Build Coastguard Worker return -1;
1264*61046927SAndroid Build Coastguard Worker
1265*61046927SAndroid Build Coastguard Worker if (devinfo->verx10 >= 125)
1266*61046927SAndroid Build Coastguard Worker return -1;
1267*61046927SAndroid Build Coastguard Worker
1268*61046927SAndroid Build Coastguard Worker /* The local thread id is always the last parameter in the list */
1269*61046927SAndroid Build Coastguard Worker uint32_t last_param = prog_data->param[prog_data->nr_params - 1];
1270*61046927SAndroid Build Coastguard Worker if (last_param == BRW_PARAM_BUILTIN_SUBGROUP_ID)
1271*61046927SAndroid Build Coastguard Worker return prog_data->nr_params - 1;
1272*61046927SAndroid Build Coastguard Worker
1273*61046927SAndroid Build Coastguard Worker return -1;
1274*61046927SAndroid Build Coastguard Worker }
1275*61046927SAndroid Build Coastguard Worker
1276*61046927SAndroid Build Coastguard Worker /**
1277*61046927SAndroid Build Coastguard Worker * Assign UNIFORM file registers to either push constants or pull constants.
1278*61046927SAndroid Build Coastguard Worker *
1279*61046927SAndroid Build Coastguard Worker * We allow a fragment shader to have more than the specified minimum
1280*61046927SAndroid Build Coastguard Worker * maximum number of fragment shader uniform components (64). If
1281*61046927SAndroid Build Coastguard Worker * there are too many of these, they'd fill up all of register space.
1282*61046927SAndroid Build Coastguard Worker * So, this will push some of them out to the pull constant buffer and
1283*61046927SAndroid Build Coastguard Worker * update the program to load them.
1284*61046927SAndroid Build Coastguard Worker */
1285*61046927SAndroid Build Coastguard Worker void
assign_constant_locations()1286*61046927SAndroid Build Coastguard Worker fs_visitor::assign_constant_locations()
1287*61046927SAndroid Build Coastguard Worker {
1288*61046927SAndroid Build Coastguard Worker /* Only the first compile gets to decide on locations. */
1289*61046927SAndroid Build Coastguard Worker if (push_constant_loc)
1290*61046927SAndroid Build Coastguard Worker return;
1291*61046927SAndroid Build Coastguard Worker
1292*61046927SAndroid Build Coastguard Worker push_constant_loc = ralloc_array(mem_ctx, int, uniforms);
1293*61046927SAndroid Build Coastguard Worker for (unsigned u = 0; u < uniforms; u++)
1294*61046927SAndroid Build Coastguard Worker push_constant_loc[u] = u;
1295*61046927SAndroid Build Coastguard Worker
1296*61046927SAndroid Build Coastguard Worker /* Now that we know how many regular uniforms we'll push, reduce the
1297*61046927SAndroid Build Coastguard Worker * UBO push ranges so we don't exceed the 3DSTATE_CONSTANT limits.
1298*61046927SAndroid Build Coastguard Worker *
1299*61046927SAndroid Build Coastguard Worker * If changing this value, note the limitation about total_regs in
1300*61046927SAndroid Build Coastguard Worker * brw_curbe.c/crocus_state.c
1301*61046927SAndroid Build Coastguard Worker */
1302*61046927SAndroid Build Coastguard Worker const unsigned max_push_length = 64;
1303*61046927SAndroid Build Coastguard Worker unsigned push_length =
1304*61046927SAndroid Build Coastguard Worker round_components_to_whole_registers(devinfo, prog_data->nr_params);
1305*61046927SAndroid Build Coastguard Worker for (int i = 0; i < 4; i++) {
1306*61046927SAndroid Build Coastguard Worker struct brw_ubo_range *range = &prog_data->ubo_ranges[i];
1307*61046927SAndroid Build Coastguard Worker
1308*61046927SAndroid Build Coastguard Worker if (push_length + range->length > max_push_length)
1309*61046927SAndroid Build Coastguard Worker range->length = max_push_length - push_length;
1310*61046927SAndroid Build Coastguard Worker
1311*61046927SAndroid Build Coastguard Worker push_length += range->length;
1312*61046927SAndroid Build Coastguard Worker
1313*61046927SAndroid Build Coastguard Worker assert(push_length % (1 * reg_unit(devinfo)) == 0);
1314*61046927SAndroid Build Coastguard Worker
1315*61046927SAndroid Build Coastguard Worker }
1316*61046927SAndroid Build Coastguard Worker assert(push_length <= max_push_length);
1317*61046927SAndroid Build Coastguard Worker }
1318*61046927SAndroid Build Coastguard Worker
1319*61046927SAndroid Build Coastguard Worker bool
get_pull_locs(const brw_reg & src,unsigned * out_surf_index,unsigned * out_pull_index)1320*61046927SAndroid Build Coastguard Worker fs_visitor::get_pull_locs(const brw_reg &src,
1321*61046927SAndroid Build Coastguard Worker unsigned *out_surf_index,
1322*61046927SAndroid Build Coastguard Worker unsigned *out_pull_index)
1323*61046927SAndroid Build Coastguard Worker {
1324*61046927SAndroid Build Coastguard Worker assert(src.file == UNIFORM);
1325*61046927SAndroid Build Coastguard Worker
1326*61046927SAndroid Build Coastguard Worker if (src.nr < UBO_START)
1327*61046927SAndroid Build Coastguard Worker return false;
1328*61046927SAndroid Build Coastguard Worker
1329*61046927SAndroid Build Coastguard Worker const struct brw_ubo_range *range =
1330*61046927SAndroid Build Coastguard Worker &prog_data->ubo_ranges[src.nr - UBO_START];
1331*61046927SAndroid Build Coastguard Worker
1332*61046927SAndroid Build Coastguard Worker /* If this access is in our (reduced) range, use the push data. */
1333*61046927SAndroid Build Coastguard Worker if (src.offset / 32 < range->length)
1334*61046927SAndroid Build Coastguard Worker return false;
1335*61046927SAndroid Build Coastguard Worker
1336*61046927SAndroid Build Coastguard Worker *out_surf_index = range->block;
1337*61046927SAndroid Build Coastguard Worker *out_pull_index = (32 * range->start + src.offset) / 4;
1338*61046927SAndroid Build Coastguard Worker
1339*61046927SAndroid Build Coastguard Worker prog_data->has_ubo_pull = true;
1340*61046927SAndroid Build Coastguard Worker
1341*61046927SAndroid Build Coastguard Worker return true;
1342*61046927SAndroid Build Coastguard Worker }
1343*61046927SAndroid Build Coastguard Worker
1344*61046927SAndroid Build Coastguard Worker /**
1345*61046927SAndroid Build Coastguard Worker * Get the mask of SIMD channels enabled during dispatch and not yet disabled
1346*61046927SAndroid Build Coastguard Worker * by discard. Due to the layout of the sample mask in the fragment shader
1347*61046927SAndroid Build Coastguard Worker * thread payload, \p bld is required to have a dispatch_width() not greater
1348*61046927SAndroid Build Coastguard Worker * than 16 for fragment shaders.
1349*61046927SAndroid Build Coastguard Worker */
1350*61046927SAndroid Build Coastguard Worker brw_reg
brw_sample_mask_reg(const fs_builder & bld)1351*61046927SAndroid Build Coastguard Worker brw_sample_mask_reg(const fs_builder &bld)
1352*61046927SAndroid Build Coastguard Worker {
1353*61046927SAndroid Build Coastguard Worker const fs_visitor &s = *bld.shader;
1354*61046927SAndroid Build Coastguard Worker
1355*61046927SAndroid Build Coastguard Worker if (s.stage != MESA_SHADER_FRAGMENT) {
1356*61046927SAndroid Build Coastguard Worker return brw_imm_ud(0xffffffff);
1357*61046927SAndroid Build Coastguard Worker } else if (s.devinfo->ver >= 20 ||
1358*61046927SAndroid Build Coastguard Worker brw_wm_prog_data(s.prog_data)->uses_kill) {
1359*61046927SAndroid Build Coastguard Worker return brw_flag_subreg(sample_mask_flag_subreg(s) + bld.group() / 16);
1360*61046927SAndroid Build Coastguard Worker } else {
1361*61046927SAndroid Build Coastguard Worker assert(bld.dispatch_width() <= 16);
1362*61046927SAndroid Build Coastguard Worker assert(s.devinfo->ver < 20);
1363*61046927SAndroid Build Coastguard Worker return retype(brw_vec1_grf((bld.group() >= 16 ? 2 : 1), 7),
1364*61046927SAndroid Build Coastguard Worker BRW_TYPE_UW);
1365*61046927SAndroid Build Coastguard Worker }
1366*61046927SAndroid Build Coastguard Worker }
1367*61046927SAndroid Build Coastguard Worker
1368*61046927SAndroid Build Coastguard Worker uint32_t
brw_fb_write_msg_control(const fs_inst * inst,const struct brw_wm_prog_data * prog_data)1369*61046927SAndroid Build Coastguard Worker brw_fb_write_msg_control(const fs_inst *inst,
1370*61046927SAndroid Build Coastguard Worker const struct brw_wm_prog_data *prog_data)
1371*61046927SAndroid Build Coastguard Worker {
1372*61046927SAndroid Build Coastguard Worker uint32_t mctl;
1373*61046927SAndroid Build Coastguard Worker
1374*61046927SAndroid Build Coastguard Worker if (prog_data->dual_src_blend) {
1375*61046927SAndroid Build Coastguard Worker assert(inst->exec_size < 32);
1376*61046927SAndroid Build Coastguard Worker
1377*61046927SAndroid Build Coastguard Worker if (inst->group % 16 == 0)
1378*61046927SAndroid Build Coastguard Worker mctl = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01;
1379*61046927SAndroid Build Coastguard Worker else if (inst->group % 16 == 8)
1380*61046927SAndroid Build Coastguard Worker mctl = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23;
1381*61046927SAndroid Build Coastguard Worker else
1382*61046927SAndroid Build Coastguard Worker unreachable("Invalid dual-source FB write instruction group");
1383*61046927SAndroid Build Coastguard Worker } else {
1384*61046927SAndroid Build Coastguard Worker assert(inst->group == 0 || (inst->group == 16 && inst->exec_size == 16));
1385*61046927SAndroid Build Coastguard Worker
1386*61046927SAndroid Build Coastguard Worker if (inst->exec_size == 16)
1387*61046927SAndroid Build Coastguard Worker mctl = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
1388*61046927SAndroid Build Coastguard Worker else if (inst->exec_size == 8)
1389*61046927SAndroid Build Coastguard Worker mctl = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;
1390*61046927SAndroid Build Coastguard Worker else if (inst->exec_size == 32)
1391*61046927SAndroid Build Coastguard Worker mctl = XE2_DATAPORT_RENDER_TARGET_WRITE_SIMD32_SINGLE_SOURCE;
1392*61046927SAndroid Build Coastguard Worker else
1393*61046927SAndroid Build Coastguard Worker unreachable("Invalid FB write execution size");
1394*61046927SAndroid Build Coastguard Worker }
1395*61046927SAndroid Build Coastguard Worker
1396*61046927SAndroid Build Coastguard Worker return mctl;
1397*61046927SAndroid Build Coastguard Worker }
1398*61046927SAndroid Build Coastguard Worker
1399*61046927SAndroid Build Coastguard Worker /**
1400*61046927SAndroid Build Coastguard Worker * Predicate the specified instruction on the sample mask.
1401*61046927SAndroid Build Coastguard Worker */
1402*61046927SAndroid Build Coastguard Worker void
brw_emit_predicate_on_sample_mask(const fs_builder & bld,fs_inst * inst)1403*61046927SAndroid Build Coastguard Worker brw_emit_predicate_on_sample_mask(const fs_builder &bld, fs_inst *inst)
1404*61046927SAndroid Build Coastguard Worker {
1405*61046927SAndroid Build Coastguard Worker assert(bld.shader->stage == MESA_SHADER_FRAGMENT &&
1406*61046927SAndroid Build Coastguard Worker bld.group() == inst->group &&
1407*61046927SAndroid Build Coastguard Worker bld.dispatch_width() == inst->exec_size);
1408*61046927SAndroid Build Coastguard Worker
1409*61046927SAndroid Build Coastguard Worker const fs_visitor &s = *bld.shader;
1410*61046927SAndroid Build Coastguard Worker const brw_reg sample_mask = brw_sample_mask_reg(bld);
1411*61046927SAndroid Build Coastguard Worker const unsigned subreg = sample_mask_flag_subreg(s);
1412*61046927SAndroid Build Coastguard Worker
1413*61046927SAndroid Build Coastguard Worker if (s.devinfo->ver >= 20 || brw_wm_prog_data(s.prog_data)->uses_kill) {
1414*61046927SAndroid Build Coastguard Worker assert(sample_mask.file == ARF &&
1415*61046927SAndroid Build Coastguard Worker sample_mask.nr == brw_flag_subreg(subreg).nr &&
1416*61046927SAndroid Build Coastguard Worker sample_mask.subnr == brw_flag_subreg(
1417*61046927SAndroid Build Coastguard Worker subreg + inst->group / 16).subnr);
1418*61046927SAndroid Build Coastguard Worker } else {
1419*61046927SAndroid Build Coastguard Worker bld.group(1, 0).exec_all()
1420*61046927SAndroid Build Coastguard Worker .MOV(brw_flag_subreg(subreg + inst->group / 16), sample_mask);
1421*61046927SAndroid Build Coastguard Worker }
1422*61046927SAndroid Build Coastguard Worker
1423*61046927SAndroid Build Coastguard Worker if (inst->predicate) {
1424*61046927SAndroid Build Coastguard Worker assert(inst->predicate == BRW_PREDICATE_NORMAL);
1425*61046927SAndroid Build Coastguard Worker assert(!inst->predicate_inverse);
1426*61046927SAndroid Build Coastguard Worker assert(inst->flag_subreg == 0);
1427*61046927SAndroid Build Coastguard Worker assert(s.devinfo->ver < 20);
1428*61046927SAndroid Build Coastguard Worker /* Combine the sample mask with the existing predicate by using a
1429*61046927SAndroid Build Coastguard Worker * vertical predication mode.
1430*61046927SAndroid Build Coastguard Worker */
1431*61046927SAndroid Build Coastguard Worker inst->predicate = BRW_PREDICATE_ALIGN1_ALLV;
1432*61046927SAndroid Build Coastguard Worker } else {
1433*61046927SAndroid Build Coastguard Worker inst->flag_subreg = subreg;
1434*61046927SAndroid Build Coastguard Worker inst->predicate = BRW_PREDICATE_NORMAL;
1435*61046927SAndroid Build Coastguard Worker inst->predicate_inverse = false;
1436*61046927SAndroid Build Coastguard Worker }
1437*61046927SAndroid Build Coastguard Worker }
1438*61046927SAndroid Build Coastguard Worker
register_pressure(const fs_visitor * v)1439*61046927SAndroid Build Coastguard Worker brw::register_pressure::register_pressure(const fs_visitor *v)
1440*61046927SAndroid Build Coastguard Worker {
1441*61046927SAndroid Build Coastguard Worker const fs_live_variables &live = v->live_analysis.require();
1442*61046927SAndroid Build Coastguard Worker const unsigned num_instructions = v->cfg->num_blocks ?
1443*61046927SAndroid Build Coastguard Worker v->cfg->blocks[v->cfg->num_blocks - 1]->end_ip + 1 : 0;
1444*61046927SAndroid Build Coastguard Worker
1445*61046927SAndroid Build Coastguard Worker regs_live_at_ip = new unsigned[num_instructions]();
1446*61046927SAndroid Build Coastguard Worker
1447*61046927SAndroid Build Coastguard Worker for (unsigned reg = 0; reg < v->alloc.count; reg++) {
1448*61046927SAndroid Build Coastguard Worker for (int ip = live.vgrf_start[reg]; ip <= live.vgrf_end[reg]; ip++)
1449*61046927SAndroid Build Coastguard Worker regs_live_at_ip[ip] += v->alloc.sizes[reg];
1450*61046927SAndroid Build Coastguard Worker }
1451*61046927SAndroid Build Coastguard Worker
1452*61046927SAndroid Build Coastguard Worker const unsigned payload_count = v->first_non_payload_grf;
1453*61046927SAndroid Build Coastguard Worker
1454*61046927SAndroid Build Coastguard Worker int *payload_last_use_ip = new int[payload_count];
1455*61046927SAndroid Build Coastguard Worker v->calculate_payload_ranges(true, payload_count, payload_last_use_ip);
1456*61046927SAndroid Build Coastguard Worker
1457*61046927SAndroid Build Coastguard Worker for (unsigned reg = 0; reg < payload_count; reg++) {
1458*61046927SAndroid Build Coastguard Worker for (int ip = 0; ip < payload_last_use_ip[reg]; ip++)
1459*61046927SAndroid Build Coastguard Worker ++regs_live_at_ip[ip];
1460*61046927SAndroid Build Coastguard Worker }
1461*61046927SAndroid Build Coastguard Worker
1462*61046927SAndroid Build Coastguard Worker delete[] payload_last_use_ip;
1463*61046927SAndroid Build Coastguard Worker }
1464*61046927SAndroid Build Coastguard Worker
~register_pressure()1465*61046927SAndroid Build Coastguard Worker brw::register_pressure::~register_pressure()
1466*61046927SAndroid Build Coastguard Worker {
1467*61046927SAndroid Build Coastguard Worker delete[] regs_live_at_ip;
1468*61046927SAndroid Build Coastguard Worker }
1469*61046927SAndroid Build Coastguard Worker
1470*61046927SAndroid Build Coastguard Worker void
invalidate_analysis(brw::analysis_dependency_class c)1471*61046927SAndroid Build Coastguard Worker fs_visitor::invalidate_analysis(brw::analysis_dependency_class c)
1472*61046927SAndroid Build Coastguard Worker {
1473*61046927SAndroid Build Coastguard Worker live_analysis.invalidate(c);
1474*61046927SAndroid Build Coastguard Worker regpressure_analysis.invalidate(c);
1475*61046927SAndroid Build Coastguard Worker idom_analysis.invalidate(c);
1476*61046927SAndroid Build Coastguard Worker def_analysis.invalidate(c);
1477*61046927SAndroid Build Coastguard Worker }
1478*61046927SAndroid Build Coastguard Worker
1479*61046927SAndroid Build Coastguard Worker void
debug_optimizer(const nir_shader * nir,const char * pass_name,int iteration,int pass_num) const1480*61046927SAndroid Build Coastguard Worker fs_visitor::debug_optimizer(const nir_shader *nir,
1481*61046927SAndroid Build Coastguard Worker const char *pass_name,
1482*61046927SAndroid Build Coastguard Worker int iteration, int pass_num) const
1483*61046927SAndroid Build Coastguard Worker {
1484*61046927SAndroid Build Coastguard Worker if (!brw_should_print_shader(nir, DEBUG_OPTIMIZER))
1485*61046927SAndroid Build Coastguard Worker return;
1486*61046927SAndroid Build Coastguard Worker
1487*61046927SAndroid Build Coastguard Worker char *filename;
1488*61046927SAndroid Build Coastguard Worker int ret = asprintf(&filename, "%s/%s%d-%s-%02d-%02d-%s",
1489*61046927SAndroid Build Coastguard Worker debug_get_option("INTEL_SHADER_OPTIMIZER_PATH", "./"),
1490*61046927SAndroid Build Coastguard Worker _mesa_shader_stage_to_abbrev(stage), dispatch_width, nir->info.name,
1491*61046927SAndroid Build Coastguard Worker iteration, pass_num, pass_name);
1492*61046927SAndroid Build Coastguard Worker if (ret == -1)
1493*61046927SAndroid Build Coastguard Worker return;
1494*61046927SAndroid Build Coastguard Worker brw_print_instructions(*this, filename);
1495*61046927SAndroid Build Coastguard Worker free(filename);
1496*61046927SAndroid Build Coastguard Worker }
1497*61046927SAndroid Build Coastguard Worker
1498*61046927SAndroid Build Coastguard Worker static uint32_t
brw_compute_max_register_pressure(fs_visitor & s)1499*61046927SAndroid Build Coastguard Worker brw_compute_max_register_pressure(fs_visitor &s)
1500*61046927SAndroid Build Coastguard Worker {
1501*61046927SAndroid Build Coastguard Worker const register_pressure &rp = s.regpressure_analysis.require();
1502*61046927SAndroid Build Coastguard Worker uint32_t ip = 0, max_pressure = 0;
1503*61046927SAndroid Build Coastguard Worker foreach_block_and_inst(block, fs_inst, inst, s.cfg) {
1504*61046927SAndroid Build Coastguard Worker max_pressure = MAX2(max_pressure, rp.regs_live_at_ip[ip]);
1505*61046927SAndroid Build Coastguard Worker ip++;
1506*61046927SAndroid Build Coastguard Worker }
1507*61046927SAndroid Build Coastguard Worker return max_pressure;
1508*61046927SAndroid Build Coastguard Worker }
1509*61046927SAndroid Build Coastguard Worker
1510*61046927SAndroid Build Coastguard Worker static fs_inst **
save_instruction_order(const struct cfg_t * cfg)1511*61046927SAndroid Build Coastguard Worker save_instruction_order(const struct cfg_t *cfg)
1512*61046927SAndroid Build Coastguard Worker {
1513*61046927SAndroid Build Coastguard Worker /* Before we schedule anything, stash off the instruction order as an array
1514*61046927SAndroid Build Coastguard Worker * of fs_inst *. This way, we can reset it between scheduling passes to
1515*61046927SAndroid Build Coastguard Worker * prevent dependencies between the different scheduling modes.
1516*61046927SAndroid Build Coastguard Worker */
1517*61046927SAndroid Build Coastguard Worker int num_insts = cfg->last_block()->end_ip + 1;
1518*61046927SAndroid Build Coastguard Worker fs_inst **inst_arr = new fs_inst * [num_insts];
1519*61046927SAndroid Build Coastguard Worker
1520*61046927SAndroid Build Coastguard Worker int ip = 0;
1521*61046927SAndroid Build Coastguard Worker foreach_block_and_inst(block, fs_inst, inst, cfg) {
1522*61046927SAndroid Build Coastguard Worker assert(ip >= block->start_ip && ip <= block->end_ip);
1523*61046927SAndroid Build Coastguard Worker inst_arr[ip++] = inst;
1524*61046927SAndroid Build Coastguard Worker }
1525*61046927SAndroid Build Coastguard Worker assert(ip == num_insts);
1526*61046927SAndroid Build Coastguard Worker
1527*61046927SAndroid Build Coastguard Worker return inst_arr;
1528*61046927SAndroid Build Coastguard Worker }
1529*61046927SAndroid Build Coastguard Worker
1530*61046927SAndroid Build Coastguard Worker static void
restore_instruction_order(struct cfg_t * cfg,fs_inst ** inst_arr)1531*61046927SAndroid Build Coastguard Worker restore_instruction_order(struct cfg_t *cfg, fs_inst **inst_arr)
1532*61046927SAndroid Build Coastguard Worker {
1533*61046927SAndroid Build Coastguard Worker ASSERTED int num_insts = cfg->last_block()->end_ip + 1;
1534*61046927SAndroid Build Coastguard Worker
1535*61046927SAndroid Build Coastguard Worker int ip = 0;
1536*61046927SAndroid Build Coastguard Worker foreach_block (block, cfg) {
1537*61046927SAndroid Build Coastguard Worker block->instructions.make_empty();
1538*61046927SAndroid Build Coastguard Worker
1539*61046927SAndroid Build Coastguard Worker assert(ip == block->start_ip);
1540*61046927SAndroid Build Coastguard Worker for (; ip <= block->end_ip; ip++)
1541*61046927SAndroid Build Coastguard Worker block->instructions.push_tail(inst_arr[ip]);
1542*61046927SAndroid Build Coastguard Worker }
1543*61046927SAndroid Build Coastguard Worker assert(ip == num_insts);
1544*61046927SAndroid Build Coastguard Worker }
1545*61046927SAndroid Build Coastguard Worker
1546*61046927SAndroid Build Coastguard Worker /* Per-thread scratch space is a power-of-two multiple of 1KB. */
1547*61046927SAndroid Build Coastguard Worker static inline unsigned
brw_get_scratch_size(int size)1548*61046927SAndroid Build Coastguard Worker brw_get_scratch_size(int size)
1549*61046927SAndroid Build Coastguard Worker {
1550*61046927SAndroid Build Coastguard Worker return MAX2(1024, util_next_power_of_two(size));
1551*61046927SAndroid Build Coastguard Worker }
1552*61046927SAndroid Build Coastguard Worker
1553*61046927SAndroid Build Coastguard Worker void
brw_allocate_registers(fs_visitor & s,bool allow_spilling)1554*61046927SAndroid Build Coastguard Worker brw_allocate_registers(fs_visitor &s, bool allow_spilling)
1555*61046927SAndroid Build Coastguard Worker {
1556*61046927SAndroid Build Coastguard Worker const struct intel_device_info *devinfo = s.devinfo;
1557*61046927SAndroid Build Coastguard Worker const nir_shader *nir = s.nir;
1558*61046927SAndroid Build Coastguard Worker bool allocated;
1559*61046927SAndroid Build Coastguard Worker
1560*61046927SAndroid Build Coastguard Worker static const enum instruction_scheduler_mode pre_modes[] = {
1561*61046927SAndroid Build Coastguard Worker SCHEDULE_PRE,
1562*61046927SAndroid Build Coastguard Worker SCHEDULE_PRE_NON_LIFO,
1563*61046927SAndroid Build Coastguard Worker SCHEDULE_NONE,
1564*61046927SAndroid Build Coastguard Worker SCHEDULE_PRE_LIFO,
1565*61046927SAndroid Build Coastguard Worker };
1566*61046927SAndroid Build Coastguard Worker
1567*61046927SAndroid Build Coastguard Worker static const char *scheduler_mode_name[] = {
1568*61046927SAndroid Build Coastguard Worker [SCHEDULE_PRE] = "top-down",
1569*61046927SAndroid Build Coastguard Worker [SCHEDULE_PRE_NON_LIFO] = "non-lifo",
1570*61046927SAndroid Build Coastguard Worker [SCHEDULE_PRE_LIFO] = "lifo",
1571*61046927SAndroid Build Coastguard Worker [SCHEDULE_POST] = "post",
1572*61046927SAndroid Build Coastguard Worker [SCHEDULE_NONE] = "none",
1573*61046927SAndroid Build Coastguard Worker };
1574*61046927SAndroid Build Coastguard Worker
1575*61046927SAndroid Build Coastguard Worker uint32_t best_register_pressure = UINT32_MAX;
1576*61046927SAndroid Build Coastguard Worker enum instruction_scheduler_mode best_sched = SCHEDULE_NONE;
1577*61046927SAndroid Build Coastguard Worker
1578*61046927SAndroid Build Coastguard Worker brw_fs_opt_compact_virtual_grfs(s);
1579*61046927SAndroid Build Coastguard Worker
1580*61046927SAndroid Build Coastguard Worker if (s.needs_register_pressure)
1581*61046927SAndroid Build Coastguard Worker s.shader_stats.max_register_pressure = brw_compute_max_register_pressure(s);
1582*61046927SAndroid Build Coastguard Worker
1583*61046927SAndroid Build Coastguard Worker s.debug_optimizer(nir, "pre_register_allocate", 90, 90);
1584*61046927SAndroid Build Coastguard Worker
1585*61046927SAndroid Build Coastguard Worker bool spill_all = allow_spilling && INTEL_DEBUG(DEBUG_SPILL_FS);
1586*61046927SAndroid Build Coastguard Worker
1587*61046927SAndroid Build Coastguard Worker /* Before we schedule anything, stash off the instruction order as an array
1588*61046927SAndroid Build Coastguard Worker * of fs_inst *. This way, we can reset it between scheduling passes to
1589*61046927SAndroid Build Coastguard Worker * prevent dependencies between the different scheduling modes.
1590*61046927SAndroid Build Coastguard Worker */
1591*61046927SAndroid Build Coastguard Worker fs_inst **orig_order = save_instruction_order(s.cfg);
1592*61046927SAndroid Build Coastguard Worker fs_inst **best_pressure_order = NULL;
1593*61046927SAndroid Build Coastguard Worker
1594*61046927SAndroid Build Coastguard Worker void *scheduler_ctx = ralloc_context(NULL);
1595*61046927SAndroid Build Coastguard Worker instruction_scheduler *sched = brw_prepare_scheduler(s, scheduler_ctx);
1596*61046927SAndroid Build Coastguard Worker
1597*61046927SAndroid Build Coastguard Worker /* Try each scheduling heuristic to see if it can successfully register
1598*61046927SAndroid Build Coastguard Worker * allocate without spilling. They should be ordered by decreasing
1599*61046927SAndroid Build Coastguard Worker * performance but increasing likelihood of allocating.
1600*61046927SAndroid Build Coastguard Worker */
1601*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < ARRAY_SIZE(pre_modes); i++) {
1602*61046927SAndroid Build Coastguard Worker enum instruction_scheduler_mode sched_mode = pre_modes[i];
1603*61046927SAndroid Build Coastguard Worker
1604*61046927SAndroid Build Coastguard Worker brw_schedule_instructions_pre_ra(s, sched, sched_mode);
1605*61046927SAndroid Build Coastguard Worker s.shader_stats.scheduler_mode = scheduler_mode_name[sched_mode];
1606*61046927SAndroid Build Coastguard Worker
1607*61046927SAndroid Build Coastguard Worker s.debug_optimizer(nir, s.shader_stats.scheduler_mode, 95, i);
1608*61046927SAndroid Build Coastguard Worker
1609*61046927SAndroid Build Coastguard Worker if (0) {
1610*61046927SAndroid Build Coastguard Worker brw_assign_regs_trivial(s);
1611*61046927SAndroid Build Coastguard Worker allocated = true;
1612*61046927SAndroid Build Coastguard Worker break;
1613*61046927SAndroid Build Coastguard Worker }
1614*61046927SAndroid Build Coastguard Worker
1615*61046927SAndroid Build Coastguard Worker /* We should only spill registers on the last scheduling. */
1616*61046927SAndroid Build Coastguard Worker assert(!s.spilled_any_registers);
1617*61046927SAndroid Build Coastguard Worker
1618*61046927SAndroid Build Coastguard Worker allocated = brw_assign_regs(s, false, spill_all);
1619*61046927SAndroid Build Coastguard Worker if (allocated)
1620*61046927SAndroid Build Coastguard Worker break;
1621*61046927SAndroid Build Coastguard Worker
1622*61046927SAndroid Build Coastguard Worker /* Save the maximum register pressure */
1623*61046927SAndroid Build Coastguard Worker uint32_t this_pressure = brw_compute_max_register_pressure(s);
1624*61046927SAndroid Build Coastguard Worker
1625*61046927SAndroid Build Coastguard Worker if (0) {
1626*61046927SAndroid Build Coastguard Worker fprintf(stderr, "Scheduler mode \"%s\" spilled, max pressure = %u\n",
1627*61046927SAndroid Build Coastguard Worker scheduler_mode_name[sched_mode], this_pressure);
1628*61046927SAndroid Build Coastguard Worker }
1629*61046927SAndroid Build Coastguard Worker
1630*61046927SAndroid Build Coastguard Worker if (this_pressure < best_register_pressure) {
1631*61046927SAndroid Build Coastguard Worker best_register_pressure = this_pressure;
1632*61046927SAndroid Build Coastguard Worker best_sched = sched_mode;
1633*61046927SAndroid Build Coastguard Worker delete[] best_pressure_order;
1634*61046927SAndroid Build Coastguard Worker best_pressure_order = save_instruction_order(s.cfg);
1635*61046927SAndroid Build Coastguard Worker }
1636*61046927SAndroid Build Coastguard Worker
1637*61046927SAndroid Build Coastguard Worker /* Reset back to the original order before trying the next mode */
1638*61046927SAndroid Build Coastguard Worker restore_instruction_order(s.cfg, orig_order);
1639*61046927SAndroid Build Coastguard Worker s.invalidate_analysis(DEPENDENCY_INSTRUCTIONS);
1640*61046927SAndroid Build Coastguard Worker }
1641*61046927SAndroid Build Coastguard Worker
1642*61046927SAndroid Build Coastguard Worker ralloc_free(scheduler_ctx);
1643*61046927SAndroid Build Coastguard Worker
1644*61046927SAndroid Build Coastguard Worker if (!allocated) {
1645*61046927SAndroid Build Coastguard Worker if (0) {
1646*61046927SAndroid Build Coastguard Worker fprintf(stderr, "Spilling - using lowest-pressure mode \"%s\"\n",
1647*61046927SAndroid Build Coastguard Worker scheduler_mode_name[best_sched]);
1648*61046927SAndroid Build Coastguard Worker }
1649*61046927SAndroid Build Coastguard Worker restore_instruction_order(s.cfg, best_pressure_order);
1650*61046927SAndroid Build Coastguard Worker s.shader_stats.scheduler_mode = scheduler_mode_name[best_sched];
1651*61046927SAndroid Build Coastguard Worker
1652*61046927SAndroid Build Coastguard Worker allocated = brw_assign_regs(s, allow_spilling, spill_all);
1653*61046927SAndroid Build Coastguard Worker }
1654*61046927SAndroid Build Coastguard Worker
1655*61046927SAndroid Build Coastguard Worker delete[] orig_order;
1656*61046927SAndroid Build Coastguard Worker delete[] best_pressure_order;
1657*61046927SAndroid Build Coastguard Worker
1658*61046927SAndroid Build Coastguard Worker if (!allocated) {
1659*61046927SAndroid Build Coastguard Worker s.fail("Failure to register allocate. Reduce number of "
1660*61046927SAndroid Build Coastguard Worker "live scalar values to avoid this.");
1661*61046927SAndroid Build Coastguard Worker } else if (s.spilled_any_registers) {
1662*61046927SAndroid Build Coastguard Worker brw_shader_perf_log(s.compiler, s.log_data,
1663*61046927SAndroid Build Coastguard Worker "%s shader triggered register spilling. "
1664*61046927SAndroid Build Coastguard Worker "Try reducing the number of live scalar "
1665*61046927SAndroid Build Coastguard Worker "values to improve performance.\n",
1666*61046927SAndroid Build Coastguard Worker _mesa_shader_stage_to_string(s.stage));
1667*61046927SAndroid Build Coastguard Worker }
1668*61046927SAndroid Build Coastguard Worker
1669*61046927SAndroid Build Coastguard Worker if (s.failed)
1670*61046927SAndroid Build Coastguard Worker return;
1671*61046927SAndroid Build Coastguard Worker
1672*61046927SAndroid Build Coastguard Worker s.debug_optimizer(nir, "post_ra_alloc", 96, 0);
1673*61046927SAndroid Build Coastguard Worker
1674*61046927SAndroid Build Coastguard Worker brw_fs_opt_bank_conflicts(s);
1675*61046927SAndroid Build Coastguard Worker
1676*61046927SAndroid Build Coastguard Worker s.debug_optimizer(nir, "bank_conflict", 96, 1);
1677*61046927SAndroid Build Coastguard Worker
1678*61046927SAndroid Build Coastguard Worker brw_schedule_instructions_post_ra(s);
1679*61046927SAndroid Build Coastguard Worker
1680*61046927SAndroid Build Coastguard Worker s.debug_optimizer(nir, "post_ra_alloc_scheduling", 96, 2);
1681*61046927SAndroid Build Coastguard Worker
1682*61046927SAndroid Build Coastguard Worker /* Lowering VGRF to FIXED_GRF is currently done as a separate pass instead
1683*61046927SAndroid Build Coastguard Worker * of part of assign_regs since both bank conflicts optimization and post
1684*61046927SAndroid Build Coastguard Worker * RA scheduling take advantage of distinguishing references to registers
1685*61046927SAndroid Build Coastguard Worker * that were allocated from references that were already fixed.
1686*61046927SAndroid Build Coastguard Worker *
1687*61046927SAndroid Build Coastguard Worker * TODO: Change the passes above, then move this lowering to be part of
1688*61046927SAndroid Build Coastguard Worker * assign_regs.
1689*61046927SAndroid Build Coastguard Worker */
1690*61046927SAndroid Build Coastguard Worker brw_fs_lower_vgrfs_to_fixed_grfs(s);
1691*61046927SAndroid Build Coastguard Worker
1692*61046927SAndroid Build Coastguard Worker s.debug_optimizer(nir, "lowered_vgrfs_to_fixed_grfs", 96, 3);
1693*61046927SAndroid Build Coastguard Worker
1694*61046927SAndroid Build Coastguard Worker if (s.last_scratch > 0) {
1695*61046927SAndroid Build Coastguard Worker /* We currently only support up to 2MB of scratch space. If we
1696*61046927SAndroid Build Coastguard Worker * need to support more eventually, the documentation suggests
1697*61046927SAndroid Build Coastguard Worker * that we could allocate a larger buffer, and partition it out
1698*61046927SAndroid Build Coastguard Worker * ourselves. We'd just have to undo the hardware's address
1699*61046927SAndroid Build Coastguard Worker * calculation by subtracting (FFTID * Per Thread Scratch Space)
1700*61046927SAndroid Build Coastguard Worker * and then add FFTID * (Larger Per Thread Scratch Space).
1701*61046927SAndroid Build Coastguard Worker *
1702*61046927SAndroid Build Coastguard Worker * See 3D-Media-GPGPU Engine > Media GPGPU Pipeline >
1703*61046927SAndroid Build Coastguard Worker * Thread Group Tracking > Local Memory/Scratch Space.
1704*61046927SAndroid Build Coastguard Worker */
1705*61046927SAndroid Build Coastguard Worker if (s.last_scratch <= devinfo->max_scratch_size_per_thread) {
1706*61046927SAndroid Build Coastguard Worker /* Take the max of any previously compiled variant of the shader. In the
1707*61046927SAndroid Build Coastguard Worker * case of bindless shaders with return parts, this will also take the
1708*61046927SAndroid Build Coastguard Worker * max of all parts.
1709*61046927SAndroid Build Coastguard Worker */
1710*61046927SAndroid Build Coastguard Worker s.prog_data->total_scratch = MAX2(brw_get_scratch_size(s.last_scratch),
1711*61046927SAndroid Build Coastguard Worker s.prog_data->total_scratch);
1712*61046927SAndroid Build Coastguard Worker } else {
1713*61046927SAndroid Build Coastguard Worker s.fail("Scratch space required is larger than supported");
1714*61046927SAndroid Build Coastguard Worker }
1715*61046927SAndroid Build Coastguard Worker }
1716*61046927SAndroid Build Coastguard Worker
1717*61046927SAndroid Build Coastguard Worker if (s.failed)
1718*61046927SAndroid Build Coastguard Worker return;
1719*61046927SAndroid Build Coastguard Worker
1720*61046927SAndroid Build Coastguard Worker brw_fs_lower_scoreboard(s);
1721*61046927SAndroid Build Coastguard Worker }
1722*61046927SAndroid Build Coastguard Worker
1723*61046927SAndroid Build Coastguard Worker /**
1724*61046927SAndroid Build Coastguard Worker * Move load_interpolated_input with simple (payload-based) barycentric modes
1725*61046927SAndroid Build Coastguard Worker * to the top of the program so we don't emit multiple PLNs for the same input.
1726*61046927SAndroid Build Coastguard Worker *
1727*61046927SAndroid Build Coastguard Worker * This works around CSE not being able to handle non-dominating cases
1728*61046927SAndroid Build Coastguard Worker * such as:
1729*61046927SAndroid Build Coastguard Worker *
1730*61046927SAndroid Build Coastguard Worker * if (...) {
1731*61046927SAndroid Build Coastguard Worker * interpolate input
1732*61046927SAndroid Build Coastguard Worker * } else {
1733*61046927SAndroid Build Coastguard Worker * interpolate the same exact input
1734*61046927SAndroid Build Coastguard Worker * }
1735*61046927SAndroid Build Coastguard Worker *
1736*61046927SAndroid Build Coastguard Worker * This should be replaced by global value numbering someday.
1737*61046927SAndroid Build Coastguard Worker */
1738*61046927SAndroid Build Coastguard Worker bool
brw_nir_move_interpolation_to_top(nir_shader * nir)1739*61046927SAndroid Build Coastguard Worker brw_nir_move_interpolation_to_top(nir_shader *nir)
1740*61046927SAndroid Build Coastguard Worker {
1741*61046927SAndroid Build Coastguard Worker bool progress = false;
1742*61046927SAndroid Build Coastguard Worker
1743*61046927SAndroid Build Coastguard Worker nir_foreach_function_impl(impl, nir) {
1744*61046927SAndroid Build Coastguard Worker nir_block *top = nir_start_block(impl);
1745*61046927SAndroid Build Coastguard Worker nir_cursor cursor = nir_before_instr(nir_block_first_instr(top));
1746*61046927SAndroid Build Coastguard Worker bool impl_progress = false;
1747*61046927SAndroid Build Coastguard Worker
1748*61046927SAndroid Build Coastguard Worker for (nir_block *block = nir_block_cf_tree_next(top);
1749*61046927SAndroid Build Coastguard Worker block != NULL;
1750*61046927SAndroid Build Coastguard Worker block = nir_block_cf_tree_next(block)) {
1751*61046927SAndroid Build Coastguard Worker
1752*61046927SAndroid Build Coastguard Worker nir_foreach_instr_safe(instr, block) {
1753*61046927SAndroid Build Coastguard Worker if (instr->type != nir_instr_type_intrinsic)
1754*61046927SAndroid Build Coastguard Worker continue;
1755*61046927SAndroid Build Coastguard Worker
1756*61046927SAndroid Build Coastguard Worker nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1757*61046927SAndroid Build Coastguard Worker if (intrin->intrinsic != nir_intrinsic_load_interpolated_input)
1758*61046927SAndroid Build Coastguard Worker continue;
1759*61046927SAndroid Build Coastguard Worker nir_intrinsic_instr *bary_intrinsic =
1760*61046927SAndroid Build Coastguard Worker nir_instr_as_intrinsic(intrin->src[0].ssa->parent_instr);
1761*61046927SAndroid Build Coastguard Worker nir_intrinsic_op op = bary_intrinsic->intrinsic;
1762*61046927SAndroid Build Coastguard Worker
1763*61046927SAndroid Build Coastguard Worker /* Leave interpolateAtSample/Offset() where they are. */
1764*61046927SAndroid Build Coastguard Worker if (op == nir_intrinsic_load_barycentric_at_sample ||
1765*61046927SAndroid Build Coastguard Worker op == nir_intrinsic_load_barycentric_at_offset)
1766*61046927SAndroid Build Coastguard Worker continue;
1767*61046927SAndroid Build Coastguard Worker
1768*61046927SAndroid Build Coastguard Worker nir_instr *move[3] = {
1769*61046927SAndroid Build Coastguard Worker &bary_intrinsic->instr,
1770*61046927SAndroid Build Coastguard Worker intrin->src[1].ssa->parent_instr,
1771*61046927SAndroid Build Coastguard Worker instr
1772*61046927SAndroid Build Coastguard Worker };
1773*61046927SAndroid Build Coastguard Worker
1774*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < ARRAY_SIZE(move); i++) {
1775*61046927SAndroid Build Coastguard Worker if (move[i]->block != top) {
1776*61046927SAndroid Build Coastguard Worker nir_instr_move(cursor, move[i]);
1777*61046927SAndroid Build Coastguard Worker impl_progress = true;
1778*61046927SAndroid Build Coastguard Worker }
1779*61046927SAndroid Build Coastguard Worker }
1780*61046927SAndroid Build Coastguard Worker }
1781*61046927SAndroid Build Coastguard Worker }
1782*61046927SAndroid Build Coastguard Worker
1783*61046927SAndroid Build Coastguard Worker progress = progress || impl_progress;
1784*61046927SAndroid Build Coastguard Worker
1785*61046927SAndroid Build Coastguard Worker nir_metadata_preserve(impl, impl_progress ? nir_metadata_control_flow
1786*61046927SAndroid Build Coastguard Worker : nir_metadata_all);
1787*61046927SAndroid Build Coastguard Worker }
1788*61046927SAndroid Build Coastguard Worker
1789*61046927SAndroid Build Coastguard Worker return progress;
1790*61046927SAndroid Build Coastguard Worker }
1791*61046927SAndroid Build Coastguard Worker
1792*61046927SAndroid Build Coastguard Worker unsigned
brw_cs_push_const_total_size(const struct brw_cs_prog_data * cs_prog_data,unsigned threads)1793*61046927SAndroid Build Coastguard Worker brw_cs_push_const_total_size(const struct brw_cs_prog_data *cs_prog_data,
1794*61046927SAndroid Build Coastguard Worker unsigned threads)
1795*61046927SAndroid Build Coastguard Worker {
1796*61046927SAndroid Build Coastguard Worker assert(cs_prog_data->push.per_thread.size % REG_SIZE == 0);
1797*61046927SAndroid Build Coastguard Worker assert(cs_prog_data->push.cross_thread.size % REG_SIZE == 0);
1798*61046927SAndroid Build Coastguard Worker return cs_prog_data->push.per_thread.size * threads +
1799*61046927SAndroid Build Coastguard Worker cs_prog_data->push.cross_thread.size;
1800*61046927SAndroid Build Coastguard Worker }
1801*61046927SAndroid Build Coastguard Worker
1802*61046927SAndroid Build Coastguard Worker static bool
filter_simd(const nir_instr * instr,const void *)1803*61046927SAndroid Build Coastguard Worker filter_simd(const nir_instr *instr, const void * /* options */)
1804*61046927SAndroid Build Coastguard Worker {
1805*61046927SAndroid Build Coastguard Worker if (instr->type != nir_instr_type_intrinsic)
1806*61046927SAndroid Build Coastguard Worker return false;
1807*61046927SAndroid Build Coastguard Worker
1808*61046927SAndroid Build Coastguard Worker switch (nir_instr_as_intrinsic(instr)->intrinsic) {
1809*61046927SAndroid Build Coastguard Worker case nir_intrinsic_load_simd_width_intel:
1810*61046927SAndroid Build Coastguard Worker case nir_intrinsic_load_subgroup_id:
1811*61046927SAndroid Build Coastguard Worker return true;
1812*61046927SAndroid Build Coastguard Worker
1813*61046927SAndroid Build Coastguard Worker default:
1814*61046927SAndroid Build Coastguard Worker return false;
1815*61046927SAndroid Build Coastguard Worker }
1816*61046927SAndroid Build Coastguard Worker }
1817*61046927SAndroid Build Coastguard Worker
1818*61046927SAndroid Build Coastguard Worker static nir_def *
lower_simd(nir_builder * b,nir_instr * instr,void * options)1819*61046927SAndroid Build Coastguard Worker lower_simd(nir_builder *b, nir_instr *instr, void *options)
1820*61046927SAndroid Build Coastguard Worker {
1821*61046927SAndroid Build Coastguard Worker uintptr_t simd_width = (uintptr_t)options;
1822*61046927SAndroid Build Coastguard Worker
1823*61046927SAndroid Build Coastguard Worker switch (nir_instr_as_intrinsic(instr)->intrinsic) {
1824*61046927SAndroid Build Coastguard Worker case nir_intrinsic_load_simd_width_intel:
1825*61046927SAndroid Build Coastguard Worker return nir_imm_int(b, simd_width);
1826*61046927SAndroid Build Coastguard Worker
1827*61046927SAndroid Build Coastguard Worker case nir_intrinsic_load_subgroup_id:
1828*61046927SAndroid Build Coastguard Worker /* If the whole workgroup fits in one thread, we can lower subgroup_id
1829*61046927SAndroid Build Coastguard Worker * to a constant zero.
1830*61046927SAndroid Build Coastguard Worker */
1831*61046927SAndroid Build Coastguard Worker if (!b->shader->info.workgroup_size_variable) {
1832*61046927SAndroid Build Coastguard Worker unsigned local_workgroup_size = b->shader->info.workgroup_size[0] *
1833*61046927SAndroid Build Coastguard Worker b->shader->info.workgroup_size[1] *
1834*61046927SAndroid Build Coastguard Worker b->shader->info.workgroup_size[2];
1835*61046927SAndroid Build Coastguard Worker if (local_workgroup_size <= simd_width)
1836*61046927SAndroid Build Coastguard Worker return nir_imm_int(b, 0);
1837*61046927SAndroid Build Coastguard Worker }
1838*61046927SAndroid Build Coastguard Worker return NULL;
1839*61046927SAndroid Build Coastguard Worker
1840*61046927SAndroid Build Coastguard Worker default:
1841*61046927SAndroid Build Coastguard Worker return NULL;
1842*61046927SAndroid Build Coastguard Worker }
1843*61046927SAndroid Build Coastguard Worker }
1844*61046927SAndroid Build Coastguard Worker
1845*61046927SAndroid Build Coastguard Worker bool
brw_nir_lower_simd(nir_shader * nir,unsigned dispatch_width)1846*61046927SAndroid Build Coastguard Worker brw_nir_lower_simd(nir_shader *nir, unsigned dispatch_width)
1847*61046927SAndroid Build Coastguard Worker {
1848*61046927SAndroid Build Coastguard Worker return nir_shader_lower_instructions(nir, filter_simd, lower_simd,
1849*61046927SAndroid Build Coastguard Worker (void *)(uintptr_t)dispatch_width);
1850*61046927SAndroid Build Coastguard Worker }
1851*61046927SAndroid Build Coastguard Worker
1852*61046927SAndroid Build Coastguard Worker struct intel_cs_dispatch_info
brw_cs_get_dispatch_info(const struct intel_device_info * devinfo,const struct brw_cs_prog_data * prog_data,const unsigned * override_local_size)1853*61046927SAndroid Build Coastguard Worker brw_cs_get_dispatch_info(const struct intel_device_info *devinfo,
1854*61046927SAndroid Build Coastguard Worker const struct brw_cs_prog_data *prog_data,
1855*61046927SAndroid Build Coastguard Worker const unsigned *override_local_size)
1856*61046927SAndroid Build Coastguard Worker {
1857*61046927SAndroid Build Coastguard Worker struct intel_cs_dispatch_info info = {};
1858*61046927SAndroid Build Coastguard Worker
1859*61046927SAndroid Build Coastguard Worker const unsigned *sizes =
1860*61046927SAndroid Build Coastguard Worker override_local_size ? override_local_size :
1861*61046927SAndroid Build Coastguard Worker prog_data->local_size;
1862*61046927SAndroid Build Coastguard Worker
1863*61046927SAndroid Build Coastguard Worker const int simd = brw_simd_select_for_workgroup_size(devinfo, prog_data, sizes);
1864*61046927SAndroid Build Coastguard Worker assert(simd >= 0 && simd < 3);
1865*61046927SAndroid Build Coastguard Worker
1866*61046927SAndroid Build Coastguard Worker info.group_size = sizes[0] * sizes[1] * sizes[2];
1867*61046927SAndroid Build Coastguard Worker info.simd_size = 8u << simd;
1868*61046927SAndroid Build Coastguard Worker info.threads = DIV_ROUND_UP(info.group_size, info.simd_size);
1869*61046927SAndroid Build Coastguard Worker
1870*61046927SAndroid Build Coastguard Worker const uint32_t remainder = info.group_size & (info.simd_size - 1);
1871*61046927SAndroid Build Coastguard Worker if (remainder > 0)
1872*61046927SAndroid Build Coastguard Worker info.right_mask = ~0u >> (32 - remainder);
1873*61046927SAndroid Build Coastguard Worker else
1874*61046927SAndroid Build Coastguard Worker info.right_mask = ~0u >> (32 - info.simd_size);
1875*61046927SAndroid Build Coastguard Worker
1876*61046927SAndroid Build Coastguard Worker return info;
1877*61046927SAndroid Build Coastguard Worker }
1878*61046927SAndroid Build Coastguard Worker
brw_should_print_shader(const nir_shader * shader,uint64_t debug_flag)1879*61046927SAndroid Build Coastguard Worker bool brw_should_print_shader(const nir_shader *shader, uint64_t debug_flag)
1880*61046927SAndroid Build Coastguard Worker {
1881*61046927SAndroid Build Coastguard Worker return INTEL_DEBUG(debug_flag) && (!shader->info.internal || NIR_DEBUG(PRINT_INTERNAL));
1882*61046927SAndroid Build Coastguard Worker }
1883*61046927SAndroid Build Coastguard Worker
1884*61046927SAndroid Build Coastguard Worker namespace brw {
1885*61046927SAndroid Build Coastguard Worker brw_reg
fetch_payload_reg(const brw::fs_builder & bld,uint8_t regs[2],brw_reg_type type,unsigned n)1886*61046927SAndroid Build Coastguard Worker fetch_payload_reg(const brw::fs_builder &bld, uint8_t regs[2],
1887*61046927SAndroid Build Coastguard Worker brw_reg_type type, unsigned n)
1888*61046927SAndroid Build Coastguard Worker {
1889*61046927SAndroid Build Coastguard Worker if (!regs[0])
1890*61046927SAndroid Build Coastguard Worker return brw_reg();
1891*61046927SAndroid Build Coastguard Worker
1892*61046927SAndroid Build Coastguard Worker if (bld.dispatch_width() > 16) {
1893*61046927SAndroid Build Coastguard Worker const brw_reg tmp = bld.vgrf(type, n);
1894*61046927SAndroid Build Coastguard Worker const brw::fs_builder hbld = bld.exec_all().group(16, 0);
1895*61046927SAndroid Build Coastguard Worker const unsigned m = bld.dispatch_width() / hbld.dispatch_width();
1896*61046927SAndroid Build Coastguard Worker brw_reg *const components = new brw_reg[m * n];
1897*61046927SAndroid Build Coastguard Worker
1898*61046927SAndroid Build Coastguard Worker for (unsigned c = 0; c < n; c++) {
1899*61046927SAndroid Build Coastguard Worker for (unsigned g = 0; g < m; g++)
1900*61046927SAndroid Build Coastguard Worker components[c * m + g] =
1901*61046927SAndroid Build Coastguard Worker offset(retype(brw_vec8_grf(regs[g], 0), type), hbld, c);
1902*61046927SAndroid Build Coastguard Worker }
1903*61046927SAndroid Build Coastguard Worker
1904*61046927SAndroid Build Coastguard Worker hbld.LOAD_PAYLOAD(tmp, components, m * n, 0);
1905*61046927SAndroid Build Coastguard Worker
1906*61046927SAndroid Build Coastguard Worker delete[] components;
1907*61046927SAndroid Build Coastguard Worker return tmp;
1908*61046927SAndroid Build Coastguard Worker
1909*61046927SAndroid Build Coastguard Worker } else {
1910*61046927SAndroid Build Coastguard Worker return brw_reg(retype(brw_vec8_grf(regs[0], 0), type));
1911*61046927SAndroid Build Coastguard Worker }
1912*61046927SAndroid Build Coastguard Worker }
1913*61046927SAndroid Build Coastguard Worker
1914*61046927SAndroid Build Coastguard Worker brw_reg
fetch_barycentric_reg(const brw::fs_builder & bld,uint8_t regs[2])1915*61046927SAndroid Build Coastguard Worker fetch_barycentric_reg(const brw::fs_builder &bld, uint8_t regs[2])
1916*61046927SAndroid Build Coastguard Worker {
1917*61046927SAndroid Build Coastguard Worker if (!regs[0])
1918*61046927SAndroid Build Coastguard Worker return brw_reg();
1919*61046927SAndroid Build Coastguard Worker else if (bld.shader->devinfo->ver >= 20)
1920*61046927SAndroid Build Coastguard Worker return fetch_payload_reg(bld, regs, BRW_TYPE_F, 2);
1921*61046927SAndroid Build Coastguard Worker
1922*61046927SAndroid Build Coastguard Worker const brw_reg tmp = bld.vgrf(BRW_TYPE_F, 2);
1923*61046927SAndroid Build Coastguard Worker const brw::fs_builder hbld = bld.exec_all().group(8, 0);
1924*61046927SAndroid Build Coastguard Worker const unsigned m = bld.dispatch_width() / hbld.dispatch_width();
1925*61046927SAndroid Build Coastguard Worker brw_reg *const components = new brw_reg[2 * m];
1926*61046927SAndroid Build Coastguard Worker
1927*61046927SAndroid Build Coastguard Worker for (unsigned c = 0; c < 2; c++) {
1928*61046927SAndroid Build Coastguard Worker for (unsigned g = 0; g < m; g++)
1929*61046927SAndroid Build Coastguard Worker components[c * m + g] = offset(brw_vec8_grf(regs[g / 2], 0),
1930*61046927SAndroid Build Coastguard Worker hbld, c + 2 * (g % 2));
1931*61046927SAndroid Build Coastguard Worker }
1932*61046927SAndroid Build Coastguard Worker
1933*61046927SAndroid Build Coastguard Worker hbld.LOAD_PAYLOAD(tmp, components, 2 * m, 0);
1934*61046927SAndroid Build Coastguard Worker
1935*61046927SAndroid Build Coastguard Worker delete[] components;
1936*61046927SAndroid Build Coastguard Worker return tmp;
1937*61046927SAndroid Build Coastguard Worker }
1938*61046927SAndroid Build Coastguard Worker
1939*61046927SAndroid Build Coastguard Worker void
check_dynamic_msaa_flag(const fs_builder & bld,const struct brw_wm_prog_data * wm_prog_data,enum intel_msaa_flags flag)1940*61046927SAndroid Build Coastguard Worker check_dynamic_msaa_flag(const fs_builder &bld,
1941*61046927SAndroid Build Coastguard Worker const struct brw_wm_prog_data *wm_prog_data,
1942*61046927SAndroid Build Coastguard Worker enum intel_msaa_flags flag)
1943*61046927SAndroid Build Coastguard Worker {
1944*61046927SAndroid Build Coastguard Worker fs_inst *inst = bld.AND(bld.null_reg_ud(),
1945*61046927SAndroid Build Coastguard Worker dynamic_msaa_flags(wm_prog_data),
1946*61046927SAndroid Build Coastguard Worker brw_imm_ud(flag));
1947*61046927SAndroid Build Coastguard Worker inst->conditional_mod = BRW_CONDITIONAL_NZ;
1948*61046927SAndroid Build Coastguard Worker }
1949*61046927SAndroid Build Coastguard Worker }
1950*61046927SAndroid Build Coastguard Worker
1951