1 /*
2 * Copyright © 2012-2018 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 /** @file
25 *
26 * Instruction compaction is a feature of G45 and newer hardware that allows
27 * for a smaller instruction encoding.
28 *
29 * The instruction cache is on the order of 32KB, and many programs generate
30 * far more instructions than that. The instruction cache is built to barely
31 * keep up with instruction dispatch ability in cache hit cases -- L1
32 * instruction cache misses that still hit in the next level could limit
33 * throughput by around 50%.
34 *
35 * The idea of instruction compaction is that most instructions use a tiny
36 * subset of the GPU functionality, so we can encode what would be a 16 byte
37 * instruction in 8 bytes using some lookup tables for various fields.
38 *
39 *
40 * Instruction compaction capabilities vary subtly by generation.
41 *
42 * G45's support for instruction compaction is very limited. Jump counts on
43 * this generation are in units of 16-byte uncompacted instructions. As such,
44 * all jump targets must be 16-byte aligned. Also, all instructions must be
45 * naturally aligned, i.e. uncompacted instructions must be 16-byte aligned.
46 * A G45-only instruction, NENOP, must be used to provide padding to align
47 * uncompacted instructions.
48 *
49 * Gfx5 removes these restrictions and changes jump counts to be in units of
50 * 8-byte compacted instructions, allowing jump targets to be only 8-byte
51 * aligned. Uncompacted instructions can also be placed on 8-byte boundaries.
52 *
53 * Gfx6 adds the ability to compact instructions with a limited range of
54 * immediate values. Compactable immediates have 12 unrestricted bits, and a
55 * 13th bit that's replicated through the high 20 bits, to create the 32-bit
56 * value of DW3 in the uncompacted instruction word.
57 *
58 * On Gfx7 we can compact some control flow instructions with a small positive
59 * immediate in the low bits of DW3, like ENDIF with the JIP field. Other
60 * control flow instructions with UIP cannot be compacted, because of the
61 * replicated 13th bit. No control flow instructions can be compacted on Gfx6
62 * since the jump count field is not in DW3.
63 *
64 * break JIP/UIP
65 * cont JIP/UIP
66 * halt JIP/UIP
67 * if JIP/UIP
68 * else JIP (plus UIP on BDW+)
69 * endif JIP
70 * while JIP (must be negative)
71 *
72 * Gfx8 adds support for compacting 3-src instructions.
73 *
74 * Gfx12 reduces the number of bits that are available to compacted immediates from
75 * 13 to 12, but improves the compaction of floating-point immediates by
76 * allowing the high bits to be encoded (the sign, 8-bit exponent, and the
77 * three most significant bits of the mantissa), rather than the lowest bits of
78 * the mantissa.
79 */
80
81 #include "brw_eu.h"
82 #include "brw_disasm.h"
83 #include "brw_disasm_info.h"
84 #include "dev/intel_debug.h"
85
/* Sub-register lookup table for instruction compaction, G45 variant (per the
 * table name).
 *
 * Holds 32 bit patterns.  Presumably a compacted instruction stores an index
 * into this table instead of the full field; the code that performs the
 * lookup is not part of this section -- TODO confirm against the users.
 */
86 static const uint16_t g45_subreg_table[32] = {
87 0b000000000000000,
88 0b000000010000000,
89 0b000001000000000,
90 0b000100000000000,
91 0b000000000100000,
92 0b100000000000000,
93 0b000000000010000,
94 0b001100000000000,
95 0b001010000000000,
96 0b000000100000000,
97 0b001000000000000,
98 0b000000000001000,
99 0b000000001000000,
100 0b000000000000001,
101 0b000010000000000,
102 0b000000010100000,
103 0b000000000000111,
104 0b000001000100000,
105 0b011000000000000,
106 0b000000110000000,
107 0b000000000000010,
108 0b000000000000100,
109 0b000000001100000,
110 0b000100000000010,
111 0b001110011000110,
112 0b001110100001000,
113 0b000110011000110,
114 0b000001000011000,
115 0b000110010000100,
116 0b001100000000110,
117 0b000000010000110,
118 0b000001000110000,
119 };
120
/* Control-index lookup table for instruction compaction, Gfx8 variant (per
 * the table name).
 *
 * Holds 32 bit patterns in ascending numeric order.  The meaning of the
 * individual bits is not documented in this section.
 */
121 static const uint32_t gfx8_control_index_table[32] = {
122 0b0000000000000000010,
123 0b0000100000000000000,
124 0b0000100000000000001,
125 0b0000100000000000010,
126 0b0000100000000000011,
127 0b0000100000000000100,
128 0b0000100000000000101,
129 0b0000100000000000111,
130 0b0000100000000001000,
131 0b0000100000000001001,
132 0b0000100000000001101,
133 0b0000110000000000000,
134 0b0000110000000000001,
135 0b0000110000000000010,
136 0b0000110000000000011,
137 0b0000110000000000100,
138 0b0000110000000000101,
139 0b0000110000000000111,
140 0b0000110000000001001,
141 0b0000110000000001101,
142 0b0000110000000010000,
143 0b0000110000100000000,
144 0b0001000000000000000,
145 0b0001000000000000010,
146 0b0001000000000000100,
147 0b0001000000100000000,
148 0b0010110000000000000,
149 0b0010110000000010000,
150 0b0011000000000000000,
151 0b0011000000100000000,
152 0b0101000000000000000,
153 0b0101000000100000000,
154 };
155
/* Datatype lookup table for instruction compaction, Gfx8 variant (per the
 * table name).
 *
 * Holds 32 bit patterns.  Unlike the neighbouring Gfx8 tables the entries are
 * not in ascending numeric order, so the position of each entry matters.
 */
156 static const uint32_t gfx8_datatype_table[32] = {
157 0b001000000000000000001,
158 0b001000000000001000000,
159 0b001000000000001000001,
160 0b001000000000011000001,
161 0b001000000000101011101,
162 0b001000000010111011101,
163 0b001000000011101000001,
164 0b001000000011101000101,
165 0b001000000011101011101,
166 0b001000001000001000001,
167 0b001000011000001000000,
168 0b001000011000001000001,
169 0b001000101000101000101,
170 0b001000111000101000100,
171 0b001000111000101000101,
172 0b001011100011101011101,
173 0b001011101011100011101,
174 0b001011101011101011100,
175 0b001011101011101011101,
176 0b001011111011101011100,
177 0b000000000010000001100,
178 0b001000000000001011101,
179 0b001000000000101000101,
180 0b001000001000001000000,
181 0b001000101000101000100,
182 0b001000111000100000100,
183 0b001001001001000001001,
184 0b001010111011101011101,
185 0b001011111011101011101,
186 0b001001111001101001100,
187 0b001001001001001001000,
188 0b001001011001001001000,
189 };
190
/* Sub-register lookup table for instruction compaction, Gfx8 variant (per the
 * table name).
 *
 * Holds 32 bit patterns in ascending numeric order.
 */
191 static const uint16_t gfx8_subreg_table[32] = {
192 0b000000000000000,
193 0b000000000000001,
194 0b000000000001000,
195 0b000000000001111,
196 0b000000000010000,
197 0b000000010000000,
198 0b000000100000000,
199 0b000000110000000,
200 0b000001000000000,
201 0b000001000010000,
202 0b000001010000000,
203 0b001000000000000,
204 0b001000000000001,
205 0b001000010000001,
206 0b001000010000010,
207 0b001000010000011,
208 0b001000010000100,
209 0b001000010000111,
210 0b001000010001000,
211 0b001000010001110,
212 0b001000010001111,
213 0b001000110000000,
214 0b001000111101000,
215 0b010000000000000,
216 0b010000110000000,
217 0b011000000000000,
218 0b011110010000111,
219 0b100000000000000,
220 0b101000000000000,
221 0b110000000000000,
222 0b111000000000000,
223 0b111000000011100,
224 };
225
/* Source-index lookup table for instruction compaction, Gfx8 variant (per the
 * table name).
 *
 * Holds 32 bit patterns in ascending numeric order.  This generation has a
 * single source table, whereas the gfx12/xehp/xe2 tables further down are
 * split into separate src0 and src1 tables.
 */
226 static const uint16_t gfx8_src_index_table[32] = {
227 0b000000000000,
228 0b000000000010,
229 0b000000010000,
230 0b000000010010,
231 0b000000011000,
232 0b000000100000,
233 0b000000101000,
234 0b000001001000,
235 0b000001010000,
236 0b000001110000,
237 0b000001111000,
238 0b001100000000,
239 0b001100000010,
240 0b001100001000,
241 0b001100010000,
242 0b001100010010,
243 0b001100100000,
244 0b001100101000,
245 0b001100111000,
246 0b001101000000,
247 0b001101000010,
248 0b001101001000,
249 0b001101010000,
250 0b001101100000,
251 0b001101101000,
252 0b001101110000,
253 0b001101110001,
254 0b001101111000,
255 0b010001101000,
256 0b010001101001,
257 0b010001101010,
258 0b010110001000,
259 };
260
/* Datatype lookup table for instruction compaction, Gfx11 variant (per the
 * table name).
 *
 * Holds 32 bit patterns.  The table has the same length as
 * gfx8_datatype_table and shares some entries with it (for example the first
 * four), but other entries differ, so the two are not interchangeable.
 */
261 static const uint32_t gfx11_datatype_table[32] = {
262 0b001000000000000000001,
263 0b001000000000001000000,
264 0b001000000000001000001,
265 0b001000000000011000001,
266 0b001000000000101100101,
267 0b001000000101111100101,
268 0b001000000100101000001,
269 0b001000000100101000101,
270 0b001000000100101100101,
271 0b001000001000001000001,
272 0b001000011000001000000,
273 0b001000011000001000001,
274 0b001000101000101000101,
275 0b001000111000101000100,
276 0b001000111000101000101,
277 0b001100100100101100101,
278 0b001100101100100100101,
279 0b001100101100101100100,
280 0b001100101100101100101,
281 0b001100111100101100100,
282 0b000000000010000001100,
283 0b001000000000001100101,
284 0b001000000000101000101,
285 0b001000001000001000000,
286 0b001000101000101000100,
287 0b001000111000100000100,
288 0b001001001001000001001,
289 0b001101111100101100101,
290 0b001100111100101100101,
291 0b001001111001101001100,
292 0b001001001001001001000,
293 0b001001011001001001000,
294 };
295
/* Control-index lookup table for instruction compaction, Gfx12 variant (per
 * the table name).
 *
 * Holds 32 bit patterns.  The trailing comment on each entry describes the
 * instruction controls the pattern stands for, in what appears to be
 * disassembly-style notation (e.g. "(W)", "(16|M0)", "(sat)", "(lt)f0.0").
 */
296 static const uint32_t gfx12_control_index_table[32] = {
297 0b000000000000000000100, /* (16|M0) */
298 0b000000000000000000011, /* (8|M0) */
299 0b000000010000000000000, /* (W) (1|M0) */
300 0b000000010000000000100, /* (W) (16|M0) */
301 0b000000010000000000011, /* (W) (8|M0) */
302 0b010000000000000000100, /* (16|M0) (ge)f0.0 */
303 0b000000000000000100100, /* (16|M16) */
304 0b010100000000000000100, /* (16|M0) (lt)f0.0 */
305 0b000000000000000000000, /* (1|M0) */
306 0b000010000000000000100, /* (16|M0) (sat) */
307 0b000000000000000010011, /* (8|M8) */
308 0b001100000000000000100, /* (16|M0) (gt)f0.0 */
309 0b000100000000000000100, /* (16|M0) (eq)f0.0 */
310 0b000100010000000000100, /* (W) (16|M0) (eq)f0.0 */
311 0b001000000000000000100, /* (16|M0) (ne)f0.0 */
312 0b000000000000100000100, /* (f0.0) (16|M0) */
313 0b010100000000000000011, /* (8|M0) (lt)f0.0 */
314 0b000000000000110000100, /* (f1.0) (16|M0) */
315 0b000000010000000000001, /* (W) (2|M0) */
316 0b000000000000101000100, /* (f0.1) (16|M0) */
317 0b000000000000111000100, /* (f1.1) (16|M0) */
318 0b010000010000000000100, /* (W) (16|M0) (ge)f0.0 */
319 0b000000000000000100011, /* (8|M16) */
320 0b000000000000000110011, /* (8|M24) */
321 0b010100010000000000100, /* (W) (16|M0) (lt)f0.0 */
322 0b010000000000000000011, /* (8|M0) (ge)f0.0 */
323 0b000100010000000000000, /* (W) (1|M0) (eq)f0.0 */
324 0b000010000000000000011, /* (8|M0) (sat) */
325 0b010100000000010000100, /* (16|M0) (lt)f1.0 */
326 0b000100000000000000011, /* (8|M0) (eq)f0.0 */
327 0b000001000000000000011, /* (8|M0) {AccWrEn} */
328 0b000000010000000100100, /* (W) (16|M16) */
329 };
330
/* Datatype lookup table for instruction compaction, Gfx12 variant (per the
 * table name).
 *
 * Holds 32 bit patterns.  Each trailing comment lists three operands with a
 * register file / immediate marker and a type suffix (e.g. "grf<1>:f grf:f
 * imm:f"); presumably these are destination, src0 and src1 in that order --
 * TODO confirm against the encoder.
 */
331 static const uint32_t gfx12_datatype_table[32] = {
332 0b11010110100101010100, /* grf<1>:f grf:f grf:f */
333 0b00000110100101010100, /* grf<1>:f grf:f arf:ub */
334 0b00000010101101010100, /* grf<1>:f imm:f arf:ub */
335 0b01010110110101010100, /* grf<1>:f grf:f imm:f */
336 0b11010100100101010100, /* arf<1>:f grf:f grf:f */
337 0b11010010100101010100, /* grf<1>:f arf:f grf:f */
338 0b01010100110101010100, /* arf<1>:f grf:f imm:f */
339 0b00000000100000000000, /* arf<1>:ub arf:ub arf:ub */
340 0b11010000100101010100, /* arf<1>:f arf:f grf:f */
341 0b00101110110011001100, /* grf<1>:d grf:d imm:w */
342 0b10110110100011001100, /* grf<1>:d grf:d grf:d */
343 0b01010010110101010100, /* grf<1>:f arf:f imm:f */
344 0b10010110100001000100, /* grf<1>:ud grf:ud grf:ud */
345 0b01010000110101010100, /* arf<1>:f arf:f imm:f */
346 0b00110110110011001100, /* grf<1>:d grf:d imm:d */
347 0b00010110110001000100, /* grf<1>:ud grf:ud imm:ud */
348 0b00000111000101010100, /* grf<2>:f grf:f arf:ub */
349 0b00101100110011001100, /* arf<1>:d grf:d imm:w */
350 0b00000000100000100010, /* arf<1>:uw arf:uw arf:ub */
351 0b00000010100001000100, /* grf<1>:ud arf:ud arf:ub */
352 0b00100110110000101010, /* grf<1>:w grf:uw imm:uv */
353 0b00001110110000100010, /* grf<1>:uw grf:uw imm:uw */
354 0b10010111000001000100, /* grf<2>:ud grf:ud grf:ud */
355 0b00000110100101001100, /* grf<1>:d grf:f arf:ub */
356 0b10001100100011001100, /* arf<1>:d grf:d grf:uw */
357 0b00000110100001010100, /* grf<1>:f grf:ud arf:ub */
358 0b00101110110001001100, /* grf<1>:d grf:ud imm:w */
359 0b00000010100000100010, /* grf<1>:uw arf:uw arf:ub */
360 0b00000110100000110100, /* grf<1>:f grf:uw arf:ub */
361 0b00000110100000010100, /* grf<1>:f grf:ub arf:ub */
362 0b00000110100011010100, /* grf<1>:f grf:d arf:ub */
363 0b00000010100101010100, /* grf<1>:f arf:f arf:ub */
364 };
365
/* Sub-register lookup table for instruction compaction, Gfx12 variant (per
 * the table name).
 *
 * Holds 32 bit patterns.  Each trailing comment lists three sub-register
 * values; as the entries show, the first listed value sits in the lowest bits
 * of the pattern and the last listed value in the highest bits (e.g. ".8 .0
 * .0" sets bit 3, ".0 .0 .16" sets bit 14).
 */
366 static const uint16_t gfx12_subreg_table[32] = {
367 0b000000000000000, /* .0 .0 .0 */
368 0b100000000000000, /* .0 .0 .16 */
369 0b001000000000000, /* .0 .0 .4 */
370 0b011000000000000, /* .0 .0 .12 */
371 0b000000010000000, /* .0 .4 .0 */
372 0b010000000000000, /* .0 .0 .8 */
373 0b101000000000000, /* .0 .0 .20 */
374 0b000000000001000, /* .8 .0 .0 */
375 0b000000100000000, /* .0 .8 .0 */
376 0b110000000000000, /* .0 .0 .24 */
377 0b111000000000000, /* .0 .0 .28 */
378 0b000001000000000, /* .0 .16 .0 */
379 0b000000000000100, /* .4 .0 .0 */
380 0b000001100000000, /* .0 .24 .0 */
381 0b000001010000000, /* .0 .20 .0 */
382 0b000000110000000, /* .0 .12 .0 */
383 0b000001110000000, /* .0 .28 .0 */
384 0b000000000011100, /* .28 .0 .0 */
385 0b000000000010000, /* .16 .0 .0 */
386 0b000000000001100, /* .12 .0 .0 */
387 0b000000000011000, /* .24 .0 .0 */
388 0b000000000010100, /* .20 .0 .0 */
389 0b000000000000010, /* .2 .0 .0 */
390 0b000000101000000, /* .0 .10 .0 */
391 0b000000001000000, /* .0 .2 .0 */
392 0b000000010000100, /* .4 .4 .0 */
393 0b000000001011100, /* .28 .2 .0 */
394 0b000000001000010, /* .2 .2 .0 */
395 0b000000110001100, /* .12 .12 .0 */
396 0b000000000100000, /* .0 .1 .0 */
397 0b000000001100000, /* .0 .3 .0 */
398 0b110001100000000, /* .0 .24 .24 */
399 };
400
/* Source-0 index lookup table for instruction compaction, Gfx12 variant (per
 * the table name).
 *
 * Holds 16 bit patterns.  Each trailing comment shows the source region and
 * any negate/abs modifier the pattern stands for, e.g. "-(abs)r<8;8,1>".
 */
401 static const uint16_t gfx12_src0_index_table[16] = {
402 0b010001100100, /* r<8;8,1> */
403 0b000000000000, /* r<0;1,0> */
404 0b010001100110, /* -r<8;8,1> */
405 0b010001100101, /* (abs)r<8;8,1> */
406 0b000000000010, /* -r<0;1,0> */
407 0b001000000000, /* r<2;1,0> */
408 0b001001000000, /* r<2;4,0> */
409 0b001101000000, /* r<4;4,0> */
410 0b001000100100, /* r<2;2,1> */
411 0b001100000000, /* r<4;1,0> */
412 0b001000100110, /* -r<2;2,1> */
413 0b001101000100, /* r<4;4,1> */
414 0b010001100111, /* -(abs)r<8;8,1> */
415 0b000100000000, /* r<1;1,0> */
416 0b000000000001, /* (abs)r<0;1,0> */
417 0b111100010000, /* r[a]<1,0> */
418 };
419
/* Source-1 index lookup table for instruction compaction, Gfx12 variant (per
 * the table name).
 *
 * Holds 16 bit patterns.  Each trailing comment shows the source region and
 * any negate/abs modifier the pattern stands for.  The bit layout differs
 * from gfx12_src0_index_table: the same region (e.g. "r<8;8,1>") is encoded
 * with a different pattern in each table.
 */
420 static const uint16_t gfx12_src1_index_table[16] = {
421 0b000100011001, /* r<8;8,1> */
422 0b000000000000, /* r<0;1,0> */
423 0b100100011001, /* -r<8;8,1> */
424 0b100000000000, /* -r<0;1,0> */
425 0b010100011001, /* (abs)r<8;8,1> */
426 0b100011010000, /* -r<4;4,0> */
427 0b000010000000, /* r<2;1,0> */
428 0b000010001001, /* r<2;2,1> */
429 0b100010001001, /* -r<2;2,1> */
430 0b000011010000, /* r<4;4,0> */
431 0b000011010001, /* r<4;4,1> */
432 0b000011000000, /* r<4;1,0> */
433 0b110100011001, /* -(abs)r<8;8,1> */
434 0b010000000000, /* (abs)r<0;1,0> */
435 0b110000000000, /* -(abs)r<0;1,0> */
436 0b100011010001, /* -r<4;4,1> */
437 };
438
/* Source-0 index lookup table for instruction compaction, XeHP variant (per
 * the table name).
 *
 * Holds 16 bit patterns.  Each trailing comment shows the source region and
 * any negate/abs modifier the pattern stands for.
 */
439 static const uint16_t xehp_src0_index_table[16] = {
440 0b000100000000, /* r<1;1,0> */
441 0b000000000000, /* r<0;1,0> */
442 0b000100000010, /* -r<1;1,0> */
443 0b000100000001, /* (abs)r<1;1,0> */
444 0b000000000010, /* -r<0;1,0> */
445 0b001000000000, /* r<2;1,0> */
446 0b001001000000, /* r<2;4,0> */
447 0b001101000000, /* r<4;4,0> */
448 0b001100000000, /* r<4;1,0> */
449 0b000100000011, /* -(abs)r<1;1,0> */
450 0b000000000001, /* (abs)r<0;1,0> */
451 0b111100010000, /* r[a]<1,0> */
452 0b010001100000, /* r<8;8,0> */
453 0b000101000000, /* r<1;4,0> */
454 0b010001001000, /* r<8;4,2> */
455 0b001000000010, /* -r<2;1,0> */
456 };
457
/* Source-1 index lookup table for instruction compaction, XeHP variant (per
 * the table name).
 *
 * Holds 16 bit patterns.  Each trailing comment shows the source region and
 * any negate/abs modifier the pattern stands for.
 */
458 static const uint16_t xehp_src1_index_table[16] = {
459 0b000001000000, /* r<1;1,0> */
460 0b000000000000, /* r<0;1,0> */
461 0b100001000000, /* -r<1;1,0> */
462 0b100000000000, /* -r<0;1,0> */
463 0b010001000000, /* (abs)r<1;1,0> */
464 0b100011010000, /* -r<4;4,0> */
465 0b000010000000, /* r<2;1,0> */
466 0b000011010000, /* r<4;4,0> */
467 0b000011000000, /* r<4;1,0> */
468 0b110001000000, /* -(abs)r<1;1,0> */
469 0b010000000000, /* (abs)r<0;1,0> */
470 0b110000000000, /* -(abs)r<0;1,0> */
471 0b000100011000, /* r<8;8,0> */
472 0b100010000000, /* -r<2;1,0> */
473 0b100000001001, /* -r<0;2,1> */
474 0b100001000100, /* -r[a]<1;1,0> */
475 };
476
/* Control-index lookup table for instruction compaction, Xe2 variant (per the
 * table name).
 *
 * Holds 32 bit patterns.  The trailing comment on each entry describes the
 * instruction controls the pattern stands for, in what appears to be
 * disassembly-style notation (e.g. "(W)", "(32|M0)", "(sat)", "(.lt)f0.0").
 */
477 static const uint32_t xe2_control_index_table[32] = {
478 0b000000000000000100, /* (16|M0) */
479 0b000000100000000000, /* (W) (1|M0) */
480 0b000000000010000100, /* (16|M16) */
481 0b000000000000000000, /* (1|M0) */
482 0b000000100000000100, /* (W) (16|M0) */
483 0b010000000000000100, /* (16|M0) (.ge)f0.0 */
484 0b010100000000000100, /* (16|M0) (.lt)f0.0 */
485 0b000000100000000010, /* (W) (4|M0) */
486 0b000000000000000101, /* (32|M0) */
487 0b000000100000000011, /* (W) (8|M0) */
488 0b001100100000000000, /* (W) (1|M0) (.gt)f0.0 */
489 0b000010000000000100, /* (16|M0) (sat) */
490 0b000100000000000100, /* (16|M0) (.eq)f0.0 */
491 0b000000100000000001, /* (W) (2|M0) */
492 0b001100000000000100, /* (16|M0) (.gt)f0.0 */
493 0b000100100000000000, /* (W) (1|M0) (.eq)f0.0 */
494 0b010100100000000010, /* (W) (4|M0) (.lt)f0.0 */
495 0b010000100000000000, /* (W) (1|M0) (.ge)f0.0 */
496 0b010000100000000010, /* (W) (4|M0) (.ge)f0.0 */
497 0b010100100000000000, /* (W) (1|M0) (.lt)f0.0 */
498 0b001000000000000100, /* (16|M0) (.ne)f0.0 */
499 0b000000000100100100, /* (f2.0) (16|M0) */
500 0b010100100000000011, /* (W) (8|M0) (.lt)f0.0 */
501 0b000000000100011100, /* (f1.1) (16|M0) */
502 0b010000100000000011, /* (W) (8|M0) (.ge)f0.0 */
503 0b000000000100001100, /* (f0.1) (16|M0) */
504 0b000000000100010100, /* (f1.0) (16|M0) */
505 0b000000000100110100, /* (f3.0) (16|M0) */
506 0b000000000100111100, /* (f3.1) (16|M0) */
507 0b000000000100101100, /* (f2.1) (16|M0) */
508 0b000000000100000100, /* (f0.0) (16|M0) */
509 0b010100000000100100, /* (16|M0) (.lt)f2.0 */
510 };
511
/* Datatype lookup table for instruction compaction, Xe2 variant (per the
 * table name).
 *
 * Holds 32 bit patterns.  Each trailing comment lists the operands with a
 * register file / immediate marker and a type suffix; presumably destination
 * first, then the sources -- TODO confirm.  Some comments list only two
 * operands (e.g. "grf<1>:ud imm:ud").
 */
512 static const uint32_t xe2_datatype_table[32] = {
513 0b11010110100101010100, /* grf<1>:f grf:f grf:f */
514 0b11010100100101010100, /* arf<1>:f grf:f grf:f */
515 0b00000110100101010100, /* grf<1>:f grf:f arf:ub */
516 0b00000110100001000100, /* grf<1>:ud grf:ud arf:ub */
517 0b01010110110101010100, /* grf<1>:f grf:f imm:f */
518 0b11010010100101010100, /* grf<1>:f arf:f grf:f */
519 0b10111110100011101110, /* grf<1>:q grf:q grf:q */
520 0b00000000100000000000, /* arf<1>:ub arf:ub arf:ub */
521 0b01010110100101010100, /* grf<1>:f grf:f arf:f */
522 0b00000010101001000100, /* grf<1>:ud imm:ud */
523 0b00101110110011001100, /* grf<1>:d grf:d imm:w */
524 0b11010000100101010100, /* arf<1>:f arf:f grf:f */
525 0b01010100100101010100, /* arf<1>:f grf:f arf:f */
526 0b01010100110101010100, /* arf<1>:f grf:f imm:f */
527 0b00000010101101010100, /* grf<1>:f imm:f */
528 0b00000110100011001100, /* grf<1>:d grf:d arf:ub */
529 0b00101110110011101110, /* grf<1>:q grf:q imm:w */
530 0b00000110100001100110, /* grf<1>:uq grf:uq arf:ub */
531 0b01010000100101010100, /* arf<1>:f arf:f arf:f */
532 0b10110110100011001100, /* grf<1>:d grf:d grf:d */
533 0b01010010100101010100, /* grf<1>:f arf:f arf:f */
534 0b00000111000001000100, /* grf<2>:ud grf:ud arf:ub */
535 0b00110110110011001110, /* grf<1>:q grf:d imm:d */
536 0b00101100110011001100, /* arf<1>:d grf:d imm:w */
537 0b11011110100101110110, /* grf<1>:df grf:df grf:df */
538 0b01010010110101010100, /* grf<1>:f arf:f imm:f */
539 0b10010110100001000100, /* grf<1>:ud grf:ud grf:ud */
540 0b00000010100001000100, /* grf<1>:ud arf:ud arf:ub */
541 0b00001110110001000100, /* grf<1>:ud grf:ud imm:uw */
542 0b00000010101010101100, /* grf<1>:d imm:w */
543 0b01010000110101010100, /* arf<1>:f arf:f imm:f */
544 0b00000100100001000100, /* arf<1>:ud grf:ud arf:ub */
545 };
546
/* Sub-register lookup table for instruction compaction, Xe2 variant (per the
 * table name).
 *
 * Holds 16 bit patterns (the Gfx8 and Gfx12 sub-register tables above have
 * 32).  Each trailing comment lists two sub-register values; as the entries
 * show, the first listed value sits in the lowest bits of the pattern (e.g.
 * ".4 .0" sets bit 2) and the second in the bits above it.
 */
547 static const uint16_t xe2_subreg_table[16] = {
548 0b000000000000, /* .0 .0 */
549 0b000010000000, /* .0 .4 */
550 0b000000000100, /* .4 .0 */
551 0b010000000000, /* .0 .32 */
552 0b001000000000, /* .0 .16 */
553 0b000000001000, /* .8 .0 */
554 0b000100000000, /* .0 .8 */
555 0b010100000000, /* .0 .40 */
556 0b011000000000, /* .0 .48 */
557 0b000110000000, /* .0 .12 */
558 0b000000010000, /* .16 .0 */
559 0b011010000000, /* .0 .52 */
560 0b001100000000, /* .0 .24 */
561 0b011100000000, /* .0 .56 */
562 0b010110000000, /* .0 .44 */
563 0b010010000000, /* .0 .36 */
564 };
565
/* Source-0 index lookup table for instruction compaction, Xe2 variant (per
 * the table name).
 *
 * Holds only 8 bit patterns (the Gfx12 and XeHP src0 tables above have 16).
 * Each trailing comment shows the source region and any negate/abs modifier
 * the pattern stands for.
 */
566 static const uint16_t xe2_src0_index_table[8] = {
567 0b00100000000, /* r<1;1,0> */
568 0b00000000000, /* r<0;1,0> */
569 0b01000000000, /* r<2;1,0> */
570 0b00100000010, /* -r<1;1,0> */
571 0b01100000000, /* r<4;1,0> */
572 0b00100000001, /* (abs)r<1;1,0> */
573 0b00000000010, /* -r<0;1,0> */
574 0b01001000000, /* r<2;4,0> */
575 };
576
/* Source-1 index lookup table for instruction compaction, Xe2 variant (per
 * the table name).
 *
 * Holds 16 bit patterns.  Unlike the older src1 tables above, each trailing
 * comment ends in a ".N" suffix in addition to the region, so the pattern
 * appears to carry a sub-register offset as well -- TODO confirm the exact
 * field layout.
 */
577 static const uint16_t xe2_src1_index_table[16] = {
578 0b0000100000000000, /* r<1;1,0>.0 */
579 0b0000000000000000, /* r<0;1,0>.0 */
580 0b1000100000000000, /* -r<1;1,0>.0 */
581 0b0000000000010000, /* r<0;1,0>.8 */
582 0b0000000000001000, /* r<0;1,0>.4 */
583 0b0000000000011000, /* r<0;1,0>.12 */
584 0b0000000001010000, /* r<0;1,0>.40 */
585 0b0000000001000000, /* r<0;1,0>.32 */
586 0b0000000000100000, /* r<0;1,0>.16 */
587 0b0000000001111000, /* r<0;1,0>.60 */
588 0b0000000000111000, /* r<0;1,0>.28 */
589 0b0000000000101000, /* r<0;1,0>.20 */
590 0b0000000001011000, /* r<0;1,0>.44 */
591 0b0000000001001000, /* r<0;1,0>.36 */
592 0b0000000001110000, /* r<0;1,0>.56 */
593 0b0000000000110000, /* r<0;1,0>.24 */
594 };
595
596 /* This is actually the control index table for Cherryview (26 bits), but the
597 * only difference from Broadwell (24 bits) is that it has two extra 0-bits at
598 * the start.
599 *
600 * The low 24 bits have the same mappings on both hardware.
 *
 * The table holds four patterns for 3-source instructions; every entry has
 * its two most significant (Cherryview-only) bits clear, which is what allows
 * one table to serve both platforms.
601 */
602 static const uint32_t gfx8_3src_control_index_table[4] = {
603 0b00100000000110000000000001,
604 0b00000000000110000000000001,
605 0b00000000001000000000000001,
606 0b00000000001000000000100001,
607 };
608
609 /* This is actually the control index table for Cherryview (49 bits), but the
610 * only difference from Broadwell (46 bits) is that it has three extra 0-bits
611 * at the start.
612 *
613 * The low 44 bits have the same mappings on both hardware, and since the high
614 * three bits on Broadwell are zero, we can reuse Cherryview's table.
 *
 * NOTE(review): the opening sentence says "control index table" although this
 * is the source index table, and the text quotes 46 bits for Broadwell but
 * then refers to the low 44 bits -- confirm the intended wording and widths
 * against the hardware documentation.
 *
 * The entries need more than 32 bits, hence the uint64_t element type.
615 */
616 static const uint64_t gfx8_3src_source_index_table[4] = {
617 0b0000001110010011100100111001000001111000000000000,
618 0b0000001110010011100100111001000001111000000000010,
619 0b0000001110010011100100111001000001111000000001000,
620 0b0000001110010011100100111001000001111000000100000,
621 };
622
/* Control-index lookup table for compacting 3-source instructions, Gfx12
 * variant (per the table name).
 *
 * Holds 32 bit patterns, stored as uint64_t because they are wider than 32
 * bits.  Each trailing comment describes the controls the pattern stands for:
 * optional "(W)", execution size and mask offset, optional "(sat)", the
 * destination register file, and four type suffixes.
 */
623 static const uint64_t gfx12_3src_control_index_table[32] = {
624 0b000001001010010101000000000000000100, /* (16|M0) grf<1>:f :f :f :f */
625 0b000001001010010101000000000000000011, /* (8|M0) grf<1>:f :f :f :f */
626 0b000001001000010101000000000000000011, /* (8|M0) arf<1>:f :f :f :f */
627 0b000001001010010101000010000000000011, /* (W) (8|M0) grf<1>:f :f :f :f */
628 0b000001001000010101000010000000000011, /* (W) (8|M0) arf<1>:f :f :f :f */
629 0b000001001000010101000000000000010011, /* (8|M8) arf<1>:f :f :f :f */
630 0b000001001010010101000000000000010011, /* (8|M8) grf<1>:f :f :f :f */
631 0b000001001000010101000010000000010011, /* (W) (8|M8) arf<1>:f :f :f :f */
632 0b000001001010010101000010000000010011, /* (W) (8|M8) grf<1>:f :f :f :f */
633 0b000001001010010101000010000000000100, /* (W) (16|M0) grf<1>:f :f :f :f */
634 0b000001001000010101000000000000000100, /* (16|M0) arf<1>:f :f :f :f */
635 0b000001001010010101010000000000000100, /* (16|M0) (sat)grf<1>:f :f :f :f */
636 0b000001001010010101000000000000100100, /* (16|M16) grf<1>:f :f :f :f */
637 0b000001001000010101000010000000000100, /* (W) (16|M0) arf<1>:f :f :f :f */
638 0b000001001010010101000010000000000000, /* (W) (1|M0) grf<1>:f :f :f :f */
639 0b000001001010010101010000000000000011, /* (8|M0) (sat)grf<1>:f :f :f :f */
640 0b000001001000010101000010000000110011, /* (W) (8|M24) arf<1>:f :f :f :f */
641 0b000001001000010101000010000000100011, /* (W) (8|M16) arf<1>:f :f :f :f */
642 0b000001001010010101000010000000110011, /* (W) (8|M24) grf<1>:f :f :f :f */
643 0b000001001010010101000010000000100011, /* (W) (8|M16) grf<1>:f :f :f :f */
644 0b000001001000010101000000000000100011, /* (8|M16) arf<1>:f :f :f :f */
645 0b000001001000010101000000000000110011, /* (8|M24) arf<1>:f :f :f :f */
646 0b000001001010010101000000000000100011, /* (8|M16) grf<1>:f :f :f :f */
647 0b000001001010010101000000000000110011, /* (8|M24) grf<1>:f :f :f :f */
648 0b000001001000010101010000000000000100, /* (16|M0) (sat)arf<1>:f :f :f :f */
649 0b000001001010010101010010000000000100, /* (W) (16|M0) (sat)grf<1>:f :f :f :f */
650 0b000001001010010101000010000000100100, /* (W) (16|M16) grf<1>:f :f :f :f */
651 0b000001001010010001000010000000000000, /* (W) (1|M0) grf<1>:ud :ud :ud :ud */
652 0b000001001000010101000000000000100100, /* (16|M16) arf<1>:f :f :f :f */
653 0b000001001010010101010000000000100100, /* (16|M16) (sat)grf<1>:f :f :f :f */
654 0b000001001010010101000010000000000010, /* (W) (4|M0) grf<1>:f :f :f :f */
655 0b000001001000010101010000000000000011, /* (8|M0) (sat)arf<1>:f :f :f :f */
656 };
657
/* Control-index lookup table for compacting 3-source instructions, XeHP
 * variant (per the table name).
 *
 * Holds 32 bit patterns, stored as uint64_t because they are wider than 32
 * bits.  According to the trailing comments, the first 22 entries are regular
 * 3-source forms and the last 10 entries are "dpas.8x*" forms, some of them
 * with {Atomic} set.
 */
658 static const uint64_t xehp_3src_control_index_table[32] = {
659 0b0000010010100010101000000000000000100, /* (16|M0) grf<1>:f :f :f :f */
660 0b0000010010100010101000000000000000011, /* (8|M0) grf<1>:f :f :f :f */
661 0b0000010010000010101000000000000000011, /* (8|M0) arf<1>:f :f :f :f */
662 0b0000010010100010101000010000000000011, /* (W) (8|M0) grf<1>:f :f :f :f */
663 0b0000010010000010101000010000000000011, /* (W) (8|M0) arf<1>:f :f :f :f */
664 0b0000010010000010101000000000000010011, /* (8|M8) arf<1>:f :f :f :f */
665 0b0000010010100010101000000000000010011, /* (8|M8) grf<1>:f :f :f :f */
666 0b0000010010000010101000010000000010011, /* (W) (8|M8) arf<1>:f :f :f :f */
667 0b0000010010100010101000010000000010011, /* (W) (8|M8) grf<1>:f :f :f :f */
668 0b0000010010100010101000010000000000100, /* (W) (16|M0) grf<1>:f :f :f :f */
669 0b0000010010000010101000000000000000100, /* (16|M0) arf<1>:f :f :f :f */
670 0b0000010010100010101010000000000000100, /* (16|M0) (sat)grf<1>:f :f :f :f */
671 0b0000010010100010101000000000000100100, /* (16|M16) grf<1>:f :f :f :f */
672 0b0000010010000010101000010000000000100, /* (W) (16|M0) arf<1>:f :f :f :f */
673 0b0000010010100010101000010000000000000, /* (W) (1|M0) grf<1>:f :f :f :f */
674 0b0000010010100010101010000000000000011, /* (8|M0) (sat)grf<1>:f :f :f :f */
675 0b0000010010000010101000010000000100011, /* (W) (8|M16) arf<1>:f :f :f :f */
676 0b0000010010000010101000010000000110011, /* (W) (8|M24) arf<1>:f :f :f :f */
677 0b0000010010100010101000010000000100011, /* (W) (8|M16) grf<1>:f :f :f :f */
678 0b0000010010100010101000010000000110011, /* (W) (8|M24) grf<1>:f :f :f :f */
679 0b0000010010000010101000000000000110011, /* (8|M24) arf<1>:f :f :f :f */
680 0b0000010010000010101000000000000100011, /* (8|M16) arf<1>:f :f :f :f */
681 0b0000000100111110011000000000000000011, /* dpas.8x* (8|M0) grf<1>:d :d :ub :b */
682 0b0000000000111110011000100000000000011, /* dpas.8x* (8|M0) grf<1>:d :d :ub :ub {Atomic} */
683 0b0000100100111110011000100000000000011, /* dpas.8x* (8|M0) grf<1>:d :d :b :b {Atomic} */
684 0b0000100000111110011000100000000000011, /* dpas.8x* (8|M0) grf<1>:d :d :b :ub {Atomic} */
685 0b0000100100111110011000000000000000011, /* dpas.8x* (8|M0) grf<1>:d :d :b :b */
686 0b0000000000111110011000000000000000011, /* dpas.8x* (8|M0) grf<1>:d :d :ub :ub */
687 0b0000000100111110011000100000000000011, /* dpas.8x* (8|M0) grf<1>:d :d :ub :b {Atomic} */
688 0b0000100000111110011000000000000000011, /* dpas.8x* (8|M0) grf<1>:d :d :b :ub */
689 0b0000101101111010101000100000000000011, /* dpas.8x* (8|M0) grf<1>:f :f :bf :bf {Atomic} */
690 0b0000101101111010101000000000000000011, /* dpas.8x* (8|M0) grf<1>:f :f :bf :bf */
691 };
692
/* Control-index lookup table for compacting 3-source instructions, Xe2
 * variant (per the table name).
 *
 * Holds 16 bit patterns, stored as uint64_t because they are wider than 32
 * bits.  Going by the trailing comments, this table contains only non-dpas
 * forms; dpas patterns live in xe2_3src_dpas_control_index_table.
 */
693 static const uint64_t xe2_3src_control_index_table[16] = {
694 0b0000010010100010101000000000000100, /* (16|M0) grf<1>:f :f :f :f */
695 0b0000010010000010101000000000000100, /* (16|M0) arf<1>:f :f :f :f */
696 0b0000010010100010101000100000000100, /* (W)(16|M0) grf<1>:f :f :f :f */
697 0b0000010010000010101000100000000100, /* (W)(16|M0) arf<1>:f :f :f :f */
698 0b0000011011100011101100000000000100, /* (16|M0) grf<1>:df :df :df :df */
699 0b0000011011100011101100000010000100, /* (16|M16) grf<1>:df :df :df :df */
700 0b0000011011000011101100000000000100, /* (16|M0) arf<1>:df :df :df :df */
701 0b0000010010100010101000000000000101, /* (32|M0) grf<1>:f :f :f :f */
702 0b0000010010000010101000000000000101, /* (32|M0) arf<1>:f :f :f :f */
703 0b0000010010000010101010000000000100, /* (16|M0) (sat)arf<1>:f :f :f :f */
704 0b0000010010100010101010000000000100, /* (16|M0) (sat)grf<1>:f :f :f :f */
705 0b0000011011000011101100000010000100, /* (16|M16) arf<1>:df :df :df :df */
706 0b0000010010100010101000100000000000, /* (W)(1|M0) grf<1>:f :f :f :f */
707 0b0000010010100010001000000000000100, /* (16|M0) grf<1>:ud :ud :ud :ud */
708 0b0000110110100110011000000000000101, /* (32|M0) grf<1>:d :d :d :d */
709 0b0000011011000011101100000000000011, /* (8|M0) arf<1>:df :df :df :df */
710 };
711
/* Control-index lookup table for compacting dpas instructions, Xe2 variant
 * (per the table name).
 *
 * Holds 16 bit patterns, stored as uint64_t because they are wider than 32
 * bits.  Every trailing comment describes a "dpas.8x*" form at (16|M0) with a
 * grf destination; the entries differ in the four operand types and in
 * whether "Atomic" is set.
 */
712 static const uint64_t xe2_3src_dpas_control_index_table[16] = {
713 0b0000000000111110011001000000000100, /* dpas.8x* (16|M0) grf:d :d :ub :ub Atomic */
714 0b0000000100111110011001000000000100, /* dpas.8x* (16|M0) grf:d :d :ub :b Atomic */
715 0b0000100000111110011001000000000100, /* dpas.8x* (16|M0) grf:d :d :b :ub Atomic */
716 0b0000100100111110011001000000000100, /* dpas.8x* (16|M0) grf:d :d :b :b Atomic */
717 0b0000000000111110011000000000000100, /* dpas.8x* (16|M0) grf:d :d :ub :ub */
718 0b0000100100111110011000000000000100, /* dpas.8x* (16|M0) grf:d :d :b :b */
719 0b0000101101111010101001000000000100, /* dpas.8x* (16|M0) grf:f :f :bf :bf Atomic */
720 0b0000101101111101101001000000000100, /* dpas.8x* (16|M0) grf:f :bf :bf :bf Atomic */
721 0b0000101101111010110101000000000100, /* dpas.8x* (16|M0) grf:bf :f :bf :bf Atomic */
722 0b0000101101111101110101000000000100, /* dpas.8x* (16|M0) grf:bf :bf :bf :bf Atomic */
723 0b0000101101111010101000000000000100, /* dpas.8x* (16|M0) grf:f :f :bf :bf */
724 0b0000001001111010101001000000000100, /* dpas.8x* (16|M0) grf:f :f :hf :hf Atomic */
725 0b0000001001111001101001000000000100, /* dpas.8x* (16|M0) grf:f :hf :hf :hf Atomic */
726 0b0000001001111010100101000000000100, /* dpas.8x* (16|M0) grf:hf :f :hf :hf Atomic */
727 0b0000001001111001100101000000000100, /* dpas.8x* (16|M0) grf:hf :hf :hf :hf Atomic */
728 0b0000001001111010101000000000000100, /* dpas.8x* (16|M0) grf:f :f :hf :hf */
729 };
730
/* Source-index lookup table for compacting 3-source instructions, Gfx12
 * variant (per the table name).
 *
 * Holds 32 bit patterns.  Each trailing comment lists three operands with
 * their register file, region and optional negate; presumably src0, src1 and
 * src2 in that order -- TODO confirm against the encoder.
 */
731 static const uint32_t gfx12_3src_source_index_table[32] = {
732 0b100101100001100000000, /* grf<0;0> grf<8;1> grf<0> */
733 0b100101100001001000010, /* arf<4;1> grf<8;1> grf<0> */
734 0b101101100001101000011, /* grf<8;1> grf<8;1> grf<1> */
735 0b100101100001101000011, /* grf<8;1> grf<8;1> grf<0> */
736 0b101100000000101000011, /* grf<8;1> grf<0;0> grf<1> */
737 0b101101100001101001011, /* -grf<8;1> grf<8;1> grf<1> */
738 0b101001100001101000011, /* grf<8;1> arf<8;1> grf<1> */
739 0b100001100001100000000, /* grf<0;0> arf<8;1> grf<0> */
740 0b101101100001100000000, /* grf<0;0> grf<8;1> grf<1> */
741 0b101101100101101000011, /* grf<8;1> grf<8;1> -grf<1> */
742 0b101101110001101000011, /* grf<8;1> -grf<8;1> grf<1> */
743 0b101100000000100000000, /* grf<0;0> grf<0;0> grf<1> */
744 0b100001100001101000011, /* grf<8;1> arf<8;1> grf<0> */
745 0b100101110001100000000, /* grf<0;0> -grf<8;1> grf<0> */
746 0b100101110001101000011, /* grf<8;1> -grf<8;1> grf<0> */
747 0b100101100001101001011, /* -grf<8;1> grf<8;1> grf<0> */
748 0b100100000000101000011, /* grf<8;1> grf<0;0> grf<0> */
749 0b100101100001100001000, /* -grf<0;0> grf<8;1> grf<0> */
750 0b100100000000100000000, /* grf<0;0> grf<0;0> grf<0> */
751 0b101101110001100000000, /* grf<0;0> -grf<8;1> grf<1> */
752 0b100101100101100000000, /* grf<0;0> grf<8;1> -grf<0> */
753 0b101001100001100000000, /* grf<0;0> arf<8;1> grf<1> */
754 0b100101100101101000011, /* grf<8;1> grf<8;1> -grf<0> */
755 0b101101100101101001011, /* -grf<8;1> grf<8;1> -grf<1> */
756 0b101001100001101001011, /* -grf<8;1> arf<8;1> grf<1> */
757 0b101101110001101001011, /* -grf<8;1> -grf<8;1> grf<1> */
758 0b101100010000101000011, /* grf<8;1> -grf<0;0> grf<1> */
759 0b101100000100101000011, /* grf<8;1> grf<0;0> -grf<1> */
760 0b101101100001100001000, /* -grf<0;0> grf<8;1> grf<1> */
761 0b101101100101100000000, /* grf<0;0> grf<8;1> -grf<1> */
762 0b100100000100101000011, /* grf<8;1> grf<0;0> -grf<0> */
763 0b101001100101101000011, /* grf<8;1> arf<8;1> -grf<1> */
764 };
765
/* 21-bit source-index patterns for three-source instructions, searched by
 * set_3src_source_index() when devinfo->verx10 >= 125 and devinfo->ver < 20.
 * The array position is the value stored in the compacted instruction, so
 * each entry is pinned to its position explicitly.
 */
static const uint32_t xehp_3src_source_index_table[32] = {
   [0]  = 0b100100000001100000000, /*  grf<0;0>   grf<1;0>  grf<0> */
   [1]  = 0b100100000001000000001, /*  arf<1;0>   grf<1;0>  grf<0> */
   [2]  = 0b101100000001100000001, /*  grf<1;0>   grf<1;0>  grf<1> */
   [3]  = 0b100100000001100000001, /*  grf<1;0>   grf<1;0>  grf<0> */
   [4]  = 0b101100000000100000001, /*  grf<1;0>   grf<0;0>  grf<1> */
   [5]  = 0b101100000001100001001, /* -grf<1;0>   grf<1;0>  grf<1> */
   [6]  = 0b101000000001100000001, /*  grf<1;0>   arf<1;0>  grf<1> */
   [7]  = 0b101100000001100000000, /*  grf<0;0>   grf<1;0>  grf<1> */
   [8]  = 0b100000000001100000000, /*  grf<0;0>   arf<1;0>  grf<0> */
   [9]  = 0b101100000101100000001, /*  grf<1;0>   grf<1;0> -grf<1> */
   [10] = 0b101100010001100000001, /*  grf<1;0>  -grf<1;0>  grf<1> */
   [11] = 0b101100000000100000000, /*  grf<0;0>   grf<0;0>  grf<1> */
   [12] = 0b100000000001100000001, /*  grf<1;0>   arf<1;0>  grf<0> */
   [13] = 0b100100010001100000000, /*  grf<0;0>  -grf<1;0>  grf<0> */
   [14] = 0b100100010001100000001, /*  grf<1;0>  -grf<1;0>  grf<0> */
   [15] = 0b100100000001100001001, /* -grf<1;0>   grf<1;0>  grf<0> */
   [16] = 0b100100000000100000001, /*  grf<1;0>   grf<0;0>  grf<0> */
   [17] = 0b100100000001100001000, /* -grf<0;0>   grf<1;0>  grf<0> */
   [18] = 0b100100000000100000000, /*  grf<0;0>   grf<0;0>  grf<0>
                                    * dpas.*x1 grf:d grf:[ub,b] grf:[ub,b]
                                    * dpas.*x1 grf:f grf:bf     grf:bf
                                    */
   [19] = 0b101100010001100000000, /*  grf<0;0>  -grf<1;0>  grf<1> */
   [20] = 0b100100000101100000000, /*  grf<0;0>   grf<1;0> -grf<0> */
   [21] = 0b101000000001100000000, /*  grf<0;0>   arf<1;0>  grf<1> */
   [22] = 0b100100000101100000001, /*  grf<1;0>   grf<1;0> -grf<0> */
   [23] = 0b101100000101100001001, /* -grf<1;0>   grf<1;0> -grf<1> */
   [24] = 0b100100010000100000000, /* dpas.*x1 grf:d grf:[u2,s2] grf:[ub,b] */
   [25] = 0b100100000100100000000, /* dpas.*x1 grf:d grf:[ub,b]  grf:[u2,s2] */
   [26] = 0b100100010100100000000, /* dpas.*x1 grf:d grf:[u2,s2] grf:[u2,s2] */
   [27] = 0b100100001000100000000, /* dpas.*x1 grf:d grf:[u4,s4] grf:[ub,b] */
   [28] = 0b100100001100100000000, /* dpas.*x1 grf:d grf:[u4,s4] grf:[u2,s2] */
   [29] = 0b100100000010100000000, /* dpas.*x1 grf:d grf:[ub,b]  grf:[u4,s4] */
   [30] = 0b100100001010100000000, /* dpas.*x1 grf:d grf:[u4,s4] grf:[u4,s4] */
   [31] = 0b100100010010100000000, /* dpas.*x1 grf:d grf:[u2,s2] grf:[u4,s4] */
};
803
/* 21-bit source-index patterns for non-DPAS three-source instructions,
 * searched by set_3src_source_index() when devinfo->ver >= 20.  The array
 * position is the value stored in the compacted instruction, so each entry
 * is pinned to its position explicitly.
 */
static const uint32_t xe2_3src_source_index_table[16] = {
   [0]  = 0b101100000001100000001, /*  grf<1;0>   grf<1;0>  grf<1> */
   [1]  = 0b101100000001000000001, /*  arf<1;0>   grf<1;0>  grf<1> */
   [2]  = 0b100100000001100000000, /*  grf<0;0>   grf<1;0>  grf<0> */
   [3]  = 0b100100000001000000001, /*  arf<1;0>   grf<1;0>  grf<0> */
   [4]  = 0b100100000001100000001, /*  grf<1;0>   grf<1;0>  grf<0> */
   [5]  = 0b100000000001100000000, /*  grf<0;0>   arf<1;0>  grf<0> */
   [6]  = 0b100000000001100000001, /*  grf<1;0>   arf<1;0>  grf<0> */
   [7]  = 0b101100000101100000001, /*  grf<1;0>   grf<1;0> -grf<1> */
   [8]  = 0b101000000001100000001, /*  grf<1;0>   arf<1;0>  grf<1> */
   [9]  = 0b101000000001000000001, /*  arf<1;0>   arf<1;0>  grf<1> */
   [10] = 0b100000000001000000001, /*  arf<1;0>   arf<1;0>  grf<0> */
   [11] = 0b100100000000100000000, /*  grf<0;0>   grf<0;0>  grf<0> */
   [12] = 0b100100000000100000001, /*  grf<1;0>   grf<0;0>  grf<0> */
   [13] = 0b101100000101000000001, /*  arf<1;0>   grf<1;0> -grf<1> */
   [14] = 0b100100010001100000001, /*  grf<1;0>  -grf<1;0>  grf<0> */
   [15] = 0b100100010001000000001, /*  arf<1;0>  -grf<1;0>  grf<0> */
};
822
/* 21-bit source-index patterns for DPAS instructions, searched by
 * set_3src_source_index() when devinfo->ver >= 20 and is_dpas is set.  The
 * array position is the value stored in the compacted instruction, so each
 * entry is pinned to its position explicitly.
 */
static const uint32_t xe2_3src_dpas_source_index_table[16] = {
   [0]  = 0b100100000000100000000, /* dpas.*x1 grf:d      grf:[ub,b] grf:[ub,b]
                                    * dpas.*x1 grf:[f,bf] grf:bf     grf:bf
                                    * dpas.*x1 grf:[f,hf] grf:hf     grf:hf
                                    */
   [1]  = 0b100100000010100000000, /* dpas.*x1 grf:d grf:[ub,b]  grf:[u4,s4] */
   [2]  = 0b100100000100100000000, /* dpas.*x1 grf:d grf:[ub,b]  grf:[u2,s2] */
   [3]  = 0b100100001000100000000, /* dpas.*x1 grf:d grf:[u4,s4] grf:[ub,b] */
   [4]  = 0b100100001010100000000, /* dpas.*x1 grf:d grf:[u4,s4] grf:[u4,s4] */
   [5]  = 0b100100001100100000000, /* dpas.*x1 grf:d grf:[u4,s4] grf:[u2,s2] */
   [6]  = 0b100100010000100000000, /* dpas.*x1 grf:d grf:[u2,s2] grf:[ub,b] */
   [7]  = 0b100100010010100000000, /* dpas.*x1 grf:d grf:[u2,s2] grf:[u4,s4] */
   [8]  = 0b100100010100100000000, /* dpas.*x1 grf:d grf:[u2,s2] grf:[u2,s2] */
   [9]  = 0b100100000000100000010, /* dpas.*x2 grf:d grf:[ub,b]  grf:[ub,b] */
   [10] = 0b100100000010100000010, /* dpas.*x2 grf:d grf:[ub,b]  grf:[u4,s4] */
   [11] = 0b100100001000100000010, /* dpas.*x2 grf:d grf:[u4,s4] grf:[ub,b] */
   [12] = 0b100100001010100000010, /* dpas.*x2 grf:d grf:[u4,s4] grf:[u4,s4] */
   [13] = 0b100100010100100000010, /* dpas.*x2 grf:d grf:[u2,s2] grf:[u2,s2] */
   [14] = 0b100100000000100001110, /* dpas.*x8 grf:d grf:[ub,b]  grf:[ub,b] */
   [15] = 0b100100001010100001110, /* dpas.*x8 grf:d grf:[u4,s4] grf:[u4,s4] */
};
844
/* 20-bit subregister patterns for three-source instructions, searched by
 * set_3src_subreg_index() when devinfo->ver is 12..19.  Each entry packs four
 * 5-bit fields; the per-entry comment lists them from the least significant
 * field (leftmost column) to the most significant one (rightmost column).
 * The array position is the value stored in the compacted instruction.
 */
static const uint32_t gfx12_3src_subreg_table[32] = {
   [0]  = 0b00000000000000000000, /* .0  .0  .0  .0  */
   [1]  = 0b00100000000000000000, /* .0  .0  .0  .4  */
   [2]  = 0b00000000000110000000, /* .0  .12 .0  .0  */
   [3]  = 0b10100000000000000000, /* .0  .0  .0  .20 */
   [4]  = 0b10000000001110000000, /* .0  .28 .0  .16 */
   [5]  = 0b01100000000000000000, /* .0  .0  .0  .12 */
   [6]  = 0b01000000000000000000, /* .0  .0  .0  .8  */
   [7]  = 0b00000010000000000000, /* .0  .0  .8  .0  */
   [8]  = 0b00000001000000000000, /* .0  .0  .4  .0  */
   [9]  = 0b11000000000000000000, /* .0  .0  .0  .24 */
   [10] = 0b10000000000000000000, /* .0  .0  .0  .16 */
   [11] = 0b11100000000000000000, /* .0  .0  .0  .28 */
   [12] = 0b00000110000000000000, /* .0  .0  .24 .0  */
   [13] = 0b00000000000010000000, /* .0  .4  .0  .0  */
   [14] = 0b00000100000000000000, /* .0  .0  .16 .0  */
   [15] = 0b00000011000000000000, /* .0  .0  .12 .0  */
   [16] = 0b00000101000000000000, /* .0  .0  .20 .0  */
   [17] = 0b00000111000000000000, /* .0  .0  .28 .0  */
   [18] = 0b00000000000100000000, /* .0  .8  .0  .0  */
   [19] = 0b00000000001000000000, /* .0  .16 .0  .0  */
   [20] = 0b00000000001100000000, /* .0  .24 .0  .0  */
   [21] = 0b00000000001010000000, /* .0  .20 .0  .0  */
   [22] = 0b00000000001110000000, /* .0  .28 .0  .0  */
   [23] = 0b11000000001110000000, /* .0  .28 .0  .24 */
   [24] = 0b00100000000100000000, /* .0  .8  .0  .4  */
   [25] = 0b00100000000110000000, /* .0  .12 .0  .4  */
   [26] = 0b01000000000110000000, /* .0  .12 .0  .8  */
   [27] = 0b10000000001100000000, /* .0  .24 .0  .16 */
   [28] = 0b10000000001010000000, /* .0  .20 .0  .16 */
   [29] = 0b01100000000010000000, /* .0  .4  .0  .12 */
   [30] = 0b10100000001110000000, /* .0  .28 .0  .20 */
   [31] = 0b01000000000010000000, /* .0  .4  .0  .8  */
};
879
/* 20-bit subregister patterns for three-source instructions, searched by
 * set_3src_subreg_index() when devinfo->ver >= 20.  Each entry packs four
 * 5-bit fields; the per-entry comment lists them from the least significant
 * field (leftmost column) to the most significant one (rightmost column).
 * The offsets in the comments are twice the raw field values.  The array
 * position is the value stored in the compacted instruction.
 */
static const uint32_t xe2_3src_subreg_table[32] = {
   [0]  = 0b00000000000000000000, /* .0  .0  .0  .0  */
   [1]  = 0b00100000000000000000, /* .0  .0  .0  .8  */
   [2]  = 0b10000000000000000000, /* .0  .0  .0  .32 */
   [3]  = 0b00010000000000000000, /* .0  .0  .0  .4  */
   [4]  = 0b11100000000000000000, /* .0  .0  .0  .56 */
   [5]  = 0b01010000000000000000, /* .0  .0  .0  .20 */
   [6]  = 0b10110000000000000000, /* .0  .0  .0  .44 */
   [7]  = 0b01000000000011000000, /* .0  .12 .0  .16 */
   [8]  = 0b01100000000000000000, /* .0  .0  .0  .24 */
   [9]  = 0b10100000000000000000, /* .0  .0  .0  .40 */
   [10] = 0b11000000000000000000, /* .0  .0  .0  .48 */
   [11] = 0b01000000000000000000, /* .0  .0  .0  .16 */
   [12] = 0b01110000000110000000, /* .0  .24 .0  .28 */
   [13] = 0b10100000001001000000, /* .0  .36 .0  .40 */
   [14] = 0b11010000001100000000, /* .0  .48 .0  .52 */
   [15] = 0b01110000000000000000, /* .0  .0  .0  .28 */
   [16] = 0b11110000000000000000, /* .0  .0  .0  .60 */
   [17] = 0b10010000000000000000, /* .0  .0  .0  .36 */
   [18] = 0b00110000000000000000, /* .0  .0  .0  .12 */
   [19] = 0b00100000000010000000, /* .0  .8  .0  .8  */
   [20] = 0b00010000000001000000, /* .0  .4  .0  .4  */
   [21] = 0b00110000000011000000, /* .0  .12 .0  .12 */
   [22] = 0b11010000000000000000, /* .0  .0  .0  .52 */
   [23] = 0b00000000000001000000, /* .0  .4  .0  .0  */
   [24] = 0b00000101100000000000, /* .0  .0  .44 .0  */
   [25] = 0b00000100000000000000, /* .0  .0  .32 .0  */
   [26] = 0b00000000000010000000, /* .0  .8  .0  .0  */
   [27] = 0b00000000001100000000, /* .0  .48 .0  .0  */
   [28] = 0b00000000001101000000, /* .0  .52 .0  .0  */
   [29] = 0b00000110100000000000, /* .0  .0  .52 .0  */
   [30] = 0b00000000001000000000, /* .0  .32 .0  .0  */
   [31] = 0b00000000001111000000, /* .0  .60 .0  .0  */
};
914
/* State shared by the two-source compaction helpers: the ISA description and
 * the lookup tables that the set_*_index() functions search.
 */
struct compaction_state {
   /* Supplies the intel_device_info used for all generation checks. */
   const struct brw_isa_info *isa;
   /* Searched by set_control_index(); 32 entries are scanned. */
   const uint32_t *control_index_table;
   /* Searched by set_datatype_index(); 32 entries are scanned. */
   const uint32_t *datatype_table;
   /* Searched by set_subreg_index(). */
   const uint16_t *subreg_table;
   /* Searched by set_src0_index(). */
   const uint16_t *src0_index_table;
   /* Searched by set_src1_index() when src1 is not an immediate. */
   const uint16_t *src1_index_table;
};

/* Forward declaration; the definition is not in this part of the file.
 * NOTE(review): presumably selects the tables above for the given ISA --
 * confirm at the definition.
 */
static void compaction_state_init(struct compaction_state *c,
                                  const struct brw_isa_info *isa);
926
927 static bool
set_control_index(const struct compaction_state * c,brw_compact_inst * dst,const brw_inst * src)928 set_control_index(const struct compaction_state *c,
929 brw_compact_inst *dst, const brw_inst *src)
930 {
931 const struct intel_device_info *devinfo = c->isa->devinfo;
932 uint32_t uncompacted; /* 19b/IVB+; 21b/TGL+ */
933
934 if (devinfo->ver >= 20) {
935 uncompacted = (brw_inst_bits(src, 95, 92) << 14) | /* 4b */
936 (brw_inst_bits(src, 34, 34) << 13) | /* 1b */
937 (brw_inst_bits(src, 32, 32) << 12) | /* 1b */
938 (brw_inst_bits(src, 31, 31) << 11) | /* 1b */
939 (brw_inst_bits(src, 28, 28) << 10) | /* 1b */
940 (brw_inst_bits(src, 27, 26) << 8) | /* 2b */
941 (brw_inst_bits(src, 25, 24) << 6) | /* 2b */
942 (brw_inst_bits(src, 23, 21) << 3) | /* 3b */
943 (brw_inst_bits(src, 20, 18)); /* 3b */
944 } else if (devinfo->ver >= 12) {
945 uncompacted = (brw_inst_bits(src, 95, 92) << 17) | /* 4b */
946 (brw_inst_bits(src, 34, 34) << 16) | /* 1b */
947 (brw_inst_bits(src, 33, 33) << 15) | /* 1b */
948 (brw_inst_bits(src, 32, 32) << 14) | /* 1b */
949 (brw_inst_bits(src, 31, 31) << 13) | /* 1b */
950 (brw_inst_bits(src, 28, 28) << 12) | /* 1b */
951 (brw_inst_bits(src, 27, 24) << 8) | /* 4b */
952 (brw_inst_bits(src, 23, 22) << 6) | /* 2b */
953 (brw_inst_bits(src, 21, 19) << 3) | /* 3b */
954 (brw_inst_bits(src, 18, 16)); /* 3b */
955 } else {
956 uncompacted = (brw_inst_bits(src, 33, 31) << 16) | /* 3b */
957 (brw_inst_bits(src, 23, 12) << 4) | /* 12b */
958 (brw_inst_bits(src, 10, 9) << 2) | /* 2b */
959 (brw_inst_bits(src, 34, 34) << 1) | /* 1b */
960 (brw_inst_bits(src, 8, 8)); /* 1b */
961 }
962
963 for (int i = 0; i < 32; i++) {
964 if (c->control_index_table[i] == uncompacted) {
965 brw_compact_inst_set_control_index(devinfo, dst, i);
966 return true;
967 }
968 }
969
970 return false;
971 }
972
973 static bool
set_datatype_index(const struct compaction_state * c,brw_compact_inst * dst,const brw_inst * src,bool is_immediate)974 set_datatype_index(const struct compaction_state *c, brw_compact_inst *dst,
975 const brw_inst *src, bool is_immediate)
976 {
977 const struct intel_device_info *devinfo = c->isa->devinfo;
978 uint32_t uncompacted; /* 18b/G45+; 21b/BDW+; 20b/TGL+ */
979
980 if (devinfo->ver >= 12) {
981 uncompacted = (brw_inst_bits(src, 91, 88) << 15) | /* 4b */
982 (brw_inst_bits(src, 66, 66) << 14) | /* 1b */
983 (brw_inst_bits(src, 50, 50) << 13) | /* 1b */
984 (brw_inst_bits(src, 49, 48) << 11) | /* 2b */
985 (brw_inst_bits(src, 47, 47) << 10) | /* 1b */
986 (brw_inst_bits(src, 46, 46) << 9) | /* 1b */
987 (brw_inst_bits(src, 43, 40) << 5) | /* 4b */
988 (brw_inst_bits(src, 39, 36) << 1) | /* 4b */
989 (brw_inst_bits(src, 35, 35)); /* 1b */
990
991 /* Src1.RegFile overlaps with the immediate, so ignore it if an immediate
992 * is present
993 */
994 if (!is_immediate) {
995 uncompacted |= brw_inst_bits(src, 98, 98) << 19; /* 1b */
996 }
997 } else {
998 uncompacted = (brw_inst_bits(src, 63, 61) << 18) | /* 3b */
999 (brw_inst_bits(src, 94, 89) << 12) | /* 6b */
1000 (brw_inst_bits(src, 46, 35)); /* 12b */
1001 }
1002
1003 for (int i = 0; i < 32; i++) {
1004 if (c->datatype_table[i] == uncompacted) {
1005 brw_compact_inst_set_datatype_index(devinfo, dst, i);
1006 return true;
1007 }
1008 }
1009
1010 return false;
1011 }
1012
1013 static bool
set_subreg_index(const struct compaction_state * c,brw_compact_inst * dst,const brw_inst * src,bool is_immediate)1014 set_subreg_index(const struct compaction_state *c, brw_compact_inst *dst,
1015 const brw_inst *src, bool is_immediate)
1016 {
1017 const struct intel_device_info *devinfo = c->isa->devinfo;
1018 const unsigned table_len = devinfo->ver >= 20 ?
1019 ARRAY_SIZE(xe2_subreg_table) : ARRAY_SIZE(g45_subreg_table);
1020 uint16_t uncompacted; /* 15b/G45+; 12b/Xe2+ */
1021
1022 if (devinfo->ver >= 20) {
1023 uncompacted = (brw_inst_bits(src, 33, 33) << 0) | /* 1b */
1024 (brw_inst_bits(src, 55, 51) << 1) | /* 5b */
1025 (brw_inst_bits(src, 71, 67) << 6) | /* 5b */
1026 (brw_inst_bits(src, 87, 87) << 11); /* 1b */
1027 } else if (devinfo->ver >= 12) {
1028 uncompacted = (brw_inst_bits(src, 55, 51) << 0) | /* 5b */
1029 (brw_inst_bits(src, 71, 67) << 5); /* 5b */
1030
1031 if (!is_immediate)
1032 uncompacted |= brw_inst_bits(src, 103, 99) << 10; /* 5b */
1033 } else {
1034 uncompacted = (brw_inst_bits(src, 52, 48) << 0) | /* 5b */
1035 (brw_inst_bits(src, 68, 64) << 5); /* 5b */
1036
1037 if (!is_immediate)
1038 uncompacted |= brw_inst_bits(src, 100, 96) << 10; /* 5b */
1039 }
1040
1041 for (int i = 0; i < table_len; i++) {
1042 if (c->subreg_table[i] == uncompacted) {
1043 brw_compact_inst_set_subreg_index(devinfo, dst, i);
1044 return true;
1045 }
1046 }
1047
1048 return false;
1049 }
1050
1051 static bool
set_src0_index(const struct compaction_state * c,brw_compact_inst * dst,const brw_inst * src)1052 set_src0_index(const struct compaction_state *c, brw_compact_inst *dst,
1053 const brw_inst *src)
1054 {
1055 const struct intel_device_info *devinfo = c->isa->devinfo;
1056 uint16_t uncompacted; /* 12b/G45+; 11b/Xe2+ */
1057 int table_len;
1058
1059 if (devinfo->ver >= 12) {
1060 table_len = (devinfo->ver >= 20 ? ARRAY_SIZE(xe2_src0_index_table) :
1061 ARRAY_SIZE(gfx12_src0_index_table));
1062 uncompacted = (devinfo->ver >= 20 ? 0 :
1063 brw_inst_bits(src, 87, 87) << 11) | /* 1b */
1064 (brw_inst_bits(src, 86, 84) << 8) | /* 3b */
1065 (brw_inst_bits(src, 83, 81) << 5) | /* 3b */
1066 (brw_inst_bits(src, 80, 80) << 4) | /* 1b */
1067 (brw_inst_bits(src, 65, 64) << 2) | /* 2b */
1068 (brw_inst_bits(src, 45, 44)); /* 2b */
1069 } else {
1070 table_len = ARRAY_SIZE(gfx8_src_index_table);
1071 uncompacted = brw_inst_bits(src, 88, 77); /* 12b */
1072 }
1073
1074 for (int i = 0; i < table_len; i++) {
1075 if (c->src0_index_table[i] == uncompacted) {
1076 brw_compact_inst_set_src0_index(devinfo, dst, i);
1077 return true;
1078 }
1079 }
1080
1081 return false;
1082 }
1083
1084 static bool
set_src1_index(const struct compaction_state * c,brw_compact_inst * dst,const brw_inst * src,bool is_immediate,unsigned imm)1085 set_src1_index(const struct compaction_state *c, brw_compact_inst *dst,
1086 const brw_inst *src, bool is_immediate, unsigned imm)
1087 {
1088 const struct intel_device_info *devinfo = c->isa->devinfo;
1089 if (is_immediate) {
1090 if (devinfo->ver >= 12) {
1091 /* src1 index takes the low 4 bits of the 12-bit compacted value */
1092 brw_compact_inst_set_src1_index(devinfo, dst, imm & 0xf);
1093 } else {
1094 /* src1 index takes the high 5 bits of the 13-bit compacted value */
1095 brw_compact_inst_set_src1_index(devinfo, dst, imm >> 8);
1096 }
1097 return true;
1098 } else {
1099 uint16_t uncompacted; /* 12b/G45+ 16b/Xe2+ */
1100 int table_len;
1101
1102 if (devinfo->ver >= 20) {
1103 table_len = ARRAY_SIZE(xe2_src1_index_table);
1104 uncompacted = (brw_inst_bits(src, 121, 120) << 14) | /* 2b */
1105 (brw_inst_bits(src, 118, 116) << 11) | /* 3b */
1106 (brw_inst_bits(src, 115, 113) << 8) | /* 3b */
1107 (brw_inst_bits(src, 112, 112) << 7) | /* 1b */
1108 (brw_inst_bits(src, 103, 99) << 2) | /* 5b */
1109 (brw_inst_bits(src, 97, 96)); /* 2b */
1110 } else if (devinfo->ver >= 12) {
1111 table_len = ARRAY_SIZE(gfx12_src0_index_table);
1112 uncompacted = (brw_inst_bits(src, 121, 120) << 10) | /* 2b */
1113 (brw_inst_bits(src, 119, 116) << 6) | /* 4b */
1114 (brw_inst_bits(src, 115, 113) << 3) | /* 3b */
1115 (brw_inst_bits(src, 112, 112) << 2) | /* 1b */
1116 (brw_inst_bits(src, 97, 96)); /* 2b */
1117 } else {
1118 table_len = ARRAY_SIZE(gfx8_src_index_table);
1119 uncompacted = brw_inst_bits(src, 120, 109); /* 12b */
1120 }
1121
1122 for (int i = 0; i < table_len; i++) {
1123 if (c->src1_index_table[i] == uncompacted) {
1124 brw_compact_inst_set_src1_index(devinfo, dst, i);
1125 return true;
1126 }
1127 }
1128 }
1129
1130 return false;
1131 }
1132
1133 static bool
set_3src_control_index(const struct intel_device_info * devinfo,brw_compact_inst * dst,const brw_inst * src,bool is_dpas)1134 set_3src_control_index(const struct intel_device_info *devinfo,
1135 brw_compact_inst *dst, const brw_inst *src,
1136 bool is_dpas)
1137 {
1138 if (devinfo->ver >= 20) {
1139 assert(is_dpas || !brw_inst_bits(src, 49, 49));
1140
1141 const uint64_t uncompacted = /* 34b/Xe2+ */
1142 (brw_inst_bits(src, 95, 92) << 30) | /* 4b */
1143 (brw_inst_bits(src, 90, 88) << 27) | /* 3b */
1144 (brw_inst_bits(src, 82, 80) << 24) | /* 3b */
1145 (brw_inst_bits(src, 50, 50) << 23) | /* 1b */
1146 (brw_inst_bits(src, 49, 48) << 21) | /* 2b */
1147 (brw_inst_bits(src, 42, 40) << 18) | /* 3b */
1148 (brw_inst_bits(src, 39, 39) << 17) | /* 1b */
1149 (brw_inst_bits(src, 38, 36) << 14) | /* 3b */
1150 (brw_inst_bits(src, 34, 34) << 13) | /* 1b */
1151 (brw_inst_bits(src, 32, 32) << 12) | /* 1b */
1152 (brw_inst_bits(src, 31, 31) << 11) | /* 1b */
1153 (brw_inst_bits(src, 28, 28) << 10) | /* 1b */
1154 (brw_inst_bits(src, 27, 26) << 8) | /* 2b */
1155 (brw_inst_bits(src, 25, 24) << 6) | /* 2b */
1156 (brw_inst_bits(src, 23, 21) << 3) | /* 3b */
1157 (brw_inst_bits(src, 20, 18)); /* 3b */
1158
1159 /* The bits used to index the tables for 3src and 3src-dpas
1160 * are the same, so just need to pick the right one.
1161 */
1162 const uint64_t *table = is_dpas ? xe2_3src_dpas_control_index_table :
1163 xe2_3src_control_index_table;
1164 const unsigned size = is_dpas ? ARRAY_SIZE(xe2_3src_dpas_control_index_table) :
1165 ARRAY_SIZE(xe2_3src_control_index_table);
1166 for (unsigned i = 0; i < size; i++) {
1167 if (table[i] == uncompacted) {
1168 brw_compact_inst_set_3src_control_index(devinfo, dst, i);
1169 return true;
1170 }
1171 }
1172 } else if (devinfo->verx10 >= 125) {
1173 uint64_t uncompacted = /* 37b/XeHP+ */
1174 (brw_inst_bits(src, 95, 92) << 33) | /* 4b */
1175 (brw_inst_bits(src, 90, 88) << 30) | /* 3b */
1176 (brw_inst_bits(src, 82, 80) << 27) | /* 3b */
1177 (brw_inst_bits(src, 50, 50) << 26) | /* 1b */
1178 (brw_inst_bits(src, 49, 48) << 24) | /* 2b */
1179 (brw_inst_bits(src, 42, 40) << 21) | /* 3b */
1180 (brw_inst_bits(src, 39, 39) << 20) | /* 1b */
1181 (brw_inst_bits(src, 38, 36) << 17) | /* 3b */
1182 (brw_inst_bits(src, 34, 34) << 16) | /* 1b */
1183 (brw_inst_bits(src, 33, 33) << 15) | /* 1b */
1184 (brw_inst_bits(src, 32, 32) << 14) | /* 1b */
1185 (brw_inst_bits(src, 31, 31) << 13) | /* 1b */
1186 (brw_inst_bits(src, 28, 28) << 12) | /* 1b */
1187 (brw_inst_bits(src, 27, 24) << 8) | /* 4b */
1188 (brw_inst_bits(src, 23, 23) << 7) | /* 1b */
1189 (brw_inst_bits(src, 22, 22) << 6) | /* 1b */
1190 (brw_inst_bits(src, 21, 19) << 3) | /* 3b */
1191 (brw_inst_bits(src, 18, 16)); /* 3b */
1192
1193 for (unsigned i = 0; i < ARRAY_SIZE(xehp_3src_control_index_table); i++) {
1194 if (xehp_3src_control_index_table[i] == uncompacted) {
1195 brw_compact_inst_set_3src_control_index(devinfo, dst, i);
1196 return true;
1197 }
1198 }
1199 } else if (devinfo->ver >= 12) {
1200 uint64_t uncompacted = /* 36b/TGL+ */
1201 (brw_inst_bits(src, 95, 92) << 32) | /* 4b */
1202 (brw_inst_bits(src, 90, 88) << 29) | /* 3b */
1203 (brw_inst_bits(src, 82, 80) << 26) | /* 3b */
1204 (brw_inst_bits(src, 50, 50) << 25) | /* 1b */
1205 (brw_inst_bits(src, 48, 48) << 24) | /* 1b */
1206 (brw_inst_bits(src, 42, 40) << 21) | /* 3b */
1207 (brw_inst_bits(src, 39, 39) << 20) | /* 1b */
1208 (brw_inst_bits(src, 38, 36) << 17) | /* 3b */
1209 (brw_inst_bits(src, 34, 34) << 16) | /* 1b */
1210 (brw_inst_bits(src, 33, 33) << 15) | /* 1b */
1211 (brw_inst_bits(src, 32, 32) << 14) | /* 1b */
1212 (brw_inst_bits(src, 31, 31) << 13) | /* 1b */
1213 (brw_inst_bits(src, 28, 28) << 12) | /* 1b */
1214 (brw_inst_bits(src, 27, 24) << 8) | /* 4b */
1215 (brw_inst_bits(src, 23, 23) << 7) | /* 1b */
1216 (brw_inst_bits(src, 22, 22) << 6) | /* 1b */
1217 (brw_inst_bits(src, 21, 19) << 3) | /* 3b */
1218 (brw_inst_bits(src, 18, 16)); /* 3b */
1219
1220 for (unsigned i = 0; i < ARRAY_SIZE(gfx12_3src_control_index_table); i++) {
1221 if (gfx12_3src_control_index_table[i] == uncompacted) {
1222 brw_compact_inst_set_3src_control_index(devinfo, dst, i);
1223 return true;
1224 }
1225 }
1226 } else {
1227 uint32_t uncompacted = /* 26b/SKL+ */
1228 (brw_inst_bits(src, 36, 35) << 24) | /* 2b */
1229 (brw_inst_bits(src, 34, 32) << 21) | /* 3b */
1230 (brw_inst_bits(src, 28, 8)); /* 21b */
1231
1232 for (unsigned i = 0; i < ARRAY_SIZE(gfx8_3src_control_index_table); i++) {
1233 if (gfx8_3src_control_index_table[i] == uncompacted) {
1234 brw_compact_inst_set_3src_control_index(devinfo, dst, i);
1235 return true;
1236 }
1237 }
1238 }
1239
1240 return false;
1241 }
1242
1243 static bool
set_3src_source_index(const struct intel_device_info * devinfo,brw_compact_inst * dst,const brw_inst * src,bool is_dpas)1244 set_3src_source_index(const struct intel_device_info *devinfo,
1245 brw_compact_inst *dst, const brw_inst *src,
1246 bool is_dpas)
1247 {
1248 if (devinfo->ver >= 12) {
1249 uint32_t uncompacted = /* 21b/TGL+ */
1250 (brw_inst_bits(src, 114, 114) << 20) | /* 1b */
1251 (brw_inst_bits(src, 113, 112) << 18) | /* 2b */
1252 (brw_inst_bits(src, 98, 98) << 17) | /* 1b */
1253 (brw_inst_bits(src, 97, 96) << 15) | /* 2b */
1254 (brw_inst_bits(src, 91, 91) << 14) | /* 1b */
1255 (brw_inst_bits(src, 87, 86) << 12) | /* 2b */
1256 (brw_inst_bits(src, 85, 84) << 10) | /* 2b */
1257 (brw_inst_bits(src, 83, 83) << 9) | /* 1b */
1258 (brw_inst_bits(src, 66, 66) << 8) | /* 1b */
1259 (brw_inst_bits(src, 65, 64) << 6) | /* 2b */
1260 (brw_inst_bits(src, 47, 47) << 5) | /* 1b */
1261 (brw_inst_bits(src, 46, 46) << 4) | /* 1b */
1262 (brw_inst_bits(src, 45, 44) << 2) | /* 2b */
1263 (brw_inst_bits(src, 43, 43) << 1) | /* 1b */
1264 (brw_inst_bits(src, 35, 35)); /* 1b */
1265
1266 /* In Xe2, the bits used to index the tables for 3src and 3src-dpas
1267 * are the same, so just need to pick the right one.
1268 */
1269 const uint32_t *three_src_source_index_table =
1270 devinfo->ver >= 20 ? (is_dpas ? xe2_3src_dpas_source_index_table :
1271 xe2_3src_source_index_table) :
1272 devinfo->verx10 >= 125 ? xehp_3src_source_index_table :
1273 gfx12_3src_source_index_table;
1274 const uint32_t three_src_source_index_table_len =
1275 devinfo->ver >= 20 ? (is_dpas ? ARRAY_SIZE(xe2_3src_dpas_source_index_table) :
1276 ARRAY_SIZE(xe2_3src_source_index_table)) :
1277 devinfo->verx10 >= 125 ? ARRAY_SIZE(xehp_3src_source_index_table) :
1278 ARRAY_SIZE(gfx12_3src_source_index_table);
1279
1280 for (unsigned i = 0; i < three_src_source_index_table_len; i++) {
1281 if (three_src_source_index_table[i] == uncompacted) {
1282 brw_compact_inst_set_3src_source_index(devinfo, dst, i);
1283 return true;
1284 }
1285 }
1286 } else {
1287 uint64_t uncompacted = /* 49b/SKL+ */
1288 (brw_inst_bits(src, 126, 125) << 47) | /* 2b */
1289 (brw_inst_bits(src, 105, 104) << 45) | /* 2b */
1290 (brw_inst_bits(src, 84, 84) << 44) | /* 1b */
1291 (brw_inst_bits(src, 83, 83) << 43) | /* 1b */
1292 (brw_inst_bits(src, 114, 107) << 35) | /* 8b */
1293 (brw_inst_bits(src, 93, 86) << 27) | /* 8b */
1294 (brw_inst_bits(src, 72, 65) << 19) | /* 8b */
1295 (brw_inst_bits(src, 55, 37)); /* 19b */
1296
1297 for (unsigned i = 0; i < ARRAY_SIZE(gfx8_3src_source_index_table); i++) {
1298 if (gfx8_3src_source_index_table[i] == uncompacted) {
1299 brw_compact_inst_set_3src_source_index(devinfo, dst, i);
1300 return true;
1301 }
1302 }
1303 }
1304
1305 return false;
1306 }
1307
1308 static bool
set_3src_subreg_index(const struct intel_device_info * devinfo,brw_compact_inst * dst,const brw_inst * src)1309 set_3src_subreg_index(const struct intel_device_info *devinfo,
1310 brw_compact_inst *dst, const brw_inst *src)
1311 {
1312 assert(devinfo->ver >= 12);
1313
1314 uint32_t uncompacted = /* 20b/TGL+ */
1315 (brw_inst_bits(src, 119, 115) << 15) | /* 5b */
1316 (brw_inst_bits(src, 103, 99) << 10) | /* 5b */
1317 (brw_inst_bits(src, 71, 67) << 5) | /* 5b */
1318 (brw_inst_bits(src, 55, 51)); /* 5b */
1319
1320 const uint32_t *table = devinfo->ver >= 20 ? xe2_3src_subreg_table :
1321 gfx12_3src_subreg_table;
1322 const uint32_t len =
1323 devinfo->ver >= 20 ? ARRAY_SIZE(xe2_3src_subreg_table) :
1324 ARRAY_SIZE(gfx12_3src_subreg_table);
1325
1326 for (unsigned i = 0; i < len; i++) {
1327 if (table[i] == uncompacted) {
1328 brw_compact_inst_set_3src_subreg_index(devinfo, dst, i);
1329 return true;
1330 }
1331 }
1332
1333 return false;
1334 }
1335
1336 static bool
has_unmapped_bits(const struct brw_isa_info * isa,const brw_inst * src)1337 has_unmapped_bits(const struct brw_isa_info *isa, const brw_inst *src)
1338 {
1339 const struct intel_device_info *devinfo = isa->devinfo;
1340
1341 /* EOT can only be mapped on a send if the src1 is an immediate */
1342 if ((brw_inst_opcode(isa, src) == BRW_OPCODE_SENDC ||
1343 brw_inst_opcode(isa, src) == BRW_OPCODE_SEND) &&
1344 brw_inst_eot(devinfo, src))
1345 return true;
1346
1347 /* Check for instruction bits that don't map to any of the fields of the
1348 * compacted instruction. The instruction cannot be compacted if any of
1349 * them are set. They overlap with:
1350 * - NibCtrl (bit 11 on Gfx8)
1351 * - Dst.AddrImm[9] (bit 47 on Gfx8)
1352 * - Src0.AddrImm[9] (bit 95 on Gfx8)
1353 * - Imm64[27:31] (bit 95 on Gfx8)
1354 * - UIP[31] (bit 95 on Gfx8)
1355 */
1356 if (devinfo->ver >= 12) {
1357 assert(!brw_inst_bits(src, 7, 7));
1358 return false;
1359 } else {
1360 assert(!brw_inst_bits(src, 7, 7));
1361 return brw_inst_bits(src, 95, 95) ||
1362 brw_inst_bits(src, 47, 47) ||
1363 brw_inst_bits(src, 11, 11);
1364 }
1365 }
1366
1367 static bool
has_3src_unmapped_bits(const struct intel_device_info * devinfo,const brw_inst * src,bool is_dpas)1368 has_3src_unmapped_bits(const struct intel_device_info *devinfo,
1369 const brw_inst *src, bool is_dpas)
1370 {
1371 /* Check for three-source instruction bits that don't map to any of the
1372 * fields of the compacted instruction. All of them seem to be reserved
1373 * bits currently.
1374 */
1375 if (devinfo->ver >= 20) {
1376 assert(is_dpas || !brw_inst_bits(src, 49, 49));
1377 assert(!brw_inst_bits(src, 33, 33));
1378 assert(!brw_inst_bits(src, 7, 7));
1379 } else if (devinfo->ver >= 12) {
1380 assert(is_dpas || !brw_inst_bits(src, 49, 49));
1381 assert(!brw_inst_bits(src, 7, 7));
1382 } else {
1383 assert(!brw_inst_bits(src, 127, 127) &&
1384 !brw_inst_bits(src, 7, 7));
1385 }
1386
1387 return false;
1388 }
1389
1390 static bool
brw_try_compact_3src_instruction(const struct brw_isa_info * isa,brw_compact_inst * dst,const brw_inst * src)1391 brw_try_compact_3src_instruction(const struct brw_isa_info *isa,
1392 brw_compact_inst *dst, const brw_inst *src)
1393 {
1394 const struct intel_device_info *devinfo = isa->devinfo;
1395
1396 bool is_dpas = brw_inst_opcode(isa, src) == BRW_OPCODE_DPAS;
1397 if (has_3src_unmapped_bits(devinfo, src, is_dpas))
1398 return false;
1399
1400 #define compact(field) \
1401 brw_compact_inst_set_3src_##field(devinfo, dst, brw_inst_3src_##field(devinfo, src))
1402 #define compact_a16(field) \
1403 brw_compact_inst_set_3src_##field(devinfo, dst, brw_inst_3src_a16_##field(devinfo, src))
1404
1405 compact(hw_opcode);
1406
1407 if (!set_3src_control_index(devinfo, dst, src, is_dpas))
1408 return false;
1409
1410 if (!set_3src_source_index(devinfo, dst, src, is_dpas))
1411 return false;
1412
1413 if (devinfo->ver >= 12) {
1414 if (!set_3src_subreg_index(devinfo, dst, src))
1415 return false;
1416
1417 compact(swsb);
1418 compact(debug_control);
1419 compact(dst_reg_nr);
1420 compact(src0_reg_nr);
1421 compact(src1_reg_nr);
1422 compact(src2_reg_nr);
1423 } else {
1424 compact(dst_reg_nr);
1425 compact_a16(src0_rep_ctrl);
1426 compact(debug_control);
1427 compact(saturate);
1428 compact_a16(src1_rep_ctrl);
1429 compact_a16(src2_rep_ctrl);
1430 compact(src0_reg_nr);
1431 compact(src1_reg_nr);
1432 compact(src2_reg_nr);
1433 compact_a16(src0_subreg_nr);
1434 compact_a16(src1_subreg_nr);
1435 compact_a16(src2_subreg_nr);
1436 }
1437 brw_compact_inst_set_3src_cmpt_control(devinfo, dst, true);
1438
1439 #undef compact
1440 #undef compact_a16
1441
1442 return true;
1443 }
1444
1445 /* On SNB through ICL, compacted instructions have 12-bits for immediate
1446 * sources, and a 13th bit that's replicated through the high 20 bits.
1447 *
1448 * Effectively this means we get 12-bit integers, 0.0f, and some limited uses
1449 * of packed vectors as compactable immediates.
1450 *
1451 * On TGL+, the high 12-bits of floating-point values (:f and :hf) are encoded
1452 * rather than the low 12-bits. For signed integer the 12th bit is replicated,
1453 * while for unsigned integers it is not.
1454 *
1455 * Returns the compacted immediate, or -1 if immediate cannot be compacted
1456 */
1457 static int
compact_immediate(const struct intel_device_info * devinfo,enum brw_reg_type type,unsigned imm)1458 compact_immediate(const struct intel_device_info *devinfo,
1459 enum brw_reg_type type, unsigned imm)
1460 {
1461 if (devinfo->ver >= 12) {
1462 /* 16-bit immediates need to be replicated through the 32-bit immediate
1463 * field
1464 */
1465 switch (type) {
1466 case BRW_TYPE_W:
1467 case BRW_TYPE_UW:
1468 case BRW_TYPE_HF:
1469 if ((imm >> 16) != (imm & 0xffff))
1470 return -1;
1471 break;
1472 default:
1473 break;
1474 }
1475
1476 switch (type) {
1477 case BRW_TYPE_F:
1478 /* We get the high 12-bits as-is; rest must be zero */
1479 if ((imm & 0xfffff) == 0)
1480 return (imm >> 20) & 0xfff;
1481 break;
1482 case BRW_TYPE_HF:
1483 /* We get the high 12-bits as-is; rest must be zero */
1484 if ((imm & 0xf) == 0)
1485 return (imm >> 4) & 0xfff;
1486 break;
1487 case BRW_TYPE_UD:
1488 case BRW_TYPE_VF:
1489 case BRW_TYPE_UV:
1490 case BRW_TYPE_V:
1491 /* We get the low 12-bits as-is; rest must be zero */
1492 if ((imm & 0xfffff000) == 0)
1493 return imm & 0xfff;
1494 break;
1495 case BRW_TYPE_UW:
1496 /* We get the low 12-bits as-is; rest must be zero */
1497 if ((imm & 0xf000) == 0)
1498 return imm & 0xfff;
1499 break;
1500 case BRW_TYPE_D:
1501 /* We get the low 11-bits as-is; 12th is replicated */
1502 if (((int)imm >> 11) == 0 || ((int)imm >> 11) == -1)
1503 return imm & 0xfff;
1504 break;
1505 case BRW_TYPE_W:
1506 /* We get the low 11-bits as-is; 12th is replicated */
1507 if (((short)imm >> 11) == 0 || ((short)imm >> 11) == -1)
1508 return imm & 0xfff;
1509 break;
1510 case BRW_TYPE_DF:
1511 case BRW_TYPE_Q:
1512 case BRW_TYPE_UQ:
1513 case BRW_TYPE_B:
1514 case BRW_TYPE_UB:
1515 default:
1516 return -1;
1517 }
1518 } else {
1519 /* We get the low 12 bits as-is; 13th is replicated */
1520 if (((int)imm >> 12) == 0 || ((int)imm >> 12 == -1)) {
1521 return imm & 0x1fff;
1522 }
1523 }
1524
1525 return -1;
1526 }
1527
1528 static int
uncompact_immediate(const struct intel_device_info * devinfo,enum brw_reg_type type,unsigned compact_imm)1529 uncompact_immediate(const struct intel_device_info *devinfo,
1530 enum brw_reg_type type, unsigned compact_imm)
1531 {
1532 if (devinfo->ver >= 12) {
1533 switch (type) {
1534 case BRW_TYPE_F:
1535 return compact_imm << 20;
1536 case BRW_TYPE_HF:
1537 return (compact_imm << 20) | (compact_imm << 4);
1538 case BRW_TYPE_UD:
1539 case BRW_TYPE_VF:
1540 case BRW_TYPE_UV:
1541 case BRW_TYPE_V:
1542 return compact_imm;
1543 case BRW_TYPE_UW:
1544 /* Replicate */
1545 return compact_imm << 16 | compact_imm;
1546 case BRW_TYPE_D:
1547 /* Extend the 12th bit into the high 20 bits */
1548 return (int)(compact_imm << 20) >> 20;
1549 case BRW_TYPE_W:
1550 /* Extend the 12th bit into the high 4 bits and replicate */
1551 return ((int)(compact_imm << 20) >> 4) |
1552 ((unsigned short)((short)(compact_imm << 4) >> 4));
1553 case BRW_TYPE_DF:
1554 case BRW_TYPE_Q:
1555 case BRW_TYPE_UQ:
1556 case BRW_TYPE_B:
1557 case BRW_TYPE_UB:
1558 unreachable("not reached");
1559 default:
1560 unreachable("invalid type");
1561 }
1562 } else {
1563 /* Replicate the 13th bit into the high 19 bits */
1564 return (int)(compact_imm << 19) >> 19;
1565 }
1566
1567 unreachable("not reached");
1568 }
1569
1570 static bool
has_immediate(const struct intel_device_info * devinfo,const brw_inst * inst,enum brw_reg_type * type)1571 has_immediate(const struct intel_device_info *devinfo, const brw_inst *inst,
1572 enum brw_reg_type *type)
1573 {
1574 if (brw_inst_src0_reg_file(devinfo, inst) == IMM) {
1575 *type = brw_inst_src0_type(devinfo, inst);
1576 return *type != BRW_TYPE_INVALID;
1577 } else if (brw_inst_src1_reg_file(devinfo, inst) == IMM) {
1578 *type = brw_inst_src1_type(devinfo, inst);
1579 return *type != BRW_TYPE_INVALID;
1580 }
1581
1582 return false;
1583 }
1584
1585 /**
1586 * Applies some small changes to instruction types to increase chances of
1587 * compaction.
1588 */
1589 static brw_inst
precompact(const struct brw_isa_info * isa,brw_inst inst)1590 precompact(const struct brw_isa_info *isa, brw_inst inst)
1591 {
1592 const struct intel_device_info *devinfo = isa->devinfo;
1593
1594 /* In XeHP the compaction tables removed the entries for source regions
1595 * <8;8,1> giving preference to <1;1,0> as the way to indicate
1596 * sequential elements, so convert to those before compacting.
1597 */
1598 if (devinfo->verx10 >= 125) {
1599 if (brw_inst_src0_reg_file(devinfo, &inst) == FIXED_GRF &&
1600 brw_inst_src0_vstride(devinfo, &inst) > BRW_VERTICAL_STRIDE_1 &&
1601 brw_inst_src0_vstride(devinfo, &inst) == (brw_inst_src0_width(devinfo, &inst) + 1) &&
1602 brw_inst_src0_hstride(devinfo, &inst) == BRW_HORIZONTAL_STRIDE_1) {
1603 brw_inst_set_src0_vstride(devinfo, &inst, BRW_VERTICAL_STRIDE_1);
1604 brw_inst_set_src0_width(devinfo, &inst, BRW_WIDTH_1);
1605 brw_inst_set_src0_hstride(devinfo, &inst, BRW_HORIZONTAL_STRIDE_0);
1606 }
1607
1608 if (brw_inst_src1_reg_file(devinfo, &inst) == FIXED_GRF &&
1609 brw_inst_src1_vstride(devinfo, &inst) > BRW_VERTICAL_STRIDE_1 &&
1610 brw_inst_src1_vstride(devinfo, &inst) == (brw_inst_src1_width(devinfo, &inst) + 1) &&
1611 brw_inst_src1_hstride(devinfo, &inst) == BRW_HORIZONTAL_STRIDE_1) {
1612 brw_inst_set_src1_vstride(devinfo, &inst, BRW_VERTICAL_STRIDE_1);
1613 brw_inst_set_src1_width(devinfo, &inst, BRW_WIDTH_1);
1614 brw_inst_set_src1_hstride(devinfo, &inst, BRW_HORIZONTAL_STRIDE_0);
1615 }
1616 }
1617
1618 if (brw_inst_src0_reg_file(devinfo, &inst) != IMM)
1619 return inst;
1620
1621 /* The Bspec's section titled "Non-present Operands" claims that if src0
1622 * is an immediate that src1's type must be the same as that of src0.
1623 *
1624 * The SNB+ DataTypeIndex instruction compaction tables contain mappings
1625 * that do not follow this rule. E.g., from the IVB/HSW table:
1626 *
1627 * DataTypeIndex 18-Bit Mapping Mapped Meaning
1628 * 3 001000001011111101 r:f | i:vf | a:ud | <1> | dir |
1629 *
1630 * And from the SNB table:
1631 *
1632 * DataTypeIndex 18-Bit Mapping Mapped Meaning
1633 * 8 001000000111101100 a:w | i:w | a:ud | <1> | dir |
1634 *
1635 * Neither of these cause warnings from the simulator when used,
1636 * compacted or otherwise. In fact, all compaction mappings that have an
1637 * immediate in src0 use a:ud for src1.
1638 *
1639 * Don't do any of this for 64-bit immediates, since the src1 fields
1640 * overlap with the immediate and setting them would overwrite the
1641 * immediate we set.
1642 */
1643 if (!(brw_inst_src0_type(devinfo, &inst) == BRW_TYPE_DF ||
1644 brw_inst_src0_type(devinfo, &inst) == BRW_TYPE_UQ ||
1645 brw_inst_src0_type(devinfo, &inst) == BRW_TYPE_Q)) {
1646 brw_inst_set_src1_reg_hw_type(devinfo, &inst, 0);
1647 }
1648
1649 /* Compacted instructions only have 12-bits (plus 1 for the other 20)
1650 * for immediate values. Presumably the hardware engineers realized
1651 * that the only useful floating-point value that could be represented
1652 * in this format is 0.0, which can also be represented as a VF-typed
1653 * immediate, so they gave us the previously mentioned mapping on IVB+.
1654 *
1655 * Strangely, we do have a mapping for imm:f in src1, so we don't need
1656 * to do this there.
1657 *
1658 * If we see a 0.0:F, change the type to VF so that it can be compacted.
1659 *
1660 * Compaction of floating-point immediates is improved on Gfx12, thus
1661 * removing the need for this.
1662 */
1663 if (devinfo->ver < 12 &&
1664 brw_inst_imm_ud(devinfo, &inst) == 0x0 &&
1665 brw_inst_src0_type(devinfo, &inst) == BRW_TYPE_F &&
1666 brw_inst_dst_type(devinfo, &inst) == BRW_TYPE_F &&
1667 brw_inst_dst_hstride(devinfo, &inst) == BRW_HORIZONTAL_STRIDE_1) {
1668 enum brw_reg_file file = brw_inst_src0_reg_file(devinfo, &inst);
1669 brw_inst_set_src0_file_type(devinfo, &inst, file, BRW_TYPE_VF);
1670 }
1671
1672 /* There are no mappings for dst:d | i:d, so if the immediate is suitable
1673 * set the types to :UD so the instruction can be compacted.
1674 *
1675 * FINISHME: Use dst:f | imm:f on Gfx12
1676 */
1677 if (devinfo->ver < 12 &&
1678 compact_immediate(devinfo, BRW_TYPE_D,
1679 brw_inst_imm_ud(devinfo, &inst)) != -1 &&
1680 brw_inst_cond_modifier(devinfo, &inst) == BRW_CONDITIONAL_NONE &&
1681 brw_inst_src0_type(devinfo, &inst) == BRW_TYPE_D &&
1682 brw_inst_dst_type(devinfo, &inst) == BRW_TYPE_D) {
1683 enum brw_reg_file src_file = brw_inst_src0_reg_file(devinfo, &inst);
1684 enum brw_reg_file dst_file = brw_inst_dst_reg_file(devinfo, &inst);
1685
1686 brw_inst_set_src0_file_type(devinfo, &inst, src_file, BRW_TYPE_UD);
1687 brw_inst_set_dst_file_type(devinfo, &inst, dst_file, BRW_TYPE_UD);
1688 }
1689
1690 return inst;
1691 }
1692
1693 /**
1694 * Tries to compact instruction src into dst.
1695 *
1696 * It doesn't modify dst unless src is compactable, which is relied on by
1697 * brw_compact_instructions().
1698 */
1699 static bool
try_compact_instruction(const struct compaction_state * c,brw_compact_inst * dst,const brw_inst * src)1700 try_compact_instruction(const struct compaction_state *c,
1701 brw_compact_inst *dst, const brw_inst *src)
1702 {
1703 const struct intel_device_info *devinfo = c->isa->devinfo;
1704 brw_compact_inst temp;
1705
1706 assert(brw_inst_cmpt_control(devinfo, src) == 0);
1707
1708 if (is_3src(c->isa, brw_inst_opcode(c->isa, src))) {
1709 memset(&temp, 0, sizeof(temp));
1710 if (brw_try_compact_3src_instruction(c->isa, &temp, src)) {
1711 *dst = temp;
1712 return true;
1713 } else {
1714 return false;
1715 }
1716 }
1717
1718 enum brw_reg_type type;
1719 bool is_immediate = has_immediate(devinfo, src, &type);
1720
1721 unsigned compacted_imm = 0;
1722
1723 if (is_immediate) {
1724 compacted_imm = compact_immediate(devinfo, type,
1725 brw_inst_imm_ud(devinfo, src));
1726 if (compacted_imm == -1)
1727 return false;
1728 }
1729
1730 if (has_unmapped_bits(c->isa, src))
1731 return false;
1732
1733 memset(&temp, 0, sizeof(temp));
1734
1735 #define compact(field) \
1736 brw_compact_inst_set_##field(devinfo, &temp, brw_inst_##field(devinfo, src))
1737 #define compact_reg(field) \
1738 brw_compact_inst_set_##field##_reg_nr(devinfo, &temp, \
1739 brw_inst_##field##_da_reg_nr(devinfo, src))
1740
1741 compact(hw_opcode);
1742 compact(debug_control);
1743
1744 if (!set_control_index(c, &temp, src))
1745 return false;
1746 if (!set_datatype_index(c, &temp, src, is_immediate))
1747 return false;
1748 if (!set_subreg_index(c, &temp, src, is_immediate))
1749 return false;
1750 if (!set_src0_index(c, &temp, src))
1751 return false;
1752 if (!set_src1_index(c, &temp, src, is_immediate, compacted_imm))
1753 return false;
1754
1755 if (devinfo->ver >= 12) {
1756 compact(swsb);
1757 compact_reg(dst);
1758 compact_reg(src0);
1759
1760 if (is_immediate) {
1761 /* src1 reg takes the high 8 bits (of the 12-bit compacted value) */
1762 brw_compact_inst_set_src1_reg_nr(devinfo, &temp, compacted_imm >> 4);
1763 } else {
1764 compact_reg(src1);
1765 }
1766 } else {
1767 compact(acc_wr_control);
1768
1769 compact(cond_modifier);
1770
1771 compact_reg(dst);
1772 compact_reg(src0);
1773
1774 if (is_immediate) {
1775 /* src1 reg takes the low 8 bits (of the 13-bit compacted value) */
1776 brw_compact_inst_set_src1_reg_nr(devinfo, &temp, compacted_imm & 0xff);
1777 } else {
1778 compact_reg(src1);
1779 }
1780 }
1781 brw_compact_inst_set_cmpt_control(devinfo, &temp, true);
1782
1783 #undef compact
1784 #undef compact_reg
1785
1786 *dst = temp;
1787
1788 return true;
1789 }
1790
1791 bool
brw_try_compact_instruction(const struct brw_isa_info * isa,brw_compact_inst * dst,const brw_inst * src)1792 brw_try_compact_instruction(const struct brw_isa_info *isa,
1793 brw_compact_inst *dst, const brw_inst *src)
1794 {
1795 struct compaction_state c;
1796 compaction_state_init(&c, isa);
1797 return try_compact_instruction(&c, dst, src);
1798 }
1799
1800 static void
set_uncompacted_control(const struct compaction_state * c,brw_inst * dst,brw_compact_inst * src)1801 set_uncompacted_control(const struct compaction_state *c, brw_inst *dst,
1802 brw_compact_inst *src)
1803 {
1804 const struct intel_device_info *devinfo = c->isa->devinfo;
1805 uint32_t uncompacted =
1806 c->control_index_table[brw_compact_inst_control_index(devinfo, src)];
1807
1808 if (devinfo->ver >= 20) {
1809 brw_inst_set_bits(dst, 95, 92, (uncompacted >> 14) & 0xf);
1810 brw_inst_set_bits(dst, 34, 34, (uncompacted >> 13) & 0x1);
1811 brw_inst_set_bits(dst, 32, 32, (uncompacted >> 12) & 0x1);
1812 brw_inst_set_bits(dst, 31, 31, (uncompacted >> 11) & 0x1);
1813 brw_inst_set_bits(dst, 28, 28, (uncompacted >> 10) & 0x1);
1814 brw_inst_set_bits(dst, 27, 26, (uncompacted >> 8) & 0x3);
1815 brw_inst_set_bits(dst, 25, 24, (uncompacted >> 6) & 0x3);
1816 brw_inst_set_bits(dst, 23, 21, (uncompacted >> 3) & 0x7);
1817 brw_inst_set_bits(dst, 20, 18, (uncompacted >> 0) & 0x7);
1818 } else if (devinfo->ver >= 12) {
1819 brw_inst_set_bits(dst, 95, 92, (uncompacted >> 17));
1820 brw_inst_set_bits(dst, 34, 34, (uncompacted >> 16) & 0x1);
1821 brw_inst_set_bits(dst, 33, 33, (uncompacted >> 15) & 0x1);
1822 brw_inst_set_bits(dst, 32, 32, (uncompacted >> 14) & 0x1);
1823 brw_inst_set_bits(dst, 31, 31, (uncompacted >> 13) & 0x1);
1824 brw_inst_set_bits(dst, 28, 28, (uncompacted >> 12) & 0x1);
1825 brw_inst_set_bits(dst, 27, 24, (uncompacted >> 8) & 0xf);
1826 brw_inst_set_bits(dst, 23, 22, (uncompacted >> 6) & 0x3);
1827 brw_inst_set_bits(dst, 21, 19, (uncompacted >> 3) & 0x7);
1828 brw_inst_set_bits(dst, 18, 16, (uncompacted >> 0) & 0x7);
1829 } else {
1830 brw_inst_set_bits(dst, 33, 31, (uncompacted >> 16));
1831 brw_inst_set_bits(dst, 23, 12, (uncompacted >> 4) & 0xfff);
1832 brw_inst_set_bits(dst, 10, 9, (uncompacted >> 2) & 0x3);
1833 brw_inst_set_bits(dst, 34, 34, (uncompacted >> 1) & 0x1);
1834 brw_inst_set_bits(dst, 8, 8, (uncompacted >> 0) & 0x1);
1835 }
1836 }
1837
1838 static void
set_uncompacted_datatype(const struct compaction_state * c,brw_inst * dst,brw_compact_inst * src)1839 set_uncompacted_datatype(const struct compaction_state *c, brw_inst *dst,
1840 brw_compact_inst *src)
1841 {
1842 const struct intel_device_info *devinfo = c->isa->devinfo;
1843 uint32_t uncompacted =
1844 c->datatype_table[brw_compact_inst_datatype_index(devinfo, src)];
1845
1846 if (devinfo->ver >= 12) {
1847 brw_inst_set_bits(dst, 98, 98, (uncompacted >> 19));
1848 brw_inst_set_bits(dst, 91, 88, (uncompacted >> 15) & 0xf);
1849 brw_inst_set_bits(dst, 66, 66, (uncompacted >> 14) & 0x1);
1850 brw_inst_set_bits(dst, 50, 50, (uncompacted >> 13) & 0x1);
1851 brw_inst_set_bits(dst, 49, 48, (uncompacted >> 11) & 0x3);
1852 brw_inst_set_bits(dst, 47, 47, (uncompacted >> 10) & 0x1);
1853 brw_inst_set_bits(dst, 46, 46, (uncompacted >> 9) & 0x1);
1854 brw_inst_set_bits(dst, 43, 40, (uncompacted >> 5) & 0xf);
1855 brw_inst_set_bits(dst, 39, 36, (uncompacted >> 1) & 0xf);
1856 brw_inst_set_bits(dst, 35, 35, (uncompacted >> 0) & 0x1);
1857 } else {
1858 brw_inst_set_bits(dst, 63, 61, (uncompacted >> 18));
1859 brw_inst_set_bits(dst, 94, 89, (uncompacted >> 12) & 0x3f);
1860 brw_inst_set_bits(dst, 46, 35, (uncompacted >> 0) & 0xfff);
1861 }
1862 }
1863
1864 static void
set_uncompacted_subreg(const struct compaction_state * c,brw_inst * dst,brw_compact_inst * src)1865 set_uncompacted_subreg(const struct compaction_state *c, brw_inst *dst,
1866 brw_compact_inst *src)
1867 {
1868 const struct intel_device_info *devinfo = c->isa->devinfo;
1869 uint16_t uncompacted =
1870 c->subreg_table[brw_compact_inst_subreg_index(devinfo, src)];
1871
1872 if (devinfo->ver >= 20) {
1873 brw_inst_set_bits(dst, 33, 33, (uncompacted >> 0) & 0x1);
1874 brw_inst_set_bits(dst, 55, 51, (uncompacted >> 1) & 0x1f);
1875 brw_inst_set_bits(dst, 71, 67, (uncompacted >> 6) & 0x1f);
1876 brw_inst_set_bits(dst, 87, 87, (uncompacted >> 11) & 0x1);
1877 } else if (devinfo->ver >= 12) {
1878 brw_inst_set_bits(dst, 103, 99, (uncompacted >> 10));
1879 brw_inst_set_bits(dst, 71, 67, (uncompacted >> 5) & 0x1f);
1880 brw_inst_set_bits(dst, 55, 51, (uncompacted >> 0) & 0x1f);
1881 } else {
1882 brw_inst_set_bits(dst, 100, 96, (uncompacted >> 10));
1883 brw_inst_set_bits(dst, 68, 64, (uncompacted >> 5) & 0x1f);
1884 brw_inst_set_bits(dst, 52, 48, (uncompacted >> 0) & 0x1f);
1885 }
1886 }
1887
1888 static void
set_uncompacted_src0(const struct compaction_state * c,brw_inst * dst,brw_compact_inst * src)1889 set_uncompacted_src0(const struct compaction_state *c, brw_inst *dst,
1890 brw_compact_inst *src)
1891 {
1892 const struct intel_device_info *devinfo = c->isa->devinfo;
1893 uint32_t compacted = brw_compact_inst_src0_index(devinfo, src);
1894 uint16_t uncompacted = c->src0_index_table[compacted];
1895
1896 if (devinfo->ver >= 12) {
1897 if (devinfo->ver < 20)
1898 brw_inst_set_bits(dst, 87, 87, (uncompacted >> 11) & 0x1);
1899 brw_inst_set_bits(dst, 86, 84, (uncompacted >> 8) & 0x7);
1900 brw_inst_set_bits(dst, 83, 81, (uncompacted >> 5) & 0x7);
1901 brw_inst_set_bits(dst, 80, 80, (uncompacted >> 4) & 0x1);
1902 brw_inst_set_bits(dst, 65, 64, (uncompacted >> 2) & 0x3);
1903 brw_inst_set_bits(dst, 45, 44, (uncompacted >> 0) & 0x3);
1904 } else {
1905 brw_inst_set_bits(dst, 88, 77, uncompacted);
1906 }
1907 }
1908
1909 static void
set_uncompacted_src1(const struct compaction_state * c,brw_inst * dst,brw_compact_inst * src)1910 set_uncompacted_src1(const struct compaction_state *c, brw_inst *dst,
1911 brw_compact_inst *src)
1912 {
1913 const struct intel_device_info *devinfo = c->isa->devinfo;
1914 uint16_t uncompacted =
1915 c->src1_index_table[brw_compact_inst_src1_index(devinfo, src)];
1916
1917 if (devinfo->ver >= 20) {
1918 brw_inst_set_bits(dst, 121, 120, (uncompacted >> 14) & 0x3);
1919 brw_inst_set_bits(dst, 118, 116, (uncompacted >> 11) & 0x7);
1920 brw_inst_set_bits(dst, 115, 113, (uncompacted >> 8) & 0x7);
1921 brw_inst_set_bits(dst, 112, 112, (uncompacted >> 7) & 0x1);
1922 brw_inst_set_bits(dst, 103, 99, (uncompacted >> 2) & 0x1f);
1923 brw_inst_set_bits(dst, 97, 96, (uncompacted >> 0) & 0x3);
1924 } else if (devinfo->ver >= 12) {
1925 brw_inst_set_bits(dst, 121, 120, (uncompacted >> 10));
1926 brw_inst_set_bits(dst, 119, 116, (uncompacted >> 6) & 0xf);
1927 brw_inst_set_bits(dst, 115, 113, (uncompacted >> 3) & 0x7);
1928 brw_inst_set_bits(dst, 112, 112, (uncompacted >> 2) & 0x1);
1929 brw_inst_set_bits(dst, 97, 96, (uncompacted >> 0) & 0x3);
1930 } else {
1931 brw_inst_set_bits(dst, 120, 109, uncompacted);
1932 }
1933 }
1934
1935 static void
set_uncompacted_3src_control_index(const struct compaction_state * c,brw_inst * dst,brw_compact_inst * src,bool is_dpas)1936 set_uncompacted_3src_control_index(const struct compaction_state *c,
1937 brw_inst *dst, brw_compact_inst *src,
1938 bool is_dpas)
1939 {
1940 const struct intel_device_info *devinfo = c->isa->devinfo;
1941
1942 if (devinfo->ver >= 20) {
1943 uint64_t compacted = brw_compact_inst_3src_control_index(devinfo, src);
1944 uint64_t uncompacted = is_dpas ? xe2_3src_dpas_control_index_table[compacted] :
1945 xe2_3src_control_index_table[compacted];
1946
1947 brw_inst_set_bits(dst, 95, 92, (uncompacted >> 30) & 0xf);
1948 brw_inst_set_bits(dst, 90, 88, (uncompacted >> 27) & 0x7);
1949 brw_inst_set_bits(dst, 82, 80, (uncompacted >> 24) & 0x7);
1950 brw_inst_set_bits(dst, 50, 50, (uncompacted >> 23) & 0x1);
1951 brw_inst_set_bits(dst, 49, 48, (uncompacted >> 21) & 0x3);
1952 brw_inst_set_bits(dst, 42, 40, (uncompacted >> 18) & 0x7);
1953 brw_inst_set_bits(dst, 39, 39, (uncompacted >> 17) & 0x1);
1954 brw_inst_set_bits(dst, 38, 36, (uncompacted >> 14) & 0x7);
1955 brw_inst_set_bits(dst, 34, 34, (uncompacted >> 13) & 0x1);
1956 brw_inst_set_bits(dst, 32, 32, (uncompacted >> 12) & 0x1);
1957 brw_inst_set_bits(dst, 31, 31, (uncompacted >> 11) & 0x1);
1958 brw_inst_set_bits(dst, 28, 28, (uncompacted >> 10) & 0x1);
1959 brw_inst_set_bits(dst, 27, 26, (uncompacted >> 8) & 0x3);
1960 brw_inst_set_bits(dst, 25, 24, (uncompacted >> 6) & 0x3);
1961 brw_inst_set_bits(dst, 23, 21, (uncompacted >> 3) & 0x7);
1962 brw_inst_set_bits(dst, 20, 18, (uncompacted >> 0) & 0x7);
1963
1964 } else if (devinfo->verx10 >= 125) {
1965 uint64_t compacted = brw_compact_inst_3src_control_index(devinfo, src);
1966 uint64_t uncompacted = xehp_3src_control_index_table[compacted];
1967
1968 brw_inst_set_bits(dst, 95, 92, (uncompacted >> 33));
1969 brw_inst_set_bits(dst, 90, 88, (uncompacted >> 30) & 0x7);
1970 brw_inst_set_bits(dst, 82, 80, (uncompacted >> 27) & 0x7);
1971 brw_inst_set_bits(dst, 50, 50, (uncompacted >> 26) & 0x1);
1972 brw_inst_set_bits(dst, 49, 48, (uncompacted >> 24) & 0x3);
1973 brw_inst_set_bits(dst, 42, 40, (uncompacted >> 21) & 0x7);
1974 brw_inst_set_bits(dst, 39, 39, (uncompacted >> 20) & 0x1);
1975 brw_inst_set_bits(dst, 38, 36, (uncompacted >> 17) & 0x7);
1976 brw_inst_set_bits(dst, 34, 34, (uncompacted >> 16) & 0x1);
1977 brw_inst_set_bits(dst, 33, 33, (uncompacted >> 15) & 0x1);
1978 brw_inst_set_bits(dst, 32, 32, (uncompacted >> 14) & 0x1);
1979 brw_inst_set_bits(dst, 31, 31, (uncompacted >> 13) & 0x1);
1980 brw_inst_set_bits(dst, 28, 28, (uncompacted >> 12) & 0x1);
1981 brw_inst_set_bits(dst, 27, 24, (uncompacted >> 8) & 0xf);
1982 brw_inst_set_bits(dst, 23, 23, (uncompacted >> 7) & 0x1);
1983 brw_inst_set_bits(dst, 22, 22, (uncompacted >> 6) & 0x1);
1984 brw_inst_set_bits(dst, 21, 19, (uncompacted >> 3) & 0x7);
1985 brw_inst_set_bits(dst, 18, 16, (uncompacted >> 0) & 0x7);
1986
1987 } else if (devinfo->ver >= 12) {
1988 uint64_t compacted = brw_compact_inst_3src_control_index(devinfo, src);
1989 uint64_t uncompacted = gfx12_3src_control_index_table[compacted];
1990
1991 brw_inst_set_bits(dst, 95, 92, (uncompacted >> 32));
1992 brw_inst_set_bits(dst, 90, 88, (uncompacted >> 29) & 0x7);
1993 brw_inst_set_bits(dst, 82, 80, (uncompacted >> 26) & 0x7);
1994 brw_inst_set_bits(dst, 50, 50, (uncompacted >> 25) & 0x1);
1995 brw_inst_set_bits(dst, 48, 48, (uncompacted >> 24) & 0x1);
1996 brw_inst_set_bits(dst, 42, 40, (uncompacted >> 21) & 0x7);
1997 brw_inst_set_bits(dst, 39, 39, (uncompacted >> 20) & 0x1);
1998 brw_inst_set_bits(dst, 38, 36, (uncompacted >> 17) & 0x7);
1999 brw_inst_set_bits(dst, 34, 34, (uncompacted >> 16) & 0x1);
2000 brw_inst_set_bits(dst, 33, 33, (uncompacted >> 15) & 0x1);
2001 brw_inst_set_bits(dst, 32, 32, (uncompacted >> 14) & 0x1);
2002 brw_inst_set_bits(dst, 31, 31, (uncompacted >> 13) & 0x1);
2003 brw_inst_set_bits(dst, 28, 28, (uncompacted >> 12) & 0x1);
2004 brw_inst_set_bits(dst, 27, 24, (uncompacted >> 8) & 0xf);
2005 brw_inst_set_bits(dst, 23, 23, (uncompacted >> 7) & 0x1);
2006 brw_inst_set_bits(dst, 22, 22, (uncompacted >> 6) & 0x1);
2007 brw_inst_set_bits(dst, 21, 19, (uncompacted >> 3) & 0x7);
2008 brw_inst_set_bits(dst, 18, 16, (uncompacted >> 0) & 0x7);
2009 } else {
2010 uint32_t compacted = brw_compact_inst_3src_control_index(devinfo, src);
2011 uint32_t uncompacted = gfx8_3src_control_index_table[compacted];
2012
2013 brw_inst_set_bits(dst, 34, 32, (uncompacted >> 21) & 0x7);
2014 brw_inst_set_bits(dst, 28, 8, (uncompacted >> 0) & 0x1fffff);
2015
2016 brw_inst_set_bits(dst, 36, 35, (uncompacted >> 24) & 0x3);
2017 }
2018 }
2019
2020 static void
set_uncompacted_3src_source_index(const struct intel_device_info * devinfo,brw_inst * dst,brw_compact_inst * src,bool is_dpas)2021 set_uncompacted_3src_source_index(const struct intel_device_info *devinfo,
2022 brw_inst *dst, brw_compact_inst *src,
2023 bool is_dpas)
2024 {
2025 uint32_t compacted = brw_compact_inst_3src_source_index(devinfo, src);
2026
2027 if (devinfo->ver >= 12) {
2028 const uint32_t *three_src_source_index_table =
2029 devinfo->ver >= 20 ? (is_dpas ? xe2_3src_dpas_source_index_table :
2030 xe2_3src_source_index_table) :
2031 devinfo->verx10 >= 125 ? xehp_3src_source_index_table :
2032 gfx12_3src_source_index_table;
2033 uint32_t uncompacted = three_src_source_index_table[compacted];
2034
2035 brw_inst_set_bits(dst, 114, 114, (uncompacted >> 20));
2036 brw_inst_set_bits(dst, 113, 112, (uncompacted >> 18) & 0x3);
2037 brw_inst_set_bits(dst, 98, 98, (uncompacted >> 17) & 0x1);
2038 brw_inst_set_bits(dst, 97, 96, (uncompacted >> 15) & 0x3);
2039 brw_inst_set_bits(dst, 91, 91, (uncompacted >> 14) & 0x1);
2040 brw_inst_set_bits(dst, 87, 86, (uncompacted >> 12) & 0x3);
2041 brw_inst_set_bits(dst, 85, 84, (uncompacted >> 10) & 0x3);
2042 brw_inst_set_bits(dst, 83, 83, (uncompacted >> 9) & 0x1);
2043 brw_inst_set_bits(dst, 66, 66, (uncompacted >> 8) & 0x1);
2044 brw_inst_set_bits(dst, 65, 64, (uncompacted >> 6) & 0x3);
2045 brw_inst_set_bits(dst, 47, 47, (uncompacted >> 5) & 0x1);
2046 brw_inst_set_bits(dst, 46, 46, (uncompacted >> 4) & 0x1);
2047 brw_inst_set_bits(dst, 45, 44, (uncompacted >> 2) & 0x3);
2048 brw_inst_set_bits(dst, 43, 43, (uncompacted >> 1) & 0x1);
2049 brw_inst_set_bits(dst, 35, 35, (uncompacted >> 0) & 0x1);
2050 } else {
2051 uint64_t uncompacted = gfx8_3src_source_index_table[compacted];
2052
2053 brw_inst_set_bits(dst, 83, 83, (uncompacted >> 43) & 0x1);
2054 brw_inst_set_bits(dst, 114, 107, (uncompacted >> 35) & 0xff);
2055 brw_inst_set_bits(dst, 93, 86, (uncompacted >> 27) & 0xff);
2056 brw_inst_set_bits(dst, 72, 65, (uncompacted >> 19) & 0xff);
2057 brw_inst_set_bits(dst, 55, 37, (uncompacted >> 0) & 0x7ffff);
2058
2059 brw_inst_set_bits(dst, 126, 125, (uncompacted >> 47) & 0x3);
2060 brw_inst_set_bits(dst, 105, 104, (uncompacted >> 45) & 0x3);
2061 brw_inst_set_bits(dst, 84, 84, (uncompacted >> 44) & 0x1);
2062 }
2063 }
2064
2065 static void
set_uncompacted_3src_subreg_index(const struct intel_device_info * devinfo,brw_inst * dst,brw_compact_inst * src)2066 set_uncompacted_3src_subreg_index(const struct intel_device_info *devinfo,
2067 brw_inst *dst, brw_compact_inst *src)
2068 {
2069 assert(devinfo->ver >= 12);
2070
2071 uint32_t compacted = brw_compact_inst_3src_subreg_index(devinfo, src);
2072 uint32_t uncompacted = (devinfo->ver >= 20 ? xe2_3src_subreg_table[compacted]:
2073 gfx12_3src_subreg_table[compacted]);
2074
2075 brw_inst_set_bits(dst, 119, 115, (uncompacted >> 15));
2076 brw_inst_set_bits(dst, 103, 99, (uncompacted >> 10) & 0x1f);
2077 brw_inst_set_bits(dst, 71, 67, (uncompacted >> 5) & 0x1f);
2078 brw_inst_set_bits(dst, 55, 51, (uncompacted >> 0) & 0x1f);
2079 }
2080
2081 static void
brw_uncompact_3src_instruction(const struct compaction_state * c,brw_inst * dst,brw_compact_inst * src,bool is_dpas)2082 brw_uncompact_3src_instruction(const struct compaction_state *c,
2083 brw_inst *dst, brw_compact_inst *src, bool is_dpas)
2084 {
2085 const struct intel_device_info *devinfo = c->isa->devinfo;
2086
2087 #define uncompact(field) \
2088 brw_inst_set_3src_##field(devinfo, dst, brw_compact_inst_3src_##field(devinfo, src))
2089 #define uncompact_a16(field) \
2090 brw_inst_set_3src_a16_##field(devinfo, dst, brw_compact_inst_3src_##field(devinfo, src))
2091
2092 uncompact(hw_opcode);
2093
2094 if (devinfo->ver >= 12) {
2095 set_uncompacted_3src_control_index(c, dst, src, is_dpas);
2096 set_uncompacted_3src_source_index(devinfo, dst, src, is_dpas);
2097 set_uncompacted_3src_subreg_index(devinfo, dst, src);
2098
2099 uncompact(debug_control);
2100 uncompact(swsb);
2101 uncompact(dst_reg_nr);
2102 uncompact(src0_reg_nr);
2103 uncompact(src1_reg_nr);
2104 uncompact(src2_reg_nr);
2105 } else {
2106 set_uncompacted_3src_control_index(c, dst, src, is_dpas);
2107 set_uncompacted_3src_source_index(devinfo, dst, src, is_dpas);
2108
2109 uncompact(dst_reg_nr);
2110 uncompact_a16(src0_rep_ctrl);
2111 uncompact(debug_control);
2112 uncompact(saturate);
2113 uncompact_a16(src1_rep_ctrl);
2114 uncompact_a16(src2_rep_ctrl);
2115 uncompact(src0_reg_nr);
2116 uncompact(src1_reg_nr);
2117 uncompact(src2_reg_nr);
2118 uncompact_a16(src0_subreg_nr);
2119 uncompact_a16(src1_subreg_nr);
2120 uncompact_a16(src2_subreg_nr);
2121 }
2122 brw_inst_set_3src_cmpt_control(devinfo, dst, false);
2123
2124 #undef uncompact
2125 #undef uncompact_a16
2126 }
2127
2128 static void
uncompact_instruction(const struct compaction_state * c,brw_inst * dst,brw_compact_inst * src)2129 uncompact_instruction(const struct compaction_state *c, brw_inst *dst,
2130 brw_compact_inst *src)
2131 {
2132 const struct intel_device_info *devinfo = c->isa->devinfo;
2133 memset(dst, 0, sizeof(*dst));
2134
2135 const enum opcode opcode =
2136 brw_opcode_decode(c->isa, brw_compact_inst_3src_hw_opcode(devinfo, src));
2137 if (is_3src(c->isa, opcode)) {
2138 const bool is_dpas = opcode == BRW_OPCODE_DPAS;
2139 brw_uncompact_3src_instruction(c, dst, src, is_dpas);
2140 return;
2141 }
2142
2143 #define uncompact(field) \
2144 brw_inst_set_##field(devinfo, dst, brw_compact_inst_##field(devinfo, src))
2145 #define uncompact_reg(field) \
2146 brw_inst_set_##field##_da_reg_nr(devinfo, dst, \
2147 brw_compact_inst_##field##_reg_nr(devinfo, src))
2148
2149 uncompact(hw_opcode);
2150 uncompact(debug_control);
2151
2152 set_uncompacted_control(c, dst, src);
2153 set_uncompacted_datatype(c, dst, src);
2154 set_uncompacted_subreg(c, dst, src);
2155 set_uncompacted_src0(c, dst, src);
2156
2157 enum brw_reg_type type;
2158 if (has_immediate(devinfo, dst, &type)) {
2159 unsigned imm = uncompact_immediate(devinfo, type,
2160 brw_compact_inst_imm(devinfo, src));
2161 brw_inst_set_imm_ud(devinfo, dst, imm);
2162 } else {
2163 set_uncompacted_src1(c, dst, src);
2164 uncompact_reg(src1);
2165 }
2166
2167 if (devinfo->ver >= 12) {
2168 uncompact(swsb);
2169 uncompact_reg(dst);
2170 uncompact_reg(src0);
2171 } else {
2172 uncompact(acc_wr_control);
2173
2174 uncompact(cond_modifier);
2175
2176 uncompact_reg(dst);
2177 uncompact_reg(src0);
2178 }
2179 brw_inst_set_cmpt_control(devinfo, dst, false);
2180
2181 #undef uncompact
2182 #undef uncompact_reg
2183 }
2184
2185 void
brw_uncompact_instruction(const struct brw_isa_info * isa,brw_inst * dst,brw_compact_inst * src)2186 brw_uncompact_instruction(const struct brw_isa_info *isa,
2187 brw_inst *dst, brw_compact_inst *src)
2188 {
2189 struct compaction_state c;
2190 compaction_state_init(&c, isa);
2191 uncompact_instruction(&c, dst, src);
2192 }
2193
2194 void
brw_debug_compact_uncompact(const struct brw_isa_info * isa,brw_inst * orig,brw_inst * uncompacted)2195 brw_debug_compact_uncompact(const struct brw_isa_info *isa,
2196 brw_inst *orig,
2197 brw_inst *uncompacted)
2198 {
2199 fprintf(stderr, "Instruction compact/uncompact changed (gen%d):\n",
2200 isa->devinfo->ver);
2201
2202 fprintf(stderr, " before: ");
2203 brw_disassemble_inst(stderr, isa, orig, true, 0, NULL);
2204
2205 fprintf(stderr, " after: ");
2206 brw_disassemble_inst(stderr, isa, uncompacted, false, 0, NULL);
2207
2208 uint32_t *before_bits = (uint32_t *)orig;
2209 uint32_t *after_bits = (uint32_t *)uncompacted;
2210 fprintf(stderr, " changed bits:\n");
2211 for (int i = 0; i < 128; i++) {
2212 uint32_t before = before_bits[i / 32] & (1 << (i & 31));
2213 uint32_t after = after_bits[i / 32] & (1 << (i & 31));
2214
2215 if (before != after) {
2216 fprintf(stderr, " bit %d, %s to %s\n", i,
2217 before ? "set" : "unset",
2218 after ? "set" : "unset");
2219 }
2220 }
2221 }
2222
/**
 * Returns the difference between the compacted counts recorded for
 * old_target_ip and for old_ip.  The result is negative when the target's
 * count is the smaller of the two.
 */
static int
compacted_between(int old_ip, int old_target_ip, int *compacted_counts)
{
   return compacted_counts[old_target_ip] - compacted_counts[old_ip];
}
2230
2231 static void
update_uip_jip(const struct brw_isa_info * isa,brw_inst * insn,int this_old_ip,int * compacted_counts)2232 update_uip_jip(const struct brw_isa_info *isa, brw_inst *insn,
2233 int this_old_ip, int *compacted_counts)
2234 {
2235 const struct intel_device_info *devinfo = isa->devinfo;
2236
2237 /* JIP and UIP are in units of bytes on Gfx8+. */
2238 int shift = 3;
2239
2240 /* Even though the values are signed, we don't need the rounding behavior
2241 * of integer division. The shifts are safe.
2242 */
2243 assert(brw_inst_jip(devinfo, insn) % 8 == 0 &&
2244 brw_inst_uip(devinfo, insn) % 8 == 0);
2245
2246 int32_t jip_compacted = brw_inst_jip(devinfo, insn) >> shift;
2247 jip_compacted -= compacted_between(this_old_ip,
2248 this_old_ip + (jip_compacted / 2),
2249 compacted_counts);
2250 brw_inst_set_jip(devinfo, insn, (uint32_t)jip_compacted << shift);
2251
2252 if (brw_inst_opcode(isa, insn) == BRW_OPCODE_ENDIF ||
2253 brw_inst_opcode(isa, insn) == BRW_OPCODE_WHILE)
2254 return;
2255
2256 int32_t uip_compacted = brw_inst_uip(devinfo, insn) >> shift;
2257 uip_compacted -= compacted_between(this_old_ip,
2258 this_old_ip + (uip_compacted / 2),
2259 compacted_counts);
2260 brw_inst_set_uip(devinfo, insn, (uint32_t)uip_compacted << shift);
2261 }
2262
2263 static void
compaction_state_init(struct compaction_state * c,const struct brw_isa_info * isa)2264 compaction_state_init(struct compaction_state *c,
2265 const struct brw_isa_info *isa)
2266 {
2267 const struct intel_device_info *devinfo = isa->devinfo;
2268
2269 assert(g45_subreg_table[ARRAY_SIZE(g45_subreg_table) - 1] != 0);
2270 assert(gfx8_control_index_table[ARRAY_SIZE(gfx8_control_index_table) - 1] != 0);
2271 assert(gfx8_datatype_table[ARRAY_SIZE(gfx8_datatype_table) - 1] != 0);
2272 assert(gfx8_subreg_table[ARRAY_SIZE(gfx8_subreg_table) - 1] != 0);
2273 assert(gfx8_src_index_table[ARRAY_SIZE(gfx8_src_index_table) - 1] != 0);
2274 assert(gfx11_datatype_table[ARRAY_SIZE(gfx11_datatype_table) - 1] != 0);
2275 assert(gfx12_control_index_table[ARRAY_SIZE(gfx12_control_index_table) - 1] != 0);
2276 assert(gfx12_datatype_table[ARRAY_SIZE(gfx12_datatype_table) - 1] != 0);
2277 assert(gfx12_subreg_table[ARRAY_SIZE(gfx12_subreg_table) - 1] != 0);
2278 assert(gfx12_src0_index_table[ARRAY_SIZE(gfx12_src0_index_table) - 1] != 0);
2279 assert(gfx12_src1_index_table[ARRAY_SIZE(gfx12_src1_index_table) - 1] != 0);
2280 assert(xehp_src0_index_table[ARRAY_SIZE(xehp_src0_index_table) - 1] != 0);
2281 assert(xehp_src1_index_table[ARRAY_SIZE(xehp_src1_index_table) - 1] != 0);
2282 assert(xe2_control_index_table[ARRAY_SIZE(xe2_control_index_table) - 1] != 0);
2283 assert(xe2_datatype_table[ARRAY_SIZE(xe2_datatype_table) - 1] != 0);
2284 assert(xe2_subreg_table[ARRAY_SIZE(xe2_subreg_table) - 1] != 0);
2285 assert(xe2_src0_index_table[ARRAY_SIZE(xe2_src0_index_table) - 1] != 0);
2286 assert(xe2_src1_index_table[ARRAY_SIZE(xe2_src1_index_table) - 1] != 0);
2287
2288 c->isa = isa;
2289 switch (devinfo->ver) {
2290 case 20:
2291 c->control_index_table = xe2_control_index_table;
2292 c->datatype_table = xe2_datatype_table;
2293 c->subreg_table = xe2_subreg_table;
2294 c->src0_index_table = xe2_src0_index_table;
2295 c->src1_index_table = xe2_src1_index_table;
2296 break;
2297 case 12:
2298 c->control_index_table = gfx12_control_index_table;;
2299 c->datatype_table = gfx12_datatype_table;
2300 c->subreg_table = gfx12_subreg_table;
2301 if (devinfo->verx10 >= 125) {
2302 c->src0_index_table = xehp_src0_index_table;
2303 c->src1_index_table = xehp_src1_index_table;
2304 } else {
2305 c->src0_index_table = gfx12_src0_index_table;
2306 c->src1_index_table = gfx12_src1_index_table;
2307 }
2308 break;
2309 case 11:
2310 c->control_index_table = gfx8_control_index_table;
2311 c->datatype_table = gfx11_datatype_table;
2312 c->subreg_table = gfx8_subreg_table;
2313 c->src0_index_table = gfx8_src_index_table;
2314 c->src1_index_table = gfx8_src_index_table;
2315 break;
2316 case 9:
2317 c->control_index_table = gfx8_control_index_table;
2318 c->datatype_table = gfx8_datatype_table;
2319 c->subreg_table = gfx8_subreg_table;
2320 c->src0_index_table = gfx8_src_index_table;
2321 c->src1_index_table = gfx8_src_index_table;
2322 break;
2323 default:
2324 unreachable("unknown generation");
2325 }
2326 }
2327
2328 void
brw_compact_instructions(struct brw_codegen * p,int start_offset,struct disasm_info * disasm)2329 brw_compact_instructions(struct brw_codegen *p, int start_offset,
2330 struct disasm_info *disasm)
2331 {
2332 if (INTEL_DEBUG(DEBUG_NO_COMPACTION))
2333 return;
2334
2335 const struct intel_device_info *devinfo = p->devinfo;
2336
2337 void *store = p->store + start_offset / 16;
2338 /* For an instruction at byte offset 16*i before compaction, this is the
2339 * number of compacted instructions minus the number of padding NOP/NENOPs
2340 * that preceded it.
2341 */
2342 unsigned num_compacted_counts =
2343 (p->next_insn_offset - start_offset) / sizeof(brw_inst);
2344 int *compacted_counts =
2345 calloc(1, sizeof(*compacted_counts) * num_compacted_counts);
2346
2347 /* For an instruction at byte offset 8*i after compaction, this was its IP
2348 * (in 16-byte units) before compaction.
2349 */
2350 unsigned num_old_ip =
2351 (p->next_insn_offset - start_offset) / sizeof(brw_compact_inst) + 1;
2352 int *old_ip = calloc(1, sizeof(*old_ip) * num_old_ip);
2353
2354 struct compaction_state c;
2355 compaction_state_init(&c, p->isa);
2356
2357 int offset = 0;
2358 int compacted_count = 0;
2359 for (int src_offset = 0; src_offset < p->next_insn_offset - start_offset;
2360 src_offset += sizeof(brw_inst)) {
2361 brw_inst *src = store + src_offset;
2362 void *dst = store + offset;
2363
2364 old_ip[offset / sizeof(brw_compact_inst)] = src_offset / sizeof(brw_inst);
2365 compacted_counts[src_offset / sizeof(brw_inst)] = compacted_count;
2366
2367 brw_inst inst = precompact(p->isa, *src);
2368 brw_inst saved = inst;
2369
2370 if (try_compact_instruction(&c, dst, &inst)) {
2371 compacted_count++;
2372
2373 if (INTEL_DEBUG(DEBUG_VS | DEBUG_GS | DEBUG_TCS | DEBUG_TASK |
2374 DEBUG_WM | DEBUG_CS | DEBUG_TES | DEBUG_MESH |
2375 DEBUG_RT)) {
2376 brw_inst uncompacted;
2377 uncompact_instruction(&c, &uncompacted, dst);
2378 if (memcmp(&saved, &uncompacted, sizeof(uncompacted))) {
2379 brw_debug_compact_uncompact(p->isa, &saved, &uncompacted);
2380 }
2381 }
2382
2383 offset += sizeof(brw_compact_inst);
2384 } else {
2385 /* If we didn't compact this instruction, we need to move it down into
2386 * place.
2387 */
2388 if (offset != src_offset) {
2389 memmove(dst, src, sizeof(brw_inst));
2390 }
2391 offset += sizeof(brw_inst);
2392 }
2393 }
2394
2395 /* Add an entry for the ending offset of the program. This greatly
2396 * simplifies the linked list walk at the end of the function.
2397 */
2398 old_ip[offset / sizeof(brw_compact_inst)] =
2399 (p->next_insn_offset - start_offset) / sizeof(brw_inst);
2400
2401 /* Fix up control flow offsets. */
2402 p->next_insn_offset = start_offset + offset;
2403 for (offset = 0; offset < p->next_insn_offset - start_offset;
2404 offset = next_offset(devinfo, store, offset)) {
2405 brw_inst *insn = store + offset;
2406 int this_old_ip = old_ip[offset / sizeof(brw_compact_inst)];
2407 int this_compacted_count = compacted_counts[this_old_ip];
2408
2409 switch (brw_inst_opcode(p->isa, insn)) {
2410 case BRW_OPCODE_BREAK:
2411 case BRW_OPCODE_CONTINUE:
2412 case BRW_OPCODE_HALT:
2413 update_uip_jip(p->isa, insn, this_old_ip, compacted_counts);
2414 break;
2415
2416 case BRW_OPCODE_IF:
2417 case BRW_OPCODE_ELSE:
2418 case BRW_OPCODE_ENDIF:
2419 case BRW_OPCODE_WHILE:
2420 if (brw_inst_cmpt_control(devinfo, insn)) {
2421 brw_inst uncompacted;
2422 uncompact_instruction(&c, &uncompacted,
2423 (brw_compact_inst *)insn);
2424
2425 update_uip_jip(p->isa, &uncompacted, this_old_ip,
2426 compacted_counts);
2427
2428 bool ret = try_compact_instruction(&c, (brw_compact_inst *)insn,
2429 &uncompacted);
2430 assert(ret); (void)ret;
2431 } else {
2432 update_uip_jip(p->isa, insn, this_old_ip, compacted_counts);
2433 }
2434 break;
2435
2436 case BRW_OPCODE_ADD:
2437 /* Add instructions modifying the IP register use an immediate src1,
2438 * and Gens that use this cannot compact instructions with immediate
2439 * operands.
2440 */
2441 if (brw_inst_cmpt_control(devinfo, insn))
2442 break;
2443
2444 if (brw_inst_dst_reg_file(devinfo, insn) == ARF &&
2445 brw_inst_dst_da_reg_nr(devinfo, insn) == BRW_ARF_IP) {
2446 assert(brw_inst_src1_reg_file(devinfo, insn) == IMM);
2447
2448 int shift = 3;
2449 int jump_compacted = brw_inst_imm_d(devinfo, insn) >> shift;
2450
2451 int target_old_ip = this_old_ip + (jump_compacted / 2);
2452 int target_compacted_count = compacted_counts[target_old_ip];
2453 jump_compacted -= (target_compacted_count - this_compacted_count);
2454 brw_inst_set_imm_ud(devinfo, insn, jump_compacted << shift);
2455 }
2456 break;
2457
2458 default:
2459 break;
2460 }
2461 }
2462
2463 /* p->nr_insn is counting the number of uncompacted instructions still, so
2464 * divide. We do want to be sure there's a valid instruction in any
2465 * alignment padding, so that the next compression pass (for the FS 8/16
2466 * compile passes) parses correctly.
2467 */
2468 if (p->next_insn_offset & sizeof(brw_compact_inst)) {
2469 brw_compact_inst *align = store + offset;
2470 memset(align, 0, sizeof(*align));
2471 brw_compact_inst_set_hw_opcode(
2472 devinfo, align, brw_opcode_encode(p->isa, BRW_OPCODE_NOP));
2473 brw_compact_inst_set_cmpt_control(devinfo, align, true);
2474 p->next_insn_offset += sizeof(brw_compact_inst);
2475 }
2476 p->nr_insn = p->next_insn_offset / sizeof(brw_inst);
2477
2478 for (int i = 0; i < p->num_relocs; i++) {
2479 if (p->relocs[i].offset < (uint32_t)start_offset)
2480 continue;
2481
2482 assert(p->relocs[i].offset % 16 == 0);
2483 unsigned idx = (p->relocs[i].offset - start_offset) / 16;
2484 p->relocs[i].offset -= compacted_counts[idx] * 8;
2485 }
2486
2487 /* Update the instruction offsets for each group. */
2488 if (disasm) {
2489 int offset = 0;
2490
2491 foreach_list_typed(struct inst_group, group, link, &disasm->group_list) {
2492 while (start_offset + old_ip[offset / sizeof(brw_compact_inst)] *
2493 sizeof(brw_inst) != group->offset) {
2494 assert(start_offset + old_ip[offset / sizeof(brw_compact_inst)] *
2495 sizeof(brw_inst) < group->offset);
2496 offset = next_offset(devinfo, store, offset);
2497 }
2498
2499 group->offset = start_offset + offset;
2500
2501 offset = next_offset(devinfo, store, offset);
2502 }
2503 }
2504
2505 free(compacted_counts);
2506 free(old_ip);
2507 }
2508