xref: /aosp_15_r20/external/mesa3d/src/intel/compiler/brw_eu_compact.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2012-2018 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 /** @file
25  *
26  * Instruction compaction is a feature of G45 and newer hardware that allows
27  * for a smaller instruction encoding.
28  *
29  * The instruction cache is on the order of 32KB, and many programs generate
30  * far more instructions than that.  The instruction cache is built to barely
31  * keep up with instruction dispatch ability in cache hit cases -- L1
32  * instruction cache misses that still hit in the next level could limit
33  * throughput by around 50%.
34  *
35  * The idea of instruction compaction is that most instructions use a tiny
36  * subset of the GPU functionality, so we can encode what would be a 16 byte
37  * instruction in 8 bytes using some lookup tables for various fields.
38  *
39  *
40  * Instruction compaction capabilities vary subtly by generation.
41  *
42  * G45's support for instruction compaction is very limited. Jump counts on
43  * this generation are in units of 16-byte uncompacted instructions. As such,
44  * all jump targets must be 16-byte aligned. Also, all instructions must be
45  * naturally aligned, i.e. uncompacted instructions must be 16-byte aligned.
46  * A G45-only instruction, NENOP, must be used to provide padding to align
47  * uncompacted instructions.
48  *
49  * Gfx5 removes these restrictions and changes jump counts to be in units of
50  * 8-byte compacted instructions, allowing jump targets to be only 8-byte
51  * aligned. Uncompacted instructions can also be placed on 8-byte boundaries.
52  *
53  * Gfx6 adds the ability to compact instructions with a limited range of
54  * immediate values. Compactable immediates have 12 unrestricted bits, and a
55  * 13th bit that's replicated through the high 20 bits, to create the 32-bit
56  * value of DW3 in the uncompacted instruction word.
57  *
58  * On Gfx7 we can compact some control flow instructions with a small positive
59  * immediate in the low bits of DW3, like ENDIF with the JIP field. Other
60  * control flow instructions with UIP cannot be compacted, because of the
61  * replicated 13th bit. No control flow instructions can be compacted on Gfx6
62  * since the jump count field is not in DW3.
63  *
64  *    break    JIP/UIP
65  *    cont     JIP/UIP
66  *    halt     JIP/UIP
67  *    if       JIP/UIP
68  *    else     JIP (plus UIP on BDW+)
69  *    endif    JIP
70  *    while    JIP (must be negative)
71  *
72  * Gfx8 adds support for compacting 3-src instructions.
73  *
74  * Gfx12 reduces the number of bits available to compacted immediates from
75  * 13 to 12, but improves the compaction of floating-point immediates by
76  * allowing the high bits to be encoded (the sign, 8-bit exponent, and the
77  * three most significant bits of the mantissa), rather than the lowest bits of
78  * the mantissa.
79  */
80 
81 #include "brw_eu.h"
82 #include "brw_disasm.h"
83 #include "brw_disasm_info.h"
84 #include "dev/intel_debug.h"
85 
/* G45 "subreg" lookup table for instruction compaction: 32 entries, each
 * written as a 15-bit pattern.
 *
 * NOTE(review): presumably a compacted instruction stores an index into this
 * table in place of the full 15-bit field; the code that consumes the table
 * is not visible in this part of the file -- confirm there.
 */
86 static const uint16_t g45_subreg_table[32] = {
87    0b000000000000000,
88    0b000000010000000,
89    0b000001000000000,
90    0b000100000000000,
91    0b000000000100000,
92    0b100000000000000,
93    0b000000000010000,
94    0b001100000000000,
95    0b001010000000000,
96    0b000000100000000,
97    0b001000000000000,
98    0b000000000001000,
99    0b000000001000000,
100    0b000000000000001,
101    0b000010000000000,
102    0b000000010100000,
103    0b000000000000111,
104    0b000001000100000,
105    0b011000000000000,
106    0b000000110000000,
107    0b000000000000010,
108    0b000000000000100,
109    0b000000001100000,
110    0b000100000000010,
111    0b001110011000110,
112    0b001110100001000,
113    0b000110011000110,
114    0b000001000011000,
115    0b000110010000100,
116    0b001100000000110,
117    0b000000010000110,
118    0b000001000110000,
119 };
120 
/* Gfx8 "control index" lookup table for instruction compaction: 32 entries,
 * each written as a 19-bit pattern (hence the uint32_t element type).
 *
 * NOTE(review): presumably a compacted instruction stores an index into this
 * table in place of the full 19-bit field; the code that consumes the table
 * is not visible in this part of the file -- confirm there.
 */
121 static const uint32_t gfx8_control_index_table[32] = {
122    0b0000000000000000010,
123    0b0000100000000000000,
124    0b0000100000000000001,
125    0b0000100000000000010,
126    0b0000100000000000011,
127    0b0000100000000000100,
128    0b0000100000000000101,
129    0b0000100000000000111,
130    0b0000100000000001000,
131    0b0000100000000001001,
132    0b0000100000000001101,
133    0b0000110000000000000,
134    0b0000110000000000001,
135    0b0000110000000000010,
136    0b0000110000000000011,
137    0b0000110000000000100,
138    0b0000110000000000101,
139    0b0000110000000000111,
140    0b0000110000000001001,
141    0b0000110000000001101,
142    0b0000110000000010000,
143    0b0000110000100000000,
144    0b0001000000000000000,
145    0b0001000000000000010,
146    0b0001000000000000100,
147    0b0001000000100000000,
148    0b0010110000000000000,
149    0b0010110000000010000,
150    0b0011000000000000000,
151    0b0011000000100000000,
152    0b0101000000000000000,
153    0b0101000000100000000,
154 };
155 
/* Gfx8 "datatype" lookup table for instruction compaction: 32 entries, each
 * written as a 21-bit pattern (hence the uint32_t element type).
 *
 * NOTE(review): presumably a compacted instruction stores an index into this
 * table in place of the full 21-bit field; the code that consumes the table
 * is not visible in this part of the file -- confirm there.
 */
156 static const uint32_t gfx8_datatype_table[32] = {
157    0b001000000000000000001,
158    0b001000000000001000000,
159    0b001000000000001000001,
160    0b001000000000011000001,
161    0b001000000000101011101,
162    0b001000000010111011101,
163    0b001000000011101000001,
164    0b001000000011101000101,
165    0b001000000011101011101,
166    0b001000001000001000001,
167    0b001000011000001000000,
168    0b001000011000001000001,
169    0b001000101000101000101,
170    0b001000111000101000100,
171    0b001000111000101000101,
172    0b001011100011101011101,
173    0b001011101011100011101,
174    0b001011101011101011100,
175    0b001011101011101011101,
176    0b001011111011101011100,
177    0b000000000010000001100,
178    0b001000000000001011101,
179    0b001000000000101000101,
180    0b001000001000001000000,
181    0b001000101000101000100,
182    0b001000111000100000100,
183    0b001001001001000001001,
184    0b001010111011101011101,
185    0b001011111011101011101,
186    0b001001111001101001100,
187    0b001001001001001001000,
188    0b001001011001001001000,
189 };
190 
/* Gfx8 "subreg" lookup table for instruction compaction: 32 entries, each
 * written as a 15-bit pattern.
 *
 * NOTE(review): presumably a compacted instruction stores an index into this
 * table in place of the full 15-bit field; the code that consumes the table
 * is not visible in this part of the file -- confirm there.
 */
191 static const uint16_t gfx8_subreg_table[32] = {
192    0b000000000000000,
193    0b000000000000001,
194    0b000000000001000,
195    0b000000000001111,
196    0b000000000010000,
197    0b000000010000000,
198    0b000000100000000,
199    0b000000110000000,
200    0b000001000000000,
201    0b000001000010000,
202    0b000001010000000,
203    0b001000000000000,
204    0b001000000000001,
205    0b001000010000001,
206    0b001000010000010,
207    0b001000010000011,
208    0b001000010000100,
209    0b001000010000111,
210    0b001000010001000,
211    0b001000010001110,
212    0b001000010001111,
213    0b001000110000000,
214    0b001000111101000,
215    0b010000000000000,
216    0b010000110000000,
217    0b011000000000000,
218    0b011110010000111,
219    0b100000000000000,
220    0b101000000000000,
221    0b110000000000000,
222    0b111000000000000,
223    0b111000000011100,
224 };
225 
/* Gfx8 "src index" lookup table for instruction compaction: 32 entries, each
 * written as a 12-bit pattern.
 *
 * NOTE(review): presumably a compacted instruction stores an index into this
 * table in place of the full 12-bit source field; the code that consumes the
 * table is not visible in this part of the file -- confirm there.
 */
226 static const uint16_t gfx8_src_index_table[32] = {
227    0b000000000000,
228    0b000000000010,
229    0b000000010000,
230    0b000000010010,
231    0b000000011000,
232    0b000000100000,
233    0b000000101000,
234    0b000001001000,
235    0b000001010000,
236    0b000001110000,
237    0b000001111000,
238    0b001100000000,
239    0b001100000010,
240    0b001100001000,
241    0b001100010000,
242    0b001100010010,
243    0b001100100000,
244    0b001100101000,
245    0b001100111000,
246    0b001101000000,
247    0b001101000010,
248    0b001101001000,
249    0b001101010000,
250    0b001101100000,
251    0b001101101000,
252    0b001101110000,
253    0b001101110001,
254    0b001101111000,
255    0b010001101000,
256    0b010001101001,
257    0b010001101010,
258    0b010110001000,
259 };
260 
/* Gfx11 "datatype" lookup table for instruction compaction: 32 entries, each
 * written as a 21-bit pattern (hence the uint32_t element type).
 *
 * NOTE(review): presumably a compacted instruction stores an index into this
 * table in place of the full 21-bit field; the code that consumes the table
 * is not visible in this part of the file -- confirm there.
 */
261 static const uint32_t gfx11_datatype_table[32] = {
262    0b001000000000000000001,
263    0b001000000000001000000,
264    0b001000000000001000001,
265    0b001000000000011000001,
266    0b001000000000101100101,
267    0b001000000101111100101,
268    0b001000000100101000001,
269    0b001000000100101000101,
270    0b001000000100101100101,
271    0b001000001000001000001,
272    0b001000011000001000000,
273    0b001000011000001000001,
274    0b001000101000101000101,
275    0b001000111000101000100,
276    0b001000111000101000101,
277    0b001100100100101100101,
278    0b001100101100100100101,
279    0b001100101100101100100,
280    0b001100101100101100101,
281    0b001100111100101100100,
282    0b000000000010000001100,
283    0b001000000000001100101,
284    0b001000000000101000101,
285    0b001000001000001000000,
286    0b001000101000101000100,
287    0b001000111000100000100,
288    0b001001001001000001001,
289    0b001101111100101100101,
290    0b001100111100101100101,
291    0b001001111001101001100,
292    0b001001001001001001000,
293    0b001001011001001001000,
294 };
295 
/* Gfx12 "control index" lookup table for instruction compaction: 32 entries,
 * each written as a 21-bit pattern (hence the uint32_t element type).
 *
 * The trailing comment on each entry is a disassembly-style annotation of the
 * pattern, e.g. "(W)", "(f0.0)", "(16|M0)", "(ge)f0.0", "(sat)", "{AccWrEn}".
 *
 * NOTE(review): presumably a compacted instruction stores an index into this
 * table in place of the full 21-bit field; the code that consumes the table
 * is not visible in this part of the file -- confirm there.
 */
296 static const uint32_t gfx12_control_index_table[32] = {
297    0b000000000000000000100, /* 	       (16|M0)                            */
298    0b000000000000000000011, /* 	       (8|M0)                             */
299    0b000000010000000000000, /* 	(W)    (1|M0)                             */
300    0b000000010000000000100, /* 	(W)    (16|M0)                            */
301    0b000000010000000000011, /* 	(W)    (8|M0)                             */
302    0b010000000000000000100, /* 	       (16|M0)  (ge)f0.0                  */
303    0b000000000000000100100, /* 	       (16|M16)                           */
304    0b010100000000000000100, /* 	       (16|M0)  (lt)f0.0                  */
305    0b000000000000000000000, /* 	       (1|M0)                             */
306    0b000010000000000000100, /* 	       (16|M0)           (sat)            */
307    0b000000000000000010011, /* 	       (8|M8)                             */
308    0b001100000000000000100, /* 	       (16|M0)  (gt)f0.0                  */
309    0b000100000000000000100, /* 	       (16|M0)  (eq)f0.0                  */
310    0b000100010000000000100, /* 	(W)    (16|M0)  (eq)f0.0                  */
311    0b001000000000000000100, /* 	       (16|M0)  (ne)f0.0                  */
312    0b000000000000100000100, /* 	(f0.0) (16|M0)                            */
313    0b010100000000000000011, /* 	       (8|M0)   (lt)f0.0                  */
314    0b000000000000110000100, /* 	(f1.0) (16|M0)                            */
315    0b000000010000000000001, /* 	(W)    (2|M0)                             */
316    0b000000000000101000100, /* 	(f0.1) (16|M0)                            */
317    0b000000000000111000100, /* 	(f1.1) (16|M0)                            */
318    0b010000010000000000100, /* 	(W)    (16|M0)  (ge)f0.0                  */
319    0b000000000000000100011, /* 	       (8|M16)                            */
320    0b000000000000000110011, /* 	       (8|M24)                            */
321    0b010100010000000000100, /* 	(W)    (16|M0)  (lt)f0.0                  */
322    0b010000000000000000011, /* 	       (8|M0)   (ge)f0.0                  */
323    0b000100010000000000000, /* 	(W)    (1|M0)   (eq)f0.0                  */
324    0b000010000000000000011, /* 	       (8|M0)            (sat)            */
325    0b010100000000010000100, /* 	       (16|M0)  (lt)f1.0                  */
326    0b000100000000000000011, /* 	       (8|M0)   (eq)f0.0                  */
327    0b000001000000000000011, /* 	       (8|M0)                   {AccWrEn} */
328    0b000000010000000100100, /* 	(W)    (16|M16)                           */
329 };
330 
/* Gfx12 "datatype" lookup table for instruction compaction: 32 entries, each
 * written as a 20-bit pattern (hence the uint32_t element type).
 *
 * The trailing comment on each entry lists three operands as
 * <register file>:<type>, the first one with a "<N>" suffix.
 * NOTE(review): the three columns look like destination, src0, src1 -- confirm.
 *
 * NOTE(review): presumably a compacted instruction stores an index into this
 * table in place of the full 20-bit field; the code that consumes the table
 * is not visible in this part of the file -- confirm there.
 */
331 static const uint32_t gfx12_datatype_table[32] = {
332    0b11010110100101010100, /* grf<1>:f  grf:f  grf:f  */
333    0b00000110100101010100, /* grf<1>:f  grf:f  arf:ub */
334    0b00000010101101010100, /* grf<1>:f  imm:f  arf:ub */
335    0b01010110110101010100, /* grf<1>:f  grf:f  imm:f  */
336    0b11010100100101010100, /* arf<1>:f  grf:f  grf:f  */
337    0b11010010100101010100, /* grf<1>:f  arf:f  grf:f  */
338    0b01010100110101010100, /* arf<1>:f  grf:f  imm:f  */
339    0b00000000100000000000, /* arf<1>:ub arf:ub arf:ub */
340    0b11010000100101010100, /* arf<1>:f  arf:f  grf:f  */
341    0b00101110110011001100, /* grf<1>:d  grf:d  imm:w  */
342    0b10110110100011001100, /* grf<1>:d  grf:d  grf:d  */
343    0b01010010110101010100, /* grf<1>:f  arf:f  imm:f  */
344    0b10010110100001000100, /* grf<1>:ud grf:ud grf:ud */
345    0b01010000110101010100, /* arf<1>:f  arf:f  imm:f  */
346    0b00110110110011001100, /* grf<1>:d  grf:d  imm:d  */
347    0b00010110110001000100, /* grf<1>:ud grf:ud imm:ud */
348    0b00000111000101010100, /* grf<2>:f  grf:f  arf:ub */
349    0b00101100110011001100, /* arf<1>:d  grf:d  imm:w  */
350    0b00000000100000100010, /* arf<1>:uw arf:uw arf:ub */
351    0b00000010100001000100, /* grf<1>:ud arf:ud arf:ub */
352    0b00100110110000101010, /* grf<1>:w  grf:uw imm:uv */
353    0b00001110110000100010, /* grf<1>:uw grf:uw imm:uw */
354    0b10010111000001000100, /* grf<2>:ud grf:ud grf:ud */
355    0b00000110100101001100, /* grf<1>:d  grf:f  arf:ub */
356    0b10001100100011001100, /* arf<1>:d  grf:d  grf:uw */
357    0b00000110100001010100, /* grf<1>:f  grf:ud arf:ub */
358    0b00101110110001001100, /* grf<1>:d  grf:ud imm:w  */
359    0b00000010100000100010, /* grf<1>:uw arf:uw arf:ub */
360    0b00000110100000110100, /* grf<1>:f  grf:uw arf:ub */
361    0b00000110100000010100, /* grf<1>:f  grf:ub arf:ub */
362    0b00000110100011010100, /* grf<1>:f  grf:d  arf:ub */
363    0b00000010100101010100, /* grf<1>:f  arf:f  arf:ub */
364 };
365 
/* Gfx12 "subreg" lookup table for instruction compaction: 32 entries, each
 * written as a 15-bit pattern.
 *
 * Each pattern packs three 5-bit values, shown in the trailing comment as
 * ".N" offsets: the first comment column occupies bits 4:0, the second bits
 * 9:5 and the third bits 14:10 (e.g. ".0 .24 .24" is 0b11000 in both upper
 * fields).
 * NOTE(review): the three columns look like destination, src0, src1
 * sub-register offsets -- confirm.
 *
 * NOTE(review): presumably a compacted instruction stores an index into this
 * table in place of the full 15-bit field; the code that consumes the table
 * is not visible in this part of the file -- confirm there.
 */
366 static const uint16_t gfx12_subreg_table[32] = {
367    0b000000000000000, /* .0  .0  .0  */
368    0b100000000000000, /* .0  .0  .16 */
369    0b001000000000000, /* .0  .0  .4  */
370    0b011000000000000, /* .0  .0  .12 */
371    0b000000010000000, /* .0  .4  .0  */
372    0b010000000000000, /* .0  .0  .8  */
373    0b101000000000000, /* .0  .0  .20 */
374    0b000000000001000, /* .8  .0  .0  */
375    0b000000100000000, /* .0  .8  .0  */
376    0b110000000000000, /* .0  .0  .24 */
377    0b111000000000000, /* .0  .0  .28 */
378    0b000001000000000, /* .0  .16 .0  */
379    0b000000000000100, /* .4  .0  .0  */
380    0b000001100000000, /* .0  .24 .0  */
381    0b000001010000000, /* .0  .20 .0  */
382    0b000000110000000, /* .0  .12 .0  */
383    0b000001110000000, /* .0  .28 .0  */
384    0b000000000011100, /* .28 .0  .0  */
385    0b000000000010000, /* .16 .0  .0  */
386    0b000000000001100, /* .12 .0  .0  */
387    0b000000000011000, /* .24 .0  .0  */
388    0b000000000010100, /* .20 .0  .0  */
389    0b000000000000010, /* .2  .0  .0  */
390    0b000000101000000, /* .0  .10 .0  */
391    0b000000001000000, /* .0  .2  .0  */
392    0b000000010000100, /* .4  .4  .0  */
393    0b000000001011100, /* .28 .2  .0  */
394    0b000000001000010, /* .2  .2  .0  */
395    0b000000110001100, /* .12 .12 .0  */
396    0b000000000100000, /* .0  .1  .0  */
397    0b000000001100000, /* .0  .3  .0  */
398    0b110001100000000, /* .0  .24 .24 */
399 };
400 
/* Gfx12 "src0 index" lookup table for instruction compaction: 16 entries,
 * each written as a 12-bit pattern.
 *
 * The trailing comment on each entry shows the operand form the pattern
 * stands for: an optional "-" / "(abs)" prefix and a register region such as
 * r<8;8,1>, or the r[a]<1,0> form in the last entry.
 *
 * NOTE(review): presumably a compacted instruction stores an index into this
 * table in place of the full 12-bit field; the code that consumes the table
 * is not visible in this part of the file -- confirm there.
 */
401 static const uint16_t gfx12_src0_index_table[16] = {
402    0b010001100100, /*       r<8;8,1>  */
403    0b000000000000, /*       r<0;1,0>  */
404    0b010001100110, /*      -r<8;8,1>  */
405    0b010001100101, /*  (abs)r<8;8,1>  */
406    0b000000000010, /*      -r<0;1,0>  */
407    0b001000000000, /*       r<2;1,0>  */
408    0b001001000000, /*       r<2;4,0>  */
409    0b001101000000, /*       r<4;4,0>  */
410    0b001000100100, /*       r<2;2,1>  */
411    0b001100000000, /*       r<4;1,0>  */
412    0b001000100110, /*      -r<2;2,1>  */
413    0b001101000100, /*       r<4;4,1>  */
414    0b010001100111, /* -(abs)r<8;8,1>  */
415    0b000100000000, /*       r<1;1,0>  */
416    0b000000000001, /*  (abs)r<0;1,0>  */
417    0b111100010000, /*       r[a]<1,0> */
418 };
419 
/* Gfx12 "src1 index" lookup table for instruction compaction: 16 entries,
 * each written as a 12-bit pattern.
 *
 * The trailing comment on each entry shows the operand form the pattern
 * stands for: an optional "-" / "(abs)" prefix and a register region such as
 * r<8;8,1>.  Identical comments map to different bit patterns here than in
 * the src0 table, so the two tables are not interchangeable.
 *
 * NOTE(review): presumably a compacted instruction stores an index into this
 * table in place of the full 12-bit field; the code that consumes the table
 * is not visible in this part of the file -- confirm there.
 */
420 static const uint16_t gfx12_src1_index_table[16] = {
421    0b000100011001, /*       r<8;8,1> */
422    0b000000000000, /*       r<0;1,0> */
423    0b100100011001, /*      -r<8;8,1> */
424    0b100000000000, /*      -r<0;1,0> */
425    0b010100011001, /*  (abs)r<8;8,1> */
426    0b100011010000, /*      -r<4;4,0> */
427    0b000010000000, /*       r<2;1,0> */
428    0b000010001001, /*       r<2;2,1> */
429    0b100010001001, /*      -r<2;2,1> */
430    0b000011010000, /*       r<4;4,0> */
431    0b000011010001, /*       r<4;4,1> */
432    0b000011000000, /*       r<4;1,0> */
433    0b110100011001, /* -(abs)r<8;8,1> */
434    0b010000000000, /*  (abs)r<0;1,0> */
435    0b110000000000, /* -(abs)r<0;1,0> */
436    0b100011010001, /*      -r<4;4,1> */
437 };
438 
/* XeHP "src0 index" lookup table for instruction compaction: 16 entries, each
 * written as a 12-bit pattern.
 *
 * The trailing comment on each entry shows the operand form the pattern
 * stands for: an optional "-" / "(abs)" prefix and a register region such as
 * r<1;1,0>, or the r[a]<1,0> form.
 *
 * NOTE(review): presumably a compacted instruction stores an index into this
 * table in place of the full 12-bit field; the code that consumes the table
 * is not visible in this part of the file -- confirm there.
 */
439 static const uint16_t xehp_src0_index_table[16] = {
440    0b000100000000, /*       r<1;1,0>  */
441    0b000000000000, /*       r<0;1,0>  */
442    0b000100000010, /*      -r<1;1,0>  */
443    0b000100000001, /*  (abs)r<1;1,0>  */
444    0b000000000010, /*      -r<0;1,0>  */
445    0b001000000000, /*       r<2;1,0>  */
446    0b001001000000, /*       r<2;4,0>  */
447    0b001101000000, /*       r<4;4,0>  */
448    0b001100000000, /*       r<4;1,0>  */
449    0b000100000011, /* -(abs)r<1;1,0>  */
450    0b000000000001, /*  (abs)r<0;1,0>  */
451    0b111100010000, /*       r[a]<1,0> */
452    0b010001100000, /*       r<8;8,0>  */
453    0b000101000000, /*       r<1;4,0>  */
454    0b010001001000, /*       r<8;4,2>  */
455    0b001000000010, /*      -r<2;1,0>  */
456 };
457 
/* XeHP "src1 index" lookup table for instruction compaction: 16 entries, each
 * written as a 12-bit pattern.
 *
 * The trailing comment on each entry shows the operand form the pattern
 * stands for: an optional "-" / "(abs)" prefix and a register region such as
 * r<1;1,0>, or the r[a]<1;1,0> form in the last entry.
 *
 * NOTE(review): presumably a compacted instruction stores an index into this
 * table in place of the full 12-bit field; the code that consumes the table
 * is not visible in this part of the file -- confirm there.
 */
458 static const uint16_t xehp_src1_index_table[16] = {
459    0b000001000000, /*       r<1;1,0>    */
460    0b000000000000, /*       r<0;1,0>    */
461    0b100001000000, /*      -r<1;1,0>    */
462    0b100000000000, /*      -r<0;1,0>    */
463    0b010001000000, /*  (abs)r<1;1,0>    */
464    0b100011010000, /*      -r<4;4,0>    */
465    0b000010000000, /*       r<2;1,0>    */
466    0b000011010000, /*       r<4;4,0>    */
467    0b000011000000, /*       r<4;1,0>    */
468    0b110001000000, /* -(abs)r<1;1,0>    */
469    0b010000000000, /*  (abs)r<0;1,0>    */
470    0b110000000000, /* -(abs)r<0;1,0>    */
471    0b000100011000, /*       r<8;8,0>    */
472    0b100010000000, /*      -r<2;1,0>    */
473    0b100000001001, /*      -r<0;2,1>    */
474    0b100001000100, /*      -r[a]<1;1,0> */
475 };
476 
/* Xe2 "control index" lookup table for instruction compaction: 32 entries,
 * each written as an 18-bit pattern (hence the uint32_t element type).
 *
 * The trailing comment on each entry is a disassembly-style annotation of the
 * pattern, e.g. "(W)", "(f2.0)", "(16|M0)", "(.ge)f0.0", "(sat)".
 *
 * NOTE(review): presumably a compacted instruction stores an index into this
 * table in place of the full 18-bit field; the code that consumes the table
 * is not visible in this part of the file -- confirm there.
 */
477 static const uint32_t xe2_control_index_table[32] = {
478    0b000000000000000100, /* (16|M0)               */
479    0b000000100000000000, /* (W) (1|M0)            */
480    0b000000000010000100, /* (16|M16)              */
481    0b000000000000000000, /* (1|M0)                */
482    0b000000100000000100, /* (W) (16|M0)           */
483    0b010000000000000100, /* (16|M0) (.ge)f0.0     */
484    0b010100000000000100, /* (16|M0) (.lt)f0.0     */
485    0b000000100000000010, /* (W) (4|M0)            */
486    0b000000000000000101, /* (32|M0)               */
487    0b000000100000000011, /* (W) (8|M0)            */
488    0b001100100000000000, /* (W) (1|M0) (.gt)f0.0  */
489    0b000010000000000100, /* (16|M0) (sat)         */
490    0b000100000000000100, /* (16|M0) (.eq)f0.0     */
491    0b000000100000000001, /* (W) (2|M0)            */
492    0b001100000000000100, /* (16|M0) (.gt)f0.0     */
493    0b000100100000000000, /* (W) (1|M0) (.eq)f0.0  */
494    0b010100100000000010, /* (W) (4|M0) (.lt)f0.0  */
495    0b010000100000000000, /* (W) (1|M0) (.ge)f0.0  */
496    0b010000100000000010, /* (W) (4|M0) (.ge)f0.0  */
497    0b010100100000000000, /* (W) (1|M0) (.lt)f0.0  */
498    0b001000000000000100, /* (16|M0) (.ne)f0.0     */
499    0b000000000100100100, /* (f2.0) (16|M0)        */
500    0b010100100000000011, /* (W) (8|M0) (.lt)f0.0  */
501    0b000000000100011100, /* (f1.1) (16|M0)        */
502    0b010000100000000011, /* (W) (8|M0) (.ge)f0.0  */
503    0b000000000100001100, /* (f0.1) (16|M0)        */
504    0b000000000100010100, /* (f1.0) (16|M0)        */
505    0b000000000100110100, /* (f3.0) (16|M0)        */
506    0b000000000100111100, /* (f3.1) (16|M0)        */
507    0b000000000100101100, /* (f2.1) (16|M0)        */
508    0b000000000100000100, /* (f0.0) (16|M0)        */
509    0b010100000000100100, /* (16|M0) (.lt)f2.0     */
510 };
511 
/* Xe2 "datatype" lookup table for instruction compaction: 32 entries, each
 * written as a 20-bit pattern (hence the uint32_t element type).
 *
 * The trailing comment on each entry lists the operands as
 * <register file>:<type>, the first one with a "<N>" suffix.  Most entries
 * list three operands; a few (those whose second operand is "imm") list two.
 * NOTE(review): the columns look like destination, src0, src1 -- confirm.
 *
 * NOTE(review): presumably a compacted instruction stores an index into this
 * table in place of the full 20-bit field; the code that consumes the table
 * is not visible in this part of the file -- confirm there.
 */
512 static const uint32_t xe2_datatype_table[32] = {
513    0b11010110100101010100, /* grf<1>:f grf:f grf:f    */
514    0b11010100100101010100, /* arf<1>:f grf:f grf:f    */
515    0b00000110100101010100, /* grf<1>:f grf:f arf:ub   */
516    0b00000110100001000100, /* grf<1>:ud grf:ud arf:ub */
517    0b01010110110101010100, /* grf<1>:f grf:f imm:f    */
518    0b11010010100101010100, /* grf<1>:f arf:f grf:f    */
519    0b10111110100011101110, /* grf<1>:q grf:q grf:q    */
520    0b00000000100000000000, /* arf<1>:ub arf:ub arf:ub */
521    0b01010110100101010100, /* grf<1>:f grf:f arf:f    */
522    0b00000010101001000100, /* grf<1>:ud imm:ud        */
523    0b00101110110011001100, /* grf<1>:d grf:d imm:w    */
524    0b11010000100101010100, /* arf<1>:f arf:f grf:f    */
525    0b01010100100101010100, /* arf<1>:f grf:f arf:f    */
526    0b01010100110101010100, /* arf<1>:f grf:f imm:f    */
527    0b00000010101101010100, /* grf<1>:f imm:f          */
528    0b00000110100011001100, /* grf<1>:d grf:d arf:ub   */
529    0b00101110110011101110, /* grf<1>:q grf:q imm:w    */
530    0b00000110100001100110, /* grf<1>:uq grf:uq arf:ub */
531    0b01010000100101010100, /* arf<1>:f arf:f arf:f    */
532    0b10110110100011001100, /* grf<1>:d grf:d grf:d    */
533    0b01010010100101010100, /* grf<1>:f arf:f arf:f    */
534    0b00000111000001000100, /* grf<2>:ud grf:ud arf:ub */
535    0b00110110110011001110, /* grf<1>:q grf:d imm:d    */
536    0b00101100110011001100, /* arf<1>:d grf:d imm:w    */
537    0b11011110100101110110, /* grf<1>:df grf:df grf:df */
538    0b01010010110101010100, /* grf<1>:f arf:f imm:f    */
539    0b10010110100001000100, /* grf<1>:ud grf:ud grf:ud */
540    0b00000010100001000100, /* grf<1>:ud arf:ud arf:ub */
541    0b00001110110001000100, /* grf<1>:ud grf:ud imm:uw */
542    0b00000010101010101100, /* grf<1>:d imm:w          */
543    0b01010000110101010100, /* arf<1>:f arf:f imm:f    */
544    0b00000100100001000100, /* arf<1>:ud grf:ud arf:ub */
545 };
546 
/* Xe2 "subreg" lookup table for instruction compaction: 16 entries, each
 * written as a 12-bit pattern.
 *
 * The trailing comment on each entry gives two ".N" offsets.  In the entries
 * below a non-zero first column only sets bits in the low part of the pattern
 * and a non-zero second column only sets bits from bit 7 upward.
 * NOTE(review): the two columns look like destination and src0 sub-register
 * offsets -- confirm.
 *
 * NOTE(review): presumably a compacted instruction stores an index into this
 * table in place of the full 12-bit field; the code that consumes the table
 * is not visible in this part of the file -- confirm there.
 */
547 static const uint16_t xe2_subreg_table[16] = {
548    0b000000000000, /* .0 .0  */
549    0b000010000000, /* .0 .4  */
550    0b000000000100, /* .4 .0  */
551    0b010000000000, /* .0 .32 */
552    0b001000000000, /* .0 .16 */
553    0b000000001000, /* .8 .0  */
554    0b000100000000, /* .0 .8  */
555    0b010100000000, /* .0 .40 */
556    0b011000000000, /* .0 .48 */
557    0b000110000000, /* .0 .12 */
558    0b000000010000, /* .16 .0 */
559    0b011010000000, /* .0 .52 */
560    0b001100000000, /* .0 .24 */
561    0b011100000000, /* .0 .56 */
562    0b010110000000, /* .0 .44 */
563    0b010010000000, /* .0 .36 */
564 };
565 
/* Xe2 "src0 index" lookup table for instruction compaction: only 8 entries,
 * each written as an 11-bit pattern.
 *
 * The trailing comment on each entry shows the operand form the pattern
 * stands for: an optional "-" / "(abs)" prefix and a register region such as
 * r<1;1,0>.
 *
 * NOTE(review): presumably a compacted instruction stores an index into this
 * table in place of the full 11-bit field; the code that consumes the table
 * is not visible in this part of the file -- confirm there.
 */
566 static const uint16_t xe2_src0_index_table[8] = {
567    0b00100000000, /* r<1;1,0>      */
568    0b00000000000, /* r<0;1,0>      */
569    0b01000000000, /* r<2;1,0>      */
570    0b00100000010, /* -r<1;1,0>     */
571    0b01100000000, /* r<4;1,0>      */
572    0b00100000001, /* (abs)r<1;1,0> */
573    0b00000000010, /* -r<0;1,0>     */
574    0b01001000000, /* r<2;4,0>      */
575 };
576 
/* Xe2 "src1 index" lookup table for instruction compaction: 16 entries, each
 * written as a full 16-bit pattern.
 *
 * The trailing comment on each entry shows the operand form the pattern
 * stands for: an optional "-" prefix, a register region, and a ".N" suffix.
 * Unlike the Xe2 src0 table, most entries here differ only in that ".N"
 * suffix (all on the r<0;1,0> region).
 *
 * NOTE(review): presumably a compacted instruction stores an index into this
 * table in place of the full 16-bit field; the code that consumes the table
 * is not visible in this part of the file -- confirm there.
 */
577 static const uint16_t xe2_src1_index_table[16] = {
578    0b0000100000000000, /* r<1;1,0>.0  */
579    0b0000000000000000, /* r<0;1,0>.0  */
580    0b1000100000000000, /* -r<1;1,0>.0 */
581    0b0000000000010000, /* r<0;1,0>.8  */
582    0b0000000000001000, /* r<0;1,0>.4  */
583    0b0000000000011000, /* r<0;1,0>.12 */
584    0b0000000001010000, /* r<0;1,0>.40 */
585    0b0000000001000000, /* r<0;1,0>.32 */
586    0b0000000000100000, /* r<0;1,0>.16 */
587    0b0000000001111000, /* r<0;1,0>.60 */
588    0b0000000000111000, /* r<0;1,0>.28 */
589    0b0000000000101000, /* r<0;1,0>.20 */
590    0b0000000001011000, /* r<0;1,0>.44 */
591    0b0000000001001000, /* r<0;1,0>.36 */
592    0b0000000001110000, /* r<0;1,0>.56 */
593    0b0000000000110000, /* r<0;1,0>.24 */
594 };
595 
596 /* This is actually the control index table for Cherryview (26 bits), but the
597  * only difference from Broadwell (24 bits) is that it has two extra 0-bits at
598  * the start.
599  *
600  * The low 24 bits have the same mappings on both hardware.
601  */
/* 3-source control index table: 4 entries, each written as a 26-bit pattern.
 *
 * NOTE(review): presumably a compacted 3-src instruction stores an index into
 * this table in place of the full field; the code that consumes the table is
 * not visible in this part of the file -- confirm there.
 */
602 static const uint32_t gfx8_3src_control_index_table[4] = {
603    0b00100000000110000000000001,
604    0b00000000000110000000000001,
605    0b00000000001000000000000001,
606    0b00000000001000000000100001,
607 };
608 
609 /* This is actually the source index table for Cherryview (49 bits), but the
610  * only difference from Broadwell (46 bits) is that it has three extra 0-bits
611  * at the start.
612  *
613  * The low 44 bits have the same mappings on both hardware, and since the high
614  * three bits on Broadwell are zero, we can reuse Cherryview's table.
615  */
/* 3-source source index table: 4 entries; uint64_t because the patterns are
 * wider than 32 bits.
 *
 * NOTE(review): the comment above quotes "46 bits" for Broadwell but then
 * says "the low 44 bits" share mappings -- one of the two numbers looks off;
 * check against the hardware documentation.
 *
 * NOTE(review): presumably a compacted 3-src instruction stores an index into
 * this table in place of the full field; the code that consumes the table is
 * not visible in this part of the file -- confirm there.
 */
616 static const uint64_t gfx8_3src_source_index_table[4] = {
617    0b0000001110010011100100111001000001111000000000000,
618    0b0000001110010011100100111001000001111000000000010,
619    0b0000001110010011100100111001000001111000000001000,
620    0b0000001110010011100100111001000001111000000100000,
621 };
622 
/* Gfx12 3-source "control index" lookup table for instruction compaction:
 * 32 entries; uint64_t because the patterns are wider than 32 bits.
 *
 * The trailing comment on each entry is a disassembly-style annotation of the
 * pattern: optional "(W)", the "(N|Mk)" group, optional "(sat)", then the
 * first operand's register file with its type, followed by three more types.
 *
 * NOTE(review): presumably a compacted 3-src instruction stores an index into
 * this table in place of the full field; the code that consumes the table is
 * not visible in this part of the file -- confirm there.
 */
623 static const uint64_t gfx12_3src_control_index_table[32] = {
624    0b000001001010010101000000000000000100, /*      (16|M0)       grf<1>:f   :f  :f  :f */
625    0b000001001010010101000000000000000011, /*      (8|M0)        grf<1>:f   :f  :f  :f */
626    0b000001001000010101000000000000000011, /*      (8|M0)        arf<1>:f   :f  :f  :f */
627    0b000001001010010101000010000000000011, /* (W)  (8|M0)        grf<1>:f   :f  :f  :f */
628    0b000001001000010101000010000000000011, /* (W)  (8|M0)        arf<1>:f   :f  :f  :f */
629    0b000001001000010101000000000000010011, /*      (8|M8)        arf<1>:f   :f  :f  :f */
630    0b000001001010010101000000000000010011, /*      (8|M8)        grf<1>:f   :f  :f  :f */
631    0b000001001000010101000010000000010011, /* (W)  (8|M8)        arf<1>:f   :f  :f  :f */
632    0b000001001010010101000010000000010011, /* (W)  (8|M8)        grf<1>:f   :f  :f  :f */
633    0b000001001010010101000010000000000100, /* (W)  (16|M0)       grf<1>:f   :f  :f  :f */
634    0b000001001000010101000000000000000100, /*      (16|M0)       arf<1>:f   :f  :f  :f */
635    0b000001001010010101010000000000000100, /*      (16|M0)  (sat)grf<1>:f   :f  :f  :f */
636    0b000001001010010101000000000000100100, /*      (16|M16)      grf<1>:f   :f  :f  :f */
637    0b000001001000010101000010000000000100, /* (W)  (16|M0)       arf<1>:f   :f  :f  :f */
638    0b000001001010010101000010000000000000, /* (W)  (1|M0)        grf<1>:f   :f  :f  :f */
639    0b000001001010010101010000000000000011, /*      (8|M0)   (sat)grf<1>:f   :f  :f  :f */
640    0b000001001000010101000010000000110011, /* (W)  (8|M24)       arf<1>:f   :f  :f  :f */
641    0b000001001000010101000010000000100011, /* (W)  (8|M16)       arf<1>:f   :f  :f  :f */
642    0b000001001010010101000010000000110011, /* (W)  (8|M24)       grf<1>:f   :f  :f  :f */
643    0b000001001010010101000010000000100011, /* (W)  (8|M16)       grf<1>:f   :f  :f  :f */
644    0b000001001000010101000000000000100011, /*      (8|M16)       arf<1>:f   :f  :f  :f */
645    0b000001001000010101000000000000110011, /*      (8|M24)       arf<1>:f   :f  :f  :f */
646    0b000001001010010101000000000000100011, /*      (8|M16)       grf<1>:f   :f  :f  :f */
647    0b000001001010010101000000000000110011, /*      (8|M24)       grf<1>:f   :f  :f  :f */
648    0b000001001000010101010000000000000100, /*      (16|M0)  (sat)arf<1>:f   :f  :f  :f */
649    0b000001001010010101010010000000000100, /* (W)  (16|M0)  (sat)grf<1>:f   :f  :f  :f */
650    0b000001001010010101000010000000100100, /* (W)  (16|M16)      grf<1>:f   :f  :f  :f */
651    0b000001001010010001000010000000000000, /* (W)  (1|M0)        grf<1>:ud :ud :ud :ud */
652    0b000001001000010101000000000000100100, /*      (16|M16)      arf<1>:f   :f  :f  :f */
653    0b000001001010010101010000000000100100, /*      (16|M16) (sat)grf<1>:f   :f  :f  :f */
654    0b000001001010010101000010000000000010, /* (W)  (4|M0)        grf<1>:f   :f  :f  :f */
655    0b000001001000010101010000000000000011, /*      (8|M0)   (sat)arf<1>:f   :f  :f  :f */
656 };
657 
/* XeHP 3-source "control index" lookup table for instruction compaction:
 * 32 entries; uint64_t because the patterns are wider than 32 bits.
 *
 * The trailing comment on each entry is a disassembly-style annotation of the
 * pattern.  The first 22 entries are all-float forms of the kind also listed
 * in the Gfx12 table; the last 10 are annotated "dpas.8x*" with integer or
 * :bf source types, some carrying {Atomic}.
 *
 * NOTE(review): presumably a compacted 3-src instruction stores an index into
 * this table in place of the full field; the code that consumes the table is
 * not visible in this part of the file -- confirm there.
 */
658 static const uint64_t xehp_3src_control_index_table[32] = {
659    0b0000010010100010101000000000000000100, /*          (16|M0)       grf<1>:f   :f   :f   :f          */
660    0b0000010010100010101000000000000000011, /*          (8|M0)        grf<1>:f   :f   :f   :f          */
661    0b0000010010000010101000000000000000011, /*          (8|M0)        arf<1>:f   :f   :f   :f          */
662    0b0000010010100010101000010000000000011, /*     (W)  (8|M0)        grf<1>:f   :f   :f   :f          */
663    0b0000010010000010101000010000000000011, /*     (W)  (8|M0)        arf<1>:f   :f   :f   :f          */
664    0b0000010010000010101000000000000010011, /*          (8|M8)        arf<1>:f   :f   :f   :f          */
665    0b0000010010100010101000000000000010011, /*          (8|M8)        grf<1>:f   :f   :f   :f          */
666    0b0000010010000010101000010000000010011, /*     (W)  (8|M8)        arf<1>:f   :f   :f   :f          */
667    0b0000010010100010101000010000000010011, /*     (W)  (8|M8)        grf<1>:f   :f   :f   :f          */
668    0b0000010010100010101000010000000000100, /*     (W)  (16|M0)       grf<1>:f   :f   :f   :f          */
669    0b0000010010000010101000000000000000100, /*          (16|M0)       arf<1>:f   :f   :f   :f          */
670    0b0000010010100010101010000000000000100, /*          (16|M0)  (sat)grf<1>:f   :f   :f   :f          */
671    0b0000010010100010101000000000000100100, /*          (16|M16)      grf<1>:f   :f   :f   :f          */
672    0b0000010010000010101000010000000000100, /*     (W)  (16|M0)       arf<1>:f   :f   :f   :f          */
673    0b0000010010100010101000010000000000000, /*     (W)  (1|M0)        grf<1>:f   :f   :f   :f          */
674    0b0000010010100010101010000000000000011, /*          (8|M0)   (sat)grf<1>:f   :f   :f   :f          */
675    0b0000010010000010101000010000000100011, /*     (W)  (8|M16)       arf<1>:f   :f   :f   :f          */
676    0b0000010010000010101000010000000110011, /*     (W)  (8|M24)       arf<1>:f   :f   :f   :f          */
677    0b0000010010100010101000010000000100011, /*     (W)  (8|M16)       grf<1>:f   :f   :f   :f          */
678    0b0000010010100010101000010000000110011, /*     (W)  (8|M24)       grf<1>:f   :f   :f   :f          */
679    0b0000010010000010101000000000000110011, /*          (8|M24)       arf<1>:f   :f   :f   :f          */
680    0b0000010010000010101000000000000100011, /*          (8|M16)       arf<1>:f   :f   :f   :f          */
681    0b0000000100111110011000000000000000011, /* dpas.8x* (8|M0)        grf<1>:d   :d  :ub   :b          */
682    0b0000000000111110011000100000000000011, /* dpas.8x* (8|M0)        grf<1>:d   :d  :ub  :ub {Atomic} */
683    0b0000100100111110011000100000000000011, /* dpas.8x* (8|M0)        grf<1>:d   :d   :b   :b {Atomic} */
684    0b0000100000111110011000100000000000011, /* dpas.8x* (8|M0)        grf<1>:d   :d   :b  :ub {Atomic} */
685    0b0000100100111110011000000000000000011, /* dpas.8x* (8|M0)        grf<1>:d   :d   :b   :b          */
686    0b0000000000111110011000000000000000011, /* dpas.8x* (8|M0)        grf<1>:d   :d  :ub  :ub          */
687    0b0000000100111110011000100000000000011, /* dpas.8x* (8|M0)        grf<1>:d   :d  :ub   :b {Atomic} */
688    0b0000100000111110011000000000000000011, /* dpas.8x* (8|M0)        grf<1>:d   :d   :b  :ub          */
689    0b0000101101111010101000100000000000011, /* dpas.8x* (8|M0)        grf<1>:f   :f  :bf  :bf {Atomic} */
690    0b0000101101111010101000000000000000011, /* dpas.8x* (8|M0)        grf<1>:f   :f  :bf  :bf          */
691 };
692 
/**
 * Xe2 (ver >= 20) control-index table used when compacting 3-source
 * instructions that are not DPAS.
 *
 * set_3src_control_index() packs 34 bits of the uncompacted instruction
 * (bit ranges 95:92, 90:88, 82:80, 50, 49:48, 42:40, 39, 38:36, 34, 32, 31,
 * 28, 27:26, 25:24, 23:21 and 20:18, most significant first) and searches
 * this table for an identical value.  The position of the matching entry is
 * what gets written as the compacted 3src control index.
 *
 * The trailing comment on each entry is a disassembly-style summary of the
 * fields that entry encodes.
 */
static const uint64_t xe2_3src_control_index_table[16] = {
   0b0000010010100010101000000000000100, /* (16|M0) grf<1>:f :f :f :f      */
   0b0000010010000010101000000000000100, /* (16|M0) arf<1>:f :f :f :f      */
   0b0000010010100010101000100000000100, /* (W)(16|M0) grf<1>:f :f :f :f   */
   0b0000010010000010101000100000000100, /* (W)(16|M0) arf<1>:f :f :f :f   */
   0b0000011011100011101100000000000100, /* (16|M0) grf<1>:df :df :df :df  */
   0b0000011011100011101100000010000100, /* (16|M16) grf<1>:df :df :df :df */
   0b0000011011000011101100000000000100, /* (16|M0) arf<1>:df :df :df :df  */
   0b0000010010100010101000000000000101, /* (32|M0) grf<1>:f :f :f :f      */
   0b0000010010000010101000000000000101, /* (32|M0) arf<1>:f :f :f :f      */
   0b0000010010000010101010000000000100, /* (16|M0) (sat)arf<1>:f :f :f :f */
   0b0000010010100010101010000000000100, /* (16|M0) (sat)grf<1>:f :f :f :f */
   0b0000011011000011101100000010000100, /* (16|M16) arf<1>:df :df :df :df */
   0b0000010010100010101000100000000000, /* (W)(1|M0) grf<1>:f :f :f :f    */
   0b0000010010100010001000000000000100, /* (16|M0) grf<1>:ud :ud :ud :ud  */
   0b0000110110100110011000000000000101, /* (32|M0) grf<1>:d :d :d :d      */
   0b0000011011000011101100000000000011, /* (8|M0) arf<1>:df :df :df :df   */
};
711 
/**
 * Xe2 (ver >= 20) control-index table used when compacting DPAS
 * instructions.
 *
 * set_3src_control_index() selects this table instead of
 * xe2_3src_control_index_table when is_dpas is set; the 34-bit key is built
 * from the same instruction bits for both tables.  The position of the
 * matching entry becomes the compacted 3src control index.
 */
static const uint64_t xe2_3src_dpas_control_index_table[16] = {
   0b0000000000111110011001000000000100, /* dpas.8x* (16|M0) grf:d :d :ub :ub Atomic */
   0b0000000100111110011001000000000100, /* dpas.8x* (16|M0) grf:d :d :ub :b Atomic */
   0b0000100000111110011001000000000100, /* dpas.8x* (16|M0) grf:d :d :b :ub Atomic */
   0b0000100100111110011001000000000100, /* dpas.8x* (16|M0) grf:d :d :b :b Atomic */
   0b0000000000111110011000000000000100, /* dpas.8x* (16|M0) grf:d :d :ub :ub */
   0b0000100100111110011000000000000100, /* dpas.8x* (16|M0) grf:d :d :b :b */
   0b0000101101111010101001000000000100, /* dpas.8x* (16|M0) grf:f :f :bf :bf Atomic */
   0b0000101101111101101001000000000100, /* dpas.8x* (16|M0) grf:f :bf :bf :bf Atomic */
   0b0000101101111010110101000000000100, /* dpas.8x* (16|M0) grf:bf :f :bf :bf Atomic */
   0b0000101101111101110101000000000100, /* dpas.8x* (16|M0) grf:bf :bf :bf :bf Atomic */
   0b0000101101111010101000000000000100, /* dpas.8x* (16|M0) grf:f :f :bf :bf */
   0b0000001001111010101001000000000100, /* dpas.8x* (16|M0) grf:f :f :hf :hf Atomic */
   0b0000001001111001101001000000000100, /* dpas.8x* (16|M0) grf:f :hf :hf :hf Atomic */
   0b0000001001111010100101000000000100, /* dpas.8x* (16|M0) grf:hf :f :hf :hf Atomic */
   0b0000001001111001100101000000000100, /* dpas.8x* (16|M0) grf:hf :hf :hf :hf Atomic */
   0b0000001001111010101000000000000100, /* dpas.8x* (16|M0) grf:f :f :hf :hf */
};
730 
/**
 * Source-index table used when compacting 3-source instructions on ver >= 12
 * hardware that is older than verx10 125.
 *
 * set_3src_source_index() packs 21 bits of the uncompacted instruction
 * (bit ranges 114, 113:112, 98, 97:96, 91, 87:86, 85:84, 83, 66, 65:64, 47,
 * 46, 45:44, 43 and 35, most significant first) and searches for an
 * identical entry; the position of the match becomes the compacted 3src
 * source index.
 *
 * Each entry's comment summarizes the three source operands it encodes.
 */
static const uint32_t gfx12_3src_source_index_table[32] = {
   0b100101100001100000000, /*  grf<0;0>   grf<8;1>  grf<0> */
   0b100101100001001000010, /*  arf<4;1>   grf<8;1>  grf<0> */
   0b101101100001101000011, /*  grf<8;1>   grf<8;1>  grf<1> */
   0b100101100001101000011, /*  grf<8;1>   grf<8;1>  grf<0> */
   0b101100000000101000011, /*  grf<8;1>   grf<0;0>  grf<1> */
   0b101101100001101001011, /* -grf<8;1>   grf<8;1>  grf<1> */
   0b101001100001101000011, /*  grf<8;1>   arf<8;1>  grf<1> */
   0b100001100001100000000, /*  grf<0;0>   arf<8;1>  grf<0> */
   0b101101100001100000000, /*  grf<0;0>   grf<8;1>  grf<1> */
   0b101101100101101000011, /*  grf<8;1>   grf<8;1> -grf<1> */
   0b101101110001101000011, /*  grf<8;1>  -grf<8;1>  grf<1> */
   0b101100000000100000000, /*  grf<0;0>   grf<0;0>  grf<1> */
   0b100001100001101000011, /*  grf<8;1>   arf<8;1>  grf<0> */
   0b100101110001100000000, /*  grf<0;0>  -grf<8;1>  grf<0> */
   0b100101110001101000011, /*  grf<8;1>  -grf<8;1>  grf<0> */
   0b100101100001101001011, /* -grf<8;1>   grf<8;1>  grf<0> */
   0b100100000000101000011, /*  grf<8;1>   grf<0;0>  grf<0> */
   0b100101100001100001000, /* -grf<0;0>   grf<8;1>  grf<0> */
   0b100100000000100000000, /*  grf<0;0>   grf<0;0>  grf<0> */
   0b101101110001100000000, /*  grf<0;0>  -grf<8;1>  grf<1> */
   0b100101100101100000000, /*  grf<0;0>   grf<8;1> -grf<0> */
   0b101001100001100000000, /*  grf<0;0>   arf<8;1>  grf<1> */
   0b100101100101101000011, /*  grf<8;1>   grf<8;1> -grf<0> */
   0b101101100101101001011, /* -grf<8;1>   grf<8;1> -grf<1> */
   0b101001100001101001011, /* -grf<8;1>   arf<8;1>  grf<1> */
   0b101101110001101001011, /* -grf<8;1>  -grf<8;1>  grf<1> */
   0b101100010000101000011, /*  grf<8;1>  -grf<0;0>  grf<1> */
   0b101100000100101000011, /*  grf<8;1>   grf<0;0> -grf<1> */
   0b101101100001100001000, /* -grf<0;0>   grf<8;1>  grf<1> */
   0b101101100101100000000, /*  grf<0;0>   grf<8;1> -grf<1> */
   0b100100000100101000011, /*  grf<8;1>   grf<0;0> -grf<0> */
   0b101001100101101000011, /*  grf<8;1>   arf<8;1> -grf<1> */
};
765 
/**
 * Source-index table used when compacting 3-source instructions on
 * verx10 >= 125 hardware that is older than ver 20.
 *
 * set_3src_source_index() builds a 21-bit key from the uncompacted
 * instruction and searches this table for an identical entry; the position
 * of the match becomes the compacted 3src source index.
 *
 * Entry comments summarize the three source operands; the entries tagged
 * "dpas.*x1" describe DPAS operand encodings instead of regioning.
 */
static const uint32_t xehp_3src_source_index_table[32] = {
   0b100100000001100000000, /*           grf<0;0>   grf<1;0>     grf<0>      */
   0b100100000001000000001, /*           arf<1;0>   grf<1;0>     grf<0>      */
   0b101100000001100000001, /*           grf<1;0>   grf<1;0>     grf<1>      */
   0b100100000001100000001, /*           grf<1;0>   grf<1;0>     grf<0>      */
   0b101100000000100000001, /*           grf<1;0>   grf<0;0>     grf<1>      */
   0b101100000001100001001, /*          -grf<1;0>   grf<1;0>     grf<1>      */
   0b101000000001100000001, /*           grf<1;0>   arf<1;0>     grf<1>      */
   0b101100000001100000000, /*           grf<0;0>   grf<1;0>     grf<1>      */
   0b100000000001100000000, /*           grf<0;0>   arf<1;0>     grf<0>      */
   0b101100000101100000001, /*           grf<1;0>   grf<1;0>    -grf<1>      */
   0b101100010001100000001, /*           grf<1;0>  -grf<1;0>     grf<1>      */
   0b101100000000100000000, /*           grf<0;0>   grf<0;0>     grf<1>      */
   0b100000000001100000001, /*           grf<1;0>   arf<1;0>     grf<0>      */
   0b100100010001100000000, /*           grf<0;0>  -grf<1;0>     grf<0>      */
   0b100100010001100000001, /*           grf<1;0>  -grf<1;0>     grf<0>      */
   0b100100000001100001001, /*          -grf<1;0>   grf<1;0>     grf<0>      */
   0b100100000000100000001, /*           grf<1;0>   grf<0;0>     grf<0>      */
   0b100100000001100001000, /*          -grf<0;0>   grf<1;0>     grf<0>      */
   0b100100000000100000000, /*           grf<0;0>   grf<0;0>     grf<0>
                             * dpas.*x1  grf:d      grf:[ub,b]   grf:[ub,b]
                             * dpas.*x1  grf:f      grf:bf       grf:bf
                             */
   0b101100010001100000000, /*           grf<0;0>  -grf<1;0>     grf<1>      */
   0b100100000101100000000, /*           grf<0;0>   grf<1;0>    -grf<0>      */
   0b101000000001100000000, /*           grf<0;0>   arf<1;0>     grf<1>      */
   0b100100000101100000001, /*           grf<1;0>   grf<1;0>    -grf<0>      */
   0b101100000101100001001, /*          -grf<1;0>   grf<1;0>    -grf<1>      */
   0b100100010000100000000, /* dpas.*x1  grf:d      grf:[u2,s2]  grf:[ub,b]  */
   0b100100000100100000000, /* dpas.*x1  grf:d      grf:[ub,b]   grf:[u2,s2] */
   0b100100010100100000000, /* dpas.*x1  grf:d      grf:[u2,s2]  grf:[u2,s2] */
   0b100100001000100000000, /* dpas.*x1  grf:d      grf:[u4,s4]  grf:[ub,b]  */
   0b100100001100100000000, /* dpas.*x1  grf:d      grf:[u4,s4]  grf:[u2,s2] */
   0b100100000010100000000, /* dpas.*x1  grf:d      grf:[ub,b]   grf:[u4,s4] */
   0b100100001010100000000, /* dpas.*x1  grf:d      grf:[u4,s4]  grf:[u4,s4] */
   0b100100010010100000000, /* dpas.*x1  grf:d      grf:[u2,s2]  grf:[u4,s4] */
};
803 
/**
 * Xe2 (ver >= 20) source-index table used when compacting 3-source
 * instructions that are not DPAS.
 *
 * set_3src_source_index() builds a 21-bit key from the uncompacted
 * instruction and searches this table for an identical entry; the position
 * of the match becomes the compacted 3src source index.
 *
 * Each entry's comment summarizes the three source operands it encodes.
 */
static const uint32_t xe2_3src_source_index_table[16] = {
   0b101100000001100000001, /* grf<1;0> grf<1;0> grf<1>  */
   0b101100000001000000001, /* arf<1;0> grf<1;0> grf<1>  */
   0b100100000001100000000, /* grf<0;0> grf<1;0> grf<0>  */
   0b100100000001000000001, /* arf<1;0> grf<1;0> grf<0>  */
   0b100100000001100000001, /* grf<1;0> grf<1;0> grf<0>  */
   0b100000000001100000000, /* grf<0;0> arf<1;0> grf<0>  */
   0b100000000001100000001, /* grf<1;0> arf<1;0> grf<0>  */
   0b101100000101100000001, /* grf<1;0> grf<1;0> -grf<1> */
   0b101000000001100000001, /* grf<1;0> arf<1;0> grf<1>  */
   0b101000000001000000001, /* arf<1;0> arf<1;0> grf<1>  */
   0b100000000001000000001, /* arf<1;0> arf<1;0> grf<0>  */
   0b100100000000100000000, /* grf<0;0> grf<0;0> grf<0>  */
   0b100100000000100000001, /* grf<1;0> grf<0;0> grf<0>  */
   0b101100000101000000001, /* arf<1;0> grf<1;0> -grf<1> */
   0b100100010001100000001, /* grf<1;0> -grf<1;0> grf<0> */
   0b100100010001000000001, /* arf<1;0> -grf<1;0> grf<0> */
};
822 
/**
 * Xe2 (ver >= 20) source-index table used when compacting DPAS
 * instructions.
 *
 * set_3src_source_index() selects this table instead of
 * xe2_3src_source_index_table when is_dpas is set; the 21-bit key is built
 * from the same instruction bits for both tables.  The position of the
 * matching entry becomes the compacted 3src source index.
 */
static const uint32_t xe2_3src_dpas_source_index_table[16] = {
   0b100100000000100000000, /* dpas.*x1 grf:d grf:[ub,b] grf:[ub,b]
                             * dpas.*x1 grf:[f,bf] grf:bf grf:bf
                             * dpas.*x1 grf:[f,hf] grf:hf grf:hf
                             */
   0b100100000010100000000, /* dpas.*x1 grf:d grf:[ub,b] grf:[u4,s4] */
   0b100100000100100000000, /* dpas.*x1 grf:d grf:[ub,b] grf:[u2,s2] */
   0b100100001000100000000, /* dpas.*x1 grf:d grf:[u4,s4] grf:[ub,b] */
   0b100100001010100000000, /* dpas.*x1 grf:d grf:[u4,s4] grf:[u4,s4] */
   0b100100001100100000000, /* dpas.*x1 grf:d grf:[u4,s4] grf:[u2,s2] */
   0b100100010000100000000, /* dpas.*x1 grf:d grf:[u2,s2] grf:[ub,b] */
   0b100100010010100000000, /* dpas.*x1 grf:d grf:[u2,s2] grf:[u4,s4] */
   0b100100010100100000000, /* dpas.*x1 grf:d grf:[u2,s2] grf:[u2,s2] */
   0b100100000000100000010, /* dpas.*x2 grf:d grf:[ub,b] grf:[ub,b] */
   0b100100000010100000010, /* dpas.*x2 grf:d grf:[ub,b] grf:[u4,s4] */
   0b100100001000100000010, /* dpas.*x2 grf:d grf:[u4,s4] grf:[ub,b] */
   0b100100001010100000010, /* dpas.*x2 grf:d grf:[u4,s4] grf:[u4,s4] */
   0b100100010100100000010, /* dpas.*x2 grf:d grf:[u2,s2] grf:[u2,s2] */
   0b100100000000100001110, /* dpas.*x8 grf:d grf:[ub,b] grf:[ub,b] */
   0b100100001010100001110, /* dpas.*x8 grf:d grf:[u4,s4] grf:[u4,s4] */
};
844 
/**
 * Subregister lookup table for 3-source instruction compaction, named for
 * Gfx12.  Every entry is a 20-bit value.
 *
 * NOTE(review): the code that builds the lookup key and searches this table
 * is not in this part of the file; presumably it works like the other
 * set_*_index() helpers (exact-match search, position is the compacted
 * index).  The four columns in each entry's comment look like byte
 * subregister offsets for the instruction's four operands -- confirm the
 * operand order against the consumer.
 */
static const uint32_t gfx12_3src_subreg_table[32] = {
   0b00000000000000000000, /* .0  .0  .0  .0  */
   0b00100000000000000000, /* .0  .0  .0  .4  */
   0b00000000000110000000, /* .0  .12 .0  .0  */
   0b10100000000000000000, /* .0  .0  .0  .20 */
   0b10000000001110000000, /* .0  .28 .0  .16 */
   0b01100000000000000000, /* .0  .0  .0  .12 */
   0b01000000000000000000, /* .0  .0  .0  .8  */
   0b00000010000000000000, /* .0  .0  .8  .0  */
   0b00000001000000000000, /* .0  .0  .4  .0  */
   0b11000000000000000000, /* .0  .0  .0  .24 */
   0b10000000000000000000, /* .0  .0  .0  .16 */
   0b11100000000000000000, /* .0  .0  .0  .28 */
   0b00000110000000000000, /* .0  .0  .24 .0  */
   0b00000000000010000000, /* .0  .4  .0  .0  */
   0b00000100000000000000, /* .0  .0  .16 .0  */
   0b00000011000000000000, /* .0  .0  .12 .0  */
   0b00000101000000000000, /* .0  .0  .20 .0  */
   0b00000111000000000000, /* .0  .0  .28 .0  */
   0b00000000000100000000, /* .0  .8  .0  .0  */
   0b00000000001000000000, /* .0  .16 .0  .0  */
   0b00000000001100000000, /* .0  .24 .0  .0  */
   0b00000000001010000000, /* .0  .20 .0  .0  */
   0b00000000001110000000, /* .0  .28 .0  .0  */
   0b11000000001110000000, /* .0  .28 .0  .24 */
   0b00100000000100000000, /* .0  .8  .0  .4  */
   0b00100000000110000000, /* .0  .12 .0  .4  */
   0b01000000000110000000, /* .0  .12 .0  .8  */
   0b10000000001100000000, /* .0  .24 .0  .16 */
   0b10000000001010000000, /* .0  .20 .0  .16 */
   0b01100000000010000000, /* .0  .4  .0  .12 */
   0b10100000001110000000, /* .0  .28 .0  .20 */
   0b01000000000010000000, /* .0  .4  .0  .8  */
};
879 
/**
 * Subregister lookup table for 3-source instruction compaction, named for
 * Xe2.  Every entry is a 20-bit value.
 *
 * NOTE(review): the code that builds the lookup key and searches this table
 * is not in this part of the file; presumably it works like the other
 * set_*_index() helpers (exact-match search, position is the compacted
 * index).  The four columns in each entry's comment look like byte
 * subregister offsets for the instruction's four operands -- confirm the
 * operand order against the consumer.
 */
static const uint32_t xe2_3src_subreg_table[32] = {
   0b00000000000000000000, /* .0 .0 .0 .0   */
   0b00100000000000000000, /* .0 .0 .0 .8   */
   0b10000000000000000000, /* .0 .0 .0 .32  */
   0b00010000000000000000, /* .0 .0 .0 .4   */
   0b11100000000000000000, /* .0 .0 .0 .56  */
   0b01010000000000000000, /* .0 .0 .0 .20  */
   0b10110000000000000000, /* .0 .0 .0 .44  */
   0b01000000000011000000, /* .0 .12 .0 .16 */
   0b01100000000000000000, /* .0 .0 .0 .24  */
   0b10100000000000000000, /* .0 .0 .0 .40  */
   0b11000000000000000000, /* .0 .0 .0 .48  */
   0b01000000000000000000, /* .0 .0 .0 .16  */
   0b01110000000110000000, /* .0 .24 .0 .28 */
   0b10100000001001000000, /* .0 .36 .0 .40 */
   0b11010000001100000000, /* .0 .48 .0 .52 */
   0b01110000000000000000, /* .0 .0 .0 .28  */
   0b11110000000000000000, /* .0 .0 .0 .60  */
   0b10010000000000000000, /* .0 .0 .0 .36  */
   0b00110000000000000000, /* .0 .0 .0 .12  */
   0b00100000000010000000, /* .0 .8 .0 .8   */
   0b00010000000001000000, /* .0 .4 .0 .4   */
   0b00110000000011000000, /* .0 .12 .0 .12 */
   0b11010000000000000000, /* .0 .0 .0 .52  */
   0b00000000000001000000, /* .0 .4 .0 .0   */
   0b00000101100000000000, /* .0 .0 .44 .0  */
   0b00000100000000000000, /* .0 .0 .32 .0  */
   0b00000000000010000000, /* .0 .8 .0 .0   */
   0b00000000001100000000, /* .0 .48 .0 .0  */
   0b00000000001101000000, /* .0 .52 .0 .0  */
   0b00000110100000000000, /* .0 .0 .52 .0  */
   0b00000000001000000000, /* .0 .32 .0 .0  */
   0b00000000001111000000, /* .0 .60 .0 .0  */
};
914 
/**
 * Per-ISA context shared by the set_*_index() compaction helpers: the ISA
 * description plus the lookup tables each helper searches.
 */
struct compaction_state {
   /* ISA description; the helpers read the device info via isa->devinfo. */
   const struct brw_isa_info *isa;
   /* Searched by set_control_index(), which scans 32 entries. */
   const uint32_t *control_index_table;
   /* Searched by set_datatype_index(), which scans 32 entries. */
   const uint32_t *datatype_table;
   /* Searched by set_subreg_index(). */
   const uint16_t *subreg_table;
   /* Searched by set_src0_index(). */
   const uint16_t *src0_index_table;
   /* Searched by set_src1_index() when src1 is not an immediate. */
   const uint16_t *src1_index_table;
};
923 
/* Forward declaration; the definition is not in this part of the file.
 * NOTE(review): presumably fills in *c with the tables matching the given
 * ISA -- confirm against the definition.
 */
static void compaction_state_init(struct compaction_state *c,
                                  const struct brw_isa_info *isa);
926 
927 static bool
set_control_index(const struct compaction_state * c,brw_compact_inst * dst,const brw_inst * src)928 set_control_index(const struct compaction_state *c,
929                   brw_compact_inst *dst, const brw_inst *src)
930 {
931    const struct intel_device_info *devinfo = c->isa->devinfo;
932    uint32_t uncompacted; /* 19b/IVB+; 21b/TGL+ */
933 
934    if (devinfo->ver >= 20) {
935       uncompacted = (brw_inst_bits(src, 95, 92) << 14) | /*  4b */
936                     (brw_inst_bits(src, 34, 34) << 13) | /*  1b */
937                     (brw_inst_bits(src, 32, 32) << 12) | /*  1b */
938                     (brw_inst_bits(src, 31, 31) << 11) | /*  1b */
939                     (brw_inst_bits(src, 28, 28) << 10) | /*  1b */
940                     (brw_inst_bits(src, 27, 26) <<  8) | /*  2b */
941                     (brw_inst_bits(src, 25, 24) <<  6) | /*  2b */
942                     (brw_inst_bits(src, 23, 21) <<  3) | /*  3b */
943                     (brw_inst_bits(src, 20, 18));        /*  3b */
944    } else if (devinfo->ver >= 12) {
945       uncompacted = (brw_inst_bits(src, 95, 92) << 17) | /*  4b */
946                     (brw_inst_bits(src, 34, 34) << 16) | /*  1b */
947                     (brw_inst_bits(src, 33, 33) << 15) | /*  1b */
948                     (brw_inst_bits(src, 32, 32) << 14) | /*  1b */
949                     (brw_inst_bits(src, 31, 31) << 13) | /*  1b */
950                     (brw_inst_bits(src, 28, 28) << 12) | /*  1b */
951                     (brw_inst_bits(src, 27, 24) <<  8) | /*  4b */
952                     (brw_inst_bits(src, 23, 22) <<  6) | /*  2b */
953                     (brw_inst_bits(src, 21, 19) <<  3) | /*  3b */
954                     (brw_inst_bits(src, 18, 16));        /*  3b */
955    } else {
956       uncompacted = (brw_inst_bits(src, 33, 31) << 16) | /*  3b */
957                     (brw_inst_bits(src, 23, 12) <<  4) | /* 12b */
958                     (brw_inst_bits(src, 10,  9) <<  2) | /*  2b */
959                     (brw_inst_bits(src, 34, 34) <<  1) | /*  1b */
960                     (brw_inst_bits(src,  8,  8));        /*  1b */
961    }
962 
963    for (int i = 0; i < 32; i++) {
964       if (c->control_index_table[i] == uncompacted) {
965          brw_compact_inst_set_control_index(devinfo, dst, i);
966 	 return true;
967       }
968    }
969 
970    return false;
971 }
972 
973 static bool
set_datatype_index(const struct compaction_state * c,brw_compact_inst * dst,const brw_inst * src,bool is_immediate)974 set_datatype_index(const struct compaction_state *c, brw_compact_inst *dst,
975                    const brw_inst *src, bool is_immediate)
976 {
977    const struct intel_device_info *devinfo = c->isa->devinfo;
978    uint32_t uncompacted; /* 18b/G45+; 21b/BDW+; 20b/TGL+ */
979 
980    if (devinfo->ver >= 12) {
981       uncompacted = (brw_inst_bits(src, 91, 88) << 15) | /*  4b */
982                     (brw_inst_bits(src, 66, 66) << 14) | /*  1b */
983                     (brw_inst_bits(src, 50, 50) << 13) | /*  1b */
984                     (brw_inst_bits(src, 49, 48) << 11) | /*  2b */
985                     (brw_inst_bits(src, 47, 47) << 10) | /*  1b */
986                     (brw_inst_bits(src, 46, 46) <<  9) | /*  1b */
987                     (brw_inst_bits(src, 43, 40) <<  5) | /*  4b */
988                     (brw_inst_bits(src, 39, 36) <<  1) | /*  4b */
989                     (brw_inst_bits(src, 35, 35));        /*  1b */
990 
991       /* Src1.RegFile overlaps with the immediate, so ignore it if an immediate
992        * is present
993        */
994       if (!is_immediate) {
995          uncompacted |= brw_inst_bits(src, 98, 98) << 19; /* 1b */
996       }
997    } else {
998       uncompacted = (brw_inst_bits(src, 63, 61) << 18) | /*  3b */
999                     (brw_inst_bits(src, 94, 89) << 12) | /*  6b */
1000                     (brw_inst_bits(src, 46, 35));        /* 12b */
1001    }
1002 
1003    for (int i = 0; i < 32; i++) {
1004       if (c->datatype_table[i] == uncompacted) {
1005          brw_compact_inst_set_datatype_index(devinfo, dst, i);
1006 	 return true;
1007       }
1008    }
1009 
1010    return false;
1011 }
1012 
1013 static bool
set_subreg_index(const struct compaction_state * c,brw_compact_inst * dst,const brw_inst * src,bool is_immediate)1014 set_subreg_index(const struct compaction_state *c, brw_compact_inst *dst,
1015                  const brw_inst *src, bool is_immediate)
1016 {
1017    const struct intel_device_info *devinfo = c->isa->devinfo;
1018    const unsigned table_len = devinfo->ver >= 20 ?
1019       ARRAY_SIZE(xe2_subreg_table) : ARRAY_SIZE(g45_subreg_table);
1020    uint16_t uncompacted; /* 15b/G45+; 12b/Xe2+ */
1021 
1022    if (devinfo->ver >= 20) {
1023       uncompacted = (brw_inst_bits(src, 33, 33) << 0) |    /* 1b */
1024                     (brw_inst_bits(src, 55, 51) << 1) |    /* 5b */
1025                     (brw_inst_bits(src, 71, 67) << 6) |    /* 5b */
1026                     (brw_inst_bits(src, 87, 87) << 11);    /* 1b */
1027    } else if (devinfo->ver >= 12) {
1028       uncompacted = (brw_inst_bits(src, 55, 51) << 0) |    /* 5b */
1029                     (brw_inst_bits(src, 71, 67) << 5);     /* 5b */
1030 
1031       if (!is_immediate)
1032          uncompacted |= brw_inst_bits(src, 103, 99) << 10; /* 5b */
1033    } else {
1034       uncompacted = (brw_inst_bits(src, 52, 48) << 0) |    /* 5b */
1035                     (brw_inst_bits(src, 68, 64) << 5);     /* 5b */
1036 
1037       if (!is_immediate)
1038          uncompacted |= brw_inst_bits(src, 100, 96) << 10; /* 5b */
1039    }
1040 
1041    for (int i = 0; i < table_len; i++) {
1042       if (c->subreg_table[i] == uncompacted) {
1043          brw_compact_inst_set_subreg_index(devinfo, dst, i);
1044 	 return true;
1045       }
1046    }
1047 
1048    return false;
1049 }
1050 
1051 static bool
set_src0_index(const struct compaction_state * c,brw_compact_inst * dst,const brw_inst * src)1052 set_src0_index(const struct compaction_state *c, brw_compact_inst *dst,
1053                const brw_inst *src)
1054 {
1055    const struct intel_device_info *devinfo = c->isa->devinfo;
1056    uint16_t uncompacted; /* 12b/G45+; 11b/Xe2+ */
1057    int table_len;
1058 
1059    if (devinfo->ver >= 12) {
1060       table_len = (devinfo->ver >= 20 ? ARRAY_SIZE(xe2_src0_index_table) :
1061                    ARRAY_SIZE(gfx12_src0_index_table));
1062       uncompacted = (devinfo->ver >= 20 ? 0 :
1063                      brw_inst_bits(src, 87, 87) << 11) | /*  1b */
1064                     (brw_inst_bits(src, 86, 84) << 8) | /*  3b */
1065                     (brw_inst_bits(src, 83, 81) << 5) | /*  3b */
1066                     (brw_inst_bits(src, 80, 80) << 4) | /*  1b */
1067                     (brw_inst_bits(src, 65, 64) << 2) | /*  2b */
1068                     (brw_inst_bits(src, 45, 44));       /*  2b */
1069    } else {
1070       table_len = ARRAY_SIZE(gfx8_src_index_table);
1071       uncompacted = brw_inst_bits(src, 88, 77);         /* 12b */
1072    }
1073 
1074    for (int i = 0; i < table_len; i++) {
1075       if (c->src0_index_table[i] == uncompacted) {
1076          brw_compact_inst_set_src0_index(devinfo, dst, i);
1077 	 return true;
1078       }
1079    }
1080 
1081    return false;
1082 }
1083 
1084 static bool
set_src1_index(const struct compaction_state * c,brw_compact_inst * dst,const brw_inst * src,bool is_immediate,unsigned imm)1085 set_src1_index(const struct compaction_state *c, brw_compact_inst *dst,
1086                const brw_inst *src, bool is_immediate, unsigned imm)
1087 {
1088    const struct intel_device_info *devinfo = c->isa->devinfo;
1089    if (is_immediate) {
1090       if (devinfo->ver >= 12) {
1091          /* src1 index takes the low 4 bits of the 12-bit compacted value */
1092          brw_compact_inst_set_src1_index(devinfo, dst, imm & 0xf);
1093       } else {
1094          /* src1 index takes the high 5 bits of the 13-bit compacted value */
1095          brw_compact_inst_set_src1_index(devinfo, dst, imm >> 8);
1096       }
1097       return true;
1098    } else {
1099       uint16_t uncompacted; /* 12b/G45+ 16b/Xe2+ */
1100       int table_len;
1101 
1102       if (devinfo->ver >= 20) {
1103          table_len = ARRAY_SIZE(xe2_src1_index_table);
1104          uncompacted = (brw_inst_bits(src, 121, 120) << 14) | /*  2b */
1105                        (brw_inst_bits(src, 118, 116) << 11) | /*  3b */
1106                        (brw_inst_bits(src, 115, 113) <<  8) | /*  3b */
1107                        (brw_inst_bits(src, 112, 112) <<  7) | /*  1b */
1108                        (brw_inst_bits(src, 103,  99) <<  2) | /*  5b */
1109                        (brw_inst_bits(src,  97,  96));        /*  2b */
1110       } else if (devinfo->ver >= 12) {
1111          table_len = ARRAY_SIZE(gfx12_src0_index_table);
1112          uncompacted = (brw_inst_bits(src, 121, 120) << 10) | /*  2b */
1113                        (brw_inst_bits(src, 119, 116) <<  6) | /*  4b */
1114                        (brw_inst_bits(src, 115, 113) <<  3) | /*  3b */
1115                        (brw_inst_bits(src, 112, 112) <<  2) | /*  1b */
1116                        (brw_inst_bits(src,  97,  96));        /*  2b */
1117       } else {
1118          table_len = ARRAY_SIZE(gfx8_src_index_table);
1119          uncompacted = brw_inst_bits(src, 120, 109);          /* 12b */
1120       }
1121 
1122       for (int i = 0; i < table_len; i++) {
1123          if (c->src1_index_table[i] == uncompacted) {
1124             brw_compact_inst_set_src1_index(devinfo, dst, i);
1125             return true;
1126          }
1127       }
1128    }
1129 
1130    return false;
1131 }
1132 
1133 static bool
set_3src_control_index(const struct intel_device_info * devinfo,brw_compact_inst * dst,const brw_inst * src,bool is_dpas)1134 set_3src_control_index(const struct intel_device_info *devinfo,
1135                        brw_compact_inst *dst, const brw_inst *src,
1136                        bool is_dpas)
1137 {
1138    if (devinfo->ver >= 20) {
1139       assert(is_dpas || !brw_inst_bits(src, 49, 49));
1140 
1141       const uint64_t uncompacted =        /* 34b/Xe2+ */
1142          (brw_inst_bits(src, 95, 92) << 30) | /*  4b */
1143          (brw_inst_bits(src, 90, 88) << 27) | /*  3b */
1144          (brw_inst_bits(src, 82, 80) << 24) | /*  3b */
1145          (brw_inst_bits(src, 50, 50) << 23) | /*  1b */
1146          (brw_inst_bits(src, 49, 48) << 21) | /*  2b */
1147          (brw_inst_bits(src, 42, 40) << 18) | /*  3b */
1148          (brw_inst_bits(src, 39, 39) << 17) | /*  1b */
1149          (brw_inst_bits(src, 38, 36) << 14) | /*  3b */
1150          (brw_inst_bits(src, 34, 34) << 13) | /*  1b */
1151          (brw_inst_bits(src, 32, 32) << 12) | /*  1b */
1152          (brw_inst_bits(src, 31, 31) << 11) | /*  1b */
1153          (brw_inst_bits(src, 28, 28) << 10) | /*  1b */
1154          (brw_inst_bits(src, 27, 26) <<  8) | /*  2b */
1155          (brw_inst_bits(src, 25, 24) <<  6) | /*  2b */
1156          (brw_inst_bits(src, 23, 21) <<  3) | /*  3b */
1157          (brw_inst_bits(src, 20, 18));        /*  3b */
1158 
1159       /* The bits used to index the tables for 3src and 3src-dpas
1160        * are the same, so just need to pick the right one.
1161        */
1162       const uint64_t *table = is_dpas ? xe2_3src_dpas_control_index_table :
1163                                         xe2_3src_control_index_table;
1164       const unsigned size = is_dpas ? ARRAY_SIZE(xe2_3src_dpas_control_index_table) :
1165                                       ARRAY_SIZE(xe2_3src_control_index_table);
1166       for (unsigned i = 0; i < size; i++) {
1167          if (table[i] == uncompacted) {
1168             brw_compact_inst_set_3src_control_index(devinfo, dst, i);
1169             return true;
1170          }
1171       }
1172    } else if (devinfo->verx10 >= 125) {
1173       uint64_t uncompacted =             /* 37b/XeHP+ */
1174          (brw_inst_bits(src, 95, 92) << 33) | /*  4b */
1175          (brw_inst_bits(src, 90, 88) << 30) | /*  3b */
1176          (brw_inst_bits(src, 82, 80) << 27) | /*  3b */
1177          (brw_inst_bits(src, 50, 50) << 26) | /*  1b */
1178          (brw_inst_bits(src, 49, 48) << 24) | /*  2b */
1179          (brw_inst_bits(src, 42, 40) << 21) | /*  3b */
1180          (brw_inst_bits(src, 39, 39) << 20) | /*  1b */
1181          (brw_inst_bits(src, 38, 36) << 17) | /*  3b */
1182          (brw_inst_bits(src, 34, 34) << 16) | /*  1b */
1183          (brw_inst_bits(src, 33, 33) << 15) | /*  1b */
1184          (brw_inst_bits(src, 32, 32) << 14) | /*  1b */
1185          (brw_inst_bits(src, 31, 31) << 13) | /*  1b */
1186          (brw_inst_bits(src, 28, 28) << 12) | /*  1b */
1187          (brw_inst_bits(src, 27, 24) <<  8) | /*  4b */
1188          (brw_inst_bits(src, 23, 23) <<  7) | /*  1b */
1189          (brw_inst_bits(src, 22, 22) <<  6) | /*  1b */
1190          (brw_inst_bits(src, 21, 19) <<  3) | /*  3b */
1191          (brw_inst_bits(src, 18, 16));        /*  3b */
1192 
1193       for (unsigned i = 0; i < ARRAY_SIZE(xehp_3src_control_index_table); i++) {
1194          if (xehp_3src_control_index_table[i] == uncompacted) {
1195             brw_compact_inst_set_3src_control_index(devinfo, dst, i);
1196             return true;
1197          }
1198       }
1199    } else if (devinfo->ver >= 12) {
1200       uint64_t uncompacted =             /* 36b/TGL+ */
1201          (brw_inst_bits(src, 95, 92) << 32) | /*  4b */
1202          (brw_inst_bits(src, 90, 88) << 29) | /*  3b */
1203          (brw_inst_bits(src, 82, 80) << 26) | /*  3b */
1204          (brw_inst_bits(src, 50, 50) << 25) | /*  1b */
1205          (brw_inst_bits(src, 48, 48) << 24) | /*  1b */
1206          (brw_inst_bits(src, 42, 40) << 21) | /*  3b */
1207          (brw_inst_bits(src, 39, 39) << 20) | /*  1b */
1208          (brw_inst_bits(src, 38, 36) << 17) | /*  3b */
1209          (brw_inst_bits(src, 34, 34) << 16) | /*  1b */
1210          (brw_inst_bits(src, 33, 33) << 15) | /*  1b */
1211          (brw_inst_bits(src, 32, 32) << 14) | /*  1b */
1212          (brw_inst_bits(src, 31, 31) << 13) | /*  1b */
1213          (brw_inst_bits(src, 28, 28) << 12) | /*  1b */
1214          (brw_inst_bits(src, 27, 24) <<  8) | /*  4b */
1215          (brw_inst_bits(src, 23, 23) <<  7) | /*  1b */
1216          (brw_inst_bits(src, 22, 22) <<  6) | /*  1b */
1217          (brw_inst_bits(src, 21, 19) <<  3) | /*  3b */
1218          (brw_inst_bits(src, 18, 16));        /*  3b */
1219 
1220       for (unsigned i = 0; i < ARRAY_SIZE(gfx12_3src_control_index_table); i++) {
1221          if (gfx12_3src_control_index_table[i] == uncompacted) {
1222             brw_compact_inst_set_3src_control_index(devinfo, dst, i);
1223             return true;
1224          }
1225       }
1226    } else {
1227       uint32_t uncompacted = /* 26b/SKL+ */
1228          (brw_inst_bits(src, 36, 35) << 24) |  /*  2b */
1229          (brw_inst_bits(src, 34, 32) << 21) |  /*  3b */
1230          (brw_inst_bits(src, 28,  8));         /* 21b */
1231 
1232       for (unsigned i = 0; i < ARRAY_SIZE(gfx8_3src_control_index_table); i++) {
1233          if (gfx8_3src_control_index_table[i] == uncompacted) {
1234             brw_compact_inst_set_3src_control_index(devinfo, dst, i);
1235             return true;
1236          }
1237       }
1238    }
1239 
1240    return false;
1241 }
1242 
1243 static bool
set_3src_source_index(const struct intel_device_info * devinfo,brw_compact_inst * dst,const brw_inst * src,bool is_dpas)1244 set_3src_source_index(const struct intel_device_info *devinfo,
1245                       brw_compact_inst *dst, const brw_inst *src,
1246                       bool is_dpas)
1247 {
1248    if (devinfo->ver >= 12) {
1249       uint32_t uncompacted =               /* 21b/TGL+ */
1250          (brw_inst_bits(src, 114, 114) << 20) | /*  1b */
1251          (brw_inst_bits(src, 113, 112) << 18) | /*  2b */
1252          (brw_inst_bits(src,  98,  98) << 17) | /*  1b */
1253          (brw_inst_bits(src,  97,  96) << 15) | /*  2b */
1254          (brw_inst_bits(src,  91,  91) << 14) | /*  1b */
1255          (brw_inst_bits(src,  87,  86) << 12) | /*  2b */
1256          (brw_inst_bits(src,  85,  84) << 10) | /*  2b */
1257          (brw_inst_bits(src,  83,  83) <<  9) | /*  1b */
1258          (brw_inst_bits(src,  66,  66) <<  8) | /*  1b */
1259          (brw_inst_bits(src,  65,  64) <<  6) | /*  2b */
1260          (brw_inst_bits(src,  47,  47) <<  5) | /*  1b */
1261          (brw_inst_bits(src,  46,  46) <<  4) | /*  1b */
1262          (brw_inst_bits(src,  45,  44) <<  2) | /*  2b */
1263          (brw_inst_bits(src,  43,  43) <<  1) | /*  1b */
1264          (brw_inst_bits(src,  35,  35));        /*  1b */
1265 
1266       /* In Xe2, the bits used to index the tables for 3src and 3src-dpas
1267        * are the same, so just need to pick the right one.
1268        */
1269       const uint32_t *three_src_source_index_table =
1270          devinfo->ver >= 20 ? (is_dpas ? xe2_3src_dpas_source_index_table :
1271                                          xe2_3src_source_index_table) :
1272          devinfo->verx10 >= 125 ? xehp_3src_source_index_table :
1273          gfx12_3src_source_index_table;
1274       const uint32_t three_src_source_index_table_len =
1275          devinfo->ver >= 20 ? (is_dpas ? ARRAY_SIZE(xe2_3src_dpas_source_index_table) :
1276                                          ARRAY_SIZE(xe2_3src_source_index_table)) :
1277          devinfo->verx10 >= 125 ? ARRAY_SIZE(xehp_3src_source_index_table) :
1278          ARRAY_SIZE(gfx12_3src_source_index_table);
1279 
1280       for (unsigned i = 0; i < three_src_source_index_table_len; i++) {
1281          if (three_src_source_index_table[i] == uncompacted) {
1282             brw_compact_inst_set_3src_source_index(devinfo, dst, i);
1283             return true;
1284          }
1285       }
1286    } else {
1287       uint64_t uncompacted =    /* 49b/SKL+ */
1288          (brw_inst_bits(src, 126, 125) << 47) |   /*  2b */
1289          (brw_inst_bits(src, 105, 104) << 45) |   /*  2b */
1290          (brw_inst_bits(src,  84,  84) << 44) |   /*  1b */
1291          (brw_inst_bits(src,  83,  83) << 43) |   /*  1b */
1292          (brw_inst_bits(src, 114, 107) << 35) |   /*  8b */
1293          (brw_inst_bits(src,  93,  86) << 27) |   /*  8b */
1294          (brw_inst_bits(src,  72,  65) << 19) |   /*  8b */
1295          (brw_inst_bits(src,  55,  37));          /* 19b */
1296 
1297       for (unsigned i = 0; i < ARRAY_SIZE(gfx8_3src_source_index_table); i++) {
1298          if (gfx8_3src_source_index_table[i] == uncompacted) {
1299             brw_compact_inst_set_3src_source_index(devinfo, dst, i);
1300             return true;
1301          }
1302       }
1303    }
1304 
1305    return false;
1306 }
1307 
1308 static bool
set_3src_subreg_index(const struct intel_device_info * devinfo,brw_compact_inst * dst,const brw_inst * src)1309 set_3src_subreg_index(const struct intel_device_info *devinfo,
1310                       brw_compact_inst *dst, const brw_inst *src)
1311 {
1312    assert(devinfo->ver >= 12);
1313 
1314    uint32_t uncompacted =               /* 20b/TGL+ */
1315       (brw_inst_bits(src, 119, 115) << 15) | /*  5b */
1316       (brw_inst_bits(src, 103,  99) << 10) | /*  5b */
1317       (brw_inst_bits(src,  71,  67) <<  5) | /*  5b */
1318       (brw_inst_bits(src,  55,  51));        /*  5b */
1319 
1320    const uint32_t *table = devinfo->ver >= 20 ? xe2_3src_subreg_table :
1321                            gfx12_3src_subreg_table;
1322    const uint32_t len =
1323       devinfo->ver >= 20 ? ARRAY_SIZE(xe2_3src_subreg_table) :
1324       ARRAY_SIZE(gfx12_3src_subreg_table);
1325 
1326    for (unsigned i = 0; i < len; i++) {
1327       if (table[i] == uncompacted) {
1328          brw_compact_inst_set_3src_subreg_index(devinfo, dst, i);
1329 	 return true;
1330       }
1331    }
1332 
1333    return false;
1334 }
1335 
1336 static bool
has_unmapped_bits(const struct brw_isa_info * isa,const brw_inst * src)1337 has_unmapped_bits(const struct brw_isa_info *isa, const brw_inst *src)
1338 {
1339    const struct intel_device_info *devinfo = isa->devinfo;
1340 
1341    /* EOT can only be mapped on a send if the src1 is an immediate */
1342    if ((brw_inst_opcode(isa, src) == BRW_OPCODE_SENDC ||
1343         brw_inst_opcode(isa, src) == BRW_OPCODE_SEND) &&
1344        brw_inst_eot(devinfo, src))
1345       return true;
1346 
1347    /* Check for instruction bits that don't map to any of the fields of the
1348     * compacted instruction.  The instruction cannot be compacted if any of
1349     * them are set.  They overlap with:
1350     *  - NibCtrl (bit 11 on Gfx8)
1351     *  - Dst.AddrImm[9] (bit 47 on Gfx8)
1352     *  - Src0.AddrImm[9] (bit 95 on Gfx8)
1353     *  - Imm64[27:31] (bit 95 on Gfx8)
1354     *  - UIP[31] (bit 95 on Gfx8)
1355     */
1356    if (devinfo->ver >= 12) {
1357       assert(!brw_inst_bits(src, 7,  7));
1358       return false;
1359    } else {
1360       assert(!brw_inst_bits(src, 7,  7));
1361       return brw_inst_bits(src, 95, 95) ||
1362              brw_inst_bits(src, 47, 47) ||
1363              brw_inst_bits(src, 11, 11);
1364    }
1365 }
1366 
1367 static bool
has_3src_unmapped_bits(const struct intel_device_info * devinfo,const brw_inst * src,bool is_dpas)1368 has_3src_unmapped_bits(const struct intel_device_info *devinfo,
1369                        const brw_inst *src, bool is_dpas)
1370 {
1371    /* Check for three-source instruction bits that don't map to any of the
1372     * fields of the compacted instruction.  All of them seem to be reserved
1373     * bits currently.
1374     */
1375    if (devinfo->ver >= 20) {
1376       assert(is_dpas || !brw_inst_bits(src, 49, 49));
1377       assert(!brw_inst_bits(src, 33, 33));
1378       assert(!brw_inst_bits(src, 7, 7));
1379    } else if (devinfo->ver >= 12) {
1380       assert(is_dpas || !brw_inst_bits(src, 49, 49));
1381       assert(!brw_inst_bits(src, 7, 7));
1382    } else {
1383       assert(!brw_inst_bits(src, 127, 127) &&
1384              !brw_inst_bits(src, 7,  7));
1385    }
1386 
1387    return false;
1388 }
1389 
1390 static bool
brw_try_compact_3src_instruction(const struct brw_isa_info * isa,brw_compact_inst * dst,const brw_inst * src)1391 brw_try_compact_3src_instruction(const struct brw_isa_info *isa,
1392                                  brw_compact_inst *dst, const brw_inst *src)
1393 {
1394    const struct intel_device_info *devinfo = isa->devinfo;
1395 
1396    bool is_dpas = brw_inst_opcode(isa, src) == BRW_OPCODE_DPAS;
1397    if (has_3src_unmapped_bits(devinfo, src, is_dpas))
1398       return false;
1399 
1400 #define compact(field) \
1401    brw_compact_inst_set_3src_##field(devinfo, dst, brw_inst_3src_##field(devinfo, src))
1402 #define compact_a16(field) \
1403    brw_compact_inst_set_3src_##field(devinfo, dst, brw_inst_3src_a16_##field(devinfo, src))
1404 
1405    compact(hw_opcode);
1406 
1407    if (!set_3src_control_index(devinfo, dst, src, is_dpas))
1408       return false;
1409 
1410    if (!set_3src_source_index(devinfo, dst, src, is_dpas))
1411       return false;
1412 
1413    if (devinfo->ver >= 12) {
1414       if (!set_3src_subreg_index(devinfo, dst, src))
1415          return false;
1416 
1417       compact(swsb);
1418       compact(debug_control);
1419       compact(dst_reg_nr);
1420       compact(src0_reg_nr);
1421       compact(src1_reg_nr);
1422       compact(src2_reg_nr);
1423    } else {
1424       compact(dst_reg_nr);
1425       compact_a16(src0_rep_ctrl);
1426       compact(debug_control);
1427       compact(saturate);
1428       compact_a16(src1_rep_ctrl);
1429       compact_a16(src2_rep_ctrl);
1430       compact(src0_reg_nr);
1431       compact(src1_reg_nr);
1432       compact(src2_reg_nr);
1433       compact_a16(src0_subreg_nr);
1434       compact_a16(src1_subreg_nr);
1435       compact_a16(src2_subreg_nr);
1436    }
1437    brw_compact_inst_set_3src_cmpt_control(devinfo, dst, true);
1438 
1439 #undef compact
1440 #undef compact_a16
1441 
1442    return true;
1443 }
1444 
1445 /* On SNB through ICL, compacted instructions have 12-bits for immediate
1446  * sources, and a 13th bit that's replicated through the high 20 bits.
1447  *
1448  * Effectively this means we get 12-bit integers, 0.0f, and some limited uses
1449  * of packed vectors as compactable immediates.
1450  *
1451  * On TGL+, the high 12-bits of floating-point values (:f and :hf) are encoded
1452  * rather than the low 12-bits. For signed integer the 12th bit is replicated,
1453  * while for unsigned integers it is not.
1454  *
1455  * Returns the compacted immediate, or -1 if immediate cannot be compacted
1456  */
1457 static int
compact_immediate(const struct intel_device_info * devinfo,enum brw_reg_type type,unsigned imm)1458 compact_immediate(const struct intel_device_info *devinfo,
1459                   enum brw_reg_type type, unsigned imm)
1460 {
1461    if (devinfo->ver >= 12) {
1462       /* 16-bit immediates need to be replicated through the 32-bit immediate
1463        * field
1464        */
1465       switch (type) {
1466       case BRW_TYPE_W:
1467       case BRW_TYPE_UW:
1468       case BRW_TYPE_HF:
1469          if ((imm >> 16) != (imm & 0xffff))
1470             return -1;
1471          break;
1472       default:
1473          break;
1474       }
1475 
1476       switch (type) {
1477       case BRW_TYPE_F:
1478          /* We get the high 12-bits as-is; rest must be zero */
1479          if ((imm & 0xfffff) == 0)
1480             return (imm >> 20) & 0xfff;
1481          break;
1482       case BRW_TYPE_HF:
1483          /* We get the high 12-bits as-is; rest must be zero */
1484          if ((imm & 0xf) == 0)
1485             return (imm >> 4) & 0xfff;
1486          break;
1487       case BRW_TYPE_UD:
1488       case BRW_TYPE_VF:
1489       case BRW_TYPE_UV:
1490       case BRW_TYPE_V:
1491          /* We get the low 12-bits as-is; rest must be zero */
1492          if ((imm & 0xfffff000) == 0)
1493             return imm & 0xfff;
1494          break;
1495       case BRW_TYPE_UW:
1496          /* We get the low 12-bits as-is; rest must be zero */
1497          if ((imm & 0xf000) == 0)
1498             return imm & 0xfff;
1499          break;
1500       case BRW_TYPE_D:
1501          /* We get the low 11-bits as-is; 12th is replicated */
1502          if (((int)imm >> 11) == 0 || ((int)imm >> 11) == -1)
1503             return imm & 0xfff;
1504          break;
1505       case BRW_TYPE_W:
1506          /* We get the low 11-bits as-is; 12th is replicated */
1507          if (((short)imm >> 11) == 0 || ((short)imm >> 11) == -1)
1508             return imm & 0xfff;
1509          break;
1510       case BRW_TYPE_DF:
1511       case BRW_TYPE_Q:
1512       case BRW_TYPE_UQ:
1513       case BRW_TYPE_B:
1514       case BRW_TYPE_UB:
1515       default:
1516          return -1;
1517       }
1518    } else {
1519       /* We get the low 12 bits as-is; 13th is replicated */
1520       if (((int)imm >> 12) == 0 || ((int)imm >> 12 == -1)) {
1521          return imm & 0x1fff;
1522       }
1523    }
1524 
1525    return -1;
1526 }
1527 
1528 static int
uncompact_immediate(const struct intel_device_info * devinfo,enum brw_reg_type type,unsigned compact_imm)1529 uncompact_immediate(const struct intel_device_info *devinfo,
1530                     enum brw_reg_type type, unsigned compact_imm)
1531 {
1532    if (devinfo->ver >= 12) {
1533       switch (type) {
1534       case BRW_TYPE_F:
1535          return compact_imm << 20;
1536       case BRW_TYPE_HF:
1537          return (compact_imm << 20) | (compact_imm << 4);
1538       case BRW_TYPE_UD:
1539       case BRW_TYPE_VF:
1540       case BRW_TYPE_UV:
1541       case BRW_TYPE_V:
1542          return compact_imm;
1543       case BRW_TYPE_UW:
1544          /* Replicate */
1545          return compact_imm << 16 | compact_imm;
1546       case BRW_TYPE_D:
1547          /* Extend the 12th bit into the high 20 bits */
1548          return (int)(compact_imm << 20) >> 20;
1549       case BRW_TYPE_W:
1550          /* Extend the 12th bit into the high 4 bits and replicate */
1551          return ((int)(compact_imm << 20) >> 4) |
1552                 ((unsigned short)((short)(compact_imm << 4) >> 4));
1553       case BRW_TYPE_DF:
1554       case BRW_TYPE_Q:
1555       case BRW_TYPE_UQ:
1556       case BRW_TYPE_B:
1557       case BRW_TYPE_UB:
1558          unreachable("not reached");
1559       default:
1560          unreachable("invalid type");
1561       }
1562    } else {
1563       /* Replicate the 13th bit into the high 19 bits */
1564       return (int)(compact_imm << 19) >> 19;
1565    }
1566 
1567    unreachable("not reached");
1568 }
1569 
1570 static bool
has_immediate(const struct intel_device_info * devinfo,const brw_inst * inst,enum brw_reg_type * type)1571 has_immediate(const struct intel_device_info *devinfo, const brw_inst *inst,
1572               enum brw_reg_type *type)
1573 {
1574    if (brw_inst_src0_reg_file(devinfo, inst) == IMM) {
1575       *type = brw_inst_src0_type(devinfo, inst);
1576       return *type != BRW_TYPE_INVALID;
1577    } else if (brw_inst_src1_reg_file(devinfo, inst) == IMM) {
1578       *type = brw_inst_src1_type(devinfo, inst);
1579       return *type != BRW_TYPE_INVALID;
1580    }
1581 
1582    return false;
1583 }
1584 
1585 /**
1586  * Applies some small changes to instruction types to increase chances of
1587  * compaction.
1588  */
1589 static brw_inst
precompact(const struct brw_isa_info * isa,brw_inst inst)1590 precompact(const struct brw_isa_info *isa, brw_inst inst)
1591 {
1592    const struct intel_device_info *devinfo = isa->devinfo;
1593 
1594    /* In XeHP the compaction tables removed the entries for source regions
1595     * <8;8,1> giving preference to <1;1,0> as the way to indicate
1596     * sequential elements, so convert to those before compacting.
1597     */
1598    if (devinfo->verx10 >= 125) {
1599       if (brw_inst_src0_reg_file(devinfo, &inst) == FIXED_GRF &&
1600           brw_inst_src0_vstride(devinfo, &inst) > BRW_VERTICAL_STRIDE_1 &&
1601           brw_inst_src0_vstride(devinfo, &inst) == (brw_inst_src0_width(devinfo, &inst) + 1) &&
1602           brw_inst_src0_hstride(devinfo, &inst) == BRW_HORIZONTAL_STRIDE_1) {
1603          brw_inst_set_src0_vstride(devinfo, &inst, BRW_VERTICAL_STRIDE_1);
1604          brw_inst_set_src0_width(devinfo, &inst, BRW_WIDTH_1);
1605          brw_inst_set_src0_hstride(devinfo, &inst, BRW_HORIZONTAL_STRIDE_0);
1606       }
1607 
1608       if (brw_inst_src1_reg_file(devinfo, &inst) == FIXED_GRF &&
1609           brw_inst_src1_vstride(devinfo, &inst) > BRW_VERTICAL_STRIDE_1 &&
1610           brw_inst_src1_vstride(devinfo, &inst) == (brw_inst_src1_width(devinfo, &inst) + 1) &&
1611           brw_inst_src1_hstride(devinfo, &inst) == BRW_HORIZONTAL_STRIDE_1) {
1612          brw_inst_set_src1_vstride(devinfo, &inst, BRW_VERTICAL_STRIDE_1);
1613          brw_inst_set_src1_width(devinfo, &inst, BRW_WIDTH_1);
1614          brw_inst_set_src1_hstride(devinfo, &inst, BRW_HORIZONTAL_STRIDE_0);
1615       }
1616    }
1617 
1618    if (brw_inst_src0_reg_file(devinfo, &inst) != IMM)
1619       return inst;
1620 
1621    /* The Bspec's section titled "Non-present Operands" claims that if src0
1622     * is an immediate that src1's type must be the same as that of src0.
1623     *
1624     * The SNB+ DataTypeIndex instruction compaction tables contain mappings
1625     * that do not follow this rule. E.g., from the IVB/HSW table:
1626     *
1627     *  DataTypeIndex   18-Bit Mapping       Mapped Meaning
1628     *        3         001000001011111101   r:f | i:vf | a:ud | <1> | dir |
1629     *
1630     * And from the SNB table:
1631     *
1632     *  DataTypeIndex   18-Bit Mapping       Mapped Meaning
1633     *        8         001000000111101100   a:w | i:w | a:ud | <1> | dir |
1634     *
1635     * Neither of these cause warnings from the simulator when used,
1636     * compacted or otherwise. In fact, all compaction mappings that have an
1637     * immediate in src0 use a:ud for src1.
1638     *
1639     * Don't do any of this for 64-bit immediates, since the src1 fields
1640     * overlap with the immediate and setting them would overwrite the
1641     * immediate we set.
1642     */
1643    if (!(brw_inst_src0_type(devinfo, &inst) == BRW_TYPE_DF ||
1644          brw_inst_src0_type(devinfo, &inst) == BRW_TYPE_UQ ||
1645          brw_inst_src0_type(devinfo, &inst) == BRW_TYPE_Q)) {
1646       brw_inst_set_src1_reg_hw_type(devinfo, &inst, 0);
1647    }
1648 
1649    /* Compacted instructions only have 12-bits (plus 1 for the other 20)
1650     * for immediate values. Presumably the hardware engineers realized
1651     * that the only useful floating-point value that could be represented
1652     * in this format is 0.0, which can also be represented as a VF-typed
1653     * immediate, so they gave us the previously mentioned mapping on IVB+.
1654     *
1655     * Strangely, we do have a mapping for imm:f in src1, so we don't need
1656     * to do this there.
1657     *
1658     * If we see a 0.0:F, change the type to VF so that it can be compacted.
1659     *
1660     * Compaction of floating-point immediates is improved on Gfx12, thus
1661     * removing the need for this.
1662     */
1663    if (devinfo->ver < 12 &&
1664        brw_inst_imm_ud(devinfo, &inst) == 0x0 &&
1665        brw_inst_src0_type(devinfo, &inst) == BRW_TYPE_F &&
1666        brw_inst_dst_type(devinfo, &inst) == BRW_TYPE_F &&
1667        brw_inst_dst_hstride(devinfo, &inst) == BRW_HORIZONTAL_STRIDE_1) {
1668       enum brw_reg_file file = brw_inst_src0_reg_file(devinfo, &inst);
1669       brw_inst_set_src0_file_type(devinfo, &inst, file, BRW_TYPE_VF);
1670    }
1671 
1672    /* There are no mappings for dst:d | i:d, so if the immediate is suitable
1673     * set the types to :UD so the instruction can be compacted.
1674     *
1675     * FINISHME: Use dst:f | imm:f on Gfx12
1676     */
1677    if (devinfo->ver < 12 &&
1678        compact_immediate(devinfo, BRW_TYPE_D,
1679                          brw_inst_imm_ud(devinfo, &inst)) != -1 &&
1680        brw_inst_cond_modifier(devinfo, &inst) == BRW_CONDITIONAL_NONE &&
1681        brw_inst_src0_type(devinfo, &inst) == BRW_TYPE_D &&
1682        brw_inst_dst_type(devinfo, &inst) == BRW_TYPE_D) {
1683       enum brw_reg_file src_file = brw_inst_src0_reg_file(devinfo, &inst);
1684       enum brw_reg_file dst_file = brw_inst_dst_reg_file(devinfo, &inst);
1685 
1686       brw_inst_set_src0_file_type(devinfo, &inst, src_file, BRW_TYPE_UD);
1687       brw_inst_set_dst_file_type(devinfo, &inst, dst_file, BRW_TYPE_UD);
1688    }
1689 
1690    return inst;
1691 }
1692 
1693 /**
1694  * Tries to compact instruction src into dst.
1695  *
1696  * It doesn't modify dst unless src is compactable, which is relied on by
1697  * brw_compact_instructions().
1698  */
1699 static bool
try_compact_instruction(const struct compaction_state * c,brw_compact_inst * dst,const brw_inst * src)1700 try_compact_instruction(const struct compaction_state *c,
1701                         brw_compact_inst *dst, const brw_inst *src)
1702 {
1703    const struct intel_device_info *devinfo = c->isa->devinfo;
1704    brw_compact_inst temp;
1705 
1706    assert(brw_inst_cmpt_control(devinfo, src) == 0);
1707 
1708    if (is_3src(c->isa, brw_inst_opcode(c->isa, src))) {
1709       memset(&temp, 0, sizeof(temp));
1710       if (brw_try_compact_3src_instruction(c->isa, &temp, src)) {
1711          *dst = temp;
1712          return true;
1713       } else {
1714          return false;
1715       }
1716    }
1717 
1718    enum brw_reg_type type;
1719    bool is_immediate = has_immediate(devinfo, src, &type);
1720 
1721    unsigned compacted_imm = 0;
1722 
1723    if (is_immediate) {
1724       compacted_imm = compact_immediate(devinfo, type,
1725                                         brw_inst_imm_ud(devinfo, src));
1726       if (compacted_imm == -1)
1727          return false;
1728    }
1729 
1730    if (has_unmapped_bits(c->isa, src))
1731       return false;
1732 
1733    memset(&temp, 0, sizeof(temp));
1734 
1735 #define compact(field) \
1736    brw_compact_inst_set_##field(devinfo, &temp, brw_inst_##field(devinfo, src))
1737 #define compact_reg(field) \
1738    brw_compact_inst_set_##field##_reg_nr(devinfo, &temp, \
1739                                        brw_inst_##field##_da_reg_nr(devinfo, src))
1740 
1741    compact(hw_opcode);
1742    compact(debug_control);
1743 
1744    if (!set_control_index(c, &temp, src))
1745       return false;
1746    if (!set_datatype_index(c, &temp, src, is_immediate))
1747       return false;
1748    if (!set_subreg_index(c, &temp, src, is_immediate))
1749       return false;
1750    if (!set_src0_index(c, &temp, src))
1751       return false;
1752    if (!set_src1_index(c, &temp, src, is_immediate, compacted_imm))
1753       return false;
1754 
1755    if (devinfo->ver >= 12) {
1756       compact(swsb);
1757       compact_reg(dst);
1758       compact_reg(src0);
1759 
1760       if (is_immediate) {
1761          /* src1 reg takes the high 8 bits (of the 12-bit compacted value) */
1762          brw_compact_inst_set_src1_reg_nr(devinfo, &temp, compacted_imm >> 4);
1763       } else {
1764          compact_reg(src1);
1765       }
1766    } else {
1767       compact(acc_wr_control);
1768 
1769       compact(cond_modifier);
1770 
1771       compact_reg(dst);
1772       compact_reg(src0);
1773 
1774       if (is_immediate) {
1775          /* src1 reg takes the low 8 bits (of the 13-bit compacted value) */
1776          brw_compact_inst_set_src1_reg_nr(devinfo, &temp, compacted_imm & 0xff);
1777       } else {
1778          compact_reg(src1);
1779       }
1780    }
1781    brw_compact_inst_set_cmpt_control(devinfo, &temp, true);
1782 
1783 #undef compact
1784 #undef compact_reg
1785 
1786    *dst = temp;
1787 
1788    return true;
1789 }
1790 
1791 bool
brw_try_compact_instruction(const struct brw_isa_info * isa,brw_compact_inst * dst,const brw_inst * src)1792 brw_try_compact_instruction(const struct brw_isa_info *isa,
1793                             brw_compact_inst *dst, const brw_inst *src)
1794 {
1795    struct compaction_state c;
1796    compaction_state_init(&c, isa);
1797    return try_compact_instruction(&c, dst, src);
1798 }
1799 
1800 static void
set_uncompacted_control(const struct compaction_state * c,brw_inst * dst,brw_compact_inst * src)1801 set_uncompacted_control(const struct compaction_state *c, brw_inst *dst,
1802                         brw_compact_inst *src)
1803 {
1804    const struct intel_device_info *devinfo = c->isa->devinfo;
1805    uint32_t uncompacted =
1806       c->control_index_table[brw_compact_inst_control_index(devinfo, src)];
1807 
1808    if (devinfo->ver >= 20) {
1809       brw_inst_set_bits(dst, 95, 92, (uncompacted >> 14) & 0xf);
1810       brw_inst_set_bits(dst, 34, 34, (uncompacted >> 13) & 0x1);
1811       brw_inst_set_bits(dst, 32, 32, (uncompacted >> 12) & 0x1);
1812       brw_inst_set_bits(dst, 31, 31, (uncompacted >> 11) & 0x1);
1813       brw_inst_set_bits(dst, 28, 28, (uncompacted >> 10) & 0x1);
1814       brw_inst_set_bits(dst, 27, 26, (uncompacted >>  8) & 0x3);
1815       brw_inst_set_bits(dst, 25, 24, (uncompacted >>  6) & 0x3);
1816       brw_inst_set_bits(dst, 23, 21, (uncompacted >>  3) & 0x7);
1817       brw_inst_set_bits(dst, 20, 18, (uncompacted >>  0) & 0x7);
1818    } else if (devinfo->ver >= 12) {
1819       brw_inst_set_bits(dst, 95, 92, (uncompacted >> 17));
1820       brw_inst_set_bits(dst, 34, 34, (uncompacted >> 16) & 0x1);
1821       brw_inst_set_bits(dst, 33, 33, (uncompacted >> 15) & 0x1);
1822       brw_inst_set_bits(dst, 32, 32, (uncompacted >> 14) & 0x1);
1823       brw_inst_set_bits(dst, 31, 31, (uncompacted >> 13) & 0x1);
1824       brw_inst_set_bits(dst, 28, 28, (uncompacted >> 12) & 0x1);
1825       brw_inst_set_bits(dst, 27, 24, (uncompacted >>  8) & 0xf);
1826       brw_inst_set_bits(dst, 23, 22, (uncompacted >>  6) & 0x3);
1827       brw_inst_set_bits(dst, 21, 19, (uncompacted >>  3) & 0x7);
1828       brw_inst_set_bits(dst, 18, 16, (uncompacted >>  0) & 0x7);
1829    } else {
1830       brw_inst_set_bits(dst, 33, 31, (uncompacted >> 16));
1831       brw_inst_set_bits(dst, 23, 12, (uncompacted >>  4) & 0xfff);
1832       brw_inst_set_bits(dst, 10,  9, (uncompacted >>  2) & 0x3);
1833       brw_inst_set_bits(dst, 34, 34, (uncompacted >>  1) & 0x1);
1834       brw_inst_set_bits(dst,  8,  8, (uncompacted >>  0) & 0x1);
1835    }
1836 }
1837 
1838 static void
set_uncompacted_datatype(const struct compaction_state * c,brw_inst * dst,brw_compact_inst * src)1839 set_uncompacted_datatype(const struct compaction_state *c, brw_inst *dst,
1840                          brw_compact_inst *src)
1841 {
1842    const struct intel_device_info *devinfo = c->isa->devinfo;
1843    uint32_t uncompacted =
1844       c->datatype_table[brw_compact_inst_datatype_index(devinfo, src)];
1845 
1846    if (devinfo->ver >= 12) {
1847       brw_inst_set_bits(dst, 98, 98, (uncompacted >> 19));
1848       brw_inst_set_bits(dst, 91, 88, (uncompacted >> 15) & 0xf);
1849       brw_inst_set_bits(dst, 66, 66, (uncompacted >> 14) & 0x1);
1850       brw_inst_set_bits(dst, 50, 50, (uncompacted >> 13) & 0x1);
1851       brw_inst_set_bits(dst, 49, 48, (uncompacted >> 11) & 0x3);
1852       brw_inst_set_bits(dst, 47, 47, (uncompacted >> 10) & 0x1);
1853       brw_inst_set_bits(dst, 46, 46, (uncompacted >>  9) & 0x1);
1854       brw_inst_set_bits(dst, 43, 40, (uncompacted >>  5) & 0xf);
1855       brw_inst_set_bits(dst, 39, 36, (uncompacted >>  1) & 0xf);
1856       brw_inst_set_bits(dst, 35, 35, (uncompacted >>  0) & 0x1);
1857    } else {
1858       brw_inst_set_bits(dst, 63, 61, (uncompacted >> 18));
1859       brw_inst_set_bits(dst, 94, 89, (uncompacted >> 12) & 0x3f);
1860       brw_inst_set_bits(dst, 46, 35, (uncompacted >>  0) & 0xfff);
1861    }
1862 }
1863 
1864 static void
set_uncompacted_subreg(const struct compaction_state * c,brw_inst * dst,brw_compact_inst * src)1865 set_uncompacted_subreg(const struct compaction_state *c, brw_inst *dst,
1866                        brw_compact_inst *src)
1867 {
1868    const struct intel_device_info *devinfo = c->isa->devinfo;
1869    uint16_t uncompacted =
1870       c->subreg_table[brw_compact_inst_subreg_index(devinfo, src)];
1871 
1872    if (devinfo->ver >= 20) {
1873       brw_inst_set_bits(dst, 33, 33, (uncompacted >> 0) & 0x1);
1874       brw_inst_set_bits(dst, 55, 51, (uncompacted >> 1) & 0x1f);
1875       brw_inst_set_bits(dst, 71, 67, (uncompacted >> 6) & 0x1f);
1876       brw_inst_set_bits(dst, 87, 87, (uncompacted >> 11) & 0x1);
1877    } else if (devinfo->ver >= 12) {
1878       brw_inst_set_bits(dst, 103, 99, (uncompacted >> 10));
1879       brw_inst_set_bits(dst,  71, 67, (uncompacted >>  5) & 0x1f);
1880       brw_inst_set_bits(dst,  55, 51, (uncompacted >>  0) & 0x1f);
1881    } else {
1882       brw_inst_set_bits(dst, 100, 96, (uncompacted >> 10));
1883       brw_inst_set_bits(dst,  68, 64, (uncompacted >>  5) & 0x1f);
1884       brw_inst_set_bits(dst,  52, 48, (uncompacted >>  0) & 0x1f);
1885    }
1886 }
1887 
1888 static void
set_uncompacted_src0(const struct compaction_state * c,brw_inst * dst,brw_compact_inst * src)1889 set_uncompacted_src0(const struct compaction_state *c, brw_inst *dst,
1890                      brw_compact_inst *src)
1891 {
1892    const struct intel_device_info *devinfo = c->isa->devinfo;
1893    uint32_t compacted = brw_compact_inst_src0_index(devinfo, src);
1894    uint16_t uncompacted = c->src0_index_table[compacted];
1895 
1896    if (devinfo->ver >= 12) {
1897       if (devinfo->ver < 20)
1898          brw_inst_set_bits(dst, 87, 87, (uncompacted >> 11) & 0x1);
1899       brw_inst_set_bits(dst, 86, 84, (uncompacted >> 8) & 0x7);
1900       brw_inst_set_bits(dst, 83, 81, (uncompacted >> 5) & 0x7);
1901       brw_inst_set_bits(dst, 80, 80, (uncompacted >> 4) & 0x1);
1902       brw_inst_set_bits(dst, 65, 64, (uncompacted >> 2) & 0x3);
1903       brw_inst_set_bits(dst, 45, 44, (uncompacted >> 0) & 0x3);
1904    } else {
1905       brw_inst_set_bits(dst, 88, 77, uncompacted);
1906    }
1907 }
1908 
1909 static void
set_uncompacted_src1(const struct compaction_state * c,brw_inst * dst,brw_compact_inst * src)1910 set_uncompacted_src1(const struct compaction_state *c, brw_inst *dst,
1911                      brw_compact_inst *src)
1912 {
1913    const struct intel_device_info *devinfo = c->isa->devinfo;
1914    uint16_t uncompacted =
1915       c->src1_index_table[brw_compact_inst_src1_index(devinfo, src)];
1916 
1917    if (devinfo->ver >= 20) {
1918       brw_inst_set_bits(dst, 121, 120, (uncompacted >> 14) & 0x3);
1919       brw_inst_set_bits(dst, 118, 116, (uncompacted >> 11) & 0x7);
1920       brw_inst_set_bits(dst, 115, 113, (uncompacted >>  8) & 0x7);
1921       brw_inst_set_bits(dst, 112, 112, (uncompacted >>  7) & 0x1);
1922       brw_inst_set_bits(dst, 103,  99, (uncompacted >>  2) & 0x1f);
1923       brw_inst_set_bits(dst,  97,  96, (uncompacted >>  0) & 0x3);
1924    } else if (devinfo->ver >= 12) {
1925       brw_inst_set_bits(dst, 121, 120, (uncompacted >> 10));
1926       brw_inst_set_bits(dst, 119, 116, (uncompacted >>  6) & 0xf);
1927       brw_inst_set_bits(dst, 115, 113, (uncompacted >>  3) & 0x7);
1928       brw_inst_set_bits(dst, 112, 112, (uncompacted >>  2) & 0x1);
1929       brw_inst_set_bits(dst,  97,  96, (uncompacted >>  0) & 0x3);
1930    } else {
1931       brw_inst_set_bits(dst, 120, 109, uncompacted);
1932    }
1933 }
1934 
1935 static void
set_uncompacted_3src_control_index(const struct compaction_state * c,brw_inst * dst,brw_compact_inst * src,bool is_dpas)1936 set_uncompacted_3src_control_index(const struct compaction_state *c,
1937                                    brw_inst *dst, brw_compact_inst *src,
1938                                    bool is_dpas)
1939 {
1940    const struct intel_device_info *devinfo = c->isa->devinfo;
1941 
1942    if (devinfo->ver >= 20) {
1943       uint64_t compacted = brw_compact_inst_3src_control_index(devinfo, src);
1944       uint64_t uncompacted = is_dpas ? xe2_3src_dpas_control_index_table[compacted] :
1945                                        xe2_3src_control_index_table[compacted];
1946 
1947       brw_inst_set_bits(dst, 95, 92, (uncompacted >> 30) & 0xf);
1948       brw_inst_set_bits(dst, 90, 88, (uncompacted >> 27) & 0x7);
1949       brw_inst_set_bits(dst, 82, 80, (uncompacted >> 24) & 0x7);
1950       brw_inst_set_bits(dst, 50, 50, (uncompacted >> 23) & 0x1);
1951       brw_inst_set_bits(dst, 49, 48, (uncompacted >> 21) & 0x3);
1952       brw_inst_set_bits(dst, 42, 40, (uncompacted >> 18) & 0x7);
1953       brw_inst_set_bits(dst, 39, 39, (uncompacted >> 17) & 0x1);
1954       brw_inst_set_bits(dst, 38, 36, (uncompacted >> 14) & 0x7);
1955       brw_inst_set_bits(dst, 34, 34, (uncompacted >> 13) & 0x1);
1956       brw_inst_set_bits(dst, 32, 32, (uncompacted >> 12) & 0x1);
1957       brw_inst_set_bits(dst, 31, 31, (uncompacted >> 11) & 0x1);
1958       brw_inst_set_bits(dst, 28, 28, (uncompacted >> 10) & 0x1);
1959       brw_inst_set_bits(dst, 27, 26, (uncompacted >>  8) & 0x3);
1960       brw_inst_set_bits(dst, 25, 24, (uncompacted >>  6) & 0x3);
1961       brw_inst_set_bits(dst, 23, 21, (uncompacted >>  3) & 0x7);
1962       brw_inst_set_bits(dst, 20, 18, (uncompacted >>  0) & 0x7);
1963 
1964    } else if (devinfo->verx10 >= 125) {
1965       uint64_t compacted = brw_compact_inst_3src_control_index(devinfo, src);
1966       uint64_t uncompacted = xehp_3src_control_index_table[compacted];
1967 
1968       brw_inst_set_bits(dst, 95, 92, (uncompacted >> 33));
1969       brw_inst_set_bits(dst, 90, 88, (uncompacted >> 30) & 0x7);
1970       brw_inst_set_bits(dst, 82, 80, (uncompacted >> 27) & 0x7);
1971       brw_inst_set_bits(dst, 50, 50, (uncompacted >> 26) & 0x1);
1972       brw_inst_set_bits(dst, 49, 48, (uncompacted >> 24) & 0x3);
1973       brw_inst_set_bits(dst, 42, 40, (uncompacted >> 21) & 0x7);
1974       brw_inst_set_bits(dst, 39, 39, (uncompacted >> 20) & 0x1);
1975       brw_inst_set_bits(dst, 38, 36, (uncompacted >> 17) & 0x7);
1976       brw_inst_set_bits(dst, 34, 34, (uncompacted >> 16) & 0x1);
1977       brw_inst_set_bits(dst, 33, 33, (uncompacted >> 15) & 0x1);
1978       brw_inst_set_bits(dst, 32, 32, (uncompacted >> 14) & 0x1);
1979       brw_inst_set_bits(dst, 31, 31, (uncompacted >> 13) & 0x1);
1980       brw_inst_set_bits(dst, 28, 28, (uncompacted >> 12) & 0x1);
1981       brw_inst_set_bits(dst, 27, 24, (uncompacted >>  8) & 0xf);
1982       brw_inst_set_bits(dst, 23, 23, (uncompacted >>  7) & 0x1);
1983       brw_inst_set_bits(dst, 22, 22, (uncompacted >>  6) & 0x1);
1984       brw_inst_set_bits(dst, 21, 19, (uncompacted >>  3) & 0x7);
1985       brw_inst_set_bits(dst, 18, 16, (uncompacted >>  0) & 0x7);
1986 
1987    } else if (devinfo->ver >= 12) {
1988       uint64_t compacted = brw_compact_inst_3src_control_index(devinfo, src);
1989       uint64_t uncompacted = gfx12_3src_control_index_table[compacted];
1990 
1991       brw_inst_set_bits(dst, 95, 92, (uncompacted >> 32));
1992       brw_inst_set_bits(dst, 90, 88, (uncompacted >> 29) & 0x7);
1993       brw_inst_set_bits(dst, 82, 80, (uncompacted >> 26) & 0x7);
1994       brw_inst_set_bits(dst, 50, 50, (uncompacted >> 25) & 0x1);
1995       brw_inst_set_bits(dst, 48, 48, (uncompacted >> 24) & 0x1);
1996       brw_inst_set_bits(dst, 42, 40, (uncompacted >> 21) & 0x7);
1997       brw_inst_set_bits(dst, 39, 39, (uncompacted >> 20) & 0x1);
1998       brw_inst_set_bits(dst, 38, 36, (uncompacted >> 17) & 0x7);
1999       brw_inst_set_bits(dst, 34, 34, (uncompacted >> 16) & 0x1);
2000       brw_inst_set_bits(dst, 33, 33, (uncompacted >> 15) & 0x1);
2001       brw_inst_set_bits(dst, 32, 32, (uncompacted >> 14) & 0x1);
2002       brw_inst_set_bits(dst, 31, 31, (uncompacted >> 13) & 0x1);
2003       brw_inst_set_bits(dst, 28, 28, (uncompacted >> 12) & 0x1);
2004       brw_inst_set_bits(dst, 27, 24, (uncompacted >>  8) & 0xf);
2005       brw_inst_set_bits(dst, 23, 23, (uncompacted >>  7) & 0x1);
2006       brw_inst_set_bits(dst, 22, 22, (uncompacted >>  6) & 0x1);
2007       brw_inst_set_bits(dst, 21, 19, (uncompacted >>  3) & 0x7);
2008       brw_inst_set_bits(dst, 18, 16, (uncompacted >>  0) & 0x7);
2009    } else {
2010       uint32_t compacted = brw_compact_inst_3src_control_index(devinfo, src);
2011       uint32_t uncompacted = gfx8_3src_control_index_table[compacted];
2012 
2013       brw_inst_set_bits(dst, 34, 32, (uncompacted >> 21) & 0x7);
2014       brw_inst_set_bits(dst, 28,  8, (uncompacted >>  0) & 0x1fffff);
2015 
2016       brw_inst_set_bits(dst, 36, 35, (uncompacted >> 24) & 0x3);
2017    }
2018 }
2019 
2020 static void
set_uncompacted_3src_source_index(const struct intel_device_info * devinfo,brw_inst * dst,brw_compact_inst * src,bool is_dpas)2021 set_uncompacted_3src_source_index(const struct intel_device_info *devinfo,
2022                                   brw_inst *dst, brw_compact_inst *src,
2023                                   bool is_dpas)
2024 {
2025    uint32_t compacted = brw_compact_inst_3src_source_index(devinfo, src);
2026 
2027    if (devinfo->ver >= 12) {
2028       const uint32_t *three_src_source_index_table =
2029          devinfo->ver >= 20 ? (is_dpas ? xe2_3src_dpas_source_index_table :
2030                                          xe2_3src_source_index_table) :
2031          devinfo->verx10 >= 125 ? xehp_3src_source_index_table :
2032                                   gfx12_3src_source_index_table;
2033       uint32_t uncompacted = three_src_source_index_table[compacted];
2034 
2035       brw_inst_set_bits(dst, 114, 114, (uncompacted >> 20));
2036       brw_inst_set_bits(dst, 113, 112, (uncompacted >> 18) & 0x3);
2037       brw_inst_set_bits(dst,  98,  98, (uncompacted >> 17) & 0x1);
2038       brw_inst_set_bits(dst,  97,  96, (uncompacted >> 15) & 0x3);
2039       brw_inst_set_bits(dst,  91,  91, (uncompacted >> 14) & 0x1);
2040       brw_inst_set_bits(dst,  87,  86, (uncompacted >> 12) & 0x3);
2041       brw_inst_set_bits(dst,  85,  84, (uncompacted >> 10) & 0x3);
2042       brw_inst_set_bits(dst,  83,  83, (uncompacted >>  9) & 0x1);
2043       brw_inst_set_bits(dst,  66,  66, (uncompacted >>  8) & 0x1);
2044       brw_inst_set_bits(dst,  65,  64, (uncompacted >>  6) & 0x3);
2045       brw_inst_set_bits(dst,  47,  47, (uncompacted >>  5) & 0x1);
2046       brw_inst_set_bits(dst,  46,  46, (uncompacted >>  4) & 0x1);
2047       brw_inst_set_bits(dst,  45,  44, (uncompacted >>  2) & 0x3);
2048       brw_inst_set_bits(dst,  43,  43, (uncompacted >>  1) & 0x1);
2049       brw_inst_set_bits(dst,  35,  35, (uncompacted >>  0) & 0x1);
2050    } else {
2051       uint64_t uncompacted = gfx8_3src_source_index_table[compacted];
2052 
2053       brw_inst_set_bits(dst,  83,  83, (uncompacted >> 43) & 0x1);
2054       brw_inst_set_bits(dst, 114, 107, (uncompacted >> 35) & 0xff);
2055       brw_inst_set_bits(dst,  93,  86, (uncompacted >> 27) & 0xff);
2056       brw_inst_set_bits(dst,  72,  65, (uncompacted >> 19) & 0xff);
2057       brw_inst_set_bits(dst,  55,  37, (uncompacted >>  0) & 0x7ffff);
2058 
2059       brw_inst_set_bits(dst, 126, 125, (uncompacted >> 47) & 0x3);
2060       brw_inst_set_bits(dst, 105, 104, (uncompacted >> 45) & 0x3);
2061       brw_inst_set_bits(dst,  84,  84, (uncompacted >> 44) & 0x1);
2062    }
2063 }
2064 
2065 static void
set_uncompacted_3src_subreg_index(const struct intel_device_info * devinfo,brw_inst * dst,brw_compact_inst * src)2066 set_uncompacted_3src_subreg_index(const struct intel_device_info *devinfo,
2067                                   brw_inst *dst, brw_compact_inst *src)
2068 {
2069    assert(devinfo->ver >= 12);
2070 
2071    uint32_t compacted = brw_compact_inst_3src_subreg_index(devinfo, src);
2072    uint32_t uncompacted = (devinfo->ver >= 20 ? xe2_3src_subreg_table[compacted]:
2073                            gfx12_3src_subreg_table[compacted]);
2074 
2075    brw_inst_set_bits(dst, 119, 115, (uncompacted >> 15));
2076    brw_inst_set_bits(dst, 103,  99, (uncompacted >> 10) & 0x1f);
2077    brw_inst_set_bits(dst,  71,  67, (uncompacted >>  5) & 0x1f);
2078    brw_inst_set_bits(dst,  55,  51, (uncompacted >>  0) & 0x1f);
2079 }
2080 
2081 static void
brw_uncompact_3src_instruction(const struct compaction_state * c,brw_inst * dst,brw_compact_inst * src,bool is_dpas)2082 brw_uncompact_3src_instruction(const struct compaction_state *c,
2083                                brw_inst *dst, brw_compact_inst *src, bool is_dpas)
2084 {
2085    const struct intel_device_info *devinfo = c->isa->devinfo;
2086 
2087 #define uncompact(field) \
2088    brw_inst_set_3src_##field(devinfo, dst, brw_compact_inst_3src_##field(devinfo, src))
2089 #define uncompact_a16(field) \
2090    brw_inst_set_3src_a16_##field(devinfo, dst, brw_compact_inst_3src_##field(devinfo, src))
2091 
2092    uncompact(hw_opcode);
2093 
2094    if (devinfo->ver >= 12) {
2095       set_uncompacted_3src_control_index(c, dst, src, is_dpas);
2096       set_uncompacted_3src_source_index(devinfo, dst, src, is_dpas);
2097       set_uncompacted_3src_subreg_index(devinfo, dst, src);
2098 
2099       uncompact(debug_control);
2100       uncompact(swsb);
2101       uncompact(dst_reg_nr);
2102       uncompact(src0_reg_nr);
2103       uncompact(src1_reg_nr);
2104       uncompact(src2_reg_nr);
2105    } else {
2106       set_uncompacted_3src_control_index(c, dst, src, is_dpas);
2107       set_uncompacted_3src_source_index(devinfo, dst, src, is_dpas);
2108 
2109       uncompact(dst_reg_nr);
2110       uncompact_a16(src0_rep_ctrl);
2111       uncompact(debug_control);
2112       uncompact(saturate);
2113       uncompact_a16(src1_rep_ctrl);
2114       uncompact_a16(src2_rep_ctrl);
2115       uncompact(src0_reg_nr);
2116       uncompact(src1_reg_nr);
2117       uncompact(src2_reg_nr);
2118       uncompact_a16(src0_subreg_nr);
2119       uncompact_a16(src1_subreg_nr);
2120       uncompact_a16(src2_subreg_nr);
2121    }
2122    brw_inst_set_3src_cmpt_control(devinfo, dst, false);
2123 
2124 #undef uncompact
2125 #undef uncompact_a16
2126 }
2127 
2128 static void
uncompact_instruction(const struct compaction_state * c,brw_inst * dst,brw_compact_inst * src)2129 uncompact_instruction(const struct compaction_state *c, brw_inst *dst,
2130                       brw_compact_inst *src)
2131 {
2132    const struct intel_device_info *devinfo = c->isa->devinfo;
2133    memset(dst, 0, sizeof(*dst));
2134 
2135    const enum opcode opcode =
2136       brw_opcode_decode(c->isa, brw_compact_inst_3src_hw_opcode(devinfo, src));
2137    if (is_3src(c->isa, opcode)) {
2138       const bool is_dpas = opcode == BRW_OPCODE_DPAS;
2139       brw_uncompact_3src_instruction(c, dst, src, is_dpas);
2140       return;
2141    }
2142 
2143 #define uncompact(field) \
2144    brw_inst_set_##field(devinfo, dst, brw_compact_inst_##field(devinfo, src))
2145 #define uncompact_reg(field) \
2146    brw_inst_set_##field##_da_reg_nr(devinfo, dst, \
2147                                     brw_compact_inst_##field##_reg_nr(devinfo, src))
2148 
2149    uncompact(hw_opcode);
2150    uncompact(debug_control);
2151 
2152    set_uncompacted_control(c, dst, src);
2153    set_uncompacted_datatype(c, dst, src);
2154    set_uncompacted_subreg(c, dst, src);
2155    set_uncompacted_src0(c, dst, src);
2156 
2157    enum brw_reg_type type;
2158    if (has_immediate(devinfo, dst, &type)) {
2159       unsigned imm = uncompact_immediate(devinfo, type,
2160                                          brw_compact_inst_imm(devinfo, src));
2161       brw_inst_set_imm_ud(devinfo, dst, imm);
2162    } else {
2163       set_uncompacted_src1(c, dst, src);
2164       uncompact_reg(src1);
2165    }
2166 
2167    if (devinfo->ver >= 12) {
2168       uncompact(swsb);
2169       uncompact_reg(dst);
2170       uncompact_reg(src0);
2171    } else {
2172       uncompact(acc_wr_control);
2173 
2174       uncompact(cond_modifier);
2175 
2176       uncompact_reg(dst);
2177       uncompact_reg(src0);
2178    }
2179    brw_inst_set_cmpt_control(devinfo, dst, false);
2180 
2181 #undef uncompact
2182 #undef uncompact_reg
2183 }
2184 
2185 void
brw_uncompact_instruction(const struct brw_isa_info * isa,brw_inst * dst,brw_compact_inst * src)2186 brw_uncompact_instruction(const struct brw_isa_info *isa,
2187                           brw_inst *dst, brw_compact_inst *src)
2188 {
2189    struct compaction_state c;
2190    compaction_state_init(&c, isa);
2191    uncompact_instruction(&c, dst, src);
2192 }
2193 
2194 void
brw_debug_compact_uncompact(const struct brw_isa_info * isa,brw_inst * orig,brw_inst * uncompacted)2195 brw_debug_compact_uncompact(const struct brw_isa_info *isa,
2196                             brw_inst *orig,
2197                             brw_inst *uncompacted)
2198 {
2199    fprintf(stderr, "Instruction compact/uncompact changed (gen%d):\n",
2200            isa->devinfo->ver);
2201 
2202    fprintf(stderr, "  before: ");
2203    brw_disassemble_inst(stderr, isa, orig, true, 0, NULL);
2204 
2205    fprintf(stderr, "  after:  ");
2206    brw_disassemble_inst(stderr, isa, uncompacted, false, 0, NULL);
2207 
2208    uint32_t *before_bits = (uint32_t *)orig;
2209    uint32_t *after_bits = (uint32_t *)uncompacted;
2210    fprintf(stderr, "  changed bits:\n");
2211    for (int i = 0; i < 128; i++) {
2212       uint32_t before = before_bits[i / 32] & (1 << (i & 31));
2213       uint32_t after = after_bits[i / 32] & (1 << (i & 31));
2214 
2215       if (before != after) {
2216          fprintf(stderr, "  bit %d, %s to %s\n", i,
2217                  before ? "set" : "unset",
2218                  after ? "set" : "unset");
2219       }
2220    }
2221 }
2222 
/**
 * Difference between the compacted-instruction counts recorded for two
 * pre-compaction instruction indices.
 *
 * \return compacted_counts[old_target_ip] - compacted_counts[old_ip]; the
 *         result is negative when the target has the smaller count.
 */
static int
compacted_between(int old_ip, int old_target_ip, int *compacted_counts)
{
   return compacted_counts[old_target_ip] - compacted_counts[old_ip];
}
2230 
2231 static void
update_uip_jip(const struct brw_isa_info * isa,brw_inst * insn,int this_old_ip,int * compacted_counts)2232 update_uip_jip(const struct brw_isa_info *isa, brw_inst *insn,
2233                int this_old_ip, int *compacted_counts)
2234 {
2235    const struct intel_device_info *devinfo = isa->devinfo;
2236 
2237    /* JIP and UIP are in units of bytes on Gfx8+. */
2238    int shift = 3;
2239 
2240    /* Even though the values are signed, we don't need the rounding behavior
2241     * of integer division. The shifts are safe.
2242     */
2243    assert(brw_inst_jip(devinfo, insn) % 8 == 0 &&
2244           brw_inst_uip(devinfo, insn) % 8 == 0);
2245 
2246    int32_t jip_compacted = brw_inst_jip(devinfo, insn) >> shift;
2247    jip_compacted -= compacted_between(this_old_ip,
2248                                       this_old_ip + (jip_compacted / 2),
2249                                       compacted_counts);
2250    brw_inst_set_jip(devinfo, insn, (uint32_t)jip_compacted << shift);
2251 
2252    if (brw_inst_opcode(isa, insn) == BRW_OPCODE_ENDIF ||
2253        brw_inst_opcode(isa, insn) == BRW_OPCODE_WHILE)
2254       return;
2255 
2256    int32_t uip_compacted = brw_inst_uip(devinfo, insn) >> shift;
2257    uip_compacted -= compacted_between(this_old_ip,
2258                                       this_old_ip + (uip_compacted / 2),
2259                                       compacted_counts);
2260    brw_inst_set_uip(devinfo, insn, (uint32_t)uip_compacted << shift);
2261 }
2262 
2263 static void
compaction_state_init(struct compaction_state * c,const struct brw_isa_info * isa)2264 compaction_state_init(struct compaction_state *c,
2265                       const struct brw_isa_info *isa)
2266 {
2267    const struct intel_device_info *devinfo = isa->devinfo;
2268 
2269    assert(g45_subreg_table[ARRAY_SIZE(g45_subreg_table) - 1] != 0);
2270    assert(gfx8_control_index_table[ARRAY_SIZE(gfx8_control_index_table) - 1] != 0);
2271    assert(gfx8_datatype_table[ARRAY_SIZE(gfx8_datatype_table) - 1] != 0);
2272    assert(gfx8_subreg_table[ARRAY_SIZE(gfx8_subreg_table) - 1] != 0);
2273    assert(gfx8_src_index_table[ARRAY_SIZE(gfx8_src_index_table) - 1] != 0);
2274    assert(gfx11_datatype_table[ARRAY_SIZE(gfx11_datatype_table) - 1] != 0);
2275    assert(gfx12_control_index_table[ARRAY_SIZE(gfx12_control_index_table) - 1] != 0);
2276    assert(gfx12_datatype_table[ARRAY_SIZE(gfx12_datatype_table) - 1] != 0);
2277    assert(gfx12_subreg_table[ARRAY_SIZE(gfx12_subreg_table) - 1] != 0);
2278    assert(gfx12_src0_index_table[ARRAY_SIZE(gfx12_src0_index_table) - 1] != 0);
2279    assert(gfx12_src1_index_table[ARRAY_SIZE(gfx12_src1_index_table) - 1] != 0);
2280    assert(xehp_src0_index_table[ARRAY_SIZE(xehp_src0_index_table) - 1] != 0);
2281    assert(xehp_src1_index_table[ARRAY_SIZE(xehp_src1_index_table) - 1] != 0);
2282    assert(xe2_control_index_table[ARRAY_SIZE(xe2_control_index_table) - 1] != 0);
2283    assert(xe2_datatype_table[ARRAY_SIZE(xe2_datatype_table) - 1] != 0);
2284    assert(xe2_subreg_table[ARRAY_SIZE(xe2_subreg_table) - 1] != 0);
2285    assert(xe2_src0_index_table[ARRAY_SIZE(xe2_src0_index_table) - 1] != 0);
2286    assert(xe2_src1_index_table[ARRAY_SIZE(xe2_src1_index_table) - 1] != 0);
2287 
2288    c->isa = isa;
2289    switch (devinfo->ver) {
2290    case 20:
2291       c->control_index_table = xe2_control_index_table;
2292       c->datatype_table = xe2_datatype_table;
2293       c->subreg_table = xe2_subreg_table;
2294       c->src0_index_table = xe2_src0_index_table;
2295       c->src1_index_table = xe2_src1_index_table;
2296       break;
2297    case 12:
2298       c->control_index_table = gfx12_control_index_table;;
2299       c->datatype_table = gfx12_datatype_table;
2300       c->subreg_table = gfx12_subreg_table;
2301       if (devinfo->verx10 >= 125) {
2302          c->src0_index_table = xehp_src0_index_table;
2303          c->src1_index_table = xehp_src1_index_table;
2304       } else {
2305          c->src0_index_table = gfx12_src0_index_table;
2306          c->src1_index_table = gfx12_src1_index_table;
2307       }
2308       break;
2309    case 11:
2310       c->control_index_table = gfx8_control_index_table;
2311       c->datatype_table = gfx11_datatype_table;
2312       c->subreg_table = gfx8_subreg_table;
2313       c->src0_index_table = gfx8_src_index_table;
2314       c->src1_index_table = gfx8_src_index_table;
2315       break;
2316    case 9:
2317       c->control_index_table = gfx8_control_index_table;
2318       c->datatype_table = gfx8_datatype_table;
2319       c->subreg_table = gfx8_subreg_table;
2320       c->src0_index_table = gfx8_src_index_table;
2321       c->src1_index_table = gfx8_src_index_table;
2322       break;
2323    default:
2324       unreachable("unknown generation");
2325    }
2326 }
2327 
2328 void
brw_compact_instructions(struct brw_codegen * p,int start_offset,struct disasm_info * disasm)2329 brw_compact_instructions(struct brw_codegen *p, int start_offset,
2330                          struct disasm_info *disasm)
2331 {
2332    if (INTEL_DEBUG(DEBUG_NO_COMPACTION))
2333       return;
2334 
2335    const struct intel_device_info *devinfo = p->devinfo;
2336 
2337    void *store = p->store + start_offset / 16;
2338    /* For an instruction at byte offset 16*i before compaction, this is the
2339     * number of compacted instructions minus the number of padding NOP/NENOPs
2340     * that preceded it.
2341     */
2342    unsigned num_compacted_counts =
2343       (p->next_insn_offset - start_offset) / sizeof(brw_inst);
2344    int *compacted_counts =
2345       calloc(1, sizeof(*compacted_counts) * num_compacted_counts);
2346 
2347    /* For an instruction at byte offset 8*i after compaction, this was its IP
2348     * (in 16-byte units) before compaction.
2349     */
2350    unsigned num_old_ip =
2351       (p->next_insn_offset - start_offset) / sizeof(brw_compact_inst) + 1;
2352    int *old_ip = calloc(1, sizeof(*old_ip) * num_old_ip);
2353 
2354    struct compaction_state c;
2355    compaction_state_init(&c, p->isa);
2356 
2357    int offset = 0;
2358    int compacted_count = 0;
2359    for (int src_offset = 0; src_offset < p->next_insn_offset - start_offset;
2360         src_offset += sizeof(brw_inst)) {
2361       brw_inst *src = store + src_offset;
2362       void *dst = store + offset;
2363 
2364       old_ip[offset / sizeof(brw_compact_inst)] = src_offset / sizeof(brw_inst);
2365       compacted_counts[src_offset / sizeof(brw_inst)] = compacted_count;
2366 
2367       brw_inst inst = precompact(p->isa, *src);
2368       brw_inst saved = inst;
2369 
2370       if (try_compact_instruction(&c, dst, &inst)) {
2371          compacted_count++;
2372 
2373          if (INTEL_DEBUG(DEBUG_VS | DEBUG_GS | DEBUG_TCS | DEBUG_TASK |
2374                          DEBUG_WM | DEBUG_CS | DEBUG_TES | DEBUG_MESH |
2375                          DEBUG_RT)) {
2376             brw_inst uncompacted;
2377             uncompact_instruction(&c, &uncompacted, dst);
2378             if (memcmp(&saved, &uncompacted, sizeof(uncompacted))) {
2379                brw_debug_compact_uncompact(p->isa, &saved, &uncompacted);
2380             }
2381          }
2382 
2383          offset += sizeof(brw_compact_inst);
2384       } else {
2385          /* If we didn't compact this instruction, we need to move it down into
2386           * place.
2387           */
2388          if (offset != src_offset) {
2389             memmove(dst, src, sizeof(brw_inst));
2390          }
2391          offset += sizeof(brw_inst);
2392       }
2393    }
2394 
2395    /* Add an entry for the ending offset of the program. This greatly
2396     * simplifies the linked list walk at the end of the function.
2397     */
2398    old_ip[offset / sizeof(brw_compact_inst)] =
2399       (p->next_insn_offset - start_offset) / sizeof(brw_inst);
2400 
2401    /* Fix up control flow offsets. */
2402    p->next_insn_offset = start_offset + offset;
2403    for (offset = 0; offset < p->next_insn_offset - start_offset;
2404         offset = next_offset(devinfo, store, offset)) {
2405       brw_inst *insn = store + offset;
2406       int this_old_ip = old_ip[offset / sizeof(brw_compact_inst)];
2407       int this_compacted_count = compacted_counts[this_old_ip];
2408 
2409       switch (brw_inst_opcode(p->isa, insn)) {
2410       case BRW_OPCODE_BREAK:
2411       case BRW_OPCODE_CONTINUE:
2412       case BRW_OPCODE_HALT:
2413          update_uip_jip(p->isa, insn, this_old_ip, compacted_counts);
2414          break;
2415 
2416       case BRW_OPCODE_IF:
2417       case BRW_OPCODE_ELSE:
2418       case BRW_OPCODE_ENDIF:
2419       case BRW_OPCODE_WHILE:
2420          if (brw_inst_cmpt_control(devinfo, insn)) {
2421             brw_inst uncompacted;
2422             uncompact_instruction(&c, &uncompacted,
2423                                   (brw_compact_inst *)insn);
2424 
2425             update_uip_jip(p->isa, &uncompacted, this_old_ip,
2426                            compacted_counts);
2427 
2428             bool ret = try_compact_instruction(&c, (brw_compact_inst *)insn,
2429                                                &uncompacted);
2430             assert(ret); (void)ret;
2431          } else {
2432             update_uip_jip(p->isa, insn, this_old_ip, compacted_counts);
2433          }
2434          break;
2435 
2436       case BRW_OPCODE_ADD:
2437          /* Add instructions modifying the IP register use an immediate src1,
2438           * and Gens that use this cannot compact instructions with immediate
2439           * operands.
2440           */
2441          if (brw_inst_cmpt_control(devinfo, insn))
2442             break;
2443 
2444          if (brw_inst_dst_reg_file(devinfo, insn) == ARF &&
2445              brw_inst_dst_da_reg_nr(devinfo, insn) == BRW_ARF_IP) {
2446             assert(brw_inst_src1_reg_file(devinfo, insn) == IMM);
2447 
2448             int shift = 3;
2449             int jump_compacted = brw_inst_imm_d(devinfo, insn) >> shift;
2450 
2451             int target_old_ip = this_old_ip + (jump_compacted / 2);
2452             int target_compacted_count = compacted_counts[target_old_ip];
2453             jump_compacted -= (target_compacted_count - this_compacted_count);
2454             brw_inst_set_imm_ud(devinfo, insn, jump_compacted << shift);
2455          }
2456          break;
2457 
2458       default:
2459          break;
2460       }
2461    }
2462 
2463    /* p->nr_insn is counting the number of uncompacted instructions still, so
2464     * divide.  We do want to be sure there's a valid instruction in any
2465     * alignment padding, so that the next compression pass (for the FS 8/16
2466     * compile passes) parses correctly.
2467     */
2468    if (p->next_insn_offset & sizeof(brw_compact_inst)) {
2469       brw_compact_inst *align = store + offset;
2470       memset(align, 0, sizeof(*align));
2471       brw_compact_inst_set_hw_opcode(
2472          devinfo, align, brw_opcode_encode(p->isa, BRW_OPCODE_NOP));
2473       brw_compact_inst_set_cmpt_control(devinfo, align, true);
2474       p->next_insn_offset += sizeof(brw_compact_inst);
2475    }
2476    p->nr_insn = p->next_insn_offset / sizeof(brw_inst);
2477 
2478    for (int i = 0; i < p->num_relocs; i++) {
2479       if (p->relocs[i].offset < (uint32_t)start_offset)
2480          continue;
2481 
2482       assert(p->relocs[i].offset % 16 == 0);
2483       unsigned idx = (p->relocs[i].offset - start_offset) / 16;
2484       p->relocs[i].offset -= compacted_counts[idx] * 8;
2485    }
2486 
2487    /* Update the instruction offsets for each group. */
2488    if (disasm) {
2489       int offset = 0;
2490 
2491       foreach_list_typed(struct inst_group, group, link, &disasm->group_list) {
2492          while (start_offset + old_ip[offset / sizeof(brw_compact_inst)] *
2493                 sizeof(brw_inst) != group->offset) {
2494             assert(start_offset + old_ip[offset / sizeof(brw_compact_inst)] *
2495                    sizeof(brw_inst) < group->offset);
2496             offset = next_offset(devinfo, store, offset);
2497          }
2498 
2499          group->offset = start_offset + offset;
2500 
2501          offset = next_offset(devinfo, store, offset);
2502       }
2503    }
2504 
2505    free(compacted_counts);
2506    free(old_ip);
2507 }
2508