xref: /aosp_15_r20/external/mesa3d/src/intel/compiler/elk/elk_eu_compact.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2012-2018 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 /** @file elk_eu_compact.c
25  *
26  * Instruction compaction is a feature of G45 and newer hardware that allows
27  * for a smaller instruction encoding.
28  *
29  * The instruction cache is on the order of 32KB, and many programs generate
30  * far more instructions than that.  The instruction cache is built to barely
31  * keep up with instruction dispatch ability in cache hit cases -- L1
32  * instruction cache misses that still hit in the next level could limit
33  * throughput by around 50%.
34  *
35  * The idea of instruction compaction is that most instructions use a tiny
36  * subset of the GPU functionality, so we can encode what would be a 16 byte
37  * instruction in 8 bytes using some lookup tables for various fields.
38  *
39  *
40  * Instruction compaction capabilities vary subtly by generation.
41  *
42  * G45's support for instruction compaction is very limited. Jump counts on
43  * this generation are in units of 16-byte uncompacted instructions. As such,
44  * all jump targets must be 16-byte aligned. Also, all instructions must be
45  * naturally aligned, i.e. uncompacted instructions must be 16-byte aligned.
46  * A G45-only instruction, NENOP, must be used to provide padding to align
47  * uncompacted instructions.
48  *
49  * Gfx5 removes these restrictions and changes jump counts to be in units of
50  * 8-byte compacted instructions, allowing jump targets to be only 8-byte
51  * aligned. Uncompacted instructions can also be placed on 8-byte boundaries.
52  *
53  * Gfx6 adds the ability to compact instructions with a limited range of
54  * immediate values. Compactable immediates have 12 unrestricted bits, and a
55  * 13th bit that's replicated through the high 20 bits, to create the 32-bit
56  * value of DW3 in the uncompacted instruction word.
57  *
58  * On Gfx7 we can compact some control flow instructions with a small positive
59  * immediate in the low bits of DW3, like ENDIF with the JIP field. Other
60  * control flow instructions with UIP cannot be compacted, because of the
61  * replicated 13th bit. No control flow instructions can be compacted on Gfx6
62  * since the jump count field is not in DW3.
63  *
64  *    break    JIP/UIP
65  *    cont     JIP/UIP
66  *    halt     JIP/UIP
67  *    if       JIP/UIP
68  *    else     JIP (plus UIP on BDW+)
69  *    endif    JIP
70  *    while    JIP (must be negative)
71  *
72  * Gfx8 adds support for compacting 3-src instructions.
73  *
74  * Gfx12 reduces the number of bits available to compacted immediates from
75  * 13 to 12, but improves the compaction of floating-point immediates by
76  * allowing the high bits to be encoded (the sign, 8-bit exponent, and the
77  * three most significant bits of the mantissa), rather than the lowest bits of
78  * the mantissa.
79  */
80 
81 #include "elk_eu.h"
82 #include "elk_disasm.h"
83 #include "elk_shader.h"
84 #include "elk_disasm_info.h"
85 #include "dev/intel_debug.h"
86 
/**
 * G45 control-index lookup table used by instruction compaction.
 *
 * Holds 32 bit patterns, each written as a 17-bit binary literal.  The file
 * header describes compaction as replacing instruction fields with references
 * into lookup tables; the array position of a pattern is presumably the 5-bit
 * index stored in the compacted encoding (the consumer is not in this part of
 * the file -- confirm there before reordering or deduplicating entries).
 */
static const uint32_t g45_control_index_table[32] = {
   0b00000000000000000,
   0b01000000000000000,
   0b00110000000000000,
   0b00000000000000010,
   0b00100000000000000,
   0b00010000000000000,
   0b01000000000100000,
   0b01000000100000000,
   0b01010000000100000,
   0b00000000100000010,
   0b11000000000000000,
   0b00001000100000010,
   0b01001000100000000,
   0b00000000100000000,
   0b11000000000100000,
   0b00001000100000000,
   0b10110000000000000,
   0b11010000000100000,
   0b00110000100000000,
   0b00100000100000000,
   0b01000000000001000,
   0b01000000000000100,
   0b00111100000000000,
   0b00101011000000000,
   0b00110000000010000,
   0b00010000100000000,
   0b01000000000100100,
   0b01000000000101000,
   0b00110000000000110,
   0b00000000000001010,
   0b01010000000101000,
   0b01010000000100100,
};
121 
/**
 * G45 datatype lookup table used by instruction compaction.
 *
 * Holds 32 bit patterns, each written as an 18-bit binary literal.  The array
 * position of a pattern is presumably the index stored in the compacted
 * instruction's datatype field (consumer not visible here -- confirm), so
 * entry order must be treated as significant.
 */
static const uint32_t g45_datatype_table[32] = {
   0b001000000000100001,
   0b001011010110101101,
   0b001000001000110001,
   0b001111011110111101,
   0b001011010110101100,
   0b001000000110101101,
   0b001000000000100000,
   0b010100010110110001,
   0b001100011000101101,
   0b001000000000100010,
   0b001000001000110110,
   0b010000001000110001,
   0b001000001000110010,
   0b011000001000110010,
   0b001111011110111100,
   0b001000000100101000,
   0b010100011000110001,
   0b001010010100101001,
   0b001000001000101001,
   0b010000001000110110,
   0b101000001000110001,
   0b001011011000101101,
   0b001000000100001001,
   0b001011011000101100,
   0b110100011000110001,
   0b001000001110111101,
   0b110000001000110001,
   0b011000000100101010,
   0b101000001000101001,
   0b001011010110001100,
   0b001000000110100001,
   0b001010010100001000,
};
156 
/**
 * G45 sub-register lookup table used by instruction compaction.
 *
 * Holds 32 bit patterns, each written as a 15-bit binary literal (so every
 * value fits the uint16_t element type).  The array position is presumably
 * the index stored in the compacted encoding (consumer not visible here --
 * confirm), so entry order must be treated as significant.
 */
static const uint16_t g45_subreg_table[32] = {
   0b000000000000000,
   0b000000010000000,
   0b000001000000000,
   0b000100000000000,
   0b000000000100000,
   0b100000000000000,
   0b000000000010000,
   0b001100000000000,
   0b001010000000000,
   0b000000100000000,
   0b001000000000000,
   0b000000000001000,
   0b000000001000000,
   0b000000000000001,
   0b000010000000000,
   0b000000010100000,
   0b000000000000111,
   0b000001000100000,
   0b011000000000000,
   0b000000110000000,
   0b000000000000010,
   0b000000000000100,
   0b000000001100000,
   0b000100000000010,
   0b001110011000110,
   0b001110100001000,
   0b000110011000110,
   0b000001000011000,
   0b000110010000100,
   0b001100000000110,
   0b000000010000110,
   0b000001000110000,
};
191 
/**
 * G45 source-index lookup table used by instruction compaction.
 *
 * Holds 32 bit patterns, each written as a 12-bit binary literal.  The array
 * position is presumably the index stored in the compacted encoding's source
 * index field (consumer not visible here -- confirm), so entry order must be
 * treated as significant.
 */
static const uint16_t g45_src_index_table[32] = {
   0b000000000000,
   0b010001101000,
   0b010110001000,
   0b011010010000,
   0b001101001000,
   0b010110001010,
   0b010101110000,
   0b011001111000,
   0b001000101000,
   0b000000101000,
   0b010001010000,
   0b111101101100,
   0b010110001100,
   0b010001101100,
   0b011010010100,
   0b010001001100,
   0b001100101000,
   0b000000000010,
   0b111101001100,
   0b011001101000,
   0b010101001000,
   0b000000000100,
   0b000000101100,
   0b010001101010,
   0b000000111000,
   0b010101011000,
   0b000100100000,
   0b010110000000,
   0b010000000100,
   0b010000111000,
   0b000101100000,
   0b111101110100,
};
226 
/**
 * Gfx6 control-index lookup table used by instruction compaction.
 *
 * Holds 32 bit patterns, each written as a 17-bit binary literal (the same
 * width as the G45 control-index table, with different contents).  The array
 * position is presumably the index stored in the compacted encoding (consumer
 * not visible here -- confirm), so entry order must be treated as significant.
 */
static const uint32_t gfx6_control_index_table[32] = {
   0b00000000000000000,
   0b01000000000000000,
   0b00110000000000000,
   0b00000000100000000,
   0b00010000000000000,
   0b00001000100000000,
   0b00000000100000010,
   0b00000000000000010,
   0b01000000100000000,
   0b01010000000000000,
   0b10110000000000000,
   0b00100000000000000,
   0b11010000000000000,
   0b11000000000000000,
   0b01001000100000000,
   0b01000000000001000,
   0b01000000000000100,
   0b00000000000001000,
   0b00000000000000100,
   0b00111000100000000,
   0b00001000100000010,
   0b00110000100000000,
   0b00110000000000001,
   0b00100000000000001,
   0b00110000000000010,
   0b00110000000000101,
   0b00110000000001001,
   0b00110000000010000,
   0b00110000000000011,
   0b00110000000000100,
   0b00110000100001000,
   0b00100000000001001,
};
261 
/**
 * Gfx6 datatype lookup table used by instruction compaction.
 *
 * Holds 32 bit patterns, each written as an 18-bit binary literal.  The array
 * position is presumably the index stored in the compacted encoding (consumer
 * not visible here -- confirm), so entry order must be treated as significant.
 *
 * NOTE(review): entries 23 and 25 hold the same pattern
 * (0b001111011110111101).  A first-match search would never pick index 25;
 * the duplicate is kept as found -- confirm against the hardware
 * documentation before removing it.
 */
static const uint32_t gfx6_datatype_table[32] = {
   0b001001110000000000,
   0b001000110000100000,
   0b001001110000000001,
   0b001000000001100000,
   0b001010110100101001,
   0b001000000110101101,
   0b001100011000101100,
   0b001011110110101101,
   0b001000000111101100,
   0b001000000001100001,
   0b001000110010100101,
   0b001000000001000001,
   0b001000001000110001,
   0b001000001000101001,
   0b001000000000100000,
   0b001000001000110010,
   0b001010010100101001,
   0b001011010010100101,
   0b001000000110100101,
   0b001100011000101001,
   0b001011011000101100,
   0b001011010110100101,
   0b001011110110100101,
   0b001111011110111101,
   0b001111011110111100,
   0b001111011110111101,
   0b001111011110011101,
   0b001111011110111110,
   0b001000000000100001,
   0b001000000000100010,
   0b001001111111011101,
   0b001000001110111110,
};
296 
/**
 * Gfx6 sub-register lookup table used by instruction compaction.
 *
 * Holds 32 bit patterns, each written as a 15-bit binary literal.  The array
 * position is presumably the index stored in the compacted encoding (consumer
 * not visible here -- confirm), so entry order must be treated as significant.
 */
static const uint16_t gfx6_subreg_table[32] = {
   0b000000000000000,
   0b000000000000100,
   0b000000110000000,
   0b111000000000000,
   0b011110000001000,
   0b000010000000000,
   0b000000000010000,
   0b000110000001100,
   0b001000000000000,
   0b000001000000000,
   0b000001010010100,
   0b000000001010110,
   0b010000000000000,
   0b110000000000000,
   0b000100000000000,
   0b000000010000000,
   0b000000000001000,
   0b100000000000000,
   0b000001010000000,
   0b001010000000000,
   0b001100000000000,
   0b000000001010100,
   0b101101010010100,
   0b010100000000000,
   0b000000010001111,
   0b011000000000000,
   0b111110000000000,
   0b101000000000000,
   0b000000000001111,
   0b000100010001111,
   0b001000010001111,
   0b000110000000000,
};
331 
/**
 * Gfx6 source-index lookup table used by instruction compaction.
 *
 * Holds 32 bit patterns, each written as a 12-bit binary literal.  The array
 * position is presumably the index stored in the compacted encoding (consumer
 * not visible here -- confirm), so entry order must be treated as significant.
 */
static const uint16_t gfx6_src_index_table[32] = {
   0b000000000000,
   0b010110001000,
   0b010001101000,
   0b001000101000,
   0b011010010000,
   0b000100100000,
   0b010001101100,
   0b010101110000,
   0b011001111000,
   0b001100101000,
   0b010110001100,
   0b001000100000,
   0b010110001010,
   0b000000000010,
   0b010101010000,
   0b010101101000,
   0b111101001100,
   0b111100101100,
   0b011001110000,
   0b010110001001,
   0b010101011000,
   0b001101001000,
   0b010000101100,
   0b010000000000,
   0b001101110000,
   0b001100010000,
   0b001100000000,
   0b010001101010,
   0b001101111000,
   0b000001110000,
   0b001100100000,
   0b001101010000,
};
366 
/**
 * Gfx7 control-index lookup table used by instruction compaction.
 *
 * Holds 32 bit patterns, each written as a 19-bit binary literal (two bits
 * wider than the G45/Gfx6 control-index literals).  The array position is
 * presumably the index stored in the compacted encoding (consumer not visible
 * here -- confirm), so entry order must be treated as significant.
 */
static const uint32_t gfx7_control_index_table[32] = {
   0b0000000000000000010,
   0b0000100000000000000,
   0b0000100000000000001,
   0b0000100000000000010,
   0b0000100000000000011,
   0b0000100000000000100,
   0b0000100000000000101,
   0b0000100000000000111,
   0b0000100000000001000,
   0b0000100000000001001,
   0b0000100000000001101,
   0b0000110000000000000,
   0b0000110000000000001,
   0b0000110000000000010,
   0b0000110000000000011,
   0b0000110000000000100,
   0b0000110000000000101,
   0b0000110000000000111,
   0b0000110000000001001,
   0b0000110000000001101,
   0b0000110000000010000,
   0b0000110000100000000,
   0b0001000000000000000,
   0b0001000000000000010,
   0b0001000000000000100,
   0b0001000000100000000,
   0b0010110000000000000,
   0b0010110000000010000,
   0b0011000000000000000,
   0b0011000000100000000,
   0b0101000000000000000,
   0b0101000000100000000,
};
401 
/**
 * Gfx7 datatype lookup table used by instruction compaction.
 *
 * Holds 32 bit patterns, each written as an 18-bit binary literal.  The array
 * position is presumably the index stored in the compacted encoding (consumer
 * not visible here -- confirm), so entry order must be treated as significant.
 */
static const uint32_t gfx7_datatype_table[32] = {
   0b001000000000000001,
   0b001000000000100000,
   0b001000000000100001,
   0b001000000001100001,
   0b001000000010111101,
   0b001000001011111101,
   0b001000001110100001,
   0b001000001110100101,
   0b001000001110111101,
   0b001000010000100001,
   0b001000110000100000,
   0b001000110000100001,
   0b001001010010100101,
   0b001001110010100100,
   0b001001110010100101,
   0b001111001110111101,
   0b001111011110011101,
   0b001111011110111100,
   0b001111011110111101,
   0b001111111110111100,
   0b000000001000001100,
   0b001000000000111101,
   0b001000000010100101,
   0b001000010000100000,
   0b001001010010100100,
   0b001001110010000100,
   0b001010010100001001,
   0b001101111110111101,
   0b001111111110111101,
   0b001011110110101100,
   0b001010010100101000,
   0b001010110100101000,
};
436 
/**
 * Gfx7 sub-register lookup table used by instruction compaction.
 *
 * Holds 32 bit patterns, each written as a 15-bit binary literal.  The array
 * position is presumably the index stored in the compacted encoding (consumer
 * not visible here -- confirm), so entry order must be treated as significant.
 */
static const uint16_t gfx7_subreg_table[32] = {
   0b000000000000000,
   0b000000000000001,
   0b000000000001000,
   0b000000000001111,
   0b000000000010000,
   0b000000010000000,
   0b000000100000000,
   0b000000110000000,
   0b000001000000000,
   0b000001000010000,
   0b000010100000000,
   0b001000000000000,
   0b001000000000001,
   0b001000010000001,
   0b001000010000010,
   0b001000010000011,
   0b001000010000100,
   0b001000010000111,
   0b001000010001000,
   0b001000010001110,
   0b001000010001111,
   0b001000110000000,
   0b001000111101000,
   0b010000000000000,
   0b010000110000000,
   0b011000000000000,
   0b011110010000111,
   0b100000000000000,
   0b101000000000000,
   0b110000000000000,
   0b111000000000000,
   0b111000000011100,
};
471 
/**
 * Gfx7 source-index lookup table used by instruction compaction.
 *
 * Holds 32 bit patterns, each written as a 12-bit binary literal.  The array
 * position is presumably the index stored in the compacted encoding (consumer
 * not visible here -- confirm), so entry order must be treated as significant.
 */
static const uint16_t gfx7_src_index_table[32] = {
   0b000000000000,
   0b000000000010,
   0b000000010000,
   0b000000010010,
   0b000000011000,
   0b000000100000,
   0b000000101000,
   0b000001001000,
   0b000001010000,
   0b000001110000,
   0b000001111000,
   0b001100000000,
   0b001100000010,
   0b001100001000,
   0b001100010000,
   0b001100010010,
   0b001100100000,
   0b001100101000,
   0b001100111000,
   0b001101000000,
   0b001101000010,
   0b001101001000,
   0b001101010000,
   0b001101100000,
   0b001101101000,
   0b001101110000,
   0b001101110001,
   0b001101111000,
   0b010001101000,
   0b010001101001,
   0b010001101010,
   0b010110001000,
};
506 
/**
 * Gfx8 control-index lookup table used by instruction compaction.
 *
 * Holds 32 bit patterns, each written as a 19-bit binary literal.  The array
 * position is presumably the index stored in the compacted encoding (consumer
 * not visible here -- confirm), so entry order must be treated as significant.
 */
static const uint32_t gfx8_control_index_table[32] = {
   0b0000000000000000010,
   0b0000100000000000000,
   0b0000100000000000001,
   0b0000100000000000010,
   0b0000100000000000011,
   0b0000100000000000100,
   0b0000100000000000101,
   0b0000100000000000111,
   0b0000100000000001000,
   0b0000100000000001001,
   0b0000100000000001101,
   0b0000110000000000000,
   0b0000110000000000001,
   0b0000110000000000010,
   0b0000110000000000011,
   0b0000110000000000100,
   0b0000110000000000101,
   0b0000110000000000111,
   0b0000110000000001001,
   0b0000110000000001101,
   0b0000110000000010000,
   0b0000110000100000000,
   0b0001000000000000000,
   0b0001000000000000010,
   0b0001000000000000100,
   0b0001000000100000000,
   0b0010110000000000000,
   0b0010110000000010000,
   0b0011000000000000000,
   0b0011000000100000000,
   0b0101000000000000000,
   0b0101000000100000000,
};
541 
/**
 * Gfx8 datatype lookup table used by instruction compaction.
 *
 * Holds 32 bit patterns, each written as a 21-bit binary literal (three bits
 * wider than the 18-bit literals of the earlier datatype tables).  The array
 * position is presumably the index stored in the compacted encoding (consumer
 * not visible here -- confirm), so entry order must be treated as significant.
 */
static const uint32_t gfx8_datatype_table[32] = {
   0b001000000000000000001,
   0b001000000000001000000,
   0b001000000000001000001,
   0b001000000000011000001,
   0b001000000000101011101,
   0b001000000010111011101,
   0b001000000011101000001,
   0b001000000011101000101,
   0b001000000011101011101,
   0b001000001000001000001,
   0b001000011000001000000,
   0b001000011000001000001,
   0b001000101000101000101,
   0b001000111000101000100,
   0b001000111000101000101,
   0b001011100011101011101,
   0b001011101011100011101,
   0b001011101011101011100,
   0b001011101011101011101,
   0b001011111011101011100,
   0b000000000010000001100,
   0b001000000000001011101,
   0b001000000000101000101,
   0b001000001000001000000,
   0b001000101000101000100,
   0b001000111000100000100,
   0b001001001001000001001,
   0b001010111011101011101,
   0b001011111011101011101,
   0b001001111001101001100,
   0b001001001001001001000,
   0b001001011001001001000,
};
576 
/**
 * Gfx8 sub-register lookup table used by instruction compaction.
 *
 * Holds 32 bit patterns, each written as a 15-bit binary literal.  The array
 * position is presumably the index stored in the compacted encoding (consumer
 * not visible here -- confirm), so entry order must be treated as significant.
 *
 * NOTE(review): index 10 (0b000001010000000) is the only entry that differs
 * from gfx7_subreg_table, which has 0b000010100000000 there -- confirm both
 * against the hardware documentation.
 */
static const uint16_t gfx8_subreg_table[32] = {
   0b000000000000000,
   0b000000000000001,
   0b000000000001000,
   0b000000000001111,
   0b000000000010000,
   0b000000010000000,
   0b000000100000000,
   0b000000110000000,
   0b000001000000000,
   0b000001000010000,
   0b000001010000000,
   0b001000000000000,
   0b001000000000001,
   0b001000010000001,
   0b001000010000010,
   0b001000010000011,
   0b001000010000100,
   0b001000010000111,
   0b001000010001000,
   0b001000010001110,
   0b001000010001111,
   0b001000110000000,
   0b001000111101000,
   0b010000000000000,
   0b010000110000000,
   0b011000000000000,
   0b011110010000111,
   0b100000000000000,
   0b101000000000000,
   0b110000000000000,
   0b111000000000000,
   0b111000000011100,
};
611 
/**
 * Gfx8 source-index lookup table used by instruction compaction.
 *
 * Holds 32 bit patterns, each written as a 12-bit binary literal.  The array
 * position is presumably the index stored in the compacted encoding (consumer
 * not visible here -- confirm), so entry order must be treated as significant.
 */
static const uint16_t gfx8_src_index_table[32] = {
   0b000000000000,
   0b000000000010,
   0b000000010000,
   0b000000010010,
   0b000000011000,
   0b000000100000,
   0b000000101000,
   0b000001001000,
   0b000001010000,
   0b000001110000,
   0b000001111000,
   0b001100000000,
   0b001100000010,
   0b001100001000,
   0b001100010000,
   0b001100010010,
   0b001100100000,
   0b001100101000,
   0b001100111000,
   0b001101000000,
   0b001101000010,
   0b001101001000,
   0b001101010000,
   0b001101100000,
   0b001101101000,
   0b001101110000,
   0b001101110001,
   0b001101111000,
   0b010001101000,
   0b010001101001,
   0b010001101010,
   0b010110001000,
};
646 
/**
 * Gfx11 datatype lookup table used by instruction compaction.
 *
 * Holds 32 bit patterns, each written as a 21-bit binary literal (the same
 * width as the Gfx8 datatype table, with different contents).  The array
 * position is presumably the index stored in the compacted encoding (consumer
 * not visible here -- confirm), so entry order must be treated as significant.
 */
static const uint32_t gfx11_datatype_table[32] = {
   0b001000000000000000001,
   0b001000000000001000000,
   0b001000000000001000001,
   0b001000000000011000001,
   0b001000000000101100101,
   0b001000000101111100101,
   0b001000000100101000001,
   0b001000000100101000101,
   0b001000000100101100101,
   0b001000001000001000001,
   0b001000011000001000000,
   0b001000011000001000001,
   0b001000101000101000101,
   0b001000111000101000100,
   0b001000111000101000101,
   0b001100100100101100101,
   0b001100101100100100101,
   0b001100101100101100100,
   0b001100101100101100101,
   0b001100111100101100100,
   0b000000000010000001100,
   0b001000000000001100101,
   0b001000000000101000101,
   0b001000001000001000000,
   0b001000101000101000100,
   0b001000111000100000100,
   0b001001001001000001001,
   0b001101111100101100101,
   0b001100111100101100101,
   0b001001111001101001100,
   0b001001001001001001000,
   0b001001011001001001000,
};
681 
/**
 * Gfx12 control-index lookup table used by instruction compaction.
 *
 * Holds 32 bit patterns, each written as a 21-bit binary literal.  The
 * comment beside each entry spells out the instruction controls that pattern
 * stands for: an optional (W) or predicate flag, the execution size and
 * channel offset, an optional conditional modifier with its flag register,
 * and (sat) / {AccWrEn} where set.  The array position is presumably the
 * index stored in the compacted encoding (consumer not visible here --
 * confirm), so entry order must be treated as significant.
 */
static const uint32_t gfx12_control_index_table[32] = {
   0b000000000000000000100, /*        (16|M0)                            */
   0b000000000000000000011, /*        (8|M0)                             */
   0b000000010000000000000, /* (W)    (1|M0)                             */
   0b000000010000000000100, /* (W)    (16|M0)                            */
   0b000000010000000000011, /* (W)    (8|M0)                             */
   0b010000000000000000100, /*        (16|M0)  (ge)f0.0                  */
   0b000000000000000100100, /*        (16|M16)                           */
   0b010100000000000000100, /*        (16|M0)  (lt)f0.0                  */
   0b000000000000000000000, /*        (1|M0)                             */
   0b000010000000000000100, /*        (16|M0)           (sat)            */
   0b000000000000000010011, /*        (8|M8)                             */
   0b001100000000000000100, /*        (16|M0)  (gt)f0.0                  */
   0b000100000000000000100, /*        (16|M0)  (eq)f0.0                  */
   0b000100010000000000100, /* (W)    (16|M0)  (eq)f0.0                  */
   0b001000000000000000100, /*        (16|M0)  (ne)f0.0                  */
   0b000000000000100000100, /* (f0.0) (16|M0)                            */
   0b010100000000000000011, /*        (8|M0)   (lt)f0.0                  */
   0b000000000000110000100, /* (f1.0) (16|M0)                            */
   0b000000010000000000001, /* (W)    (2|M0)                             */
   0b000000000000101000100, /* (f0.1) (16|M0)                            */
   0b000000000000111000100, /* (f1.1) (16|M0)                            */
   0b010000010000000000100, /* (W)    (16|M0)  (ge)f0.0                  */
   0b000000000000000100011, /*        (8|M16)                            */
   0b000000000000000110011, /*        (8|M24)                            */
   0b010100010000000000100, /* (W)    (16|M0)  (lt)f0.0                  */
   0b010000000000000000011, /*        (8|M0)   (ge)f0.0                  */
   0b000100010000000000000, /* (W)    (1|M0)   (eq)f0.0                  */
   0b000010000000000000011, /*        (8|M0)            (sat)            */
   0b010100000000010000100, /*        (16|M0)  (lt)f1.0                  */
   0b000100000000000000011, /*        (8|M0)   (eq)f0.0                  */
   0b000001000000000000011, /*        (8|M0)                   {AccWrEn} */
   0b000000010000000100100, /* (W)    (16|M16)                           */
};
716 
/**
 * Gfx12 datatype lookup table used by instruction compaction.
 *
 * Holds 32 bit patterns, each written as a 20-bit binary literal.  The
 * comment beside each entry lists the register file and type of three
 * operands; the first one carries a <stride>, which suggests it is the
 * destination followed by the two sources (confirm against the encoder).
 * The array position is presumably the index stored in the compacted
 * encoding (consumer not visible here -- confirm), so entry order must be
 * treated as significant.
 */
static const uint32_t gfx12_datatype_table[32] = {
   0b11010110100101010100, /* grf<1>:f  grf:f  grf:f  */
   0b00000110100101010100, /* grf<1>:f  grf:f  arf:ub */
   0b00000010101101010100, /* grf<1>:f  imm:f  arf:ub */
   0b01010110110101010100, /* grf<1>:f  grf:f  imm:f  */
   0b11010100100101010100, /* arf<1>:f  grf:f  grf:f  */
   0b11010010100101010100, /* grf<1>:f  arf:f  grf:f  */
   0b01010100110101010100, /* arf<1>:f  grf:f  imm:f  */
   0b00000000100000000000, /* arf<1>:ub arf:ub arf:ub */
   0b11010000100101010100, /* arf<1>:f  arf:f  grf:f  */
   0b00101110110011001100, /* grf<1>:d  grf:d  imm:w  */
   0b10110110100011001100, /* grf<1>:d  grf:d  grf:d  */
   0b01010010110101010100, /* grf<1>:f  arf:f  imm:f  */
   0b10010110100001000100, /* grf<1>:ud grf:ud grf:ud */
   0b01010000110101010100, /* arf<1>:f  arf:f  imm:f  */
   0b00110110110011001100, /* grf<1>:d  grf:d  imm:d  */
   0b00010110110001000100, /* grf<1>:ud grf:ud imm:ud */
   0b00000111000101010100, /* grf<2>:f  grf:f  arf:ub */
   0b00101100110011001100, /* arf<1>:d  grf:d  imm:w  */
   0b00000000100000100010, /* arf<1>:uw arf:uw arf:ub */
   0b00000010100001000100, /* grf<1>:ud arf:ud arf:ub */
   0b00100110110000101010, /* grf<1>:w  grf:uw imm:uv */
   0b00001110110000100010, /* grf<1>:uw grf:uw imm:uw */
   0b10010111000001000100, /* grf<2>:ud grf:ud grf:ud */
   0b00000110100101001100, /* grf<1>:d  grf:f  arf:ub */
   0b10001100100011001100, /* arf<1>:d  grf:d  grf:uw */
   0b00000110100001010100, /* grf<1>:f  grf:ud arf:ub */
   0b00101110110001001100, /* grf<1>:d  grf:ud imm:w  */
   0b00000010100000100010, /* grf<1>:uw arf:uw arf:ub */
   0b00000110100000110100, /* grf<1>:f  grf:uw arf:ub */
   0b00000110100000010100, /* grf<1>:f  grf:ub arf:ub */
   0b00000110100011010100, /* grf<1>:f  grf:d  arf:ub */
   0b00000010100101010100, /* grf<1>:f  arf:f  arf:ub */
};
751 
/**
 * Gfx12 sub-register lookup table used by instruction compaction.
 *
 * Holds 32 bit patterns, each written as a 15-bit binary literal.  Reading
 * the per-entry comments against the literals, each pattern packs three
 * 5-bit offsets: bits 4:0 hold the first listed value, bits 9:5 the second
 * and bits 14:10 the third.  Which operand each column belongs to is not
 * stated here (presumably destination, src0, src1 -- confirm).  The array
 * position is presumably the index stored in the compacted encoding
 * (consumer not visible here -- confirm), so entry order must be treated as
 * significant.
 */
static const uint16_t gfx12_subreg_table[32] = {
   0b000000000000000, /* .0  .0  .0  */
   0b100000000000000, /* .0  .0  .16 */
   0b001000000000000, /* .0  .0  .4  */
   0b011000000000000, /* .0  .0  .12 */
   0b000000010000000, /* .0  .4  .0  */
   0b010000000000000, /* .0  .0  .8  */
   0b101000000000000, /* .0  .0  .20 */
   0b000000000001000, /* .8  .0  .0  */
   0b000000100000000, /* .0  .8  .0  */
   0b110000000000000, /* .0  .0  .24 */
   0b111000000000000, /* .0  .0  .28 */
   0b000001000000000, /* .0  .16 .0  */
   0b000000000000100, /* .4  .0  .0  */
   0b000001100000000, /* .0  .24 .0  */
   0b000001010000000, /* .0  .20 .0  */
   0b000000110000000, /* .0  .12 .0  */
   0b000001110000000, /* .0  .28 .0  */
   0b000000000011100, /* .28 .0  .0  */
   0b000000000010000, /* .16 .0  .0  */
   0b000000000001100, /* .12 .0  .0  */
   0b000000000011000, /* .24 .0  .0  */
   0b000000000010100, /* .20 .0  .0  */
   0b000000000000010, /* .2  .0  .0  */
   0b000000101000000, /* .0  .10 .0  */
   0b000000001000000, /* .0  .2  .0  */
   0b000000010000100, /* .4  .4  .0  */
   0b000000001011100, /* .28 .2  .0  */
   0b000000001000010, /* .2  .2  .0  */
   0b000000110001100, /* .12 .12 .0  */
   0b000000000100000, /* .0  .1  .0  */
   0b000000001100000, /* .0  .3  .0  */
   0b110001100000000, /* .0  .24 .24 */
};
786 
/**
 * Gfx12 source-0 index lookup table used by instruction compaction.
 *
 * Holds 16 bit patterns, each written as a 12-bit binary literal.  The
 * comment beside each entry gives the source modifier (negate / abs) and the
 * register region the pattern stands for.  The array position is presumably
 * the index stored in the compacted encoding (consumer not visible here --
 * confirm), so entry order must be treated as significant.
 */
static const uint16_t gfx12_src0_index_table[16] = {
   0b010001100100, /*       r<8;8,1>  */
   0b000000000000, /*       r<0;1,0>  */
   0b010001100110, /*      -r<8;8,1>  */
   0b010001100101, /*  (abs)r<8;8,1>  */
   0b000000000010, /*      -r<0;1,0>  */
   0b001000000000, /*       r<2;1,0>  */
   0b001001000000, /*       r<2;4,0>  */
   0b001101000000, /*       r<4;4,0>  */
   0b001000100100, /*       r<2;2,1>  */
   0b001100000000, /*       r<4;1,0>  */
   0b001000100110, /*      -r<2;2,1>  */
   0b001101000100, /*       r<4;4,1>  */
   0b010001100111, /* -(abs)r<8;8,1>  */
   0b000100000000, /*       r<1;1,0>  */
   0b000000000001, /*  (abs)r<0;1,0>  */
   0b111100010000, /*       r[a]<1,0> */
};
805 
/**
 * Gfx12 source-1 index lookup table used by instruction compaction.
 *
 * Holds 16 bit patterns, each written as a 12-bit binary literal.  The
 * comment beside each entry gives the source modifier (negate / abs) and the
 * register region the pattern stands for.  The array position is presumably
 * the index stored in the compacted encoding (consumer not visible here --
 * confirm), so entry order must be treated as significant.
 */
static const uint16_t gfx12_src1_index_table[16] = {
   0b000100011001, /*       r<8;8,1> */
   0b000000000000, /*       r<0;1,0> */
   0b100100011001, /*      -r<8;8,1> */
   0b100000000000, /*      -r<0;1,0> */
   0b010100011001, /*  (abs)r<8;8,1> */
   0b100011010000, /*      -r<4;4,0> */
   0b000010000000, /*       r<2;1,0> */
   0b000010001001, /*       r<2;2,1> */
   0b100010001001, /*      -r<2;2,1> */
   0b000011010000, /*       r<4;4,0> */
   0b000011010001, /*       r<4;4,1> */
   0b000011000000, /*       r<4;1,0> */
   0b110100011001, /* -(abs)r<8;8,1> */
   0b010000000000, /*  (abs)r<0;1,0> */
   0b110000000000, /* -(abs)r<0;1,0> */
   0b100011010001, /*      -r<4;4,1> */
};
824 
/**
 * XeHP source-0 index lookup table used by instruction compaction.
 *
 * Holds 16 bit patterns, each written as a 12-bit binary literal.  The
 * comment beside each entry gives the source modifier (negate / abs) and the
 * register region the pattern stands for.  The array position is presumably
 * the index stored in the compacted encoding (consumer not visible here --
 * confirm), so entry order must be treated as significant.
 */
static const uint16_t xehp_src0_index_table[16] = {
   0b000100000000, /*       r<1;1,0>  */
   0b000000000000, /*       r<0;1,0>  */
   0b000100000010, /*      -r<1;1,0>  */
   0b000100000001, /*  (abs)r<1;1,0>  */
   0b000000000010, /*      -r<0;1,0>  */
   0b001000000000, /*       r<2;1,0>  */
   0b001001000000, /*       r<2;4,0>  */
   0b001101000000, /*       r<4;4,0>  */
   0b001100000000, /*       r<4;1,0>  */
   0b000100000011, /* -(abs)r<1;1,0>  */
   0b000000000001, /*  (abs)r<0;1,0>  */
   0b111100010000, /*       r[a]<1,0> */
   0b010001100000, /*       r<8;8,0>  */
   0b000101000000, /*       r<1;4,0>  */
   0b010001001000, /*       r<8;4,2>  */
   0b001000000010, /*      -r<2;1,0>  */
};
843 
/**
 * XeHP source-1 index lookup table used by instruction compaction.
 *
 * Holds 16 bit patterns, each written as a 12-bit binary literal.  The
 * comment beside each entry gives the source modifier (negate / abs) and the
 * register region the pattern stands for.  The array position is presumably
 * the index stored in the compacted encoding (consumer not visible here --
 * confirm), so entry order must be treated as significant.
 */
static const uint16_t xehp_src1_index_table[16] = {
   0b000001000000, /*       r<1;1,0>    */
   0b000000000000, /*       r<0;1,0>    */
   0b100001000000, /*      -r<1;1,0>    */
   0b100000000000, /*      -r<0;1,0>    */
   0b010001000000, /*  (abs)r<1;1,0>    */
   0b100011010000, /*      -r<4;4,0>    */
   0b000010000000, /*       r<2;1,0>    */
   0b000011010000, /*       r<4;4,0>    */
   0b000011000000, /*       r<4;1,0>    */
   0b110001000000, /* -(abs)r<1;1,0>    */
   0b010000000000, /*  (abs)r<0;1,0>    */
   0b110000000000, /* -(abs)r<0;1,0>    */
   0b000100011000, /*       r<8;8,0>    */
   0b100010000000, /*      -r<2;1,0>    */
   0b100000001001, /*      -r<0;2,1>    */
   0b100001000100, /*      -r[a]<1;1,0> */
};
862 
/**
 * Xe2 control-index lookup table used by instruction compaction.
 *
 * Holds 32 bit patterns, each written as an 18-bit binary literal.  The
 * comment beside each entry spells out the instruction controls that pattern
 * stands for: an optional (W) or predicate flag, the execution size and
 * channel offset, an optional conditional modifier with its flag register,
 * and (sat) where set.  The array position is presumably the index stored in
 * the compacted encoding (consumer not visible here -- confirm), so entry
 * order must be treated as significant.
 */
static const uint32_t xe2_control_index_table[32] = {
   0b000000000000000100, /* (16|M0)               */
   0b000000100000000000, /* (W) (1|M0)            */
   0b000000000010000100, /* (16|M16)              */
   0b000000000000000000, /* (1|M0)                */
   0b000000100000000100, /* (W) (16|M0)           */
   0b010000000000000100, /* (16|M0) (.ge)f0.0     */
   0b010100000000000100, /* (16|M0) (.lt)f0.0     */
   0b000000100000000010, /* (W) (4|M0)            */
   0b000000000000000101, /* (32|M0)               */
   0b000000100000000011, /* (W) (8|M0)            */
   0b001100100000000000, /* (W) (1|M0) (.gt)f0.0  */
   0b000010000000000100, /* (16|M0) (sat)         */
   0b000100000000000100, /* (16|M0) (.eq)f0.0     */
   0b000000100000000001, /* (W) (2|M0)            */
   0b001100000000000100, /* (16|M0) (.gt)f0.0     */
   0b000100100000000000, /* (W) (1|M0) (.eq)f0.0  */
   0b010100100000000010, /* (W) (4|M0) (.lt)f0.0  */
   0b010000100000000000, /* (W) (1|M0) (.ge)f0.0  */
   0b010000100000000010, /* (W) (4|M0) (.ge)f0.0  */
   0b010100100000000000, /* (W) (1|M0) (.lt)f0.0  */
   0b001000000000000100, /* (16|M0) (.ne)f0.0     */
   0b000000000100100100, /* (f2.0) (16|M0)        */
   0b010100100000000011, /* (W) (8|M0) (.lt)f0.0  */
   0b000000000100011100, /* (f1.1) (16|M0)        */
   0b010000100000000011, /* (W) (8|M0) (.ge)f0.0  */
   0b000000000100001100, /* (f0.1) (16|M0)        */
   0b000000000100010100, /* (f1.0) (16|M0)        */
   0b000000000100110100, /* (f3.0) (16|M0)        */
   0b000000000100111100, /* (f3.1) (16|M0)        */
   0b000000000100101100, /* (f2.1) (16|M0)        */
   0b000000000100000100, /* (f0.0) (16|M0)        */
   0b010100000000100100, /* (16|M0) (.lt)f2.0     */
};
897 
/**
 * Xe2 datatype lookup table used by instruction compaction.
 *
 * Holds 32 bit patterns, each written as a 20-bit binary literal.  The
 * comment beside each entry lists the register file and type of the operands
 * the pattern stands for (three operands for most entries, two for the
 * immediate-source forms); the first one carries a <stride>, which suggests
 * it is the destination (confirm against the encoder).  The array position
 * is presumably the index stored in the compacted encoding (consumer not
 * visible here -- confirm), so entry order must be treated as significant.
 */
static const uint32_t xe2_datatype_table[32] = {
   0b11010110100101010100, /* grf<1>:f grf:f grf:f    */
   0b11010100100101010100, /* arf<1>:f grf:f grf:f    */
   0b00000110100101010100, /* grf<1>:f grf:f arf:ub   */
   0b00000110100001000100, /* grf<1>:ud grf:ud arf:ub */
   0b01010110110101010100, /* grf<1>:f grf:f imm:f    */
   0b11010010100101010100, /* grf<1>:f arf:f grf:f    */
   0b10111110100011101110, /* grf<1>:q grf:q grf:q    */
   0b00000000100000000000, /* arf<1>:ub arf:ub arf:ub */
   0b01010110100101010100, /* grf<1>:f grf:f arf:f    */
   0b00000010101001000100, /* grf<1>:ud imm:ud        */
   0b00101110110011001100, /* grf<1>:d grf:d imm:w    */
   0b11010000100101010100, /* arf<1>:f arf:f grf:f    */
   0b01010100100101010100, /* arf<1>:f grf:f arf:f    */
   0b01010100110101010100, /* arf<1>:f grf:f imm:f    */
   0b00000010101101010100, /* grf<1>:f imm:f          */
   0b00000110100011001100, /* grf<1>:d grf:d arf:ub   */
   0b00101110110011101110, /* grf<1>:q grf:q imm:w    */
   0b00000110100001100110, /* grf<1>:uq grf:uq arf:ub */
   0b01010000100101010100, /* arf<1>:f arf:f arf:f    */
   0b10110110100011001100, /* grf<1>:d grf:d grf:d    */
   0b01010010100101010100, /* grf<1>:f arf:f arf:f    */
   0b00000111000001000100, /* grf<2>:ud grf:ud arf:ub */
   0b00110110110011001110, /* grf<1>:q grf:d imm:d    */
   0b00101100110011001100, /* arf<1>:d grf:d imm:w    */
   0b11011110100101110110, /* grf<1>:df grf:df grf:df */
   0b01010010110101010100, /* grf<1>:f arf:f imm:f    */
   0b10010110100001000100, /* grf<1>:ud grf:ud grf:ud */
   0b00000010100001000100, /* grf<1>:ud arf:ud arf:ub */
   0b00001110110001000100, /* grf<1>:ud grf:ud imm:uw */
   0b00000010101010101100, /* grf<1>:d imm:w          */
   0b01010000110101010100, /* arf<1>:f arf:f imm:f    */
   0b00000100100001000100, /* arf<1>:ud grf:ud arf:ub */
};
932 
/* Xe2 subregister index table: 16 entries, each written out as 12 binary
 * digits.
 *
 * The comment next to each entry gives two subregister offsets.
 *
 * NOTE(review): which operand each of the two offsets belongs to is not
 * visible from this part of the file, and no code here refers to this table
 * by name -- confirm both against compaction_state_init() and the hardware
 * documentation.
 */
static const uint16_t xe2_subreg_table[16] = {
   0b000000000000, /* .0 .0  */
   0b000010000000, /* .0 .4  */
   0b000000000100, /* .4 .0  */
   0b010000000000, /* .0 .32 */
   0b001000000000, /* .0 .16 */
   0b000000001000, /* .8 .0  */
   0b000100000000, /* .0 .8  */
   0b010100000000, /* .0 .40 */
   0b011000000000, /* .0 .48 */
   0b000110000000, /* .0 .12 */
   0b000000010000, /* .16 .0 */
   0b011010000000, /* .0 .52 */
   0b001100000000, /* .0 .24 */
   0b011100000000, /* .0 .56 */
   0b010110000000, /* .0 .44 */
   0b010010000000, /* .0 .36 */
};
951 
/* Xe2 src0 index table: 8 entries, each written out as 11 binary digits.
 *
 * The comment next to each entry shows the source region it stands for,
 * including any negate ("-") or absolute-value ("(abs)") modifier.
 *
 * NOTE(review): no code in this part of the file refers to this table by
 * name; presumably it is installed into compaction_state::src0_index_table
 * by compaction_state_init() -- confirm.
 */
static const uint16_t xe2_src0_index_table[8] = {
   0b00100000000, /* r<1;1,0>      */
   0b00000000000, /* r<0;1,0>      */
   0b01000000000, /* r<2;1,0>      */
   0b00100000010, /* -r<1;1,0>     */
   0b01100000000, /* r<4;1,0>      */
   0b00100000001, /* (abs)r<1;1,0> */
   0b00000000010, /* -r<0;1,0>     */
   0b01001000000, /* r<2;4,0>      */
};
962 
/* Xe2 src1 index table: 16 entries, each written out as 16 binary digits.
 *
 * The comment next to each entry shows the source region it stands for
 * together with a subregister offset (the ".N" suffix).
 *
 * NOTE(review): no code in this part of the file refers to this table by
 * name; presumably it is installed into compaction_state::src1_index_table
 * by compaction_state_init() -- confirm.
 */
static const uint16_t xe2_src1_index_table[16] = {
   0b0000100000000000, /* r<1;1,0>.0  */
   0b0000000000000000, /* r<0;1,0>.0  */
   0b1000100000000000, /* -r<1;1,0>.0 */
   0b0000000000010000, /* r<0;1,0>.8  */
   0b0000000000001000, /* r<0;1,0>.4  */
   0b0000000000011000, /* r<0;1,0>.12 */
   0b0000000001010000, /* r<0;1,0>.40 */
   0b0000000001000000, /* r<0;1,0>.32 */
   0b0000000000100000, /* r<0;1,0>.16 */
   0b0000000001111000, /* r<0;1,0>.60 */
   0b0000000000111000, /* r<0;1,0>.28 */
   0b0000000000101000, /* r<0;1,0>.20 */
   0b0000000001011000, /* r<0;1,0>.44 */
   0b0000000001001000, /* r<0;1,0>.36 */
   0b0000000001110000, /* r<0;1,0>.56 */
   0b0000000000110000, /* r<0;1,0>.24 */
};
981 
/* This is actually the control index table for Cherryview (26 bits), but the
 * only difference from Broadwell (24 bits) is that it has two extra 0-bits at
 * the start.
 *
 * The low 24 bits have the same mappings on both hardware.
 *
 * Entry layout, as packed by set_3src_control_index() and unpacked by
 * set_uncompacted_3src_control_index():
 *
 *    entry bits 20:0   <-> instruction bits 28:8
 *    entry bits 23:21  <-> instruction bits 34:32
 *    entry bits 25:24  <-> instruction bits 36:35 (Cherryview only)
 */
static const uint32_t gfx8_3src_control_index_table[4] = {
   0b00100000000110000000000001,
   0b00000000000110000000000001,
   0b00000000001000000000000001,
   0b00000000001000000000100001,
};
994 
/* This is actually the control index table for Cherryview (49 bits), but the
 * only difference from Broadwell (46 bits) is that it has three extra 0-bits
 * at the start.
 *
 * The low 44 bits have the same mappings on both hardware, and since the high
 * three bits on Broadwell are zero, we can reuse Cherryview's table.
 *
 * Entry layout, as packed by set_3src_source_index() and unpacked by
 * set_uncompacted_3src_source_index():
 *
 *    entry bits 18:0   <-> instruction bits 55:37
 *    entry bits 26:19  <-> instruction bits 72:65
 *    entry bits 34:27  <-> instruction bits 93:86
 *    entry bits 42:35  <-> instruction bits 114:107
 *    entry bit  43     <-> instruction bit  83
 *
 * and above that, on Cherryview:
 *
 *    entry bit  44     <-> instruction bit  84
 *    entry bits 46:45  <-> instruction bits 105:104
 *    entry bits 48:47  <-> instruction bits 126:125
 *
 * or, on the other Gfx8 platforms:
 *
 *    entry bit  44     <-> instruction bit  104
 *    entry bit  45     <-> instruction bit  125
 */
static const uint64_t gfx8_3src_source_index_table[4] = {
   0b0000001110010011100100111001000001111000000000000,
   0b0000001110010011100100111001000001111000000000010,
   0b0000001110010011100100111001000001111000000001000,
   0b0000001110010011100100111001000001111000000100000,
};
1008 
/* Per-ISA lookup state shared by the compaction and uncompaction helpers.
 *
 * Each table pointer refers to the set of uncompacted bit patterns that a
 * compacted index field can select.  The set_*_index() helpers search a
 * table for a match; the set_uncompacted_*() helpers index it directly.
 */
struct compaction_state {
   /* ISA description; the helpers read isa->devinfo from it. */
   const struct elk_isa_info *isa;
   /* Searched by set_control_index(), which scans 32 entries. */
   const uint32_t *control_index_table;
   /* Searched by set_datatype_index(), which scans 32 entries. */
   const uint32_t *datatype_table;
   /* Searched by set_subreg_index(). */
   const uint16_t *subreg_table;
   /* Searched by set_src0_index(). */
   const uint16_t *src0_index_table;
   /* Searched by set_src1_index() when src1 is not an immediate. */
   const uint16_t *src1_index_table;
};

/* Fills in *c for the given ISA; defined later in this file. */
static void compaction_state_init(struct compaction_state *c,
                                  const struct elk_isa_info *isa);
1020 
1021 static bool
set_control_index(const struct compaction_state * c,elk_compact_inst * dst,const elk_inst * src)1022 set_control_index(const struct compaction_state *c,
1023                   elk_compact_inst *dst, const elk_inst *src)
1024 {
1025    const struct intel_device_info *devinfo = c->isa->devinfo;
1026    uint32_t uncompacted; /* 17b/G45; 19b/IVB+ */
1027 
1028    if (devinfo->ver >= 8) {
1029       uncompacted = (elk_inst_bits(src, 33, 31) << 16) | /*  3b */
1030                     (elk_inst_bits(src, 23, 12) <<  4) | /* 12b */
1031                     (elk_inst_bits(src, 10,  9) <<  2) | /*  2b */
1032                     (elk_inst_bits(src, 34, 34) <<  1) | /*  1b */
1033                     (elk_inst_bits(src,  8,  8));        /*  1b */
1034    } else {
1035       uncompacted = (elk_inst_bits(src, 31, 31) << 16) | /*  1b */
1036                     (elk_inst_bits(src, 23,  8));        /* 16b */
1037 
1038       /* On gfx7, the flag register and subregister numbers are integrated into
1039        * the control index.
1040        */
1041       if (devinfo->ver == 7)
1042          uncompacted |= elk_inst_bits(src, 90, 89) << 17; /* 2b */
1043    }
1044 
1045    for (int i = 0; i < 32; i++) {
1046       if (c->control_index_table[i] == uncompacted) {
1047          elk_compact_inst_set_control_index(devinfo, dst, i);
1048 	 return true;
1049       }
1050    }
1051 
1052    return false;
1053 }
1054 
1055 static bool
set_datatype_index(const struct compaction_state * c,elk_compact_inst * dst,const elk_inst * src,bool is_immediate)1056 set_datatype_index(const struct compaction_state *c, elk_compact_inst *dst,
1057                    const elk_inst *src, bool is_immediate)
1058 {
1059    const struct intel_device_info *devinfo = c->isa->devinfo;
1060    uint32_t uncompacted; /* 18b/G45+; 21b/BDW+ */
1061 
1062    if (devinfo->ver >= 8) {
1063       uncompacted = (elk_inst_bits(src, 63, 61) << 18) | /*  3b */
1064                     (elk_inst_bits(src, 94, 89) << 12) | /*  6b */
1065                     (elk_inst_bits(src, 46, 35));        /* 12b */
1066    } else {
1067       uncompacted = (elk_inst_bits(src, 63, 61) << 15) | /*  3b */
1068                     (elk_inst_bits(src, 46, 32));        /* 15b */
1069    }
1070 
1071    for (int i = 0; i < 32; i++) {
1072       if (c->datatype_table[i] == uncompacted) {
1073          elk_compact_inst_set_datatype_index(devinfo, dst, i);
1074 	 return true;
1075       }
1076    }
1077 
1078    return false;
1079 }
1080 
1081 static bool
set_subreg_index(const struct compaction_state * c,elk_compact_inst * dst,const elk_inst * src,bool is_immediate)1082 set_subreg_index(const struct compaction_state *c, elk_compact_inst *dst,
1083                  const elk_inst *src, bool is_immediate)
1084 {
1085    const struct intel_device_info *devinfo = c->isa->devinfo;
1086 
1087    uint16_t uncompacted =              /* 15b/G45+ */
1088       (elk_inst_bits(src, 52, 48) << 0) |    /* 5b */
1089       (elk_inst_bits(src, 68, 64) << 5);     /* 5b */
1090 
1091    if (!is_immediate)
1092       uncompacted |= elk_inst_bits(src, 100, 96) << 10; /* 5b */
1093 
1094    for (int i = 0; i < ARRAY_SIZE(g45_subreg_table); i++) {
1095       if (c->subreg_table[i] == uncompacted) {
1096          elk_compact_inst_set_subreg_index(devinfo, dst, i);
1097 	 return true;
1098       }
1099    }
1100 
1101    return false;
1102 }
1103 
1104 static bool
set_src0_index(const struct compaction_state * c,elk_compact_inst * dst,const elk_inst * src)1105 set_src0_index(const struct compaction_state *c, elk_compact_inst *dst,
1106                const elk_inst *src)
1107 {
1108    const struct intel_device_info *devinfo = c->isa->devinfo;
1109 
1110    const uint16_t uncompacted =      /* 12b/G45+ */
1111       elk_inst_bits(src, 88, 77);         /* 12b */
1112 
1113    for (int i = 0; i < ARRAY_SIZE(gfx8_src_index_table); i++) {
1114       if (c->src0_index_table[i] == uncompacted) {
1115          elk_compact_inst_set_src0_index(devinfo, dst, i);
1116 	 return true;
1117       }
1118    }
1119 
1120    return false;
1121 }
1122 
1123 static bool
set_src1_index(const struct compaction_state * c,elk_compact_inst * dst,const elk_inst * src,bool is_immediate,unsigned imm)1124 set_src1_index(const struct compaction_state *c, elk_compact_inst *dst,
1125                const elk_inst *src, bool is_immediate, unsigned imm)
1126 {
1127    const struct intel_device_info *devinfo = c->isa->devinfo;
1128    if (is_immediate) {
1129       /* src1 index takes the high 5 bits of the 13-bit compacted value */
1130       elk_compact_inst_set_src1_index(devinfo, dst, imm >> 8);
1131       return true;
1132    } else {
1133       const uint16_t uncompacted =         /* 12b/G45+ */
1134          elk_inst_bits(src, 120, 109);          /* 12b */
1135 
1136       for (int i = 0; i < ARRAY_SIZE(gfx8_src_index_table); i++) {
1137          if (c->src1_index_table[i] == uncompacted) {
1138             elk_compact_inst_set_src1_index(devinfo, dst, i);
1139             return true;
1140          }
1141       }
1142    }
1143 
1144    return false;
1145 }
1146 
1147 static bool
set_3src_control_index(const struct intel_device_info * devinfo,elk_compact_inst * dst,const elk_inst * src)1148 set_3src_control_index(const struct intel_device_info *devinfo,
1149                        elk_compact_inst *dst, const elk_inst *src)
1150 {
1151    assert(devinfo->ver >= 8);
1152 
1153    uint32_t uncompacted =      /* 24b/BDW; 26b/CHV */
1154       (elk_inst_bits(src, 34, 32) << 21) |  /*  3b */
1155       (elk_inst_bits(src, 28,  8));         /* 21b */
1156 
1157    if (devinfo->platform == INTEL_PLATFORM_CHV) {
1158       uncompacted |=
1159          elk_inst_bits(src, 36, 35) << 24;  /*  2b */
1160    }
1161 
1162    for (unsigned i = 0; i < ARRAY_SIZE(gfx8_3src_control_index_table); i++) {
1163       if (gfx8_3src_control_index_table[i] == uncompacted) {
1164          elk_compact_inst_set_3src_control_index(devinfo, dst, i);
1165          return true;
1166       }
1167    }
1168 
1169    return false;
1170 }
1171 
1172 static bool
set_3src_source_index(const struct intel_device_info * devinfo,elk_compact_inst * dst,const elk_inst * src)1173 set_3src_source_index(const struct intel_device_info *devinfo,
1174                       elk_compact_inst *dst, const elk_inst *src)
1175 {
1176    assert(devinfo->ver >= 8);
1177 
1178    uint64_t uncompacted =         /* 46b/BDW; 49b/CHV */
1179       (elk_inst_bits(src,  83,  83) << 43) |   /*  1b */
1180       (elk_inst_bits(src, 114, 107) << 35) |   /*  8b */
1181       (elk_inst_bits(src,  93,  86) << 27) |   /*  8b */
1182       (elk_inst_bits(src,  72,  65) << 19) |   /*  8b */
1183       (elk_inst_bits(src,  55,  37));          /* 19b */
1184 
1185    if (devinfo->platform == INTEL_PLATFORM_CHV) {
1186       uncompacted |=
1187          (elk_inst_bits(src, 126, 125) << 47) | /* 2b */
1188          (elk_inst_bits(src, 105, 104) << 45) | /* 2b */
1189          (elk_inst_bits(src,  84,  84) << 44);  /* 1b */
1190    } else {
1191       uncompacted |=
1192          (elk_inst_bits(src, 125, 125) << 45) | /* 1b */
1193          (elk_inst_bits(src, 104, 104) << 44);  /* 1b */
1194    }
1195 
1196    for (unsigned i = 0; i < ARRAY_SIZE(gfx8_3src_source_index_table); i++) {
1197       if (gfx8_3src_source_index_table[i] == uncompacted) {
1198          elk_compact_inst_set_3src_source_index(devinfo, dst, i);
1199          return true;
1200       }
1201    }
1202 
1203    return false;
1204 }
1205 
1206 static bool
has_unmapped_bits(const struct elk_isa_info * isa,const elk_inst * src)1207 has_unmapped_bits(const struct elk_isa_info *isa, const elk_inst *src)
1208 {
1209    const struct intel_device_info *devinfo = isa->devinfo;
1210 
1211    /* EOT can only be mapped on a send if the src1 is an immediate */
1212    if ((elk_inst_opcode(isa, src) == ELK_OPCODE_SENDC ||
1213         elk_inst_opcode(isa, src) == ELK_OPCODE_SEND) &&
1214        elk_inst_eot(devinfo, src))
1215       return true;
1216 
1217    /* Check for instruction bits that don't map to any of the fields of the
1218     * compacted instruction.  The instruction cannot be compacted if any of
1219     * them are set.  They overlap with:
1220     *  - NibCtrl (bit 47 on Gfx7, bit 11 on Gfx8)
1221     *  - Dst.AddrImm[9] (bit 47 on Gfx8)
1222     *  - Src0.AddrImm[9] (bit 95 on Gfx8)
1223     *  - Imm64[27:31] (bits 91-95 on Gfx7, bit 95 on Gfx8)
1224     *  - UIP[31] (bit 95 on Gfx8)
1225     */
1226    if (devinfo->ver >= 8) {
1227       assert(!elk_inst_bits(src, 7,  7));
1228       return elk_inst_bits(src, 95, 95) ||
1229              elk_inst_bits(src, 47, 47) ||
1230              elk_inst_bits(src, 11, 11);
1231    } else {
1232       assert(!elk_inst_bits(src, 7,  7) &&
1233              !(devinfo->ver < 7 && elk_inst_bits(src, 90, 90)));
1234       return elk_inst_bits(src, 95, 91) ||
1235              elk_inst_bits(src, 47, 47);
1236    }
1237 }
1238 
1239 static bool
has_3src_unmapped_bits(const struct intel_device_info * devinfo,const elk_inst * src)1240 has_3src_unmapped_bits(const struct intel_device_info *devinfo,
1241                        const elk_inst *src)
1242 {
1243    /* Check for three-source instruction bits that don't map to any of the
1244     * fields of the compacted instruction.  All of them seem to be reserved
1245     * bits currently.
1246     */
1247    if (devinfo->platform == INTEL_PLATFORM_CHV) {
1248       assert(!elk_inst_bits(src, 127, 127) &&
1249              !elk_inst_bits(src, 7,  7));
1250    } else {
1251       assert(devinfo->ver >= 8);
1252       assert(!elk_inst_bits(src, 127, 126) &&
1253              !elk_inst_bits(src, 105, 105) &&
1254              !elk_inst_bits(src, 84, 84) &&
1255              !elk_inst_bits(src, 7,  7));
1256 
1257       /* Src1Type and Src2Type, used for mixed-precision floating point */
1258       if (elk_inst_bits(src, 36, 35))
1259          return true;
1260    }
1261 
1262    return false;
1263 }
1264 
1265 static bool
elk_try_compact_3src_instruction(const struct elk_isa_info * isa,elk_compact_inst * dst,const elk_inst * src)1266 elk_try_compact_3src_instruction(const struct elk_isa_info *isa,
1267                                  elk_compact_inst *dst, const elk_inst *src)
1268 {
1269    const struct intel_device_info *devinfo = isa->devinfo;
1270    assert(devinfo->ver >= 8);
1271 
1272    if (has_3src_unmapped_bits(devinfo, src))
1273       return false;
1274 
1275 #define compact(field) \
1276    elk_compact_inst_set_3src_##field(devinfo, dst, elk_inst_3src_##field(devinfo, src))
1277 #define compact_a16(field) \
1278    elk_compact_inst_set_3src_##field(devinfo, dst, elk_inst_3src_a16_##field(devinfo, src))
1279 
1280    compact(hw_opcode);
1281 
1282    if (!set_3src_control_index(devinfo, dst, src))
1283       return false;
1284 
1285    if (!set_3src_source_index(devinfo, dst, src))
1286       return false;
1287 
1288    compact(dst_reg_nr);
1289    compact_a16(src0_rep_ctrl);
1290    compact(debug_control);
1291    compact(saturate);
1292    compact_a16(src1_rep_ctrl);
1293    compact_a16(src2_rep_ctrl);
1294    compact(src0_reg_nr);
1295    compact(src1_reg_nr);
1296    compact(src2_reg_nr);
1297    compact_a16(src0_subreg_nr);
1298    compact_a16(src1_subreg_nr);
1299    compact_a16(src2_subreg_nr);
1300 
1301    elk_compact_inst_set_3src_cmpt_control(devinfo, dst, true);
1302 
1303 #undef compact
1304 #undef compact_a16
1305 
1306    return true;
1307 }
1308 
1309 /* On SNB through ICL, compacted instructions have 12-bits for immediate
1310  * sources, and a 13th bit that's replicated through the high 20 bits.
1311  *
1312  * Effectively this means we get 12-bit integers, 0.0f, and some limited uses
1313  * of packed vectors as compactable immediates.
1314  *
1315  * Returns the compacted immediate, or -1 if immediate cannot be compacted
1316  */
1317 static int
compact_immediate(const struct intel_device_info * devinfo,enum elk_reg_type type,unsigned imm)1318 compact_immediate(const struct intel_device_info *devinfo,
1319                   enum elk_reg_type type, unsigned imm)
1320 {
1321    /* We get the low 12 bits as-is; 13th is replicated */
1322    if (((int)imm >> 12) == 0 || ((int)imm >> 12 == -1)) {
1323       return imm & 0x1fff;
1324    }
1325    return -1;
1326 }
1327 
1328 static int
uncompact_immediate(const struct intel_device_info * devinfo,enum elk_reg_type type,unsigned compact_imm)1329 uncompact_immediate(const struct intel_device_info *devinfo,
1330                     enum elk_reg_type type, unsigned compact_imm)
1331 {
1332    /* Replicate the 13th bit into the high 19 bits */
1333    return (int)(compact_imm << 19) >> 19;
1334 }
1335 
1336 static bool
has_immediate(const struct intel_device_info * devinfo,const elk_inst * inst,enum elk_reg_type * type)1337 has_immediate(const struct intel_device_info *devinfo, const elk_inst *inst,
1338               enum elk_reg_type *type)
1339 {
1340    if (elk_inst_src0_reg_file(devinfo, inst) == ELK_IMMEDIATE_VALUE) {
1341       *type = elk_inst_src0_type(devinfo, inst);
1342       return *type != INVALID_REG_TYPE;
1343    } else if (elk_inst_src1_reg_file(devinfo, inst) == ELK_IMMEDIATE_VALUE) {
1344       *type = elk_inst_src1_type(devinfo, inst);
1345       return *type != INVALID_REG_TYPE;
1346    }
1347 
1348    return false;
1349 }
1350 
1351 /**
1352  * Applies some small changes to instruction types to increase chances of
1353  * compaction.
1354  */
1355 static elk_inst
precompact(const struct elk_isa_info * isa,elk_inst inst)1356 precompact(const struct elk_isa_info *isa, elk_inst inst)
1357 {
1358    const struct intel_device_info *devinfo = isa->devinfo;
1359 
1360    if (elk_inst_src0_reg_file(devinfo, &inst) != ELK_IMMEDIATE_VALUE)
1361       return inst;
1362 
1363    /* The Bspec's section titled "Non-present Operands" claims that if src0
1364     * is an immediate that src1's type must be the same as that of src0.
1365     *
1366     * The SNB+ DataTypeIndex instruction compaction tables contain mappings
1367     * that do not follow this rule. E.g., from the IVB/HSW table:
1368     *
1369     *  DataTypeIndex   18-Bit Mapping       Mapped Meaning
1370     *        3         001000001011111101   r:f | i:vf | a:ud | <1> | dir |
1371     *
1372     * And from the SNB table:
1373     *
1374     *  DataTypeIndex   18-Bit Mapping       Mapped Meaning
1375     *        8         001000000111101100   a:w | i:w | a:ud | <1> | dir |
1376     *
1377     * Neither of these cause warnings from the simulator when used,
1378     * compacted or otherwise. In fact, all compaction mappings that have an
1379     * immediate in src0 use a:ud for src1.
1380     *
1381     * The GM45 instruction compaction tables do not contain mapped meanings
1382     * so it's not clear whether it has the restriction. We'll assume it was
1383     * lifted on SNB. (FINISHME: decode the GM45 tables and check.)
1384     *
1385     * Don't do any of this for 64-bit immediates, since the src1 fields
1386     * overlap with the immediate and setting them would overwrite the
1387     * immediate we set.
1388     */
1389    if (devinfo->ver >= 6 &&
1390        !(devinfo->platform == INTEL_PLATFORM_HSW &&
1391          elk_inst_opcode(isa, &inst) == ELK_OPCODE_DIM) &&
1392        !(devinfo->ver >= 8 &&
1393          (elk_inst_src0_type(devinfo, &inst) == ELK_REGISTER_TYPE_DF ||
1394           elk_inst_src0_type(devinfo, &inst) == ELK_REGISTER_TYPE_UQ ||
1395           elk_inst_src0_type(devinfo, &inst) == ELK_REGISTER_TYPE_Q))) {
1396       elk_inst_set_src1_reg_hw_type(devinfo, &inst, 0);
1397    }
1398 
1399    /* There are no mappings for dst:d | i:d, so if the immediate is suitable
1400     * set the types to :UD so the instruction can be compacted.
1401     */
1402    if (compact_immediate(devinfo, ELK_REGISTER_TYPE_D,
1403                          elk_inst_imm_ud(devinfo, &inst)) != -1 &&
1404        elk_inst_cond_modifier(devinfo, &inst) == ELK_CONDITIONAL_NONE &&
1405        elk_inst_src0_type(devinfo, &inst) == ELK_REGISTER_TYPE_D &&
1406        elk_inst_dst_type(devinfo, &inst) == ELK_REGISTER_TYPE_D) {
1407       enum elk_reg_file src_file = elk_inst_src0_reg_file(devinfo, &inst);
1408       enum elk_reg_file dst_file = elk_inst_dst_reg_file(devinfo, &inst);
1409 
1410       elk_inst_set_src0_file_type(devinfo, &inst, src_file, ELK_REGISTER_TYPE_UD);
1411       elk_inst_set_dst_file_type(devinfo, &inst, dst_file, ELK_REGISTER_TYPE_UD);
1412    }
1413 
1414    return inst;
1415 }
1416 
1417 /**
1418  * Tries to compact instruction src into dst.
1419  *
1420  * It doesn't modify dst unless src is compactable, which is relied on by
1421  * elk_compact_instructions().
1422  */
1423 static bool
try_compact_instruction(const struct compaction_state * c,elk_compact_inst * dst,const elk_inst * src)1424 try_compact_instruction(const struct compaction_state *c,
1425                         elk_compact_inst *dst, const elk_inst *src)
1426 {
1427    const struct intel_device_info *devinfo = c->isa->devinfo;
1428    elk_compact_inst temp;
1429 
1430    assert(elk_inst_cmpt_control(devinfo, src) == 0);
1431 
1432    if (elk_is_3src(c->isa, elk_inst_opcode(c->isa, src))) {
1433       if (devinfo->ver >= 8) {
1434          memset(&temp, 0, sizeof(temp));
1435          if (elk_try_compact_3src_instruction(c->isa, &temp, src)) {
1436             *dst = temp;
1437             return true;
1438          } else {
1439             return false;
1440          }
1441       } else {
1442          return false;
1443       }
1444    }
1445 
1446    enum elk_reg_type type;
1447    bool is_immediate = has_immediate(devinfo, src, &type);
1448 
1449    unsigned compacted_imm = 0;
1450 
1451    if (is_immediate) {
1452       /* Instructions with immediates cannot be compacted on Gen < 6 */
1453       if (devinfo->ver < 6)
1454          return false;
1455 
1456       compacted_imm = compact_immediate(devinfo, type,
1457                                         elk_inst_imm_ud(devinfo, src));
1458       if (compacted_imm == -1)
1459          return false;
1460    }
1461 
1462    if (has_unmapped_bits(c->isa, src))
1463       return false;
1464 
1465    memset(&temp, 0, sizeof(temp));
1466 
1467 #define compact(field) \
1468    elk_compact_inst_set_##field(devinfo, &temp, elk_inst_##field(devinfo, src))
1469 #define compact_reg(field) \
1470    elk_compact_inst_set_##field##_reg_nr(devinfo, &temp, \
1471                                        elk_inst_##field##_da_reg_nr(devinfo, src))
1472 
1473    compact(hw_opcode);
1474    compact(debug_control);
1475 
1476    if (!set_control_index(c, &temp, src))
1477       return false;
1478    if (!set_datatype_index(c, &temp, src, is_immediate))
1479       return false;
1480    if (!set_subreg_index(c, &temp, src, is_immediate))
1481       return false;
1482    if (!set_src0_index(c, &temp, src))
1483       return false;
1484    if (!set_src1_index(c, &temp, src, is_immediate, compacted_imm))
1485       return false;
1486 
1487    if (devinfo->ver >= 6) {
1488       compact(acc_wr_control);
1489    } else {
1490       compact(mask_control_ex);
1491    }
1492 
1493    if (devinfo->ver <= 6)
1494       compact(flag_subreg_nr);
1495 
1496    compact(cond_modifier);
1497 
1498    compact_reg(dst);
1499    compact_reg(src0);
1500 
1501    if (is_immediate) {
1502       /* src1 reg takes the low 8 bits (of the 13-bit compacted value) */
1503       elk_compact_inst_set_src1_reg_nr(devinfo, &temp, compacted_imm & 0xff);
1504    } else {
1505       compact_reg(src1);
1506    }
1507 
1508    elk_compact_inst_set_cmpt_control(devinfo, &temp, true);
1509 
1510 #undef compact
1511 #undef compact_reg
1512 
1513    *dst = temp;
1514 
1515    return true;
1516 }
1517 
1518 bool
elk_try_compact_instruction(const struct elk_isa_info * isa,elk_compact_inst * dst,const elk_inst * src)1519 elk_try_compact_instruction(const struct elk_isa_info *isa,
1520                             elk_compact_inst *dst, const elk_inst *src)
1521 {
1522    struct compaction_state c;
1523    compaction_state_init(&c, isa);
1524    return try_compact_instruction(&c, dst, src);
1525 }
1526 
1527 static void
set_uncompacted_control(const struct compaction_state * c,elk_inst * dst,elk_compact_inst * src)1528 set_uncompacted_control(const struct compaction_state *c, elk_inst *dst,
1529                         elk_compact_inst *src)
1530 {
1531    const struct intel_device_info *devinfo = c->isa->devinfo;
1532    uint32_t uncompacted =
1533       c->control_index_table[elk_compact_inst_control_index(devinfo, src)];
1534 
1535    if (devinfo->ver >= 8) {
1536       elk_inst_set_bits(dst, 33, 31, (uncompacted >> 16));
1537       elk_inst_set_bits(dst, 23, 12, (uncompacted >>  4) & 0xfff);
1538       elk_inst_set_bits(dst, 10,  9, (uncompacted >>  2) & 0x3);
1539       elk_inst_set_bits(dst, 34, 34, (uncompacted >>  1) & 0x1);
1540       elk_inst_set_bits(dst,  8,  8, (uncompacted >>  0) & 0x1);
1541    } else {
1542       elk_inst_set_bits(dst, 31, 31, (uncompacted >> 16) & 0x1);
1543       elk_inst_set_bits(dst, 23,  8, (uncompacted & 0xffff));
1544 
1545       if (devinfo->ver == 7)
1546          elk_inst_set_bits(dst, 90, 89, uncompacted >> 17);
1547    }
1548 }
1549 
1550 static void
set_uncompacted_datatype(const struct compaction_state * c,elk_inst * dst,elk_compact_inst * src)1551 set_uncompacted_datatype(const struct compaction_state *c, elk_inst *dst,
1552                          elk_compact_inst *src)
1553 {
1554    const struct intel_device_info *devinfo = c->isa->devinfo;
1555    uint32_t uncompacted =
1556       c->datatype_table[elk_compact_inst_datatype_index(devinfo, src)];
1557 
1558    if (devinfo->ver >= 8) {
1559       elk_inst_set_bits(dst, 63, 61, (uncompacted >> 18));
1560       elk_inst_set_bits(dst, 94, 89, (uncompacted >> 12) & 0x3f);
1561       elk_inst_set_bits(dst, 46, 35, (uncompacted >>  0) & 0xfff);
1562    } else {
1563       elk_inst_set_bits(dst, 63, 61, (uncompacted >> 15));
1564       elk_inst_set_bits(dst, 46, 32, (uncompacted & 0x7fff));
1565    }
1566 }
1567 
1568 static void
set_uncompacted_subreg(const struct compaction_state * c,elk_inst * dst,elk_compact_inst * src)1569 set_uncompacted_subreg(const struct compaction_state *c, elk_inst *dst,
1570                        elk_compact_inst *src)
1571 {
1572    const struct intel_device_info *devinfo = c->isa->devinfo;
1573    uint16_t uncompacted =
1574       c->subreg_table[elk_compact_inst_subreg_index(devinfo, src)];
1575 
1576    elk_inst_set_bits(dst, 100, 96, (uncompacted >> 10));
1577    elk_inst_set_bits(dst,  68, 64, (uncompacted >>  5) & 0x1f);
1578    elk_inst_set_bits(dst,  52, 48, (uncompacted >>  0) & 0x1f);
1579 }
1580 
1581 static void
set_uncompacted_src0(const struct compaction_state * c,elk_inst * dst,elk_compact_inst * src)1582 set_uncompacted_src0(const struct compaction_state *c, elk_inst *dst,
1583                      elk_compact_inst *src)
1584 {
1585    const struct intel_device_info *devinfo = c->isa->devinfo;
1586    uint32_t compacted = elk_compact_inst_src0_index(devinfo, src);
1587    uint16_t uncompacted = c->src0_index_table[compacted];
1588 
1589    elk_inst_set_bits(dst, 88, 77, uncompacted);
1590 }
1591 
1592 static void
set_uncompacted_src1(const struct compaction_state * c,elk_inst * dst,elk_compact_inst * src)1593 set_uncompacted_src1(const struct compaction_state *c, elk_inst *dst,
1594                      elk_compact_inst *src)
1595 {
1596    const struct intel_device_info *devinfo = c->isa->devinfo;
1597    uint16_t uncompacted =
1598       c->src1_index_table[elk_compact_inst_src1_index(devinfo, src)];
1599 
1600    elk_inst_set_bits(dst, 120, 109, uncompacted);
1601 }
1602 
1603 static void
set_uncompacted_3src_control_index(const struct compaction_state * c,elk_inst * dst,elk_compact_inst * src)1604 set_uncompacted_3src_control_index(const struct compaction_state *c,
1605                                    elk_inst *dst, elk_compact_inst *src)
1606 {
1607    const struct intel_device_info *devinfo = c->isa->devinfo;
1608    assert(devinfo->ver >= 8);
1609 
1610    uint32_t compacted = elk_compact_inst_3src_control_index(devinfo, src);
1611    uint32_t uncompacted = gfx8_3src_control_index_table[compacted];
1612 
1613    elk_inst_set_bits(dst, 34, 32, (uncompacted >> 21) & 0x7);
1614    elk_inst_set_bits(dst, 28,  8, (uncompacted >>  0) & 0x1fffff);
1615 
1616    if (devinfo->platform == INTEL_PLATFORM_CHV)
1617       elk_inst_set_bits(dst, 36, 35, (uncompacted >> 24) & 0x3);
1618 }
1619 
1620 static void
set_uncompacted_3src_source_index(const struct intel_device_info * devinfo,elk_inst * dst,elk_compact_inst * src)1621 set_uncompacted_3src_source_index(const struct intel_device_info *devinfo,
1622                                   elk_inst *dst, elk_compact_inst *src)
1623 {
1624    assert(devinfo->ver >= 8);
1625 
1626    uint32_t compacted = elk_compact_inst_3src_source_index(devinfo, src);
1627    uint64_t uncompacted = gfx8_3src_source_index_table[compacted];
1628 
1629    elk_inst_set_bits(dst,  83,  83, (uncompacted >> 43) & 0x1);
1630    elk_inst_set_bits(dst, 114, 107, (uncompacted >> 35) & 0xff);
1631    elk_inst_set_bits(dst,  93,  86, (uncompacted >> 27) & 0xff);
1632    elk_inst_set_bits(dst,  72,  65, (uncompacted >> 19) & 0xff);
1633    elk_inst_set_bits(dst,  55,  37, (uncompacted >>  0) & 0x7ffff);
1634 
1635    if (devinfo->platform == INTEL_PLATFORM_CHV) {
1636       elk_inst_set_bits(dst, 126, 125, (uncompacted >> 47) & 0x3);
1637       elk_inst_set_bits(dst, 105, 104, (uncompacted >> 45) & 0x3);
1638       elk_inst_set_bits(dst,  84,  84, (uncompacted >> 44) & 0x1);
1639    } else {
1640       elk_inst_set_bits(dst, 125, 125, (uncompacted >> 45) & 0x1);
1641       elk_inst_set_bits(dst, 104, 104, (uncompacted >> 44) & 0x1);
1642    }
1643 }
1644 
1645 static void
elk_uncompact_3src_instruction(const struct compaction_state * c,elk_inst * dst,elk_compact_inst * src)1646 elk_uncompact_3src_instruction(const struct compaction_state *c,
1647                                elk_inst *dst, elk_compact_inst *src)
1648 {
1649    const struct intel_device_info *devinfo = c->isa->devinfo;
1650    assert(devinfo->ver >= 8);
1651 
1652 #define uncompact(field) \
1653    elk_inst_set_3src_##field(devinfo, dst, elk_compact_inst_3src_##field(devinfo, src))
1654 #define uncompact_a16(field) \
1655    elk_inst_set_3src_a16_##field(devinfo, dst, elk_compact_inst_3src_##field(devinfo, src))
1656 
1657    uncompact(hw_opcode);
1658 
1659    set_uncompacted_3src_control_index(c, dst, src);
1660    set_uncompacted_3src_source_index(devinfo, dst, src);
1661 
1662    uncompact(dst_reg_nr);
1663    uncompact_a16(src0_rep_ctrl);
1664    uncompact(debug_control);
1665    uncompact(saturate);
1666    uncompact_a16(src1_rep_ctrl);
1667    uncompact_a16(src2_rep_ctrl);
1668    uncompact(src0_reg_nr);
1669    uncompact(src1_reg_nr);
1670    uncompact(src2_reg_nr);
1671    uncompact_a16(src0_subreg_nr);
1672    uncompact_a16(src1_subreg_nr);
1673    uncompact_a16(src2_subreg_nr);
1674 
1675    elk_inst_set_3src_cmpt_control(devinfo, dst, false);
1676 
1677 #undef uncompact
1678 #undef uncompact_a16
1679 }
1680 
1681 static void
uncompact_instruction(const struct compaction_state * c,elk_inst * dst,elk_compact_inst * src)1682 uncompact_instruction(const struct compaction_state *c, elk_inst *dst,
1683                       elk_compact_inst *src)
1684 {
1685    const struct intel_device_info *devinfo = c->isa->devinfo;
1686    memset(dst, 0, sizeof(*dst));
1687 
1688    if (devinfo->ver >= 8) {
1689       const enum elk_opcode opcode =
1690          elk_opcode_decode(c->isa, elk_compact_inst_3src_hw_opcode(devinfo, src));
1691       if (elk_is_3src(c->isa, opcode)) {
1692          elk_uncompact_3src_instruction(c, dst, src);
1693          return;
1694       }
1695    }
1696 
1697 #define uncompact(field) \
1698    elk_inst_set_##field(devinfo, dst, elk_compact_inst_##field(devinfo, src))
1699 #define uncompact_reg(field) \
1700    elk_inst_set_##field##_da_reg_nr(devinfo, dst, \
1701                                     elk_compact_inst_##field##_reg_nr(devinfo, src))
1702 
1703    uncompact(hw_opcode);
1704    uncompact(debug_control);
1705 
1706    set_uncompacted_control(c, dst, src);
1707    set_uncompacted_datatype(c, dst, src);
1708    set_uncompacted_subreg(c, dst, src);
1709    set_uncompacted_src0(c, dst, src);
1710 
1711    enum elk_reg_type type;
1712    if (has_immediate(devinfo, dst, &type)) {
1713       unsigned imm = uncompact_immediate(devinfo, type,
1714                                          elk_compact_inst_imm(devinfo, src));
1715       elk_inst_set_imm_ud(devinfo, dst, imm);
1716    } else {
1717       set_uncompacted_src1(c, dst, src);
1718       uncompact_reg(src1);
1719    }
1720 
1721    if (devinfo->ver >= 6) {
1722       uncompact(acc_wr_control);
1723    } else {
1724       uncompact(mask_control_ex);
1725    }
1726 
1727    uncompact(cond_modifier);
1728 
1729    if (devinfo->ver <= 6)
1730       uncompact(flag_subreg_nr);
1731 
1732    uncompact_reg(dst);
1733    uncompact_reg(src0);
1734 
1735    elk_inst_set_cmpt_control(devinfo, dst, false);
1736 
1737 #undef uncompact
1738 #undef uncompact_reg
1739 }
1740 
1741 void
elk_uncompact_instruction(const struct elk_isa_info * isa,elk_inst * dst,elk_compact_inst * src)1742 elk_uncompact_instruction(const struct elk_isa_info *isa,
1743                           elk_inst *dst, elk_compact_inst *src)
1744 {
1745    struct compaction_state c;
1746    compaction_state_init(&c, isa);
1747    uncompact_instruction(&c, dst, src);
1748 }
1749 
1750 void
elk_debug_compact_uncompact(const struct elk_isa_info * isa,elk_inst * orig,elk_inst * uncompacted)1751 elk_debug_compact_uncompact(const struct elk_isa_info *isa,
1752                             elk_inst *orig,
1753                             elk_inst *uncompacted)
1754 {
1755    fprintf(stderr, "Instruction compact/uncompact changed (gen%d):\n",
1756            isa->devinfo->ver);
1757 
1758    fprintf(stderr, "  before: ");
1759    elk_disassemble_inst(stderr, isa, orig, true, 0, NULL);
1760 
1761    fprintf(stderr, "  after:  ");
1762    elk_disassemble_inst(stderr, isa, uncompacted, false, 0, NULL);
1763 
1764    uint32_t *before_bits = (uint32_t *)orig;
1765    uint32_t *after_bits = (uint32_t *)uncompacted;
1766    fprintf(stderr, "  changed bits:\n");
1767    for (int i = 0; i < 128; i++) {
1768       uint32_t before = before_bits[i / 32] & (1 << (i & 31));
1769       uint32_t after = after_bits[i / 32] & (1 << (i & 31));
1770 
1771       if (before != after) {
1772          fprintf(stderr, "  bit %d, %s to %s\n", i,
1773                  before ? "set" : "unset",
1774                  after ? "set" : "unset");
1775       }
1776    }
1777 }
1778 
/**
 * Return the difference between the compacted-count entries recorded for
 * \p old_target_ip and \p old_ip, i.e.
 * compacted_counts[old_target_ip] - compacted_counts[old_ip].
 *
 * Both IPs are indices into \p compacted_counts.
 */
static int
compacted_between(int old_ip, int old_target_ip, int *compacted_counts)
{
   return compacted_counts[old_target_ip] - compacted_counts[old_ip];
}
1786 
1787 static void
update_uip_jip(const struct elk_isa_info * isa,elk_inst * insn,int this_old_ip,int * compacted_counts)1788 update_uip_jip(const struct elk_isa_info *isa, elk_inst *insn,
1789                int this_old_ip, int *compacted_counts)
1790 {
1791    const struct intel_device_info *devinfo = isa->devinfo;
1792 
1793    /* JIP and UIP are in units of:
1794     *    - bytes on Gfx8+; and
1795     *    - compacted instructions on Gfx6+.
1796     */
1797    int shift = devinfo->ver >= 8 ? 3 : 0;
1798 
1799    /* Even though the values are signed, we don't need the rounding behavior
1800     * of integer division. The shifts are safe.
1801     */
1802    if (devinfo->ver >= 8) {
1803       assert(elk_inst_jip(devinfo, insn) % 8 == 0 &&
1804              elk_inst_uip(devinfo, insn) % 8 == 0);
1805    }
1806 
1807    int32_t jip_compacted = elk_inst_jip(devinfo, insn) >> shift;
1808    jip_compacted -= compacted_between(this_old_ip,
1809                                       this_old_ip + (jip_compacted / 2),
1810                                       compacted_counts);
1811    elk_inst_set_jip(devinfo, insn, (uint32_t)jip_compacted << shift);
1812 
1813    if (elk_inst_opcode(isa, insn) == ELK_OPCODE_ENDIF ||
1814        elk_inst_opcode(isa, insn) == ELK_OPCODE_WHILE ||
1815        (elk_inst_opcode(isa, insn) == ELK_OPCODE_ELSE && devinfo->ver <= 7))
1816       return;
1817 
1818    int32_t uip_compacted = elk_inst_uip(devinfo, insn) >> shift;
1819    uip_compacted -= compacted_between(this_old_ip,
1820                                       this_old_ip + (uip_compacted / 2),
1821                                       compacted_counts);
1822    elk_inst_set_uip(devinfo, insn, (uint32_t)uip_compacted << shift);
1823 }
1824 
1825 static void
update_gfx4_jump_count(const struct intel_device_info * devinfo,elk_inst * insn,int this_old_ip,int * compacted_counts)1826 update_gfx4_jump_count(const struct intel_device_info *devinfo, elk_inst *insn,
1827                        int this_old_ip, int *compacted_counts)
1828 {
1829    assert(devinfo->ver == 5 || devinfo->platform == INTEL_PLATFORM_G4X);
1830 
1831    /* Jump Count is in units of:
1832     *    - uncompacted instructions on G45; and
1833     *    - compacted instructions on Gfx5.
1834     */
1835    int shift = devinfo->platform == INTEL_PLATFORM_G4X ? 1 : 0;
1836 
1837    int jump_count_compacted = elk_inst_gfx4_jump_count(devinfo, insn) << shift;
1838 
1839    int target_old_ip = this_old_ip + (jump_count_compacted / 2);
1840 
1841    int this_compacted_count = compacted_counts[this_old_ip];
1842    int target_compacted_count = compacted_counts[target_old_ip];
1843 
1844    jump_count_compacted -= (target_compacted_count - this_compacted_count);
1845    elk_inst_set_gfx4_jump_count(devinfo, insn, jump_count_compacted >> shift);
1846 }
1847 
1848 static void
compaction_state_init(struct compaction_state * c,const struct elk_isa_info * isa)1849 compaction_state_init(struct compaction_state *c,
1850                       const struct elk_isa_info *isa)
1851 {
1852    const struct intel_device_info *devinfo = isa->devinfo;
1853 
1854    assert(g45_control_index_table[ARRAY_SIZE(g45_control_index_table) - 1] != 0);
1855    assert(g45_datatype_table[ARRAY_SIZE(g45_datatype_table) - 1] != 0);
1856    assert(g45_subreg_table[ARRAY_SIZE(g45_subreg_table) - 1] != 0);
1857    assert(g45_src_index_table[ARRAY_SIZE(g45_src_index_table) - 1] != 0);
1858    assert(gfx6_control_index_table[ARRAY_SIZE(gfx6_control_index_table) - 1] != 0);
1859    assert(gfx6_datatype_table[ARRAY_SIZE(gfx6_datatype_table) - 1] != 0);
1860    assert(gfx6_subreg_table[ARRAY_SIZE(gfx6_subreg_table) - 1] != 0);
1861    assert(gfx6_src_index_table[ARRAY_SIZE(gfx6_src_index_table) - 1] != 0);
1862    assert(gfx7_control_index_table[ARRAY_SIZE(gfx7_control_index_table) - 1] != 0);
1863    assert(gfx7_datatype_table[ARRAY_SIZE(gfx7_datatype_table) - 1] != 0);
1864    assert(gfx7_subreg_table[ARRAY_SIZE(gfx7_subreg_table) - 1] != 0);
1865    assert(gfx7_src_index_table[ARRAY_SIZE(gfx7_src_index_table) - 1] != 0);
1866    assert(gfx8_control_index_table[ARRAY_SIZE(gfx8_control_index_table) - 1] != 0);
1867    assert(gfx8_datatype_table[ARRAY_SIZE(gfx8_datatype_table) - 1] != 0);
1868    assert(gfx8_subreg_table[ARRAY_SIZE(gfx8_subreg_table) - 1] != 0);
1869    assert(gfx8_src_index_table[ARRAY_SIZE(gfx8_src_index_table) - 1] != 0);
1870    assert(gfx11_datatype_table[ARRAY_SIZE(gfx11_datatype_table) - 1] != 0);
1871    assert(gfx12_control_index_table[ARRAY_SIZE(gfx12_control_index_table) - 1] != 0);
1872    assert(gfx12_datatype_table[ARRAY_SIZE(gfx12_datatype_table) - 1] != 0);
1873    assert(gfx12_subreg_table[ARRAY_SIZE(gfx12_subreg_table) - 1] != 0);
1874    assert(gfx12_src0_index_table[ARRAY_SIZE(gfx12_src0_index_table) - 1] != 0);
1875    assert(gfx12_src1_index_table[ARRAY_SIZE(gfx12_src1_index_table) - 1] != 0);
1876    assert(xehp_src0_index_table[ARRAY_SIZE(xehp_src0_index_table) - 1] != 0);
1877    assert(xehp_src1_index_table[ARRAY_SIZE(xehp_src1_index_table) - 1] != 0);
1878    assert(xe2_control_index_table[ARRAY_SIZE(xe2_control_index_table) - 1] != 0);
1879    assert(xe2_datatype_table[ARRAY_SIZE(xe2_datatype_table) - 1] != 0);
1880    assert(xe2_subreg_table[ARRAY_SIZE(xe2_subreg_table) - 1] != 0);
1881    assert(xe2_src0_index_table[ARRAY_SIZE(xe2_src0_index_table) - 1] != 0);
1882    assert(xe2_src1_index_table[ARRAY_SIZE(xe2_src1_index_table) - 1] != 0);
1883 
1884    c->isa = isa;
1885    switch (devinfo->ver) {
1886    case 8:
1887       c->control_index_table = gfx8_control_index_table;
1888       c->datatype_table = gfx8_datatype_table;
1889       c->subreg_table = gfx8_subreg_table;
1890       c->src0_index_table = gfx8_src_index_table;
1891       c->src1_index_table = gfx8_src_index_table;
1892       break;
1893    case 7:
1894       c->control_index_table = gfx7_control_index_table;
1895       c->datatype_table = gfx7_datatype_table;
1896       c->subreg_table = gfx7_subreg_table;
1897       c->src0_index_table = gfx7_src_index_table;
1898       c->src1_index_table = gfx7_src_index_table;
1899       break;
1900    case 6:
1901       c->control_index_table = gfx6_control_index_table;
1902       c->datatype_table = gfx6_datatype_table;
1903       c->subreg_table = gfx6_subreg_table;
1904       c->src0_index_table = gfx6_src_index_table;
1905       c->src1_index_table = gfx6_src_index_table;
1906       break;
1907    case 5:
1908    case 4:
1909       c->control_index_table = g45_control_index_table;
1910       c->datatype_table = g45_datatype_table;
1911       c->subreg_table = g45_subreg_table;
1912       c->src0_index_table = g45_src_index_table;
1913       c->src1_index_table = g45_src_index_table;
1914       break;
1915    default:
1916       unreachable("unknown generation");
1917    }
1918 }
1919 
1920 void
elk_compact_instructions(struct elk_codegen * p,int start_offset,struct elk_disasm_info * disasm)1921 elk_compact_instructions(struct elk_codegen *p, int start_offset,
1922                          struct elk_disasm_info *disasm)
1923 {
1924    if (INTEL_DEBUG(DEBUG_NO_COMPACTION))
1925       return;
1926 
1927    const struct intel_device_info *devinfo = p->devinfo;
1928    if (devinfo->ver == 4 && devinfo->platform != INTEL_PLATFORM_G4X)
1929       return;
1930 
1931    void *store = p->store + start_offset / 16;
1932    /* For an instruction at byte offset 16*i before compaction, this is the
1933     * number of compacted instructions minus the number of padding NOP/NENOPs
1934     * that preceded it.
1935     */
1936    unsigned num_compacted_counts =
1937       (p->next_insn_offset - start_offset) / sizeof(elk_inst);
1938    int *compacted_counts =
1939       calloc(1, sizeof(*compacted_counts) * num_compacted_counts);
1940 
1941    /* For an instruction at byte offset 8*i after compaction, this was its IP
1942     * (in 16-byte units) before compaction.
1943     */
1944    unsigned num_old_ip =
1945       (p->next_insn_offset - start_offset) / sizeof(elk_compact_inst) + 1;
1946    int *old_ip = calloc(1, sizeof(*old_ip) * num_old_ip);
1947 
1948    struct compaction_state c;
1949    compaction_state_init(&c, p->isa);
1950 
1951    int offset = 0;
1952    int compacted_count = 0;
1953    for (int src_offset = 0; src_offset < p->next_insn_offset - start_offset;
1954         src_offset += sizeof(elk_inst)) {
1955       elk_inst *src = store + src_offset;
1956       void *dst = store + offset;
1957 
1958       old_ip[offset / sizeof(elk_compact_inst)] = src_offset / sizeof(elk_inst);
1959       compacted_counts[src_offset / sizeof(elk_inst)] = compacted_count;
1960 
1961       elk_inst inst = precompact(p->isa, *src);
1962       elk_inst saved = inst;
1963 
1964       if (try_compact_instruction(&c, dst, &inst)) {
1965          compacted_count++;
1966 
1967          if (INTEL_DEBUG(DEBUG_VS | DEBUG_GS | DEBUG_TCS |
1968                          DEBUG_WM | DEBUG_CS | DEBUG_TES)) {
1969             elk_inst uncompacted;
1970             uncompact_instruction(&c, &uncompacted, dst);
1971             if (memcmp(&saved, &uncompacted, sizeof(uncompacted))) {
1972                elk_debug_compact_uncompact(p->isa, &saved, &uncompacted);
1973             }
1974          }
1975 
1976          offset += sizeof(elk_compact_inst);
1977       } else {
1978          /* All uncompacted instructions need to be aligned on G45. */
1979          if ((offset & sizeof(elk_compact_inst)) != 0 &&
1980              devinfo->platform == INTEL_PLATFORM_G4X) {
1981             elk_compact_inst *align = store + offset;
1982             memset(align, 0, sizeof(*align));
1983             elk_compact_inst_set_hw_opcode(
1984                devinfo, align, elk_opcode_encode(p->isa, ELK_OPCODE_NENOP));
1985             elk_compact_inst_set_cmpt_control(devinfo, align, true);
1986             offset += sizeof(elk_compact_inst);
1987             compacted_count--;
1988             compacted_counts[src_offset / sizeof(elk_inst)] = compacted_count;
1989             old_ip[offset / sizeof(elk_compact_inst)] = src_offset / sizeof(elk_inst);
1990 
1991             dst = store + offset;
1992          }
1993 
1994          /* If we didn't compact this instruction, we need to move it down into
1995           * place.
1996           */
1997          if (offset != src_offset) {
1998             memmove(dst, src, sizeof(elk_inst));
1999          }
2000          offset += sizeof(elk_inst);
2001       }
2002    }
2003 
2004    /* Add an entry for the ending offset of the program. This greatly
2005     * simplifies the linked list walk at the end of the function.
2006     */
2007    old_ip[offset / sizeof(elk_compact_inst)] =
2008       (p->next_insn_offset - start_offset) / sizeof(elk_inst);
2009 
2010    /* Fix up control flow offsets. */
2011    p->next_insn_offset = start_offset + offset;
2012    for (offset = 0; offset < p->next_insn_offset - start_offset;
2013         offset = next_offset(devinfo, store, offset)) {
2014       elk_inst *insn = store + offset;
2015       int this_old_ip = old_ip[offset / sizeof(elk_compact_inst)];
2016       int this_compacted_count = compacted_counts[this_old_ip];
2017 
2018       switch (elk_inst_opcode(p->isa, insn)) {
2019       case ELK_OPCODE_BREAK:
2020       case ELK_OPCODE_CONTINUE:
2021       case ELK_OPCODE_HALT:
2022          if (devinfo->ver >= 6) {
2023             update_uip_jip(p->isa, insn, this_old_ip, compacted_counts);
2024          } else {
2025             update_gfx4_jump_count(devinfo, insn, this_old_ip,
2026                                    compacted_counts);
2027          }
2028          break;
2029 
2030       case ELK_OPCODE_IF:
2031       case ELK_OPCODE_IFF:
2032       case ELK_OPCODE_ELSE:
2033       case ELK_OPCODE_ENDIF:
2034       case ELK_OPCODE_WHILE:
2035          if (devinfo->ver >= 7) {
2036             if (elk_inst_cmpt_control(devinfo, insn)) {
2037                elk_inst uncompacted;
2038                uncompact_instruction(&c, &uncompacted,
2039                                      (elk_compact_inst *)insn);
2040 
2041                update_uip_jip(p->isa, &uncompacted, this_old_ip,
2042                               compacted_counts);
2043 
2044                bool ret = try_compact_instruction(&c, (elk_compact_inst *)insn,
2045                                                   &uncompacted);
2046                assert(ret); (void)ret;
2047             } else {
2048                update_uip_jip(p->isa, insn, this_old_ip, compacted_counts);
2049             }
2050          } else if (devinfo->ver == 6) {
2051             assert(!elk_inst_cmpt_control(devinfo, insn));
2052 
2053             /* Jump Count is in units of compacted instructions on Gfx6. */
2054             int jump_count_compacted = elk_inst_gfx6_jump_count(devinfo, insn);
2055 
2056             int target_old_ip = this_old_ip + (jump_count_compacted / 2);
2057             int target_compacted_count = compacted_counts[target_old_ip];
2058             jump_count_compacted -= (target_compacted_count - this_compacted_count);
2059             elk_inst_set_gfx6_jump_count(devinfo, insn, jump_count_compacted);
2060          } else {
2061             update_gfx4_jump_count(devinfo, insn, this_old_ip,
2062                                    compacted_counts);
2063          }
2064          break;
2065 
2066       case ELK_OPCODE_ADD:
2067          /* Add instructions modifying the IP register use an immediate src1,
2068           * and Gens that use this cannot compact instructions with immediate
2069           * operands.
2070           */
2071          if (elk_inst_cmpt_control(devinfo, insn))
2072             break;
2073 
2074          if (elk_inst_dst_reg_file(devinfo, insn) == ELK_ARCHITECTURE_REGISTER_FILE &&
2075              elk_inst_dst_da_reg_nr(devinfo, insn) == ELK_ARF_IP) {
2076             assert(elk_inst_src1_reg_file(devinfo, insn) == ELK_IMMEDIATE_VALUE);
2077 
2078             int shift = 3;
2079             int jump_compacted = elk_inst_imm_d(devinfo, insn) >> shift;
2080 
2081             int target_old_ip = this_old_ip + (jump_compacted / 2);
2082             int target_compacted_count = compacted_counts[target_old_ip];
2083             jump_compacted -= (target_compacted_count - this_compacted_count);
2084             elk_inst_set_imm_ud(devinfo, insn, jump_compacted << shift);
2085          }
2086          break;
2087 
2088       default:
2089          break;
2090       }
2091    }
2092 
2093    /* p->nr_insn is counting the number of uncompacted instructions still, so
2094     * divide.  We do want to be sure there's a valid instruction in any
2095     * alignment padding, so that the next compression pass (for the FS 8/16
2096     * compile passes) parses correctly.
2097     */
2098    if (p->next_insn_offset & sizeof(elk_compact_inst)) {
2099       elk_compact_inst *align = store + offset;
2100       memset(align, 0, sizeof(*align));
2101       elk_compact_inst_set_hw_opcode(
2102          devinfo, align, elk_opcode_encode(p->isa, ELK_OPCODE_NOP));
2103       elk_compact_inst_set_cmpt_control(devinfo, align, true);
2104       p->next_insn_offset += sizeof(elk_compact_inst);
2105    }
2106    p->nr_insn = p->next_insn_offset / sizeof(elk_inst);
2107 
2108    for (int i = 0; i < p->num_relocs; i++) {
2109       if (p->relocs[i].offset < (uint32_t)start_offset)
2110          continue;
2111 
2112       assert(p->relocs[i].offset % 16 == 0);
2113       unsigned idx = (p->relocs[i].offset - start_offset) / 16;
2114       p->relocs[i].offset -= compacted_counts[idx] * 8;
2115    }
2116 
2117    /* Update the instruction offsets for each group. */
2118    if (disasm) {
2119       int offset = 0;
2120 
2121       foreach_list_typed(struct inst_group, group, link, &disasm->group_list) {
2122          while (start_offset + old_ip[offset / sizeof(elk_compact_inst)] *
2123                 sizeof(elk_inst) != group->offset) {
2124             assert(start_offset + old_ip[offset / sizeof(elk_compact_inst)] *
2125                    sizeof(elk_inst) < group->offset);
2126             offset = next_offset(devinfo, store, offset);
2127          }
2128 
2129          group->offset = start_offset + offset;
2130 
2131          offset = next_offset(devinfo, store, offset);
2132       }
2133    }
2134 
2135    free(compacted_counts);
2136    free(old_ip);
2137 }
2138