1 /*
2 * Copyright © 2012-2018 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 /** @file elk_eu_compact.c
25 *
26 * Instruction compaction is a feature of G45 and newer hardware that allows
27 * for a smaller instruction encoding.
28 *
29 * The instruction cache is on the order of 32KB, and many programs generate
30 * far more instructions than that. The instruction cache is built to barely
31 * keep up with instruction dispatch ability in cache hit cases -- L1
32 * instruction cache misses that still hit in the next level could limit
33 * throughput by around 50%.
34 *
35 * The idea of instruction compaction is that most instructions use a tiny
36 * subset of the GPU functionality, so we can encode what would be a 16 byte
37 * instruction in 8 bytes using some lookup tables for various fields.
38 *
39 *
40 * Instruction compaction capabilities vary subtly by generation.
41 *
42 * G45's support for instruction compaction is very limited. Jump counts on
43 * this generation are in units of 16-byte uncompacted instructions. As such,
44 * all jump targets must be 16-byte aligned. Also, all instructions must be
45 * naturally aligned, i.e. uncompacted instructions must be 16-byte aligned.
46 * A G45-only instruction, NENOP, must be used to provide padding to align
47 * uncompacted instructions.
48 *
49 * Gfx5 removes these restrictions and changes jump counts to be in units of
50 * 8-byte compacted instructions, allowing jump targets to be only 8-byte
51 * aligned. Uncompacted instructions can also be placed on 8-byte boundaries.
52 *
53 * Gfx6 adds the ability to compact instructions with a limited range of
54 * immediate values. Compactable immediates have 12 unrestricted bits, and a
55 * 13th bit that's replicated through the high 20 bits, to create the 32-bit
56 * value of DW3 in the uncompacted instruction word.
57 *
58 * On Gfx7 we can compact some control flow instructions with a small positive
59 * immediate in the low bits of DW3, like ENDIF with the JIP field. Other
60 * control flow instructions with UIP cannot be compacted, because of the
61 * replicated 13th bit. No control flow instructions can be compacted on Gfx6
62 * since the jump count field is not in DW3.
63 *
64 * break JIP/UIP
65 * cont JIP/UIP
66 * halt JIP/UIP
67 * if JIP/UIP
68 * else JIP (plus UIP on BDW+)
69 * endif JIP
70 * while JIP (must be negative)
71 *
 * Gfx8 adds support for compacting 3-src instructions.
73 *
 * Gfx12 reduces the number of bits that are available to compacted immediates from
75 * 13 to 12, but improves the compaction of floating-point immediates by
76 * allowing the high bits to be encoded (the sign, 8-bit exponent, and the
77 * three most significant bits of the mantissa), rather than the lowest bits of
78 * the mantissa.
79 */
80
81 #include "elk_eu.h"
82 #include "elk_disasm.h"
83 #include "elk_shader.h"
84 #include "elk_disasm_info.h"
85 #include "dev/intel_debug.h"
86
/* G45 control index table: 32 entries, each a 17-bit pattern of control
 * bits.  The position of a matching entry is what gets stored as the
 * compacted instruction's control index.
 *
 * NOTE(review): presumably installed as compaction_state::control_index_table
 * by compaction_state_init() on G45-class hardware -- confirm.
 */
static const uint32_t g45_control_index_table[32] = {
   0b00000000000000000,
   0b01000000000000000,
   0b00110000000000000,
   0b00000000000000010,
   0b00100000000000000,
   0b00010000000000000,
   0b01000000000100000,
   0b01000000100000000,
   0b01010000000100000,
   0b00000000100000010,
   0b11000000000000000,
   0b00001000100000010,
   0b01001000100000000,
   0b00000000100000000,
   0b11000000000100000,
   0b00001000100000000,
   0b10110000000000000,
   0b11010000000100000,
   0b00110000100000000,
   0b00100000100000000,
   0b01000000000001000,
   0b01000000000000100,
   0b00111100000000000,
   0b00101011000000000,
   0b00110000000010000,
   0b00010000100000000,
   0b01000000000100100,
   0b01000000000101000,
   0b00110000000000110,
   0b00000000000001010,
   0b01010000000101000,
   0b01010000000100100,
};
121
/* G45 datatype table: 32 entries, each an 18-bit pattern.
 *
 * NOTE(review): presumably installed as compaction_state::datatype_table by
 * compaction_state_init() on G45-class hardware -- confirm.
 */
static const uint32_t g45_datatype_table[32] = {
   0b001000000000100001,
   0b001011010110101101,
   0b001000001000110001,
   0b001111011110111101,
   0b001011010110101100,
   0b001000000110101101,
   0b001000000000100000,
   0b010100010110110001,
   0b001100011000101101,
   0b001000000000100010,
   0b001000001000110110,
   0b010000001000110001,
   0b001000001000110010,
   0b011000001000110010,
   0b001111011110111100,
   0b001000000100101000,
   0b010100011000110001,
   0b001010010100101001,
   0b001000001000101001,
   0b010000001000110110,
   0b101000001000110001,
   0b001011011000101101,
   0b001000000100001001,
   0b001011011000101100,
   0b110100011000110001,
   0b001000001110111101,
   0b110000001000110001,
   0b011000000100101010,
   0b101000001000101001,
   0b001011010110001100,
   0b001000000110100001,
   0b001010010100001000,
};
156
/* G45 subregister table: 32 entries, each a 15-bit pattern.
 *
 * NOTE(review): presumably installed as compaction_state::subreg_table by
 * compaction_state_init() on G45-class hardware -- confirm.
 */
static const uint16_t g45_subreg_table[32] = {
   0b000000000000000,
   0b000000010000000,
   0b000001000000000,
   0b000100000000000,
   0b000000000100000,
   0b100000000000000,
   0b000000000010000,
   0b001100000000000,
   0b001010000000000,
   0b000000100000000,
   0b001000000000000,
   0b000000000001000,
   0b000000001000000,
   0b000000000000001,
   0b000010000000000,
   0b000000010100000,
   0b000000000000111,
   0b000001000100000,
   0b011000000000000,
   0b000000110000000,
   0b000000000000010,
   0b000000000000100,
   0b000000001100000,
   0b000100000000010,
   0b001110011000110,
   0b001110100001000,
   0b000110011000110,
   0b000001000011000,
   0b000110010000100,
   0b001100000000110,
   0b000000010000110,
   0b000001000110000,
};
191
/* G45 source index table: 32 entries, each a 12-bit pattern.
 *
 * NOTE(review): presumably used for both compaction_state::src0_index_table
 * and ::src1_index_table on G45-class hardware -- confirm against
 * compaction_state_init().
 */
static const uint16_t g45_src_index_table[32] = {
   0b000000000000,
   0b010001101000,
   0b010110001000,
   0b011010010000,
   0b001101001000,
   0b010110001010,
   0b010101110000,
   0b011001111000,
   0b001000101000,
   0b000000101000,
   0b010001010000,
   0b111101101100,
   0b010110001100,
   0b010001101100,
   0b011010010100,
   0b010001001100,
   0b001100101000,
   0b000000000010,
   0b111101001100,
   0b011001101000,
   0b010101001000,
   0b000000000100,
   0b000000101100,
   0b010001101010,
   0b000000111000,
   0b010101011000,
   0b000100100000,
   0b010110000000,
   0b010000000100,
   0b010000111000,
   0b000101100000,
   0b111101110100,
};
226
/* Gfx6 control index table: 32 entries, each a 17-bit pattern of control
 * bits.
 *
 * NOTE(review): presumably installed as compaction_state::control_index_table
 * by compaction_state_init() on Gfx6 -- confirm.
 */
static const uint32_t gfx6_control_index_table[32] = {
   0b00000000000000000,
   0b01000000000000000,
   0b00110000000000000,
   0b00000000100000000,
   0b00010000000000000,
   0b00001000100000000,
   0b00000000100000010,
   0b00000000000000010,
   0b01000000100000000,
   0b01010000000000000,
   0b10110000000000000,
   0b00100000000000000,
   0b11010000000000000,
   0b11000000000000000,
   0b01001000100000000,
   0b01000000000001000,
   0b01000000000000100,
   0b00000000000001000,
   0b00000000000000100,
   0b00111000100000000,
   0b00001000100000010,
   0b00110000100000000,
   0b00110000000000001,
   0b00100000000000001,
   0b00110000000000010,
   0b00110000000000101,
   0b00110000000001001,
   0b00110000000010000,
   0b00110000000000011,
   0b00110000000000100,
   0b00110000100001000,
   0b00100000000001001,
};
261
/* Gfx6 datatype table: 32 entries, each an 18-bit pattern.
 *
 * NOTE(review): entries 23 and 25 hold the same value
 * (0b001111011110111101), so a first-match linear search can never select
 * index 25 -- confirm against the hardware documentation whether this is
 * intentional.
 *
 * NOTE(review): presumably installed as compaction_state::datatype_table by
 * compaction_state_init() on Gfx6 -- confirm.
 */
static const uint32_t gfx6_datatype_table[32] = {
   0b001001110000000000,
   0b001000110000100000,
   0b001001110000000001,
   0b001000000001100000,
   0b001010110100101001,
   0b001000000110101101,
   0b001100011000101100,
   0b001011110110101101,
   0b001000000111101100,
   0b001000000001100001,
   0b001000110010100101,
   0b001000000001000001,
   0b001000001000110001,
   0b001000001000101001,
   0b001000000000100000,
   0b001000001000110010,
   0b001010010100101001,
   0b001011010010100101,
   0b001000000110100101,
   0b001100011000101001,
   0b001011011000101100,
   0b001011010110100101,
   0b001011110110100101,
   0b001111011110111101,
   0b001111011110111100,
   0b001111011110111101,
   0b001111011110011101,
   0b001111011110111110,
   0b001000000000100001,
   0b001000000000100010,
   0b001001111111011101,
   0b001000001110111110,
};
296
/* Gfx6 subregister table: 32 entries, each a 15-bit pattern.
 *
 * NOTE(review): presumably installed as compaction_state::subreg_table by
 * compaction_state_init() on Gfx6 -- confirm.
 */
static const uint16_t gfx6_subreg_table[32] = {
   0b000000000000000,
   0b000000000000100,
   0b000000110000000,
   0b111000000000000,
   0b011110000001000,
   0b000010000000000,
   0b000000000010000,
   0b000110000001100,
   0b001000000000000,
   0b000001000000000,
   0b000001010010100,
   0b000000001010110,
   0b010000000000000,
   0b110000000000000,
   0b000100000000000,
   0b000000010000000,
   0b000000000001000,
   0b100000000000000,
   0b000001010000000,
   0b001010000000000,
   0b001100000000000,
   0b000000001010100,
   0b101101010010100,
   0b010100000000000,
   0b000000010001111,
   0b011000000000000,
   0b111110000000000,
   0b101000000000000,
   0b000000000001111,
   0b000100010001111,
   0b001000010001111,
   0b000110000000000,
};
331
/* Gfx6 source index table: 32 entries, each a 12-bit pattern.
 *
 * NOTE(review): presumably used for both compaction_state::src0_index_table
 * and ::src1_index_table on Gfx6 -- confirm against compaction_state_init().
 */
static const uint16_t gfx6_src_index_table[32] = {
   0b000000000000,
   0b010110001000,
   0b010001101000,
   0b001000101000,
   0b011010010000,
   0b000100100000,
   0b010001101100,
   0b010101110000,
   0b011001111000,
   0b001100101000,
   0b010110001100,
   0b001000100000,
   0b010110001010,
   0b000000000010,
   0b010101010000,
   0b010101101000,
   0b111101001100,
   0b111100101100,
   0b011001110000,
   0b010110001001,
   0b010101011000,
   0b001101001000,
   0b010000101100,
   0b010000000000,
   0b001101110000,
   0b001100010000,
   0b001100000000,
   0b010001101010,
   0b001101111000,
   0b000001110000,
   0b001100100000,
   0b001101010000,
};
366
/* Gfx7 control index table: 32 entries, each a 19-bit pattern of control
 * bits.  On Gfx7 set_control_index() places two extra bits, taken from
 * instruction bits 90:89, at positions 18:17 of the value it looks up.
 *
 * NOTE(review): presumably installed as compaction_state::control_index_table
 * by compaction_state_init() on Gfx7 -- confirm.
 */
static const uint32_t gfx7_control_index_table[32] = {
   0b0000000000000000010,
   0b0000100000000000000,
   0b0000100000000000001,
   0b0000100000000000010,
   0b0000100000000000011,
   0b0000100000000000100,
   0b0000100000000000101,
   0b0000100000000000111,
   0b0000100000000001000,
   0b0000100000000001001,
   0b0000100000000001101,
   0b0000110000000000000,
   0b0000110000000000001,
   0b0000110000000000010,
   0b0000110000000000011,
   0b0000110000000000100,
   0b0000110000000000101,
   0b0000110000000000111,
   0b0000110000000001001,
   0b0000110000000001101,
   0b0000110000000010000,
   0b0000110000100000000,
   0b0001000000000000000,
   0b0001000000000000010,
   0b0001000000000000100,
   0b0001000000100000000,
   0b0010110000000000000,
   0b0010110000000010000,
   0b0011000000000000000,
   0b0011000000100000000,
   0b0101000000000000000,
   0b0101000000100000000,
};
401
/* Gfx7 datatype table: 32 entries, each an 18-bit pattern.
 *
 * NOTE(review): presumably installed as compaction_state::datatype_table by
 * compaction_state_init() on Gfx7 -- confirm.
 */
static const uint32_t gfx7_datatype_table[32] = {
   0b001000000000000001,
   0b001000000000100000,
   0b001000000000100001,
   0b001000000001100001,
   0b001000000010111101,
   0b001000001011111101,
   0b001000001110100001,
   0b001000001110100101,
   0b001000001110111101,
   0b001000010000100001,
   0b001000110000100000,
   0b001000110000100001,
   0b001001010010100101,
   0b001001110010100100,
   0b001001110010100101,
   0b001111001110111101,
   0b001111011110011101,
   0b001111011110111100,
   0b001111011110111101,
   0b001111111110111100,
   0b000000001000001100,
   0b001000000000111101,
   0b001000000010100101,
   0b001000010000100000,
   0b001001010010100100,
   0b001001110010000100,
   0b001010010100001001,
   0b001101111110111101,
   0b001111111110111101,
   0b001011110110101100,
   0b001010010100101000,
   0b001010110100101000,
};
436
/* Gfx7 subregister table: 32 entries, each a 15-bit pattern.
 *
 * NOTE(review): presumably installed as compaction_state::subreg_table by
 * compaction_state_init() on Gfx7 -- confirm.
 */
static const uint16_t gfx7_subreg_table[32] = {
   0b000000000000000,
   0b000000000000001,
   0b000000000001000,
   0b000000000001111,
   0b000000000010000,
   0b000000010000000,
   0b000000100000000,
   0b000000110000000,
   0b000001000000000,
   0b000001000010000,
   0b000010100000000,
   0b001000000000000,
   0b001000000000001,
   0b001000010000001,
   0b001000010000010,
   0b001000010000011,
   0b001000010000100,
   0b001000010000111,
   0b001000010001000,
   0b001000010001110,
   0b001000010001111,
   0b001000110000000,
   0b001000111101000,
   0b010000000000000,
   0b010000110000000,
   0b011000000000000,
   0b011110010000111,
   0b100000000000000,
   0b101000000000000,
   0b110000000000000,
   0b111000000000000,
   0b111000000011100,
};
471
/* Gfx7 source index table: 32 entries, each a 12-bit pattern.
 *
 * NOTE(review): presumably used for both compaction_state::src0_index_table
 * and ::src1_index_table on Gfx7 -- confirm against compaction_state_init().
 */
static const uint16_t gfx7_src_index_table[32] = {
   0b000000000000,
   0b000000000010,
   0b000000010000,
   0b000000010010,
   0b000000011000,
   0b000000100000,
   0b000000101000,
   0b000001001000,
   0b000001010000,
   0b000001110000,
   0b000001111000,
   0b001100000000,
   0b001100000010,
   0b001100001000,
   0b001100010000,
   0b001100010010,
   0b001100100000,
   0b001100101000,
   0b001100111000,
   0b001101000000,
   0b001101000010,
   0b001101001000,
   0b001101010000,
   0b001101100000,
   0b001101101000,
   0b001101110000,
   0b001101110001,
   0b001101111000,
   0b010001101000,
   0b010001101001,
   0b010001101010,
   0b010110001000,
};
506
/* Gfx8 control index table: 32 entries, each a 19-bit pattern of control
 * bits.  The values are the same as in gfx7_control_index_table, but
 * set_control_index() gathers the looked-up bits from different instruction
 * bit positions when devinfo->ver >= 8.
 *
 * NOTE(review): presumably installed as compaction_state::control_index_table
 * by compaction_state_init() on Gfx8+ -- confirm.
 */
static const uint32_t gfx8_control_index_table[32] = {
   0b0000000000000000010,
   0b0000100000000000000,
   0b0000100000000000001,
   0b0000100000000000010,
   0b0000100000000000011,
   0b0000100000000000100,
   0b0000100000000000101,
   0b0000100000000000111,
   0b0000100000000001000,
   0b0000100000000001001,
   0b0000100000000001101,
   0b0000110000000000000,
   0b0000110000000000001,
   0b0000110000000000010,
   0b0000110000000000011,
   0b0000110000000000100,
   0b0000110000000000101,
   0b0000110000000000111,
   0b0000110000000001001,
   0b0000110000000001101,
   0b0000110000000010000,
   0b0000110000100000000,
   0b0001000000000000000,
   0b0001000000000000010,
   0b0001000000000000100,
   0b0001000000100000000,
   0b0010110000000000000,
   0b0010110000000010000,
   0b0011000000000000000,
   0b0011000000100000000,
   0b0101000000000000000,
   0b0101000000100000000,
};
541
/* Gfx8 datatype table: 32 entries, each a 21-bit pattern.
 *
 * NOTE(review): presumably installed as compaction_state::datatype_table by
 * compaction_state_init() on Gfx8 -- confirm.
 */
static const uint32_t gfx8_datatype_table[32] = {
   0b001000000000000000001,
   0b001000000000001000000,
   0b001000000000001000001,
   0b001000000000011000001,
   0b001000000000101011101,
   0b001000000010111011101,
   0b001000000011101000001,
   0b001000000011101000101,
   0b001000000011101011101,
   0b001000001000001000001,
   0b001000011000001000000,
   0b001000011000001000001,
   0b001000101000101000101,
   0b001000111000101000100,
   0b001000111000101000101,
   0b001011100011101011101,
   0b001011101011100011101,
   0b001011101011101011100,
   0b001011101011101011101,
   0b001011111011101011100,
   0b000000000010000001100,
   0b001000000000001011101,
   0b001000000000101000101,
   0b001000001000001000000,
   0b001000101000101000100,
   0b001000111000100000100,
   0b001001001001000001001,
   0b001010111011101011101,
   0b001011111011101011101,
   0b001001111001101001100,
   0b001001001001001001000,
   0b001001011001001001000,
};
576
/* Gfx8 subregister table: 32 entries, each a 15-bit pattern.  It differs
 * from gfx7_subreg_table only at index 10.
 *
 * NOTE(review): presumably installed as compaction_state::subreg_table by
 * compaction_state_init() on Gfx8+ -- confirm.
 */
static const uint16_t gfx8_subreg_table[32] = {
   0b000000000000000,
   0b000000000000001,
   0b000000000001000,
   0b000000000001111,
   0b000000000010000,
   0b000000010000000,
   0b000000100000000,
   0b000000110000000,
   0b000001000000000,
   0b000001000010000,
   0b000001010000000,
   0b001000000000000,
   0b001000000000001,
   0b001000010000001,
   0b001000010000010,
   0b001000010000011,
   0b001000010000100,
   0b001000010000111,
   0b001000010001000,
   0b001000010001110,
   0b001000010001111,
   0b001000110000000,
   0b001000111101000,
   0b010000000000000,
   0b010000110000000,
   0b011000000000000,
   0b011110010000111,
   0b100000000000000,
   0b101000000000000,
   0b110000000000000,
   0b111000000000000,
   0b111000000011100,
};
611
/* Gfx8 source index table: 32 entries, each a 12-bit pattern.  The values
 * are the same as in gfx7_src_index_table.
 *
 * NOTE(review): presumably used for both compaction_state::src0_index_table
 * and ::src1_index_table on Gfx8+ -- confirm against compaction_state_init().
 */
static const uint16_t gfx8_src_index_table[32] = {
   0b000000000000,
   0b000000000010,
   0b000000010000,
   0b000000010010,
   0b000000011000,
   0b000000100000,
   0b000000101000,
   0b000001001000,
   0b000001010000,
   0b000001110000,
   0b000001111000,
   0b001100000000,
   0b001100000010,
   0b001100001000,
   0b001100010000,
   0b001100010010,
   0b001100100000,
   0b001100101000,
   0b001100111000,
   0b001101000000,
   0b001101000010,
   0b001101001000,
   0b001101010000,
   0b001101100000,
   0b001101101000,
   0b001101110000,
   0b001101110001,
   0b001101111000,
   0b010001101000,
   0b010001101001,
   0b010001101010,
   0b010110001000,
};
646
/* Gfx11 datatype table: 32 entries, each a 21-bit pattern (the same width
 * as the entries of gfx8_datatype_table).
 *
 * NOTE(review): presumably installed as compaction_state::datatype_table by
 * compaction_state_init() on Gfx11 -- confirm.
 */
static const uint32_t gfx11_datatype_table[32] = {
   0b001000000000000000001,
   0b001000000000001000000,
   0b001000000000001000001,
   0b001000000000011000001,
   0b001000000000101100101,
   0b001000000101111100101,
   0b001000000100101000001,
   0b001000000100101000101,
   0b001000000100101100101,
   0b001000001000001000001,
   0b001000011000001000000,
   0b001000011000001000001,
   0b001000101000101000101,
   0b001000111000101000100,
   0b001000111000101000101,
   0b001100100100101100101,
   0b001100101100100100101,
   0b001100101100101100100,
   0b001100101100101100101,
   0b001100111100101100100,
   0b000000000010000001100,
   0b001000000000001100101,
   0b001000000000101000101,
   0b001000001000001000000,
   0b001000101000101000100,
   0b001000111000100000100,
   0b001001001001000001001,
   0b001101111100101100101,
   0b001100111100101100101,
   0b001001111001101001100,
   0b001001001001001001000,
   0b001001011001001001000,
};
681
/* Gfx12 control index table: 32 entries, each a 21-bit pattern.  The
 * trailing comment on each entry gives the assembly-syntax rendering of the
 * controls that entry encodes.
 *
 * NOTE(review): presumably installed as compaction_state::control_index_table
 * by compaction_state_init() on Gfx12 -- confirm.
 */
static const uint32_t gfx12_control_index_table[32] = {
   0b000000000000000000100, /* (16|M0) */
   0b000000000000000000011, /* (8|M0) */
   0b000000010000000000000, /* (W) (1|M0) */
   0b000000010000000000100, /* (W) (16|M0) */
   0b000000010000000000011, /* (W) (8|M0) */
   0b010000000000000000100, /* (16|M0) (ge)f0.0 */
   0b000000000000000100100, /* (16|M16) */
   0b010100000000000000100, /* (16|M0) (lt)f0.0 */
   0b000000000000000000000, /* (1|M0) */
   0b000010000000000000100, /* (16|M0) (sat) */
   0b000000000000000010011, /* (8|M8) */
   0b001100000000000000100, /* (16|M0) (gt)f0.0 */
   0b000100000000000000100, /* (16|M0) (eq)f0.0 */
   0b000100010000000000100, /* (W) (16|M0) (eq)f0.0 */
   0b001000000000000000100, /* (16|M0) (ne)f0.0 */
   0b000000000000100000100, /* (f0.0) (16|M0) */
   0b010100000000000000011, /* (8|M0) (lt)f0.0 */
   0b000000000000110000100, /* (f1.0) (16|M0) */
   0b000000010000000000001, /* (W) (2|M0) */
   0b000000000000101000100, /* (f0.1) (16|M0) */
   0b000000000000111000100, /* (f1.1) (16|M0) */
   0b010000010000000000100, /* (W) (16|M0) (ge)f0.0 */
   0b000000000000000100011, /* (8|M16) */
   0b000000000000000110011, /* (8|M24) */
   0b010100010000000000100, /* (W) (16|M0) (lt)f0.0 */
   0b010000000000000000011, /* (8|M0) (ge)f0.0 */
   0b000100010000000000000, /* (W) (1|M0) (eq)f0.0 */
   0b000010000000000000011, /* (8|M0) (sat) */
   0b010100000000010000100, /* (16|M0) (lt)f1.0 */
   0b000100000000000000011, /* (8|M0) (eq)f0.0 */
   0b000001000000000000011, /* (8|M0) {AccWrEn} */
   0b000000010000000100100, /* (W) (16|M16) */
};
716
/* Gfx12 datatype table: 32 entries, each a 20-bit pattern.  The trailing
 * comment on each entry lists the register file and type of the destination
 * followed by those of the two sources.
 *
 * NOTE(review): presumably installed as compaction_state::datatype_table by
 * compaction_state_init() on Gfx12 -- confirm.
 */
static const uint32_t gfx12_datatype_table[32] = {
   0b11010110100101010100, /* grf<1>:f grf:f grf:f */
   0b00000110100101010100, /* grf<1>:f grf:f arf:ub */
   0b00000010101101010100, /* grf<1>:f imm:f arf:ub */
   0b01010110110101010100, /* grf<1>:f grf:f imm:f */
   0b11010100100101010100, /* arf<1>:f grf:f grf:f */
   0b11010010100101010100, /* grf<1>:f arf:f grf:f */
   0b01010100110101010100, /* arf<1>:f grf:f imm:f */
   0b00000000100000000000, /* arf<1>:ub arf:ub arf:ub */
   0b11010000100101010100, /* arf<1>:f arf:f grf:f */
   0b00101110110011001100, /* grf<1>:d grf:d imm:w */
   0b10110110100011001100, /* grf<1>:d grf:d grf:d */
   0b01010010110101010100, /* grf<1>:f arf:f imm:f */
   0b10010110100001000100, /* grf<1>:ud grf:ud grf:ud */
   0b01010000110101010100, /* arf<1>:f arf:f imm:f */
   0b00110110110011001100, /* grf<1>:d grf:d imm:d */
   0b00010110110001000100, /* grf<1>:ud grf:ud imm:ud */
   0b00000111000101010100, /* grf<2>:f grf:f arf:ub */
   0b00101100110011001100, /* arf<1>:d grf:d imm:w */
   0b00000000100000100010, /* arf<1>:uw arf:uw arf:ub */
   0b00000010100001000100, /* grf<1>:ud arf:ud arf:ub */
   0b00100110110000101010, /* grf<1>:w grf:uw imm:uv */
   0b00001110110000100010, /* grf<1>:uw grf:uw imm:uw */
   0b10010111000001000100, /* grf<2>:ud grf:ud grf:ud */
   0b00000110100101001100, /* grf<1>:d grf:f arf:ub */
   0b10001100100011001100, /* arf<1>:d grf:d grf:uw */
   0b00000110100001010100, /* grf<1>:f grf:ud arf:ub */
   0b00101110110001001100, /* grf<1>:d grf:ud imm:w */
   0b00000010100000100010, /* grf<1>:uw arf:uw arf:ub */
   0b00000110100000110100, /* grf<1>:f grf:uw arf:ub */
   0b00000110100000010100, /* grf<1>:f grf:ub arf:ub */
   0b00000110100011010100, /* grf<1>:f grf:d arf:ub */
   0b00000010100101010100, /* grf<1>:f arf:f arf:ub */
};
751
/* Gfx12 subregister table: 32 entries, each a 15-bit pattern.  The trailing
 * comment on each entry lists three subregister offsets; judging by the bit
 * positions set, the first offset occupies the low five bits and the last
 * the high five.
 *
 * NOTE(review): presumably installed as compaction_state::subreg_table by
 * compaction_state_init() on Gfx12 -- confirm, along with which operand each
 * offset belongs to.
 */
static const uint16_t gfx12_subreg_table[32] = {
   0b000000000000000, /* .0 .0 .0 */
   0b100000000000000, /* .0 .0 .16 */
   0b001000000000000, /* .0 .0 .4 */
   0b011000000000000, /* .0 .0 .12 */
   0b000000010000000, /* .0 .4 .0 */
   0b010000000000000, /* .0 .0 .8 */
   0b101000000000000, /* .0 .0 .20 */
   0b000000000001000, /* .8 .0 .0 */
   0b000000100000000, /* .0 .8 .0 */
   0b110000000000000, /* .0 .0 .24 */
   0b111000000000000, /* .0 .0 .28 */
   0b000001000000000, /* .0 .16 .0 */
   0b000000000000100, /* .4 .0 .0 */
   0b000001100000000, /* .0 .24 .0 */
   0b000001010000000, /* .0 .20 .0 */
   0b000000110000000, /* .0 .12 .0 */
   0b000001110000000, /* .0 .28 .0 */
   0b000000000011100, /* .28 .0 .0 */
   0b000000000010000, /* .16 .0 .0 */
   0b000000000001100, /* .12 .0 .0 */
   0b000000000011000, /* .24 .0 .0 */
   0b000000000010100, /* .20 .0 .0 */
   0b000000000000010, /* .2 .0 .0 */
   0b000000101000000, /* .0 .10 .0 */
   0b000000001000000, /* .0 .2 .0 */
   0b000000010000100, /* .4 .4 .0 */
   0b000000001011100, /* .28 .2 .0 */
   0b000000001000010, /* .2 .2 .0 */
   0b000000110001100, /* .12 .12 .0 */
   0b000000000100000, /* .0 .1 .0 */
   0b000000001100000, /* .0 .3 .0 */
   0b110001100000000, /* .0 .24 .24 */
};
786
/* Gfx12 source 0 index table: 16 entries, each a 12-bit pattern.  The
 * trailing comment on each entry shows the source modifier and region it
 * encodes in assembly syntax.
 *
 * NOTE(review): presumably installed as compaction_state::src0_index_table by
 * compaction_state_init() on Gfx12 -- confirm.
 */
static const uint16_t gfx12_src0_index_table[16] = {
   0b010001100100, /* r<8;8,1> */
   0b000000000000, /* r<0;1,0> */
   0b010001100110, /* -r<8;8,1> */
   0b010001100101, /* (abs)r<8;8,1> */
   0b000000000010, /* -r<0;1,0> */
   0b001000000000, /* r<2;1,0> */
   0b001001000000, /* r<2;4,0> */
   0b001101000000, /* r<4;4,0> */
   0b001000100100, /* r<2;2,1> */
   0b001100000000, /* r<4;1,0> */
   0b001000100110, /* -r<2;2,1> */
   0b001101000100, /* r<4;4,1> */
   0b010001100111, /* -(abs)r<8;8,1> */
   0b000100000000, /* r<1;1,0> */
   0b000000000001, /* (abs)r<0;1,0> */
   0b111100010000, /* r[a]<1,0> */
};
805
/* Gfx12 source 1 index table: 16 entries, each a 12-bit pattern.  The
 * trailing comment on each entry shows the source modifier and region it
 * encodes in assembly syntax.
 *
 * NOTE(review): presumably installed as compaction_state::src1_index_table by
 * compaction_state_init() on Gfx12 -- confirm.
 */
static const uint16_t gfx12_src1_index_table[16] = {
   0b000100011001, /* r<8;8,1> */
   0b000000000000, /* r<0;1,0> */
   0b100100011001, /* -r<8;8,1> */
   0b100000000000, /* -r<0;1,0> */
   0b010100011001, /* (abs)r<8;8,1> */
   0b100011010000, /* -r<4;4,0> */
   0b000010000000, /* r<2;1,0> */
   0b000010001001, /* r<2;2,1> */
   0b100010001001, /* -r<2;2,1> */
   0b000011010000, /* r<4;4,0> */
   0b000011010001, /* r<4;4,1> */
   0b000011000000, /* r<4;1,0> */
   0b110100011001, /* -(abs)r<8;8,1> */
   0b010000000000, /* (abs)r<0;1,0> */
   0b110000000000, /* -(abs)r<0;1,0> */
   0b100011010001, /* -r<4;4,1> */
};
824
/* XeHP source 0 index table: 16 entries, each a 12-bit pattern.  The
 * trailing comment on each entry shows the source modifier and region it
 * encodes in assembly syntax.
 *
 * NOTE(review): presumably installed as compaction_state::src0_index_table by
 * compaction_state_init() on XeHP -- confirm.
 */
static const uint16_t xehp_src0_index_table[16] = {
   0b000100000000, /* r<1;1,0> */
   0b000000000000, /* r<0;1,0> */
   0b000100000010, /* -r<1;1,0> */
   0b000100000001, /* (abs)r<1;1,0> */
   0b000000000010, /* -r<0;1,0> */
   0b001000000000, /* r<2;1,0> */
   0b001001000000, /* r<2;4,0> */
   0b001101000000, /* r<4;4,0> */
   0b001100000000, /* r<4;1,0> */
   0b000100000011, /* -(abs)r<1;1,0> */
   0b000000000001, /* (abs)r<0;1,0> */
   0b111100010000, /* r[a]<1,0> */
   0b010001100000, /* r<8;8,0> */
   0b000101000000, /* r<1;4,0> */
   0b010001001000, /* r<8;4,2> */
   0b001000000010, /* -r<2;1,0> */
};
843
/* XeHP source 1 index table: 16 entries, each a 12-bit pattern.  The
 * trailing comment on each entry shows the source modifier and region it
 * encodes in assembly syntax.
 *
 * NOTE(review): presumably installed as compaction_state::src1_index_table by
 * compaction_state_init() on XeHP -- confirm.
 */
static const uint16_t xehp_src1_index_table[16] = {
   0b000001000000, /* r<1;1,0> */
   0b000000000000, /* r<0;1,0> */
   0b100001000000, /* -r<1;1,0> */
   0b100000000000, /* -r<0;1,0> */
   0b010001000000, /* (abs)r<1;1,0> */
   0b100011010000, /* -r<4;4,0> */
   0b000010000000, /* r<2;1,0> */
   0b000011010000, /* r<4;4,0> */
   0b000011000000, /* r<4;1,0> */
   0b110001000000, /* -(abs)r<1;1,0> */
   0b010000000000, /* (abs)r<0;1,0> */
   0b110000000000, /* -(abs)r<0;1,0> */
   0b000100011000, /* r<8;8,0> */
   0b100010000000, /* -r<2;1,0> */
   0b100000001001, /* -r<0;2,1> */
   0b100001000100, /* -r[a]<1;1,0> */
};
862
/* Xe2 control index table: 32 entries, each an 18-bit pattern.  The trailing
 * comment on each entry gives the assembly-syntax rendering of the controls
 * that entry encodes.
 *
 * NOTE(review): presumably installed as compaction_state::control_index_table
 * by compaction_state_init() on Xe2 -- confirm.
 */
static const uint32_t xe2_control_index_table[32] = {
   0b000000000000000100, /* (16|M0) */
   0b000000100000000000, /* (W) (1|M0) */
   0b000000000010000100, /* (16|M16) */
   0b000000000000000000, /* (1|M0) */
   0b000000100000000100, /* (W) (16|M0) */
   0b010000000000000100, /* (16|M0) (.ge)f0.0 */
   0b010100000000000100, /* (16|M0) (.lt)f0.0 */
   0b000000100000000010, /* (W) (4|M0) */
   0b000000000000000101, /* (32|M0) */
   0b000000100000000011, /* (W) (8|M0) */
   0b001100100000000000, /* (W) (1|M0) (.gt)f0.0 */
   0b000010000000000100, /* (16|M0) (sat) */
   0b000100000000000100, /* (16|M0) (.eq)f0.0 */
   0b000000100000000001, /* (W) (2|M0) */
   0b001100000000000100, /* (16|M0) (.gt)f0.0 */
   0b000100100000000000, /* (W) (1|M0) (.eq)f0.0 */
   0b010100100000000010, /* (W) (4|M0) (.lt)f0.0 */
   0b010000100000000000, /* (W) (1|M0) (.ge)f0.0 */
   0b010000100000000010, /* (W) (4|M0) (.ge)f0.0 */
   0b010100100000000000, /* (W) (1|M0) (.lt)f0.0 */
   0b001000000000000100, /* (16|M0) (.ne)f0.0 */
   0b000000000100100100, /* (f2.0) (16|M0) */
   0b010100100000000011, /* (W) (8|M0) (.lt)f0.0 */
   0b000000000100011100, /* (f1.1) (16|M0) */
   0b010000100000000011, /* (W) (8|M0) (.ge)f0.0 */
   0b000000000100001100, /* (f0.1) (16|M0) */
   0b000000000100010100, /* (f1.0) (16|M0) */
   0b000000000100110100, /* (f3.0) (16|M0) */
   0b000000000100111100, /* (f3.1) (16|M0) */
   0b000000000100101100, /* (f2.1) (16|M0) */
   0b000000000100000100, /* (f0.0) (16|M0) */
   0b010100000000100100, /* (16|M0) (.lt)f2.0 */
};
897
/* Xe2 datatype table: 32 entries, each a 20-bit pattern.  The trailing
 * comment on each entry lists the register file and type of the destination
 * followed by those of the sources.
 *
 * NOTE(review): presumably installed as compaction_state::datatype_table by
 * compaction_state_init() on Xe2 -- confirm.
 */
static const uint32_t xe2_datatype_table[32] = {
   0b11010110100101010100, /* grf<1>:f grf:f grf:f */
   0b11010100100101010100, /* arf<1>:f grf:f grf:f */
   0b00000110100101010100, /* grf<1>:f grf:f arf:ub */
   0b00000110100001000100, /* grf<1>:ud grf:ud arf:ub */
   0b01010110110101010100, /* grf<1>:f grf:f imm:f */
   0b11010010100101010100, /* grf<1>:f arf:f grf:f */
   0b10111110100011101110, /* grf<1>:q grf:q grf:q */
   0b00000000100000000000, /* arf<1>:ub arf:ub arf:ub */
   0b01010110100101010100, /* grf<1>:f grf:f arf:f */
   0b00000010101001000100, /* grf<1>:ud imm:ud */
   0b00101110110011001100, /* grf<1>:d grf:d imm:w */
   0b11010000100101010100, /* arf<1>:f arf:f grf:f */
   0b01010100100101010100, /* arf<1>:f grf:f arf:f */
   0b01010100110101010100, /* arf<1>:f grf:f imm:f */
   0b00000010101101010100, /* grf<1>:f imm:f */
   0b00000110100011001100, /* grf<1>:d grf:d arf:ub */
   0b00101110110011101110, /* grf<1>:q grf:q imm:w */
   0b00000110100001100110, /* grf<1>:uq grf:uq arf:ub */
   0b01010000100101010100, /* arf<1>:f arf:f arf:f */
   0b10110110100011001100, /* grf<1>:d grf:d grf:d */
   0b01010010100101010100, /* grf<1>:f arf:f arf:f */
   0b00000111000001000100, /* grf<2>:ud grf:ud arf:ub */
   0b00110110110011001110, /* grf<1>:q grf:d imm:d */
   0b00101100110011001100, /* arf<1>:d grf:d imm:w */
   0b11011110100101110110, /* grf<1>:df grf:df grf:df */
   0b01010010110101010100, /* grf<1>:f arf:f imm:f */
   0b10010110100001000100, /* grf<1>:ud grf:ud grf:ud */
   0b00000010100001000100, /* grf<1>:ud arf:ud arf:ub */
   0b00001110110001000100, /* grf<1>:ud grf:ud imm:uw */
   0b00000010101010101100, /* grf<1>:d imm:w */
   0b01010000110101010100, /* arf<1>:f arf:f imm:f */
   0b00000100100001000100, /* arf<1>:ud grf:ud arf:ub */
};
932
/* Xe2 subregister table: 16 entries, each a 12-bit pattern.  The trailing
 * comment on each entry lists two subregister offsets; judging by the bit
 * positions set, the first offset occupies the low bits and the second the
 * high bits.
 *
 * NOTE(review): presumably installed as compaction_state::subreg_table by
 * compaction_state_init() on Xe2 -- confirm, along with which operand each
 * offset belongs to.
 */
static const uint16_t xe2_subreg_table[16] = {
   0b000000000000, /* .0 .0 */
   0b000010000000, /* .0 .4 */
   0b000000000100, /* .4 .0 */
   0b010000000000, /* .0 .32 */
   0b001000000000, /* .0 .16 */
   0b000000001000, /* .8 .0 */
   0b000100000000, /* .0 .8 */
   0b010100000000, /* .0 .40 */
   0b011000000000, /* .0 .48 */
   0b000110000000, /* .0 .12 */
   0b000000010000, /* .16 .0 */
   0b011010000000, /* .0 .52 */
   0b001100000000, /* .0 .24 */
   0b011100000000, /* .0 .56 */
   0b010110000000, /* .0 .44 */
   0b010010000000, /* .0 .36 */
};
951
/* Xe2 source 0 index table: 8 entries, each an 11-bit pattern.  The trailing
 * comment on each entry shows the source modifier and region it encodes in
 * assembly syntax.
 *
 * NOTE(review): presumably installed as compaction_state::src0_index_table by
 * compaction_state_init() on Xe2 -- confirm.
 */
static const uint16_t xe2_src0_index_table[8] = {
   0b00100000000, /* r<1;1,0> */
   0b00000000000, /* r<0;1,0> */
   0b01000000000, /* r<2;1,0> */
   0b00100000010, /* -r<1;1,0> */
   0b01100000000, /* r<4;1,0> */
   0b00100000001, /* (abs)r<1;1,0> */
   0b00000000010, /* -r<0;1,0> */
   0b01001000000, /* r<2;4,0> */
};
962
/* Xe2 source 1 index table: 16 entries, each a 16-bit pattern.  The trailing
 * comment on each entry shows the source modifier, region and subregister
 * offset it encodes in assembly syntax.
 *
 * NOTE(review): presumably installed as compaction_state::src1_index_table by
 * compaction_state_init() on Xe2 -- confirm.
 */
static const uint16_t xe2_src1_index_table[16] = {
   0b0000100000000000, /* r<1;1,0>.0 */
   0b0000000000000000, /* r<0;1,0>.0 */
   0b1000100000000000, /* -r<1;1,0>.0 */
   0b0000000000010000, /* r<0;1,0>.8 */
   0b0000000000001000, /* r<0;1,0>.4 */
   0b0000000000011000, /* r<0;1,0>.12 */
   0b0000000001010000, /* r<0;1,0>.40 */
   0b0000000001000000, /* r<0;1,0>.32 */
   0b0000000000100000, /* r<0;1,0>.16 */
   0b0000000001111000, /* r<0;1,0>.60 */
   0b0000000000111000, /* r<0;1,0>.28 */
   0b0000000000101000, /* r<0;1,0>.20 */
   0b0000000001011000, /* r<0;1,0>.44 */
   0b0000000001001000, /* r<0;1,0>.36 */
   0b0000000001110000, /* r<0;1,0>.56 */
   0b0000000000110000, /* r<0;1,0>.24 */
};
981
/* Control index table for 3-src instructions (4 entries).
 *
 * This is actually the control index table for Cherryview (26 bits), but the
 * only difference from Broadwell (24 bits) is that it has two extra 0-bits at
 * the start.
 *
 * The low 24 bits have the same mappings on both hardware.
 */
static const uint32_t gfx8_3src_control_index_table[4] = {
   0b00100000000110000000000001,
   0b00000000000110000000000001,
   0b00000000001000000000000001,
   0b00000000001000000000100001,
};
994
/* Source index table for 3-src instructions (4 entries, held in 64-bit
 * words because each pattern is 49 bits wide).
 *
 * This is actually the source index table for Cherryview (49 bits), but the
 * only difference from Broadwell (46 bits) is that it has three extra 0-bits
 * at the start.
 *
 * The low 44 bits have the same mappings on both hardware, and since the high
 * three bits on Broadwell are zero, we can reuse Cherryview's table.
 */
static const uint64_t gfx8_3src_source_index_table[4] = {
   0b0000001110010011100100111001000001111000000000000,
   0b0000001110010011100100111001000001111000000000010,
   0b0000001110010011100100111001000001111000000001000,
   0b0000001110010011100100111001000001111000000100000,
};
1008
/* State handed to the compaction helpers: the ISA description plus pointers
 * to the lookup tables they search.
 */
struct compaction_state {
   /* ISA description; isa->devinfo supplies the device version (->ver). */
   const struct elk_isa_info *isa;
   /* Control-bit patterns; set_control_index() scans entries 0..31. */
   const uint32_t *control_index_table;
   /* NOTE(review): presumably searched by set_datatype_index() -- confirm. */
   const uint32_t *datatype_table;
   /* NOTE(review): presumably the subregister patterns -- usage not visible
    * in this part of the file.
    */
   const uint16_t *subreg_table;
   /* NOTE(review): presumably the source 0 / source 1 patterns -- usage not
    * visible in this part of the file.
    */
   const uint16_t *src0_index_table;
   const uint16_t *src1_index_table;
};
1017
/* Forward declaration: the definition appears further down in this file,
 * after the compaction/uncompaction entry points that call it.
 */
static void compaction_state_init(struct compaction_state *c,
                                  const struct elk_isa_info *isa);
1020
1021 static bool
set_control_index(const struct compaction_state * c,elk_compact_inst * dst,const elk_inst * src)1022 set_control_index(const struct compaction_state *c,
1023 elk_compact_inst *dst, const elk_inst *src)
1024 {
1025 const struct intel_device_info *devinfo = c->isa->devinfo;
1026 uint32_t uncompacted; /* 17b/G45; 19b/IVB+ */
1027
1028 if (devinfo->ver >= 8) {
1029 uncompacted = (elk_inst_bits(src, 33, 31) << 16) | /* 3b */
1030 (elk_inst_bits(src, 23, 12) << 4) | /* 12b */
1031 (elk_inst_bits(src, 10, 9) << 2) | /* 2b */
1032 (elk_inst_bits(src, 34, 34) << 1) | /* 1b */
1033 (elk_inst_bits(src, 8, 8)); /* 1b */
1034 } else {
1035 uncompacted = (elk_inst_bits(src, 31, 31) << 16) | /* 1b */
1036 (elk_inst_bits(src, 23, 8)); /* 16b */
1037
1038 /* On gfx7, the flag register and subregister numbers are integrated into
1039 * the control index.
1040 */
1041 if (devinfo->ver == 7)
1042 uncompacted |= elk_inst_bits(src, 90, 89) << 17; /* 2b */
1043 }
1044
1045 for (int i = 0; i < 32; i++) {
1046 if (c->control_index_table[i] == uncompacted) {
1047 elk_compact_inst_set_control_index(devinfo, dst, i);
1048 return true;
1049 }
1050 }
1051
1052 return false;
1053 }
1054
1055 static bool
set_datatype_index(const struct compaction_state * c,elk_compact_inst * dst,const elk_inst * src,bool is_immediate)1056 set_datatype_index(const struct compaction_state *c, elk_compact_inst *dst,
1057 const elk_inst *src, bool is_immediate)
1058 {
1059 const struct intel_device_info *devinfo = c->isa->devinfo;
1060 uint32_t uncompacted; /* 18b/G45+; 21b/BDW+ */
1061
1062 if (devinfo->ver >= 8) {
1063 uncompacted = (elk_inst_bits(src, 63, 61) << 18) | /* 3b */
1064 (elk_inst_bits(src, 94, 89) << 12) | /* 6b */
1065 (elk_inst_bits(src, 46, 35)); /* 12b */
1066 } else {
1067 uncompacted = (elk_inst_bits(src, 63, 61) << 15) | /* 3b */
1068 (elk_inst_bits(src, 46, 32)); /* 15b */
1069 }
1070
1071 for (int i = 0; i < 32; i++) {
1072 if (c->datatype_table[i] == uncompacted) {
1073 elk_compact_inst_set_datatype_index(devinfo, dst, i);
1074 return true;
1075 }
1076 }
1077
1078 return false;
1079 }
1080
1081 static bool
set_subreg_index(const struct compaction_state * c,elk_compact_inst * dst,const elk_inst * src,bool is_immediate)1082 set_subreg_index(const struct compaction_state *c, elk_compact_inst *dst,
1083 const elk_inst *src, bool is_immediate)
1084 {
1085 const struct intel_device_info *devinfo = c->isa->devinfo;
1086
1087 uint16_t uncompacted = /* 15b/G45+ */
1088 (elk_inst_bits(src, 52, 48) << 0) | /* 5b */
1089 (elk_inst_bits(src, 68, 64) << 5); /* 5b */
1090
1091 if (!is_immediate)
1092 uncompacted |= elk_inst_bits(src, 100, 96) << 10; /* 5b */
1093
1094 for (int i = 0; i < ARRAY_SIZE(g45_subreg_table); i++) {
1095 if (c->subreg_table[i] == uncompacted) {
1096 elk_compact_inst_set_subreg_index(devinfo, dst, i);
1097 return true;
1098 }
1099 }
1100
1101 return false;
1102 }
1103
1104 static bool
set_src0_index(const struct compaction_state * c,elk_compact_inst * dst,const elk_inst * src)1105 set_src0_index(const struct compaction_state *c, elk_compact_inst *dst,
1106 const elk_inst *src)
1107 {
1108 const struct intel_device_info *devinfo = c->isa->devinfo;
1109
1110 const uint16_t uncompacted = /* 12b/G45+ */
1111 elk_inst_bits(src, 88, 77); /* 12b */
1112
1113 for (int i = 0; i < ARRAY_SIZE(gfx8_src_index_table); i++) {
1114 if (c->src0_index_table[i] == uncompacted) {
1115 elk_compact_inst_set_src0_index(devinfo, dst, i);
1116 return true;
1117 }
1118 }
1119
1120 return false;
1121 }
1122
1123 static bool
set_src1_index(const struct compaction_state * c,elk_compact_inst * dst,const elk_inst * src,bool is_immediate,unsigned imm)1124 set_src1_index(const struct compaction_state *c, elk_compact_inst *dst,
1125 const elk_inst *src, bool is_immediate, unsigned imm)
1126 {
1127 const struct intel_device_info *devinfo = c->isa->devinfo;
1128 if (is_immediate) {
1129 /* src1 index takes the high 5 bits of the 13-bit compacted value */
1130 elk_compact_inst_set_src1_index(devinfo, dst, imm >> 8);
1131 return true;
1132 } else {
1133 const uint16_t uncompacted = /* 12b/G45+ */
1134 elk_inst_bits(src, 120, 109); /* 12b */
1135
1136 for (int i = 0; i < ARRAY_SIZE(gfx8_src_index_table); i++) {
1137 if (c->src1_index_table[i] == uncompacted) {
1138 elk_compact_inst_set_src1_index(devinfo, dst, i);
1139 return true;
1140 }
1141 }
1142 }
1143
1144 return false;
1145 }
1146
1147 static bool
set_3src_control_index(const struct intel_device_info * devinfo,elk_compact_inst * dst,const elk_inst * src)1148 set_3src_control_index(const struct intel_device_info *devinfo,
1149 elk_compact_inst *dst, const elk_inst *src)
1150 {
1151 assert(devinfo->ver >= 8);
1152
1153 uint32_t uncompacted = /* 24b/BDW; 26b/CHV */
1154 (elk_inst_bits(src, 34, 32) << 21) | /* 3b */
1155 (elk_inst_bits(src, 28, 8)); /* 21b */
1156
1157 if (devinfo->platform == INTEL_PLATFORM_CHV) {
1158 uncompacted |=
1159 elk_inst_bits(src, 36, 35) << 24; /* 2b */
1160 }
1161
1162 for (unsigned i = 0; i < ARRAY_SIZE(gfx8_3src_control_index_table); i++) {
1163 if (gfx8_3src_control_index_table[i] == uncompacted) {
1164 elk_compact_inst_set_3src_control_index(devinfo, dst, i);
1165 return true;
1166 }
1167 }
1168
1169 return false;
1170 }
1171
1172 static bool
set_3src_source_index(const struct intel_device_info * devinfo,elk_compact_inst * dst,const elk_inst * src)1173 set_3src_source_index(const struct intel_device_info *devinfo,
1174 elk_compact_inst *dst, const elk_inst *src)
1175 {
1176 assert(devinfo->ver >= 8);
1177
1178 uint64_t uncompacted = /* 46b/BDW; 49b/CHV */
1179 (elk_inst_bits(src, 83, 83) << 43) | /* 1b */
1180 (elk_inst_bits(src, 114, 107) << 35) | /* 8b */
1181 (elk_inst_bits(src, 93, 86) << 27) | /* 8b */
1182 (elk_inst_bits(src, 72, 65) << 19) | /* 8b */
1183 (elk_inst_bits(src, 55, 37)); /* 19b */
1184
1185 if (devinfo->platform == INTEL_PLATFORM_CHV) {
1186 uncompacted |=
1187 (elk_inst_bits(src, 126, 125) << 47) | /* 2b */
1188 (elk_inst_bits(src, 105, 104) << 45) | /* 2b */
1189 (elk_inst_bits(src, 84, 84) << 44); /* 1b */
1190 } else {
1191 uncompacted |=
1192 (elk_inst_bits(src, 125, 125) << 45) | /* 1b */
1193 (elk_inst_bits(src, 104, 104) << 44); /* 1b */
1194 }
1195
1196 for (unsigned i = 0; i < ARRAY_SIZE(gfx8_3src_source_index_table); i++) {
1197 if (gfx8_3src_source_index_table[i] == uncompacted) {
1198 elk_compact_inst_set_3src_source_index(devinfo, dst, i);
1199 return true;
1200 }
1201 }
1202
1203 return false;
1204 }
1205
1206 static bool
has_unmapped_bits(const struct elk_isa_info * isa,const elk_inst * src)1207 has_unmapped_bits(const struct elk_isa_info *isa, const elk_inst *src)
1208 {
1209 const struct intel_device_info *devinfo = isa->devinfo;
1210
1211 /* EOT can only be mapped on a send if the src1 is an immediate */
1212 if ((elk_inst_opcode(isa, src) == ELK_OPCODE_SENDC ||
1213 elk_inst_opcode(isa, src) == ELK_OPCODE_SEND) &&
1214 elk_inst_eot(devinfo, src))
1215 return true;
1216
1217 /* Check for instruction bits that don't map to any of the fields of the
1218 * compacted instruction. The instruction cannot be compacted if any of
1219 * them are set. They overlap with:
1220 * - NibCtrl (bit 47 on Gfx7, bit 11 on Gfx8)
1221 * - Dst.AddrImm[9] (bit 47 on Gfx8)
1222 * - Src0.AddrImm[9] (bit 95 on Gfx8)
1223 * - Imm64[27:31] (bits 91-95 on Gfx7, bit 95 on Gfx8)
1224 * - UIP[31] (bit 95 on Gfx8)
1225 */
1226 if (devinfo->ver >= 8) {
1227 assert(!elk_inst_bits(src, 7, 7));
1228 return elk_inst_bits(src, 95, 95) ||
1229 elk_inst_bits(src, 47, 47) ||
1230 elk_inst_bits(src, 11, 11);
1231 } else {
1232 assert(!elk_inst_bits(src, 7, 7) &&
1233 !(devinfo->ver < 7 && elk_inst_bits(src, 90, 90)));
1234 return elk_inst_bits(src, 95, 91) ||
1235 elk_inst_bits(src, 47, 47);
1236 }
1237 }
1238
1239 static bool
has_3src_unmapped_bits(const struct intel_device_info * devinfo,const elk_inst * src)1240 has_3src_unmapped_bits(const struct intel_device_info *devinfo,
1241 const elk_inst *src)
1242 {
1243 /* Check for three-source instruction bits that don't map to any of the
1244 * fields of the compacted instruction. All of them seem to be reserved
1245 * bits currently.
1246 */
1247 if (devinfo->platform == INTEL_PLATFORM_CHV) {
1248 assert(!elk_inst_bits(src, 127, 127) &&
1249 !elk_inst_bits(src, 7, 7));
1250 } else {
1251 assert(devinfo->ver >= 8);
1252 assert(!elk_inst_bits(src, 127, 126) &&
1253 !elk_inst_bits(src, 105, 105) &&
1254 !elk_inst_bits(src, 84, 84) &&
1255 !elk_inst_bits(src, 7, 7));
1256
1257 /* Src1Type and Src2Type, used for mixed-precision floating point */
1258 if (elk_inst_bits(src, 36, 35))
1259 return true;
1260 }
1261
1262 return false;
1263 }
1264
1265 static bool
elk_try_compact_3src_instruction(const struct elk_isa_info * isa,elk_compact_inst * dst,const elk_inst * src)1266 elk_try_compact_3src_instruction(const struct elk_isa_info *isa,
1267 elk_compact_inst *dst, const elk_inst *src)
1268 {
1269 const struct intel_device_info *devinfo = isa->devinfo;
1270 assert(devinfo->ver >= 8);
1271
1272 if (has_3src_unmapped_bits(devinfo, src))
1273 return false;
1274
1275 #define compact(field) \
1276 elk_compact_inst_set_3src_##field(devinfo, dst, elk_inst_3src_##field(devinfo, src))
1277 #define compact_a16(field) \
1278 elk_compact_inst_set_3src_##field(devinfo, dst, elk_inst_3src_a16_##field(devinfo, src))
1279
1280 compact(hw_opcode);
1281
1282 if (!set_3src_control_index(devinfo, dst, src))
1283 return false;
1284
1285 if (!set_3src_source_index(devinfo, dst, src))
1286 return false;
1287
1288 compact(dst_reg_nr);
1289 compact_a16(src0_rep_ctrl);
1290 compact(debug_control);
1291 compact(saturate);
1292 compact_a16(src1_rep_ctrl);
1293 compact_a16(src2_rep_ctrl);
1294 compact(src0_reg_nr);
1295 compact(src1_reg_nr);
1296 compact(src2_reg_nr);
1297 compact_a16(src0_subreg_nr);
1298 compact_a16(src1_subreg_nr);
1299 compact_a16(src2_subreg_nr);
1300
1301 elk_compact_inst_set_3src_cmpt_control(devinfo, dst, true);
1302
1303 #undef compact
1304 #undef compact_a16
1305
1306 return true;
1307 }
1308
1309 /* On SNB through ICL, compacted instructions have 12-bits for immediate
1310 * sources, and a 13th bit that's replicated through the high 20 bits.
1311 *
1312 * Effectively this means we get 12-bit integers, 0.0f, and some limited uses
1313 * of packed vectors as compactable immediates.
1314 *
1315 * Returns the compacted immediate, or -1 if immediate cannot be compacted
1316 */
1317 static int
compact_immediate(const struct intel_device_info * devinfo,enum elk_reg_type type,unsigned imm)1318 compact_immediate(const struct intel_device_info *devinfo,
1319 enum elk_reg_type type, unsigned imm)
1320 {
1321 /* We get the low 12 bits as-is; 13th is replicated */
1322 if (((int)imm >> 12) == 0 || ((int)imm >> 12 == -1)) {
1323 return imm & 0x1fff;
1324 }
1325 return -1;
1326 }
1327
1328 static int
uncompact_immediate(const struct intel_device_info * devinfo,enum elk_reg_type type,unsigned compact_imm)1329 uncompact_immediate(const struct intel_device_info *devinfo,
1330 enum elk_reg_type type, unsigned compact_imm)
1331 {
1332 /* Replicate the 13th bit into the high 19 bits */
1333 return (int)(compact_imm << 19) >> 19;
1334 }
1335
1336 static bool
has_immediate(const struct intel_device_info * devinfo,const elk_inst * inst,enum elk_reg_type * type)1337 has_immediate(const struct intel_device_info *devinfo, const elk_inst *inst,
1338 enum elk_reg_type *type)
1339 {
1340 if (elk_inst_src0_reg_file(devinfo, inst) == ELK_IMMEDIATE_VALUE) {
1341 *type = elk_inst_src0_type(devinfo, inst);
1342 return *type != INVALID_REG_TYPE;
1343 } else if (elk_inst_src1_reg_file(devinfo, inst) == ELK_IMMEDIATE_VALUE) {
1344 *type = elk_inst_src1_type(devinfo, inst);
1345 return *type != INVALID_REG_TYPE;
1346 }
1347
1348 return false;
1349 }
1350
1351 /**
1352 * Applies some small changes to instruction types to increase chances of
1353 * compaction.
1354 */
1355 static elk_inst
precompact(const struct elk_isa_info * isa,elk_inst inst)1356 precompact(const struct elk_isa_info *isa, elk_inst inst)
1357 {
1358 const struct intel_device_info *devinfo = isa->devinfo;
1359
1360 if (elk_inst_src0_reg_file(devinfo, &inst) != ELK_IMMEDIATE_VALUE)
1361 return inst;
1362
1363 /* The Bspec's section titled "Non-present Operands" claims that if src0
1364 * is an immediate that src1's type must be the same as that of src0.
1365 *
1366 * The SNB+ DataTypeIndex instruction compaction tables contain mappings
1367 * that do not follow this rule. E.g., from the IVB/HSW table:
1368 *
1369 * DataTypeIndex 18-Bit Mapping Mapped Meaning
1370 * 3 001000001011111101 r:f | i:vf | a:ud | <1> | dir |
1371 *
1372 * And from the SNB table:
1373 *
1374 * DataTypeIndex 18-Bit Mapping Mapped Meaning
1375 * 8 001000000111101100 a:w | i:w | a:ud | <1> | dir |
1376 *
1377 * Neither of these cause warnings from the simulator when used,
1378 * compacted or otherwise. In fact, all compaction mappings that have an
1379 * immediate in src0 use a:ud for src1.
1380 *
1381 * The GM45 instruction compaction tables do not contain mapped meanings
1382 * so it's not clear whether it has the restriction. We'll assume it was
1383 * lifted on SNB. (FINISHME: decode the GM45 tables and check.)
1384 *
1385 * Don't do any of this for 64-bit immediates, since the src1 fields
1386 * overlap with the immediate and setting them would overwrite the
1387 * immediate we set.
1388 */
1389 if (devinfo->ver >= 6 &&
1390 !(devinfo->platform == INTEL_PLATFORM_HSW &&
1391 elk_inst_opcode(isa, &inst) == ELK_OPCODE_DIM) &&
1392 !(devinfo->ver >= 8 &&
1393 (elk_inst_src0_type(devinfo, &inst) == ELK_REGISTER_TYPE_DF ||
1394 elk_inst_src0_type(devinfo, &inst) == ELK_REGISTER_TYPE_UQ ||
1395 elk_inst_src0_type(devinfo, &inst) == ELK_REGISTER_TYPE_Q))) {
1396 elk_inst_set_src1_reg_hw_type(devinfo, &inst, 0);
1397 }
1398
1399 /* There are no mappings for dst:d | i:d, so if the immediate is suitable
1400 * set the types to :UD so the instruction can be compacted.
1401 */
1402 if (compact_immediate(devinfo, ELK_REGISTER_TYPE_D,
1403 elk_inst_imm_ud(devinfo, &inst)) != -1 &&
1404 elk_inst_cond_modifier(devinfo, &inst) == ELK_CONDITIONAL_NONE &&
1405 elk_inst_src0_type(devinfo, &inst) == ELK_REGISTER_TYPE_D &&
1406 elk_inst_dst_type(devinfo, &inst) == ELK_REGISTER_TYPE_D) {
1407 enum elk_reg_file src_file = elk_inst_src0_reg_file(devinfo, &inst);
1408 enum elk_reg_file dst_file = elk_inst_dst_reg_file(devinfo, &inst);
1409
1410 elk_inst_set_src0_file_type(devinfo, &inst, src_file, ELK_REGISTER_TYPE_UD);
1411 elk_inst_set_dst_file_type(devinfo, &inst, dst_file, ELK_REGISTER_TYPE_UD);
1412 }
1413
1414 return inst;
1415 }
1416
/**
 * Tries to compact instruction src into dst.
 *
 * It doesn't modify dst unless src is compactable, which is relied on by
 * elk_compact_instructions().
 *
 * All fields are assembled in a zeroed local copy and only copied to dst
 * once every step has succeeded.  Returns true when dst was written, false
 * otherwise.  src must not already have its compaction control bit set
 * (asserted).
 */
static bool
try_compact_instruction(const struct compaction_state *c,
                        elk_compact_inst *dst, const elk_inst *src)
{
   const struct intel_device_info *devinfo = c->isa->devinfo;
   elk_compact_inst temp;

   assert(elk_inst_cmpt_control(devinfo, src) == 0);

   /* Three-source instructions take a separate path, and only on ver >= 8;
    * on older versions they are never compacted.
    */
   if (elk_is_3src(c->isa, elk_inst_opcode(c->isa, src))) {
      if (devinfo->ver >= 8) {
         memset(&temp, 0, sizeof(temp));
         if (elk_try_compact_3src_instruction(c->isa, &temp, src)) {
            *dst = temp;
            return true;
         } else {
            return false;
         }
      } else {
         return false;
      }
   }

   enum elk_reg_type type;
   bool is_immediate = has_immediate(devinfo, src, &type);

   unsigned compacted_imm = 0;

   if (is_immediate) {
      /* Instructions with immediates cannot be compacted on Gen < 6 */
      if (devinfo->ver < 6)
         return false;

      compacted_imm = compact_immediate(devinfo, type,
                                        elk_inst_imm_ud(devinfo, src));
      /* compact_immediate() signals failure with -1.  Stored in an unsigned
       * that becomes UINT_MAX, which the comparison below still matches
       * because -1 is converted to unsigned as well.
       */
      if (compacted_imm == -1)
         return false;
   }

   if (has_unmapped_bits(c->isa, src))
      return false;

   memset(&temp, 0, sizeof(temp));

/* Copy a same-named field from src into temp. */
#define compact(field) \
   elk_compact_inst_set_##field(devinfo, &temp, elk_inst_##field(devinfo, src))
/* Copy the direct-addressing register number of an operand into temp. */
#define compact_reg(field) \
   elk_compact_inst_set_##field##_reg_nr(devinfo, &temp, \
                                         elk_inst_##field##_da_reg_nr(devinfo, src))

   compact(hw_opcode);
   compact(debug_control);

   /* Each lookup fails when the instruction's bit pattern has no entry in
    * the corresponding table, which makes the instruction uncompactable.
    */
   if (!set_control_index(c, &temp, src))
      return false;
   if (!set_datatype_index(c, &temp, src, is_immediate))
      return false;
   if (!set_subreg_index(c, &temp, src, is_immediate))
      return false;
   if (!set_src0_index(c, &temp, src))
      return false;
   if (!set_src1_index(c, &temp, src, is_immediate, compacted_imm))
      return false;

   if (devinfo->ver >= 6) {
      compact(acc_wr_control);
   } else {
      compact(mask_control_ex);
   }

   /* On Gfx7 the flag register and subregister numbers are part of the
    * control index instead (see set_control_index()).
    */
   if (devinfo->ver <= 6)
      compact(flag_subreg_nr);

   compact(cond_modifier);

   compact_reg(dst);
   compact_reg(src0);

   if (is_immediate) {
      /* src1 reg takes the low 8 bits (of the 13-bit compacted value) */
      elk_compact_inst_set_src1_reg_nr(devinfo, &temp, compacted_imm & 0xff);
   } else {
      compact_reg(src1);
   }

   elk_compact_inst_set_cmpt_control(devinfo, &temp, true);

#undef compact
#undef compact_reg

   *dst = temp;

   return true;
}
1517
1518 bool
elk_try_compact_instruction(const struct elk_isa_info * isa,elk_compact_inst * dst,const elk_inst * src)1519 elk_try_compact_instruction(const struct elk_isa_info *isa,
1520 elk_compact_inst *dst, const elk_inst *src)
1521 {
1522 struct compaction_state c;
1523 compaction_state_init(&c, isa);
1524 return try_compact_instruction(&c, dst, src);
1525 }
1526
1527 static void
set_uncompacted_control(const struct compaction_state * c,elk_inst * dst,elk_compact_inst * src)1528 set_uncompacted_control(const struct compaction_state *c, elk_inst *dst,
1529 elk_compact_inst *src)
1530 {
1531 const struct intel_device_info *devinfo = c->isa->devinfo;
1532 uint32_t uncompacted =
1533 c->control_index_table[elk_compact_inst_control_index(devinfo, src)];
1534
1535 if (devinfo->ver >= 8) {
1536 elk_inst_set_bits(dst, 33, 31, (uncompacted >> 16));
1537 elk_inst_set_bits(dst, 23, 12, (uncompacted >> 4) & 0xfff);
1538 elk_inst_set_bits(dst, 10, 9, (uncompacted >> 2) & 0x3);
1539 elk_inst_set_bits(dst, 34, 34, (uncompacted >> 1) & 0x1);
1540 elk_inst_set_bits(dst, 8, 8, (uncompacted >> 0) & 0x1);
1541 } else {
1542 elk_inst_set_bits(dst, 31, 31, (uncompacted >> 16) & 0x1);
1543 elk_inst_set_bits(dst, 23, 8, (uncompacted & 0xffff));
1544
1545 if (devinfo->ver == 7)
1546 elk_inst_set_bits(dst, 90, 89, uncompacted >> 17);
1547 }
1548 }
1549
1550 static void
set_uncompacted_datatype(const struct compaction_state * c,elk_inst * dst,elk_compact_inst * src)1551 set_uncompacted_datatype(const struct compaction_state *c, elk_inst *dst,
1552 elk_compact_inst *src)
1553 {
1554 const struct intel_device_info *devinfo = c->isa->devinfo;
1555 uint32_t uncompacted =
1556 c->datatype_table[elk_compact_inst_datatype_index(devinfo, src)];
1557
1558 if (devinfo->ver >= 8) {
1559 elk_inst_set_bits(dst, 63, 61, (uncompacted >> 18));
1560 elk_inst_set_bits(dst, 94, 89, (uncompacted >> 12) & 0x3f);
1561 elk_inst_set_bits(dst, 46, 35, (uncompacted >> 0) & 0xfff);
1562 } else {
1563 elk_inst_set_bits(dst, 63, 61, (uncompacted >> 15));
1564 elk_inst_set_bits(dst, 46, 32, (uncompacted & 0x7fff));
1565 }
1566 }
1567
1568 static void
set_uncompacted_subreg(const struct compaction_state * c,elk_inst * dst,elk_compact_inst * src)1569 set_uncompacted_subreg(const struct compaction_state *c, elk_inst *dst,
1570 elk_compact_inst *src)
1571 {
1572 const struct intel_device_info *devinfo = c->isa->devinfo;
1573 uint16_t uncompacted =
1574 c->subreg_table[elk_compact_inst_subreg_index(devinfo, src)];
1575
1576 elk_inst_set_bits(dst, 100, 96, (uncompacted >> 10));
1577 elk_inst_set_bits(dst, 68, 64, (uncompacted >> 5) & 0x1f);
1578 elk_inst_set_bits(dst, 52, 48, (uncompacted >> 0) & 0x1f);
1579 }
1580
1581 static void
set_uncompacted_src0(const struct compaction_state * c,elk_inst * dst,elk_compact_inst * src)1582 set_uncompacted_src0(const struct compaction_state *c, elk_inst *dst,
1583 elk_compact_inst *src)
1584 {
1585 const struct intel_device_info *devinfo = c->isa->devinfo;
1586 uint32_t compacted = elk_compact_inst_src0_index(devinfo, src);
1587 uint16_t uncompacted = c->src0_index_table[compacted];
1588
1589 elk_inst_set_bits(dst, 88, 77, uncompacted);
1590 }
1591
1592 static void
set_uncompacted_src1(const struct compaction_state * c,elk_inst * dst,elk_compact_inst * src)1593 set_uncompacted_src1(const struct compaction_state *c, elk_inst *dst,
1594 elk_compact_inst *src)
1595 {
1596 const struct intel_device_info *devinfo = c->isa->devinfo;
1597 uint16_t uncompacted =
1598 c->src1_index_table[elk_compact_inst_src1_index(devinfo, src)];
1599
1600 elk_inst_set_bits(dst, 120, 109, uncompacted);
1601 }
1602
1603 static void
set_uncompacted_3src_control_index(const struct compaction_state * c,elk_inst * dst,elk_compact_inst * src)1604 set_uncompacted_3src_control_index(const struct compaction_state *c,
1605 elk_inst *dst, elk_compact_inst *src)
1606 {
1607 const struct intel_device_info *devinfo = c->isa->devinfo;
1608 assert(devinfo->ver >= 8);
1609
1610 uint32_t compacted = elk_compact_inst_3src_control_index(devinfo, src);
1611 uint32_t uncompacted = gfx8_3src_control_index_table[compacted];
1612
1613 elk_inst_set_bits(dst, 34, 32, (uncompacted >> 21) & 0x7);
1614 elk_inst_set_bits(dst, 28, 8, (uncompacted >> 0) & 0x1fffff);
1615
1616 if (devinfo->platform == INTEL_PLATFORM_CHV)
1617 elk_inst_set_bits(dst, 36, 35, (uncompacted >> 24) & 0x3);
1618 }
1619
1620 static void
set_uncompacted_3src_source_index(const struct intel_device_info * devinfo,elk_inst * dst,elk_compact_inst * src)1621 set_uncompacted_3src_source_index(const struct intel_device_info *devinfo,
1622 elk_inst *dst, elk_compact_inst *src)
1623 {
1624 assert(devinfo->ver >= 8);
1625
1626 uint32_t compacted = elk_compact_inst_3src_source_index(devinfo, src);
1627 uint64_t uncompacted = gfx8_3src_source_index_table[compacted];
1628
1629 elk_inst_set_bits(dst, 83, 83, (uncompacted >> 43) & 0x1);
1630 elk_inst_set_bits(dst, 114, 107, (uncompacted >> 35) & 0xff);
1631 elk_inst_set_bits(dst, 93, 86, (uncompacted >> 27) & 0xff);
1632 elk_inst_set_bits(dst, 72, 65, (uncompacted >> 19) & 0xff);
1633 elk_inst_set_bits(dst, 55, 37, (uncompacted >> 0) & 0x7ffff);
1634
1635 if (devinfo->platform == INTEL_PLATFORM_CHV) {
1636 elk_inst_set_bits(dst, 126, 125, (uncompacted >> 47) & 0x3);
1637 elk_inst_set_bits(dst, 105, 104, (uncompacted >> 45) & 0x3);
1638 elk_inst_set_bits(dst, 84, 84, (uncompacted >> 44) & 0x1);
1639 } else {
1640 elk_inst_set_bits(dst, 125, 125, (uncompacted >> 45) & 0x1);
1641 elk_inst_set_bits(dst, 104, 104, (uncompacted >> 44) & 0x1);
1642 }
1643 }
1644
1645 static void
elk_uncompact_3src_instruction(const struct compaction_state * c,elk_inst * dst,elk_compact_inst * src)1646 elk_uncompact_3src_instruction(const struct compaction_state *c,
1647 elk_inst *dst, elk_compact_inst *src)
1648 {
1649 const struct intel_device_info *devinfo = c->isa->devinfo;
1650 assert(devinfo->ver >= 8);
1651
1652 #define uncompact(field) \
1653 elk_inst_set_3src_##field(devinfo, dst, elk_compact_inst_3src_##field(devinfo, src))
1654 #define uncompact_a16(field) \
1655 elk_inst_set_3src_a16_##field(devinfo, dst, elk_compact_inst_3src_##field(devinfo, src))
1656
1657 uncompact(hw_opcode);
1658
1659 set_uncompacted_3src_control_index(c, dst, src);
1660 set_uncompacted_3src_source_index(devinfo, dst, src);
1661
1662 uncompact(dst_reg_nr);
1663 uncompact_a16(src0_rep_ctrl);
1664 uncompact(debug_control);
1665 uncompact(saturate);
1666 uncompact_a16(src1_rep_ctrl);
1667 uncompact_a16(src2_rep_ctrl);
1668 uncompact(src0_reg_nr);
1669 uncompact(src1_reg_nr);
1670 uncompact(src2_reg_nr);
1671 uncompact_a16(src0_subreg_nr);
1672 uncompact_a16(src1_subreg_nr);
1673 uncompact_a16(src2_subreg_nr);
1674
1675 elk_inst_set_3src_cmpt_control(devinfo, dst, false);
1676
1677 #undef uncompact
1678 #undef uncompact_a16
1679 }
1680
/**
 * Expands the compacted instruction src into the full instruction dst.
 *
 * dst is zeroed first.  On ver >= 8, instructions whose opcode decodes to a
 * three-source opcode are handed to elk_uncompact_3src_instruction();
 * everything else is expanded here field by field.
 */
static void
uncompact_instruction(const struct compaction_state *c, elk_inst *dst,
                      elk_compact_inst *src)
{
   const struct intel_device_info *devinfo = c->isa->devinfo;
   memset(dst, 0, sizeof(*dst));

   if (devinfo->ver >= 8) {
      const enum elk_opcode opcode =
         elk_opcode_decode(c->isa, elk_compact_inst_3src_hw_opcode(devinfo, src));
      if (elk_is_3src(c->isa, opcode)) {
         elk_uncompact_3src_instruction(c, dst, src);
         return;
      }
   }

/* Copy a same-named field from src into dst. */
#define uncompact(field) \
   elk_inst_set_##field(devinfo, dst, elk_compact_inst_##field(devinfo, src))
/* Copy an operand's register number into dst's direct-addressing field. */
#define uncompact_reg(field) \
   elk_inst_set_##field##_da_reg_nr(devinfo, dst, \
                                    elk_compact_inst_##field##_reg_nr(devinfo, src))

   uncompact(hw_opcode);
   uncompact(debug_control);

   set_uncompacted_control(c, dst, src);
   set_uncompacted_datatype(c, dst, src);
   set_uncompacted_subreg(c, dst, src);
   set_uncompacted_src0(c, dst, src);

   /* has_immediate() inspects dst, so it must run after the expansions
    * above have written into dst.
    */
   enum elk_reg_type type;
   if (has_immediate(devinfo, dst, &type)) {
      unsigned imm = uncompact_immediate(devinfo, type,
                                         elk_compact_inst_imm(devinfo, src));
      elk_inst_set_imm_ud(devinfo, dst, imm);
   } else {
      set_uncompacted_src1(c, dst, src);
      uncompact_reg(src1);
   }

   if (devinfo->ver >= 6) {
      uncompact(acc_wr_control);
   } else {
      uncompact(mask_control_ex);
   }

   uncompact(cond_modifier);

   /* On Gfx7 the flag bits were already restored from the control index by
    * set_uncompacted_control().
    */
   if (devinfo->ver <= 6)
      uncompact(flag_subreg_nr);

   uncompact_reg(dst);
   uncompact_reg(src0);

   elk_inst_set_cmpt_control(devinfo, dst, false);

#undef uncompact
#undef uncompact_reg
}
1740
1741 void
elk_uncompact_instruction(const struct elk_isa_info * isa,elk_inst * dst,elk_compact_inst * src)1742 elk_uncompact_instruction(const struct elk_isa_info *isa,
1743 elk_inst *dst, elk_compact_inst *src)
1744 {
1745 struct compaction_state c;
1746 compaction_state_init(&c, isa);
1747 uncompact_instruction(&c, dst, src);
1748 }
1749
1750 void
elk_debug_compact_uncompact(const struct elk_isa_info * isa,elk_inst * orig,elk_inst * uncompacted)1751 elk_debug_compact_uncompact(const struct elk_isa_info *isa,
1752 elk_inst *orig,
1753 elk_inst *uncompacted)
1754 {
1755 fprintf(stderr, "Instruction compact/uncompact changed (gen%d):\n",
1756 isa->devinfo->ver);
1757
1758 fprintf(stderr, " before: ");
1759 elk_disassemble_inst(stderr, isa, orig, true, 0, NULL);
1760
1761 fprintf(stderr, " after: ");
1762 elk_disassemble_inst(stderr, isa, uncompacted, false, 0, NULL);
1763
1764 uint32_t *before_bits = (uint32_t *)orig;
1765 uint32_t *after_bits = (uint32_t *)uncompacted;
1766 fprintf(stderr, " changed bits:\n");
1767 for (int i = 0; i < 128; i++) {
1768 uint32_t before = before_bits[i / 32] & (1 << (i & 31));
1769 uint32_t after = after_bits[i / 32] & (1 << (i & 31));
1770
1771 if (before != after) {
1772 fprintf(stderr, " bit %d, %s to %s\n", i,
1773 before ? "set" : "unset",
1774 after ? "set" : "unset");
1775 }
1776 }
1777 }
1778
/**
 * Returns the difference between the compacted_counts entries at
 * old_target_ip and at old_ip.
 *
 * The result is negative when the target's entry is the smaller of the two.
 */
static int
compacted_between(int old_ip, int old_target_ip, int *compacted_counts)
{
   return compacted_counts[old_target_ip] - compacted_counts[old_ip];
}
1786
1787 static void
update_uip_jip(const struct elk_isa_info * isa,elk_inst * insn,int this_old_ip,int * compacted_counts)1788 update_uip_jip(const struct elk_isa_info *isa, elk_inst *insn,
1789 int this_old_ip, int *compacted_counts)
1790 {
1791 const struct intel_device_info *devinfo = isa->devinfo;
1792
1793 /* JIP and UIP are in units of:
1794 * - bytes on Gfx8+; and
1795 * - compacted instructions on Gfx6+.
1796 */
1797 int shift = devinfo->ver >= 8 ? 3 : 0;
1798
1799 /* Even though the values are signed, we don't need the rounding behavior
1800 * of integer division. The shifts are safe.
1801 */
1802 if (devinfo->ver >= 8) {
1803 assert(elk_inst_jip(devinfo, insn) % 8 == 0 &&
1804 elk_inst_uip(devinfo, insn) % 8 == 0);
1805 }
1806
1807 int32_t jip_compacted = elk_inst_jip(devinfo, insn) >> shift;
1808 jip_compacted -= compacted_between(this_old_ip,
1809 this_old_ip + (jip_compacted / 2),
1810 compacted_counts);
1811 elk_inst_set_jip(devinfo, insn, (uint32_t)jip_compacted << shift);
1812
1813 if (elk_inst_opcode(isa, insn) == ELK_OPCODE_ENDIF ||
1814 elk_inst_opcode(isa, insn) == ELK_OPCODE_WHILE ||
1815 (elk_inst_opcode(isa, insn) == ELK_OPCODE_ELSE && devinfo->ver <= 7))
1816 return;
1817
1818 int32_t uip_compacted = elk_inst_uip(devinfo, insn) >> shift;
1819 uip_compacted -= compacted_between(this_old_ip,
1820 this_old_ip + (uip_compacted / 2),
1821 compacted_counts);
1822 elk_inst_set_uip(devinfo, insn, (uint32_t)uip_compacted << shift);
1823 }
1824
1825 static void
update_gfx4_jump_count(const struct intel_device_info * devinfo,elk_inst * insn,int this_old_ip,int * compacted_counts)1826 update_gfx4_jump_count(const struct intel_device_info *devinfo, elk_inst *insn,
1827 int this_old_ip, int *compacted_counts)
1828 {
1829 assert(devinfo->ver == 5 || devinfo->platform == INTEL_PLATFORM_G4X);
1830
1831 /* Jump Count is in units of:
1832 * - uncompacted instructions on G45; and
1833 * - compacted instructions on Gfx5.
1834 */
1835 int shift = devinfo->platform == INTEL_PLATFORM_G4X ? 1 : 0;
1836
1837 int jump_count_compacted = elk_inst_gfx4_jump_count(devinfo, insn) << shift;
1838
1839 int target_old_ip = this_old_ip + (jump_count_compacted / 2);
1840
1841 int this_compacted_count = compacted_counts[this_old_ip];
1842 int target_compacted_count = compacted_counts[target_old_ip];
1843
1844 jump_count_compacted -= (target_compacted_count - this_compacted_count);
1845 elk_inst_set_gfx4_jump_count(devinfo, insn, jump_count_compacted >> shift);
1846 }
1847
1848 static void
compaction_state_init(struct compaction_state * c,const struct elk_isa_info * isa)1849 compaction_state_init(struct compaction_state *c,
1850 const struct elk_isa_info *isa)
1851 {
1852 const struct intel_device_info *devinfo = isa->devinfo;
1853
1854 assert(g45_control_index_table[ARRAY_SIZE(g45_control_index_table) - 1] != 0);
1855 assert(g45_datatype_table[ARRAY_SIZE(g45_datatype_table) - 1] != 0);
1856 assert(g45_subreg_table[ARRAY_SIZE(g45_subreg_table) - 1] != 0);
1857 assert(g45_src_index_table[ARRAY_SIZE(g45_src_index_table) - 1] != 0);
1858 assert(gfx6_control_index_table[ARRAY_SIZE(gfx6_control_index_table) - 1] != 0);
1859 assert(gfx6_datatype_table[ARRAY_SIZE(gfx6_datatype_table) - 1] != 0);
1860 assert(gfx6_subreg_table[ARRAY_SIZE(gfx6_subreg_table) - 1] != 0);
1861 assert(gfx6_src_index_table[ARRAY_SIZE(gfx6_src_index_table) - 1] != 0);
1862 assert(gfx7_control_index_table[ARRAY_SIZE(gfx7_control_index_table) - 1] != 0);
1863 assert(gfx7_datatype_table[ARRAY_SIZE(gfx7_datatype_table) - 1] != 0);
1864 assert(gfx7_subreg_table[ARRAY_SIZE(gfx7_subreg_table) - 1] != 0);
1865 assert(gfx7_src_index_table[ARRAY_SIZE(gfx7_src_index_table) - 1] != 0);
1866 assert(gfx8_control_index_table[ARRAY_SIZE(gfx8_control_index_table) - 1] != 0);
1867 assert(gfx8_datatype_table[ARRAY_SIZE(gfx8_datatype_table) - 1] != 0);
1868 assert(gfx8_subreg_table[ARRAY_SIZE(gfx8_subreg_table) - 1] != 0);
1869 assert(gfx8_src_index_table[ARRAY_SIZE(gfx8_src_index_table) - 1] != 0);
1870 assert(gfx11_datatype_table[ARRAY_SIZE(gfx11_datatype_table) - 1] != 0);
1871 assert(gfx12_control_index_table[ARRAY_SIZE(gfx12_control_index_table) - 1] != 0);
1872 assert(gfx12_datatype_table[ARRAY_SIZE(gfx12_datatype_table) - 1] != 0);
1873 assert(gfx12_subreg_table[ARRAY_SIZE(gfx12_subreg_table) - 1] != 0);
1874 assert(gfx12_src0_index_table[ARRAY_SIZE(gfx12_src0_index_table) - 1] != 0);
1875 assert(gfx12_src1_index_table[ARRAY_SIZE(gfx12_src1_index_table) - 1] != 0);
1876 assert(xehp_src0_index_table[ARRAY_SIZE(xehp_src0_index_table) - 1] != 0);
1877 assert(xehp_src1_index_table[ARRAY_SIZE(xehp_src1_index_table) - 1] != 0);
1878 assert(xe2_control_index_table[ARRAY_SIZE(xe2_control_index_table) - 1] != 0);
1879 assert(xe2_datatype_table[ARRAY_SIZE(xe2_datatype_table) - 1] != 0);
1880 assert(xe2_subreg_table[ARRAY_SIZE(xe2_subreg_table) - 1] != 0);
1881 assert(xe2_src0_index_table[ARRAY_SIZE(xe2_src0_index_table) - 1] != 0);
1882 assert(xe2_src1_index_table[ARRAY_SIZE(xe2_src1_index_table) - 1] != 0);
1883
1884 c->isa = isa;
1885 switch (devinfo->ver) {
1886 case 8:
1887 c->control_index_table = gfx8_control_index_table;
1888 c->datatype_table = gfx8_datatype_table;
1889 c->subreg_table = gfx8_subreg_table;
1890 c->src0_index_table = gfx8_src_index_table;
1891 c->src1_index_table = gfx8_src_index_table;
1892 break;
1893 case 7:
1894 c->control_index_table = gfx7_control_index_table;
1895 c->datatype_table = gfx7_datatype_table;
1896 c->subreg_table = gfx7_subreg_table;
1897 c->src0_index_table = gfx7_src_index_table;
1898 c->src1_index_table = gfx7_src_index_table;
1899 break;
1900 case 6:
1901 c->control_index_table = gfx6_control_index_table;
1902 c->datatype_table = gfx6_datatype_table;
1903 c->subreg_table = gfx6_subreg_table;
1904 c->src0_index_table = gfx6_src_index_table;
1905 c->src1_index_table = gfx6_src_index_table;
1906 break;
1907 case 5:
1908 case 4:
1909 c->control_index_table = g45_control_index_table;
1910 c->datatype_table = g45_datatype_table;
1911 c->subreg_table = g45_subreg_table;
1912 c->src0_index_table = g45_src_index_table;
1913 c->src1_index_table = g45_src_index_table;
1914 break;
1915 default:
1916 unreachable("unknown generation");
1917 }
1918 }
1919
1920 void
elk_compact_instructions(struct elk_codegen * p,int start_offset,struct elk_disasm_info * disasm)1921 elk_compact_instructions(struct elk_codegen *p, int start_offset,
1922 struct elk_disasm_info *disasm)
1923 {
1924 if (INTEL_DEBUG(DEBUG_NO_COMPACTION))
1925 return;
1926
1927 const struct intel_device_info *devinfo = p->devinfo;
1928 if (devinfo->ver == 4 && devinfo->platform != INTEL_PLATFORM_G4X)
1929 return;
1930
1931 void *store = p->store + start_offset / 16;
1932 /* For an instruction at byte offset 16*i before compaction, this is the
1933 * number of compacted instructions minus the number of padding NOP/NENOPs
1934 * that preceded it.
1935 */
1936 unsigned num_compacted_counts =
1937 (p->next_insn_offset - start_offset) / sizeof(elk_inst);
1938 int *compacted_counts =
1939 calloc(1, sizeof(*compacted_counts) * num_compacted_counts);
1940
1941 /* For an instruction at byte offset 8*i after compaction, this was its IP
1942 * (in 16-byte units) before compaction.
1943 */
1944 unsigned num_old_ip =
1945 (p->next_insn_offset - start_offset) / sizeof(elk_compact_inst) + 1;
1946 int *old_ip = calloc(1, sizeof(*old_ip) * num_old_ip);
1947
1948 struct compaction_state c;
1949 compaction_state_init(&c, p->isa);
1950
1951 int offset = 0;
1952 int compacted_count = 0;
1953 for (int src_offset = 0; src_offset < p->next_insn_offset - start_offset;
1954 src_offset += sizeof(elk_inst)) {
1955 elk_inst *src = store + src_offset;
1956 void *dst = store + offset;
1957
1958 old_ip[offset / sizeof(elk_compact_inst)] = src_offset / sizeof(elk_inst);
1959 compacted_counts[src_offset / sizeof(elk_inst)] = compacted_count;
1960
1961 elk_inst inst = precompact(p->isa, *src);
1962 elk_inst saved = inst;
1963
1964 if (try_compact_instruction(&c, dst, &inst)) {
1965 compacted_count++;
1966
1967 if (INTEL_DEBUG(DEBUG_VS | DEBUG_GS | DEBUG_TCS |
1968 DEBUG_WM | DEBUG_CS | DEBUG_TES)) {
1969 elk_inst uncompacted;
1970 uncompact_instruction(&c, &uncompacted, dst);
1971 if (memcmp(&saved, &uncompacted, sizeof(uncompacted))) {
1972 elk_debug_compact_uncompact(p->isa, &saved, &uncompacted);
1973 }
1974 }
1975
1976 offset += sizeof(elk_compact_inst);
1977 } else {
1978 /* All uncompacted instructions need to be aligned on G45. */
1979 if ((offset & sizeof(elk_compact_inst)) != 0 &&
1980 devinfo->platform == INTEL_PLATFORM_G4X) {
1981 elk_compact_inst *align = store + offset;
1982 memset(align, 0, sizeof(*align));
1983 elk_compact_inst_set_hw_opcode(
1984 devinfo, align, elk_opcode_encode(p->isa, ELK_OPCODE_NENOP));
1985 elk_compact_inst_set_cmpt_control(devinfo, align, true);
1986 offset += sizeof(elk_compact_inst);
1987 compacted_count--;
1988 compacted_counts[src_offset / sizeof(elk_inst)] = compacted_count;
1989 old_ip[offset / sizeof(elk_compact_inst)] = src_offset / sizeof(elk_inst);
1990
1991 dst = store + offset;
1992 }
1993
1994 /* If we didn't compact this instruction, we need to move it down into
1995 * place.
1996 */
1997 if (offset != src_offset) {
1998 memmove(dst, src, sizeof(elk_inst));
1999 }
2000 offset += sizeof(elk_inst);
2001 }
2002 }
2003
2004 /* Add an entry for the ending offset of the program. This greatly
2005 * simplifies the linked list walk at the end of the function.
2006 */
2007 old_ip[offset / sizeof(elk_compact_inst)] =
2008 (p->next_insn_offset - start_offset) / sizeof(elk_inst);
2009
2010 /* Fix up control flow offsets. */
2011 p->next_insn_offset = start_offset + offset;
2012 for (offset = 0; offset < p->next_insn_offset - start_offset;
2013 offset = next_offset(devinfo, store, offset)) {
2014 elk_inst *insn = store + offset;
2015 int this_old_ip = old_ip[offset / sizeof(elk_compact_inst)];
2016 int this_compacted_count = compacted_counts[this_old_ip];
2017
2018 switch (elk_inst_opcode(p->isa, insn)) {
2019 case ELK_OPCODE_BREAK:
2020 case ELK_OPCODE_CONTINUE:
2021 case ELK_OPCODE_HALT:
2022 if (devinfo->ver >= 6) {
2023 update_uip_jip(p->isa, insn, this_old_ip, compacted_counts);
2024 } else {
2025 update_gfx4_jump_count(devinfo, insn, this_old_ip,
2026 compacted_counts);
2027 }
2028 break;
2029
2030 case ELK_OPCODE_IF:
2031 case ELK_OPCODE_IFF:
2032 case ELK_OPCODE_ELSE:
2033 case ELK_OPCODE_ENDIF:
2034 case ELK_OPCODE_WHILE:
2035 if (devinfo->ver >= 7) {
2036 if (elk_inst_cmpt_control(devinfo, insn)) {
2037 elk_inst uncompacted;
2038 uncompact_instruction(&c, &uncompacted,
2039 (elk_compact_inst *)insn);
2040
2041 update_uip_jip(p->isa, &uncompacted, this_old_ip,
2042 compacted_counts);
2043
2044 bool ret = try_compact_instruction(&c, (elk_compact_inst *)insn,
2045 &uncompacted);
2046 assert(ret); (void)ret;
2047 } else {
2048 update_uip_jip(p->isa, insn, this_old_ip, compacted_counts);
2049 }
2050 } else if (devinfo->ver == 6) {
2051 assert(!elk_inst_cmpt_control(devinfo, insn));
2052
2053 /* Jump Count is in units of compacted instructions on Gfx6. */
2054 int jump_count_compacted = elk_inst_gfx6_jump_count(devinfo, insn);
2055
2056 int target_old_ip = this_old_ip + (jump_count_compacted / 2);
2057 int target_compacted_count = compacted_counts[target_old_ip];
2058 jump_count_compacted -= (target_compacted_count - this_compacted_count);
2059 elk_inst_set_gfx6_jump_count(devinfo, insn, jump_count_compacted);
2060 } else {
2061 update_gfx4_jump_count(devinfo, insn, this_old_ip,
2062 compacted_counts);
2063 }
2064 break;
2065
2066 case ELK_OPCODE_ADD:
2067 /* Add instructions modifying the IP register use an immediate src1,
2068 * and Gens that use this cannot compact instructions with immediate
2069 * operands.
2070 */
2071 if (elk_inst_cmpt_control(devinfo, insn))
2072 break;
2073
2074 if (elk_inst_dst_reg_file(devinfo, insn) == ELK_ARCHITECTURE_REGISTER_FILE &&
2075 elk_inst_dst_da_reg_nr(devinfo, insn) == ELK_ARF_IP) {
2076 assert(elk_inst_src1_reg_file(devinfo, insn) == ELK_IMMEDIATE_VALUE);
2077
2078 int shift = 3;
2079 int jump_compacted = elk_inst_imm_d(devinfo, insn) >> shift;
2080
2081 int target_old_ip = this_old_ip + (jump_compacted / 2);
2082 int target_compacted_count = compacted_counts[target_old_ip];
2083 jump_compacted -= (target_compacted_count - this_compacted_count);
2084 elk_inst_set_imm_ud(devinfo, insn, jump_compacted << shift);
2085 }
2086 break;
2087
2088 default:
2089 break;
2090 }
2091 }
2092
2093 /* p->nr_insn is counting the number of uncompacted instructions still, so
2094 * divide. We do want to be sure there's a valid instruction in any
2095 * alignment padding, so that the next compression pass (for the FS 8/16
2096 * compile passes) parses correctly.
2097 */
2098 if (p->next_insn_offset & sizeof(elk_compact_inst)) {
2099 elk_compact_inst *align = store + offset;
2100 memset(align, 0, sizeof(*align));
2101 elk_compact_inst_set_hw_opcode(
2102 devinfo, align, elk_opcode_encode(p->isa, ELK_OPCODE_NOP));
2103 elk_compact_inst_set_cmpt_control(devinfo, align, true);
2104 p->next_insn_offset += sizeof(elk_compact_inst);
2105 }
2106 p->nr_insn = p->next_insn_offset / sizeof(elk_inst);
2107
2108 for (int i = 0; i < p->num_relocs; i++) {
2109 if (p->relocs[i].offset < (uint32_t)start_offset)
2110 continue;
2111
2112 assert(p->relocs[i].offset % 16 == 0);
2113 unsigned idx = (p->relocs[i].offset - start_offset) / 16;
2114 p->relocs[i].offset -= compacted_counts[idx] * 8;
2115 }
2116
2117 /* Update the instruction offsets for each group. */
2118 if (disasm) {
2119 int offset = 0;
2120
2121 foreach_list_typed(struct inst_group, group, link, &disasm->group_list) {
2122 while (start_offset + old_ip[offset / sizeof(elk_compact_inst)] *
2123 sizeof(elk_inst) != group->offset) {
2124 assert(start_offset + old_ip[offset / sizeof(elk_compact_inst)] *
2125 sizeof(elk_inst) < group->offset);
2126 offset = next_offset(devinfo, store, offset);
2127 }
2128
2129 group->offset = start_offset + offset;
2130
2131 offset = next_offset(devinfo, store, offset);
2132 }
2133 }
2134
2135 free(compacted_counts);
2136 free(old_ip);
2137 }
2138