xref: /aosp_15_r20/external/mesa3d/src/intel/compiler/brw_reg.h (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
3  Intel funded Tungsten Graphics to
4  develop this 3D driver.
5 
6  Permission is hereby granted, free of charge, to any person obtaining
7  a copy of this software and associated documentation files (the
8  "Software"), to deal in the Software without restriction, including
9  without limitation the rights to use, copy, modify, merge, publish,
10  distribute, sublicense, and/or sell copies of the Software, and to
11  permit persons to whom the Software is furnished to do so, subject to
12  the following conditions:
13 
14  The above copyright notice and this permission notice (including the
15  next paragraph) shall be included in all copies or substantial
16  portions of the Software.
17 
18  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 
26  **********************************************************************/
27  /*
28   * Authors:
29   *   Keith Whitwell <[email protected]>
30   */
31 
32 /** @file
33  *
34  * This file defines struct brw_reg, which is our representation for EU
35  * registers.  They're not a hardware specific format, just an abstraction
36  * that intends to capture the full flexibility of the hardware registers.
37  *
38  * The brw_eu_emit.c layer's brw_set_dest/brw_set_src[01] functions encode
39  * the abstract brw_reg type into the actual hardware instruction encoding.
40  */
41 
42 #ifndef BRW_REG_H
43 #define BRW_REG_H
44 
45 #include <stdbool.h>
46 #include "util/compiler.h"
47 #include "util/glheader.h"
48 #include "util/macros.h"
49 #include "util/rounding.h"
50 #include "util/u_math.h"
51 #include "brw_eu_defines.h"
52 #include "brw_reg_type.h"
53 
54 #ifdef __cplusplus
55 extern "C" {
56 #endif
57 
58 struct intel_device_info;
59 
/**
 * Size of general purpose register space in REG_SIZE units.
 *
 * XE2_MAX_GRF is twice BRW_MAX_GRF; it is the bound that brw_make_reg()
 * asserts FIXED_GRF register numbers against.
 */
#define BRW_MAX_GRF 128
#define XE2_MAX_GRF 256
63 
/**
 * BRW hardware swizzles.
 * Only defines XYZW to ensure it can be contained in 2 bits
 */
#define BRW_SWIZZLE_X 0
#define BRW_SWIZZLE_Y 1
#define BRW_SWIZZLE_Z 2
#define BRW_SWIZZLE_W 3

/* Pack four 2-bit channel selectors into one 8-bit swizzle; channel 0 lives
 * in the lowest two bits.  BRW_GET_SWZ extracts selector number idx again.
 */
#define BRW_SWIZZLE4(a,b,c,d) (((a)<<0) | ((b)<<2) | ((c)<<4) | ((d)<<6))
#define BRW_GET_SWZ(swz, idx) (((swz) >> ((idx)*2)) & 0x3)

/* Named swizzles.  NOOP and XYZW are the same identity encoding. */
#define BRW_SWIZZLE_NOOP      BRW_SWIZZLE4(0,1,2,3)
#define BRW_SWIZZLE_XYZW      BRW_SWIZZLE4(0,1,2,3)
#define BRW_SWIZZLE_XXXX      BRW_SWIZZLE4(0,0,0,0)
#define BRW_SWIZZLE_YYYY      BRW_SWIZZLE4(1,1,1,1)
#define BRW_SWIZZLE_ZZZZ      BRW_SWIZZLE4(2,2,2,2)
#define BRW_SWIZZLE_WWWW      BRW_SWIZZLE4(3,3,3,3)
#define BRW_SWIZZLE_XYXY      BRW_SWIZZLE4(0,1,0,1)
#define BRW_SWIZZLE_YXYX      BRW_SWIZZLE4(1,0,1,0)
#define BRW_SWIZZLE_XZXZ      BRW_SWIZZLE4(0,2,0,2)
#define BRW_SWIZZLE_YZXW      BRW_SWIZZLE4(1,2,0,3)
#define BRW_SWIZZLE_YWYW      BRW_SWIZZLE4(1,3,1,3)
#define BRW_SWIZZLE_ZXYW      BRW_SWIZZLE4(2,0,1,3)
#define BRW_SWIZZLE_ZWZW      BRW_SWIZZLE4(2,3,2,3)
#define BRW_SWIZZLE_WZWZ      BRW_SWIZZLE4(3,2,3,2)
#define BRW_SWIZZLE_WZYX      BRW_SWIZZLE4(3,2,1,0)
#define BRW_SWIZZLE_XXZZ      BRW_SWIZZLE4(0,0,2,2)
#define BRW_SWIZZLE_YYWW      BRW_SWIZZLE4(1,1,3,3)
#define BRW_SWIZZLE_YXWZ      BRW_SWIZZLE4(1,0,3,2)

/* Shift the identity swizzle by comp channel slots (2 bits each): to the
 * right for INPUT, to the left for OUTPUT.  Note that neither result is
 * masked back to 8 bits here.
 */
#define BRW_SWZ_COMP_INPUT(comp) (BRW_SWIZZLE_XYZW >> ((comp)*2))
#define BRW_SWZ_COMP_OUTPUT(comp) (BRW_SWIZZLE_XYZW << ((comp)*2))
97 
98 static inline bool
brw_is_single_value_swizzle(unsigned swiz)99 brw_is_single_value_swizzle(unsigned swiz)
100 {
101    return (swiz == BRW_SWIZZLE_XXXX ||
102            swiz == BRW_SWIZZLE_YYYY ||
103            swiz == BRW_SWIZZLE_ZZZZ ||
104            swiz == BRW_SWIZZLE_WWWW);
105 }
106 
/**
 * Compose two swizzles: the result is what you get by applying \p swz0 to
 * the output of \p swz1.  Arguments are ordered like function composition.
 *
 * For each of the four channels, the selector taken from \p swz0 is used as
 * the index into \p swz1.
 */
static inline unsigned
brw_compose_swizzle(unsigned swz0, unsigned swz1)
{
   unsigned composed = 0;

   for (unsigned chan = 0; chan < 4; chan++) {
      const unsigned picked = BRW_GET_SWZ(swz0, chan);

      /* Same bit placement as BRW_SWIZZLE4: two bits per channel. */
      composed |= BRW_GET_SWZ(swz1, picked) << (chan * 2);
   }

   return composed;
}
121 
/**
 * Build an identity swizzle restricted to the channels enabled in \p mask.
 * Provided \p mask is non-zero, the result only references enabled channels.
 * For any instruction OP and any mask:
 *
 *    brw_OP(p, brw_writemask(dst, mask),
 *           brw_swizzle(src, brw_swizzle_for_mask(mask)));
 *
 * is equivalent to the unswizzled form:
 *
 *    brw_OP(p, brw_writemask(dst, mask), src);
 *
 * Enabled channels map to themselves; a disabled channel reuses the most
 * recently seen enabled channel (initially the lowest set bit of \p mask,
 * or 0 when \p mask is zero).
 */
static inline unsigned
brw_swizzle_for_mask(unsigned mask)
{
   unsigned chan[4];
   unsigned prev = 0;

   if (mask)
      prev = ffs(mask) - 1;

   for (unsigned c = 0; c < 4; c++) {
      if (mask & (1 << c))
         prev = c;
      chan[c] = prev;
   }

   return BRW_SWIZZLE4(chan[0], chan[1], chan[2], chan[3]);
}
146 
/* Declaration only; the definition is not visible in this part of the header.
 * NOTE(review): presumably returns immediate \p x of the given \p type with
 * swizzle \p swz applied -- confirm against the definition.
 */
uint32_t brw_swizzle_immediate(enum brw_reg_type type, uint32_t x, unsigned swz);
148 
/* Register size in bytes (8 * 4 = 32).  byte_offset() uses it to carry a
 * sub-register byte offset over into the register number.
 */
#define REG_SIZE (8*4)
150 
/* These aren't hardware structs, just something useful for us to pass around:
 *
 * Align1 operation has a lot of control over input ranges.  Used in
 * WM programs to implement shaders decomposed into "channel serial"
 * or "structure of array" form:
 *
 * Layout overview:
 *  - The first union overlays the modifier/file/type bitfields (5+3+1+1+1+
 *    16+5 = 32 bits) with `bits`, so they can be compared as one word (see
 *    brw_regs_equal()).
 *  - `offset` and `stride` describe virtual registers.
 *  - The second union holds either the register number plus region
 *    description, or an immediate payload (df/u64/d64/f/d/ud), which is
 *    what brw_regs_equal() compares through `u64`.
 */
typedef struct brw_reg {
   union {
      struct {
         enum brw_reg_type type:5;
         enum brw_reg_file file:3;
         unsigned negate:1;             /* source only */
         unsigned abs:1;                /* source only */
         unsigned address_mode:1;       /* relative addressing, hopefully! */
         unsigned pad0:16;
         unsigned subnr:5;              /* :1 in align16 */
      };
      /* All of the bitfields above viewed as a single 32-bit value. */
      uint32_t bits;
   };

   /** Offset from the start of the virtual register in bytes. */
   uint16_t offset;

   /** Register region horizontal stride of virtual registers */
   uint8_t stride;

   union {
      /* Register description: number plus addressing/region fields. */
      struct {
         unsigned nr;
         unsigned swizzle:8;      /* src only, align16 only */
         unsigned writemask:4;    /* dest only, align16 only */
         int  indirect_offset:10; /* relative addressing offset */
         unsigned vstride:4;      /* source only */
         unsigned width:3;        /* src only, align1 only */
         unsigned hstride:2;      /* align1 only */
         unsigned pad1:1;
      };

      /* Immediate payloads; these alias the register description above. */
      double df;
      uint64_t u64;
      int64_t d64;
      float f;
      int   d;
      unsigned ud;
   };

#ifdef __cplusplus
   /* TODO: Remove this constructor to make this type a POD.  Need
    * to make sure that rest of compiler doesn't rely on type or
    * stride of BAD_FILE registers.
    */
   /* Default state: all bytes zero, then type UD, stride 1, file BAD_FILE. */
   brw_reg() {
      memset((void*)this, 0, sizeof(*this));
      this->type = BRW_TYPE_UD;
      this->stride = 1;
      this->file = BAD_FILE;
   }

   /* C++-only helpers; declared here, defined outside this header chunk. */
   bool equals(const brw_reg &r) const;
   bool negative_equals(const brw_reg &r) const;
   bool is_contiguous() const;

   bool is_zero() const;
   bool is_one() const;
   bool is_negative_one() const;
   bool is_null() const;
   bool is_accumulator() const;

   /**
    * Return the size in bytes of a single logical component of the
    * register assuming the given execution width.
    */
   unsigned component_size(unsigned width) const;
#endif /* __cplusplus */
} brw_reg;
226 
227 static inline unsigned
phys_nr(const struct intel_device_info * devinfo,const struct brw_reg reg)228 phys_nr(const struct intel_device_info *devinfo, const struct brw_reg reg)
229 {
230    if (devinfo->ver >= 20) {
231       if (reg.file == FIXED_GRF)
232          return reg.nr / 2;
233       else if (reg.file == ARF &&
234                reg.nr >= BRW_ARF_ACCUMULATOR &&
235                reg.nr < BRW_ARF_FLAG)
236          return BRW_ARF_ACCUMULATOR + (reg.nr - BRW_ARF_ACCUMULATOR) / 2;
237       else
238          return reg.nr;
239    } else {
240       return reg.nr;
241    }
242 }
243 
244 static inline unsigned
phys_subnr(const struct intel_device_info * devinfo,const struct brw_reg reg)245 phys_subnr(const struct intel_device_info *devinfo, const struct brw_reg reg)
246 {
247    if (devinfo->ver >= 20) {
248       if (reg.file == FIXED_GRF ||
249           (reg.file == ARF &&
250            reg.nr >= BRW_ARF_ACCUMULATOR &&
251            reg.nr < BRW_ARF_FLAG))
252          return (reg.nr & 1) * REG_SIZE + reg.subnr;
253       else
254          return reg.subnr;
255    } else {
256       return reg.subnr;
257    }
258 }
259 
260 static inline bool
brw_regs_equal(const struct brw_reg * a,const struct brw_reg * b)261 brw_regs_equal(const struct brw_reg *a, const struct brw_reg *b)
262 {
263    return a->bits   == b->bits &&
264           a->u64    == b->u64 &&
265           a->offset == b->offset &&
266           a->stride == b->stride;
267 }
268 
269 static inline bool
brw_regs_negative_equal(const struct brw_reg * a,const struct brw_reg * b)270 brw_regs_negative_equal(const struct brw_reg *a, const struct brw_reg *b)
271 {
272    if (a->file == IMM) {
273       if (a->bits != b->bits)
274          return false;
275 
276       switch ((enum brw_reg_type) a->type) {
277       case BRW_TYPE_UQ:
278       case BRW_TYPE_Q:
279          return a->d64 == -b->d64;
280       case BRW_TYPE_DF:
281          return a->df == -b->df;
282       case BRW_TYPE_UD:
283       case BRW_TYPE_D:
284          return a->d == -b->d;
285       case BRW_TYPE_F:
286          return a->f == -b->f;
287       case BRW_TYPE_VF:
288          /* It is tempting to treat 0 as a negation of 0 (and -0 as a negation
289           * of -0).  There are occasions where 0 or -0 is used and the exact
290           * bit pattern is desired.  At the very least, changing this to allow
291           * 0 as a negation of 0 causes some fp64 tests to fail on IVB.
292           */
293          return a->ud == (b->ud ^ 0x80808080);
294       case BRW_TYPE_UW:
295       case BRW_TYPE_W:
296       case BRW_TYPE_UV:
297       case BRW_TYPE_V:
298       case BRW_TYPE_HF:
299          /* FINISHME: Implement support for these types once there is
300           * something in the compiler that can generate them.  Until then,
301           * they cannot be tested.
302           */
303          return false;
304       case BRW_TYPE_UB:
305       case BRW_TYPE_B:
306       default:
307          unreachable("not reached");
308       }
309    } else {
310       struct brw_reg tmp = *a;
311 
312       tmp.negate = !tmp.negate;
313 
314       return brw_regs_equal(&tmp, b);
315    }
316 }
317 
318 static inline enum brw_reg_type
get_exec_type(const enum brw_reg_type type)319 get_exec_type(const enum brw_reg_type type)
320 {
321    switch (type) {
322    case BRW_TYPE_B:
323    case BRW_TYPE_V:
324       return BRW_TYPE_W;
325    case BRW_TYPE_UB:
326    case BRW_TYPE_UV:
327       return BRW_TYPE_UW;
328    case BRW_TYPE_VF:
329       return BRW_TYPE_F;
330    default:
331       return type;
332    }
333 }
334 
335 /**
336  * Return an integer type of the requested size and signedness.
337  */
338 static inline enum brw_reg_type
brw_int_type(unsigned sz,bool is_signed)339 brw_int_type(unsigned sz, bool is_signed)
340 {
341    switch (sz) {
342    case 1:
343       return (is_signed ? BRW_TYPE_B : BRW_TYPE_UB);
344    case 2:
345       return (is_signed ? BRW_TYPE_W : BRW_TYPE_UW);
346    case 4:
347       return (is_signed ? BRW_TYPE_D : BRW_TYPE_UD);
348    case 8:
349       return (is_signed ? BRW_TYPE_Q : BRW_TYPE_UQ);
350    default:
351       unreachable("Not reached.");
352    }
353 }
354 
355 /**
356  * Construct a brw_reg.
357  * \param file      one of the BRW_x_REGISTER_FILE values
358  * \param nr        register number/index
359  * \param subnr     register sub number
360  * \param negate    register negate modifier
361  * \param abs       register abs modifier
362  * \param type      one of BRW_TYPE_x
363  * \param vstride   one of BRW_VERTICAL_STRIDE_x
364  * \param width     one of BRW_WIDTH_x
365  * \param hstride   one of BRW_HORIZONTAL_STRIDE_x
366  * \param swizzle   one of BRW_SWIZZLE_x
367  * \param writemask WRITEMASK_X/Y/Z/W bitfield
368  */
369 static inline struct brw_reg
brw_make_reg(enum brw_reg_file file,unsigned nr,unsigned subnr,unsigned negate,unsigned abs,enum brw_reg_type type,unsigned vstride,unsigned width,unsigned hstride,unsigned swizzle,unsigned writemask)370 brw_make_reg(enum brw_reg_file file,
371              unsigned nr,
372              unsigned subnr,
373              unsigned negate,
374              unsigned abs,
375              enum brw_reg_type type,
376              unsigned vstride,
377              unsigned width,
378              unsigned hstride,
379              unsigned swizzle,
380              unsigned writemask)
381 {
382    struct brw_reg reg;
383    if (file == FIXED_GRF)
384       assert(nr < XE2_MAX_GRF);
385    else if (file == ARF)
386       assert(nr <= BRW_ARF_TIMESTAMP);
387 
388    reg.type = type;
389    reg.file = file;
390    reg.negate = negate;
391    reg.abs = abs;
392    reg.address_mode = BRW_ADDRESS_DIRECT;
393    reg.pad0 = 0;
394    reg.subnr = subnr * brw_type_size_bytes(type);
395    reg.nr = nr;
396 
397    /* Could do better: If the reg is r5.3<0;1,0>, we probably want to
398     * set swizzle and writemask to W, as the lower bits of subnr will
399     * be lost when converted to align16.  This is probably too much to
400     * keep track of as you'd want it adjusted by suboffset(), etc.
401     * Perhaps fix up when converting to align16?
402     */
403    reg.swizzle = swizzle;
404    reg.writemask = writemask;
405    reg.indirect_offset = 0;
406    reg.vstride = vstride;
407    reg.width = width;
408    reg.hstride = hstride;
409    reg.pad1 = 0;
410 
411    reg.offset = 0;
412    reg.stride = 1;
413    if (file == IMM &&
414        type != BRW_TYPE_V &&
415        type != BRW_TYPE_UV &&
416        type != BRW_TYPE_VF) {
417       reg.stride = 0;
418    }
419 
420    return reg;
421 }
422 
423 /** Construct float[16] register */
424 static inline struct brw_reg
brw_vec16_reg(enum brw_reg_file file,unsigned nr,unsigned subnr)425 brw_vec16_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
426 {
427    return brw_make_reg(file,
428                   nr,
429                   subnr,
430                   0,
431                   0,
432                   BRW_TYPE_F,
433                   BRW_VERTICAL_STRIDE_16,
434                   BRW_WIDTH_16,
435                   BRW_HORIZONTAL_STRIDE_1,
436                   BRW_SWIZZLE_XYZW,
437                   WRITEMASK_XYZW);
438 }
439 
440 /** Construct float[8] register */
441 static inline struct brw_reg
brw_vec8_reg(enum brw_reg_file file,unsigned nr,unsigned subnr)442 brw_vec8_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
443 {
444    return brw_make_reg(file,
445                   nr,
446                   subnr,
447                   0,
448                   0,
449                   BRW_TYPE_F,
450                   BRW_VERTICAL_STRIDE_8,
451                   BRW_WIDTH_8,
452                   BRW_HORIZONTAL_STRIDE_1,
453                   BRW_SWIZZLE_XYZW,
454                   WRITEMASK_XYZW);
455 }
456 
457 /** Construct float[4] register */
458 static inline struct brw_reg
brw_vec4_reg(enum brw_reg_file file,unsigned nr,unsigned subnr)459 brw_vec4_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
460 {
461    return brw_make_reg(file,
462                   nr,
463                   subnr,
464                   0,
465                   0,
466                   BRW_TYPE_F,
467                   BRW_VERTICAL_STRIDE_4,
468                   BRW_WIDTH_4,
469                   BRW_HORIZONTAL_STRIDE_1,
470                   BRW_SWIZZLE_XYZW,
471                   WRITEMASK_XYZW);
472 }
473 
474 /** Construct float[2] register */
475 static inline struct brw_reg
brw_vec2_reg(enum brw_reg_file file,unsigned nr,unsigned subnr)476 brw_vec2_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
477 {
478    return brw_make_reg(file,
479                   nr,
480                   subnr,
481                   0,
482                   0,
483                   BRW_TYPE_F,
484                   BRW_VERTICAL_STRIDE_2,
485                   BRW_WIDTH_2,
486                   BRW_HORIZONTAL_STRIDE_1,
487                   BRW_SWIZZLE_XYXY,
488                   WRITEMASK_XY);
489 }
490 
491 /** Construct float[1] register */
492 static inline struct brw_reg
brw_vec1_reg(enum brw_reg_file file,unsigned nr,unsigned subnr)493 brw_vec1_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
494 {
495    return brw_make_reg(file,
496                   nr,
497                   subnr,
498                   0,
499                   0,
500                   BRW_TYPE_F,
501                   BRW_VERTICAL_STRIDE_0,
502                   BRW_WIDTH_1,
503                   BRW_HORIZONTAL_STRIDE_0,
504                   BRW_SWIZZLE_XXXX,
505                   WRITEMASK_X);
506 }
507 
508 static inline struct brw_reg
brw_vecn_reg(unsigned width,enum brw_reg_file file,unsigned nr,unsigned subnr)509 brw_vecn_reg(unsigned width, enum brw_reg_file file,
510              unsigned nr, unsigned subnr)
511 {
512    switch (width) {
513    case 1:
514       return brw_vec1_reg(file, nr, subnr);
515    case 2:
516       return brw_vec2_reg(file, nr, subnr);
517    case 4:
518       return brw_vec4_reg(file, nr, subnr);
519    case 8:
520       return brw_vec8_reg(file, nr, subnr);
521    case 16:
522       return brw_vec16_reg(file, nr, subnr);
523    default:
524       unreachable("Invalid register width");
525    }
526 }
527 
528 static inline struct brw_reg
retype(struct brw_reg reg,enum brw_reg_type type)529 retype(struct brw_reg reg, enum brw_reg_type type)
530 {
531    reg.type = type;
532    return reg;
533 }
534 
/**
 * Return \p reg unchanged.  Counterpart of sechalf(), which is the variant
 * that advances the register number.
 */
static inline struct brw_reg
firsthalf(struct brw_reg reg)
{
   return reg;
}
540 
541 static inline struct brw_reg
sechalf(struct brw_reg reg)542 sechalf(struct brw_reg reg)
543 {
544    if (reg.vstride)
545       reg.nr++;
546    return reg;
547 }
548 
549 static inline struct brw_reg
offset(struct brw_reg reg,unsigned delta)550 offset(struct brw_reg reg, unsigned delta)
551 {
552    reg.nr += delta;
553    return reg;
554 }
555 
556 
557 static inline struct brw_reg
byte_offset(struct brw_reg reg,unsigned bytes)558 byte_offset(struct brw_reg reg, unsigned bytes)
559 {
560    switch (reg.file) {
561    case BAD_FILE:
562       break;
563    case VGRF:
564    case ATTR:
565    case UNIFORM:
566       reg.offset += bytes;
567       break;
568    case ARF:
569    case FIXED_GRF: {
570       const unsigned suboffset = reg.subnr + bytes;
571       reg.nr += suboffset / REG_SIZE;
572       reg.subnr = suboffset % REG_SIZE;
573       break;
574    }
575    case IMM:
576    default:
577       assert(bytes == 0);
578    }
579    return reg;
580 }
581 
582 static inline struct brw_reg
suboffset(struct brw_reg reg,unsigned delta)583 suboffset(struct brw_reg reg, unsigned delta)
584 {
585    return byte_offset(reg, delta * brw_type_size_bytes(reg.type));
586 }
587 
588 /** Construct unsigned word[16] register */
589 static inline struct brw_reg
brw_uw16_reg(enum brw_reg_file file,unsigned nr,unsigned subnr)590 brw_uw16_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
591 {
592    return suboffset(retype(brw_vec16_reg(file, nr, 0), BRW_TYPE_UW), subnr);
593 }
594 
595 /** Construct unsigned word[8] register */
596 static inline struct brw_reg
brw_uw8_reg(enum brw_reg_file file,unsigned nr,unsigned subnr)597 brw_uw8_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
598 {
599    return suboffset(retype(brw_vec8_reg(file, nr, 0), BRW_TYPE_UW), subnr);
600 }
601 
602 /** Construct unsigned word[1] register */
603 static inline struct brw_reg
brw_uw1_reg(enum brw_reg_file file,unsigned nr,unsigned subnr)604 brw_uw1_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
605 {
606    return suboffset(retype(brw_vec1_reg(file, nr, 0), BRW_TYPE_UW), subnr);
607 }
608 
609 static inline struct brw_reg
brw_ud8_reg(enum brw_reg_file file,unsigned nr,unsigned subnr)610 brw_ud8_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
611 {
612    return retype(brw_vec8_reg(file, nr, subnr), BRW_TYPE_UD);
613 }
614 
615 static inline struct brw_reg
brw_ud1_reg(enum brw_reg_file file,unsigned nr,unsigned subnr)616 brw_ud1_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
617 {
618    return retype(brw_vec1_reg(file, nr, subnr), BRW_TYPE_UD);
619 }
620 
621 static inline struct brw_reg
brw_imm_reg(enum brw_reg_type type)622 brw_imm_reg(enum brw_reg_type type)
623 {
624    return brw_make_reg(IMM,
625                   0,
626                   0,
627                   0,
628                   0,
629                   type,
630                   BRW_VERTICAL_STRIDE_0,
631                   BRW_WIDTH_1,
632                   BRW_HORIZONTAL_STRIDE_0,
633                   0,
634                   0);
635 }
636 
637 /** Construct float immediate register */
638 static inline struct brw_reg
brw_imm_df(double df)639 brw_imm_df(double df)
640 {
641    struct brw_reg imm = brw_imm_reg(BRW_TYPE_DF);
642    imm.df = df;
643    return imm;
644 }
645 
646 static inline struct brw_reg
brw_imm_u64(uint64_t u64)647 brw_imm_u64(uint64_t u64)
648 {
649    struct brw_reg imm = brw_imm_reg(BRW_TYPE_UQ);
650    imm.u64 = u64;
651    return imm;
652 }
653 
654 static inline struct brw_reg
brw_imm_f(float f)655 brw_imm_f(float f)
656 {
657    struct brw_reg imm = brw_imm_reg(BRW_TYPE_F);
658    imm.f = f;
659    return imm;
660 }
661 
662 /** Construct int64_t immediate register */
663 static inline struct brw_reg
brw_imm_q(int64_t q)664 brw_imm_q(int64_t q)
665 {
666    struct brw_reg imm = brw_imm_reg(BRW_TYPE_Q);
667    imm.d64 = q;
668    return imm;
669 }
670 
671 /** Construct int64_t immediate register */
672 static inline struct brw_reg
brw_imm_uq(uint64_t uq)673 brw_imm_uq(uint64_t uq)
674 {
675    struct brw_reg imm = brw_imm_reg(BRW_TYPE_UQ);
676    imm.u64 = uq;
677    return imm;
678 }
679 
680 /** Construct integer immediate register */
681 static inline struct brw_reg
brw_imm_d(int d)682 brw_imm_d(int d)
683 {
684    struct brw_reg imm = brw_imm_reg(BRW_TYPE_D);
685    imm.d = d;
686    return imm;
687 }
688 
689 /** Construct uint immediate register */
690 static inline struct brw_reg
brw_imm_ud(unsigned ud)691 brw_imm_ud(unsigned ud)
692 {
693    struct brw_reg imm = brw_imm_reg(BRW_TYPE_UD);
694    imm.ud = ud;
695    return imm;
696 }
697 
698 /** Construct ushort immediate register */
699 static inline struct brw_reg
brw_imm_uw(uint16_t uw)700 brw_imm_uw(uint16_t uw)
701 {
702    struct brw_reg imm = brw_imm_reg(BRW_TYPE_UW);
703    imm.ud = uw | ((uint32_t)uw << 16);
704    return imm;
705 }
706 
707 /** Construct short immediate register */
708 static inline struct brw_reg
brw_imm_w(int16_t w)709 brw_imm_w(int16_t w)
710 {
711    struct brw_reg imm = brw_imm_reg(BRW_TYPE_W);
712    imm.ud = (uint16_t)w | (uint32_t)(uint16_t)w << 16;
713    return imm;
714 }
715 
716 /* brw_imm_b and brw_imm_ub aren't supported by hardware - the type
717  * numbers alias with _V and _VF below:
718  */
719 
720 /** Construct vector of eight signed half-byte values */
721 static inline struct brw_reg
brw_imm_v(unsigned v)722 brw_imm_v(unsigned v)
723 {
724    struct brw_reg imm = brw_imm_reg(BRW_TYPE_V);
725    imm.ud = v;
726    return imm;
727 }
728 
729 /** Construct vector of eight unsigned half-byte values */
730 static inline struct brw_reg
brw_imm_uv(unsigned uv)731 brw_imm_uv(unsigned uv)
732 {
733    struct brw_reg imm = brw_imm_reg(BRW_TYPE_UV);
734    imm.ud = uv;
735    return imm;
736 }
737 
738 /** Construct vector of four 8-bit float values */
739 static inline struct brw_reg
brw_imm_vf(unsigned v)740 brw_imm_vf(unsigned v)
741 {
742    struct brw_reg imm = brw_imm_reg(BRW_TYPE_VF);
743    imm.ud = v;
744    return imm;
745 }
746 
747 static inline struct brw_reg
brw_imm_vf4(unsigned v0,unsigned v1,unsigned v2,unsigned v3)748 brw_imm_vf4(unsigned v0, unsigned v1, unsigned v2, unsigned v3)
749 {
750    struct brw_reg imm = brw_imm_reg(BRW_TYPE_VF);
751    imm.vstride = BRW_VERTICAL_STRIDE_0;
752    imm.width = BRW_WIDTH_4;
753    imm.hstride = BRW_HORIZONTAL_STRIDE_1;
754    imm.ud = ((v0 << 0) | (v1 << 8) | (v2 << 16) | (v3 << 24));
755    return imm;
756 }
757 
758 
759 static inline struct brw_reg
brw_address(struct brw_reg reg)760 brw_address(struct brw_reg reg)
761 {
762    return brw_imm_uw(reg.nr * REG_SIZE + reg.subnr);
763 }
764 
765 /** Construct float[1] general-purpose register */
766 static inline struct brw_reg
brw_vec1_grf(unsigned nr,unsigned subnr)767 brw_vec1_grf(unsigned nr, unsigned subnr)
768 {
769    return brw_vec1_reg(FIXED_GRF, nr, subnr);
770 }
771 
772 static inline struct brw_reg
xe2_vec1_grf(unsigned nr,unsigned subnr)773 xe2_vec1_grf(unsigned nr, unsigned subnr)
774 {
775    return brw_vec1_reg(FIXED_GRF, 2 * nr + subnr / 8, subnr % 8);
776 }
777 
778 /** Construct float[2] general-purpose register */
779 static inline struct brw_reg
brw_vec2_grf(unsigned nr,unsigned subnr)780 brw_vec2_grf(unsigned nr, unsigned subnr)
781 {
782    return brw_vec2_reg(FIXED_GRF, nr, subnr);
783 }
784 
785 static inline struct brw_reg
xe2_vec2_grf(unsigned nr,unsigned subnr)786 xe2_vec2_grf(unsigned nr, unsigned subnr)
787 {
788    return brw_vec2_reg(FIXED_GRF, 2 * nr + subnr / 8, subnr % 8);
789 }
790 
791 /** Construct float[4] general-purpose register */
792 static inline struct brw_reg
brw_vec4_grf(unsigned nr,unsigned subnr)793 brw_vec4_grf(unsigned nr, unsigned subnr)
794 {
795    return brw_vec4_reg(FIXED_GRF, nr, subnr);
796 }
797 
798 static inline struct brw_reg
xe2_vec4_grf(unsigned nr,unsigned subnr)799 xe2_vec4_grf(unsigned nr, unsigned subnr)
800 {
801    return brw_vec4_reg(FIXED_GRF, 2 * nr + subnr / 8, subnr % 8);
802 }
803 
804 /** Construct float[8] general-purpose register */
805 static inline struct brw_reg
brw_vec8_grf(unsigned nr,unsigned subnr)806 brw_vec8_grf(unsigned nr, unsigned subnr)
807 {
808    return brw_vec8_reg(FIXED_GRF, nr, subnr);
809 }
810 
811 static inline struct brw_reg
xe2_vec8_grf(unsigned nr,unsigned subnr)812 xe2_vec8_grf(unsigned nr, unsigned subnr)
813 {
814    return brw_vec8_reg(FIXED_GRF, 2 * nr + subnr / 8, subnr % 8);
815 }
816 
817 /** Construct float[16] general-purpose register */
818 static inline struct brw_reg
brw_vec16_grf(unsigned nr,unsigned subnr)819 brw_vec16_grf(unsigned nr, unsigned subnr)
820 {
821    return brw_vec16_reg(FIXED_GRF, nr, subnr);
822 }
823 
824 static inline struct brw_reg
xe2_vec16_grf(unsigned nr,unsigned subnr)825 xe2_vec16_grf(unsigned nr, unsigned subnr)
826 {
827    return brw_vec16_reg(FIXED_GRF, 2 * nr + subnr / 8, subnr % 8);
828 }
829 
830 static inline struct brw_reg
brw_vecn_grf(unsigned width,unsigned nr,unsigned subnr)831 brw_vecn_grf(unsigned width, unsigned nr, unsigned subnr)
832 {
833    return brw_vecn_reg(width, FIXED_GRF, nr, subnr);
834 }
835 
836 static inline struct brw_reg
xe2_vecn_grf(unsigned width,unsigned nr,unsigned subnr)837 xe2_vecn_grf(unsigned width, unsigned nr, unsigned subnr)
838 {
839    return brw_vecn_reg(width, FIXED_GRF, nr + subnr / 8, subnr % 8);
840 }
841 
842 static inline struct brw_reg
brw_uw1_grf(unsigned nr,unsigned subnr)843 brw_uw1_grf(unsigned nr, unsigned subnr)
844 {
845    return brw_uw1_reg(FIXED_GRF, nr, subnr);
846 }
847 
848 static inline struct brw_reg
brw_uw8_grf(unsigned nr,unsigned subnr)849 brw_uw8_grf(unsigned nr, unsigned subnr)
850 {
851    return brw_uw8_reg(FIXED_GRF, nr, subnr);
852 }
853 
854 static inline struct brw_reg
brw_uw16_grf(unsigned nr,unsigned subnr)855 brw_uw16_grf(unsigned nr, unsigned subnr)
856 {
857    return brw_uw16_reg(FIXED_GRF, nr, subnr);
858 }
859 
860 static inline struct brw_reg
brw_ud8_grf(unsigned nr,unsigned subnr)861 brw_ud8_grf(unsigned nr, unsigned subnr)
862 {
863    return brw_ud8_reg(FIXED_GRF, nr, subnr);
864 }
865 
866 static inline struct brw_reg
brw_ud1_grf(unsigned nr,unsigned subnr)867 brw_ud1_grf(unsigned nr, unsigned subnr)
868 {
869    return brw_ud1_reg(FIXED_GRF, nr, subnr);
870 }
871 
872 
873 /** Construct null register (usually used for setting condition codes) */
874 static inline struct brw_reg
brw_null_reg(void)875 brw_null_reg(void)
876 {
877    return brw_vec8_reg(ARF, BRW_ARF_NULL, 0);
878 }
879 
880 static inline struct brw_reg
brw_null_vec(unsigned width)881 brw_null_vec(unsigned width)
882 {
883    return brw_vecn_reg(width, ARF, BRW_ARF_NULL, 0);
884 }
885 
886 static inline struct brw_reg
brw_address_reg(unsigned subnr)887 brw_address_reg(unsigned subnr)
888 {
889    return brw_uw1_reg(ARF, BRW_ARF_ADDRESS, subnr);
890 }
891 
892 static inline struct brw_reg
brw_tdr_reg(void)893 brw_tdr_reg(void)
894 {
895    return brw_uw1_reg(ARF, BRW_ARF_TDR, 0);
896 }
897 
898 /* If/else instructions break in align16 mode if writemask & swizzle
899  * aren't xyzw.  This goes against the convention for other scalar
900  * regs:
901  */
902 static inline struct brw_reg
brw_ip_reg(void)903 brw_ip_reg(void)
904 {
905    return brw_make_reg(ARF,
906                   BRW_ARF_IP,
907                   0,
908                   0,
909                   0,
910                   BRW_TYPE_UD,
911                   BRW_VERTICAL_STRIDE_4, /* ? */
912                   BRW_WIDTH_1,
913                   BRW_HORIZONTAL_STRIDE_0,
914                   BRW_SWIZZLE_XYZW, /* NOTE! */
915                   WRITEMASK_XYZW); /* NOTE! */
916 }
917 
918 static inline struct brw_reg
brw_notification_reg(void)919 brw_notification_reg(void)
920 {
921    return brw_make_reg(ARF,
922                   BRW_ARF_NOTIFICATION_COUNT,
923                   0,
924                   0,
925                   0,
926                   BRW_TYPE_UD,
927                   BRW_VERTICAL_STRIDE_0,
928                   BRW_WIDTH_1,
929                   BRW_HORIZONTAL_STRIDE_0,
930                   BRW_SWIZZLE_XXXX,
931                   WRITEMASK_X);
932 }
933 
934 static inline struct brw_reg
brw_cr0_reg(unsigned subnr)935 brw_cr0_reg(unsigned subnr)
936 {
937    return brw_ud1_reg(ARF, BRW_ARF_CONTROL, subnr);
938 }
939 
940 static inline struct brw_reg
brw_sr0_reg(unsigned subnr)941 brw_sr0_reg(unsigned subnr)
942 {
943    return brw_ud1_reg(ARF, BRW_ARF_STATE, subnr);
944 }
945 
946 static inline struct brw_reg
brw_acc_reg(unsigned width)947 brw_acc_reg(unsigned width)
948 {
949    return brw_vecn_reg(width, ARF,
950                        BRW_ARF_ACCUMULATOR, 0);
951 }
952 
953 static inline struct brw_reg
brw_flag_reg(int reg,int subreg)954 brw_flag_reg(int reg, int subreg)
955 {
956    return brw_uw1_reg(ARF,
957                       BRW_ARF_FLAG + reg, subreg);
958 }
959 
960 static inline struct brw_reg
brw_flag_subreg(unsigned subreg)961 brw_flag_subreg(unsigned subreg)
962 {
963    return brw_uw1_reg(ARF,
964                       BRW_ARF_FLAG + subreg / 2, subreg % 2);
965 }
966 
967 /**
968  * Return the mask register present in Gfx4-5, or the related register present
969  * in Gfx7.5 and later hardware referred to as "channel enable" register in
970  * the documentation.
971  */
972 static inline struct brw_reg
brw_mask_reg(unsigned subnr)973 brw_mask_reg(unsigned subnr)
974 {
975    return brw_uw1_reg(ARF, BRW_ARF_MASK, subnr);
976 }
977 
978 static inline struct brw_reg
brw_vmask_reg()979 brw_vmask_reg()
980 {
981    return brw_sr0_reg(3);
982 }
983 
984 static inline struct brw_reg
brw_dmask_reg()985 brw_dmask_reg()
986 {
987    return brw_sr0_reg(2);
988 }
989 
990 static inline struct brw_reg
brw_vgrf(unsigned nr,enum brw_reg_type type)991 brw_vgrf(unsigned nr, enum brw_reg_type type)
992 {
993    struct brw_reg reg = {};
994    reg.file = VGRF;
995    reg.nr = nr;
996    reg.type = type;
997    reg.stride = 1;
998    return reg;
999 }
1000 
1001 static inline struct brw_reg
brw_attr_reg(unsigned nr,enum brw_reg_type type)1002 brw_attr_reg(unsigned nr, enum brw_reg_type type)
1003 {
1004    struct brw_reg reg = {};
1005    reg.file = ATTR;
1006    reg.nr = nr;
1007    reg.type = type;
1008    reg.stride = 1;
1009    return reg;
1010 }
1011 
1012 static inline struct brw_reg
brw_uniform_reg(unsigned nr,enum brw_reg_type type)1013 brw_uniform_reg(unsigned nr, enum brw_reg_type type)
1014 {
1015    struct brw_reg reg = {};
1016    reg.file = UNIFORM;
1017    reg.nr = nr;
1018    reg.type = type;
1019    reg.stride = 0;
1020    return reg;
1021 }
1022 
/* Convert a stride or width value into its encoded form: 0 stays 0 and a
 * power of two 2^k (up to 32) becomes k + 1.  Any other value yields 0.
 *
 * This is almost always called with a numeric constant argument, so it is
 * written as a plain chain of comparisons that is easy to evaluate at
 * compile time.
 */
static inline unsigned cvt(unsigned val)
{
   return val == 1  ? 1 :
          val == 2  ? 2 :
          val == 4  ? 3 :
          val == 8  ? 4 :
          val == 16 ? 5 :
          val == 32 ? 6 :
          0;
}
1039 
1040 static inline struct brw_reg
stride(struct brw_reg reg,unsigned vstride,unsigned width,unsigned hstride)1041 stride(struct brw_reg reg, unsigned vstride, unsigned width, unsigned hstride)
1042 {
1043    reg.vstride = cvt(vstride);
1044    reg.width = cvt(width) - 1;
1045    reg.hstride = cvt(hstride);
1046    return reg;
1047 }
1048 
1049 /**
1050  * Multiply the vertical and horizontal stride of a register by the given
1051  * factor \a s.
1052  */
1053 static inline struct brw_reg
spread(struct brw_reg reg,unsigned s)1054 spread(struct brw_reg reg, unsigned s)
1055 {
1056    if (s) {
1057       assert(util_is_power_of_two_nonzero(s));
1058 
1059       if (reg.hstride)
1060          reg.hstride += cvt(s) - 1;
1061 
1062       if (reg.vstride)
1063          reg.vstride += cvt(s) - 1;
1064 
1065       return reg;
1066    } else {
1067       return stride(reg, 0, 1, 0);
1068    }
1069 }
1070 
1071 /**
1072  * Reinterpret each channel of register \p reg as a vector of values of the
1073  * given smaller type and take the i-th subcomponent from each.
1074  */
1075 static inline struct brw_reg
subscript(struct brw_reg reg,enum brw_reg_type type,unsigned i)1076 subscript(struct brw_reg reg, enum brw_reg_type type, unsigned i)
1077 {
1078    assert((i + 1) * brw_type_size_bytes(type) <= brw_type_size_bytes(reg.type));
1079 
1080    if (reg.file == ARF || reg.file == FIXED_GRF) {
1081       /* The stride is encoded inconsistently for fixed GRF and ARF registers
1082        * as the log2 of the actual vertical and horizontal strides.
1083        */
1084       const int delta = util_logbase2(brw_type_size_bytes(reg.type)) -
1085                         util_logbase2(brw_type_size_bytes(type));
1086       reg.hstride += (reg.hstride ? delta : 0);
1087       reg.vstride += (reg.vstride ? delta : 0);
1088 
1089    } else if (reg.file == IMM) {
1090       unsigned bit_size = brw_type_size_bits(type);
1091       reg.u64 >>= i * bit_size;
1092       reg.u64 &= BITFIELD64_MASK(bit_size);
1093       if (bit_size <= 16)
1094          reg.u64 |= reg.u64 << 16;
1095       return retype(reg, type);
1096    } else {
1097       reg.stride *= brw_type_size_bytes(reg.type) / brw_type_size_bytes(type);
1098    }
1099 
1100    return byte_offset(retype(reg, type), i * brw_type_size_bytes(type));
1101 }
1102 
1103 static inline struct brw_reg
vec16(struct brw_reg reg)1104 vec16(struct brw_reg reg)
1105 {
1106    return stride(reg, 16,16,1);
1107 }
1108 
1109 static inline struct brw_reg
vec8(struct brw_reg reg)1110 vec8(struct brw_reg reg)
1111 {
1112    return stride(reg, 8,8,1);
1113 }
1114 
1115 static inline struct brw_reg
vec4(struct brw_reg reg)1116 vec4(struct brw_reg reg)
1117 {
1118    return stride(reg, 4,4,1);
1119 }
1120 
1121 static inline struct brw_reg
vec2(struct brw_reg reg)1122 vec2(struct brw_reg reg)
1123 {
1124    return stride(reg, 2,2,1);
1125 }
1126 
1127 static inline struct brw_reg
vec1(struct brw_reg reg)1128 vec1(struct brw_reg reg)
1129 {
1130    return stride(reg, 0,1,0);
1131 }
1132 
1133 
1134 static inline struct brw_reg
get_element(struct brw_reg reg,unsigned elt)1135 get_element(struct brw_reg reg, unsigned elt)
1136 {
1137    return vec1(suboffset(reg, elt));
1138 }
1139 
1140 static inline struct brw_reg
get_element_ud(struct brw_reg reg,unsigned elt)1141 get_element_ud(struct brw_reg reg, unsigned elt)
1142 {
1143    return vec1(suboffset(retype(reg, BRW_TYPE_UD), elt));
1144 }
1145 
1146 static inline struct brw_reg
get_element_d(struct brw_reg reg,unsigned elt)1147 get_element_d(struct brw_reg reg, unsigned elt)
1148 {
1149    return vec1(suboffset(retype(reg, BRW_TYPE_D), elt));
1150 }
1151 
1152 static inline struct brw_reg
brw_swizzle(struct brw_reg reg,unsigned swz)1153 brw_swizzle(struct brw_reg reg, unsigned swz)
1154 {
1155    if (reg.file == IMM)
1156       reg.ud = brw_swizzle_immediate(reg.type, reg.ud, swz);
1157    else
1158       reg.swizzle = brw_compose_swizzle(swz, reg.swizzle);
1159 
1160    return reg;
1161 }
1162 
1163 static inline struct brw_reg
brw_writemask(struct brw_reg reg,unsigned mask)1164 brw_writemask(struct brw_reg reg, unsigned mask)
1165 {
1166    assert(reg.file != IMM);
1167    reg.writemask &= mask;
1168    return reg;
1169 }
1170 
1171 static inline struct brw_reg
brw_set_writemask(struct brw_reg reg,unsigned mask)1172 brw_set_writemask(struct brw_reg reg, unsigned mask)
1173 {
1174    assert(reg.file != IMM);
1175    reg.writemask = mask;
1176    return reg;
1177 }
1178 
/**
 * Return a mask with the \p n lowest bits set, e.g. 0xf for \p n == 4.
 *
 * The shift is done on an unsigned operand so that \p n == 31 does not
 * overflow a signed int.  \p n must be less than 32, since shifting by the
 * full width of the type is undefined.
 */
static inline unsigned
brw_writemask_for_size(unsigned n)
{
   assert(n < 32);
   return (1u << n) - 1;
}
1184 
/**
 * Return a mask of \p n consecutive bits starting at bit \p first_component.
 *
 * The selected components must fit in four channels, i.e.
 * \p first_component + \p n must not exceed 4.
 */
static inline unsigned
brw_writemask_for_component_packing(unsigned n, unsigned first_component)
{
   assert(first_component + n <= 4);

   const int low_bits = (1 << n) - 1;

   return low_bits << first_component;
}
1191 
1192 static inline struct brw_reg
negate(struct brw_reg reg)1193 negate(struct brw_reg reg)
1194 {
1195    reg.negate ^= 1;
1196    return reg;
1197 }
1198 
1199 static inline struct brw_reg
brw_abs(struct brw_reg reg)1200 brw_abs(struct brw_reg reg)
1201 {
1202    reg.abs = 1;
1203    reg.negate = 0;
1204    return reg;
1205 }
1206 
1207 /************************************************************************/
1208 
1209 static inline struct brw_reg
brw_vec1_indirect(unsigned subnr,int offset)1210 brw_vec1_indirect(unsigned subnr, int offset)
1211 {
1212    struct brw_reg reg =  brw_vec1_grf(0, 0);
1213    reg.subnr = subnr;
1214    reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
1215    reg.indirect_offset = offset;
1216    return reg;
1217 }
1218 
1219 static inline struct brw_reg
brw_VxH_indirect(unsigned subnr,int offset)1220 brw_VxH_indirect(unsigned subnr, int offset)
1221 {
1222    struct brw_reg reg = brw_vec1_grf(0, 0);
1223    reg.vstride = BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL;
1224    reg.subnr = subnr;
1225    reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
1226    reg.indirect_offset = offset;
1227    return reg;
1228 }
1229 
1230 static inline bool
region_matches(struct brw_reg reg,enum brw_vertical_stride v,enum brw_width w,enum brw_horizontal_stride h)1231 region_matches(struct brw_reg reg, enum brw_vertical_stride v,
1232                enum brw_width w, enum brw_horizontal_stride h)
1233 {
1234    return reg.vstride == v &&
1235           reg.width == w &&
1236           reg.hstride == h;
1237 }
1238 
/* True when \p reg has a vertical stride of 0, a width of 1 and a
 * horizontal stride of 0, as checked by region_matches().
 */
#define has_scalar_region(reg) \
   region_matches(reg, BRW_VERTICAL_STRIDE_0, BRW_WIDTH_1, \
                  BRW_HORIZONTAL_STRIDE_0)
1242 
1243 /**
1244  * Return the size in bytes per data element of register \p reg on the
1245  * corresponding register file.
1246  */
1247 static inline unsigned
element_sz(struct brw_reg reg)1248 element_sz(struct brw_reg reg)
1249 {
1250    if (reg.file == IMM || has_scalar_region(reg)) {
1251       return brw_type_size_bytes(reg.type);
1252 
1253    } else if (reg.width == BRW_WIDTH_1 &&
1254               reg.hstride == BRW_HORIZONTAL_STRIDE_0) {
1255       assert(reg.vstride != BRW_VERTICAL_STRIDE_0);
1256       return brw_type_size_bytes(reg.type) << (reg.vstride - 1);
1257 
1258    } else {
1259       assert(reg.hstride != BRW_HORIZONTAL_STRIDE_0);
1260       assert(reg.vstride == reg.hstride + reg.width);
1261       return brw_type_size_bytes(reg.type) << (reg.hstride - 1);
1262    }
1263 }
1264 
1265 /* brw_packed_float.c */
1266 int brw_float_to_vf(float f);
1267 float brw_vf_to_float(unsigned char vf);
1268 
1269 bool brw_reg_saturate_immediate(brw_reg *reg);
1270 bool brw_reg_negate_immediate(brw_reg *reg);
1271 bool brw_reg_abs_immediate(brw_reg *reg);
1272 
1273 #ifdef __cplusplus
1274 }
1275 #endif
1276 
1277 #ifdef __cplusplus
1278 
1279 static inline brw_reg
horiz_offset(const brw_reg & reg,unsigned delta)1280 horiz_offset(const brw_reg &reg, unsigned delta)
1281 {
1282    switch (reg.file) {
1283    case BAD_FILE:
1284    case UNIFORM:
1285    case IMM:
1286       /* These only have a single component that is implicitly splatted.  A
1287        * horizontal offset should be a harmless no-op.
1288        * XXX - Handle vector immediates correctly.
1289        */
1290       return reg;
1291    case VGRF:
1292    case ATTR:
1293       return byte_offset(reg, delta * reg.stride * brw_type_size_bytes(reg.type));
1294    case ARF:
1295    case FIXED_GRF:
1296       if (reg.is_null()) {
1297          return reg;
1298       } else {
1299          const unsigned hstride = reg.hstride ? 1 << (reg.hstride - 1) : 0;
1300          const unsigned vstride = reg.vstride ? 1 << (reg.vstride - 1) : 0;
1301          const unsigned width = 1 << reg.width;
1302 
1303          if (delta % width == 0) {
1304             return byte_offset(reg, delta / width * vstride * brw_type_size_bytes(reg.type));
1305          } else {
1306             assert(vstride == hstride * width);
1307             return byte_offset(reg, delta * hstride * brw_type_size_bytes(reg.type));
1308          }
1309       }
1310    }
1311    unreachable("Invalid register file");
1312 }
1313 
1314 static inline brw_reg
offset(brw_reg reg,unsigned width,unsigned delta)1315 offset(brw_reg reg, unsigned width, unsigned delta)
1316 {
1317    switch (reg.file) {
1318    case BAD_FILE:
1319       break;
1320    case ARF:
1321    case FIXED_GRF:
1322    case VGRF:
1323    case ATTR:
1324    case UNIFORM:
1325       return byte_offset(reg, delta * reg.component_size(width));
1326    case IMM:
1327       assert(delta == 0);
1328    }
1329    return reg;
1330 }
1331 
1332 /**
1333  * Get the scalar channel of \p reg given by \p idx and replicate it to all
1334  * channels of the result.
1335  */
1336 static inline brw_reg
component(brw_reg reg,unsigned idx)1337 component(brw_reg reg, unsigned idx)
1338 {
1339    reg = horiz_offset(reg, idx);
1340    reg.stride = 0;
1341    if (reg.file == ARF || reg.file == FIXED_GRF) {
1342       reg.vstride = BRW_VERTICAL_STRIDE_0;
1343       reg.width = BRW_WIDTH_1;
1344       reg.hstride = BRW_HORIZONTAL_STRIDE_0;
1345    }
1346    return reg;
1347 }
1348 
1349 /**
1350  * Return an integer identifying the discrete address space a register is
1351  * contained in.  A register is by definition fully contained in the single
1352  * reg_space it belongs to, so two registers with different reg_space ids are
1353  * guaranteed not to overlap.  Most register files are a single reg_space of
1354  * its own, only the VGRF and ATTR files are composed of multiple discrete
1355  * address spaces, one for each allocation and input attribute respectively.
1356  */
1357 static inline uint32_t
reg_space(const brw_reg & r)1358 reg_space(const brw_reg &r)
1359 {
1360    return r.file << 16 | (r.file == VGRF || r.file == ATTR ? r.nr : 0);
1361 }
1362 
1363 /**
1364  * Return the base offset in bytes of a register relative to the start of its
1365  * reg_space().
1366  */
1367 static inline unsigned
reg_offset(const brw_reg & r)1368 reg_offset(const brw_reg &r)
1369 {
1370    return (r.file == VGRF || r.file == IMM || r.file == ATTR ? 0 : r.nr) *
1371           (r.file == UNIFORM ? 4 : REG_SIZE) + r.offset +
1372           (r.file == ARF || r.file == FIXED_GRF ? r.subnr : 0);
1373 }
1374 
1375 /**
1376  * Return the amount of padding in bytes left unused between individual
1377  * components of register \p r due to a (horizontal) stride value greater than
1378  * one, or zero if components are tightly packed in the register file.
1379  */
1380 static inline unsigned
reg_padding(const brw_reg & r)1381 reg_padding(const brw_reg &r)
1382 {
1383    const unsigned stride = ((r.file != ARF && r.file != FIXED_GRF) ? r.stride :
1384                             r.hstride == 0 ? 0 :
1385                             1 << (r.hstride - 1));
1386    return (MAX2(1, stride) - 1) * brw_type_size_bytes(r.type);
1387 }
1388 
1389 /**
1390  * Return whether the register region starting at \p r and spanning \p dr
1391  * bytes could potentially overlap the register region starting at \p s and
1392  * spanning \p ds bytes.
1393  */
1394 static inline bool
regions_overlap(const brw_reg & r,unsigned dr,const brw_reg & s,unsigned ds)1395 regions_overlap(const brw_reg &r, unsigned dr, const brw_reg &s, unsigned ds)
1396 {
1397    if (r.file != s.file)
1398       return false;
1399 
1400    if (r.file == VGRF) {
1401       return r.nr == s.nr &&
1402              !(r.offset + dr <= s.offset || s.offset + ds <= r.offset);
1403    } else {
1404       return !(reg_offset(r) + dr <= reg_offset(s) ||
1405                reg_offset(s) + ds <= reg_offset(r));
1406    }
1407 }
1408 
1409 /**
1410  * Check that the register region given by r [r.offset, r.offset + dr[
1411  * is fully contained inside the register region given by s
1412  * [s.offset, s.offset + ds[.
1413  */
1414 static inline bool
region_contained_in(const brw_reg & r,unsigned dr,const brw_reg & s,unsigned ds)1415 region_contained_in(const brw_reg &r, unsigned dr, const brw_reg &s, unsigned ds)
1416 {
1417    return reg_space(r) == reg_space(s) &&
1418           reg_offset(r) >= reg_offset(s) &&
1419           reg_offset(r) + dr <= reg_offset(s) + ds;
1420 }
1421 
1422 /**
1423  * Return whether the given register region is n-periodic, i.e. whether the
1424  * original region remains invariant after shifting it by \p n scalar
1425  * channels.
1426  */
1427 static inline bool
is_periodic(const brw_reg & reg,unsigned n)1428 is_periodic(const brw_reg &reg, unsigned n)
1429 {
1430    if (reg.file == BAD_FILE || reg.is_null()) {
1431       return true;
1432 
1433    } else if (reg.file == IMM) {
1434       const unsigned period = (reg.type == BRW_TYPE_UV ||
1435                                reg.type == BRW_TYPE_V ? 8 :
1436                                reg.type == BRW_TYPE_VF ? 4 :
1437                                1);
1438       return n % period == 0;
1439 
1440    } else if (reg.file == ARF || reg.file == FIXED_GRF) {
1441       const unsigned period = (reg.hstride == 0 && reg.vstride == 0 ? 1 :
1442                                reg.vstride == 0 ? 1 << reg.width :
1443                                ~0);
1444       return n % period == 0;
1445 
1446    } else {
1447       return reg.stride == 0;
1448    }
1449 }
1450 
1451 static inline bool
is_uniform(const brw_reg & reg)1452 is_uniform(const brw_reg &reg)
1453 {
1454    return is_periodic(reg, 1);
1455 }
1456 
1457 /**
1458  * Get the specified 8-component quarter of a register.
1459  */
1460 static inline brw_reg
quarter(const brw_reg & reg,unsigned idx)1461 quarter(const brw_reg &reg, unsigned idx)
1462 {
1463    assert(idx < 4);
1464    return horiz_offset(reg, 8 * idx);
1465 }
1466 
1467 static inline brw_reg
horiz_stride(brw_reg reg,unsigned s)1468 horiz_stride(brw_reg reg, unsigned s)
1469 {
1470    reg.stride *= s;
1471    return reg;
1472 }
1473 
/* Default-constructed register exposed under a fixed name.
 * NOTE(review): presumably stands for "no register" -- confirm against the
 * default constructor of brw_reg.
 */
static const brw_reg reg_undef;
1475 
1476 /*
1477  * Return the stride between channels of the specified register in
1478  * byte units, or ~0u if the region cannot be represented with a
1479  * single one-dimensional stride.
1480  */
1481 static inline unsigned
byte_stride(const brw_reg & reg)1482 byte_stride(const brw_reg &reg)
1483 {
1484    switch (reg.file) {
1485    case BAD_FILE:
1486    case UNIFORM:
1487    case IMM:
1488    case VGRF:
1489    case ATTR:
1490       return reg.stride * brw_type_size_bytes(reg.type);
1491    case ARF:
1492    case FIXED_GRF:
1493       if (reg.is_null()) {
1494          return 0;
1495       } else {
1496          const unsigned hstride = reg.hstride ? 1 << (reg.hstride - 1) : 0;
1497          const unsigned vstride = reg.vstride ? 1 << (reg.vstride - 1) : 0;
1498          const unsigned width = 1 << reg.width;
1499 
1500          if (width == 1) {
1501             return vstride * brw_type_size_bytes(reg.type);
1502          } else if (hstride * width == vstride) {
1503             return hstride * brw_type_size_bytes(reg.type);
1504          } else {
1505             return ~0u;
1506          }
1507       }
1508    default:
1509       unreachable("Invalid register file");
1510    }
1511 }
1512 
1513 #endif /* __cplusplus */
1514 
1515 #endif
1516