xref: /aosp_15_r20/external/mesa3d/src/intel/compiler/elk/elk_compile_sf.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2006 - 2017 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "elk_compiler.h"
25 #include "elk_disasm.h"
26 #include "elk_eu.h"
27 #include "elk_prim.h"
28 
29 #include "dev/intel_debug.h"
30 
/* Per-compilation state for one SF (strips-and-fans setup) program. */
struct elk_sf_compile {
   /* Code generator state; instructions are emitted through &c->func. */
   struct elk_codegen func;
   /* Private copy of the caller's program key. */
   struct elk_sf_prog_key key;
   /* Results copied back to the caller at the end of elk_compile_sf(). */
   struct elk_sf_prog_data prog_data;

   /* Values computed by the fixed function unit.  alloc_regs() places
    * them in r1.1 .. r1.6; pv is retyped to a signed dword and is used as
    * the computed-jump index by the flat-shading code.
    */
   struct elk_reg pv;
   struct elk_reg det;
   struct elk_reg dx0;
   struct elk_reg dx2;
   struct elk_reg dy0;
   struct elk_reg dy2;

   /* z and 1/w passed in separately:
    */
   struct elk_reg z[3];
   struct elk_reg inv_w[3];

   /* The vertices:
    */
   struct elk_reg vert[3];

   /* Temporaries, allocated after last vertex reg.
    */
   struct elk_reg inv_det;
   struct elk_reg a1_sub_a0;
   struct elk_reg a2_sub_a0;
   struct elk_reg tmp;

   /* Outputs: message registers m1, m2 and m3, holding the x gradient,
    * the y gradient and the start value that are written to the URB.
    */
   struct elk_reg m1Cx;
   struct elk_reg m2Cy;
   struct elk_reg m3C0;

   /* Number of vertices of the primitive being set up (1, 2 or 3). */
   GLuint nr_verts;
   /* Registers of attribute data read per vertex. */
   GLuint nr_attr_regs;
   /* Registers of setup data produced; one URB write is emitted per
    * setup register.
    */
   GLuint nr_setup_regs;
   /* Offset, in registers (pairs of VUE slots), of the first register
    * read from each vertex's URB entry.
    */
   int urb_entry_read_offset;

   /** The last known value of the f0.0 flag register. */
   unsigned flag_value;

   /* Private copy of the caller's VUE map; elk_compile_sf() may append a
    * point-coordinate slot to it.
    */
   struct intel_vue_map vue_map;
};
73 
74 /**
75  * Determine the vue slot corresponding to the given half of the given register.
76  */
vert_reg_to_vue_slot(struct elk_sf_compile * c,GLuint reg,int half)77 static inline int vert_reg_to_vue_slot(struct elk_sf_compile *c, GLuint reg,
78                                        int half)
79 {
80    return (reg + c->urb_entry_read_offset) * 2 + half;
81 }
82 
83 /**
84  * Determine the varying corresponding to the given half of the given
85  * register.  half=0 means the first half of a register, half=1 means the
86  * second half.
87  */
vert_reg_to_varying(struct elk_sf_compile * c,GLuint reg,int half)88 static inline int vert_reg_to_varying(struct elk_sf_compile *c, GLuint reg,
89                                       int half)
90 {
91    int vue_slot = vert_reg_to_vue_slot(c, reg, half);
92    return c->vue_map.slot_to_varying[vue_slot];
93 }
94 
95 /**
96  * Determine the register corresponding to the given vue slot
97  */
get_vue_slot(struct elk_sf_compile * c,struct elk_reg vert,int vue_slot)98 static struct elk_reg get_vue_slot(struct elk_sf_compile *c,
99                                    struct elk_reg vert,
100                                    int vue_slot)
101 {
102    GLuint off = vue_slot / 2 - c->urb_entry_read_offset;
103    GLuint sub = vue_slot % 2;
104 
105    return elk_vec4_grf(vert.nr + off, sub * 4);
106 }
107 
108 /**
109  * Determine the register corresponding to the given varying.
110  */
get_varying(struct elk_sf_compile * c,struct elk_reg vert,GLuint varying)111 static struct elk_reg get_varying(struct elk_sf_compile *c,
112                                   struct elk_reg vert,
113                                   GLuint varying)
114 {
115    int vue_slot = c->vue_map.varying_to_slot[varying];
116    assert (vue_slot >= c->urb_entry_read_offset);
117    return get_vue_slot(c, vert, vue_slot);
118 }
119 
120 static bool
have_attr(struct elk_sf_compile * c,GLuint attr)121 have_attr(struct elk_sf_compile *c, GLuint attr)
122 {
123    return (c->key.attrs & BITFIELD64_BIT(attr)) ? 1 : 0;
124 }
125 
126 /***********************************************************************
127  * Twoside lighting
128  */
copy_bfc(struct elk_sf_compile * c,struct elk_reg vert)129 static void copy_bfc( struct elk_sf_compile *c,
130 		      struct elk_reg vert )
131 {
132    struct elk_codegen *p = &c->func;
133    GLuint i;
134 
135    for (i = 0; i < 2; i++) {
136       if (have_attr(c, VARYING_SLOT_COL0+i) &&
137 	  have_attr(c, VARYING_SLOT_BFC0+i))
138 	 elk_MOV(p,
139 		 get_varying(c, vert, VARYING_SLOT_COL0+i),
140 		 get_varying(c, vert, VARYING_SLOT_BFC0+i));
141    }
142 }
143 
144 
do_twoside_color(struct elk_sf_compile * c)145 static void do_twoside_color( struct elk_sf_compile *c )
146 {
147    struct elk_codegen *p = &c->func;
148    GLuint backface_conditional = c->key.frontface_ccw ? ELK_CONDITIONAL_G : ELK_CONDITIONAL_L;
149 
150    /* Already done in clip program:
151     */
152    if (c->key.primitive == ELK_SF_PRIM_UNFILLED_TRIS)
153       return;
154 
155    /* If the vertex shader provides backface color, do the selection. The VS
156     * promises to set up the front color if the backface color is provided, but
157     * it may contain junk if never written to.
158     */
159    if (!(have_attr(c, VARYING_SLOT_COL0) && have_attr(c, VARYING_SLOT_BFC0)) &&
160        !(have_attr(c, VARYING_SLOT_COL1) && have_attr(c, VARYING_SLOT_BFC1)))
161       return;
162 
163    /* Need to use ELK_EXECUTE_4 and also do an 4-wide compare in order
164     * to get all channels active inside the IF.  In the clipping code
165     * we run with NoMask, so it's not an option and we can use
166     * ELK_EXECUTE_1 for all comparisons.
167     */
168    elk_CMP(p, vec4(elk_null_reg()), backface_conditional, c->det, elk_imm_f(0));
169    elk_IF(p, ELK_EXECUTE_4);
170    {
171       switch (c->nr_verts) {
172       case 3: copy_bfc(c, c->vert[2]); FALLTHROUGH;
173       case 2: copy_bfc(c, c->vert[1]); FALLTHROUGH;
174       case 1: copy_bfc(c, c->vert[0]);
175       }
176    }
177    elk_ENDIF(p);
178 }
179 
180 
181 
182 /***********************************************************************
183  * Flat shading
184  */
185 
copy_flatshaded_attributes(struct elk_sf_compile * c,struct elk_reg dst,struct elk_reg src)186 static void copy_flatshaded_attributes(struct elk_sf_compile *c,
187                                        struct elk_reg dst,
188                                        struct elk_reg src)
189 {
190    struct elk_codegen *p = &c->func;
191    int i;
192 
193    for (i = 0; i < c->vue_map.num_slots; i++) {
194       if (c->key.interp_mode[i] == INTERP_MODE_FLAT) {
195          elk_MOV(p,
196                  get_vue_slot(c, dst, i),
197                  get_vue_slot(c, src, i));
198       }
199    }
200 }
201 
count_flatshaded_attributes(struct elk_sf_compile * c)202 static int count_flatshaded_attributes(struct elk_sf_compile *c)
203 {
204    int i;
205    int count = 0;
206 
207    for (i = 0; i < c->vue_map.num_slots; i++)
208       if (c->key.interp_mode[i] == INTERP_MODE_FLAT)
209          count++;
210 
211    return count;
212 }
213 
214 
215 
216 /* Need to use a computed jump to copy flatshaded attributes as the
217  * vertices are ordered according to y-coordinate before reaching this
218  * point, so the PV could be anywhere.
219  */
do_flatshade_triangle(struct elk_sf_compile * c)220 static void do_flatshade_triangle( struct elk_sf_compile *c )
221 {
222    struct elk_codegen *p = &c->func;
223    GLuint nr;
224    GLuint jmpi = 1;
225 
226    /* Already done in clip program:
227     */
228    if (c->key.primitive == ELK_SF_PRIM_UNFILLED_TRIS)
229       return;
230 
231    if (p->devinfo->ver == 5)
232        jmpi = 2;
233 
234    nr = count_flatshaded_attributes(c);
235 
236    elk_MUL(p, c->pv, c->pv, elk_imm_d(jmpi*(nr*2+1)));
237    elk_JMPI(p, c->pv, ELK_PREDICATE_NONE);
238 
239    copy_flatshaded_attributes(c, c->vert[1], c->vert[0]);
240    copy_flatshaded_attributes(c, c->vert[2], c->vert[0]);
241    elk_JMPI(p, elk_imm_d(jmpi*(nr*4+1)), ELK_PREDICATE_NONE);
242 
243    copy_flatshaded_attributes(c, c->vert[0], c->vert[1]);
244    copy_flatshaded_attributes(c, c->vert[2], c->vert[1]);
245    elk_JMPI(p, elk_imm_d(jmpi*nr*2), ELK_PREDICATE_NONE);
246 
247    copy_flatshaded_attributes(c, c->vert[0], c->vert[2]);
248    copy_flatshaded_attributes(c, c->vert[1], c->vert[2]);
249 }
250 
251 
do_flatshade_line(struct elk_sf_compile * c)252 static void do_flatshade_line( struct elk_sf_compile *c )
253 {
254    struct elk_codegen *p = &c->func;
255    GLuint nr;
256    GLuint jmpi = 1;
257 
258    /* Already done in clip program:
259     */
260    if (c->key.primitive == ELK_SF_PRIM_UNFILLED_TRIS)
261       return;
262 
263    if (p->devinfo->ver == 5)
264        jmpi = 2;
265 
266    nr = count_flatshaded_attributes(c);
267 
268    elk_MUL(p, c->pv, c->pv, elk_imm_d(jmpi*(nr+1)));
269    elk_JMPI(p, c->pv, ELK_PREDICATE_NONE);
270    copy_flatshaded_attributes(c, c->vert[1], c->vert[0]);
271 
272    elk_JMPI(p, elk_imm_ud(jmpi*nr), ELK_PREDICATE_NONE);
273    copy_flatshaded_attributes(c, c->vert[0], c->vert[1]);
274 }
275 
276 
277 /***********************************************************************
278  * Triangle setup.
279  */
280 
281 
alloc_regs(struct elk_sf_compile * c)282 static void alloc_regs( struct elk_sf_compile *c )
283 {
284    GLuint reg, i;
285 
286    /* Values computed by fixed function unit:
287     */
288    c->pv  = retype(elk_vec1_grf(1, 1), ELK_REGISTER_TYPE_D);
289    c->det = elk_vec1_grf(1, 2);
290    c->dx0 = elk_vec1_grf(1, 3);
291    c->dx2 = elk_vec1_grf(1, 4);
292    c->dy0 = elk_vec1_grf(1, 5);
293    c->dy2 = elk_vec1_grf(1, 6);
294 
295    /* z and 1/w passed in separately:
296     */
297    c->z[0]     = elk_vec1_grf(2, 0);
298    c->inv_w[0] = elk_vec1_grf(2, 1);
299    c->z[1]     = elk_vec1_grf(2, 2);
300    c->inv_w[1] = elk_vec1_grf(2, 3);
301    c->z[2]     = elk_vec1_grf(2, 4);
302    c->inv_w[2] = elk_vec1_grf(2, 5);
303 
304    /* The vertices:
305     */
306    reg = 3;
307    for (i = 0; i < c->nr_verts; i++) {
308       c->vert[i] = elk_vec8_grf(reg, 0);
309       reg += c->nr_attr_regs;
310    }
311 
312    /* Temporaries, allocated after last vertex reg.
313     */
314    c->inv_det = elk_vec1_grf(reg, 0);  reg++;
315    c->a1_sub_a0 = elk_vec8_grf(reg, 0);  reg++;
316    c->a2_sub_a0 = elk_vec8_grf(reg, 0);  reg++;
317    c->tmp = elk_vec8_grf(reg, 0);  reg++;
318 
319    /* Note grf allocation:
320     */
321    c->prog_data.total_grf = reg;
322 
323 
324    /* Outputs of this program - interpolation coefficients for
325     * rasterization:
326     */
327    c->m1Cx = elk_vec8_reg(ELK_MESSAGE_REGISTER_FILE, 1, 0);
328    c->m2Cy = elk_vec8_reg(ELK_MESSAGE_REGISTER_FILE, 2, 0);
329    c->m3C0 = elk_vec8_reg(ELK_MESSAGE_REGISTER_FILE, 3, 0);
330 }
331 
332 
copy_z_inv_w(struct elk_sf_compile * c)333 static void copy_z_inv_w( struct elk_sf_compile *c )
334 {
335    struct elk_codegen *p = &c->func;
336    GLuint i;
337 
338    /* Copy both scalars with a single MOV:
339     */
340    for (i = 0; i < c->nr_verts; i++)
341       elk_MOV(p, vec2(suboffset(c->vert[i], 2)), vec2(c->z[i]));
342 }
343 
344 
invert_det(struct elk_sf_compile * c)345 static void invert_det( struct elk_sf_compile *c)
346 {
347    /* Looks like we invert all 8 elements just to get 1/det in
348     * position 2 !?!
349     */
350    elk_gfx4_math(&c->func,
351 	     c->inv_det,
352 	     ELK_MATH_FUNCTION_INV,
353 	     0,
354 	     c->det,
355 	     ELK_MATH_PRECISION_FULL);
356 
357 }
358 
359 
360 static bool
calculate_masks(struct elk_sf_compile * c,GLuint reg,GLushort * pc,GLushort * pc_persp,GLushort * pc_linear)361 calculate_masks(struct elk_sf_compile *c,
362                 GLuint reg,
363                 GLushort *pc,
364                 GLushort *pc_persp,
365                 GLushort *pc_linear)
366 {
367    bool is_last_attr = (reg == c->nr_setup_regs - 1);
368    enum glsl_interp_mode interp;
369 
370    *pc_persp = 0;
371    *pc_linear = 0;
372    *pc = 0xf;
373 
374    interp = c->key.interp_mode[vert_reg_to_vue_slot(c, reg, 0)];
375    if (interp == INTERP_MODE_SMOOTH) {
376       *pc_linear = 0xf;
377       *pc_persp = 0xf;
378    } else if (interp == INTERP_MODE_NOPERSPECTIVE)
379       *pc_linear = 0xf;
380 
381    /* Maybe only process one attribute on the final round:
382     */
383    if (vert_reg_to_varying(c, reg, 1) != ELK_VARYING_SLOT_COUNT) {
384       *pc |= 0xf0;
385 
386       interp = c->key.interp_mode[vert_reg_to_vue_slot(c, reg, 1)];
387       if (interp == INTERP_MODE_SMOOTH) {
388          *pc_linear |= 0xf0;
389          *pc_persp |= 0xf0;
390       } else if (interp == INTERP_MODE_NOPERSPECTIVE)
391          *pc_linear |= 0xf0;
392    }
393 
394    return is_last_attr;
395 }
396 
397 /* Calculates the predicate control for which channels of a reg
398  * (containing 2 attrs) to do point sprite coordinate replacement on.
399  */
400 static uint16_t
calculate_point_sprite_mask(struct elk_sf_compile * c,GLuint reg)401 calculate_point_sprite_mask(struct elk_sf_compile *c, GLuint reg)
402 {
403    int varying1, varying2;
404    uint16_t pc = 0;
405 
406    varying1 = vert_reg_to_varying(c, reg, 0);
407    if (varying1 >= VARYING_SLOT_TEX0 && varying1 <= VARYING_SLOT_TEX7) {
408       if (c->key.point_sprite_coord_replace & (1 << (varying1 - VARYING_SLOT_TEX0)))
409 	 pc |= 0x0f;
410    }
411    if (varying1 == ELK_VARYING_SLOT_PNTC)
412       pc |= 0x0f;
413 
414    varying2 = vert_reg_to_varying(c, reg, 1);
415    if (varying2 >= VARYING_SLOT_TEX0 && varying2 <= VARYING_SLOT_TEX7) {
416       if (c->key.point_sprite_coord_replace & (1 << (varying2 -
417                                                      VARYING_SLOT_TEX0)))
418          pc |= 0xf0;
419    }
420    if (varying2 == ELK_VARYING_SLOT_PNTC)
421       pc |= 0xf0;
422 
423    return pc;
424 }
425 
426 static void
set_predicate_control_flag_value(struct elk_codegen * p,struct elk_sf_compile * c,unsigned value)427 set_predicate_control_flag_value(struct elk_codegen *p,
428                                  struct elk_sf_compile *c,
429                                  unsigned value)
430 {
431    elk_set_default_predicate_control(p, ELK_PREDICATE_NONE);
432 
433    if (value != 0xff) {
434       if (value != c->flag_value) {
435          elk_MOV(p, elk_flag_reg(0, 0), elk_imm_uw(value));
436          c->flag_value = value;
437       }
438 
439       elk_set_default_predicate_control(p, ELK_PREDICATE_NORMAL);
440    }
441 }
442 
elk_emit_tri_setup(struct elk_sf_compile * c,bool allocate)443 static void elk_emit_tri_setup(struct elk_sf_compile *c, bool allocate)
444 {
445    struct elk_codegen *p = &c->func;
446    GLuint i;
447 
448    c->flag_value = 0xff;
449    c->nr_verts = 3;
450 
451    if (allocate)
452       alloc_regs(c);
453 
454    invert_det(c);
455    copy_z_inv_w(c);
456 
457    if (c->key.do_twoside_color)
458       do_twoside_color(c);
459 
460    if (c->key.contains_flat_varying)
461       do_flatshade_triangle(c);
462 
463 
464    for (i = 0; i < c->nr_setup_regs; i++)
465    {
466       /* Pair of incoming attributes:
467        */
468       struct elk_reg a0 = offset(c->vert[0], i);
469       struct elk_reg a1 = offset(c->vert[1], i);
470       struct elk_reg a2 = offset(c->vert[2], i);
471       GLushort pc, pc_persp, pc_linear;
472       bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
473 
474       if (pc_persp)
475       {
476 	 set_predicate_control_flag_value(p, c, pc_persp);
477 	 elk_MUL(p, a0, a0, c->inv_w[0]);
478 	 elk_MUL(p, a1, a1, c->inv_w[1]);
479 	 elk_MUL(p, a2, a2, c->inv_w[2]);
480       }
481 
482 
483       /* Calculate coefficients for interpolated values:
484        */
485       if (pc_linear)
486       {
487 	 set_predicate_control_flag_value(p, c, pc_linear);
488 
489 	 elk_ADD(p, c->a1_sub_a0, a1, negate(a0));
490 	 elk_ADD(p, c->a2_sub_a0, a2, negate(a0));
491 
492 	 /* calculate dA/dx
493 	  */
494 	 elk_MUL(p, elk_null_reg(), c->a1_sub_a0, c->dy2);
495 	 elk_MAC(p, c->tmp, c->a2_sub_a0, negate(c->dy0));
496 	 elk_MUL(p, c->m1Cx, c->tmp, c->inv_det);
497 
498 	 /* calculate dA/dy
499 	  */
500 	 elk_MUL(p, elk_null_reg(), c->a2_sub_a0, c->dx0);
501 	 elk_MAC(p, c->tmp, c->a1_sub_a0, negate(c->dx2));
502 	 elk_MUL(p, c->m2Cy, c->tmp, c->inv_det);
503       }
504 
505       {
506 	 set_predicate_control_flag_value(p, c, pc);
507 	 /* start point for interpolation
508 	  */
509 	 elk_MOV(p, c->m3C0, a0);
510 
511 	 /* Copy m0..m3 to URB.  m0 is implicitly copied from r0 in
512 	  * the send instruction:
513 	  */
514 	 elk_urb_WRITE(p,
515 		       elk_null_reg(),
516 		       0,
517 		       elk_vec8_grf(0, 0), /* r0, will be copied to m0 */
518                        last ? ELK_URB_WRITE_EOT_COMPLETE
519                        : ELK_URB_WRITE_NO_FLAGS,
520 		       4, 	/* msg len */
521 		       0,	/* response len */
522 		       i*4,	/* offset */
523 		       ELK_URB_SWIZZLE_TRANSPOSE); /* XXX: Swizzle control "SF to windower" */
524       }
525    }
526 
527    elk_set_default_predicate_control(p, ELK_PREDICATE_NONE);
528 }
529 
530 
531 
elk_emit_line_setup(struct elk_sf_compile * c,bool allocate)532 static void elk_emit_line_setup(struct elk_sf_compile *c, bool allocate)
533 {
534    struct elk_codegen *p = &c->func;
535    GLuint i;
536 
537    c->flag_value = 0xff;
538    c->nr_verts = 2;
539 
540    if (allocate)
541       alloc_regs(c);
542 
543    invert_det(c);
544    copy_z_inv_w(c);
545 
546    if (c->key.contains_flat_varying)
547       do_flatshade_line(c);
548 
549    for (i = 0; i < c->nr_setup_regs; i++)
550    {
551       /* Pair of incoming attributes:
552        */
553       struct elk_reg a0 = offset(c->vert[0], i);
554       struct elk_reg a1 = offset(c->vert[1], i);
555       GLushort pc, pc_persp, pc_linear;
556       bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
557 
558       if (pc_persp)
559       {
560 	 set_predicate_control_flag_value(p, c, pc_persp);
561 	 elk_MUL(p, a0, a0, c->inv_w[0]);
562 	 elk_MUL(p, a1, a1, c->inv_w[1]);
563       }
564 
565       /* Calculate coefficients for position, color:
566        */
567       if (pc_linear) {
568 	 set_predicate_control_flag_value(p, c, pc_linear);
569 
570 	 elk_ADD(p, c->a1_sub_a0, a1, negate(a0));
571 
572 	 elk_MUL(p, c->tmp, c->a1_sub_a0, c->dx0);
573 	 elk_MUL(p, c->m1Cx, c->tmp, c->inv_det);
574 
575 	 elk_MUL(p, c->tmp, c->a1_sub_a0, c->dy0);
576 	 elk_MUL(p, c->m2Cy, c->tmp, c->inv_det);
577       }
578 
579       {
580 	 set_predicate_control_flag_value(p, c, pc);
581 
582 	 /* start point for interpolation
583 	  */
584 	 elk_MOV(p, c->m3C0, a0);
585 
586 	 /* Copy m0..m3 to URB.
587 	  */
588 	 elk_urb_WRITE(p,
589 		       elk_null_reg(),
590 		       0,
591 		       elk_vec8_grf(0, 0),
592                        last ? ELK_URB_WRITE_EOT_COMPLETE
593                        : ELK_URB_WRITE_NO_FLAGS,
594 		       4, 	/* msg len */
595 		       0,	/* response len */
596 		       i*4,	/* urb destination offset */
597 		       ELK_URB_SWIZZLE_TRANSPOSE);
598       }
599    }
600 
601    elk_set_default_predicate_control(p, ELK_PREDICATE_NONE);
602 }
603 
elk_emit_point_sprite_setup(struct elk_sf_compile * c,bool allocate)604 static void elk_emit_point_sprite_setup(struct elk_sf_compile *c, bool allocate)
605 {
606    struct elk_codegen *p = &c->func;
607    GLuint i;
608 
609    c->flag_value = 0xff;
610    c->nr_verts = 1;
611 
612    if (allocate)
613       alloc_regs(c);
614 
615    copy_z_inv_w(c);
616    for (i = 0; i < c->nr_setup_regs; i++)
617    {
618       struct elk_reg a0 = offset(c->vert[0], i);
619       GLushort pc, pc_persp, pc_linear, pc_coord_replace;
620       bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
621 
622       pc_coord_replace = calculate_point_sprite_mask(c, i);
623       pc_persp &= ~pc_coord_replace;
624 
625       if (pc_persp) {
626 	 set_predicate_control_flag_value(p, c, pc_persp);
627 	 elk_MUL(p, a0, a0, c->inv_w[0]);
628       }
629 
630       /* Point sprite coordinate replacement: A texcoord with this
631        * enabled gets replaced with the value (x, y, 0, 1) where x and
632        * y vary from 0 to 1 across the horizontal and vertical of the
633        * point.
634        */
635       if (pc_coord_replace) {
636 	 set_predicate_control_flag_value(p, c, pc_coord_replace);
637 	 /* Calculate 1.0/PointWidth */
638 	 elk_gfx4_math(&c->func,
639 		   c->tmp,
640 		   ELK_MATH_FUNCTION_INV,
641 		   0,
642 		   c->dx0,
643 		   ELK_MATH_PRECISION_FULL);
644 
645 	 elk_set_default_access_mode(p, ELK_ALIGN_16);
646 
647 	 /* dA/dx, dA/dy */
648 	 elk_MOV(p, c->m1Cx, elk_imm_f(0.0));
649 	 elk_MOV(p, c->m2Cy, elk_imm_f(0.0));
650 	 elk_MOV(p, elk_writemask(c->m1Cx, WRITEMASK_X), c->tmp);
651 	 if (c->key.sprite_origin_lower_left) {
652 	    elk_MOV(p, elk_writemask(c->m2Cy, WRITEMASK_Y), negate(c->tmp));
653 	 } else {
654 	    elk_MOV(p, elk_writemask(c->m2Cy, WRITEMASK_Y), c->tmp);
655 	 }
656 
657 	 /* attribute constant offset */
658 	 elk_MOV(p, c->m3C0, elk_imm_f(0.0));
659 	 if (c->key.sprite_origin_lower_left) {
660 	    elk_MOV(p, elk_writemask(c->m3C0, WRITEMASK_YW), elk_imm_f(1.0));
661 	 } else {
662 	    elk_MOV(p, elk_writemask(c->m3C0, WRITEMASK_W), elk_imm_f(1.0));
663 	 }
664 
665 	 elk_set_default_access_mode(p, ELK_ALIGN_1);
666       }
667 
668       if (pc & ~pc_coord_replace) {
669 	 set_predicate_control_flag_value(p, c, pc & ~pc_coord_replace);
670 	 elk_MOV(p, c->m1Cx, elk_imm_ud(0));
671 	 elk_MOV(p, c->m2Cy, elk_imm_ud(0));
672 	 elk_MOV(p, c->m3C0, a0); /* constant value */
673       }
674 
675 
676       set_predicate_control_flag_value(p, c, pc);
677       /* Copy m0..m3 to URB. */
678       elk_urb_WRITE(p,
679 		    elk_null_reg(),
680 		    0,
681 		    elk_vec8_grf(0, 0),
682                     last ? ELK_URB_WRITE_EOT_COMPLETE
683                     : ELK_URB_WRITE_NO_FLAGS,
684 		    4, 	/* msg len */
685 		    0,	/* response len */
686 		    i*4,	/* urb destination offset */
687 		    ELK_URB_SWIZZLE_TRANSPOSE);
688    }
689 
690    elk_set_default_predicate_control(p, ELK_PREDICATE_NONE);
691 }
692 
693 /* Points setup - several simplifications as all attributes are
694  * constant across the face of the point (point sprites excluded!)
695  */
elk_emit_point_setup(struct elk_sf_compile * c,bool allocate)696 static void elk_emit_point_setup(struct elk_sf_compile *c, bool allocate)
697 {
698    struct elk_codegen *p = &c->func;
699    GLuint i;
700 
701    c->flag_value = 0xff;
702    c->nr_verts = 1;
703 
704    if (allocate)
705       alloc_regs(c);
706 
707    copy_z_inv_w(c);
708 
709    elk_MOV(p, c->m1Cx, elk_imm_ud(0)); /* zero - move out of loop */
710    elk_MOV(p, c->m2Cy, elk_imm_ud(0)); /* zero - move out of loop */
711 
712    for (i = 0; i < c->nr_setup_regs; i++)
713    {
714       struct elk_reg a0 = offset(c->vert[0], i);
715       GLushort pc, pc_persp, pc_linear;
716       bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
717 
718       if (pc_persp)
719       {
720 	 /* This seems odd as the values are all constant, but the
721 	  * fragment shader will be expecting it:
722 	  */
723 	 set_predicate_control_flag_value(p, c, pc_persp);
724 	 elk_MUL(p, a0, a0, c->inv_w[0]);
725       }
726 
727 
728       /* The delta values are always zero, just send the starting
729        * coordinate.  Again, this is to fit in with the interpolation
730        * code in the fragment shader.
731        */
732       {
733 	 set_predicate_control_flag_value(p, c, pc);
734 
735 	 elk_MOV(p, c->m3C0, a0); /* constant value */
736 
737 	 /* Copy m0..m3 to URB.
738 	  */
739 	 elk_urb_WRITE(p,
740 		       elk_null_reg(),
741 		       0,
742 		       elk_vec8_grf(0, 0),
743                        last ? ELK_URB_WRITE_EOT_COMPLETE
744                        : ELK_URB_WRITE_NO_FLAGS,
745 		       4, 	/* msg len */
746 		       0,	/* response len */
747 		       i*4,	/* urb destination offset */
748 		       ELK_URB_SWIZZLE_TRANSPOSE);
749       }
750    }
751 
752    elk_set_default_predicate_control(p, ELK_PREDICATE_NONE);
753 }
754 
elk_emit_anyprim_setup(struct elk_sf_compile * c)755 static void elk_emit_anyprim_setup( struct elk_sf_compile *c )
756 {
757    struct elk_codegen *p = &c->func;
758    struct elk_reg payload_prim = elk_uw1_reg(ELK_GENERAL_REGISTER_FILE, 1, 0);
759    struct elk_reg payload_attr = get_element_ud(elk_vec1_reg(ELK_GENERAL_REGISTER_FILE, 1, 0), 0);
760    struct elk_reg primmask;
761    int jmp;
762    struct elk_reg v1_null_ud = vec1(retype(elk_null_reg(), ELK_REGISTER_TYPE_UD));
763 
764    c->nr_verts = 3;
765    alloc_regs(c);
766 
767    primmask = retype(get_element(c->tmp, 0), ELK_REGISTER_TYPE_UD);
768 
769    elk_MOV(p, primmask, elk_imm_ud(1));
770    elk_SHL(p, primmask, primmask, payload_prim);
771 
772    elk_AND(p, v1_null_ud, primmask, elk_imm_ud((1<<_3DPRIM_TRILIST) |
773 					       (1<<_3DPRIM_TRISTRIP) |
774 					       (1<<_3DPRIM_TRIFAN) |
775 					       (1<<_3DPRIM_TRISTRIP_REVERSE) |
776 					       (1<<_3DPRIM_POLYGON) |
777 					       (1<<_3DPRIM_RECTLIST) |
778 					       (1<<_3DPRIM_TRIFAN_NOSTIPPLE)));
779    elk_inst_set_cond_modifier(p->devinfo, elk_last_inst, ELK_CONDITIONAL_Z);
780    jmp = elk_JMPI(p, elk_imm_d(0), ELK_PREDICATE_NORMAL) - p->store;
781    elk_emit_tri_setup(c, false);
782    elk_land_fwd_jump(p, jmp);
783 
784    elk_AND(p, v1_null_ud, primmask, elk_imm_ud((1<<_3DPRIM_LINELIST) |
785 					       (1<<_3DPRIM_LINESTRIP) |
786 					       (1<<_3DPRIM_LINELOOP) |
787 					       (1<<_3DPRIM_LINESTRIP_CONT) |
788 					       (1<<_3DPRIM_LINESTRIP_BF) |
789 					       (1<<_3DPRIM_LINESTRIP_CONT_BF)));
790    elk_inst_set_cond_modifier(p->devinfo, elk_last_inst, ELK_CONDITIONAL_Z);
791    jmp = elk_JMPI(p, elk_imm_d(0), ELK_PREDICATE_NORMAL) - p->store;
792    elk_emit_line_setup(c, false);
793    elk_land_fwd_jump(p, jmp);
794 
795    elk_AND(p, v1_null_ud, payload_attr, elk_imm_ud(1<<ELK_SPRITE_POINT_ENABLE));
796    elk_inst_set_cond_modifier(p->devinfo, elk_last_inst, ELK_CONDITIONAL_Z);
797    jmp = elk_JMPI(p, elk_imm_d(0), ELK_PREDICATE_NORMAL) - p->store;
798    elk_emit_point_sprite_setup(c, false);
799    elk_land_fwd_jump(p, jmp);
800 
801    elk_emit_point_setup( c, false );
802 }
803 
804 const unsigned *
elk_compile_sf(const struct elk_compiler * compiler,void * mem_ctx,const struct elk_sf_prog_key * key,struct elk_sf_prog_data * prog_data,struct intel_vue_map * vue_map,unsigned * final_assembly_size)805 elk_compile_sf(const struct elk_compiler *compiler,
806                void *mem_ctx,
807                const struct elk_sf_prog_key *key,
808                struct elk_sf_prog_data *prog_data,
809                struct intel_vue_map *vue_map,
810                unsigned *final_assembly_size)
811 {
812    struct elk_sf_compile c;
813    memset(&c, 0, sizeof(c));
814 
815    /* Begin the compilation:
816     */
817    elk_init_codegen(&compiler->isa, &c.func, mem_ctx);
818 
819    c.key = *key;
820    c.vue_map = *vue_map;
821    if (c.key.do_point_coord) {
822       /*
823        * gl_PointCoord is a FS instead of VS builtin variable, thus it's
824        * not included in c.vue_map generated in VS stage. Here we add
825        * it manually to let SF shader generate the needed interpolation
826        * coefficient for FS shader.
827        */
828       c.vue_map.varying_to_slot[ELK_VARYING_SLOT_PNTC] = c.vue_map.num_slots;
829       c.vue_map.slot_to_varying[c.vue_map.num_slots++] = ELK_VARYING_SLOT_PNTC;
830    }
831    c.urb_entry_read_offset = ELK_SF_URB_ENTRY_READ_OFFSET;
832    c.nr_attr_regs = (c.vue_map.num_slots + 1)/2 - c.urb_entry_read_offset;
833    c.nr_setup_regs = c.nr_attr_regs;
834 
835    c.prog_data.urb_read_length = c.nr_attr_regs;
836    c.prog_data.urb_entry_size = c.nr_setup_regs * 2;
837 
838    /* Which primitive?  Or all three?
839     */
840    switch (key->primitive) {
841    case ELK_SF_PRIM_TRIANGLES:
842       c.nr_verts = 3;
843       elk_emit_tri_setup( &c, true );
844       break;
845    case ELK_SF_PRIM_LINES:
846       c.nr_verts = 2;
847       elk_emit_line_setup( &c, true );
848       break;
849    case ELK_SF_PRIM_POINTS:
850       c.nr_verts = 1;
851       if (key->do_point_sprite)
852 	  elk_emit_point_sprite_setup( &c, true );
853       else
854 	  elk_emit_point_setup( &c, true );
855       break;
856    case ELK_SF_PRIM_UNFILLED_TRIS:
857       c.nr_verts = 3;
858       elk_emit_anyprim_setup( &c );
859       break;
860    default:
861       unreachable("not reached");
862    }
863 
864    /* FINISHME: SF programs use calculated jumps (i.e., JMPI with a register
865     * source). Compacting would be difficult.
866     */
867    /* elk_compact_instructions(&c.func, 0, 0, NULL); */
868 
869    *prog_data = c.prog_data;
870 
871    const unsigned *program = elk_get_program(&c.func, final_assembly_size);
872 
873    if (INTEL_DEBUG(DEBUG_SF)) {
874       fprintf(stderr, "sf:\n");
875       elk_disassemble_with_labels(&compiler->isa,
876                                   program, 0, *final_assembly_size, stderr);
877       fprintf(stderr, "\n");
878    }
879 
880    return program;
881 }
882