1 /*
2 * Copyright © 2006 - 2017 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "elk_compiler.h"
25 #include "elk_disasm.h"
26 #include "elk_eu.h"
27 #include "elk_prim.h"
28
29 #include "dev/intel_debug.h"
30
/**
 * Working state for compiling one SF program.
 *
 * elk_compile_sf() zero-initialises one of these on the stack, fills in the
 * key, VUE map and register counts, and then hands it to one of the
 * elk_emit_*_setup() routines in this file.
 */
struct elk_sf_compile {
   /* Code generator; every instruction in this file is emitted through it. */
   struct elk_codegen func;
   /* Private copy of the caller's program key. */
   struct elk_sf_prog_key key;
   /* Filled in during compilation and copied back to the caller at the end
    * of elk_compile_sf().
    */
   struct elk_sf_prog_data prog_data;

   /* Scalars located in g1 (subregisters 1..6) by alloc_regs(), which
    * describes them as values computed by the fixed function unit.  pv is
    * retyped to D and used by the flat-shading code as the index for its
    * computed jump.
    */
   struct elk_reg pv;
   struct elk_reg det;
   struct elk_reg dx0;
   struct elk_reg dx2;
   struct elk_reg dy0;
   struct elk_reg dy2;

   /* z and 1/w for each vertex, passed in separately from the other
    * attributes; alloc_regs() places them as adjacent pairs in g2.
    */
   struct elk_reg z[3];
   struct elk_reg inv_w[3];

   /* First attribute register of each vertex.  Vertex 0 starts at g3 and
    * consecutive vertices are nr_attr_regs registers apart.
    */
   struct elk_reg vert[3];

   /* Temporaries, allocated in the registers following the last vertex. */
   struct elk_reg inv_det;
   struct elk_reg a1_sub_a0;
   struct elk_reg a2_sub_a0;
   struct elk_reg tmp;

   /* Message registers m1..m3: the outputs of the program.  The setup
    * routines write dA/dx to m1Cx, dA/dy to m2Cy and the starting value to
    * m3C0 before each URB write.
    */
   struct elk_reg m1Cx;
   struct elk_reg m2Cy;
   struct elk_reg m3C0;

   /* Number of vertices in the primitive being set up (1, 2 or 3). */
   GLuint nr_verts;
   /* Attribute registers per vertex; each register holds two VUE slots. */
   GLuint nr_attr_regs;
   /* Number of attribute registers the setup loops iterate over;
    * elk_compile_sf() sets it equal to nr_attr_regs.
    */
   GLuint nr_setup_regs;
   /* Set to ELK_SF_URB_ENTRY_READ_OFFSET; subtracted (in units of registers)
    * when converting between VUE slots and attribute registers.
    */
   int urb_entry_read_offset;

   /** The last value loaded into the f0.0 flag register.  The setup routines
    * reset it to 0xff, the one value that is never loaded.
    */
   unsigned flag_value;

   /* Private copy of the caller's VUE map; elk_compile_sf() may append a
    * slot for ELK_VARYING_SLOT_PNTC to it.
    */
   struct intel_vue_map vue_map;
};
73
74 /**
75 * Determine the vue slot corresponding to the given half of the given register.
76 */
vert_reg_to_vue_slot(struct elk_sf_compile * c,GLuint reg,int half)77 static inline int vert_reg_to_vue_slot(struct elk_sf_compile *c, GLuint reg,
78 int half)
79 {
80 return (reg + c->urb_entry_read_offset) * 2 + half;
81 }
82
83 /**
84 * Determine the varying corresponding to the given half of the given
85 * register. half=0 means the first half of a register, half=1 means the
86 * second half.
87 */
vert_reg_to_varying(struct elk_sf_compile * c,GLuint reg,int half)88 static inline int vert_reg_to_varying(struct elk_sf_compile *c, GLuint reg,
89 int half)
90 {
91 int vue_slot = vert_reg_to_vue_slot(c, reg, half);
92 return c->vue_map.slot_to_varying[vue_slot];
93 }
94
95 /**
96 * Determine the register corresponding to the given vue slot
97 */
get_vue_slot(struct elk_sf_compile * c,struct elk_reg vert,int vue_slot)98 static struct elk_reg get_vue_slot(struct elk_sf_compile *c,
99 struct elk_reg vert,
100 int vue_slot)
101 {
102 GLuint off = vue_slot / 2 - c->urb_entry_read_offset;
103 GLuint sub = vue_slot % 2;
104
105 return elk_vec4_grf(vert.nr + off, sub * 4);
106 }
107
108 /**
109 * Determine the register corresponding to the given varying.
110 */
get_varying(struct elk_sf_compile * c,struct elk_reg vert,GLuint varying)111 static struct elk_reg get_varying(struct elk_sf_compile *c,
112 struct elk_reg vert,
113 GLuint varying)
114 {
115 int vue_slot = c->vue_map.varying_to_slot[varying];
116 assert (vue_slot >= c->urb_entry_read_offset);
117 return get_vue_slot(c, vert, vue_slot);
118 }
119
120 static bool
have_attr(struct elk_sf_compile * c,GLuint attr)121 have_attr(struct elk_sf_compile *c, GLuint attr)
122 {
123 return (c->key.attrs & BITFIELD64_BIT(attr)) ? 1 : 0;
124 }
125
126 /***********************************************************************
127 * Twoside lighting
128 */
copy_bfc(struct elk_sf_compile * c,struct elk_reg vert)129 static void copy_bfc( struct elk_sf_compile *c,
130 struct elk_reg vert )
131 {
132 struct elk_codegen *p = &c->func;
133 GLuint i;
134
135 for (i = 0; i < 2; i++) {
136 if (have_attr(c, VARYING_SLOT_COL0+i) &&
137 have_attr(c, VARYING_SLOT_BFC0+i))
138 elk_MOV(p,
139 get_varying(c, vert, VARYING_SLOT_COL0+i),
140 get_varying(c, vert, VARYING_SLOT_BFC0+i));
141 }
142 }
143
144
do_twoside_color(struct elk_sf_compile * c)145 static void do_twoside_color( struct elk_sf_compile *c )
146 {
147 struct elk_codegen *p = &c->func;
148 GLuint backface_conditional = c->key.frontface_ccw ? ELK_CONDITIONAL_G : ELK_CONDITIONAL_L;
149
150 /* Already done in clip program:
151 */
152 if (c->key.primitive == ELK_SF_PRIM_UNFILLED_TRIS)
153 return;
154
155 /* If the vertex shader provides backface color, do the selection. The VS
156 * promises to set up the front color if the backface color is provided, but
157 * it may contain junk if never written to.
158 */
159 if (!(have_attr(c, VARYING_SLOT_COL0) && have_attr(c, VARYING_SLOT_BFC0)) &&
160 !(have_attr(c, VARYING_SLOT_COL1) && have_attr(c, VARYING_SLOT_BFC1)))
161 return;
162
163 /* Need to use ELK_EXECUTE_4 and also do an 4-wide compare in order
164 * to get all channels active inside the IF. In the clipping code
165 * we run with NoMask, so it's not an option and we can use
166 * ELK_EXECUTE_1 for all comparisons.
167 */
168 elk_CMP(p, vec4(elk_null_reg()), backface_conditional, c->det, elk_imm_f(0));
169 elk_IF(p, ELK_EXECUTE_4);
170 {
171 switch (c->nr_verts) {
172 case 3: copy_bfc(c, c->vert[2]); FALLTHROUGH;
173 case 2: copy_bfc(c, c->vert[1]); FALLTHROUGH;
174 case 1: copy_bfc(c, c->vert[0]);
175 }
176 }
177 elk_ENDIF(p);
178 }
179
180
181
182 /***********************************************************************
183 * Flat shading
184 */
185
copy_flatshaded_attributes(struct elk_sf_compile * c,struct elk_reg dst,struct elk_reg src)186 static void copy_flatshaded_attributes(struct elk_sf_compile *c,
187 struct elk_reg dst,
188 struct elk_reg src)
189 {
190 struct elk_codegen *p = &c->func;
191 int i;
192
193 for (i = 0; i < c->vue_map.num_slots; i++) {
194 if (c->key.interp_mode[i] == INTERP_MODE_FLAT) {
195 elk_MOV(p,
196 get_vue_slot(c, dst, i),
197 get_vue_slot(c, src, i));
198 }
199 }
200 }
201
count_flatshaded_attributes(struct elk_sf_compile * c)202 static int count_flatshaded_attributes(struct elk_sf_compile *c)
203 {
204 int i;
205 int count = 0;
206
207 for (i = 0; i < c->vue_map.num_slots; i++)
208 if (c->key.interp_mode[i] == INTERP_MODE_FLAT)
209 count++;
210
211 return count;
212 }
213
214
215
216 /* Need to use a computed jump to copy flatshaded attributes as the
217 * vertices are ordered according to y-coordinate before reaching this
218 * point, so the PV could be anywhere.
219 */
do_flatshade_triangle(struct elk_sf_compile * c)220 static void do_flatshade_triangle( struct elk_sf_compile *c )
221 {
222 struct elk_codegen *p = &c->func;
223 GLuint nr;
224 GLuint jmpi = 1;
225
226 /* Already done in clip program:
227 */
228 if (c->key.primitive == ELK_SF_PRIM_UNFILLED_TRIS)
229 return;
230
231 if (p->devinfo->ver == 5)
232 jmpi = 2;
233
234 nr = count_flatshaded_attributes(c);
235
236 elk_MUL(p, c->pv, c->pv, elk_imm_d(jmpi*(nr*2+1)));
237 elk_JMPI(p, c->pv, ELK_PREDICATE_NONE);
238
239 copy_flatshaded_attributes(c, c->vert[1], c->vert[0]);
240 copy_flatshaded_attributes(c, c->vert[2], c->vert[0]);
241 elk_JMPI(p, elk_imm_d(jmpi*(nr*4+1)), ELK_PREDICATE_NONE);
242
243 copy_flatshaded_attributes(c, c->vert[0], c->vert[1]);
244 copy_flatshaded_attributes(c, c->vert[2], c->vert[1]);
245 elk_JMPI(p, elk_imm_d(jmpi*nr*2), ELK_PREDICATE_NONE);
246
247 copy_flatshaded_attributes(c, c->vert[0], c->vert[2]);
248 copy_flatshaded_attributes(c, c->vert[1], c->vert[2]);
249 }
250
251
do_flatshade_line(struct elk_sf_compile * c)252 static void do_flatshade_line( struct elk_sf_compile *c )
253 {
254 struct elk_codegen *p = &c->func;
255 GLuint nr;
256 GLuint jmpi = 1;
257
258 /* Already done in clip program:
259 */
260 if (c->key.primitive == ELK_SF_PRIM_UNFILLED_TRIS)
261 return;
262
263 if (p->devinfo->ver == 5)
264 jmpi = 2;
265
266 nr = count_flatshaded_attributes(c);
267
268 elk_MUL(p, c->pv, c->pv, elk_imm_d(jmpi*(nr+1)));
269 elk_JMPI(p, c->pv, ELK_PREDICATE_NONE);
270 copy_flatshaded_attributes(c, c->vert[1], c->vert[0]);
271
272 elk_JMPI(p, elk_imm_ud(jmpi*nr), ELK_PREDICATE_NONE);
273 copy_flatshaded_attributes(c, c->vert[0], c->vert[1]);
274 }
275
276
277 /***********************************************************************
278 * Triangle setup.
279 */
280
281
alloc_regs(struct elk_sf_compile * c)282 static void alloc_regs( struct elk_sf_compile *c )
283 {
284 GLuint reg, i;
285
286 /* Values computed by fixed function unit:
287 */
288 c->pv = retype(elk_vec1_grf(1, 1), ELK_REGISTER_TYPE_D);
289 c->det = elk_vec1_grf(1, 2);
290 c->dx0 = elk_vec1_grf(1, 3);
291 c->dx2 = elk_vec1_grf(1, 4);
292 c->dy0 = elk_vec1_grf(1, 5);
293 c->dy2 = elk_vec1_grf(1, 6);
294
295 /* z and 1/w passed in separately:
296 */
297 c->z[0] = elk_vec1_grf(2, 0);
298 c->inv_w[0] = elk_vec1_grf(2, 1);
299 c->z[1] = elk_vec1_grf(2, 2);
300 c->inv_w[1] = elk_vec1_grf(2, 3);
301 c->z[2] = elk_vec1_grf(2, 4);
302 c->inv_w[2] = elk_vec1_grf(2, 5);
303
304 /* The vertices:
305 */
306 reg = 3;
307 for (i = 0; i < c->nr_verts; i++) {
308 c->vert[i] = elk_vec8_grf(reg, 0);
309 reg += c->nr_attr_regs;
310 }
311
312 /* Temporaries, allocated after last vertex reg.
313 */
314 c->inv_det = elk_vec1_grf(reg, 0); reg++;
315 c->a1_sub_a0 = elk_vec8_grf(reg, 0); reg++;
316 c->a2_sub_a0 = elk_vec8_grf(reg, 0); reg++;
317 c->tmp = elk_vec8_grf(reg, 0); reg++;
318
319 /* Note grf allocation:
320 */
321 c->prog_data.total_grf = reg;
322
323
324 /* Outputs of this program - interpolation coefficients for
325 * rasterization:
326 */
327 c->m1Cx = elk_vec8_reg(ELK_MESSAGE_REGISTER_FILE, 1, 0);
328 c->m2Cy = elk_vec8_reg(ELK_MESSAGE_REGISTER_FILE, 2, 0);
329 c->m3C0 = elk_vec8_reg(ELK_MESSAGE_REGISTER_FILE, 3, 0);
330 }
331
332
copy_z_inv_w(struct elk_sf_compile * c)333 static void copy_z_inv_w( struct elk_sf_compile *c )
334 {
335 struct elk_codegen *p = &c->func;
336 GLuint i;
337
338 /* Copy both scalars with a single MOV:
339 */
340 for (i = 0; i < c->nr_verts; i++)
341 elk_MOV(p, vec2(suboffset(c->vert[i], 2)), vec2(c->z[i]));
342 }
343
344
invert_det(struct elk_sf_compile * c)345 static void invert_det( struct elk_sf_compile *c)
346 {
347 /* Looks like we invert all 8 elements just to get 1/det in
348 * position 2 !?!
349 */
350 elk_gfx4_math(&c->func,
351 c->inv_det,
352 ELK_MATH_FUNCTION_INV,
353 0,
354 c->det,
355 ELK_MATH_PRECISION_FULL);
356
357 }
358
359
360 static bool
calculate_masks(struct elk_sf_compile * c,GLuint reg,GLushort * pc,GLushort * pc_persp,GLushort * pc_linear)361 calculate_masks(struct elk_sf_compile *c,
362 GLuint reg,
363 GLushort *pc,
364 GLushort *pc_persp,
365 GLushort *pc_linear)
366 {
367 bool is_last_attr = (reg == c->nr_setup_regs - 1);
368 enum glsl_interp_mode interp;
369
370 *pc_persp = 0;
371 *pc_linear = 0;
372 *pc = 0xf;
373
374 interp = c->key.interp_mode[vert_reg_to_vue_slot(c, reg, 0)];
375 if (interp == INTERP_MODE_SMOOTH) {
376 *pc_linear = 0xf;
377 *pc_persp = 0xf;
378 } else if (interp == INTERP_MODE_NOPERSPECTIVE)
379 *pc_linear = 0xf;
380
381 /* Maybe only process one attribute on the final round:
382 */
383 if (vert_reg_to_varying(c, reg, 1) != ELK_VARYING_SLOT_COUNT) {
384 *pc |= 0xf0;
385
386 interp = c->key.interp_mode[vert_reg_to_vue_slot(c, reg, 1)];
387 if (interp == INTERP_MODE_SMOOTH) {
388 *pc_linear |= 0xf0;
389 *pc_persp |= 0xf0;
390 } else if (interp == INTERP_MODE_NOPERSPECTIVE)
391 *pc_linear |= 0xf0;
392 }
393
394 return is_last_attr;
395 }
396
397 /* Calculates the predicate control for which channels of a reg
398 * (containing 2 attrs) to do point sprite coordinate replacement on.
399 */
400 static uint16_t
calculate_point_sprite_mask(struct elk_sf_compile * c,GLuint reg)401 calculate_point_sprite_mask(struct elk_sf_compile *c, GLuint reg)
402 {
403 int varying1, varying2;
404 uint16_t pc = 0;
405
406 varying1 = vert_reg_to_varying(c, reg, 0);
407 if (varying1 >= VARYING_SLOT_TEX0 && varying1 <= VARYING_SLOT_TEX7) {
408 if (c->key.point_sprite_coord_replace & (1 << (varying1 - VARYING_SLOT_TEX0)))
409 pc |= 0x0f;
410 }
411 if (varying1 == ELK_VARYING_SLOT_PNTC)
412 pc |= 0x0f;
413
414 varying2 = vert_reg_to_varying(c, reg, 1);
415 if (varying2 >= VARYING_SLOT_TEX0 && varying2 <= VARYING_SLOT_TEX7) {
416 if (c->key.point_sprite_coord_replace & (1 << (varying2 -
417 VARYING_SLOT_TEX0)))
418 pc |= 0xf0;
419 }
420 if (varying2 == ELK_VARYING_SLOT_PNTC)
421 pc |= 0xf0;
422
423 return pc;
424 }
425
426 static void
set_predicate_control_flag_value(struct elk_codegen * p,struct elk_sf_compile * c,unsigned value)427 set_predicate_control_flag_value(struct elk_codegen *p,
428 struct elk_sf_compile *c,
429 unsigned value)
430 {
431 elk_set_default_predicate_control(p, ELK_PREDICATE_NONE);
432
433 if (value != 0xff) {
434 if (value != c->flag_value) {
435 elk_MOV(p, elk_flag_reg(0, 0), elk_imm_uw(value));
436 c->flag_value = value;
437 }
438
439 elk_set_default_predicate_control(p, ELK_PREDICATE_NORMAL);
440 }
441 }
442
elk_emit_tri_setup(struct elk_sf_compile * c,bool allocate)443 static void elk_emit_tri_setup(struct elk_sf_compile *c, bool allocate)
444 {
445 struct elk_codegen *p = &c->func;
446 GLuint i;
447
448 c->flag_value = 0xff;
449 c->nr_verts = 3;
450
451 if (allocate)
452 alloc_regs(c);
453
454 invert_det(c);
455 copy_z_inv_w(c);
456
457 if (c->key.do_twoside_color)
458 do_twoside_color(c);
459
460 if (c->key.contains_flat_varying)
461 do_flatshade_triangle(c);
462
463
464 for (i = 0; i < c->nr_setup_regs; i++)
465 {
466 /* Pair of incoming attributes:
467 */
468 struct elk_reg a0 = offset(c->vert[0], i);
469 struct elk_reg a1 = offset(c->vert[1], i);
470 struct elk_reg a2 = offset(c->vert[2], i);
471 GLushort pc, pc_persp, pc_linear;
472 bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
473
474 if (pc_persp)
475 {
476 set_predicate_control_flag_value(p, c, pc_persp);
477 elk_MUL(p, a0, a0, c->inv_w[0]);
478 elk_MUL(p, a1, a1, c->inv_w[1]);
479 elk_MUL(p, a2, a2, c->inv_w[2]);
480 }
481
482
483 /* Calculate coefficients for interpolated values:
484 */
485 if (pc_linear)
486 {
487 set_predicate_control_flag_value(p, c, pc_linear);
488
489 elk_ADD(p, c->a1_sub_a0, a1, negate(a0));
490 elk_ADD(p, c->a2_sub_a0, a2, negate(a0));
491
492 /* calculate dA/dx
493 */
494 elk_MUL(p, elk_null_reg(), c->a1_sub_a0, c->dy2);
495 elk_MAC(p, c->tmp, c->a2_sub_a0, negate(c->dy0));
496 elk_MUL(p, c->m1Cx, c->tmp, c->inv_det);
497
498 /* calculate dA/dy
499 */
500 elk_MUL(p, elk_null_reg(), c->a2_sub_a0, c->dx0);
501 elk_MAC(p, c->tmp, c->a1_sub_a0, negate(c->dx2));
502 elk_MUL(p, c->m2Cy, c->tmp, c->inv_det);
503 }
504
505 {
506 set_predicate_control_flag_value(p, c, pc);
507 /* start point for interpolation
508 */
509 elk_MOV(p, c->m3C0, a0);
510
511 /* Copy m0..m3 to URB. m0 is implicitly copied from r0 in
512 * the send instruction:
513 */
514 elk_urb_WRITE(p,
515 elk_null_reg(),
516 0,
517 elk_vec8_grf(0, 0), /* r0, will be copied to m0 */
518 last ? ELK_URB_WRITE_EOT_COMPLETE
519 : ELK_URB_WRITE_NO_FLAGS,
520 4, /* msg len */
521 0, /* response len */
522 i*4, /* offset */
523 ELK_URB_SWIZZLE_TRANSPOSE); /* XXX: Swizzle control "SF to windower" */
524 }
525 }
526
527 elk_set_default_predicate_control(p, ELK_PREDICATE_NONE);
528 }
529
530
531
elk_emit_line_setup(struct elk_sf_compile * c,bool allocate)532 static void elk_emit_line_setup(struct elk_sf_compile *c, bool allocate)
533 {
534 struct elk_codegen *p = &c->func;
535 GLuint i;
536
537 c->flag_value = 0xff;
538 c->nr_verts = 2;
539
540 if (allocate)
541 alloc_regs(c);
542
543 invert_det(c);
544 copy_z_inv_w(c);
545
546 if (c->key.contains_flat_varying)
547 do_flatshade_line(c);
548
549 for (i = 0; i < c->nr_setup_regs; i++)
550 {
551 /* Pair of incoming attributes:
552 */
553 struct elk_reg a0 = offset(c->vert[0], i);
554 struct elk_reg a1 = offset(c->vert[1], i);
555 GLushort pc, pc_persp, pc_linear;
556 bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
557
558 if (pc_persp)
559 {
560 set_predicate_control_flag_value(p, c, pc_persp);
561 elk_MUL(p, a0, a0, c->inv_w[0]);
562 elk_MUL(p, a1, a1, c->inv_w[1]);
563 }
564
565 /* Calculate coefficients for position, color:
566 */
567 if (pc_linear) {
568 set_predicate_control_flag_value(p, c, pc_linear);
569
570 elk_ADD(p, c->a1_sub_a0, a1, negate(a0));
571
572 elk_MUL(p, c->tmp, c->a1_sub_a0, c->dx0);
573 elk_MUL(p, c->m1Cx, c->tmp, c->inv_det);
574
575 elk_MUL(p, c->tmp, c->a1_sub_a0, c->dy0);
576 elk_MUL(p, c->m2Cy, c->tmp, c->inv_det);
577 }
578
579 {
580 set_predicate_control_flag_value(p, c, pc);
581
582 /* start point for interpolation
583 */
584 elk_MOV(p, c->m3C0, a0);
585
586 /* Copy m0..m3 to URB.
587 */
588 elk_urb_WRITE(p,
589 elk_null_reg(),
590 0,
591 elk_vec8_grf(0, 0),
592 last ? ELK_URB_WRITE_EOT_COMPLETE
593 : ELK_URB_WRITE_NO_FLAGS,
594 4, /* msg len */
595 0, /* response len */
596 i*4, /* urb destination offset */
597 ELK_URB_SWIZZLE_TRANSPOSE);
598 }
599 }
600
601 elk_set_default_predicate_control(p, ELK_PREDICATE_NONE);
602 }
603
elk_emit_point_sprite_setup(struct elk_sf_compile * c,bool allocate)604 static void elk_emit_point_sprite_setup(struct elk_sf_compile *c, bool allocate)
605 {
606 struct elk_codegen *p = &c->func;
607 GLuint i;
608
609 c->flag_value = 0xff;
610 c->nr_verts = 1;
611
612 if (allocate)
613 alloc_regs(c);
614
615 copy_z_inv_w(c);
616 for (i = 0; i < c->nr_setup_regs; i++)
617 {
618 struct elk_reg a0 = offset(c->vert[0], i);
619 GLushort pc, pc_persp, pc_linear, pc_coord_replace;
620 bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
621
622 pc_coord_replace = calculate_point_sprite_mask(c, i);
623 pc_persp &= ~pc_coord_replace;
624
625 if (pc_persp) {
626 set_predicate_control_flag_value(p, c, pc_persp);
627 elk_MUL(p, a0, a0, c->inv_w[0]);
628 }
629
630 /* Point sprite coordinate replacement: A texcoord with this
631 * enabled gets replaced with the value (x, y, 0, 1) where x and
632 * y vary from 0 to 1 across the horizontal and vertical of the
633 * point.
634 */
635 if (pc_coord_replace) {
636 set_predicate_control_flag_value(p, c, pc_coord_replace);
637 /* Calculate 1.0/PointWidth */
638 elk_gfx4_math(&c->func,
639 c->tmp,
640 ELK_MATH_FUNCTION_INV,
641 0,
642 c->dx0,
643 ELK_MATH_PRECISION_FULL);
644
645 elk_set_default_access_mode(p, ELK_ALIGN_16);
646
647 /* dA/dx, dA/dy */
648 elk_MOV(p, c->m1Cx, elk_imm_f(0.0));
649 elk_MOV(p, c->m2Cy, elk_imm_f(0.0));
650 elk_MOV(p, elk_writemask(c->m1Cx, WRITEMASK_X), c->tmp);
651 if (c->key.sprite_origin_lower_left) {
652 elk_MOV(p, elk_writemask(c->m2Cy, WRITEMASK_Y), negate(c->tmp));
653 } else {
654 elk_MOV(p, elk_writemask(c->m2Cy, WRITEMASK_Y), c->tmp);
655 }
656
657 /* attribute constant offset */
658 elk_MOV(p, c->m3C0, elk_imm_f(0.0));
659 if (c->key.sprite_origin_lower_left) {
660 elk_MOV(p, elk_writemask(c->m3C0, WRITEMASK_YW), elk_imm_f(1.0));
661 } else {
662 elk_MOV(p, elk_writemask(c->m3C0, WRITEMASK_W), elk_imm_f(1.0));
663 }
664
665 elk_set_default_access_mode(p, ELK_ALIGN_1);
666 }
667
668 if (pc & ~pc_coord_replace) {
669 set_predicate_control_flag_value(p, c, pc & ~pc_coord_replace);
670 elk_MOV(p, c->m1Cx, elk_imm_ud(0));
671 elk_MOV(p, c->m2Cy, elk_imm_ud(0));
672 elk_MOV(p, c->m3C0, a0); /* constant value */
673 }
674
675
676 set_predicate_control_flag_value(p, c, pc);
677 /* Copy m0..m3 to URB. */
678 elk_urb_WRITE(p,
679 elk_null_reg(),
680 0,
681 elk_vec8_grf(0, 0),
682 last ? ELK_URB_WRITE_EOT_COMPLETE
683 : ELK_URB_WRITE_NO_FLAGS,
684 4, /* msg len */
685 0, /* response len */
686 i*4, /* urb destination offset */
687 ELK_URB_SWIZZLE_TRANSPOSE);
688 }
689
690 elk_set_default_predicate_control(p, ELK_PREDICATE_NONE);
691 }
692
693 /* Points setup - several simplifications as all attributes are
694 * constant across the face of the point (point sprites excluded!)
695 */
elk_emit_point_setup(struct elk_sf_compile * c,bool allocate)696 static void elk_emit_point_setup(struct elk_sf_compile *c, bool allocate)
697 {
698 struct elk_codegen *p = &c->func;
699 GLuint i;
700
701 c->flag_value = 0xff;
702 c->nr_verts = 1;
703
704 if (allocate)
705 alloc_regs(c);
706
707 copy_z_inv_w(c);
708
709 elk_MOV(p, c->m1Cx, elk_imm_ud(0)); /* zero - move out of loop */
710 elk_MOV(p, c->m2Cy, elk_imm_ud(0)); /* zero - move out of loop */
711
712 for (i = 0; i < c->nr_setup_regs; i++)
713 {
714 struct elk_reg a0 = offset(c->vert[0], i);
715 GLushort pc, pc_persp, pc_linear;
716 bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
717
718 if (pc_persp)
719 {
720 /* This seems odd as the values are all constant, but the
721 * fragment shader will be expecting it:
722 */
723 set_predicate_control_flag_value(p, c, pc_persp);
724 elk_MUL(p, a0, a0, c->inv_w[0]);
725 }
726
727
728 /* The delta values are always zero, just send the starting
729 * coordinate. Again, this is to fit in with the interpolation
730 * code in the fragment shader.
731 */
732 {
733 set_predicate_control_flag_value(p, c, pc);
734
735 elk_MOV(p, c->m3C0, a0); /* constant value */
736
737 /* Copy m0..m3 to URB.
738 */
739 elk_urb_WRITE(p,
740 elk_null_reg(),
741 0,
742 elk_vec8_grf(0, 0),
743 last ? ELK_URB_WRITE_EOT_COMPLETE
744 : ELK_URB_WRITE_NO_FLAGS,
745 4, /* msg len */
746 0, /* response len */
747 i*4, /* urb destination offset */
748 ELK_URB_SWIZZLE_TRANSPOSE);
749 }
750 }
751
752 elk_set_default_predicate_control(p, ELK_PREDICATE_NONE);
753 }
754
elk_emit_anyprim_setup(struct elk_sf_compile * c)755 static void elk_emit_anyprim_setup( struct elk_sf_compile *c )
756 {
757 struct elk_codegen *p = &c->func;
758 struct elk_reg payload_prim = elk_uw1_reg(ELK_GENERAL_REGISTER_FILE, 1, 0);
759 struct elk_reg payload_attr = get_element_ud(elk_vec1_reg(ELK_GENERAL_REGISTER_FILE, 1, 0), 0);
760 struct elk_reg primmask;
761 int jmp;
762 struct elk_reg v1_null_ud = vec1(retype(elk_null_reg(), ELK_REGISTER_TYPE_UD));
763
764 c->nr_verts = 3;
765 alloc_regs(c);
766
767 primmask = retype(get_element(c->tmp, 0), ELK_REGISTER_TYPE_UD);
768
769 elk_MOV(p, primmask, elk_imm_ud(1));
770 elk_SHL(p, primmask, primmask, payload_prim);
771
772 elk_AND(p, v1_null_ud, primmask, elk_imm_ud((1<<_3DPRIM_TRILIST) |
773 (1<<_3DPRIM_TRISTRIP) |
774 (1<<_3DPRIM_TRIFAN) |
775 (1<<_3DPRIM_TRISTRIP_REVERSE) |
776 (1<<_3DPRIM_POLYGON) |
777 (1<<_3DPRIM_RECTLIST) |
778 (1<<_3DPRIM_TRIFAN_NOSTIPPLE)));
779 elk_inst_set_cond_modifier(p->devinfo, elk_last_inst, ELK_CONDITIONAL_Z);
780 jmp = elk_JMPI(p, elk_imm_d(0), ELK_PREDICATE_NORMAL) - p->store;
781 elk_emit_tri_setup(c, false);
782 elk_land_fwd_jump(p, jmp);
783
784 elk_AND(p, v1_null_ud, primmask, elk_imm_ud((1<<_3DPRIM_LINELIST) |
785 (1<<_3DPRIM_LINESTRIP) |
786 (1<<_3DPRIM_LINELOOP) |
787 (1<<_3DPRIM_LINESTRIP_CONT) |
788 (1<<_3DPRIM_LINESTRIP_BF) |
789 (1<<_3DPRIM_LINESTRIP_CONT_BF)));
790 elk_inst_set_cond_modifier(p->devinfo, elk_last_inst, ELK_CONDITIONAL_Z);
791 jmp = elk_JMPI(p, elk_imm_d(0), ELK_PREDICATE_NORMAL) - p->store;
792 elk_emit_line_setup(c, false);
793 elk_land_fwd_jump(p, jmp);
794
795 elk_AND(p, v1_null_ud, payload_attr, elk_imm_ud(1<<ELK_SPRITE_POINT_ENABLE));
796 elk_inst_set_cond_modifier(p->devinfo, elk_last_inst, ELK_CONDITIONAL_Z);
797 jmp = elk_JMPI(p, elk_imm_d(0), ELK_PREDICATE_NORMAL) - p->store;
798 elk_emit_point_sprite_setup(c, false);
799 elk_land_fwd_jump(p, jmp);
800
801 elk_emit_point_setup( c, false );
802 }
803
804 const unsigned *
elk_compile_sf(const struct elk_compiler * compiler,void * mem_ctx,const struct elk_sf_prog_key * key,struct elk_sf_prog_data * prog_data,struct intel_vue_map * vue_map,unsigned * final_assembly_size)805 elk_compile_sf(const struct elk_compiler *compiler,
806 void *mem_ctx,
807 const struct elk_sf_prog_key *key,
808 struct elk_sf_prog_data *prog_data,
809 struct intel_vue_map *vue_map,
810 unsigned *final_assembly_size)
811 {
812 struct elk_sf_compile c;
813 memset(&c, 0, sizeof(c));
814
815 /* Begin the compilation:
816 */
817 elk_init_codegen(&compiler->isa, &c.func, mem_ctx);
818
819 c.key = *key;
820 c.vue_map = *vue_map;
821 if (c.key.do_point_coord) {
822 /*
823 * gl_PointCoord is a FS instead of VS builtin variable, thus it's
824 * not included in c.vue_map generated in VS stage. Here we add
825 * it manually to let SF shader generate the needed interpolation
826 * coefficient for FS shader.
827 */
828 c.vue_map.varying_to_slot[ELK_VARYING_SLOT_PNTC] = c.vue_map.num_slots;
829 c.vue_map.slot_to_varying[c.vue_map.num_slots++] = ELK_VARYING_SLOT_PNTC;
830 }
831 c.urb_entry_read_offset = ELK_SF_URB_ENTRY_READ_OFFSET;
832 c.nr_attr_regs = (c.vue_map.num_slots + 1)/2 - c.urb_entry_read_offset;
833 c.nr_setup_regs = c.nr_attr_regs;
834
835 c.prog_data.urb_read_length = c.nr_attr_regs;
836 c.prog_data.urb_entry_size = c.nr_setup_regs * 2;
837
838 /* Which primitive? Or all three?
839 */
840 switch (key->primitive) {
841 case ELK_SF_PRIM_TRIANGLES:
842 c.nr_verts = 3;
843 elk_emit_tri_setup( &c, true );
844 break;
845 case ELK_SF_PRIM_LINES:
846 c.nr_verts = 2;
847 elk_emit_line_setup( &c, true );
848 break;
849 case ELK_SF_PRIM_POINTS:
850 c.nr_verts = 1;
851 if (key->do_point_sprite)
852 elk_emit_point_sprite_setup( &c, true );
853 else
854 elk_emit_point_setup( &c, true );
855 break;
856 case ELK_SF_PRIM_UNFILLED_TRIS:
857 c.nr_verts = 3;
858 elk_emit_anyprim_setup( &c );
859 break;
860 default:
861 unreachable("not reached");
862 }
863
864 /* FINISHME: SF programs use calculated jumps (i.e., JMPI with a register
865 * source). Compacting would be difficult.
866 */
867 /* elk_compact_instructions(&c.func, 0, 0, NULL); */
868
869 *prog_data = c.prog_data;
870
871 const unsigned *program = elk_get_program(&c.func, final_assembly_size);
872
873 if (INTEL_DEBUG(DEBUG_SF)) {
874 fprintf(stderr, "sf:\n");
875 elk_disassemble_with_labels(&compiler->isa,
876 program, 0, *final_assembly_size, stderr);
877 fprintf(stderr, "\n");
878 }
879
880 return program;
881 }
882