/*
 * Copyright © 2019 Google, Inc
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * \file lower_precision.cpp
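 *
 * Lowers mediump/lowp float, int, and uint operations to 16-bit types
 * (float16/int16/uint16), inserting conversions to and from the 32-bit types
 * at the boundaries with highp code. Lowerable temporaries, uniforms, and the
 * bodies of called builtins are rewritten as well, so that 16-bit values
 * survive across control flow instead of being widened back by 32-bit phis.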
 */

#include "main/macros.h"
#include "main/consts_exts.h"
#include "compiler/glsl_types.h"
#include "ir.h"
#include "ir_builder.h"
#include "ir_optimization.h"
#include "ir_rvalue_visitor.h"
#include "util/half_float.h"
#include "util/set.h"
#include "util/hash_table.h"
#include <vector>

namespace {

class find_precision_visitor : public ir_rvalue_enter_visitor {
public:
   find_precision_visitor(const struct gl_shader_compiler_options *options);
   find_precision_visitor(const find_precision_visitor &) = delete;
   ~find_precision_visitor();
   find_precision_visitor & operator=(const find_precision_visitor &) = delete;

   virtual void handle_rvalue(ir_rvalue **rvalue);
   virtual ir_visitor_status visit_enter(ir_call *ir);

   ir_function_signature *map_builtin(ir_function_signature *sig);

   /* Set of rvalues that can be lowered. This will be filled in by
    * find_lowerable_rvalues_visitor. Only the root node of a lowerable section
    * will be added to this set.
    */
   struct set *lowerable_rvalues;

   /**
    * A mapping of builtin signature functions to lowered versions. This is
    * filled in lazily when a lowered version is needed.
    */
   struct hash_table *lowered_builtins;
   /**
    * A temporary hash table only used in order to clone functions.
    */
   struct hash_table *clone_ht;

   void *lowered_builtin_mem_ctx;

   const struct gl_shader_compiler_options *options;
};

class find_lowerable_rvalues_visitor : public ir_hierarchical_visitor {
public:
   enum can_lower_state {
      UNKNOWN,
      CANT_LOWER,
      SHOULD_LOWER,
   };

   enum parent_relation {
      /* The parent performs a further operation involving the result from the
       * child and can be lowered along with it.
       */
      COMBINED_OPERATION,
      /* The parent instruction’s operation is independent of the child type so
       * the child should be lowered separately.
       */
      INDEPENDENT_OPERATION,
   };

   struct stack_entry {
      ir_instruction *instr;
      enum can_lower_state state;
      /* List of child rvalues that can be lowered. When this stack entry is
       * popped, if this node itself can’t be lowered then all of the children
       * are root nodes to lower so we will add them to lowerable_rvalues.
       * Otherwise if this node can also be lowered then we won’t add the
       * children because we only want to add the topmost lowerable nodes to
       * lowerable_rvalues and the children will be lowered as part of lowering
       * this node.
       */
      std::vector<ir_instruction *> lowerable_children;
   };

   find_lowerable_rvalues_visitor(struct set *result,
                                  const struct gl_shader_compiler_options *options);

   static void stack_enter(class ir_instruction *ir, void *data);
   static void stack_leave(class ir_instruction *ir, void *data);

   virtual ir_visitor_status visit(ir_constant *ir);
   virtual ir_visitor_status visit(ir_dereference_variable *ir);

   virtual ir_visitor_status visit_enter(ir_dereference_record *ir);
   virtual ir_visitor_status visit_enter(ir_dereference_array *ir);
   virtual ir_visitor_status visit_enter(ir_texture *ir);
   virtual ir_visitor_status visit_enter(ir_expression *ir);

   virtual ir_visitor_status visit_leave(ir_assignment *ir);
   virtual ir_visitor_status visit_leave(ir_call *ir);

   can_lower_state handle_precision(const glsl_type *type,
                                    int precision) const;

   static parent_relation get_parent_relation(ir_instruction *parent,
                                              ir_instruction *child);

   std::vector<stack_entry> stack;
   struct set *lowerable_rvalues;
   const struct gl_shader_compiler_options *options;

   void pop_stack_entry();
   void add_lowerable_children(const stack_entry &entry);
};

class lower_precision_visitor : public ir_rvalue_visitor {
public:
   virtual void handle_rvalue(ir_rvalue **rvalue);
   virtual ir_visitor_status visit_enter(ir_dereference_array *);
   virtual ir_visitor_status visit_enter(ir_dereference_record *);
   virtual ir_visitor_status visit_enter(ir_call *ir);
   virtual ir_visitor_status visit_enter(ir_texture *ir);
   virtual ir_visitor_status visit_leave(ir_expression *);
};

static bool
can_lower_type(const struct gl_shader_compiler_options *options,
               const glsl_type *type)
{
   /* Don’t lower any expressions involving non-float types except bool and
    * texture samplers. This will rule out operations that change the type such
    * as conversion to ints. Instead it will end up lowering the arguments and
    * adding a final conversion to float32. We want to handle boolean types so
    * that it will do comparisons as 16-bit.
    */

   switch (glsl_without_array(type)->base_type) {
   /* TODO: should we do anything for these two with regard to Int16 vs FP16
    * support?
    */
   case GLSL_TYPE_BOOL:
   case GLSL_TYPE_SAMPLER:
   case GLSL_TYPE_IMAGE:
      return true;

   case GLSL_TYPE_FLOAT:
      return options->LowerPrecisionFloat16;

   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT:
      return options->LowerPrecisionInt16;

   default:
      return false;
   }
}

find_lowerable_rvalues_visitor::find_lowerable_rvalues_visitor(struct set *res,
                                                               const struct gl_shader_compiler_options *opts)
{
   lowerable_rvalues = res;
   options = opts;
   callback_enter = stack_enter;
   callback_leave = stack_leave;
   data_enter = this;
   data_leave = this;
}

void
find_lowerable_rvalues_visitor::stack_enter(class ir_instruction *ir,
                                            void *data)
{
   find_lowerable_rvalues_visitor *state =
      (find_lowerable_rvalues_visitor *) data;

   /* Add a new stack entry for this instruction */
   stack_entry entry;

   entry.instr = ir;
   entry.state = state->in_assignee ? CANT_LOWER : UNKNOWN;

   state->stack.push_back(entry);
}

void
find_lowerable_rvalues_visitor::add_lowerable_children(const stack_entry &entry)
{
   /* We can’t lower this node so if there were any pending children then they
    * are all root lowerable nodes and we should add them to the set.
    */
   for (auto &it : entry.lowerable_children)
      _mesa_set_add(lowerable_rvalues, it);
}

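/* Pop the top stack entry: fold its lowering decision into its parent (for
 * combined operations) and, when the entry itself is the topmost lowerable
 * node, record it (or its pending children) in lowerable_rvalues.
 */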
void
find_lowerable_rvalues_visitor::pop_stack_entry()
{
   const stack_entry &entry = stack.back();

   if (stack.size() >= 2) {
      /* Combine this state into the parent state, unless the parent operation
       * doesn’t have any relation to the child operations
       */
      stack_entry &parent = stack.end()[-2];
      parent_relation rel = get_parent_relation(parent.instr, entry.instr);

      if (rel == COMBINED_OPERATION) {
         switch (entry.state) {
         case CANT_LOWER:
            parent.state = CANT_LOWER;
            break;
         case SHOULD_LOWER:
            if (parent.state == UNKNOWN)
               parent.state = SHOULD_LOWER;
            break;
         case UNKNOWN:
            break;
         }
      }
   }

   if (entry.state == SHOULD_LOWER) {
      ir_rvalue *rv = entry.instr->as_rvalue();

      if (rv == NULL) {
         add_lowerable_children(entry);
      } else if (stack.size() >= 2) {
         stack_entry &parent = stack.end()[-2];

         switch (get_parent_relation(parent.instr, rv)) {
         case COMBINED_OPERATION:
            /* We only want to add the toplevel lowerable instructions to the
             * lowerable set. Therefore if there is a parent then instead of
             * adding this instruction to the set we will queue it depending on
             * the result of the parent instruction.
             */
            parent.lowerable_children.push_back(entry.instr);
            break;
         case INDEPENDENT_OPERATION:
            _mesa_set_add(lowerable_rvalues, rv);
            break;
         }
      } else {
         /* This is a toplevel node so add it directly to the lowerable
          * set.
          */
         _mesa_set_add(lowerable_rvalues, rv);
      }
   } else if (entry.state == CANT_LOWER) {
      add_lowerable_children(entry);
   }

   stack.pop_back();
}

void
find_lowerable_rvalues_visitor::stack_leave(class ir_instruction *ir,
                                            void *data)
{
   find_lowerable_rvalues_visitor *state =
      (find_lowerable_rvalues_visitor *) data;

   state->pop_stack_entry();
}

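/* Translate a precision qualifier on a lowerable type into a lowering
 * decision: mediump/lowp should be lowered, highp can’t be, and an
 * unqualified precision leaves the decision to the surrounding expression.
 */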
enum find_lowerable_rvalues_visitor::can_lower_state
find_lowerable_rvalues_visitor::handle_precision(const glsl_type *type,
                                                 int precision) const
{
   if (!can_lower_type(options, type))
      return CANT_LOWER;

   switch (precision) {
   case GLSL_PRECISION_NONE:
      return UNKNOWN;
   case GLSL_PRECISION_HIGH:
      return CANT_LOWER;
   case GLSL_PRECISION_MEDIUM:
   case GLSL_PRECISION_LOW:
      return SHOULD_LOWER;
   }

   return CANT_LOWER;
}

enum find_lowerable_rvalues_visitor::parent_relation
find_lowerable_rvalues_visitor::get_parent_relation(ir_instruction *parent,
                                                    ir_instruction *child)
{
   /* If the parent is a dereference instruction then the only child could be
    * for example an array dereference and that should be lowered independently
    * of the parent.
    */
   if (parent->as_dereference())
      return INDEPENDENT_OPERATION;

   /* The precision of texture sampling depends on the precision of the
    * sampler. The rest of the arguments don’t matter so we can treat it as an
    * independent operation.
    */
   if (parent->as_texture())
      return INDEPENDENT_OPERATION;

   return COMBINED_OPERATION;
}

ir_visitor_status
find_lowerable_rvalues_visitor::visit(ir_constant *ir)
{
   stack_enter(ir, this);

   if (!can_lower_type(options, ir->type))
      stack.back().state = CANT_LOWER;

   stack_leave(ir, this);

   return visit_continue;
}

ir_visitor_status
find_lowerable_rvalues_visitor::visit(ir_dereference_variable *ir)
{
   stack_enter(ir, this);

   if (stack.back().state == UNKNOWN)
      stack.back().state = handle_precision(ir->type, ir->precision());

   stack_leave(ir, this);

   return visit_continue;
}

ir_visitor_status
find_lowerable_rvalues_visitor::visit_enter(ir_dereference_record *ir)
{
   ir_hierarchical_visitor::visit_enter(ir);

   if (stack.back().state == UNKNOWN)
      stack.back().state = handle_precision(ir->type, ir->precision());

   return visit_continue;
}

ir_visitor_status
find_lowerable_rvalues_visitor::visit_enter(ir_dereference_array *ir)
{
   ir_hierarchical_visitor::visit_enter(ir);

   if (stack.back().state == UNKNOWN)
      stack.back().state = handle_precision(ir->type, ir->precision());

   return visit_continue;
}

ir_visitor_status
find_lowerable_rvalues_visitor::visit_enter(ir_texture *ir)
{
   ir_hierarchical_visitor::visit_enter(ir);

   /* The precision of the sample value depends on the precision of the
    * sampler.
    */
   stack.back().state = handle_precision(ir->type,
                                         ir->sampler->precision());
   return visit_continue;
}

ir_visitor_status
find_lowerable_rvalues_visitor::visit_enter(ir_expression *ir)
{
   ir_hierarchical_visitor::visit_enter(ir);

   if (!can_lower_type(options, ir->type))
      stack.back().state = CANT_LOWER;

   /* Don't lower precision for derivative calculations */
   if (!options->LowerPrecisionDerivatives &&
       (ir->operation == ir_unop_dFdx ||
        ir->operation == ir_unop_dFdx_coarse ||
        ir->operation == ir_unop_dFdx_fine ||
        ir->operation == ir_unop_dFdy ||
        ir->operation == ir_unop_dFdy_coarse ||
        ir->operation == ir_unop_dFdy_fine)) {
      stack.back().state = CANT_LOWER;
   }

   return visit_continue;
}

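/* Determine the precision to assume for the return value of a call. Builtins
 * mostly don’t declare a return precision, so it is inferred from the
 * arguments (or from the sampler/image for texture and imageLoad wrappers).
 */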
static unsigned
handle_call(ir_call *ir, const struct set *lowerable_rvalues)
{
   /* The intrinsic call is inside the wrapper imageLoad function that will
    * be inlined. We have to handle both of them.
    */
   if (ir->callee->intrinsic_id == ir_intrinsic_image_load ||
       (ir->callee->is_builtin() &&
        !strcmp(ir->callee_name(), "imageLoad"))) {
      ir_rvalue *param = (ir_rvalue*)ir->actual_parameters.get_head();
      ir_variable *resource = param->variable_referenced();

      assert(ir->callee->return_precision == GLSL_PRECISION_HIGH);
      assert(glsl_type_is_image(glsl_without_array(resource->type)));

      /* GLSL ES 3.20 requires that images have a precision modifier, but if
       * you set one, it doesn't do anything, because all intrinsics are
       * defined with highp. This seems to be a spec bug.
       *
       * In theory we could set the return value to mediump if the image
       * format has a lower precision. This appears to be the most sensible
       * thing to do.
       */
      const struct util_format_description *desc =
         util_format_description(resource->data.image_format);
      int i =
         util_format_get_first_non_void_channel(resource->data.image_format);
      bool mediump;

      assert(i >= 0);

      if (desc->channel[i].pure_integer ||
          desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT)
         mediump = desc->channel[i].size <= 16;
      else
         mediump = desc->channel[i].size <= 10; /* unorm/snorm */

      return mediump ? GLSL_PRECISION_MEDIUM : GLSL_PRECISION_HIGH;
   }

   /* Return the declared precision for user-defined functions. */
   if (!ir->callee->is_builtin() || ir->callee->return_precision != GLSL_PRECISION_NONE)
      return ir->callee->return_precision;

   /* Handle special calls. */
   if (ir->callee->is_builtin() && ir->actual_parameters.length()) {
      ir_rvalue *param = (ir_rvalue*)ir->actual_parameters.get_head();
      ir_variable *var = param->variable_referenced();

      /* Handle builtin wrappers around ir_texture opcodes. These wrappers will
       * be inlined by lower_precision() if we return true here, so that we can
       * get to ir_texture later and do proper lowering.
       *
       * We should lower the type of the return value if the sampler type
       * uses lower precision. The function parameters don't matter.
       */
      if (var && glsl_type_is_sampler(glsl_without_array(var->type))) {
         /* textureGatherOffsets always takes a highp array of constants. As
          * per the discussion https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16547#note_1393704
          * trying to lower the precision results in a segfault later on
          * in the compiler as textureGatherOffsets will end up being passed
          * a temp when it's expecting a constant as required by the spec.
          */
         if (!strcmp(ir->callee_name(), "textureGatherOffsets"))
            return GLSL_PRECISION_HIGH;

         return var->data.precision;
      }
   }

   if (ir->callee->return_precision != GLSL_PRECISION_NONE)
      return ir->callee->return_precision;

   if (/* Parameters are always implicitly promoted to highp: */
       !strcmp(ir->callee_name(), "floatBitsToInt") ||
       !strcmp(ir->callee_name(), "floatBitsToUint") ||
       !strcmp(ir->callee_name(), "intBitsToFloat") ||
       !strcmp(ir->callee_name(), "uintBitsToFloat"))
      return GLSL_PRECISION_HIGH;

   /* Number of parameters to check if they are lowerable. */
   unsigned check_parameters = ir->actual_parameters.length();

   /* "For the interpolateAt* functions, the call will return a precision
    * qualification matching the precision of the interpolant argument to the
    * function call."
    *
    * and
    *
    * "The precision qualification of the value returned from bitfieldExtract()
    * matches the precision qualification of the call's input argument
    * “value”."
    */
   if (!strcmp(ir->callee_name(), "interpolateAtOffset") ||
       !strcmp(ir->callee_name(), "interpolateAtSample") ||
       !strcmp(ir->callee_name(), "bitfieldExtract")) {
      check_parameters = 1;
   } else if (!strcmp(ir->callee_name(), "bitfieldInsert")) {
      /* "The precision qualification of the value returned from bitfieldInsert
       * matches the highest precision qualification of the call's input
       * arguments “base” and “insert”."
       */
      check_parameters = 2;
   }

   /* If the call is to a builtin, then the function won’t have a return
    * precision and we should determine it from the precision of the arguments.
    */
   foreach_in_list(ir_rvalue, param, &ir->actual_parameters) {
      if (!check_parameters)
         break;

      if (!param->as_constant() &&
          _mesa_set_search(lowerable_rvalues, param) == NULL)
         return GLSL_PRECISION_HIGH;

      --check_parameters;
   }

   return GLSL_PRECISION_MEDIUM;
}

ir_visitor_status
find_lowerable_rvalues_visitor::visit_leave(ir_call *ir)
{
   ir_hierarchical_visitor::visit_leave(ir);

   /* Special case for handling temporary variables generated by the compiler
    * for function calls. If we assign to one of these using a function call
    * that has a lowerable return type then we can assume the temporary
    * variable should have a medium precision too.
    */

   /* Do nothing if the return type is void. */
   if (!ir->return_deref)
      return visit_continue;

   ir_variable *var = ir->return_deref->variable_referenced();

   assert(var->data.mode == ir_var_temporary);

   unsigned return_precision = handle_call(ir, lowerable_rvalues);

   can_lower_state lower_state =
      handle_precision(var->type, return_precision);

   if (lower_state == SHOULD_LOWER) {
      /* Function calls always write to a temporary return value in the caller,
       * which has no other users. That temp may start with the precision of
       * the function's signature, but if we're inferring the precision of an
       * unqualified builtin operation (particularly the imageLoad overrides!)
       * then we need to update it.
       */
      var->data.precision = GLSL_PRECISION_MEDIUM;
   } else {
      var->data.precision = GLSL_PRECISION_HIGH;
   }

   return visit_continue;
}

ir_visitor_status
find_lowerable_rvalues_visitor::visit_leave(ir_assignment *ir)
{
   ir_hierarchical_visitor::visit_leave(ir);

   /* Special case for handling temporary variables generated by the compiler.
    * If we assign to one of these using a lowered precision then we can assume
    * the temporary variable should have a medium precision too.
    */
   ir_variable *var = ir->lhs->variable_referenced();

   if (var->data.mode == ir_var_temporary) {
      if (_mesa_set_search(lowerable_rvalues, ir->rhs)) {
         /* Only override the precision if this is the first assignment. For
          * temporaries such as the ones generated for the ?: operator there
          * can be multiple assignments with different precisions. This way we
          * get the highest precision of all of the assignments.
          */
         if (var->data.precision == GLSL_PRECISION_NONE)
            var->data.precision = GLSL_PRECISION_MEDIUM;
      } else if (!ir->rhs->as_constant()) {
         var->data.precision = GLSL_PRECISION_HIGH;
      }
   }

   return visit_continue;
}

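/* Walk the instruction list and fill "result" with the root rvalues whose
 * precision allows them to be lowered to 16 bits.
 */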
void
find_lowerable_rvalues(const struct gl_shader_compiler_options *options,
                       exec_list *instructions,
                       struct set *result)
{
   find_lowerable_rvalues_visitor v(result, options);

   visit_list_elements(&v, instructions);

   assert(v.stack.empty());
}

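/* Return the 32-bit (up == true) or 16-bit (up == false) equivalent of the
 * given float/int/uint type, recursing through array types.
 */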
static const glsl_type *
convert_type(bool up, const glsl_type *type)
{
   if (glsl_type_is_array(type)) {
      return glsl_array_type(convert_type(up, type->fields.array),
                             glsl_array_size(type),
                             type->explicit_stride);
   }

   glsl_base_type new_base_type;

   if (up) {
      switch (type->base_type) {
      case GLSL_TYPE_FLOAT16:
         new_base_type = GLSL_TYPE_FLOAT;
         break;
      case GLSL_TYPE_INT16:
         new_base_type = GLSL_TYPE_INT;
         break;
      case GLSL_TYPE_UINT16:
         new_base_type = GLSL_TYPE_UINT;
         break;
      default:
         unreachable("invalid type");
         return NULL;
      }
   } else {
      switch (type->base_type) {
      case GLSL_TYPE_FLOAT:
         new_base_type = GLSL_TYPE_FLOAT16;
         break;
      case GLSL_TYPE_INT:
         new_base_type = GLSL_TYPE_INT16;
         break;
      case GLSL_TYPE_UINT:
         new_base_type = GLSL_TYPE_UINT16;
         break;
      default:
         unreachable("invalid type");
         return NULL;
      }
   }

   return glsl_simple_explicit_type(new_base_type,
                                    type->vector_elements,
                                    type->matrix_columns,
                                    type->explicit_stride,
                                    type->interface_row_major,
                                    0 /* explicit_alignment */);
}

static const glsl_type *
lower_glsl_type(const glsl_type *type)
{
   return convert_type(false, type);
}

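/* Wrap "ir" in the expression that converts it to the corresponding 32-bit
 * (up == true) or 16-bit (up == false) type.
 */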
static ir_rvalue *
convert_precision(bool up, ir_rvalue *ir)
{
   unsigned op;

   if (up) {
      switch (ir->type->base_type) {
      case GLSL_TYPE_FLOAT16:
         op = ir_unop_f162f;
         break;
      case GLSL_TYPE_INT16:
         op = ir_unop_i2i;
         break;
      case GLSL_TYPE_UINT16:
         op = ir_unop_u2u;
         break;
      default:
         unreachable("invalid type");
         return NULL;
      }
   } else {
      switch (ir->type->base_type) {
      case GLSL_TYPE_FLOAT:
         op = ir_unop_f2fmp;
         break;
      case GLSL_TYPE_INT:
         op = ir_unop_i2imp;
         break;
      case GLSL_TYPE_UINT:
         op = ir_unop_u2ump;
         break;
      default:
         unreachable("invalid type");
         return NULL;
      }
   }

   const glsl_type *desired_type = convert_type(up, ir->type);
   void *mem_ctx = ralloc_parent(ir);
   return new(mem_ctx) ir_expression(op, desired_type, ir, NULL);
}

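/* Lower a single rvalue inside an expression tree that has already been
 * chosen for lowering: dereferences get an explicit down-conversion, while
 * other rvalues (including constants) simply have their type rewritten.
 */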
void
lower_precision_visitor::handle_rvalue(ir_rvalue **rvalue)
{
   ir_rvalue *ir = *rvalue;

   if (ir == NULL)
      return;

   if (ir->as_dereference()) {
      if (!glsl_type_is_boolean(ir->type))
         *rvalue = convert_precision(false, ir);
   } else if (glsl_type_is_32bit(ir->type)) {
      ir->type = lower_glsl_type(ir->type);

      ir_constant *const_ir = ir->as_constant();

      if (const_ir) {
         ir_constant_data value;

         if (ir->type->base_type == GLSL_TYPE_FLOAT16) {
            for (unsigned i = 0; i < ARRAY_SIZE(value.f16); i++)
               value.f16[i] = _mesa_float_to_half(const_ir->value.f[i]);
         } else if (ir->type->base_type == GLSL_TYPE_INT16) {
            for (unsigned i = 0; i < ARRAY_SIZE(value.i16); i++)
               value.i16[i] = const_ir->value.i[i];
         } else if (ir->type->base_type == GLSL_TYPE_UINT16) {
            for (unsigned i = 0; i < ARRAY_SIZE(value.u16); i++)
               value.u16[i] = const_ir->value.u[i];
         } else {
            unreachable("invalid type");
         }

         const_ir->value = value;
      }
   }
}

ir_visitor_status
lower_precision_visitor::visit_enter(ir_dereference_record *ir)
{
   /* We don’t want to lower the variable */
   return visit_continue_with_parent;
}

ir_visitor_status
lower_precision_visitor::visit_enter(ir_dereference_array *ir)
{
   /* We don’t want to convert the array index or the variable. If the array
    * index itself is lowerable that will be handled separately.
    */
   return visit_continue_with_parent;
}

ir_visitor_status
lower_precision_visitor::visit_enter(ir_call *ir)
{
   /* We don’t want to convert the arguments. These will be handled separately.
    */
   return visit_continue_with_parent;
}

ir_visitor_status
lower_precision_visitor::visit_enter(ir_texture *ir)
{
   /* We don’t want to convert the arguments. These will be handled separately.
    */
   return visit_continue_with_parent;
}

ir_visitor_status
lower_precision_visitor::visit_leave(ir_expression *ir)
{
   ir_rvalue_visitor::visit_leave(ir);

   /* If the expression is a conversion operation to or from bool then fix the
    * operation.
    */
   switch (ir->operation) {
   case ir_unop_b2f:
      ir->operation = ir_unop_b2f16;
      break;
   case ir_unop_f2b:
      ir->operation = ir_unop_f162b;
      break;
   case ir_unop_b2i:
   case ir_unop_i2b:
      /* Nothing to do - they both support int16. */
      break;
   default:
      break;
   }

   return visit_continue;
}

void
find_precision_visitor::handle_rvalue(ir_rvalue **rvalue)
{
   /* find_lowerable_rvalues_visitor has already determined which rvalues can
    * be lowered and recorded them in lowerable_rvalues. For each of those
    * rvalues, lower_precision_visitor rewrites the expression tree to 16 bits
    * and adds the necessary conversions (f2fmp, etc.) around it.
    */
   if (*rvalue == NULL)
      return;

   struct set_entry *entry = _mesa_set_search(lowerable_rvalues, *rvalue);

   if (!entry)
      return;

   _mesa_set_remove(lowerable_rvalues, entry);

   /* If the entire expression is just a variable dereference then trying to
    * lower it will just directly add pointless to and from conversions without
    * any actual operation in-between. Although these will eventually get
    * optimised out, avoiding generating them here also avoids breaking inout
    * parameters to functions.
    */
   if ((*rvalue)->as_dereference())
      return;

   lower_precision_visitor v;

   (*rvalue)->accept(&v);
   v.handle_rvalue(rvalue);

   /* We don’t need to add the final conversion if the final type has been
    * converted to bool
    */
   if ((*rvalue)->type->base_type != GLSL_TYPE_BOOL) {
      *rvalue = convert_precision(true, *rvalue);
   }
}

ir_visitor_status
find_precision_visitor::visit_enter(ir_call *ir)
{
   ir_rvalue_enter_visitor::visit_enter(ir);

   ir_variable *return_var =
      ir->return_deref ? ir->return_deref->variable_referenced() : NULL;

   /* Don't do anything for image_load here. We have only changed the return
    * value to mediump/lowp, so that following instructions can use reduced
    * precision.
    *
    * The return value type of the intrinsic itself isn't changed here, but
    * can be changed in NIR if all users use the *2*mp opcode.
    */
   if (ir->callee->intrinsic_id == ir_intrinsic_image_load)
      return visit_continue;

   /* If this is a call to a builtin and the find_lowerable_rvalues_visitor
    * overrode the precision of the temporary return variable, then we can
    * replace the builtin implementation with a lowered version.
    */

   if (!ir->callee->is_builtin() ||
       ir->callee->is_intrinsic() ||
       return_var == NULL ||
       (return_var->data.precision != GLSL_PRECISION_MEDIUM &&
        return_var->data.precision != GLSL_PRECISION_LOW))
      return visit_continue;

   ir->callee = map_builtin(ir->callee);
   ir->generate_inline(ir);
   ir->remove();

   return visit_continue_with_parent;
}

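/* Return (and cache) a clone of the builtin signature whose body has been run
 * through lower_precision() so that the inlined code uses 16-bit operations.
 */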
ir_function_signature *
find_precision_visitor::map_builtin(ir_function_signature *sig)
{
   if (lowered_builtins == NULL) {
      lowered_builtins = _mesa_pointer_hash_table_create(NULL);
      clone_ht = _mesa_pointer_hash_table_create(NULL);
      lowered_builtin_mem_ctx = ralloc_context(NULL);
   } else {
      struct hash_entry *entry = _mesa_hash_table_search(lowered_builtins, sig);
      if (entry)
         return (ir_function_signature *) entry->data;
   }

   ir_function_signature *lowered_sig =
      sig->clone(lowered_builtin_mem_ctx, clone_ht);

   /* If we're lowering the output precision of the function, then also lower
    * the precision of its inputs unless they have a specific qualifier. The
    * exception is bitCount, which doesn't declare its arguments highp but
    * should not be lowering the args to mediump just because the output is
    * lowp.
    */
   if (strcmp(sig->function_name(), "bitCount") != 0) {
      foreach_in_list(ir_variable, param, &lowered_sig->parameters) {
         /* Demote the precision of unqualified function arguments. */
         if (param->data.precision == GLSL_PRECISION_NONE)
            param->data.precision = GLSL_PRECISION_MEDIUM;
      }
   }

   lower_precision(options, &lowered_sig->body);

   _mesa_hash_table_clear(clone_ht, NULL);

   _mesa_hash_table_insert(lowered_builtins, sig, lowered_sig);

   return lowered_sig;
}

find_precision_visitor::find_precision_visitor(const struct gl_shader_compiler_options *options)
   : lowerable_rvalues(_mesa_pointer_set_create(NULL)),
     lowered_builtins(NULL),
     clone_ht(NULL),
     lowered_builtin_mem_ctx(NULL),
     options(options)
{
}

find_precision_visitor::~find_precision_visitor()
{
   _mesa_set_destroy(lowerable_rvalues, NULL);

   if (lowered_builtins) {
      _mesa_hash_table_destroy(lowered_builtins, NULL);
      _mesa_hash_table_destroy(clone_ht, NULL);
      ralloc_free(lowered_builtin_mem_ctx);
   }
}

/* Lowering opcodes to 16 bits is not enough for programs with control flow
 * (and the ?: operator, which is represented by if-then-else in the IR),
 * because temporary variables, which are used for passing values between
 * code blocks, are not lowered, resulting in 32-bit phis in NIR.
 *
 * First change the variable types to 16 bits, then change all ir_dereference
 * types to 16 bits.
 */
class lower_variables_visitor : public ir_rvalue_enter_visitor {
public:
   lower_variables_visitor(const struct gl_shader_compiler_options *options)
      : options(options) {
      lower_vars = _mesa_pointer_set_create(NULL);
   }

   virtual ~lower_variables_visitor()
   {
      _mesa_set_destroy(lower_vars, NULL);
   }

   lower_variables_visitor(const lower_variables_visitor &) = delete;
   lower_variables_visitor & operator=(const lower_variables_visitor &) = delete;

   virtual ir_visitor_status visit(ir_variable *var);
   virtual ir_visitor_status visit_enter(ir_assignment *ir);
   virtual ir_visitor_status visit_enter(ir_return *ir);
   virtual ir_visitor_status visit_enter(ir_call *ir);
   virtual void handle_rvalue(ir_rvalue **rvalue);

   void fix_types_in_deref_chain(ir_dereference *ir);
   void convert_split_assignment(ir_dereference *lhs, ir_rvalue *rhs,
                                 bool insert_before);

   const struct gl_shader_compiler_options *options;
   set *lower_vars;
};

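/* Convert a 32-bit constant (including each element of a constant array) to
 * its 16-bit equivalent in place.
 */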
static void
lower_constant(ir_constant *ir)
{
   if (glsl_type_is_array(ir->type)) {
      for (int i = 0; i < glsl_array_size(ir->type); i++)
         lower_constant(ir->get_array_element(i));

      ir->type = lower_glsl_type(ir->type);
      return;
   }

   ir->type = lower_glsl_type(ir->type);
   ir_constant_data value;

   if (ir->type->base_type == GLSL_TYPE_FLOAT16) {
      for (unsigned i = 0; i < ARRAY_SIZE(value.f16); i++)
         value.f16[i] = _mesa_float_to_half(ir->value.f[i]);
   } else if (ir->type->base_type == GLSL_TYPE_INT16) {
      for (unsigned i = 0; i < ARRAY_SIZE(value.i16); i++)
         value.i16[i] = ir->value.i[i];
   } else if (ir->type->base_type == GLSL_TYPE_UINT16) {
      for (unsigned i = 0; i < ARRAY_SIZE(value.u16); i++)
         value.u16[i] = ir->value.u[i];
   } else {
      unreachable("invalid type");
   }

   ir->value = value;
}

ir_visitor_status
lower_variables_visitor::visit(ir_variable *var)
{
   if ((var->data.mode != ir_var_temporary &&
        var->data.mode != ir_var_auto &&
        /* Lower uniforms but not UBOs. */
        (var->data.mode != ir_var_uniform ||
         var->is_in_buffer_block() ||
         !(options->LowerPrecisionFloat16Uniforms &&
           glsl_without_array(var->type)->base_type == GLSL_TYPE_FLOAT))) ||
       !glsl_type_is_32bit(glsl_without_array(var->type)) ||
       (var->data.precision != GLSL_PRECISION_MEDIUM &&
        var->data.precision != GLSL_PRECISION_LOW) ||
       !can_lower_type(options, var->type))
      return visit_continue;

   /* Lower constant initializers. */
   if (var->constant_value &&
       var->type == var->constant_value->type) {
      if (!options->LowerPrecisionConstants)
         return visit_continue;
      var->constant_value =
         var->constant_value->clone(ralloc_parent(var), NULL);
      lower_constant(var->constant_value);
   }

   if (var->constant_initializer &&
       var->type == var->constant_initializer->type) {
      if (!options->LowerPrecisionConstants)
         return visit_continue;
      var->constant_initializer =
         var->constant_initializer->clone(ralloc_parent(var), NULL);
      lower_constant(var->constant_initializer);
   }

   var->type = lower_glsl_type(var->type);
   _mesa_set_add(lower_vars, var);

   return visit_continue;
}

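/* Rewrite the types in a dereference chain of a lowered variable (including
 * nested array dereferences) to the 16-bit equivalents.
 */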
void
lower_variables_visitor::fix_types_in_deref_chain(ir_dereference *ir)
{
   assert(glsl_type_is_32bit(glsl_without_array(ir->type)));
   assert(_mesa_set_search(lower_vars, ir->variable_referenced()));

   /* Fix the type in the dereference node. */
   ir->type = lower_glsl_type(ir->type);

   /* If it's an array, fix the types in the whole dereference chain. */
   for (ir_dereference_array *deref_array = ir->as_dereference_array();
        deref_array;
        deref_array = deref_array->array->as_dereference_array()) {
      assert(glsl_type_is_32bit(glsl_without_array(deref_array->array->type)));
      deref_array->array->type = lower_glsl_type(deref_array->array->type);
   }
}

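/* Emit per-element assignments that copy "rhs" into "lhs" with a precision
 * conversion, used when exactly one of the two sides has been lowered to
 * 16 bits. Arrays are split into one assignment per element.
 */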
void
lower_variables_visitor::convert_split_assignment(ir_dereference *lhs,
                                                  ir_rvalue *rhs,
                                                  bool insert_before)
{
   void *mem_ctx = ralloc_parent(lhs);

   if (glsl_type_is_array(lhs->type)) {
      for (unsigned i = 0; i < lhs->type->length; i++) {
         ir_dereference *l, *r;

         l = new(mem_ctx) ir_dereference_array(lhs->clone(mem_ctx, NULL),
                                               new(mem_ctx) ir_constant(i));
         r = new(mem_ctx) ir_dereference_array(rhs->clone(mem_ctx, NULL),
                                               new(mem_ctx) ir_constant(i));
         convert_split_assignment(l, r, insert_before);
      }
      return;
   }

   assert(glsl_type_is_16bit(lhs->type) || glsl_type_is_32bit(lhs->type));
   assert(glsl_type_is_16bit(rhs->type) || glsl_type_is_32bit(rhs->type));
   assert(glsl_type_is_16bit(lhs->type) != glsl_type_is_16bit(rhs->type));

   ir_assignment *assign =
      new(mem_ctx) ir_assignment(lhs, convert_precision(glsl_type_is_32bit(lhs->type), rhs));

   if (insert_before)
      base_ir->insert_before(assign);
   else
      base_ir->insert_after(assign);
}

ir_visitor_status
lower_variables_visitor::visit_enter(ir_assignment *ir)
{
   ir_dereference *lhs = ir->lhs;
   ir_variable *var = lhs->variable_referenced();
   ir_dereference *rhs_deref = ir->rhs->as_dereference();
   ir_variable *rhs_var = rhs_deref ? rhs_deref->variable_referenced() : NULL;
   ir_constant *rhs_const = ir->rhs->as_constant();

   /* Legalize array assignments between lowered and non-lowered variables. */
   if (glsl_type_is_array(lhs->type) &&
       (rhs_var || rhs_const) &&
       (!rhs_var ||
        (var &&
         glsl_type_is_16bit(glsl_without_array(var->type)) !=
         glsl_type_is_16bit(glsl_without_array(rhs_var->type)))) &&
       (!rhs_const ||
        (var &&
         glsl_type_is_16bit(glsl_without_array(var->type)) &&
         glsl_type_is_32bit(glsl_without_array(rhs_const->type))))) {
      assert(glsl_type_is_array(ir->rhs->type));

      /* Fix array assignments from lowered to non-lowered. */
      if (rhs_var && _mesa_set_search(lower_vars, rhs_var)) {
         fix_types_in_deref_chain(rhs_deref);
         /* Convert to 32 bits for LHS. */
         convert_split_assignment(lhs, rhs_deref, true);
         ir->remove();
         return visit_continue;
      }

      /* Fix array assignments from non-lowered to lowered. */
      if (var &&
          _mesa_set_search(lower_vars, var) &&
          glsl_type_is_32bit(glsl_without_array(ir->rhs->type))) {
         fix_types_in_deref_chain(lhs);
         /* Convert to 16 bits for LHS. */
         convert_split_assignment(lhs, ir->rhs, true);
         ir->remove();
         return visit_continue;
      }
   }

   /* Fix assignment types. */
   if (var &&
       _mesa_set_search(lower_vars, var)) {
      /* Fix the LHS type. */
      if (glsl_type_is_32bit(glsl_without_array(lhs->type)))
         fix_types_in_deref_chain(lhs);

      /* Fix the RHS type if it's a lowered variable. */
      if (rhs_var &&
          _mesa_set_search(lower_vars, rhs_var) &&
          glsl_type_is_32bit(glsl_without_array(rhs_deref->type)))
         fix_types_in_deref_chain(rhs_deref);

      /* Fix the RHS type if it's a non-array expression. */
      if (glsl_type_is_32bit(ir->rhs->type)) {
         ir_expression *expr = ir->rhs->as_expression();

         /* Convert the RHS to the LHS type. */
         if (expr &&
             (expr->operation == ir_unop_f162f ||
              expr->operation == ir_unop_i2i ||
              expr->operation == ir_unop_u2u) &&
             glsl_type_is_16bit(expr->operands[0]->type)) {
            /* If there is an "up" conversion, just remove it.
             * This is optional. We could just as well execute the else
             * statement and let NIR eliminate the up+down conversions.
             */
            ir->rhs = expr->operands[0];
         } else {
            /* Add a "down" conversion operation to fix the type of RHS. */
            ir->rhs = convert_precision(false, ir->rhs);
         }
      }
   }

   return ir_rvalue_enter_visitor::visit_enter(ir);
}

ir_visitor_status
lower_variables_visitor::visit_enter(ir_return *ir)
{
   void *mem_ctx = ralloc_parent(ir);

   ir_dereference *deref = ir->value ? ir->value->as_dereference() : NULL;
   if (deref) {
      ir_variable *var = deref->variable_referenced();

      /* Fix the type of the return value. */
      if (var &&
          _mesa_set_search(lower_vars, var) &&
          glsl_type_is_32bit(glsl_without_array(deref->type))) {
         /* Create a 32-bit temporary variable. */
         ir_variable *new_var =
            new(mem_ctx) ir_variable(deref->type, "lowerp", ir_var_temporary);
         base_ir->insert_before(new_var);

         /* Fix types in dereferences. */
         fix_types_in_deref_chain(deref);

         /* Convert to 32 bits for the return value. */
         convert_split_assignment(new(mem_ctx) ir_dereference_variable(new_var),
                                  deref, true);
         ir->value = new(mem_ctx) ir_dereference_variable(new_var);
      }
   }

   return ir_rvalue_enter_visitor::visit_enter(ir);
}

void lower_variables_visitor::handle_rvalue(ir_rvalue **rvalue)
{
   ir_rvalue *ir = *rvalue;

   if (in_assignee || ir == NULL)
      return;

   ir_expression *expr = ir->as_expression();
   ir_dereference *expr_op0_deref = expr ? expr->operands[0]->as_dereference() : NULL;

   /* Remove f2fmp(float16). Same for int16 and uint16. */
   if (expr &&
       expr_op0_deref &&
       (expr->operation == ir_unop_f2fmp ||
        expr->operation == ir_unop_i2imp ||
        expr->operation == ir_unop_u2ump ||
        expr->operation == ir_unop_f2f16 ||
        expr->operation == ir_unop_i2i ||
        expr->operation == ir_unop_u2u) &&
       glsl_type_is_16bit(glsl_without_array(expr->type)) &&
       glsl_type_is_32bit(glsl_without_array(expr_op0_deref->type)) &&
       expr_op0_deref->variable_referenced() &&
       _mesa_set_search(lower_vars, expr_op0_deref->variable_referenced())) {
      fix_types_in_deref_chain(expr_op0_deref);

      /* Remove f2fmp/i2imp/u2ump. */
      *rvalue = expr_op0_deref;
      return;
   }

   ir_dereference *deref = ir->as_dereference();

   if (deref) {
      ir_variable *var = deref->variable_referenced();

      /* var can be NULL if we are dereferencing ir_constant. */
      if (var &&
          _mesa_set_search(lower_vars, var) &&
          glsl_type_is_32bit(glsl_without_array(deref->type))) {
         void *mem_ctx = ralloc_parent(ir);

         /* Create a 32-bit temporary variable. */
         ir_variable *new_var =
            new(mem_ctx) ir_variable(deref->type, "lowerp", ir_var_temporary);
         base_ir->insert_before(new_var);

         /* Fix types in dereferences. */
         fix_types_in_deref_chain(deref);

         /* Convert to 32 bits for the rvalue. */
         convert_split_assignment(new(mem_ctx) ir_dereference_variable(new_var),
                                  deref, true);
         *rvalue = new(mem_ctx) ir_dereference_variable(new_var);
      }
   }
}

ir_visitor_status
lower_variables_visitor::visit_enter(ir_call *ir)
{
   void *mem_ctx = ralloc_parent(ir);

   /* We can't pass 16-bit variables as 32-bit inout/out parameters. */
   foreach_two_lists(formal_node, &ir->callee->parameters,
                     actual_node, &ir->actual_parameters) {
      ir_dereference *param_deref =
         ((ir_rvalue *)actual_node)->as_dereference();
      ir_variable *param = (ir_variable *)formal_node;

      if (!param_deref)
         continue;

      ir_variable *var = param_deref->variable_referenced();

      /* var can be NULL if we are dereferencing ir_constant. */
      if (var &&
          _mesa_set_search(lower_vars, var) &&
          glsl_type_is_32bit(glsl_without_array(param->type))) {
         fix_types_in_deref_chain(param_deref);

         /* Create a 32-bit temporary variable for the parameter. */
         ir_variable *new_var =
            new(mem_ctx) ir_variable(param->type, "lowerp", ir_var_temporary);
         base_ir->insert_before(new_var);

         /* Replace the parameter. */
         actual_node->replace_with(new(mem_ctx) ir_dereference_variable(new_var));

         if (param->data.mode == ir_var_function_in ||
             param->data.mode == ir_var_function_inout) {
            /* Convert to 32 bits for passing in. */
            convert_split_assignment(new(mem_ctx) ir_dereference_variable(new_var),
                                     param_deref->clone(mem_ctx, NULL), true);
         }
         if (param->data.mode == ir_var_function_out ||
             param->data.mode == ir_var_function_inout) {
            /* Convert to 16 bits after returning. */
            convert_split_assignment(param_deref,
                                     new(mem_ctx) ir_dereference_variable(new_var),
                                     false);
         }
      }
   }

   /* Fix the type of return value dereferences. */
   ir_dereference_variable *ret_deref = ir->return_deref;
   ir_variable *ret_var = ret_deref ? ret_deref->variable_referenced() : NULL;

   if (ret_var &&
       _mesa_set_search(lower_vars, ret_var) &&
       glsl_type_is_32bit(glsl_without_array(ret_deref->type))) {
      /* Create a 32-bit temporary variable. */
      ir_variable *new_var =
         new(mem_ctx) ir_variable(ir->callee->return_type, "lowerp",
                                  ir_var_temporary);
      base_ir->insert_before(new_var);

      /* Replace the return variable. */
      ret_deref->var = new_var;

      /* Convert to 16 bits after returning. */
      convert_split_assignment(new(mem_ctx) ir_dereference_variable(ret_var),
                               new(mem_ctx) ir_dereference_variable(new_var),
                               false);
   }

   return ir_rvalue_enter_visitor::visit_enter(ir);
}

}

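/* Entry point: lower mediump/lowp rvalues and variables in the given
 * instruction list to 16-bit types.
 */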
void
lower_precision(const struct gl_shader_compiler_options *options,
                exec_list *instructions)
{
   find_precision_visitor v(options);
   find_lowerable_rvalues(options, instructions, v.lowerable_rvalues);
   visit_list_elements(&v, instructions);

   lower_variables_visitor vars(options);
   visit_list_elements(&vars, instructions);
}