/*
 * Copyright © 2019 Google, Inc
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * \file lower_precision.cpp
 */
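
/* Rough, illustrative sketch of what this pass does (the shader code here is
 * made up, not taken from any particular test):
 *
 *    mediump float a, b;
 *    ...
 *    gl_FragColor.x = a * b;
 *
 * is rewritten so the arithmetic happens in 16 bits and only the final result
 * is converted back to 32 bits, conceptually:
 *
 *    gl_FragColor.x = f162f(f2fmp(a) * f2fmp(b));
 *
 * find_lowerable_rvalues_visitor decides which expression trees may be
 * lowered, find_precision_visitor / lower_precision_visitor rewrite them, and
 * lower_variables_visitor additionally narrows eligible temporaries and
 * uniforms so 16-bit values survive across control flow.
 */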

#include "main/macros.h"
#include "main/consts_exts.h"
#include "compiler/glsl_types.h"
#include "ir.h"
#include "ir_builder.h"
#include "ir_optimization.h"
#include "ir_rvalue_visitor.h"
#include "util/half_float.h"
#include "util/set.h"
#include "util/hash_table.h"
#include <vector>

namespace {

class find_precision_visitor : public ir_rvalue_enter_visitor {
public:
   find_precision_visitor(const struct gl_shader_compiler_options *options);
   find_precision_visitor(const find_precision_visitor &) = delete;
   ~find_precision_visitor();
   find_precision_visitor & operator=(const find_precision_visitor &) = delete;

   virtual void handle_rvalue(ir_rvalue **rvalue);
   virtual ir_visitor_status visit_enter(ir_call *ir);

   ir_function_signature *map_builtin(ir_function_signature *sig);

   /* Set of rvalues that can be lowered. This will be filled in by
    * find_lowerable_rvalues_visitor. Only the root node of a lowerable section
    * will be added to this set.
    */
   struct set *lowerable_rvalues;

   /**
    * A mapping of builtin function signatures to lowered versions. This is
    * filled in lazily when a lowered version is needed.
    */
   struct hash_table *lowered_builtins;
   /**
    * A temporary hash table only used in order to clone functions.
    */
   struct hash_table *clone_ht;

   void *lowered_builtin_mem_ctx;

   const struct gl_shader_compiler_options *options;
};

class find_lowerable_rvalues_visitor : public ir_hierarchical_visitor {
public:
   enum can_lower_state {
      UNKNOWN,
      CANT_LOWER,
      SHOULD_LOWER,
   };

   enum parent_relation {
      /* The parent performs a further operation involving the result from the
       * child and can be lowered along with it.
       */
      COMBINED_OPERATION,
      /* The parent instruction’s operation is independent of the child type so
       * the child should be lowered separately.
       */
      INDEPENDENT_OPERATION,
   };

   struct stack_entry {
      ir_instruction *instr;
      enum can_lower_state state;
      /* List of child rvalues that can be lowered. When this stack entry is
       * popped, if this node itself can’t be lowered then all of the children
       * are root nodes to lower, so we will add them to lowerable_rvalues.
       * Otherwise, if this node can also be lowered, then we won’t add the
       * children because we only want to add the topmost lowerable nodes to
       * lowerable_rvalues and the children will be lowered as part of lowering
       * this node.
       */
      std::vector<ir_instruction *> lowerable_children;
   };

   find_lowerable_rvalues_visitor(struct set *result,
                                  const struct gl_shader_compiler_options *options);

   static void stack_enter(class ir_instruction *ir, void *data);
   static void stack_leave(class ir_instruction *ir, void *data);

   virtual ir_visitor_status visit(ir_constant *ir);
   virtual ir_visitor_status visit(ir_dereference_variable *ir);

   virtual ir_visitor_status visit_enter(ir_dereference_record *ir);
   virtual ir_visitor_status visit_enter(ir_dereference_array *ir);
   virtual ir_visitor_status visit_enter(ir_texture *ir);
   virtual ir_visitor_status visit_enter(ir_expression *ir);

   virtual ir_visitor_status visit_leave(ir_assignment *ir);
   virtual ir_visitor_status visit_leave(ir_call *ir);

   can_lower_state handle_precision(const glsl_type *type,
                                    int precision) const;

   static parent_relation get_parent_relation(ir_instruction *parent,
                                              ir_instruction *child);

   std::vector<stack_entry> stack;
   struct set *lowerable_rvalues;
   const struct gl_shader_compiler_options *options;

   void pop_stack_entry();
   void add_lowerable_children(const stack_entry &entry);
};

class lower_precision_visitor : public ir_rvalue_visitor {
public:
   virtual void handle_rvalue(ir_rvalue **rvalue);
   virtual ir_visitor_status visit_enter(ir_dereference_array *);
   virtual ir_visitor_status visit_enter(ir_dereference_record *);
   virtual ir_visitor_status visit_enter(ir_call *ir);
   virtual ir_visitor_status visit_enter(ir_texture *ir);
   virtual ir_visitor_status visit_leave(ir_expression *);
};

static bool
can_lower_type(const struct gl_shader_compiler_options *options,
               const glsl_type *type)
{
   /* Don’t lower any expressions involving non-float types except bool and
    * texture samplers. This will rule out operations that change the type,
    * such as conversions to int. Instead it will end up lowering the
    * arguments and adding a final conversion to float32. We want to handle
    * boolean types so that comparisons are done in 16 bits.
    */

   switch (glsl_without_array(type)->base_type) {
   /* TODO: should we do anything for these two with regard to Int16 vs FP16
    * support?
    */
   case GLSL_TYPE_BOOL:
   case GLSL_TYPE_SAMPLER:
   case GLSL_TYPE_IMAGE:
      return true;

   case GLSL_TYPE_FLOAT:
      return options->LowerPrecisionFloat16;

   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT:
      return options->LowerPrecisionInt16;

   default:
      return false;
   }
}
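
/* For illustration (hypothetical shader declarations, not tied to any
 * particular driver configuration):
 *
 *    mediump float f;  // lowerable only when LowerPrecisionFloat16 is set
 *    mediump vec3  v;  // float-based, also gated by LowerPrecisionFloat16
 *    mediump int   i;  // lowerable only when LowerPrecisionInt16 is set
 *    mediump uint  u;  // same as int, gated by LowerPrecisionInt16
 *
 * Booleans, samplers and images always pass this check; the precision
 * qualifier on the declaration is evaluated separately by handle_precision().
 */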

find_lowerable_rvalues_visitor::find_lowerable_rvalues_visitor(struct set *res,
                                 const struct gl_shader_compiler_options *opts)
{
   lowerable_rvalues = res;
   options = opts;
   callback_enter = stack_enter;
   callback_leave = stack_leave;
   data_enter = this;
   data_leave = this;
}

void
find_lowerable_rvalues_visitor::stack_enter(class ir_instruction *ir,
                                            void *data)
{
   find_lowerable_rvalues_visitor *state =
      (find_lowerable_rvalues_visitor *) data;

   /* Add a new stack entry for this instruction */
   stack_entry entry;

   entry.instr = ir;
   entry.state = state->in_assignee ? CANT_LOWER : UNKNOWN;

   state->stack.push_back(entry);
}

void
find_lowerable_rvalues_visitor::add_lowerable_children(const stack_entry &entry)
{
   /* We can’t lower this node so if there were any pending children then they
    * are all root lowerable nodes and we should add them to the set.
    */
   for (auto &it : entry.lowerable_children)
      _mesa_set_add(lowerable_rvalues, it);
}

void
find_lowerable_rvalues_visitor::pop_stack_entry()
{
   const stack_entry &entry = stack.back();

   if (stack.size() >= 2) {
      /* Combine this state into the parent state, unless the parent operation
       * doesn’t have any relation to the child operations.
       */
      stack_entry &parent = stack.end()[-2];
      parent_relation rel = get_parent_relation(parent.instr, entry.instr);

      if (rel == COMBINED_OPERATION) {
         switch (entry.state) {
         case CANT_LOWER:
            parent.state = CANT_LOWER;
            break;
         case SHOULD_LOWER:
            if (parent.state == UNKNOWN)
               parent.state = SHOULD_LOWER;
            break;
         case UNKNOWN:
            break;
         }
      }
   }

   if (entry.state == SHOULD_LOWER) {
      ir_rvalue *rv = entry.instr->as_rvalue();

      if (rv == NULL) {
         add_lowerable_children(entry);
      } else if (stack.size() >= 2) {
         stack_entry &parent = stack.end()[-2];

         switch (get_parent_relation(parent.instr, rv)) {
         case COMBINED_OPERATION:
            /* We only want to add the toplevel lowerable instructions to the
             * lowerable set. Therefore if there is a parent, then instead of
             * adding this instruction to the set, we queue it on the parent
             * and decide based on the result of the parent instruction.
             */
            parent.lowerable_children.push_back(entry.instr);
            break;
         case INDEPENDENT_OPERATION:
            _mesa_set_add(lowerable_rvalues, rv);
            break;
         }
      } else {
         /* This is a toplevel node so add it directly to the lowerable
          * set.
          */
         _mesa_set_add(lowerable_rvalues, rv);
      }
   } else if (entry.state == CANT_LOWER) {
      add_lowerable_children(entry);
   }

   stack.pop_back();
}
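
/* A small worked example of the state propagation above, assuming a shader
 * containing:
 *
 *    mediump float a, b;
 *    highp float h;
 *    ... = (a + b) + h;
 *
 * The dereferences of "a" and "b" pop as SHOULD_LOWER and mark the inner add
 * as SHOULD_LOWER (COMBINED_OPERATION); when the inner add pops, it is queued
 * on the outer add as a lowerable child.  The dereference of "h" pops as
 * CANT_LOWER, which forces the outer add to CANT_LOWER, so when the outer add
 * is finally popped, its pending child "a + b" is added to lowerable_rvalues
 * as a root node via add_lowerable_children().
 */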

void
find_lowerable_rvalues_visitor::stack_leave(class ir_instruction *ir,
                                            void *data)
{
   find_lowerable_rvalues_visitor *state =
      (find_lowerable_rvalues_visitor *) data;

   state->pop_stack_entry();
}

enum find_lowerable_rvalues_visitor::can_lower_state
find_lowerable_rvalues_visitor::handle_precision(const glsl_type *type,
                                                 int precision) const
{
   if (!can_lower_type(options, type))
      return CANT_LOWER;

   switch (precision) {
   case GLSL_PRECISION_NONE:
      return UNKNOWN;
   case GLSL_PRECISION_HIGH:
      return CANT_LOWER;
   case GLSL_PRECISION_MEDIUM:
   case GLSL_PRECISION_LOW:
      return SHOULD_LOWER;
   }

   return CANT_LOWER;
}
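
/* In other words, for a type that passes can_lower_type() the qualifier
 * decides the outcome.  Illustrative declarations:
 *
 *    highp   float x;   // CANT_LOWER
 *    mediump float y;   // SHOULD_LOWER
 *    lowp    float z;   // SHOULD_LOWER
 *    float w;           // no effective qualifier: UNKNOWN, decided by the
 *                       // surrounding expression
 */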

enum find_lowerable_rvalues_visitor::parent_relation
find_lowerable_rvalues_visitor::get_parent_relation(ir_instruction *parent,
                                                    ir_instruction *child)
{
   /* If the parent is a dereference instruction, then the child could be, for
    * example, an array index, and that should be lowered independently of the
    * parent.
    */
   if (parent->as_dereference())
      return INDEPENDENT_OPERATION;

   /* The precision of texture sampling depends on the precision of the
    * sampler. The rest of the arguments don’t matter, so we can treat it as
    * an independent operation.
    */
   if (parent->as_texture())
      return INDEPENDENT_OPERATION;

   return COMBINED_OPERATION;
}

ir_visitor_status
find_lowerable_rvalues_visitor::visit(ir_constant *ir)
{
   stack_enter(ir, this);

   if (!can_lower_type(options, ir->type))
      stack.back().state = CANT_LOWER;

   stack_leave(ir, this);

   return visit_continue;
}

ir_visitor_status
find_lowerable_rvalues_visitor::visit(ir_dereference_variable *ir)
{
   stack_enter(ir, this);

   if (stack.back().state == UNKNOWN)
      stack.back().state = handle_precision(ir->type, ir->precision());

   stack_leave(ir, this);

   return visit_continue;
}

ir_visitor_status
find_lowerable_rvalues_visitor::visit_enter(ir_dereference_record *ir)
{
   ir_hierarchical_visitor::visit_enter(ir);

   if (stack.back().state == UNKNOWN)
      stack.back().state = handle_precision(ir->type, ir->precision());

   return visit_continue;
}

ir_visitor_status
find_lowerable_rvalues_visitor::visit_enter(ir_dereference_array *ir)
{
   ir_hierarchical_visitor::visit_enter(ir);

   if (stack.back().state == UNKNOWN)
      stack.back().state = handle_precision(ir->type, ir->precision());

   return visit_continue;
}

ir_visitor_status
find_lowerable_rvalues_visitor::visit_enter(ir_texture *ir)
{
   ir_hierarchical_visitor::visit_enter(ir);

   /* The precision of the sample value depends on the precision of the
    * sampler.
    */
   stack.back().state = handle_precision(ir->type,
                                         ir->sampler->precision());
   return visit_continue;
}

ir_visitor_status
find_lowerable_rvalues_visitor::visit_enter(ir_expression *ir)
{
   ir_hierarchical_visitor::visit_enter(ir);

   if (!can_lower_type(options, ir->type))
      stack.back().state = CANT_LOWER;

   /* Don't lower precision for derivative calculations */
   if (!options->LowerPrecisionDerivatives &&
       (ir->operation == ir_unop_dFdx ||
        ir->operation == ir_unop_dFdx_coarse ||
        ir->operation == ir_unop_dFdx_fine ||
        ir->operation == ir_unop_dFdy ||
        ir->operation == ir_unop_dFdy_coarse ||
        ir->operation == ir_unop_dFdy_fine)) {
      stack.back().state = CANT_LOWER;
   }

   return visit_continue;
}

static unsigned
handle_call(ir_call *ir, const struct set *lowerable_rvalues)
{
   /* The intrinsic call is inside the wrapper imageLoad function that will
    * be inlined. We have to handle both of them.
    */
   if (ir->callee->intrinsic_id == ir_intrinsic_image_load ||
       (ir->callee->is_builtin() &&
        !strcmp(ir->callee_name(), "imageLoad"))) {
      ir_rvalue *param = (ir_rvalue*)ir->actual_parameters.get_head();
      ir_variable *resource = param->variable_referenced();

      assert(ir->callee->return_precision == GLSL_PRECISION_HIGH);
      assert(glsl_type_is_image(glsl_without_array(resource->type)));

      /* GLSL ES 3.20 requires that images have a precision modifier, but if
       * you set one, it doesn't do anything, because all intrinsics are
       * defined with highp. This seems to be a spec bug.
       *
       * In theory we could set the return value to mediump if the image
       * format has a lower precision. This appears to be the most sensible
       * thing to do.
       */
      const struct util_format_description *desc =
         util_format_description(resource->data.image_format);
      int i =
         util_format_get_first_non_void_channel(resource->data.image_format);
      bool mediump;

      assert(i >= 0);

      if (desc->channel[i].pure_integer ||
          desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT)
         mediump = desc->channel[i].size <= 16;
      else
         mediump = desc->channel[i].size <= 10; /* unorm/snorm */

      return mediump ? GLSL_PRECISION_MEDIUM : GLSL_PRECISION_HIGH;
   }

   /* Return the declared precision for user-defined functions. */
   if (!ir->callee->is_builtin() || ir->callee->return_precision != GLSL_PRECISION_NONE)
      return ir->callee->return_precision;

   /* Handle special calls. */
   if (ir->callee->is_builtin() && ir->actual_parameters.length()) {
      ir_rvalue *param = (ir_rvalue*)ir->actual_parameters.get_head();
      ir_variable *var = param->variable_referenced();

      /* Handle builtin wrappers around ir_texture opcodes. These wrappers will
       * be inlined by lower_precision() if we return a lowerable precision
       * here, so that we can get to ir_texture later and do proper lowering.
       *
       * We should lower the type of the return value if the sampler type
       * uses lower precision. The function parameters don't matter.
       */
      if (var && glsl_type_is_sampler(glsl_without_array(var->type))) {
         /* textureGatherOffsets always takes a highp array of constants. As
          * per the discussion https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16547#note_1393704
          * trying to lower the precision results in a segfault later on
          * in the compiler, because textureGatherOffsets ends up being passed
          * a temp when it's expecting a constant as required by the spec.
          */
         if (!strcmp(ir->callee_name(), "textureGatherOffsets"))
            return GLSL_PRECISION_HIGH;

         return var->data.precision;
      }
   }

   if (ir->callee->return_precision != GLSL_PRECISION_NONE)
      return ir->callee->return_precision;

   if (/* Parameters are always implicitly promoted to highp: */
       !strcmp(ir->callee_name(), "floatBitsToInt") ||
       !strcmp(ir->callee_name(), "floatBitsToUint") ||
       !strcmp(ir->callee_name(), "intBitsToFloat") ||
       !strcmp(ir->callee_name(), "uintBitsToFloat"))
      return GLSL_PRECISION_HIGH;

   /* Number of parameters to check if they are lowerable. */
   unsigned check_parameters = ir->actual_parameters.length();

   /* "For the interpolateAt* functions, the call will return a precision
    *  qualification matching the precision of the interpolant argument to the
    *  function call."
    *
    * and
    *
    * "The precision qualification of the value returned from bitfieldExtract()
    *  matches the precision qualification of the call's input argument
    *  “value”."
    */
   if (!strcmp(ir->callee_name(), "interpolateAtOffset") ||
       !strcmp(ir->callee_name(), "interpolateAtSample") ||
       !strcmp(ir->callee_name(), "bitfieldExtract")) {
      check_parameters = 1;
   } else if (!strcmp(ir->callee_name(), "bitfieldInsert")) {
      /* "The precision qualification of the value returned from bitfieldInsert
       * matches the highest precision qualification of the call's input
       * arguments “base” and “insert”."
       */
      check_parameters = 2;
   }

   /* If the call is to a builtin, then the function won’t have a return
    * precision and we should determine it from the precision of the arguments.
    */
   foreach_in_list(ir_rvalue, param, &ir->actual_parameters) {
      if (!check_parameters)
         break;

      if (!param->as_constant() &&
          _mesa_set_search(lowerable_rvalues, param) == NULL)
         return GLSL_PRECISION_HIGH;

      --check_parameters;
   }

   return GLSL_PRECISION_MEDIUM;
}
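
/* A sketch of how the rules above play out for a few calls (the shader code
 * here is purely illustrative):
 *
 *    mediump vec2 v;
 *    ... = normalize(v);        // builtin with no declared return precision:
 *                               // all checked arguments are lowerable, so the
 *                               // call is treated as mediump
 *    ... = floatBitsToInt(v.x); // arguments are implicitly promoted to highp,
 *                               // so the result stays highp
 *    ... = bitfieldExtract(x, o, b);
 *                               // only the first argument ("value") decides
 *                               // the result precision
 */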

ir_visitor_status
find_lowerable_rvalues_visitor::visit_leave(ir_call *ir)
{
   ir_hierarchical_visitor::visit_leave(ir);

   /* Special case for handling temporary variables generated by the compiler
    * for function calls. If we assign to one of these using a function call
    * that has a lowerable return type then we can assume the temporary
    * variable should have a medium precision too.
    */

   /* Do nothing if the return type is void. */
   if (!ir->return_deref)
      return visit_continue;

   ir_variable *var = ir->return_deref->variable_referenced();

   assert(var->data.mode == ir_var_temporary);

   unsigned return_precision = handle_call(ir, lowerable_rvalues);

   can_lower_state lower_state =
      handle_precision(var->type, return_precision);

   if (lower_state == SHOULD_LOWER) {
      /* Function calls always write to a temporary return value in the caller,
       * which has no other users.  That temp may start with the precision of
       * the function's signature, but if we're inferring the precision of an
       * unqualified builtin operation (particularly the imageLoad overrides!)
       * then we need to update it.
       */
      var->data.precision = GLSL_PRECISION_MEDIUM;
   } else {
      var->data.precision = GLSL_PRECISION_HIGH;
   }

   return visit_continue;
}

ir_visitor_status
find_lowerable_rvalues_visitor::visit_leave(ir_assignment *ir)
{
   ir_hierarchical_visitor::visit_leave(ir);

   /* Special case for handling temporary variables generated by the compiler.
    * If we assign to one of these using a lowered precision then we can assume
    * the temporary variable should have a medium precision too.
    */
   ir_variable *var = ir->lhs->variable_referenced();

   if (var->data.mode == ir_var_temporary) {
      if (_mesa_set_search(lowerable_rvalues, ir->rhs)) {
         /* Only override the precision if this is the first assignment. For
          * temporaries such as the ones generated for the ?: operator there
          * can be multiple assignments with different precisions. This way we
          * get the highest precision of all of the assignments.
          */
         if (var->data.precision == GLSL_PRECISION_NONE)
            var->data.precision = GLSL_PRECISION_MEDIUM;
      } else if (!ir->rhs->as_constant()) {
         var->data.precision = GLSL_PRECISION_HIGH;
      }
   }

   return visit_continue;
}

void
find_lowerable_rvalues(const struct gl_shader_compiler_options *options,
                       exec_list *instructions,
                       struct set *result)
{
   find_lowerable_rvalues_visitor v(result, options);

   visit_list_elements(&v, instructions);

   assert(v.stack.empty());
}

static const glsl_type *
convert_type(bool up, const glsl_type *type)
{
   if (glsl_type_is_array(type)) {
      return glsl_array_type(convert_type(up, type->fields.array),
                             glsl_array_size(type),
                             type->explicit_stride);
   }

   glsl_base_type new_base_type;

   if (up) {
      switch (type->base_type) {
      case GLSL_TYPE_FLOAT16:
         new_base_type = GLSL_TYPE_FLOAT;
         break;
      case GLSL_TYPE_INT16:
         new_base_type = GLSL_TYPE_INT;
         break;
      case GLSL_TYPE_UINT16:
         new_base_type = GLSL_TYPE_UINT;
         break;
      default:
         unreachable("invalid type");
         return NULL;
      }
   } else {
      switch (type->base_type) {
      case GLSL_TYPE_FLOAT:
         new_base_type = GLSL_TYPE_FLOAT16;
         break;
      case GLSL_TYPE_INT:
         new_base_type = GLSL_TYPE_INT16;
         break;
      case GLSL_TYPE_UINT:
         new_base_type = GLSL_TYPE_UINT16;
         break;
      default:
         unreachable("invalid type");
         return NULL;
      }
   }

   return glsl_simple_explicit_type(new_base_type,
                                    type->vector_elements,
                                    type->matrix_columns,
                                    type->explicit_stride,
                                    type->interface_row_major,
                                    0 /* explicit_alignment */);
}

static const glsl_type *
lower_glsl_type(const glsl_type *type)
{
   return convert_type(false, type);
}

static ir_rvalue *
convert_precision(bool up, ir_rvalue *ir)
{
   unsigned op;

   if (up) {
      switch (ir->type->base_type) {
      case GLSL_TYPE_FLOAT16:
         op = ir_unop_f162f;
         break;
      case GLSL_TYPE_INT16:
         op = ir_unop_i2i;
         break;
      case GLSL_TYPE_UINT16:
         op = ir_unop_u2u;
         break;
      default:
         unreachable("invalid type");
         return NULL;
      }
   } else {
      switch (ir->type->base_type) {
      case GLSL_TYPE_FLOAT:
         op = ir_unop_f2fmp;
         break;
      case GLSL_TYPE_INT:
         op = ir_unop_i2imp;
         break;
      case GLSL_TYPE_UINT:
         op = ir_unop_u2ump;
         break;
      default:
         unreachable("invalid type");
         return NULL;
      }
   }

   const glsl_type *desired_type = convert_type(up, ir->type);
   void *mem_ctx = ralloc_parent(ir);
   return new(mem_ctx) ir_expression(op, desired_type, ir, NULL);
}
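
/* For example, down-converting a 32-bit float dereference produces an IR
 * expression along the lines of (printed roughly in the IR dump syntax):
 *
 *    (expression float16_t f2fmp (var_ref a))
 *
 * and convert_precision(true, ...) on a float16 value produces the matching
 * f162f wrapper.  The int/uint cases use i2imp/u2ump on the way down and
 * i2i/u2u on the way up.
 */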

void
lower_precision_visitor::handle_rvalue(ir_rvalue **rvalue)
{
   ir_rvalue *ir = *rvalue;

   if (ir == NULL)
      return;

   if (ir->as_dereference()) {
      if (!glsl_type_is_boolean(ir->type))
         *rvalue = convert_precision(false, ir);
   } else if (glsl_type_is_32bit(ir->type)) {
      ir->type = lower_glsl_type(ir->type);

      ir_constant *const_ir = ir->as_constant();

      if (const_ir) {
         ir_constant_data value;

         if (ir->type->base_type == GLSL_TYPE_FLOAT16) {
            for (unsigned i = 0; i < ARRAY_SIZE(value.f16); i++)
               value.f16[i] = _mesa_float_to_half(const_ir->value.f[i]);
         } else if (ir->type->base_type == GLSL_TYPE_INT16) {
            for (unsigned i = 0; i < ARRAY_SIZE(value.i16); i++)
               value.i16[i] = const_ir->value.i[i];
         } else if (ir->type->base_type == GLSL_TYPE_UINT16) {
            for (unsigned i = 0; i < ARRAY_SIZE(value.u16); i++)
               value.u16[i] = const_ir->value.u[i];
         } else {
            unreachable("invalid type");
         }

         const_ir->value = value;
      }
   }
}

ir_visitor_status
lower_precision_visitor::visit_enter(ir_dereference_record *ir)
{
   /* We don’t want to lower the variable */
   return visit_continue_with_parent;
}

ir_visitor_status
lower_precision_visitor::visit_enter(ir_dereference_array *ir)
{
   /* We don’t want to convert the array index or the variable. If the array
    * index itself is lowerable that will be handled separately.
    */
   return visit_continue_with_parent;
}

ir_visitor_status
lower_precision_visitor::visit_enter(ir_call *ir)
{
   /* We don’t want to convert the arguments. These will be handled separately.
    */
   return visit_continue_with_parent;
}

ir_visitor_status
lower_precision_visitor::visit_enter(ir_texture *ir)
{
   /* We don’t want to convert the arguments. These will be handled separately.
    */
   return visit_continue_with_parent;
}

ir_visitor_status
lower_precision_visitor::visit_leave(ir_expression *ir)
{
   ir_rvalue_visitor::visit_leave(ir);

   /* If the expression is a conversion operation to or from bool then fix the
    * operation.
    */
   switch (ir->operation) {
   case ir_unop_b2f:
      ir->operation = ir_unop_b2f16;
      break;
   case ir_unop_f2b:
      ir->operation = ir_unop_f162b;
      break;
   case ir_unop_b2i:
   case ir_unop_i2b:
      /* Nothing to do - they both support int16. */
      break;
   default:
      break;
   }

   return visit_continue;
}

void
find_precision_visitor::handle_rvalue(ir_rvalue **rvalue)
{
   /* find_lowerable_rvalues_visitor has already determined which rvalues can
    * have their precision lowered. Once we know an rvalue is lowerable, we
    * can add the conversions (f2fmp, etc.) by running it through
    * lower_precision_visitor.
    */
   if (*rvalue == NULL)
      return;

   struct set_entry *entry = _mesa_set_search(lowerable_rvalues, *rvalue);

   if (!entry)
      return;

   _mesa_set_remove(lowerable_rvalues, entry);

   /* If the entire expression is just a variable dereference then trying to
    * lower it would just directly add pointless to-and-from conversions
    * without any actual operation in-between. Although these would eventually
    * get optimised out, avoiding generating them here also avoids breaking
    * inout parameters to functions.
    */
   if ((*rvalue)->as_dereference())
      return;

   lower_precision_visitor v;

   (*rvalue)->accept(&v);
   v.handle_rvalue(rvalue);

   /* We don’t need to add the final conversion if the final type has been
    * converted to bool.
    */
   if ((*rvalue)->type->base_type != GLSL_TYPE_BOOL) {
      *rvalue = convert_precision(true, *rvalue);
   }
}

ir_visitor_status
find_precision_visitor::visit_enter(ir_call *ir)
{
   ir_rvalue_enter_visitor::visit_enter(ir);

   ir_variable *return_var =
      ir->return_deref ? ir->return_deref->variable_referenced() : NULL;

   /* Don't do anything for image_load here. We have only changed the return
    * value to mediump/lowp, so that following instructions can use reduced
    * precision.
    *
    * The return value type of the intrinsic itself isn't changed here, but
    * can be changed in NIR if all users use the *2*mp opcode.
    */
   if (ir->callee->intrinsic_id == ir_intrinsic_image_load)
      return visit_continue;

   /* If this is a call to a builtin and the find_lowerable_rvalues_visitor
    * overrode the precision of the temporary return variable, then we can
    * replace the builtin implementation with a lowered version.
    */

   if (!ir->callee->is_builtin() ||
       ir->callee->is_intrinsic() ||
       return_var == NULL ||
       (return_var->data.precision != GLSL_PRECISION_MEDIUM &&
        return_var->data.precision != GLSL_PRECISION_LOW))
      return visit_continue;

   ir->callee = map_builtin(ir->callee);
   ir->generate_inline(ir);
   ir->remove();

   return visit_continue_with_parent;
}

ir_function_signature *
find_precision_visitor::map_builtin(ir_function_signature *sig)
{
   if (lowered_builtins == NULL) {
      lowered_builtins = _mesa_pointer_hash_table_create(NULL);
      clone_ht = _mesa_pointer_hash_table_create(NULL);
      lowered_builtin_mem_ctx = ralloc_context(NULL);
   } else {
      struct hash_entry *entry = _mesa_hash_table_search(lowered_builtins, sig);
      if (entry)
         return (ir_function_signature *) entry->data;
   }

   ir_function_signature *lowered_sig =
      sig->clone(lowered_builtin_mem_ctx, clone_ht);

   /* If we're lowering the output precision of the function, then also lower
    * the precision of its inputs unless they have a specific qualifier.  The
    * exception is bitCount, which doesn't declare its arguments highp but
    * should not be lowering the args to mediump just because the output is
    * lowp.
    */
   if (strcmp(sig->function_name(), "bitCount") != 0) {
      foreach_in_list(ir_variable, param, &lowered_sig->parameters) {
         /* Demote the precision of unqualified function arguments. */
         if (param->data.precision == GLSL_PRECISION_NONE)
            param->data.precision = GLSL_PRECISION_MEDIUM;
      }
   }

   lower_precision(options, &lowered_sig->body);

   _mesa_hash_table_clear(clone_ht, NULL);

   _mesa_hash_table_insert(lowered_builtins, sig, lowered_sig);

   return lowered_sig;
}
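
/* As an illustration, if a shader calls a builtin such as
 *
 *    mediump float x;
 *    ... = sin(x);
 *
 * and the temporary return value ends up mediump, the builtin's signature is
 * cloned here, its unqualified parameters are demoted to mediump, and the
 * clone's body is itself run through lower_precision() so the inlined code
 * operates in 16 bits.  The clone is cached in lowered_builtins so each
 * builtin signature is only lowered once per lower_precision() run.
 */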

find_precision_visitor::find_precision_visitor(const struct gl_shader_compiler_options *options)
   : lowerable_rvalues(_mesa_pointer_set_create(NULL)),
     lowered_builtins(NULL),
     clone_ht(NULL),
     lowered_builtin_mem_ctx(NULL),
     options(options)
{
}

find_precision_visitor::~find_precision_visitor()
{
   _mesa_set_destroy(lowerable_rvalues, NULL);

   if (lowered_builtins) {
      _mesa_hash_table_destroy(lowered_builtins, NULL);
      _mesa_hash_table_destroy(clone_ht, NULL);
      ralloc_free(lowered_builtin_mem_ctx);
   }
}

/* Lowering opcodes to 16 bits is not enough for programs with control flow
 * (and the ?: operator, which is represented by if-then-else in the IR),
 * because temporary variables, which are used for passing values between
 * code blocks, are not lowered, resulting in 32-bit phis in NIR.
 *
 * First change the variable types to 16 bits, then change all ir_dereference
 * types to 16 bits.
 */
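
/* As an illustrative example of why this matters, a ternary like
 *
 *    mediump float r = cond ? a : b;
 *
 * is represented with an if-then-else writing a compiler-generated temporary.
 * Lowering only the rvalues would leave that temporary 32-bit, so the value
 * would be converted up and back down around the join point, producing a
 * 32-bit phi in NIR.  Narrowing the temporary itself keeps the whole chain in
 * 16 bits.
 */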
class lower_variables_visitor : public ir_rvalue_enter_visitor {
public:
   lower_variables_visitor(const struct gl_shader_compiler_options *options)
      : options(options) {
      lower_vars = _mesa_pointer_set_create(NULL);
   }

   virtual ~lower_variables_visitor()
   {
      _mesa_set_destroy(lower_vars, NULL);
   }

   lower_variables_visitor(const lower_variables_visitor &) = delete;
   lower_variables_visitor & operator=(const lower_variables_visitor &) = delete;

   virtual ir_visitor_status visit(ir_variable *var);
   virtual ir_visitor_status visit_enter(ir_assignment *ir);
   virtual ir_visitor_status visit_enter(ir_return *ir);
   virtual ir_visitor_status visit_enter(ir_call *ir);
   virtual void handle_rvalue(ir_rvalue **rvalue);

   void fix_types_in_deref_chain(ir_dereference *ir);
   void convert_split_assignment(ir_dereference *lhs, ir_rvalue *rhs,
                                 bool insert_before);

   const struct gl_shader_compiler_options *options;
   set *lower_vars;
};

static void
lower_constant(ir_constant *ir)
{
   if (glsl_type_is_array(ir->type)) {
      for (int i = 0; i < glsl_array_size(ir->type); i++)
         lower_constant(ir->get_array_element(i));

      ir->type = lower_glsl_type(ir->type);
      return;
   }

   ir->type = lower_glsl_type(ir->type);
   ir_constant_data value;

   if (ir->type->base_type == GLSL_TYPE_FLOAT16) {
      for (unsigned i = 0; i < ARRAY_SIZE(value.f16); i++)
         value.f16[i] = _mesa_float_to_half(ir->value.f[i]);
   } else if (ir->type->base_type == GLSL_TYPE_INT16) {
      for (unsigned i = 0; i < ARRAY_SIZE(value.i16); i++)
         value.i16[i] = ir->value.i[i];
   } else if (ir->type->base_type == GLSL_TYPE_UINT16) {
      for (unsigned i = 0; i < ARRAY_SIZE(value.u16); i++)
         value.u16[i] = ir->value.u[i];
   } else {
      unreachable("invalid type");
   }

   ir->value = value;
}

ir_visitor_status
lower_variables_visitor::visit(ir_variable *var)
{
   if ((var->data.mode != ir_var_temporary &&
        var->data.mode != ir_var_auto &&
        /* Lower uniforms but not UBOs. */
        (var->data.mode != ir_var_uniform ||
         var->is_in_buffer_block() ||
         !(options->LowerPrecisionFloat16Uniforms &&
           glsl_without_array(var->type)->base_type == GLSL_TYPE_FLOAT))) ||
       !glsl_type_is_32bit(glsl_without_array(var->type)) ||
       (var->data.precision != GLSL_PRECISION_MEDIUM &&
        var->data.precision != GLSL_PRECISION_LOW) ||
       !can_lower_type(options, var->type))
      return visit_continue;

   /* Lower constant initializers. */
   if (var->constant_value &&
       var->type == var->constant_value->type) {
      if (!options->LowerPrecisionConstants)
         return visit_continue;
      var->constant_value =
         var->constant_value->clone(ralloc_parent(var), NULL);
      lower_constant(var->constant_value);
   }

   if (var->constant_initializer &&
       var->type == var->constant_initializer->type) {
      if (!options->LowerPrecisionConstants)
         return visit_continue;
      var->constant_initializer =
         var->constant_initializer->clone(ralloc_parent(var), NULL);
      lower_constant(var->constant_initializer);
   }

   var->type = lower_glsl_type(var->type);
   _mesa_set_add(lower_vars, var);

   return visit_continue;
}

void
lower_variables_visitor::fix_types_in_deref_chain(ir_dereference *ir)
{
   assert(glsl_type_is_32bit(glsl_without_array(ir->type)));
   assert(_mesa_set_search(lower_vars, ir->variable_referenced()));

   /* Fix the type in the dereference node. */
   ir->type = lower_glsl_type(ir->type);

   /* If it's an array, fix the types in the whole dereference chain. */
   for (ir_dereference_array *deref_array = ir->as_dereference_array();
        deref_array;
        deref_array = deref_array->array->as_dereference_array()) {
      assert(glsl_type_is_32bit(glsl_without_array(deref_array->array->type)));
      deref_array->array->type = lower_glsl_type(deref_array->array->type);
   }
}

void
lower_variables_visitor::convert_split_assignment(ir_dereference *lhs,
                                                  ir_rvalue *rhs,
                                                  bool insert_before)
{
   void *mem_ctx = ralloc_parent(lhs);

   if (glsl_type_is_array(lhs->type)) {
      for (unsigned i = 0; i < lhs->type->length; i++) {
         ir_dereference *l, *r;

         l = new(mem_ctx) ir_dereference_array(lhs->clone(mem_ctx, NULL),
                                               new(mem_ctx) ir_constant(i));
         r = new(mem_ctx) ir_dereference_array(rhs->clone(mem_ctx, NULL),
                                               new(mem_ctx) ir_constant(i));
         convert_split_assignment(l, r, insert_before);
      }
      return;
   }

   assert(glsl_type_is_16bit(lhs->type) || glsl_type_is_32bit(lhs->type));
   assert(glsl_type_is_16bit(rhs->type) || glsl_type_is_32bit(rhs->type));
   assert(glsl_type_is_16bit(lhs->type) != glsl_type_is_16bit(rhs->type));

   ir_assignment *assign =
      new(mem_ctx) ir_assignment(lhs, convert_precision(glsl_type_is_32bit(lhs->type), rhs));

   if (insert_before)
      base_ir->insert_before(assign);
   else
      base_ir->insert_after(assign);
}
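
/* Sketch of the transformation: assigning a lowered float16 array to a 32-bit
 * destination, e.g.
 *
 *    arr32 = arr16;
 *
 * is replaced by one per-element assignment with an explicit up-conversion,
 * conceptually:
 *
 *    arr32[0] = f162f(arr16[0]);
 *    arr32[1] = f162f(arr16[1]);
 *    ...
 *
 * (and recursively for arrays of arrays).  The insert_before flag controls
 * whether the generated assignments land before or after the current
 * instruction, which visit_enter(ir_call) relies on for out parameters.
 */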

ir_visitor_status
lower_variables_visitor::visit_enter(ir_assignment *ir)
{
   ir_dereference *lhs = ir->lhs;
   ir_variable *var = lhs->variable_referenced();
   ir_dereference *rhs_deref = ir->rhs->as_dereference();
   ir_variable *rhs_var = rhs_deref ? rhs_deref->variable_referenced() : NULL;
   ir_constant *rhs_const = ir->rhs->as_constant();

   /* Legalize array assignments between lowered and non-lowered variables. */
   if (glsl_type_is_array(lhs->type) &&
       (rhs_var || rhs_const) &&
       (!rhs_var ||
        (var &&
         glsl_type_is_16bit(glsl_without_array(var->type)) !=
         glsl_type_is_16bit(glsl_without_array(rhs_var->type)))) &&
       (!rhs_const ||
        (var &&
         glsl_type_is_16bit(glsl_without_array(var->type)) &&
         glsl_type_is_32bit(glsl_without_array(rhs_const->type))))) {
      assert(glsl_type_is_array(ir->rhs->type));

      /* Fix array assignments from lowered to non-lowered. */
      if (rhs_var && _mesa_set_search(lower_vars, rhs_var)) {
         fix_types_in_deref_chain(rhs_deref);
         /* Convert to 32 bits for LHS. */
         convert_split_assignment(lhs, rhs_deref, true);
         ir->remove();
         return visit_continue;
      }

      /* Fix array assignments from non-lowered to lowered. */
      if (var &&
          _mesa_set_search(lower_vars, var) &&
          glsl_type_is_32bit(glsl_without_array(ir->rhs->type))) {
         fix_types_in_deref_chain(lhs);
         /* Convert to 16 bits for LHS. */
         convert_split_assignment(lhs, ir->rhs, true);
         ir->remove();
         return visit_continue;
      }
   }

   /* Fix assignment types. */
   if (var &&
       _mesa_set_search(lower_vars, var)) {
      /* Fix the LHS type. */
      if (glsl_type_is_32bit(glsl_without_array(lhs->type)))
         fix_types_in_deref_chain(lhs);

      /* Fix the RHS type if it's a lowered variable. */
      if (rhs_var &&
          _mesa_set_search(lower_vars, rhs_var) &&
          glsl_type_is_32bit(glsl_without_array(rhs_deref->type)))
         fix_types_in_deref_chain(rhs_deref);

      /* Fix the RHS type if it's a non-array expression. */
      if (glsl_type_is_32bit(ir->rhs->type)) {
         ir_expression *expr = ir->rhs->as_expression();

         /* Convert the RHS to the LHS type. */
         if (expr &&
             (expr->operation == ir_unop_f162f ||
              expr->operation == ir_unop_i2i ||
              expr->operation == ir_unop_u2u) &&
             glsl_type_is_16bit(expr->operands[0]->type)) {
            /* If there is an "up" conversion, just remove it.
             * This is optional. We could as well execute the else statement and
             * let NIR eliminate the up+down conversions.
             */
            ir->rhs = expr->operands[0];
         } else {
            /* Add a "down" conversion operation to fix the type of RHS. */
            ir->rhs = convert_precision(false, ir->rhs);
         }
      }
   }

   return ir_rvalue_enter_visitor::visit_enter(ir);
}

ir_visitor_status
lower_variables_visitor::visit_enter(ir_return *ir)
{
   void *mem_ctx = ralloc_parent(ir);

   ir_dereference *deref = ir->value ? ir->value->as_dereference() : NULL;
   if (deref) {
      ir_variable *var = deref->variable_referenced();

      /* Fix the type of the return value. */
      if (var &&
          _mesa_set_search(lower_vars, var) &&
          glsl_type_is_32bit(glsl_without_array(deref->type))) {
         /* Create a 32-bit temporary variable. */
         ir_variable *new_var =
            new(mem_ctx) ir_variable(deref->type, "lowerp", ir_var_temporary);
         base_ir->insert_before(new_var);

         /* Fix types in dereferences. */
         fix_types_in_deref_chain(deref);

         /* Convert to 32 bits for the return value. */
         convert_split_assignment(new(mem_ctx) ir_dereference_variable(new_var),
                                  deref, true);
         ir->value = new(mem_ctx) ir_dereference_variable(new_var);
      }
   }

   return ir_rvalue_enter_visitor::visit_enter(ir);
}

void lower_variables_visitor::handle_rvalue(ir_rvalue **rvalue)
{
   ir_rvalue *ir = *rvalue;

   if (in_assignee || ir == NULL)
      return;

   ir_expression *expr = ir->as_expression();
   ir_dereference *expr_op0_deref = expr ? expr->operands[0]->as_dereference() : NULL;

   /* Remove f2fmp(float16). Same for int16 and uint16. */
   if (expr &&
       expr_op0_deref &&
       (expr->operation == ir_unop_f2fmp ||
        expr->operation == ir_unop_i2imp ||
        expr->operation == ir_unop_u2ump ||
        expr->operation == ir_unop_f2f16 ||
        expr->operation == ir_unop_i2i ||
        expr->operation == ir_unop_u2u) &&
       glsl_type_is_16bit(glsl_without_array(expr->type)) &&
       glsl_type_is_32bit(glsl_without_array(expr_op0_deref->type)) &&
       expr_op0_deref->variable_referenced() &&
       _mesa_set_search(lower_vars, expr_op0_deref->variable_referenced())) {
      fix_types_in_deref_chain(expr_op0_deref);

      /* Remove f2fmp/i2imp/u2ump. */
      *rvalue = expr_op0_deref;
      return;
   }

   ir_dereference *deref = ir->as_dereference();

   if (deref) {
      ir_variable *var = deref->variable_referenced();

      /* var can be NULL if we are dereferencing ir_constant. */
      if (var &&
          _mesa_set_search(lower_vars, var) &&
          glsl_type_is_32bit(glsl_without_array(deref->type))) {
         void *mem_ctx = ralloc_parent(ir);

         /* Create a 32-bit temporary variable. */
         ir_variable *new_var =
            new(mem_ctx) ir_variable(deref->type, "lowerp", ir_var_temporary);
         base_ir->insert_before(new_var);

         /* Fix types in dereferences. */
         fix_types_in_deref_chain(deref);

         /* Convert to 32 bits for the rvalue. */
         convert_split_assignment(new(mem_ctx) ir_dereference_variable(new_var),
                                  deref, true);
         *rvalue = new(mem_ctx) ir_dereference_variable(new_var);
      }
   }
}

ir_visitor_status
lower_variables_visitor::visit_enter(ir_call *ir)
{
   void *mem_ctx = ralloc_parent(ir);

   /* We can't pass 16-bit variables as 32-bit inout/out parameters. */
   foreach_two_lists(formal_node, &ir->callee->parameters,
                     actual_node, &ir->actual_parameters) {
      ir_dereference *param_deref =
         ((ir_rvalue *)actual_node)->as_dereference();
      ir_variable *param = (ir_variable *)formal_node;

      if (!param_deref)
         continue;

      ir_variable *var = param_deref->variable_referenced();

      /* var can be NULL if we are dereferencing ir_constant. */
      if (var &&
          _mesa_set_search(lower_vars, var) &&
          glsl_type_is_32bit(glsl_without_array(param->type))) {
         fix_types_in_deref_chain(param_deref);

         /* Create a 32-bit temporary variable for the parameter. */
         ir_variable *new_var =
            new(mem_ctx) ir_variable(param->type, "lowerp", ir_var_temporary);
         base_ir->insert_before(new_var);

         /* Replace the parameter. */
         actual_node->replace_with(new(mem_ctx) ir_dereference_variable(new_var));

         if (param->data.mode == ir_var_function_in ||
             param->data.mode == ir_var_function_inout) {
            /* Convert to 32 bits for passing in. */
            convert_split_assignment(new(mem_ctx) ir_dereference_variable(new_var),
                                     param_deref->clone(mem_ctx, NULL), true);
         }
         if (param->data.mode == ir_var_function_out ||
             param->data.mode == ir_var_function_inout) {
            /* Convert to 16 bits after returning. */
            convert_split_assignment(param_deref,
                                     new(mem_ctx) ir_dereference_variable(new_var),
                                     false);
         }
      }
   }

   /* Fix the type of return value dereferences. */
   ir_dereference_variable *ret_deref = ir->return_deref;
   ir_variable *ret_var = ret_deref ? ret_deref->variable_referenced() : NULL;

   if (ret_var &&
       _mesa_set_search(lower_vars, ret_var) &&
       glsl_type_is_32bit(glsl_without_array(ret_deref->type))) {
      /* Create a 32-bit temporary variable. */
      ir_variable *new_var =
         new(mem_ctx) ir_variable(ir->callee->return_type, "lowerp",
                                  ir_var_temporary);
      base_ir->insert_before(new_var);

      /* Replace the return variable. */
      ret_deref->var = new_var;

      /* Convert to 16 bits after returning. */
      convert_split_assignment(new(mem_ctx) ir_dereference_variable(ret_var),
                               new(mem_ctx) ir_dereference_variable(new_var),
                               false);
   }

   return ir_rvalue_enter_visitor::visit_enter(ir);
}

}

void
lower_precision(const struct gl_shader_compiler_options *options,
                exec_list *instructions)
{
   find_precision_visitor v(options);
   find_lowerable_rvalues(options, instructions, v.lowerable_rvalues);
   visit_list_elements(&v, instructions);

   lower_variables_visitor vars(options);
   visit_list_elements(&vars, instructions);
}
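
/* A minimal usage sketch (hypothetical caller, not part of this file): a
 * driver or state tracker enables the relevant options and runs the pass over
 * a linked shader's instruction list, along the lines of
 *
 *    struct gl_shader_compiler_options opts = {};
 *    opts.LowerPrecisionFloat16 = true;
 *    opts.LowerPrecisionInt16 = true;
 *    lower_precision(&opts, shader->ir);
 *
 * In Mesa this is driven from the GLSL linker based on the per-stage compiler
 * options.
 */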