1 /*
2 * Copyright © 2010 Intel Corporation
3 * Copyright © 2024 Valve Corporation
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25 /**
26 * Implements the GLSL 1.30 revision 9 rule for fragment shader
27 * discard handling:
28 *
29 * "Control flow exits the shader, and subsequent implicit or
30 * explicit derivatives are undefined when this control flow is
31 * non-uniform (meaning different fragments within the primitive
32 * take different control paths)."
33 *
34 * There seem to be two conflicting things here. "Control flow exits
35 * the shader" sounds like the discarded fragments should effectively
36 * jump to the end of the shader, but that breaks derivatives in the
37 * case of uniform control flow and causes rendering failure in the
38 * bushes in Unigine Tropics.
39 *
40 * The question, then, is whether the intent was "loops stop at the
41 * point that the only active channels left are discarded pixels" or
42 * "discarded pixels become inactive at the point that control flow
43 * returns to the top of a loop". This implements the second
44 * interpretation.
45 */
46
47 #include "compiler/glsl_types.h"
48 #include "nir.h"
49 #include "nir_builder.h"
50 #include "gl_nir.h"
51
52 static void
set_discard_global(nir_builder * b,nir_variable * discarded,nir_intrinsic_instr * intrin)53 set_discard_global(nir_builder *b, nir_variable *discarded,
54 nir_intrinsic_instr *intrin)
55 {
56 nir_deref_instr *lhs = nir_build_deref_var(b, discarded);
57 nir_def *rhs;
58 if (intrin->intrinsic == nir_intrinsic_terminate_if ||
59 intrin->intrinsic == nir_intrinsic_demote_if) {
60 /* discarded <- condition, use discarded as the condition */
61 rhs = intrin->src[0].ssa;
62 nir_src_rewrite(&intrin->src[0], &lhs->def);
63 } else {
64 rhs = nir_imm_bool(b, true);
65 }
66
67 nir_store_deref(b, lhs, rhs, ~0);
68 }
69
70 static void
generate_discard_break(nir_builder * b,nir_variable * discarded)71 generate_discard_break(nir_builder *b, nir_variable *discarded)
72 {
73 nir_deref_instr *condition = nir_build_deref_var(b, discarded);
74 nir_if *nif = nir_push_if(b, nir_load_deref(b, condition));
75 nir_jump(b, nir_jump_break);
76 nir_pop_if(b, nif);
77 }
78
79 static void
lower_discard_flow(nir_builder * b,nir_cf_node * cf_node,nir_variable * discarded)80 lower_discard_flow(nir_builder *b, nir_cf_node *cf_node,
81 nir_variable *discarded)
82 {
83 switch (cf_node->type) {
84 case nir_cf_node_block: {
85 nir_block *block = nir_cf_node_as_block(cf_node);
86 nir_foreach_instr(instr, block) {
87 if (instr->type == nir_instr_type_jump) {
88 nir_jump_instr *jump_instr = nir_instr_as_jump(instr);
89 if (jump_instr->type == nir_jump_continue) {
90 b->cursor = nir_before_instr(instr);
91 generate_discard_break(b, discarded);
92 }
93 } else if (instr->type == nir_instr_type_intrinsic) {
94 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
95 if (intrin->intrinsic == nir_intrinsic_terminate_if ||
96 intrin->intrinsic == nir_intrinsic_terminate ||
97 intrin->intrinsic == nir_intrinsic_demote_if ||
98 intrin->intrinsic == nir_intrinsic_demote) {
99 b->cursor = nir_before_instr(instr);
100 set_discard_global(b, discarded, intrin);
101 }
102 }
103 }
104 return;
105 }
106 case nir_cf_node_if: {
107 nir_if *if_stmt = nir_cf_node_as_if(cf_node);
108 foreach_list_typed(nir_cf_node, nested_node, node, &if_stmt->then_list)
109 lower_discard_flow(b, nested_node, discarded);
110 foreach_list_typed(nir_cf_node, nested_node, node, &if_stmt->else_list)
111 lower_discard_flow(b, nested_node, discarded);
112 return;
113 }
114 case nir_cf_node_loop: {
115 nir_loop *loop = nir_cf_node_as_loop(cf_node);
116 assert(!nir_loop_has_continue_construct(loop));
117
118 /* Insert discard break at the end of the loop body */
119 nir_block *last_block = nir_loop_last_block(loop);
120 nir_instr *last_instr = nir_block_last_instr(last_block);
121 if (last_instr == NULL || last_instr->type != nir_instr_type_jump) {
122 b->cursor = nir_after_block(last_block);
123 generate_discard_break(b, discarded);
124 }
125
126 foreach_list_typed(nir_cf_node, nested_node, node, &loop->body)
127 lower_discard_flow(b, nested_node, discarded);
128 return;
129 }
130 default:
131 unreachable("unknown cf node type");
132 }
133 }
134
135 void
gl_nir_lower_discard_flow(nir_shader * shader)136 gl_nir_lower_discard_flow(nir_shader *shader)
137 {
138 nir_function_impl *main = nir_shader_get_entrypoint(shader);
139
140 nir_variable *discarded = rzalloc(shader, nir_variable);
141 discarded->name = ralloc_strdup(discarded, "discarded");
142 discarded->type = glsl_bool_type();
143 discarded->data.mode = nir_var_shader_temp;
144
145 nir_shader_add_variable(shader, discarded);
146
147 nir_foreach_function_impl(impl, shader) {
148 nir_builder b = nir_builder_at(nir_before_impl(impl));
149
150 if (impl == main) {
151 nir_deref_instr *deref = nir_build_deref_var(&b, discarded);
152 nir_store_deref(&b, deref, nir_imm_bool(&b, false), ~0);
153 }
154
155 foreach_list_typed(nir_cf_node, cf_node, node, &impl->body) {
156 lower_discard_flow(&b, cf_node, discarded);
157 }
158 }
159 }
160