1 /*
2 * Copyright (C) 2019 Collabora, Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
/* Midgard has some accelerated support for perspective projection on the
 * load/store pipes. So the first perspective projection pass looks for
 * lowered/open-coded perspective projection of the form "fmul (A.xyz,
 * frcp(A.w))" or "fmul (A.xy, frcp(A.z))" and rewrites it with a native
 * perspective division opcode (on the load/store pipe). Caveats apply: the
 * frcp should be used only once to make this optimization worthwhile. And the
 * source of the frcp ought to be a varying to make it worthwhile...
 *
 * The second pass in this file is a step #2 of sorts: fusing that load/store
 * projection into a varying load instruction (they can be done together
 * implicitly). This depends on the combination pass. Again caveat: the varying
 * should only be used once to make this worthwhile.
 */
37
38 #include "compiler.h"
39
40 static bool
is_swizzle_0(unsigned * swizzle)41 is_swizzle_0(unsigned *swizzle)
42 {
43 for (unsigned c = 0; c < MIR_VEC_COMPONENTS; ++c)
44 if (swizzle[c])
45 return false;
46
47 return true;
48 }
49
50 bool
midgard_opt_combine_projection(compiler_context * ctx,midgard_block * block)51 midgard_opt_combine_projection(compiler_context *ctx, midgard_block *block)
52 {
53 bool progress = false;
54
55 mir_foreach_instr_in_block_safe(block, ins) {
56 /* First search for fmul */
57 if (ins->type != TAG_ALU_4)
58 continue;
59 if (ins->op != midgard_alu_op_fmul)
60 continue;
61
62 /* TODO: Flip */
63
64 /* Check the swizzles */
65
66 if (!mir_is_simple_swizzle(ins->swizzle[0], ins->mask))
67 continue;
68 if (!is_swizzle_0(ins->swizzle[1]))
69 continue;
70
71 /* Awesome, we're the right form. Now check where src2 is from */
72 unsigned frcp = ins->src[1];
73 unsigned to = ins->dest;
74
75 if (frcp & PAN_IS_REG)
76 continue;
77 if (to & PAN_IS_REG)
78 continue;
79
80 bool frcp_found = false;
81 unsigned frcp_component = 0;
82 unsigned frcp_from = 0;
83
84 mir_foreach_instr_in_block_safe(block, sub) {
85 if (sub->dest != frcp)
86 continue;
87
88 frcp_component = sub->swizzle[0][0];
89 frcp_from = sub->src[0];
90
91 frcp_found =
92 (sub->type == TAG_ALU_4) && (sub->op == midgard_alu_op_frcp);
93 break;
94 }
95
96 if (!frcp_found)
97 continue;
98 if (frcp_from != ins->src[0])
99 continue;
100 if (frcp_component != COMPONENT_W && frcp_component != COMPONENT_Z)
101 continue;
102 if (!mir_single_use(ctx, frcp))
103 continue;
104
105 /* Heuristic: check if the frcp is from a single-use varying */
106
107 bool ok = false;
108
109 /* One for frcp and one for fmul */
110 if (mir_use_count(ctx, frcp_from) > 2)
111 continue;
112
113 mir_foreach_instr_in_block_safe(block, v) {
114 if (v->dest != frcp_from)
115 continue;
116 if (v->type != TAG_LOAD_STORE_4)
117 break;
118 if (!OP_IS_LOAD_VARY_F(v->op))
119 break;
120
121 ok = true;
122 break;
123 }
124
125 if (!ok)
126 continue;
127
128 /* Nice, we got the form spot on. Let's convert! */
129
130 midgard_instruction accel = {
131 .type = TAG_LOAD_STORE_4,
132 .mask = ins->mask,
133 .dest = to,
134 .dest_type = nir_type_float32,
135 .src =
136 {
137 frcp_from,
138 ~0,
139 ~0,
140 ~0,
141 },
142 .src_types =
143 {
144 nir_type_float32,
145 },
146 .swizzle = SWIZZLE_IDENTITY_4,
147 .op = frcp_component == COMPONENT_W
148 ? midgard_op_ldst_perspective_div_w
149 : midgard_op_ldst_perspective_div_z,
150 .load_store =
151 {
152 .bitsize_toggle = true,
153 },
154 };
155
156 mir_insert_instruction_before(ctx, ins, accel);
157 mir_remove_instruction(ins);
158
159 progress |= true;
160 }
161
162 return progress;
163 }
164
165 bool
midgard_opt_varying_projection(compiler_context * ctx,midgard_block * block)166 midgard_opt_varying_projection(compiler_context *ctx, midgard_block *block)
167 {
168 bool progress = false;
169
170 mir_foreach_instr_in_block_safe(block, ins) {
171 /* Search for a projection */
172 if (ins->type != TAG_LOAD_STORE_4)
173 continue;
174 if (!OP_IS_PROJECTION(ins->op))
175 continue;
176
177 unsigned vary = ins->src[0];
178 unsigned to = ins->dest;
179
180 if (vary & PAN_IS_REG)
181 continue;
182 if (to & PAN_IS_REG)
183 continue;
184 if (!mir_single_use(ctx, vary))
185 continue;
186
187 /* Check for a varying source. If we find it, we rewrite */
188
189 bool rewritten = false;
190
191 mir_foreach_instr_in_block_safe(block, v) {
192 if (v->dest != vary)
193 continue;
194 if (v->type != TAG_LOAD_STORE_4)
195 break;
196 if (!OP_IS_LOAD_VARY_F(v->op))
197 break;
198
199 /* We found it, so rewrite it to project. Grab the
200 * modifier */
201
202 midgard_varying_params p =
203 midgard_unpack_varying_params(v->load_store);
204
205 if (p.modifier != midgard_varying_mod_none)
206 break;
207
208 bool projects_w = ins->op == midgard_op_ldst_perspective_div_w;
209
210 p.modifier = projects_w ? midgard_varying_mod_perspective_w
211 : midgard_varying_mod_perspective_z;
212
213 midgard_pack_varying_params(&v->load_store, p);
214
215 /* Use the new destination */
216 v->dest = to;
217
218 rewritten = true;
219 break;
220 }
221
222 if (rewritten)
223 mir_remove_instruction(ins);
224
225 progress |= rewritten;
226 }
227
228 return progress;
229 }
230