xref: /aosp_15_r20/external/mesa3d/src/gallium/drivers/r300/compiler/r300_nir_algebraic.py (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1# SPDX-License-Identifier: MIT
2# Copyright 2019 Vasily Khoruzhick <[email protected]>
3# Copyright 2021 Pavel Ondračka
4
5import argparse
6import sys
7from math import pi
8
# Shorthand names for the pattern variables, so that rules can be written
# as ('fneg', a) instead of ('fneg', 'a').
a, b, c, d, e = 'a', 'b', 'c', 'd', 'e'
15
# Wrap the fsin/fcos argument into the range [-PI, PI):
#
#   y = frac(x / 2PI + 0.5) * 2PI - PI
#
# The same rewrite is generated for both opcodes; only sources tagged with
# needs_vs_trig_input_fixup are matched.
transform_trig_input_vs_r500 = [
    ((trig_op, 'a(needs_vs_trig_input_fixup)'),
     (trig_op,
      ('fadd',
       ('fmul',
        ('ffract', ('fadd', ('fmul', 'a', 1 / (2 * pi)), 0.5)),
        2 * pi),
       -pi)))
    for trig_op in ('fsin', 'fcos')
]
24
# Scale the fsin/fcos argument by 1 / 2PI and keep only the fractional part:
#
#   y = frac(x / 2PI)
#
# so the value handed to the opcode lies in [0, 1) (one unit per period), not
# in [-PI, PI]. Only sources tagged with needs_fs_trig_input_fixup are matched.
transform_trig_input_fs_r500 = [
    ((trig_op, 'a(needs_fs_trig_input_fixup)'),
     (trig_op, ('ffract', ('fmul', 'a', 1 / (2 * pi)))))
    for trig_op in ('fsin', 'fcos')
]
33
# This is a pattern produced by wined3d for the A0 register load.
# The specific pattern wined3d emits looks like this
#   A0.x = (int(floor(abs(R0.x) + 0.5) * sign(R0.x)));
# however we lower both sign and floor, so here we check for the already
# lowered sequence and replace all of it with a single fround_even.
#
# NOTE(review): the matched sequence rounds exact halves away from zero while
# fround_even rounds them to even, so the two differ for x.5 inputs -
# presumably acceptable for address register loads; confirm.
r300_nir_fuse_fround_d3d9 = [
    (
        ('fmul',
         # floor(abs(a) + 0.5), with floor(x) lowered to x - fract(x)
         ('fadd',
          ('fadd', ('fabs', 'a'), 0.5),
          ('fneg', ('ffract', ('fadd', ('fabs', 'a'), 0.5)))),
         # sign(a), lowered to b2f(0 < a) - b2f(a < 0)
         ('fadd',
          ('b2f', ('!flt', 0.0, 'a')),
          ('fneg', ('b2f', ('!flt', 'a', 0.0))))),
        ('fround_even', 'a'),
    ),
]
46
# Here are some specific optimizations for code reordering such that the backend
# has an easier task of recognizing output modifiers and presubtract patterns.
#
# Every entry is a (search, replace) pair. Both sides must compute the same
# value; only the shape of the expression changes. The pattern variable is
# spelled as the literal 'a', which is the same string the module-level
# shorthand holds.
r300_nir_prepare_presubtract = [
    # Backend can only recognize 1 - x pattern.
    (('fadd', ('fneg', 'a'), 1.0), ('fadd', 1.0, ('fneg', 'a'))),
    # a - 1 == -(1 - a)
    (('fadd', 'a', -1.0), ('fneg', ('fadd', 1.0, ('fneg', 'a')))),
    (('fadd', -1.0, 'a'), ('fneg', ('fadd', 1.0, ('fneg', 'a')))),
    # Bias presubtract 1 - 2 * x expects MAD -a 2.0 1.0 form.
    (('ffma', 2.0, ('fneg', 'a'), 1.0), ('ffma', ('fneg', 'a'), 2.0, 1.0)),
    # a * -2 + 1 already is 1 - 2 * a, so only the negation moves from the
    # constant to the variable. Wrapping the result in fneg here would yield
    # 2 * a - 1, i.e. the wrong sign.
    (('ffma', 'a', -2.0, 1.0), ('ffma', ('fneg', 'a'), 2.0, 1.0)),
    (('ffma', -2.0, 'a', 1.0), ('ffma', ('fneg', 'a'), 2.0, 1.0)),
    # 2 * a - 1 == -(1 - 2 * a)
    (('ffma', 2.0, 'a', -1.0), ('fneg', ('ffma', ('fneg', 'a'), 2.0, 1.0))),
    (('ffma', 'a', 2.0, -1.0), ('fneg', ('ffma', ('fneg', 'a'), 2.0, 1.0))),
    # x * 2 can be usually folded into output modifier for the previous
    # instruction, but that only works if x is a temporary. If it is input or
    # constant just convert it to add instead.
    (('fmul', 'a(is_ubo_or_input)', 2.0), ('fadd', 'a', 'a')),
]
65
# For each power-of-two scale k, reassociate a * (b * k) into k * (a * b) when
# b passes is_ubo_or_input and the inner multiply is used only once.
# Presumably this exposes the scale so the backend can fold it into an output
# modifier - confirm against the backend.
for multiplier in (2.0, 4.0, 8.0, 16.0, 0.5, 0.25, 0.125, 0.0625):
    r300_nir_prepare_presubtract.append(
        (('fmul', a, ('fmul(is_used_once)', 'b(is_ubo_or_input)', multiplier)),
         ('fmul', multiplier, ('fmul', a, b))))
70
# Late cleanups, each a (search, replace) pair.
r300_nir_opt_algebraic_late = [
    # The previous prepare_presubtract pass can sometimes produce double fneg
    # patterns. The backend copy propagate could handle it, but the nir to
    # tgsi translation does not and blows up. Clean this up.
    (
        ('fneg', ('fneg', a)),
        a,
    ),
    (
        ('fabs', ('fneg', a)),
        ('fabs', a),
    ),
    # Some cleanups after comparison lowering if one of the operands is 0.
    (
        ('fadd', a, 0.0),
        a,
    ),
    (
        ('fadd', a, ('fneg', 0.0)),
        a,
    ),
]
81
# This is very late flrp lowering to clean up after bcsel->fcsel->flrp.
# flrp(a, b, c) is expanded into two fused multiply-adds:
#   b * c + (-a * c + a)
r300_nir_lower_flrp = [
    (
        ('flrp', a, b, c),
        ('ffma', b, c,
         ('ffma', ('fneg', a), c, a)),
    ),
]
86
# Lower fcsel_ge from ftrunc on r300: fcsel_ge(a, b, c) is rewritten as an
# flrp between c and b, with sge(a, 0.0) as the interpolation factor.
r300_nir_lower_fcsel_r300 = [
    (
        ('fcsel_ge', a, b, c),
        ('flrp', c, b, ('sge', a, 0.0)),
    ),
]
91
# Fragment shaders have no comparison opcodes. However, we can encode the
# comparison in the aluresults operation, which is then used by the next if.
# So if the comparison result is used only in a single if, we can handle it
# just fine on R500.
#
# The rules below only match comparisons tagged is_not_used_in_single_if and
# turn them into an fcsel_ge that selects between 1.0 and 0.0:
#   seq/sne test -abs(a - b), slt/sge test a - b.
r300_nir_lower_comparison_fs = [
    (
        ('seq(is_not_used_in_single_if)', 'a@32', 'b@32'),
        ('fcsel_ge', ('fneg', ('fabs', ('fadd', a, ('fneg', b)))), 1.0, 0.0),
    ),
    (
        ('sne(is_not_used_in_single_if)', 'a@32', 'b@32'),
        ('fcsel_ge', ('fneg', ('fabs', ('fadd', a, ('fneg', b)))), 0.0, 1.0),
    ),
    (
        ('slt(is_not_used_in_single_if)', 'a@32', 'b@32'),
        ('fcsel_ge', ('fadd', a, ('fneg', b)), 0.0, 1.0),
    ),
    (
        ('sge(is_not_used_in_single_if)', 'a@32', 'b@32'),
        ('fcsel_ge', ('fadd', a, ('fneg', b)), 1.0, 0.0),
    ),
]
105
# Rules applied after integer lowering, each a (search, replace) pair.
r300_nir_post_integer_lowering = [
    # If ffloor result is used only for indirect constant load, we can get rid
    # of it completely as ntt emits ARL by default which already does the
    # flooring. This actually checks for the lowered
    # ffloor(a) = a - ffract(a) patterns.
    (
        ('fadd(is_only_used_by_load_ubo_vec4)', a, ('fneg', ('ffract', a))),
        a,
    ),
    # This is a D3D9 pattern from Wine when shader wants ffloor instead of
    # fround on register load.
    (
        ('fround_even(is_only_used_by_load_ubo_vec4)',
         ('fadd', a, ('fneg', ('ffract', a)))),
        a,
    ),
    # Lower ftrunc: pick abs(a) - fract(abs(a)) or its negation through
    # fcsel_ge on a.
    (
        ('ftrunc', 'a@32'),
        ('fcsel_ge', a,
         ('fadd', ('fabs', a), ('fneg', ('ffract', ('fabs', a)))),
         ('fneg', ('fadd', ('fabs', a), ('fneg', ('ffract', ('fabs', a)))))),
    ),
]
117
def main():
    """Render every r300 NIR algebraic pass into one generated source file.

    Command line:
        -p / --import-path  directory put at the front of sys.path so that
                            the nir_algebraic module can be imported (required)
        output              path of the file to write
    """
    cli = argparse.ArgumentParser()
    cli.add_argument('-p', '--import-path', required=True)
    cli.add_argument('output')
    args = cli.parse_args()
    sys.path.insert(0, args.import_path)

    # Imported here because it only becomes importable after the path insert.
    import nir_algebraic  # pylint: disable=import-error
    ignore_exact = nir_algebraic.ignore_exact

    # Float comparison opcode paired with the s* opcode that replaces it.
    fcmp_to_scmp = (('feq', 'seq'), ('fneu', 'sne'), ('flt', 'slt'), ('fge', 'sge'))

    # First the c * cmp + (d - d * cmp) form for targets without a fused
    # compare-and-select, then the fcsel form for targets that have one.
    r300_nir_lower_bool_to_float = [
        (('bcsel@32(is_only_used_as_float)', ignore_exact(fcmp, 'a@32', 'b@32'), c, d),
         ('fadd', ('fmul', c, (scmp, a, b)), ('fsub', d, ('fmul', d, (scmp, a, b)))),
         "!options->has_fused_comp_and_csel")
        for fcmp, scmp in fcmp_to_scmp
    ] + [
        (('bcsel@32(is_only_used_as_float)', (fcmp, 'a@32', 'b@32'), c, d),
         ('fcsel', (scmp, a, b), c, d),
         "options->has_fused_comp_and_csel")
        for fcmp, scmp in fcmp_to_scmp
    ]

    # Conditions fed to fcsel_ge: a - b for ordering tests, -abs(a - b) for
    # (in)equality tests.
    a_minus_b = ('fadd', a, ('fneg', b))
    neg_abs_diff = ('fneg', ('fabs', a_minus_b))

    r300_nir_lower_bool_to_float_fs = [
        (('bcsel@32(r300_is_only_used_as_float)', ignore_exact('feq', 'a@32', 'b@32'), c, d),
         ('fcsel_ge', neg_abs_diff, c, d)),
        (('bcsel@32(r300_is_only_used_as_float)', ignore_exact('fneu', 'a@32', 'b@32'), c, d),
         ('fcsel_ge', neg_abs_diff, d, c)),
        (('bcsel@32(r300_is_only_used_as_float)', ignore_exact('flt', 'a@32', 'b@32'), c, d),
         ('fcsel_ge', a_minus_b, d, c)),
        (('bcsel@32(r300_is_only_used_as_float)', ignore_exact('fge', 'a@32', 'b@32'), c, d),
         ('fcsel_ge', a_minus_b, c, d)),
        (('b2f32', ('feq', 'a@32', 'b@32')),
         ('fcsel_ge', neg_abs_diff, 1.0, 0.0)),
        (('b2f32', ('fneu', 'a@32', 'b@32')),
         ('fcsel_ge', neg_abs_diff, 0.0, 1.0)),
        (('b2f32', ('flt', 'a@32', 'b@32')),
         ('fcsel_ge', a_minus_b, 0.0, 1.0)),
        (('b2f32', ('fge', 'a@32', 'b@32')),
         ('fcsel_ge', a_minus_b, 1.0, 0.0)),
    ]

    # (generated pass name, rule list), in the order they are emitted.
    passes = (
        ("r300_transform_vs_trig_input", transform_trig_input_vs_r500),
        ("r300_transform_fs_trig_input", transform_trig_input_fs_r500),
        ("r300_nir_fuse_fround_d3d9", r300_nir_fuse_fround_d3d9),
        ("r300_nir_lower_bool_to_float", r300_nir_lower_bool_to_float),
        ("r300_nir_lower_bool_to_float_fs", r300_nir_lower_bool_to_float_fs),
        ("r300_nir_prepare_presubtract", r300_nir_prepare_presubtract),
        ("r300_nir_opt_algebraic_late", r300_nir_opt_algebraic_late),
        ("r300_nir_post_integer_lowering", r300_nir_post_integer_lowering),
        ("r300_nir_lower_flrp", r300_nir_lower_flrp),
        ("r300_nir_lower_fcsel_r300", r300_nir_lower_fcsel_r300),
        ("r300_nir_lower_comparison_fs", r300_nir_lower_comparison_fs),
    )

    with open(args.output, 'w') as out:
        # NOTE(review): no newline is written after the #include; this relies
        # on the rendered pass text starting on a new line - confirm.
        out.write('#include "compiler/r300_nir.h"')

        for pass_name, rules in passes:
            out.write(nir_algebraic.AlgebraicPass(pass_name, rules).render())
205
# Run the generator only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
208