# SPDX-License-Identifier: MIT
# Copyright 2019 Vasily Khoruzhick <[email protected]>
# Copyright 2021 Pavel Ondračka

"""Generate the r300 NIR algebraic passes.

Each module-level list below is a table of (search, replace[, condition])
rules.  main() wraps every table in a nir_algebraic.AlgebraicPass and writes
the rendered result to the output file given on the command line.
"""

import argparse
import sys
from math import pi

# Convenience variables
a = 'a'
b = 'b'
c = 'c'
d = 'd'
e = 'e'

# Transform input to range [-PI, PI]:
#
# y = frac(x / 2PI + 0.5) * 2PI - PI
#
transform_trig_input_vs_r500 = [
    (('fsin', 'a(needs_vs_trig_input_fixup)'),
     ('fsin', ('fadd', ('fmul', ('ffract', ('fadd', ('fmul', 'a', 1 / (2 * pi)), 0.5)), 2 * pi), -pi))),
    (('fcos', 'a(needs_vs_trig_input_fixup)'),
     ('fcos', ('fadd', ('fmul', ('ffract', ('fadd', ('fmul', 'a', 1 / (2 * pi)), 0.5)), 2 * pi), -pi))),
]

# Transform input to range [0, 1):
#
# y = frac(x / 2PI)
#
# NOTE(review): this comment used to say [-PI, PI], but frac() of the scaled
# input can only produce [0, 1).  Presumably the fragment sin/cos take the
# angle as a fraction of a full period - confirm against the backend.
transform_trig_input_fs_r500 = [
    (('fsin', 'a(needs_fs_trig_input_fixup)'), ('fsin', ('ffract', ('fmul', 'a', 1 / (2 * pi))))),
    (('fcos', 'a(needs_fs_trig_input_fixup)'), ('fcos', ('ffract', ('fmul', 'a', 1 / (2 * pi))))),
]

# This is a pattern produced by wined3d for A0 register load.
# The specific pattern wined3d emits looks like this
# A0.x = (int(floor(abs(R0.x) + 0.5) * sign(R0.x)));
# however we lower both sign and floor so here we check for the already lowered
# sequence.
r300_nir_fuse_fround_d3d9 = [
    (('fmul', ('fadd', ('fadd', ('fabs', 'a'), 0.5),
                       ('fneg', ('ffract', ('fadd', ('fabs', 'a'), 0.5)))),
              ('fadd', ('b2f', ('!flt', 0.0, 'a')),
                       ('fneg', ('b2f', ('!flt', 'a', 0.0))))),
     ('fround_even', 'a')),
]

# Here are some specific optimizations for code reordering such that the backend
# has easier task of recognizing output modifiers and presubtract patterns.
r300_nir_prepare_presubtract = [
    # Backend can only recognize 1 - x pattern.
    (('fadd', ('fneg', a), 1.0), ('fadd', 1.0, ('fneg', a))),
    (('fadd', a, -1.0), ('fneg', ('fadd', 1.0, ('fneg', a)))),
    (('fadd', -1.0, a), ('fneg', ('fadd', 1.0, ('fneg', a)))),
    # Bias presubtract 1 - 2 * x expects MAD -a 2.0 1.0 form.
    (('ffma', 2.0, ('fneg', a), 1.0), ('ffma', ('fneg', a), 2.0, 1.0)),
    # a * -2 + 1 is already 1 - 2 * a, so these two only need their operands
    # rearranged; wrapping them in fneg would flip the sign of the result.
    (('ffma', a, -2.0, 1.0), ('ffma', ('fneg', a), 2.0, 1.0)),
    (('ffma', -2.0, a, 1.0), ('ffma', ('fneg', a), 2.0, 1.0)),
    # 2 * a - 1 is -(1 - 2 * a), hence the outer fneg here.
    (('ffma', 2.0, a, -1.0), ('fneg', ('ffma', ('fneg', a), 2.0, 1.0))),
    (('ffma', a, 2.0, -1.0), ('fneg', ('ffma', ('fneg', a), 2.0, 1.0))),
    # x * 2 can be usually folded into output modifier for the previous
    # instruction, but that only works if x is a temporary. If it is input or
    # constant just convert it to add instead.
    (('fmul', 'a(is_ubo_or_input)', 2.0), ('fadd', a, a)),
]

# Move a power-of-two multiplier applied to an input/constant operand to the
# outside of the product: a * (b * m) -> m * (a * b).
r300_nir_prepare_presubtract.extend(
    (('fmul', a, ('fmul(is_used_once)', 'b(is_ubo_or_input)', multiplier)),
     ('fmul', multiplier, ('fmul', a, b)))
    for multiplier in [2.0, 4.0, 8.0, 16.0, 0.5, 0.25, 0.125, 0.0625]
)

r300_nir_opt_algebraic_late = [
    # Previous prepare_presubtract pass can sometimes produce double fneg patterns.
    # The backend copy propagate could handle it, but the nir to tgsi translation
    # does not and blows up. Clean this up.
    (('fneg', ('fneg', a)), a),
    (('fabs', ('fneg', a)), ('fabs', a)),
    # Some cleanups after comparison lowering if one of the operands is 0.
    (('fadd', a, 0.0), a),
    (('fadd', a, ('fneg', 0.0)), a),
]

# This is very late flrp lowering to clean up after bcsel->fcsel->flrp.
# flrp(a, b, c) = b * c + (a - a * c)
r300_nir_lower_flrp = [
    (('flrp', a, b, c), ('ffma', b, c, ('ffma', ('fneg', a), c, a))),
]

# Lower fcsel_ge from ftrunc on r300
r300_nir_lower_fcsel_r300 = [
    (('fcsel_ge', a, b, c), ('flrp', c, b, ('sge', a, 0.0))),
]

# Fragment shaders have no comparison opcodes. However, we can encode the comparison
# in the aluresults operation, which is then used by next if. So if the comparison result
# is used only in a single if, we can handle it just fine on R500.
r300_nir_lower_comparison_fs = [
    (('seq(is_not_used_in_single_if)', 'a@32', 'b@32'),
     ('fcsel_ge', ('fneg', ('fabs', ('fadd', a, ('fneg', b)))), 1.0, 0.0)),
    (('sne(is_not_used_in_single_if)', 'a@32', 'b@32'),
     ('fcsel_ge', ('fneg', ('fabs', ('fadd', a, ('fneg', b)))), 0.0, 1.0)),
    (('slt(is_not_used_in_single_if)', 'a@32', 'b@32'),
     ('fcsel_ge', ('fadd', a, ('fneg', b)), 0.0, 1.0)),
    (('sge(is_not_used_in_single_if)', 'a@32', 'b@32'),
     ('fcsel_ge', ('fadd', a, ('fneg', b)), 1.0, 0.0)),
]

r300_nir_post_integer_lowering = [
    # If ffloor result is used only for indirect constant load, we can get rid of it
    # completely as ntt emits ARL by default which already does the flooring.
    # This actually checks for the lowered ffloor(a) = a - ffract(a) patterns.
    (('fadd(is_only_used_by_load_ubo_vec4)', a, ('fneg', ('ffract', a))), a),
    # This is a D3D9 pattern from Wine when shader wants ffloor instead of fround on register load.
    (('fround_even(is_only_used_by_load_ubo_vec4)', ('fadd', a, ('fneg', ('ffract', a)))), a),
    # Lower ftrunc
    (('ftrunc', 'a@32'),
     ('fcsel_ge', a, ('fadd', ('fabs', a), ('fneg', ('ffract', ('fabs', a)))),
                     ('fneg', ('fadd', ('fabs', a), ('fneg', ('ffract', ('fabs', a))))))),
]


def main():
    """Render every r300 algebraic pass into the requested output file.

    Command line:
        -p / --import-path  directory prepended to sys.path so that
                            nir_algebraic can be imported (required)
        output              path of the file to write
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-p', '--import-path', required=True)
    parser.add_argument('output')
    args = parser.parse_args()
    sys.path.insert(0, args.import_path)

    # Only importable once the import path above has been added.
    import nir_algebraic  # pylint: disable=import-error
    ignore_exact = nir_algebraic.ignore_exact

    # These two tables need ignore_exact, so they are built here rather than
    # at module level.
    r300_nir_lower_bool_to_float = [
        (('bcsel@32(is_only_used_as_float)', ignore_exact('feq', 'a@32', 'b@32'), c, d),
         ('fadd', ('fmul', c, ('seq', a, b)), ('fsub', d, ('fmul', d, ('seq', a, b)))),
         "!options->has_fused_comp_and_csel"),
        (('bcsel@32(is_only_used_as_float)', ignore_exact('fneu', 'a@32', 'b@32'), c, d),
         ('fadd', ('fmul', c, ('sne', a, b)), ('fsub', d, ('fmul', d, ('sne', a, b)))),
         "!options->has_fused_comp_and_csel"),
        (('bcsel@32(is_only_used_as_float)', ignore_exact('flt', 'a@32', 'b@32'), c, d),
         ('fadd', ('fmul', c, ('slt', a, b)), ('fsub', d, ('fmul', d, ('slt', a, b)))),
         "!options->has_fused_comp_and_csel"),
        (('bcsel@32(is_only_used_as_float)', ignore_exact('fge', 'a@32', 'b@32'), c, d),
         ('fadd', ('fmul', c, ('sge', a, b)), ('fsub', d, ('fmul', d, ('sge', a, b)))),
         "!options->has_fused_comp_and_csel"),
        (('bcsel@32(is_only_used_as_float)', ('feq', 'a@32', 'b@32'), c, d),
         ('fcsel', ('seq', a, b), c, d), "options->has_fused_comp_and_csel"),
        (('bcsel@32(is_only_used_as_float)', ('fneu', 'a@32', 'b@32'), c, d),
         ('fcsel', ('sne', a, b), c, d), "options->has_fused_comp_and_csel"),
        (('bcsel@32(is_only_used_as_float)', ('flt', 'a@32', 'b@32'), c, d),
         ('fcsel', ('slt', a, b), c, d), "options->has_fused_comp_and_csel"),
        (('bcsel@32(is_only_used_as_float)', ('fge', 'a@32', 'b@32'), c, d),
         ('fcsel', ('sge', a, b), c, d), "options->has_fused_comp_and_csel"),
    ]

    r300_nir_lower_bool_to_float_fs = [
        (('bcsel@32(r300_is_only_used_as_float)', ignore_exact('feq', 'a@32', 'b@32'), c, d),
         ('fcsel_ge', ('fneg', ('fabs', ('fadd', a, ('fneg', b)))), c, d)),
        (('bcsel@32(r300_is_only_used_as_float)', ignore_exact('fneu', 'a@32', 'b@32'), c, d),
         ('fcsel_ge', ('fneg', ('fabs', ('fadd', a, ('fneg', b)))), d, c)),
        (('bcsel@32(r300_is_only_used_as_float)', ignore_exact('flt', 'a@32', 'b@32'), c, d),
         ('fcsel_ge', ('fadd', a, ('fneg', b)), d, c)),
        (('bcsel@32(r300_is_only_used_as_float)', ignore_exact('fge', 'a@32', 'b@32'), c, d),
         ('fcsel_ge', ('fadd', a, ('fneg', b)), c, d)),
        (('b2f32', ('feq', 'a@32', 'b@32')),
         ('fcsel_ge', ('fneg', ('fabs', ('fadd', a, ('fneg', b)))), 1.0, 0.0)),
        (('b2f32', ('fneu', 'a@32', 'b@32')),
         ('fcsel_ge', ('fneg', ('fabs', ('fadd', a, ('fneg', b)))), 0.0, 1.0)),
        (('b2f32', ('flt', 'a@32', 'b@32')),
         ('fcsel_ge', ('fadd', a, ('fneg', b)), 0.0, 1.0)),
        (('b2f32', ('fge', 'a@32', 'b@32')),
         ('fcsel_ge', ('fadd', a, ('fneg', b)), 1.0, 0.0)),
    ]

    # (generated pass name, rule table), in the order they are emitted.
    passes = [
        ("r300_transform_vs_trig_input", transform_trig_input_vs_r500),
        ("r300_transform_fs_trig_input", transform_trig_input_fs_r500),
        ("r300_nir_fuse_fround_d3d9", r300_nir_fuse_fround_d3d9),
        ("r300_nir_lower_bool_to_float", r300_nir_lower_bool_to_float),
        ("r300_nir_lower_bool_to_float_fs", r300_nir_lower_bool_to_float_fs),
        ("r300_nir_prepare_presubtract", r300_nir_prepare_presubtract),
        ("r300_nir_opt_algebraic_late", r300_nir_opt_algebraic_late),
        ("r300_nir_post_integer_lowering", r300_nir_post_integer_lowering),
        ("r300_nir_lower_flrp", r300_nir_lower_flrp),
        ("r300_nir_lower_fcsel_r300", r300_nir_lower_fcsel_r300),
        ("r300_nir_lower_comparison_fs", r300_nir_lower_comparison_fs),
    ]

    with open(args.output, 'w') as f:
        # Terminate the include line explicitly instead of depending on the
        # rendered pass text to begin with a newline.
        f.write('#include "compiler/r300_nir.h"\n')

        for pass_name, rules in passes:
            f.write(nir_algebraic.AlgebraicPass(pass_name, rules).render())


if __name__ == '__main__':
    main()