#
# Copyright (C) 2018 Alyssa Rosenzweig
# Copyright (C) 2019-2020 Collabora, Ltd.
#
# Copyright (C) 2016 Intel Corporation
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice (including the next
# paragraph) shall be included in all copies or substantial portions of the
# Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.

"""Search/replace rule tables for the Midgard NIR algebraic passes.

Each rule is a ``(search, replace)`` pair of nested tuples. When run as a
script, the tables are handed to ``nir_algebraic.AlgebraicPass`` and the
rendered output of each pass is printed to stdout.
"""

import argparse
import sys
import math

# Pattern variables used throughout the rule tables.
a = 'a'
b = 'b'
c = 'c'

algebraic = [
    # Allows us to schedule as a multiply by 2
    (('~fadd', ('fadd', a, b), a), ('fadd', ('fadd', a, a), b)),

    # Midgard scales fsin/fcos arguments by pi.
    (('fsin', a), ('fsin_mdg', ('fdiv', a, math.pi))),
    (('fcos', a), ('fcos_mdg', ('fdiv', a, math.pi))),
]

algebraic_late = [
    # Likewise we want fsub lowered but not isub
    (('fsub', a, b), ('fadd', a, ('fneg', b))),

    # These two special-cases save space/an op than the actual csel op +
    # scheduler flexibility

    (('b32csel', a, 'b@32', 0), ('iand', a, b)),
    (('b32csel', a, 0, 'b@32'), ('iand', ('inot', a), b)),

    # Fuse sat_signed. This should probably be shared with Bifrost
    (('~fmin', ('fmax', a, -1.0), 1.0), ('fsat_signed_mali', a)),
    (('~fmax', ('fmin', a, 1.0), -1.0), ('fsat_signed_mali', a)),

    # Fuse clamp_positive. This should probably be shared with Utgard/bifrost
    (('fmax', a, 0.0), ('fclamp_pos_mali', a)),

    (('ishl', 'a@16', b), ('u2u16', ('ishl', ('u2u32', a), b))),
    (('ishr', 'a@16', b), ('i2i16', ('ishr', ('i2i32', a), b))),
    (('ushr', 'a@16', b), ('u2u16', ('ushr', ('u2u32', a), b))),

    (('ishl', 'a@8', b), ('u2u8', ('u2u16', ('ishl', ('u2u32', ('u2u16', a)), b)))),
    (('ishr', 'a@8', b), ('i2i8', ('i2i16', ('ishr', ('i2i32', ('i2i16', a)), b)))),
    (('ushr', 'a@8', b), ('u2u8', ('u2u16', ('ushr', ('u2u32', ('u2u16', a)), b)))),

    # Canonical form. The scheduler will convert back if it makes sense.
    (('fmul', a, 2.0), ('fadd', a, a)),
]

# Size conversion is redundant to Midgard but needed for NIR, and writing this
# lowering in MIR would be painful without a competent builder, so eat the
# extra instruction
for sz in ('8', '16', '32'):
    # Sources narrower than 32 bits are widened first; 32-bit is used as-is.
    converted = ('u2u32', a) if sz != '32' else a
    algebraic_late.append(
        (('ufind_msb', 'a@' + sz), ('isub', 31, ('uclz', converted))))

# Midgard is able to type convert down by only one "step" per instruction; if
# NIR wants more than one step, we need to break up into multiple instructions.
# Nevertheless, we can do both a size step and a floating/int step at once.

converts = []

for op in ('u2u', 'i2i', 'f2f', 'i2f', 'u2f', 'f2i', 'f2u'):
    srcsz_max = 64
    dstsz_max = 64
    # 8 bit float doesn't exist
    srcsz_min = 8 if op[0] != 'f' else 16
    dstsz_min = 8 if op[2] != 'f' else 16
    dstsz = dstsz_min
    # Iterate over all possible destination and source sizes
    while dstsz <= dstsz_max:
        srcsz = srcsz_min
        while srcsz <= srcsz_max:
            # Size converter lowering is only needed if src and dst sizes are
            # spaced by a factor > 2.
            if srcsz != dstsz and (srcsz * 2 != dstsz and srcsz != dstsz * 2):
                cursz = srcsz
                rule = a
                # When converting down we first do the type conversion followed
                # by one or more size conversions. When converting up, we do
                # the type conversion at the end. This way we don't have to
                # deal with the fact that f2f8 doesn't exist.
                sizeconvop = op[0] + '2' + op[0] if srcsz < dstsz else op[2] + '2' + op[2]
                if srcsz > dstsz and op[0] != op[2]:
                    rule = (op + str(cursz), rule)
                while cursz != dstsz:
                    # Floor division keeps the size an int, so it can be
                    # formatted into the opcode name directly.
                    cursz = cursz // 2 if dstsz < srcsz else cursz * 2
                    rule = (sizeconvop + str(cursz), rule)
                if srcsz < dstsz and op[0] != op[2]:
                    rule = (op + str(cursz), rule)
                converts.append(((op + str(dstsz), 'a@' + str(srcsz)), rule))
            srcsz *= 2
        dstsz *= 2

# Try to force constants to the right
constant_switch = [
    # fge gets flipped to fle, so we invert to keep the order
    (('fge', 'a', '#b'), ('inot', ('flt', a, b))),
    (('fge32', 'a', '#b'), ('inot', ('flt32', a, b))),
    (('ige32', 'a', '#b'), ('inot', ('ilt32', a, b))),
    (('uge32', 'a', '#b'), ('inot', ('ult32', a, b))),

    # fge gets mapped to fle with a flip
    (('flt32', '#a', 'b'), ('inot', ('fge32', a, b))),
    (('ilt32', '#a', 'b'), ('inot', ('ige32', a, b))),
    (('ult32', '#a', 'b'), ('inot', ('uge32', a, b))),
]

# ..since the above switching happens after algebraic stuff is done
cancel_inot = [
    (('inot', ('inot', a)), a),
    (('b32csel', ('inot', a), b, c), ('b32csel', a, c, b)),
]


def main():
    """Parse the command line and emit the passes.

    The required ``-p``/``--import-path`` argument is inserted at the front
    of ``sys.path`` so that ``run()`` can import ``nir_algebraic`` from it.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-p', '--import-path', required=True)
    args = parser.parse_args()
    sys.path.insert(0, args.import_path)
    run()


def run():
    """Print an include line followed by the rendered output of each pass.

    ``nir_algebraic`` is imported here rather than at module level because
    it only becomes importable after ``main()`` has extended ``sys.path``.
    """
    import nir_algebraic  # pylint: disable=import-error

    print('#include "midgard_nir.h"')

    print(nir_algebraic.AlgebraicPass("midgard_nir_lower_algebraic_early",
                                      algebraic).render())

    print(nir_algebraic.AlgebraicPass("midgard_nir_lower_algebraic_late",
                                      algebraic_late + converts + constant_switch).render())

    print(nir_algebraic.AlgebraicPass("midgard_nir_cancel_inot",
                                      cancel_inot).render())


if __name__ == '__main__':
    main()