/*
 * Copyright 2023 Valve Corporation
 * Copyright 2020 Collabora, Ltd.
 * SPDX-License-Identifier: MIT
 */

#include "nir_legacy.h"
#include "nir.h"
#include "nir_builder.h"

bool
nir_legacy_float_mod_folds(nir_alu_instr *mod)
{
   assert(mod->op == nir_op_fabs || mod->op == nir_op_fneg);

   /* No legacy user supports fp64 modifiers */
   if (mod->def.bit_size == 64)
      return false;

   nir_foreach_use_including_if(src, &mod->def) {
      if (nir_src_is_if(src))
         return false;

      nir_instr *parent = nir_src_parent_instr(src);
      if (parent->type != nir_instr_type_alu)
         return false;

      nir_alu_instr *alu = nir_instr_as_alu(parent);
      nir_alu_src *alu_src = list_entry(src, nir_alu_src, src);
      unsigned src_index = alu_src - alu->src;
      assert(src_index < nir_op_infos[alu->op].num_inputs);

      nir_alu_type src_type = nir_op_infos[alu->op].input_types[src_index];
      if (nir_alu_type_get_base_type(src_type) != nir_type_float)
         return false;
   }

   return true;
}

static nir_legacy_alu_src
chase_alu_src_helper(const nir_src *src)
{
   nir_intrinsic_instr *load = nir_load_reg_for_def(src->ssa);

   if (load) {
      bool indirect = (load->intrinsic == nir_intrinsic_load_reg_indirect);

      return (nir_legacy_alu_src){
         .src.is_ssa = false,
         .src.reg = {
            .handle = load->src[0].ssa,
            .base_offset = nir_intrinsic_base(load),
            .indirect = indirect ? load->src[1].ssa : NULL
         },
         .fabs = nir_intrinsic_legacy_fabs(load),
         .fneg = nir_intrinsic_legacy_fneg(load),
      };
   } else {
      return (nir_legacy_alu_src){
         .src.is_ssa = true,
         .src.ssa = src->ssa,
      };
   }
}

static inline bool
chase_source_mod(nir_def **ssa, nir_op op, uint8_t *swizzle)
{
   if ((*ssa)->parent_instr->type != nir_instr_type_alu)
      return false;

   nir_alu_instr *alu = nir_instr_as_alu((*ssa)->parent_instr);
   if (alu->op != op)
      return false;

   /* If there are other uses of the modifier that don't fold, we can't fold
    * it here either, in case it's reading from a load_reg that won't be
    * emitted.
    */
   if (!nir_legacy_float_mod_folds(alu))
      return false;

   /* This only works for unary ops */
   assert(nir_op_infos[op].num_inputs == 1);

   /* To fuse the source mod in, we need to compose the swizzles and string
    * through the source.
    */
   for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; ++i)
      swizzle[i] = alu->src[0].swizzle[swizzle[i]];

   *ssa = alu->src[0].src.ssa;
   return true;
}

nir_legacy_alu_src
nir_legacy_chase_alu_src(const nir_alu_src *src, bool fuse_fabs)
{
   if (src->src.ssa->parent_instr->type == nir_instr_type_alu) {
      nir_legacy_alu_src out = {
         .src.is_ssa = true,
         .src.ssa = src->src.ssa,
      };
      STATIC_ASSERT(sizeof(src->swizzle) == sizeof(out.swizzle));
      memcpy(out.swizzle, src->swizzle, sizeof(src->swizzle));

      /* To properly handle foo(fneg(fabs(x))), we first chase fneg and then
       * fabs, since we chase from bottom-up. We don't handle fabs(fneg(x))
       * since nir_opt_algebraic should have eliminated that.
       */
      out.fneg = chase_source_mod(&out.src.ssa, nir_op_fneg, out.swizzle);
      if (fuse_fabs)
         out.fabs = chase_source_mod(&out.src.ssa, nir_op_fabs, out.swizzle);

      return out;
   } else {
      nir_legacy_alu_src out = chase_alu_src_helper(&src->src);
      memcpy(out.swizzle, src->swizzle, sizeof(src->swizzle));
      return out;
   }
}

static nir_legacy_alu_dest
chase_alu_dest_helper(nir_def *def)
{
   nir_intrinsic_instr *store = nir_store_reg_for_def(def);

   if (store) {
      bool indirect = (store->intrinsic == nir_intrinsic_store_reg_indirect);

      return (nir_legacy_alu_dest){
         .dest.is_ssa = false,
         .dest.reg = {
            .handle = store->src[1].ssa,
            .base_offset = nir_intrinsic_base(store),
            .indirect = indirect ?
               store->src[2].ssa : NULL
         },
         .fsat = nir_intrinsic_legacy_fsat(store),
         .write_mask = nir_intrinsic_write_mask(store),
      };
   } else {
      return (nir_legacy_alu_dest){
         .dest.is_ssa = true,
         .dest.ssa = def,
         .write_mask = nir_component_mask(def->num_components),
      };
   }
}

bool
nir_legacy_fsat_folds(nir_alu_instr *fsat)
{
   assert(fsat->op == nir_op_fsat);
   nir_def *def = fsat->src[0].src.ssa;

   /* No legacy user supports fp64 modifiers */
   if (def->bit_size == 64)
      return false;

   /* Must be the only use */
   if (!list_is_singular(&def->uses))
      return false;

   assert(&fsat->src[0].src ==
          list_first_entry(&def->uses, nir_src, use_link));

   nir_instr *generate = def->parent_instr;
   if (generate->type != nir_instr_type_alu)
      return false;

   nir_alu_instr *generate_alu = nir_instr_as_alu(generate);
   nir_alu_type dest_type = nir_op_infos[generate_alu->op].output_type;
   if (dest_type != nir_type_float)
      return false;

   /* If we are saturating a source modifier, e.g. fsat(fabs(x)), we need to
    * emit either the fsat or the modifier, or else the sequence disappears.
    */
   if (generate_alu->op == nir_op_fabs || generate_alu->op == nir_op_fneg)
      return false;

   /* We can't do expansions without a move in the middle */
   unsigned nr_components = generate_alu->def.num_components;
   if (fsat->def.num_components != nr_components)
      return false;

   /* We don't handle swizzles here, so check for the identity */
   for (unsigned i = 0; i < nr_components; ++i) {
      if (fsat->src[0].swizzle[i] != i)
         return false;
   }

   return true;
}

static inline bool
chase_fsat(nir_def **def)
{
   /* No legacy user supports fp64 modifiers */
   if ((*def)->bit_size == 64)
      return false;

   if (!list_is_singular(&(*def)->uses))
      return false;

   nir_src *use = list_first_entry(&(*def)->uses, nir_src, use_link);
   if (nir_src_is_if(use) ||
       nir_src_parent_instr(use)->type != nir_instr_type_alu)
      return false;

   nir_alu_instr *fsat = nir_instr_as_alu(nir_src_parent_instr(use));
   if (fsat->op != nir_op_fsat || !nir_legacy_fsat_folds(fsat))
      return false;

   /* Otherwise, we're good */
   nir_alu_instr *alu = nir_instr_as_alu(nir_src_parent_instr(use));
   *def = &alu->def;
   return true;
}

nir_legacy_alu_dest
nir_legacy_chase_alu_dest(nir_def *def)
{
   /* Try SSA fsat. No users support 64-bit modifiers. */
   if (chase_fsat(&def)) {
      return (nir_legacy_alu_dest){
         .dest.is_ssa = true,
         .dest.ssa = def,
         .fsat = true,
         .write_mask = nir_component_mask(def->num_components),
      };
   } else {
      return chase_alu_dest_helper(def);
   }
}

nir_legacy_src
nir_legacy_chase_src(const nir_src *src)
{
   nir_legacy_alu_src alu_src = chase_alu_src_helper(src);
   assert(!alu_src.fabs && !alu_src.fneg);
   return alu_src.src;
}

nir_legacy_dest
nir_legacy_chase_dest(nir_def *def)
{
   nir_legacy_alu_dest alu_dest = chase_alu_dest_helper(def);
   assert(!alu_dest.fsat);
   assert(alu_dest.write_mask ==
          nir_component_mask(def->num_components));
   return alu_dest.dest;
}

static bool
fuse_mods_with_registers(nir_builder *b, nir_instr *instr, void *fuse_fabs_)
{
   bool *fuse_fabs = fuse_fabs_;

   if (instr->type != nir_instr_type_alu)
      return false;

   nir_alu_instr *alu = nir_instr_as_alu(instr);

   if ((alu->op == nir_op_fneg || (*fuse_fabs && alu->op == nir_op_fabs)) &&
       nir_legacy_float_mod_folds(alu)) {
      /* Try to fold this instruction into the load, if possible. We only do
       * this for loads in the same block as the use because uses of loads
       * which cross block boundaries aren't trivial anyway.
       */
      nir_intrinsic_instr *load = nir_load_reg_for_def(alu->src[0].src.ssa);
      if (load != NULL) {
         /* Duplicate the load before changing it in case there are other
          * users.
          * We assume someone has run CSE, so there should be at most four
          * load instructions generated (one for each possible modifier
          * combination), but likely only one or two.
          */
         b->cursor = nir_before_instr(&load->instr);
         load = nir_instr_as_intrinsic(
            nir_instr_clone(b->shader, &load->instr));
         nir_builder_instr_insert(b, &load->instr);

         if (alu->op == nir_op_fabs) {
            nir_intrinsic_set_legacy_fabs(load, true);
            nir_intrinsic_set_legacy_fneg(load, false);
         } else {
            assert(alu->op == nir_op_fneg);
            bool old_fneg = nir_intrinsic_legacy_fneg(load);
            nir_intrinsic_set_legacy_fneg(load, !old_fneg);
         }

         /* Rewrite all the users to use the modified load instruction. We
          * already know that they're all float ALU instructions because
          * nir_legacy_float_mod_folds() returned true.
          */
         nir_foreach_use_including_if_safe(use, &alu->def) {
            assert(!nir_src_is_if(use));
            assert(nir_src_parent_instr(use)->type == nir_instr_type_alu);

            nir_alu_src *alu_use = list_entry(use, nir_alu_src, src);
            nir_src_rewrite(&alu_use->src, &load->def);
            for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; ++i)
               alu_use->swizzle[i] = alu->src[0].swizzle[alu_use->swizzle[i]];
         }

         nir_instr_remove(&alu->instr);
         return true;
      } else {
         /* We don't want to attempt to add saturate to foldable mod ops */
         return false;
      }
   }

   nir_legacy_alu_dest dest = nir_legacy_chase_alu_dest(&alu->def);
   if (dest.fsat) {
      nir_intrinsic_instr *store = nir_store_reg_for_def(dest.dest.ssa);
      if (store) {
         nir_intrinsic_set_legacy_fsat(store, true);
         nir_src_rewrite(&store->src[0], &alu->def);
         return true;
      }
   }

   return false;
}

void
nir_legacy_trivialize(nir_shader *s, bool fuse_fabs)
{
   /* First, fuse modifiers with registers. This ensures that the helpers do
    * not chase registers recursively, making registers easier to trivialize.
    */
   if (nir_shader_instructions_pass(s, fuse_mods_with_registers,
                                    nir_metadata_control_flow, &fuse_fabs)) {
      /* If we made progress, we likely left dead loads. Clean them up. */
      NIR_PASS_V(s, nir_opt_dce);
   }

   /* Now that modifiers are dealt with, we can trivialize the regular way. */
   NIR_PASS_V(s, nir_trivialize_registers);
}
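
/*
 * Illustrative sketch of how a legacy backend might consume this API after
 * running nir_legacy_trivialize(). Here "ctx" and emit_legacy_alu() are
 * hypothetical backend pieces (not NIR functions), and a real backend would
 * also skip load_reg/store_reg intrinsics and any fneg/fabs/fsat ALU ops
 * that fold into their uses (see nir_legacy_float_mod_folds() and
 * nir_legacy_fsat_folds()) so the folded modifiers are not emitted twice.
 *
 *    nir_legacy_trivialize(shader, true);
 *
 *    nir_foreach_block(block, impl) {
 *       nir_foreach_instr(instr, block) {
 *          if (instr->type != nir_instr_type_alu)
 *             continue;
 *
 *          nir_alu_instr *alu = nir_instr_as_alu(instr);
 *          nir_legacy_alu_dest dest = nir_legacy_chase_alu_dest(&alu->def);
 *          nir_legacy_alu_src src0 =
 *             nir_legacy_chase_alu_src(&alu->src[0], true);
 *
 *          emit_legacy_alu(ctx, alu->op, &dest, &src0);
 *       }
 *    }
 */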