/*
 * Copyright © 2020 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir.h"
#include "nir_worklist.h"
#include "util/u_vector.h"

static bool
combine_all_barriers(nir_intrinsic_instr *a, nir_intrinsic_instr *b, void *_)
{
   nir_intrinsic_set_memory_modes(
      a, nir_intrinsic_memory_modes(a) | nir_intrinsic_memory_modes(b));
   nir_intrinsic_set_memory_semantics(
      a, nir_intrinsic_memory_semantics(a) | nir_intrinsic_memory_semantics(b));
   nir_intrinsic_set_memory_scope(
      a, MAX2(nir_intrinsic_memory_scope(a), nir_intrinsic_memory_scope(b)));
   nir_intrinsic_set_execution_scope(
      a, MAX2(nir_intrinsic_execution_scope(a), nir_intrinsic_execution_scope(b)));
   return true;
}

static bool
nir_opt_combine_barriers_impl(nir_function_impl *impl,
                              nir_combine_barrier_cb combine_cb, void *data)
{
   bool progress = false;

   nir_foreach_block(block, impl) {
      nir_intrinsic_instr *prev = NULL;

      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic) {
            prev = NULL;
            continue;
         }

         nir_intrinsic_instr *current = nir_instr_as_intrinsic(instr);
         if (current->intrinsic != nir_intrinsic_barrier) {
            prev = NULL;
            continue;
         }

         if (prev && combine_cb(prev, current, data)) {
            nir_instr_remove(&current->instr);
            progress = true;
         } else {
            prev = current;
         }
      }
   }

   if (progress) {
      nir_metadata_preserve(impl, nir_metadata_control_flow |
                                     nir_metadata_live_defs);
   } else {
      nir_metadata_preserve(impl, nir_metadata_all);
   }

   return progress;
}
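/* Backends that cannot merge every pair of adjacent barriers can pass their
 * own nir_combine_barrier_cb to nir_opt_combine_barriers() below.  As an
 * illustrative sketch only (combine_same_exec_scope_barriers is a
 * hypothetical name, not used by any backend), such a callback might refuse
 * to merge barriers whose execution scopes differ and otherwise fall back to
 * the OR/MAX combination performed by combine_all_barriers() above:
 *
 *    static bool
 *    combine_same_exec_scope_barriers(nir_intrinsic_instr *a,
 *                                     nir_intrinsic_instr *b, void *data)
 *    {
 *       if (nir_intrinsic_execution_scope(a) !=
 *           nir_intrinsic_execution_scope(b))
 *          return false;
 *
 *       return combine_all_barriers(a, b, data);
 *    }
 */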
/* Combine adjacent scoped barriers. */
bool
nir_opt_combine_barriers(nir_shader *shader,
                         nir_combine_barrier_cb combine_cb,
                         void *data)
{
   /* Default to combining everything. Only some backends can do better. */
   if (!combine_cb)
      combine_cb = combine_all_barriers;

   bool progress = false;

   nir_foreach_function_impl(impl, shader) {
      if (nir_opt_combine_barriers_impl(impl, combine_cb, data)) {
         progress = true;
      }
   }

   return progress;
}

static bool
barrier_happens_before(const nir_instr *a, const nir_instr *b)
{
   if (a->block == b->block)
      return a->index < b->index;

   return nir_block_dominates(a->block, b->block);
}

static bool
nir_opt_barrier_modes_impl(nir_function_impl *impl)
{
   bool progress = false;

   nir_instr_worklist *barriers = nir_instr_worklist_create();
   if (!barriers)
      return false;

   struct u_vector mem_derefs;
   if (!u_vector_init(&mem_derefs, 32, sizeof(struct nir_instr *))) {
      nir_instr_worklist_destroy(barriers);
      return false;
   }

   const unsigned all_memory_modes = nir_var_image |
                                     nir_var_mem_ssbo |
                                     nir_var_mem_shared |
                                     nir_var_mem_global;

   nir_foreach_block_safe(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type == nir_instr_type_intrinsic) {
            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
            if (intrin->intrinsic == nir_intrinsic_barrier)
               nir_instr_worklist_push_tail(barriers, instr);
         } else if (instr->type == nir_instr_type_deref) {
            nir_deref_instr *deref = nir_instr_as_deref(instr);
            if (nir_deref_mode_may_be(deref, all_memory_modes) ||
                glsl_contains_atomic(deref->type)) {
               nir_deref_instr **tail = u_vector_add(&mem_derefs);
               *tail = deref;
            }
         }
      }
   }

   nir_foreach_instr_in_worklist(instr, barriers) {
      nir_intrinsic_instr *barrier = nir_instr_as_intrinsic(instr);

      const unsigned barrier_modes = nir_intrinsic_memory_modes(barrier);
      unsigned new_modes = barrier_modes & ~all_memory_modes;

      /* If a barrier dominates all memory accesses for a particular mode (or
       * there are none), then the barrier cannot affect those accesses.  We
       * can drop that mode from the barrier.
       *
       * For each barrier, we look at the list of memory derefs, and see if
       * the barrier fails to dominate the deref.  If so, then there's at
       * least one memory access that may happen before the barrier, so we
       * need to keep the mode.  Any modes not kept are discarded.
       */
      nir_deref_instr **p_deref;
      u_vector_foreach(p_deref, &mem_derefs) {
         nir_deref_instr *deref = *p_deref;
         const unsigned atomic_mode =
            glsl_contains_atomic(deref->type) ? nir_var_mem_ssbo : 0;
         const unsigned deref_modes =
            (deref->modes | atomic_mode) & barrier_modes;

         if (deref_modes &&
             !barrier_happens_before(&barrier->instr, &deref->instr))
            new_modes |= deref_modes;
      }

      /* If we don't need all the modes, update the barrier. */
      if (barrier_modes != new_modes) {
         nir_intrinsic_set_memory_modes(barrier, new_modes);
         progress = true;
      }

      /* Shared memory only exists within a workgroup, so synchronizing it
       * beyond workgroup scope is nonsense.
       */
      if (nir_intrinsic_execution_scope(barrier) == SCOPE_NONE &&
          new_modes == nir_var_mem_shared) {
         nir_intrinsic_set_memory_scope(barrier,
            MIN2(nir_intrinsic_memory_scope(barrier), SCOPE_WORKGROUP));
         progress = true;
      }
   }

   nir_instr_worklist_destroy(barriers);
   u_vector_finish(&mem_derefs);

   return progress;
}
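/* Worked example for the mode reduction above (illustrative): suppose a
 * barrier carries nir_var_mem_shared | nir_var_mem_ssbo, every SSBO deref in
 * the function is dominated by the barrier, and at least one shared-memory
 * deref is not.  The u_vector_foreach loop then ORs only nir_var_mem_shared
 * back into new_modes, so the barrier is rewritten as a shared-only barrier
 * and, if it is a memory-only barrier (SCOPE_NONE execution scope), the
 * SCOPE_WORKGROUP clamp can shrink its memory scope as well.
 */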
/**
 * Reduce barriers to remove unnecessary modes and scope.
 *
 * This pass must be called before nir_lower_explicit_io lowers derefs!
 *
 * Many shaders issue full memory barriers, which may need to synchronize
 * access to images, SSBOs, shared local memory, or global memory.  However,
 * many of them only use a subset of those memory types - say, only SSBOs.
 *
 * Shaders may also have patterns such as:
 *
 *    1. shared local memory access
 *    2. barrier with full variable modes
 *    3. more shared local memory access
 *    4. image access
 *
 * In this case, the barrier is needed to ensure synchronization between the
 * various shared memory operations.  Image reads and writes do exist, but
 * they are all on one side of the barrier, so it is a no-op for image
 * access.  We can drop the image mode from the barrier in this case too.
 *
 * In addition, we can reduce the memory scope of shared-only barriers, as
 * shared local memory only exists within a workgroup.
 */
bool
nir_opt_barrier_modes(nir_shader *shader)
{
   bool progress = false;

   nir_foreach_function_impl(impl, shader) {
      nir_metadata_require(impl, nir_metadata_dominance |
                                    nir_metadata_instr_index);

      if (nir_opt_barrier_modes_impl(impl)) {
         nir_metadata_preserve(impl, nir_metadata_control_flow |
                                        nir_metadata_live_defs);
         progress = true;
      } else {
         nir_metadata_preserve(impl, nir_metadata_all);
      }
   }

   return progress;
}
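/* Typical usage (illustrative sketch; actual pass ordering is driver
 * specific, and "nir" here simply stands for the shader being compiled):
 *
 *    // While variable-mode derefs still exist, i.e. before
 *    // nir_lower_explicit_io():
 *    progress |= nir_opt_barrier_modes(nir);
 *
 *    // Later, to merge back-to-back barriers.  A NULL callback selects the
 *    // default combine_all_barriers() behavior.
 *    progress |= nir_opt_combine_barriers(nir, NULL, NULL);
 */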