/*
 * Copyright (C) 2018 Alyssa Rosenzweig
 * Copyright (C) 2019-2021 Collabora, Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "pan_shader.h"
#include "pan_format.h"

#if PAN_ARCH <= 5
#include "panfrost/midgard/midgard_compile.h"
#else
#include "panfrost/compiler/bifrost_compile.h"
#endif

/* Return the NIR compiler options table for the architecture this file is
 * being built for: the v9 Bifrost table on PAN_ARCH >= 9, the v6 Bifrost
 * table on PAN_ARCH 6..8, and the Midgard table otherwise.
 */
const nir_shader_compiler_options *
GENX(pan_shader_get_compiler_options)(void)
{
#if PAN_ARCH >= 9
   return &bifrost_nir_options_v9;
#elif PAN_ARCH >= 6
   return &bifrost_nir_options_v6;
#else
   return &midgard_nir_options;
#endif
}

#if PAN_ARCH >= 6
/* Translate a sized NIR ALU type into the matching register file format.
 *
 * Only 16/32-bit float, int and uint are handled; a type of 0 is accepted
 * and mapped to 0. Anything else is flagged as unreachable.
 */
static enum mali_register_file_format
bifrost_blend_type_from_nir(nir_alu_type nir_type)
{
   switch (nir_type) {
   case 0: /* Render target not in use */
      return 0;
   case nir_type_float16:
      return MALI_REGISTER_FILE_FORMAT_F16;
   case nir_type_float32:
      return MALI_REGISTER_FILE_FORMAT_F32;
   case nir_type_int32:
      return MALI_REGISTER_FILE_FORMAT_I32;
   case nir_type_uint32:
      return MALI_REGISTER_FILE_FORMAT_U32;
   case nir_type_int16:
      return MALI_REGISTER_FILE_FORMAT_I16;
   case nir_type_uint16:
      return MALI_REGISTER_FILE_FORMAT_U16;
   default:
      unreachable("Unsupported blend shader type for NIR alu type");
      return 0;
   }
}

#if PAN_ARCH <= 7
/* Compute the register file format for a blend input given a pipe format.
 *
 * Only the bit size is taken from T_size; the base type (float/int/uint)
 * comes from the unpacked type that pan_unpacked_type_for_format() reports
 * for `format`. The two are combined and translated with
 * bifrost_blend_type_from_nir().
 */
enum mali_register_file_format
GENX(pan_fixup_blend_type)(nir_alu_type T_size, enum pipe_format format)
{
   const struct util_format_description *desc = util_format_description(format);
   unsigned size = nir_alu_type_get_type_size(T_size);
   nir_alu_type T_format = pan_unpacked_type_for_format(desc);
   nir_alu_type T = nir_alu_type_get_base_type(T_format) | size;

   return bifrost_blend_type_from_nir(T);
}
#endif
#endif

/* This is only needed on Midgard. It's the same on both v4 and v5, so only
 * compile once to avoid the GenXML dependency for calls.
 *
 * Reads exactly 8 entries from `formats` and returns a mask in which bit i is
 * set when the writeback format recorded for formats[i] in
 * panfrost_blendable_formats_v6 compares below MALI_COLOR_FORMAT_R8.
 * NOTE(review): presumably such entries are the ones that must be written
 * back raw -- confirm against the blendable format table.
 */
#if PAN_ARCH == 5
uint8_t
pan_raw_format_mask_midgard(enum pipe_format *formats)
{
   uint8_t out = 0;

   for (unsigned i = 0; i < 8; i++) {
      enum pipe_format fmt = formats[i];
      unsigned wb_fmt = panfrost_blendable_formats_v6[fmt].writeback;

      if (wb_fmt < MALI_COLOR_FORMAT_R8)
         out |= BITFIELD_BIT(i);
   }

   return out;
}
#endif

/* Compile a NIR shader and fill in the shader info consumed by the driver.
 *
 * `info` is zeroed first, then handed to the architecture's backend compiler
 * (Bifrost on PAN_ARCH >= 6, Midgard otherwise) together with `s`, `inputs`
 * and `binary`. Afterwards the stage-independent and stage-specific fields
 * of `info` are derived from s->info. Vertex and fragment shaders get
 * dedicated handling; every other stage is treated as compute.
 */
void
GENX(pan_shader_compile)(nir_shader *s, struct panfrost_compile_inputs *inputs,
                         struct util_dynarray *binary,
                         struct pan_shader_info *info)
{
   memset(info, 0, sizeof(*info));

#if PAN_ARCH >= 6
   bifrost_compile_shader_nir(s, inputs, binary, info);
#else
   midgard_compile_shader_nir(s, inputs, binary, info);
#endif

   info->stage = s->info.stage;
   info->contains_barrier =
      s->info.uses_memory_barrier || s->info.uses_control_barrier;
   info->separable = s->info.separate_shader;

   switch (info->stage) {
   case MESA_SHADER_VERTEX:
      info->attributes_read = s->info.inputs_read;
      info->attributes_read_count = util_bitcount64(info->attributes_read);
      info->attribute_count = info->attributes_read_count;

#if PAN_ARCH <= 5
      /* Reading vertex/instance ID raises the attribute count so that it
       * covers the PAN_VERTEX_ID / PAN_INSTANCE_ID indices.
       */
      bool vertex_id = BITSET_TEST(s->info.system_values_read,
                                   SYSTEM_VALUE_VERTEX_ID_ZERO_BASE);
      if (vertex_id)
         info->attribute_count = MAX2(info->attribute_count, PAN_VERTEX_ID + 1);

      bool instance_id =
         BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_INSTANCE_ID);
      if (instance_id)
         info->attribute_count =
            MAX2(info->attribute_count, PAN_INSTANCE_ID + 1);
#endif

      /* outputs_written is a 64-bit mask: test it with a 64-bit bit, like
       * the FRAG_RESULT_* tests below, rather than with an int shift.
       */
      info->vs.writes_point_size =
         s->info.outputs_written & BITFIELD64_BIT(VARYING_SLOT_PSIZ);

#if PAN_ARCH >= 9
      info->varyings.output_count =
         util_last_bit(s->info.outputs_written >> VARYING_SLOT_VAR0);
#endif
      break;

   case MESA_SHADER_FRAGMENT:
      if (s->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
         info->fs.writes_depth = true;
      if (s->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL))
         info->fs.writes_stencil = true;
      if (s->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK))
         info->fs.writes_coverage = true;

      /* Colour output masks are rebased so bit 0 is FRAG_RESULT_DATA0 */
      info->fs.outputs_read = s->info.outputs_read >> FRAG_RESULT_DATA0;
      info->fs.outputs_written = s->info.outputs_written >> FRAG_RESULT_DATA0;
      info->fs.sample_shading = s->info.fs.uses_sample_shading;
      info->fs.untyped_color_outputs = s->info.fs.untyped_color_outputs;

      info->fs.can_discard = s->info.fs.uses_discard;
      info->fs.early_fragment_tests = s->info.fs.early_fragment_tests;

      /* List of reasons we need to execute frag shaders when things
       * are masked off */
      info->fs.sidefx = s->info.writes_memory || s->info.fs.uses_discard;

      /* With suitable ZSA/blend, is early-z possible? */
      info->fs.can_early_z = !info->fs.sidefx && !info->fs.writes_depth &&
                             !info->fs.writes_stencil &&
                             !info->fs.writes_coverage;

      /* Similarly with suitable state, is FPK possible? */
      info->fs.can_fpk = !info->fs.writes_depth && !info->fs.writes_stencil &&
                         !info->fs.writes_coverage && !info->fs.can_discard &&
                         !info->fs.outputs_read;

      /* Requires the same hardware guarantees, so grouped as one bit
       * in the hardware.
       */
      info->contains_barrier |= s->info.fs.needs_quad_helper_invocations;

      /* inputs_read is a 64-bit mask, so use 64-bit bit tests here too.
       * Frag coord and front face may be read either as a varying or as a
       * system value.
       */
      info->fs.reads_frag_coord =
         (s->info.inputs_read & BITFIELD64_BIT(VARYING_SLOT_POS)) ||
         BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_FRAG_COORD);
      info->fs.reads_point_coord =
         s->info.inputs_read & BITFIELD64_BIT(VARYING_SLOT_PNTC);
      info->fs.reads_face =
         (s->info.inputs_read & BITFIELD64_BIT(VARYING_SLOT_FACE)) ||
         BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_FRONT_FACE);

#if PAN_ARCH >= 9
      info->varyings.input_count =
         util_last_bit(s->info.inputs_read >> VARYING_SLOT_VAR0);
#endif
      break;

   default:
      /* Everything else treated as compute */
      info->wls_size = s->info.shared_size;
      break;
   }

   info->outputs_written = s->info.outputs_written;

   /* Images are counted on top of whatever attribute count the stage set.
    * NOTE(review): presumably because images are bound through the attribute
    * descriptors -- confirm against the descriptor emission code.
    */
   info->attribute_count += BITSET_LAST_BIT(s->info.images_used);
   info->writes_global = s->info.writes_memory;
   info->ubo_count = s->info.num_ubos;

   /* Sampler and texture counts are both taken from the textures_used set */
   info->sampler_count = info->texture_count =
      BITSET_LAST_BIT(s->info.textures_used);

   unsigned execution_mode = s->info.float_controls_execution_mode;
   info->ftz_fp16 = nir_is_denorm_flush_to_zero(execution_mode, 16);
   info->ftz_fp32 = nir_is_denorm_flush_to_zero(execution_mode, 32);

#if PAN_ARCH >= 6
   /* This is "redundant" information, but is needed in a draw-time hot path */
   for (unsigned i = 0; i < ARRAY_SIZE(info->bifrost.blend); ++i) {
      info->bifrost.blend[i].format =
         bifrost_blend_type_from_nir(info->bifrost.blend[i].type);
   }
#endif
}