xref: /aosp_15_r20/external/mesa3d/src/panfrost/compiler/bifrost_compile.h (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright (C) 2018-2019 Alyssa Rosenzweig <[email protected]>
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  */
23 
24 #ifndef __BIFROST_PUBLIC_H_
25 #define __BIFROST_PUBLIC_H_
26 
27 #include "compiler/nir/nir.h"
28 #include "panfrost/util/pan_ir.h"
29 #include "util/u_dynarray.h"
30 
31 void bifrost_preprocess_nir(nir_shader *nir, unsigned gpu_id);
32 
33 void bifrost_compile_shader_nir(nir_shader *nir,
34                                 const struct panfrost_compile_inputs *inputs,
35                                 struct util_dynarray *binary,
36                                 struct pan_shader_info *info);
37 
/*
 * DEFINE_OPTIONS(arch) defines the file-scope constant
 * bifrost_nir_options_v<arch>, a nir_shader_compiler_options table
 * parameterised by arch.
 *
 * arch is pasted into the identifier with ##, so it must be a single token
 * such as an integer literal. It is also used as an integer value: the only
 * field that depends on it is vertex_id_zero_based, which is set when
 * arch <= 7. Every other field is identical for all values of arch.
 *
 * The object is static const and this is a header, so every translation unit
 * that includes it gets its own private copy.
 *
 * NOTE(review): the expansion already ends in ';', so a ';' written after an
 * invocation is a redundant empty declaration. The trailing ';' is kept here
 * so that invocations written with or without their own ';' keep compiling.
 */
#define DEFINE_OPTIONS(arch)                                                   \
   static const nir_shader_compiler_options bifrost_nir_options_v##arch = {    \
      .lower_scmp = true,                                                      \
      .lower_flrp16 = true,                                                    \
      .lower_flrp32 = true,                                                    \
      .lower_flrp64 = true,                                                    \
      .lower_ffract = true,                                                    \
      .lower_fmod = true,                                                      \
      .lower_fdiv = true,                                                      \
      .lower_isign = true,                                                     \
      .lower_find_lsb = true,                                                  \
      .lower_ifind_msb = true,                                                 \
      .lower_fdph = true,                                                      \
      .lower_fsqrt = true,                                                     \
                                                                               \
      .lower_fsign = true,                                                     \
                                                                               \
      .lower_bitfield_insert = true,                                           \
      .lower_bitfield_extract = true,                                          \
      .lower_insert_byte = true,                                               \
                                                                               \
      /* Vertex ID is zero based in the traditional geometry flows, but not in \
       * the memory-allocated IDVS flow introduced and used exclusively in     \
       * Valhall. So this is a machine property for us.                        \
       */                                                                      \
      .vertex_id_zero_based = ((arch) <= 7),                                   \
                                                                               \
      .lower_pack_64_4x16 = true,                                              \
      .lower_pack_half_2x16 = true,                                            \
      .lower_pack_unorm_2x16 = true,                                           \
      .lower_pack_snorm_2x16 = true,                                           \
      .lower_pack_unorm_4x8 = true,                                            \
      .lower_pack_snorm_4x8 = true,                                            \
      .lower_unpack_half_2x16 = true,                                          \
      .lower_unpack_unorm_2x16 = true,                                         \
      .lower_unpack_snorm_2x16 = true,                                         \
      .lower_unpack_unorm_4x8 = true,                                          \
      .lower_unpack_snorm_4x8 = true,                                          \
      .lower_pack_split = true,                                                \
                                                                               \
      .lower_doubles_options =                                                 \
         nir_lower_dmod, /* TODO: Don't lower supported 64-bit operations */   \
      .lower_int64_options = ~0, /* TODO: Use IMULD on v7 */                   \
      .lower_mul_high = true,                                                  \
      .lower_fisnormal = true,                                                 \
      .lower_uadd_carry = true,                                                \
      .lower_usub_borrow = true,                                               \
                                                                               \
      .has_isub = true,                                                        \
      .vectorize_io = true,                                                    \
      .vectorize_vec2_16bit = true,                                            \
      .fuse_ffma16 = true,                                                     \
      .fuse_ffma32 = true,                                                     \
      .fuse_ffma64 = true,                                                     \
      .use_interpolated_input_intrinsics = true,                               \
                                                                               \
      .lower_uniforms_to_ubo = true,                                           \
                                                                               \
      .has_cs_global_id = true,                                                \
      .lower_cs_local_index_to_id = true,                                      \
      .lower_device_index_to_zero = true,                                      \
      .max_unroll_iterations = 32,                                             \
      .force_indirect_unrolling =                                              \
         (nir_var_shader_in | nir_var_shader_out | nir_var_function_temp),     \
      .force_indirect_unrolling_sampler = true,                                \
      .has_ddx_intrinsics = true,                                              \
      .scalarize_ddx = true,                                                   \
   };
106 
/* Instantiate the option tables. DEFINE_OPTIONS(6) defines
 * bifrost_nir_options_v6, where vertex_id_zero_based is true (6 <= 7);
 * DEFINE_OPTIONS(9) defines bifrost_nir_options_v9, where it is false.
 * All other fields are the same in both tables.
 */
DEFINE_OPTIONS(6);
DEFINE_OPTIONS(9);
109 
110 #endif
111