1 /* 2 * Copyright © 2022 Collabora, Ltd. 3 * SPDX-License-Identifier: MIT 4 */ 5 6 #ifndef NAK_H 7 #define NAK_H 8 9 #include "compiler/shader_enums.h" 10 #include "nir.h" 11 12 #include <assert.h> 13 #include <stdbool.h> 14 #include <stdint.h> 15 16 #ifdef __cplusplus 17 extern "C" { 18 #endif 19 20 #define NAK_SUBGROUP_SIZE 32 21 22 struct nak_compiler; 23 struct nir_shader_compiler_options; 24 struct nv_device_info; 25 26 struct nak_compiler *nak_compiler_create(const struct nv_device_info *dev); 27 void nak_compiler_destroy(struct nak_compiler *nak); 28 29 uint64_t nak_debug_flags(const struct nak_compiler *nak); 30 31 const struct nir_shader_compiler_options * 32 nak_nir_options(const struct nak_compiler *nak); 33 34 void nak_preprocess_nir(nir_shader *nir, const struct nak_compiler *nak); 35 36 struct nak_sample_location { 37 uint8_t x_u4 : 4; 38 uint8_t y_u4 : 4; 39 }; 40 static_assert(sizeof(struct nak_sample_location) == 1, 41 "This struct has no holes"); 42 43 struct nak_sample_mask { 44 uint16_t sample_mask; 45 }; 46 static_assert(sizeof(struct nak_sample_mask) == 2, 47 "This struct has no holes"); 48 49 PRAGMA_DIAGNOSTIC_PUSH 50 PRAGMA_DIAGNOSTIC_ERROR(-Wpadded) 51 struct nak_fs_key { 52 bool zs_self_dep; 53 54 /** True if sample shading is forced on via an API knob such as 55 * VkPipelineMultisampleStateCreateInfo::minSampleShading 56 */ 57 bool force_sample_shading; 58 bool uses_underestimate; 59 60 /** 61 * The constant buffer index and offset at which the sample locations and 62 * pass sample masks tables lives. 63 */ 64 uint8_t sample_info_cb; 65 66 /** 67 * The offset into sample_info_cb at which the sample locations live. The 68 * sample locations table is an array of nak_sample_location where each 69 * sample location is two 4-bit unorm values packed into an 8-bit value 70 * with the bottom 4 bits for x and the top 4 bits for y. 71 */ 72 uint32_t sample_locations_offset; 73 74 /** 75 * The offset into sample_info_cb at which the sample masks table lives. 76 * The sample masks table is an array of nak_sample_mask where each entry 77 * represents the set of samples covered by that pass corresponding to that 78 * sample in a multi-pass fragment shader invocaiton. 79 */ 80 uint32_t sample_masks_offset; 81 }; 82 PRAGMA_DIAGNOSTIC_POP 83 static_assert(sizeof(struct nak_fs_key) == 12, "This struct has no holes"); 84 85 86 void nak_postprocess_nir(nir_shader *nir, const struct nak_compiler *nak, 87 nir_variable_mode robust2_modes, 88 const struct nak_fs_key *fs_key); 89 90 enum ENUM_PACKED nak_ts_domain { 91 NAK_TS_DOMAIN_ISOLINE = 0, 92 NAK_TS_DOMAIN_TRIANGLE = 1, 93 NAK_TS_DOMAIN_QUAD = 2, 94 }; 95 96 enum ENUM_PACKED nak_ts_spacing { 97 NAK_TS_SPACING_INTEGER = 0, 98 NAK_TS_SPACING_FRACT_ODD = 1, 99 NAK_TS_SPACING_FRACT_EVEN = 2, 100 }; 101 102 enum ENUM_PACKED nak_ts_prims { 103 NAK_TS_PRIMS_POINTS = 0, 104 NAK_TS_PRIMS_LINES = 1, 105 NAK_TS_PRIMS_TRIANGLES_CW = 2, 106 NAK_TS_PRIMS_TRIANGLES_CCW = 3, 107 }; 108 109 struct nak_xfb_info { 110 uint32_t stride[4]; 111 uint8_t stream[4]; 112 uint8_t attr_count[4]; 113 uint8_t attr_index[4][128]; 114 }; 115 116 /* This struct MUST have explicit padding fields to ensure that all padding is 117 * zeroed and the zeros get properly copied, even across API boundaries. 118 */ 119 #pragma GCC diagnostic push 120 #pragma GCC diagnostic error "-Wpadded" 121 struct nak_shader_info { 122 gl_shader_stage stage; 123 124 /** Shader model */ 125 uint8_t sm; 126 127 /** Number of GPRs used */ 128 uint8_t num_gprs; 129 130 /** 131 * Number of control barriers used 132 * 133 * These are barriers in the sense of glsl barrier(), not reconvergence 134 * barriers. In CUDA, these barriers have an index, but we currently 135 * only use index zero for vulkan, which means this will be at most 1. 136 */ 137 uint8_t num_control_barriers; 138 139 uint8_t _pad0; 140 141 /** Number of instructions used */ 142 uint32_t num_instrs; 143 144 /** Size of shader local (scratch) memory */ 145 uint32_t slm_size; 146 147 /** Size of call/return stack in bytes/warp */ 148 uint32_t crs_size; 149 150 union { 151 struct { 152 /* Local workgroup size */ 153 uint16_t local_size[3]; 154 155 /* Shared memory size */ 156 uint16_t smem_size; 157 158 uint8_t _pad[4]; 159 } cs; 160 161 struct { 162 bool writes_depth; 163 bool reads_sample_mask; 164 bool post_depth_coverage; 165 bool uses_sample_shading; 166 bool early_fragment_tests; 167 168 uint8_t _pad[7]; 169 } fs; 170 171 struct { 172 enum nak_ts_domain domain; 173 enum nak_ts_spacing spacing; 174 enum nak_ts_prims prims; 175 176 uint8_t _pad[9]; 177 } ts; 178 179 /* Used to initialize the union for other stages */ 180 uint8_t _pad[12]; 181 }; 182 183 struct { 184 bool writes_layer; 185 bool writes_point_size; 186 uint8_t clip_enable; 187 uint8_t cull_enable; 188 189 struct nak_xfb_info xfb; 190 } vtg; 191 192 /** Shader header for 3D stages */ 193 uint32_t hdr[32]; 194 }; 195 #pragma GCC diagnostic pop 196 197 struct nak_shader_bin { 198 struct nak_shader_info info; 199 200 uint32_t code_size; 201 const void *code; 202 203 const char *asm_str; 204 }; 205 206 void nak_shader_bin_destroy(struct nak_shader_bin *bin); 207 208 struct nak_shader_bin * 209 nak_compile_shader(nir_shader *nir, bool dump_asm, 210 const struct nak_compiler *nak, 211 nir_variable_mode robust2_modes, 212 const struct nak_fs_key *fs_key); 213 214 struct nak_qmd_cbuf { 215 uint32_t index; 216 uint32_t size; 217 uint64_t addr; 218 }; 219 220 struct nak_qmd_info { 221 uint64_t addr; 222 223 uint16_t smem_size; 224 uint16_t smem_max; 225 226 uint32_t global_size[3]; 227 228 uint32_t num_cbufs; 229 struct nak_qmd_cbuf cbufs[8]; 230 }; 231 232 void nak_fill_qmd(const struct nv_device_info *dev, 233 const struct nak_shader_info *info, 234 const struct nak_qmd_info *qmd_info, 235 void *qmd_out, size_t qmd_size); 236 237 struct nak_qmd_dispatch_size_layout { 238 uint16_t x_start, x_end; 239 uint16_t y_start, y_end; 240 uint16_t z_start, z_end; 241 }; 242 243 struct nak_qmd_dispatch_size_layout 244 nak_get_qmd_dispatch_size_layout(const struct nv_device_info *dev); 245 246 #ifdef __cplusplus 247 } 248 #endif 249 250 #endif /* NAK_H */ 251