xref: /aosp_15_r20/external/mesa3d/src/panfrost/lib/pan_indirect_dispatch.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright (C) 2021 Collabora, Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  *
23  */
24 
25 #include "pan_indirect_dispatch.h"
26 #include <stdio.h>
27 #include "compiler/nir/nir_builder.h"
28 #include "util/macros.h"
29 #include "util/u_memory.h"
30 #include "pan_encoder.h"
31 #include "pan_jc.h"
32 #include "pan_pool.h"
33 #include "pan_shader.h"
34 #include "pan_util.h"
35 
/*
 * Emit a one-component push-constant load for the member `name` of
 * struct pan_indirect_dispatch_info. The load's bit size is the member's
 * size in bits and its base is the member's byte offset in the struct, so
 * the push-constant layout is expected to mirror the C struct.
 */
#define get_input_field(b, name)                                               \
   nir_load_push_constant(                                                     \
      b, 1, 8 * sizeof(((struct pan_indirect_dispatch_info *)0)->name),        \
      nir_imm_int(b, 0),                                                       \
      .base = offsetof(struct pan_indirect_dispatch_info, name))
41 
42 static void
pan_indirect_dispatch_init(struct pan_indirect_dispatch_meta * meta)43 pan_indirect_dispatch_init(struct pan_indirect_dispatch_meta *meta)
44 {
45    nir_builder b = nir_builder_init_simple_shader(
46       MESA_SHADER_COMPUTE, GENX(pan_shader_get_compiler_options)(), "%s",
47       "indirect_dispatch");
48    nir_def *zero = nir_imm_int(&b, 0);
49    nir_def *one = nir_imm_int(&b, 1);
50    nir_def *num_wg =
51       nir_load_global(&b, get_input_field(&b, indirect_dim), 4, 3, 32);
52    nir_def *num_wg_x = nir_channel(&b, num_wg, 0);
53    nir_def *num_wg_y = nir_channel(&b, num_wg, 1);
54    nir_def *num_wg_z = nir_channel(&b, num_wg, 2);
55 
56    nir_def *job_hdr_ptr = get_input_field(&b, job);
57    nir_def *num_wg_flat =
58       nir_imul(&b, num_wg_x, nir_imul(&b, num_wg_y, num_wg_z));
59 
60    nir_push_if(&b, nir_ieq(&b, num_wg_flat, zero));
61    {
62       nir_def *type_ptr = nir_iadd(&b, job_hdr_ptr, nir_imm_int64(&b, 4 * 4));
63       nir_def *ntype = nir_imm_intN_t(&b, (MALI_JOB_TYPE_NULL << 1) | 1, 8);
64       nir_store_global(&b, type_ptr, 1, ntype, 1);
65    }
66    nir_push_else(&b, NULL);
67    {
68       nir_def *job_dim_ptr = nir_iadd(
69          &b, job_hdr_ptr,
70          nir_imm_int64(&b, pan_section_offset(COMPUTE_JOB, INVOCATION)));
71       nir_def *num_wg_x_m1 = nir_isub(&b, num_wg_x, one);
72       nir_def *num_wg_y_m1 = nir_isub(&b, num_wg_y, one);
73       nir_def *num_wg_z_m1 = nir_isub(&b, num_wg_z, one);
74       nir_def *job_dim = nir_load_global(&b, job_dim_ptr, 8, 2, 32);
75       nir_def *dims = nir_channel(&b, job_dim, 0);
76       nir_def *split = nir_channel(&b, job_dim, 1);
77       nir_def *num_wg_x_split =
78          nir_iand_imm(&b, nir_ushr_imm(&b, split, 10), 0x3f);
79       nir_def *num_wg_y_split = nir_iadd(
80          &b, num_wg_x_split, nir_isub_imm(&b, 32, nir_uclz(&b, num_wg_x_m1)));
81       nir_def *num_wg_z_split = nir_iadd(
82          &b, num_wg_y_split, nir_isub_imm(&b, 32, nir_uclz(&b, num_wg_y_m1)));
83       split =
84          nir_ior(&b, split,
85                  nir_ior(&b, nir_ishl(&b, num_wg_y_split, nir_imm_int(&b, 16)),
86                          nir_ishl(&b, num_wg_z_split, nir_imm_int(&b, 22))));
87       dims =
88          nir_ior(&b, dims,
89                  nir_ior(&b, nir_ishl(&b, num_wg_x_m1, num_wg_x_split),
90                          nir_ior(&b, nir_ishl(&b, num_wg_y_m1, num_wg_y_split),
91                                  nir_ishl(&b, num_wg_z_m1, num_wg_z_split))));
92 
93       nir_store_global(&b, job_dim_ptr, 8, nir_vec2(&b, dims, split), 3);
94 
95       nir_def *num_wg_x_ptr = get_input_field(&b, num_wg_sysval[0]);
96 
97       nir_push_if(&b, nir_ine_imm(&b, num_wg_x_ptr, 0));
98       {
99          nir_store_global(&b, num_wg_x_ptr, 8, num_wg_x, 1);
100          nir_store_global(&b, get_input_field(&b, num_wg_sysval[1]), 8,
101                           num_wg_y, 1);
102          nir_store_global(&b, get_input_field(&b, num_wg_sysval[2]), 8,
103                           num_wg_z, 1);
104       }
105       nir_pop_if(&b, NULL);
106    }
107 
108    nir_pop_if(&b, NULL);
109 
110    struct panfrost_compile_inputs inputs = {
111       .gpu_id = meta->gpu_id,
112       .no_ubo_to_push = true,
113    };
114    struct pan_shader_info shader_info;
115    struct util_dynarray binary;
116 
117    util_dynarray_init(&binary, NULL);
118    pan_shader_preprocess(b.shader, inputs.gpu_id);
119    GENX(pan_shader_compile)(b.shader, &inputs, &binary, &shader_info);
120 
121    ralloc_free(b.shader);
122 
123    assert(!shader_info.tls_size);
124    assert(!shader_info.wls_size);
125 
126    shader_info.push.count =
127       DIV_ROUND_UP(sizeof(struct pan_indirect_dispatch_info), 4);
128 
129    struct panfrost_ptr bin =
130       pan_pool_alloc_aligned(meta->bin_pool, binary.size, 64);
131 
132    memcpy(bin.cpu, binary.data, binary.size);
133    util_dynarray_fini(&binary);
134 
135    struct panfrost_ptr rsd =
136       pan_pool_alloc_desc(meta->desc_pool, RENDERER_STATE);
137    struct panfrost_ptr tsd =
138       pan_pool_alloc_desc(meta->desc_pool, LOCAL_STORAGE);
139 
140    pan_pack(rsd.cpu, RENDERER_STATE, cfg) {
141       pan_shader_prepare_rsd(&shader_info, bin.gpu, &cfg);
142    }
143 
144    pan_pack(tsd.cpu, LOCAL_STORAGE, ls) {
145       ls.wls_instances = MALI_LOCAL_STORAGE_NO_WORKGROUP_MEM;
146    };
147 
148    meta->rsd = rsd.gpu;
149    meta->tsd = tsd.gpu;
150 }
151 
152 unsigned
GENX(pan_indirect_dispatch_emit)153 GENX(pan_indirect_dispatch_emit)(struct pan_indirect_dispatch_meta *meta,
154                                  struct pan_pool *pool, struct pan_jc *jc,
155                                  const struct pan_indirect_dispatch_info *inputs)
156 {
157    struct panfrost_ptr job = pan_pool_alloc_desc(pool, COMPUTE_JOB);
158    void *invocation = pan_section_ptr(job.cpu, COMPUTE_JOB, INVOCATION);
159 
160    /* If we haven't compiled the indirect dispatch shader yet, do it now */
161    if (!meta->rsd)
162       pan_indirect_dispatch_init(meta);
163 
164    panfrost_pack_work_groups_compute(invocation, 1, 1, 1, 1, 1, 1, false,
165                                      false);
166 
167    pan_section_pack(job.cpu, COMPUTE_JOB, PARAMETERS, cfg) {
168       cfg.job_task_split = 2;
169    }
170 
171    pan_section_pack(job.cpu, COMPUTE_JOB, DRAW, cfg) {
172       cfg.state = meta->rsd;
173       cfg.thread_storage = meta->tsd;
174       cfg.push_uniforms =
175          pan_pool_upload_aligned(pool, inputs, sizeof(*inputs), 16);
176    }
177 
178    return pan_jc_add_job(jc, MALI_JOB_TYPE_COMPUTE, false, true, 0, 0, &job,
179                          false);
180 }
181