/* xref: /aosp_15_r20/external/mesa3d/src/gallium/drivers/softpipe/sp_compute.c (revision 6104692788411f58d303aa86923a9ff6ecaded22) */
1 /*
2  * Copyright 2016 Red Hat.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * on the rights to use, copy, modify, merge, publish, distribute, sub
8  * license, and/or sell copies of the Software, and to permit persons to whom
9  * the Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
22  */
23 #include "util/u_inlines.h"
24 #include "util/u_math.h"
25 #include "util/u_memory.h"
26 #include "pipe/p_shader_tokens.h"
27 #include "draw/draw_context.h"
28 #include "draw/draw_vertex.h"
29 #include "sp_context.h"
30 #include "sp_screen.h"
31 #include "sp_state.h"
32 #include "sp_texture.h"
33 #include "sp_tex_sample.h"
34 #include "sp_tex_tile_cache.h"
35 
36 static void
cs_prepare(const struct sp_compute_shader * cs,struct tgsi_exec_machine * machine,int local_x,int local_y,int local_z,int g_w,int g_h,int g_d,int b_w,int b_h,int b_d,struct tgsi_sampler * sampler,struct tgsi_image * image,struct tgsi_buffer * buffer)37 cs_prepare(const struct sp_compute_shader *cs,
38            struct tgsi_exec_machine *machine,
39            int local_x, int local_y, int local_z,
40            int g_w, int g_h, int g_d,
41            int b_w, int b_h, int b_d,
42            struct tgsi_sampler *sampler,
43            struct tgsi_image *image,
44            struct tgsi_buffer *buffer )
45 {
46    int j;
47    /*
48     * Bind tokens/shader to the interpreter's machine state.
49     */
50    tgsi_exec_machine_bind_shader(machine,
51                                  cs->tokens,
52                                  sampler, image, buffer);
53 
54    if (machine->SysSemanticToIndex[TGSI_SEMANTIC_THREAD_ID] != -1) {
55       unsigned i = machine->SysSemanticToIndex[TGSI_SEMANTIC_THREAD_ID];
56       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
57          machine->SystemValue[i].xyzw[0].i[j] = local_x + j;
58          machine->SystemValue[i].xyzw[1].i[j] = local_y;
59          machine->SystemValue[i].xyzw[2].i[j] = local_z;
60       }
61    }
62 
63    if (machine->SysSemanticToIndex[TGSI_SEMANTIC_GRID_SIZE] != -1) {
64       unsigned i = machine->SysSemanticToIndex[TGSI_SEMANTIC_GRID_SIZE];
65       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
66          machine->SystemValue[i].xyzw[0].i[j] = g_w;
67          machine->SystemValue[i].xyzw[1].i[j] = g_h;
68          machine->SystemValue[i].xyzw[2].i[j] = g_d;
69       }
70    }
71 
72    if (machine->SysSemanticToIndex[TGSI_SEMANTIC_BLOCK_SIZE] != -1) {
73       unsigned i = machine->SysSemanticToIndex[TGSI_SEMANTIC_BLOCK_SIZE];
74       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
75          machine->SystemValue[i].xyzw[0].i[j] = b_w;
76          machine->SystemValue[i].xyzw[1].i[j] = b_h;
77          machine->SystemValue[i].xyzw[2].i[j] = b_d;
78       }
79    }
80 }
81 
82 static bool
cs_run(const struct sp_compute_shader * cs,int g_w,int g_h,int g_d,struct tgsi_exec_machine * machine,bool restart)83 cs_run(const struct sp_compute_shader *cs,
84        int g_w, int g_h, int g_d,
85        struct tgsi_exec_machine *machine, bool restart)
86 {
87    if (!restart) {
88       if (machine->SysSemanticToIndex[TGSI_SEMANTIC_BLOCK_ID] != -1) {
89          unsigned i = machine->SysSemanticToIndex[TGSI_SEMANTIC_BLOCK_ID];
90          int j;
91          for (j = 0; j < TGSI_QUAD_SIZE; j++) {
92             machine->SystemValue[i].xyzw[0].i[j] = g_w;
93             machine->SystemValue[i].xyzw[1].i[j] = g_h;
94             machine->SystemValue[i].xyzw[2].i[j] = g_d;
95          }
96       }
97    }
98 
99    tgsi_exec_machine_run(machine, restart ? machine->pc : 0);
100 
101    if (machine->pc != -1)
102       return true;
103    return false;
104 }
105 
106 static void
run_workgroup(const struct sp_compute_shader * cs,int g_w,int g_h,int g_d,int num_threads,struct tgsi_exec_machine ** machines)107 run_workgroup(const struct sp_compute_shader *cs,
108               int g_w, int g_h, int g_d, int num_threads,
109               struct tgsi_exec_machine **machines)
110 {
111    int i;
112    bool grp_hit_barrier, restart_threads = false;
113 
114    do {
115       grp_hit_barrier = false;
116       for (i = 0; i < num_threads; i++) {
117          grp_hit_barrier |= cs_run(cs, g_w, g_h, g_d, machines[i], restart_threads);
118       }
119       restart_threads = false;
120       if (grp_hit_barrier) {
121          grp_hit_barrier = false;
122          restart_threads = true;
123       }
124    } while (restart_threads);
125 }
126 
127 static void
cs_delete(const struct sp_compute_shader * cs,struct tgsi_exec_machine * machine)128 cs_delete(const struct sp_compute_shader *cs,
129           struct tgsi_exec_machine *machine)
130 {
131    if (machine->Tokens == cs->tokens) {
132       tgsi_exec_machine_bind_shader(machine, NULL, NULL, NULL, NULL);
133    }
134 }
135 
136 static void
fill_grid_size(struct pipe_context * context,const struct pipe_grid_info * info,uint32_t grid_size[3])137 fill_grid_size(struct pipe_context *context,
138                const struct pipe_grid_info *info,
139                uint32_t grid_size[3])
140 {
141    struct pipe_transfer *transfer;
142    uint32_t *params;
143    if (!info->indirect) {
144       grid_size[0] = info->grid[0];
145       grid_size[1] = info->grid[1];
146       grid_size[2] = info->grid[2];
147       return;
148    }
149    params = pipe_buffer_map_range(context, info->indirect,
150                                   info->indirect_offset,
151                                   3 * sizeof(uint32_t),
152                                   PIPE_MAP_READ,
153                                   &transfer);
154 
155    if (!transfer)
156       return;
157 
158    grid_size[0] = params[0];
159    grid_size[1] = params[1];
160    grid_size[2] = params[2];
161    pipe_buffer_unmap(context, transfer);
162 }
163 
164 void
softpipe_launch_grid(struct pipe_context * context,const struct pipe_grid_info * info)165 softpipe_launch_grid(struct pipe_context *context,
166                      const struct pipe_grid_info *info)
167 {
168    struct softpipe_context *softpipe = softpipe_context(context);
169    struct sp_compute_shader *cs = softpipe->cs;
170    int num_threads_in_group;
171    struct tgsi_exec_machine **machines;
172    int bwidth, bheight, bdepth;
173    int local_x, local_y, local_z, i;
174    int g_w, g_h, g_d;
175    uint32_t grid_size[3] = {0};
176    void *local_mem = NULL;
177 
178    softpipe_update_compute_samplers(softpipe);
179    bwidth = cs->info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH];
180    bheight = cs->info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT];
181    bdepth = cs->info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH];
182    num_threads_in_group = DIV_ROUND_UP(bwidth, TGSI_QUAD_SIZE) * bheight * bdepth;
183 
184    fill_grid_size(context, info, grid_size);
185 
186    uint32_t shared_mem_size = cs->shader.static_shared_mem + info->variable_shared_mem;
187    if (shared_mem_size) {
188       local_mem = CALLOC(1, shared_mem_size);
189    }
190 
191    machines = CALLOC(sizeof(struct tgsi_exec_machine *), num_threads_in_group);
192    if (!machines) {
193       FREE(local_mem);
194       return;
195    }
196 
197    /* initialise machines + GRID_SIZE + THREAD_ID  + BLOCK_SIZE */
198    int idx = 0;
199    for (local_z = 0; local_z < bdepth; local_z++) {
200       for (local_y = 0; local_y < bheight; local_y++) {
201          for (local_x = 0; local_x < bwidth; local_x += TGSI_QUAD_SIZE) {
202             machines[idx] = tgsi_exec_machine_create(PIPE_SHADER_COMPUTE);
203 
204             machines[idx]->LocalMem = local_mem;
205             machines[idx]->LocalMemSize = shared_mem_size;
206             machines[idx]->NonHelperMask = (1 << (MIN2(TGSI_QUAD_SIZE, bwidth - local_x))) - 1;
207             cs_prepare(cs, machines[idx],
208                        local_x, local_y, local_z,
209                        grid_size[0], grid_size[1], grid_size[2],
210                        bwidth, bheight, bdepth,
211                        (struct tgsi_sampler *)softpipe->tgsi.sampler[PIPE_SHADER_COMPUTE],
212                        (struct tgsi_image *)softpipe->tgsi.image[PIPE_SHADER_COMPUTE],
213                        (struct tgsi_buffer *)softpipe->tgsi.buffer[PIPE_SHADER_COMPUTE]);
214             tgsi_exec_set_constant_buffers(machines[idx], PIPE_MAX_CONSTANT_BUFFERS,
215                                            softpipe->mapped_constants[PIPE_SHADER_COMPUTE]);
216             idx++;
217          }
218       }
219    }
220 
221    for (g_d = 0; g_d < grid_size[2]; g_d++) {
222       for (g_h = 0; g_h < grid_size[1]; g_h++) {
223          for (g_w = 0; g_w < grid_size[0]; g_w++) {
224             run_workgroup(cs, g_w, g_h, g_d, num_threads_in_group, machines);
225          }
226       }
227    }
228 
229    if (softpipe->active_statistics_queries) {
230       softpipe->pipeline_statistics.cs_invocations +=
231           grid_size[0] * grid_size[1] * grid_size[2];
232    }
233 
234    for (i = 0; i < num_threads_in_group; i++) {
235       cs_delete(cs, machines[i]);
236       tgsi_exec_machine_destroy(machines[i]);
237    }
238 
239    FREE(local_mem);
240    FREE(machines);
241 }
242