xref: /aosp_15_r20/external/mesa3d/src/gallium/auxiliary/draw/draw_vs_variant.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /**************************************************************************
2  *
3  * Copyright 2007 VMware, Inc.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sub license, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the
15  * next paragraph) shall be included in all copies or substantial portions
16  * of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25  *
26  **************************************************************************/
27 
28  /*
29   * Authors:
30   *   Keith Whitwell <[email protected]>
31   */
32 
33 #include "util/u_memory.h"
34 #include "util/u_math.h"
35 #include "draw/draw_context.h"
36 #include "draw/draw_private.h"
37 #include "draw/draw_vbuf.h"
38 #include "draw/draw_vertex.h"
39 #include "draw/draw_vs.h"
40 #include "translate/translate.h"
41 
42 
43 /* A first pass at incorporating vertex fetch/emit functionality into
44  */
45 struct draw_vs_variant_generic {
46    struct draw_vs_variant base;
47 
48    struct draw_vertex_shader *shader;
49    struct draw_context *draw;
50 
51    /* Basic plan is to run these two translate functions before/after
52     * the vertex shader's existing run_linear() routine to simulate
53     * the inclusion of this functionality into the shader...
54     *
55     * Next will look at actually including it.
56     */
57    struct translate *fetch;
58    struct translate *emit;
59 
60    unsigned temp_vertex_stride;
61 };
62 
63 
64 
65 static void
vsvg_set_buffer(struct draw_vs_variant * variant,unsigned buffer,const void * ptr,unsigned stride,unsigned max_index)66 vsvg_set_buffer(struct draw_vs_variant *variant,
67                 unsigned buffer,
68                 const void *ptr,
69                 unsigned stride,
70                 unsigned max_index)
71 {
72    struct draw_vs_variant_generic *vsvg = (struct draw_vs_variant_generic *)variant;
73 
74    vsvg->fetch->set_buffer(vsvg->fetch, buffer, ptr, stride, max_index);
75 }
76 
77 
78 static const struct pipe_viewport_state *
find_viewport(struct draw_context * draw,char * buffer,unsigned vertex_idx,unsigned stride)79 find_viewport(struct draw_context *draw,
80               char *buffer,
81               unsigned vertex_idx,
82               unsigned stride)
83 {
84    int viewport_index_output =
85       draw_current_shader_viewport_index_output(draw);
86    const char *ptr = buffer + vertex_idx * stride;
87    const unsigned *data = (const unsigned *) ptr;
88    int viewport_index =
89       draw_current_shader_uses_viewport_index(draw) ?
90       data[viewport_index_output * 4] : 0;
91 
92    viewport_index = draw_clamp_viewport_idx(viewport_index);
93 
94    return &draw->viewports[viewport_index];
95 }
96 
97 
98 /* Mainly for debug at this stage:
99  */
100 static void
do_rhw_viewport(struct draw_vs_variant_generic * vsvg,unsigned count,void * output_buffer)101 do_rhw_viewport(struct draw_vs_variant_generic *vsvg,
102                 unsigned count,
103                 void *output_buffer)
104 {
105    char *ptr = (char *)output_buffer;
106    unsigned stride = vsvg->temp_vertex_stride;
107 
108    ptr += vsvg->base.vs->position_output * 4 * sizeof(float);
109 
110    for (unsigned j = 0; j < count; j++, ptr += stride) {
111       const struct pipe_viewport_state *viewport =
112          find_viewport(vsvg->base.vs->draw, (char*)output_buffer,
113                        j, stride);
114       const float *scale = viewport->scale;
115       const float *trans = viewport->translate;
116       float *data = (float *)ptr;
117       float w = 1.0f / data[3];
118 
119       data[0] = data[0] * w * scale[0] + trans[0];
120       data[1] = data[1] * w * scale[1] + trans[1];
121       data[2] = data[2] * w * scale[2] + trans[2];
122       data[3] = w;
123    }
124 }
125 
126 
127 static void
do_viewport(struct draw_vs_variant_generic * vsvg,unsigned count,void * output_buffer)128 do_viewport(struct draw_vs_variant_generic *vsvg,
129             unsigned count,
130             void *output_buffer)
131 {
132    char *ptr = (char *)output_buffer;
133    unsigned stride = vsvg->temp_vertex_stride;
134 
135    ptr += vsvg->base.vs->position_output * 4 * sizeof(float);
136 
137    for (unsigned j = 0; j < count; j++, ptr += stride) {
138       const struct pipe_viewport_state *viewport =
139          find_viewport(vsvg->base.vs->draw, (char*)output_buffer,
140                        j, stride);
141       const float *scale = viewport->scale;
142       const float *trans = viewport->translate;
143       float *data = (float *)ptr;
144 
145       data[0] = data[0] * scale[0] + trans[0];
146       data[1] = data[1] * scale[1] + trans[1];
147       data[2] = data[2] * scale[2] + trans[2];
148    }
149 }
150 
151 
152 static void UTIL_CDECL
vsvg_run_elts(struct draw_vs_variant * variant,const unsigned * elts,unsigned count,void * output_buffer)153 vsvg_run_elts(struct draw_vs_variant *variant,
154               const unsigned *elts,
155               unsigned count,
156               void *output_buffer)
157 {
158    struct draw_vs_variant_generic *vsvg = (struct draw_vs_variant_generic *)variant;
159    unsigned temp_vertex_stride = vsvg->temp_vertex_stride;
160    void *temp_buffer = MALLOC(align(count,4) * temp_vertex_stride +
161                               DRAW_EXTRA_VERTICES_PADDING);
162 
163    if (0) debug_printf("%s %d \n", __func__,  count);
164 
165    /* Want to do this in small batches for cache locality?
166     */
167 
168    vsvg->fetch->run_elts(vsvg->fetch,
169                          elts,
170                          count,
171                          vsvg->draw->start_instance,
172                          vsvg->draw->instance_id,
173                          temp_buffer);
174 
175    vsvg->base.vs->run_linear(vsvg->base.vs,
176                              temp_buffer,
177                              temp_buffer,
178                              vsvg->base.vs->draw->pt.user.constants[PIPE_SHADER_VERTEX],
179                              count,
180                              temp_vertex_stride,
181                              temp_vertex_stride, NULL);
182 
183    /* FIXME: geometry shading? */
184 
185    if (vsvg->base.key.clip) {
186       /* not really handling clipping, just do the rhw so we can
187        * see the results...
188        */
189       do_rhw_viewport(vsvg, count, temp_buffer);
190    } else if (vsvg->base.key.viewport) {
191       do_viewport(vsvg, count, temp_buffer);
192    }
193 
194    vsvg->emit->set_buffer(vsvg->emit,
195                           0,
196                           temp_buffer,
197                           temp_vertex_stride,
198                           ~0);
199 
200    vsvg->emit->set_buffer(vsvg->emit,
201                           1,
202                           &vsvg->draw->rasterizer->point_size,
203                           0,
204                           ~0);
205 
206    vsvg->emit->run(vsvg->emit,
207                    0, count,
208                    vsvg->draw->start_instance,
209                    vsvg->draw->instance_id,
210                    output_buffer);
211 
212    FREE(temp_buffer);
213 }
214 
215 
216 static void UTIL_CDECL
vsvg_run_linear(struct draw_vs_variant * variant,unsigned start,unsigned count,void * output_buffer)217 vsvg_run_linear(struct draw_vs_variant *variant,
218                 unsigned start,
219                 unsigned count,
220                 void *output_buffer)
221 {
222    struct draw_vs_variant_generic *vsvg = (struct draw_vs_variant_generic *)variant;
223    unsigned temp_vertex_stride = vsvg->temp_vertex_stride;
224    void *temp_buffer = MALLOC(align(count,4) * temp_vertex_stride +
225                               DRAW_EXTRA_VERTICES_PADDING);
226 
227    if (0) debug_printf("%s %d %d (sz %d, %d)\n", __func__, start, count,
228                        vsvg->base.key.output_stride,
229                        temp_vertex_stride);
230 
231    vsvg->fetch->run(vsvg->fetch,
232                     start,
233                     count,
234                     vsvg->draw->start_instance,
235                     vsvg->draw->instance_id,
236                     temp_buffer);
237 
238    vsvg->base.vs->run_linear(vsvg->base.vs,
239                              temp_buffer,
240                              temp_buffer,
241                              vsvg->base.vs->draw->pt.user.constants[PIPE_SHADER_VERTEX],
242                              count,
243                              temp_vertex_stride,
244                              temp_vertex_stride, NULL);
245 
246    if (vsvg->base.key.clip) {
247       /* not really handling clipping, just do the rhw so we can
248        * see the results...
249        */
250       do_rhw_viewport(vsvg, count, temp_buffer);
251    } else if (vsvg->base.key.viewport) {
252       do_viewport(vsvg, count, temp_buffer);
253    }
254 
255    vsvg->emit->set_buffer(vsvg->emit,
256                           0,
257                           temp_buffer,
258                           temp_vertex_stride,
259                           ~0);
260 
261    vsvg->emit->set_buffer(vsvg->emit,
262                           1,
263                           &vsvg->draw->rasterizer->point_size,
264                           0,
265                           ~0);
266 
267    vsvg->emit->run(vsvg->emit,
268                    0, count,
269                    vsvg->draw->start_instance,
270                    vsvg->draw->instance_id,
271                    output_buffer);
272 
273    FREE(temp_buffer);
274 }
275 
276 
/*
 * destroy callback of the generic variant.
 *
 * Frees the variant allocation itself.  The fetch and emit translate
 * objects it points to are not released here.
 */
static void
vsvg_destroy(struct draw_vs_variant *variant)
{
   struct draw_vs_variant_generic *vsvg =
      (struct draw_vs_variant_generic *)variant;

   FREE(vsvg);
}
282 
283 
284 struct draw_vs_variant *
draw_vs_create_variant_generic(struct draw_vertex_shader * vs,const struct draw_vs_variant_key * key)285 draw_vs_create_variant_generic(struct draw_vertex_shader *vs,
286                                const struct draw_vs_variant_key *key)
287 {
288    struct draw_vs_variant_generic *vsvg = CALLOC_STRUCT(draw_vs_variant_generic);
289    if (!vsvg)
290       return NULL;
291 
292    vsvg->base.key = *key;
293    vsvg->base.vs = vs;
294    vsvg->base.set_buffer    = vsvg_set_buffer;
295    vsvg->base.run_elts      = vsvg_run_elts;
296    vsvg->base.run_linear    = vsvg_run_linear;
297    vsvg->base.destroy       = vsvg_destroy;
298 
299    vsvg->draw = vs->draw;
300 
301    vsvg->temp_vertex_stride = MAX2(key->nr_inputs,
302                                    draw_total_vs_outputs(vs->draw)) * 4 * sizeof(float);
303 
304    /* Build free-standing fetch and emit functions:
305     */
306    struct translate_key fetch;
307    fetch.nr_elements = key->nr_inputs;
308    fetch.output_stride = vsvg->temp_vertex_stride;
309    for (unsigned i = 0; i < key->nr_inputs; i++) {
310       fetch.element[i].type = TRANSLATE_ELEMENT_NORMAL;
311       fetch.element[i].input_format = key->element[i].in.format;
312       fetch.element[i].input_buffer = key->element[i].in.buffer;
313       fetch.element[i].input_offset = key->element[i].in.offset;
314       fetch.element[i].instance_divisor = 0;
315       fetch.element[i].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
316       fetch.element[i].output_offset = i * 4 * sizeof(float);
317       assert(fetch.element[i].output_offset < fetch.output_stride);
318    }
319 
320    struct translate_key emit;
321    emit.nr_elements = key->nr_outputs;
322    emit.output_stride = key->output_stride;
323    for (unsigned i = 0; i < key->nr_outputs; i++) {
324       if (key->element[i].out.format != EMIT_1F_PSIZE) {
325          emit.element[i].type = TRANSLATE_ELEMENT_NORMAL;
326          emit.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
327          emit.element[i].input_buffer = 0;
328          emit.element[i].input_offset = key->element[i].out.vs_output * 4 * sizeof(float);
329          emit.element[i].instance_divisor = 0;
330          emit.element[i].output_format = draw_translate_vinfo_format(key->element[i].out.format);
331          emit.element[i].output_offset = key->element[i].out.offset;
332          assert(emit.element[i].input_offset <= fetch.output_stride);
333       } else {
334          emit.element[i].type = TRANSLATE_ELEMENT_NORMAL;
335          emit.element[i].input_format = PIPE_FORMAT_R32_FLOAT;
336          emit.element[i].input_buffer = 1;
337          emit.element[i].input_offset = 0;
338          emit.element[i].instance_divisor = 0;
339          emit.element[i].output_format = PIPE_FORMAT_R32_FLOAT;
340          emit.element[i].output_offset = key->element[i].out.offset;
341       }
342    }
343 
344    vsvg->fetch = draw_vs_get_fetch(vs->draw, &fetch);
345    vsvg->emit = draw_vs_get_emit(vs->draw, &emit);
346 
347    return &vsvg->base;
348 }
349