/**************************************************************************
 *
 * Copyright 2007 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

/*
 * Authors:
 *   Keith Whitwell <[email protected]>
 */

#include "util/u_memory.h"
#include "util/u_math.h"
#include "draw/draw_context.h"
#include "draw/draw_private.h"
#include "draw/draw_vbuf.h"
#include "draw/draw_vertex.h"
#include "draw/draw_vs.h"
#include "translate/translate.h"


/* A first pass at incorporating vertex fetch/emit functionality into the
 * vertex shader.
 */
struct draw_vs_variant_generic {
   struct draw_vs_variant base;

   struct draw_vertex_shader *shader;
   struct draw_context *draw;

   /* Basic plan is to run these two translate functions before/after
    * the vertex shader's existing run_linear() routine to simulate
    * the inclusion of this functionality into the shader...
    *
    * Next will look at actually including it.
    */
   struct translate *fetch;
   struct translate *emit;

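   /* Stride, in bytes, of the intermediate vertex layout used between
    * fetch, the shader and emit: one float[4] slot per attribute.
    */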
   unsigned temp_vertex_stride;
};


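/* Bind an application vertex buffer (and its stride) to the fetch
 * translate.
 */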
static void
vsvg_set_buffer(struct draw_vs_variant *variant,
                unsigned buffer,
                const void *ptr,
                unsigned stride,
                unsigned max_index)
{
   struct draw_vs_variant_generic *vsvg = (struct draw_vs_variant_generic *)variant;

   vsvg->fetch->set_buffer(vsvg->fetch, buffer, ptr, stride, max_index);
}

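/* Look up the viewport to use for a given post-shader vertex.  If the
 * shader writes a viewport index, it is read from the x component of that
 * output and clamped; otherwise viewport zero is used.
 */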
static const struct pipe_viewport_state *
find_viewport(struct draw_context *draw,
              char *buffer,
              unsigned vertex_idx,
              unsigned stride)
{
   int viewport_index_output =
      draw_current_shader_viewport_index_output(draw);
   const char *ptr = buffer + vertex_idx * stride;
   const unsigned *data = (const unsigned *) ptr;
   int viewport_index =
      draw_current_shader_uses_viewport_index(draw) ?
      data[viewport_index_output * 4] : 0;

   viewport_index = draw_clamp_viewport_idx(viewport_index);

   return &draw->viewports[viewport_index];
}


/* Mainly for debug at this stage: do the perspective divide and viewport
 * transform on the position output in place.
 */
static void
do_rhw_viewport(struct draw_vs_variant_generic *vsvg,
                unsigned count,
                void *output_buffer)
{
   char *ptr = (char *)output_buffer;
   unsigned stride = vsvg->temp_vertex_stride;

   ptr += vsvg->base.vs->position_output * 4 * sizeof(float);

   for (unsigned j = 0; j < count; j++, ptr += stride) {
      const struct pipe_viewport_state *viewport =
         find_viewport(vsvg->base.vs->draw, (char *)output_buffer,
                       j, stride);
      const float *scale = viewport->scale;
      const float *trans = viewport->translate;
      float *data = (float *)ptr;
      float w = 1.0f / data[3];

      data[0] = data[0] * w * scale[0] + trans[0];
      data[1] = data[1] * w * scale[1] + trans[1];
      data[2] = data[2] * w * scale[2] + trans[2];
      data[3] = w;
   }
}


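/* Apply the viewport scale and translate to the position output.  Unlike
 * do_rhw_viewport() this does not divide by w.
 */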
static void
do_viewport(struct draw_vs_variant_generic *vsvg,
            unsigned count,
            void *output_buffer)
{
   char *ptr = (char *)output_buffer;
   unsigned stride = vsvg->temp_vertex_stride;

   ptr += vsvg->base.vs->position_output * 4 * sizeof(float);

   for (unsigned j = 0; j < count; j++, ptr += stride) {
      const struct pipe_viewport_state *viewport =
         find_viewport(vsvg->base.vs->draw, (char *)output_buffer,
                       j, stride);
      const float *scale = viewport->scale;
      const float *trans = viewport->translate;
      float *data = (float *)ptr;

      data[0] = data[0] * scale[0] + trans[0];
      data[1] = data[1] * scale[1] + trans[1];
      data[2] = data[2] * scale[2] + trans[2];
   }
}


static void UTIL_CDECL
vsvg_run_elts(struct draw_vs_variant *variant,
              const unsigned *elts,
              unsigned count,
              void *output_buffer)
{
   struct draw_vs_variant_generic *vsvg = (struct draw_vs_variant_generic *)variant;
   unsigned temp_vertex_stride = vsvg->temp_vertex_stride;
   void *temp_buffer = MALLOC(align(count, 4) * temp_vertex_stride +
                              DRAW_EXTRA_VERTICES_PADDING);

   if (!temp_buffer)
      return;

   if (0) debug_printf("%s %d\n", __func__, count);

   /* Want to do this in small batches for cache locality?
    */

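   /* Fetch: gather the indexed input vertices into the temporary buffer,
    * expanding every attribute to float[4].
    */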
   vsvg->fetch->run_elts(vsvg->fetch,
                         elts,
                         count,
                         vsvg->draw->start_instance,
                         vsvg->draw->instance_id,
                         temp_buffer);

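   /* Run the vertex shader in place on the temporary buffer. */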
   vsvg->base.vs->run_linear(vsvg->base.vs,
                             temp_buffer,
                             temp_buffer,
                             vsvg->base.vs->draw->pt.user.constants[PIPE_SHADER_VERTEX],
                             count,
                             temp_vertex_stride,
                             temp_vertex_stride, NULL);

   /* FIXME: geometry shading? */

   if (vsvg->base.key.clip) {
      /* not really handling clipping, just do the rhw so we can
       * see the results...
       */
      do_rhw_viewport(vsvg, count, temp_buffer);
   } else if (vsvg->base.key.viewport) {
      do_viewport(vsvg, count, temp_buffer);
   }

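   /* Emit: translate the shaded temporary vertices (buffer 0) into the
    * final output vertex layout.  Buffer 1 supplies the constant point
    * size for EMIT_1F_PSIZE outputs.
    */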
   vsvg->emit->set_buffer(vsvg->emit,
                          0,
                          temp_buffer,
                          temp_vertex_stride,
                          ~0);

   vsvg->emit->set_buffer(vsvg->emit,
                          1,
                          &vsvg->draw->rasterizer->point_size,
                          0,
                          ~0);

   vsvg->emit->run(vsvg->emit,
                   0, count,
                   vsvg->draw->start_instance,
                   vsvg->draw->instance_id,
                   output_buffer);

   FREE(temp_buffer);
}


static void UTIL_CDECL
vsvg_run_linear(struct draw_vs_variant *variant,
                unsigned start,
                unsigned count,
                void *output_buffer)
{
   struct draw_vs_variant_generic *vsvg = (struct draw_vs_variant_generic *)variant;
   unsigned temp_vertex_stride = vsvg->temp_vertex_stride;
   void *temp_buffer = MALLOC(align(count, 4) * temp_vertex_stride +
                              DRAW_EXTRA_VERTICES_PADDING);

   if (!temp_buffer)
      return;

   if (0) debug_printf("%s %d %d (sz %d, %d)\n", __func__, start, count,
                       vsvg->base.key.output_stride,
                       temp_vertex_stride);

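   /* Same flow as vsvg_run_elts(), but fetching a linear run of vertices
    * beginning at 'start'.
    */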
   vsvg->fetch->run(vsvg->fetch,
                    start,
                    count,
                    vsvg->draw->start_instance,
                    vsvg->draw->instance_id,
                    temp_buffer);

   vsvg->base.vs->run_linear(vsvg->base.vs,
                             temp_buffer,
                             temp_buffer,
                             vsvg->base.vs->draw->pt.user.constants[PIPE_SHADER_VERTEX],
                             count,
                             temp_vertex_stride,
                             temp_vertex_stride, NULL);

   if (vsvg->base.key.clip) {
      /* not really handling clipping, just do the rhw so we can
       * see the results...
       */
      do_rhw_viewport(vsvg, count, temp_buffer);
   } else if (vsvg->base.key.viewport) {
      do_viewport(vsvg, count, temp_buffer);
   }

   vsvg->emit->set_buffer(vsvg->emit,
                          0,
                          temp_buffer,
                          temp_vertex_stride,
                          ~0);

   vsvg->emit->set_buffer(vsvg->emit,
                          1,
                          &vsvg->draw->rasterizer->point_size,
                          0,
                          ~0);

   vsvg->emit->run(vsvg->emit,
                   0, count,
                   vsvg->draw->start_instance,
                   vsvg->draw->instance_id,
                   output_buffer);

   FREE(temp_buffer);
}


static void
vsvg_destroy(struct draw_vs_variant *variant)
{
   FREE(variant);
}


struct draw_vs_variant *
draw_vs_create_variant_generic(struct draw_vertex_shader *vs,
                               const struct draw_vs_variant_key *key)
{
   struct draw_vs_variant_generic *vsvg = CALLOC_STRUCT(draw_vs_variant_generic);
   if (!vsvg)
      return NULL;

   vsvg->base.key = *key;
   vsvg->base.vs = vs;
   vsvg->base.set_buffer = vsvg_set_buffer;
   vsvg->base.run_elts = vsvg_run_elts;
   vsvg->base.run_linear = vsvg_run_linear;
   vsvg->base.destroy = vsvg_destroy;

   vsvg->draw = vs->draw;

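   /* The temporary vertex must be large enough for both the fetched inputs
    * and the shader outputs, one float[4] slot per attribute.
    */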
   vsvg->temp_vertex_stride = MAX2(key->nr_inputs,
                                   draw_total_vs_outputs(vs->draw)) * 4 * sizeof(float);

   /* Build free-standing fetch and emit functions:
    */
   struct translate_key fetch;
   fetch.nr_elements = key->nr_inputs;
   fetch.output_stride = vsvg->temp_vertex_stride;
   for (unsigned i = 0; i < key->nr_inputs; i++) {
      fetch.element[i].type = TRANSLATE_ELEMENT_NORMAL;
      fetch.element[i].input_format = key->element[i].in.format;
      fetch.element[i].input_buffer = key->element[i].in.buffer;
      fetch.element[i].input_offset = key->element[i].in.offset;
      fetch.element[i].instance_divisor = 0;
      fetch.element[i].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
      fetch.element[i].output_offset = i * 4 * sizeof(float);
      assert(fetch.element[i].output_offset < fetch.output_stride);
   }

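   /* The emit key maps the float[4] shader outputs (buffer 0) into the
    * layout expected downstream; EMIT_1F_PSIZE outputs instead read the
    * constant point size from buffer 1.
    */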
   struct translate_key emit;
   emit.nr_elements = key->nr_outputs;
   emit.output_stride = key->output_stride;
   for (unsigned i = 0; i < key->nr_outputs; i++) {
      if (key->element[i].out.format != EMIT_1F_PSIZE) {
         emit.element[i].type = TRANSLATE_ELEMENT_NORMAL;
         emit.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
         emit.element[i].input_buffer = 0;
         emit.element[i].input_offset = key->element[i].out.vs_output * 4 * sizeof(float);
         emit.element[i].instance_divisor = 0;
         emit.element[i].output_format = draw_translate_vinfo_format(key->element[i].out.format);
         emit.element[i].output_offset = key->element[i].out.offset;
         assert(emit.element[i].input_offset <= fetch.output_stride);
      } else {
         emit.element[i].type = TRANSLATE_ELEMENT_NORMAL;
         emit.element[i].input_format = PIPE_FORMAT_R32_FLOAT;
         emit.element[i].input_buffer = 1;
         emit.element[i].input_offset = 0;
         emit.element[i].instance_divisor = 0;
         emit.element[i].output_format = PIPE_FORMAT_R32_FLOAT;
         emit.element[i].output_offset = key->element[i].out.offset;
      }
   }

   vsvg->fetch = draw_vs_get_fetch(vs->draw, &fetch);
   vsvg->emit = draw_vs_get_emit(vs->draw, &emit);

   return &vsvg->base;
}