1 /**************************************************************************
2 *
3 * Copyright 2012-2021 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
18 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
19 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20 * USE OR OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * The above copyright notice and this permission notice (including the
23 * next paragraph) shall be included in all copies or substantial portions
24 * of the Software.
25 *
26 **************************************************************************/
27
28 /*
29 * ShaderTGSI.c --
30 * Functions for translating shaders.
31 */
32
33 #include "Debug.h"
34 #include "ShaderParse.h"
35
36 #include "pipe/p_state.h"
37 #include "tgsi/tgsi_ureg.h"
38 #include "tgsi/tgsi_dump.h"
39 #include "util/u_memory.h"
40
41 #include "ShaderDump.h"
42
43
/*
 * Operand format class recorded for each row of opcode_xlate[].
 *
 * Judging by the rows of that table, OF_FLOAT is used for floating-point
 * (and formatless) opcodes, OF_INT for signed integer opcodes and OF_UINT
 * for unsigned/bitwise opcodes.
 */
enum dx10_opcode_format {
   OF_FLOAT,
   OF_INT,
   OF_UINT
};
49
/*
 * One row of the D3D10 -> TGSI opcode translation table (opcode_xlate[]).
 *
 * The table initialises rows positionally, so the field order here must
 * not change.
 */
struct dx10_opcode_xlate {
   D3D10_SB_OPCODE_TYPE type;       /* D3D10 opcode this row describes */
   enum dx10_opcode_format format;  /* operand format class of the opcode */
   uint tgsi_opcode;                /* TGSI_OPCODE_x, TGSI_LOG_UNSUPPORTED or TGSI_EXPAND */
};
55
/* Opcodes that we have not even attempted to implement.
 *
 * Stored in dx10_opcode_xlate::tgsi_opcode in place of a real TGSI opcode;
 * has the same value as TGSI_OPCODE_LAST.
 */
#define TGSI_LOG_UNSUPPORTED TGSI_OPCODE_LAST

/* Opcodes which do not translate directly to a TGSI opcode, but which
 * have at least a partial implementation coded below.
 *
 * Also stored in dx10_opcode_xlate::tgsi_opcode; one past TGSI_OPCODE_LAST.
 */
#define TGSI_EXPAND        (TGSI_OPCODE_LAST+1)
64
/*
 * D3D10 opcode -> TGSI opcode translation table.
 *
 * Each row gives the D3D10 opcode, its operand format class and either a
 * TGSI opcode, TGSI_EXPAND (handled by dedicated code) or
 * TGSI_LOG_UNSUPPORTED (not implemented).
 *
 * NOTE(review): the array is sized D3D10_SB_NUM_OPCODES and the rows are
 * positional, so it is presumably indexed directly by D3D10_SB_OPCODE_TYPE;
 * rows must then stay in the enum's declaration order -- confirm against
 * the code that reads this table before reordering or inserting rows.
 */
static struct dx10_opcode_xlate opcode_xlate[D3D10_SB_NUM_OPCODES] = {
   {D3D10_SB_OPCODE_ADD, OF_FLOAT, TGSI_OPCODE_ADD},
   {D3D10_SB_OPCODE_AND, OF_UINT, TGSI_OPCODE_AND},
   {D3D10_SB_OPCODE_BREAK, OF_FLOAT, TGSI_OPCODE_BRK},
   {D3D10_SB_OPCODE_BREAKC, OF_UINT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_CALL, OF_UINT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_CALLC, OF_UINT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_CASE, OF_UINT, TGSI_OPCODE_CASE},
   {D3D10_SB_OPCODE_CONTINUE, OF_FLOAT, TGSI_OPCODE_CONT},
   {D3D10_SB_OPCODE_CONTINUEC, OF_UINT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_CUT, OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_DEFAULT, OF_FLOAT, TGSI_OPCODE_DEFAULT},
   {D3D10_SB_OPCODE_DERIV_RTX, OF_FLOAT, TGSI_OPCODE_DDX},
   {D3D10_SB_OPCODE_DERIV_RTY, OF_FLOAT, TGSI_OPCODE_DDY},
   {D3D10_SB_OPCODE_DISCARD, OF_UINT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_DIV, OF_FLOAT, TGSI_OPCODE_DIV},
   {D3D10_SB_OPCODE_DP2, OF_FLOAT, TGSI_OPCODE_DP2},
   {D3D10_SB_OPCODE_DP3, OF_FLOAT, TGSI_OPCODE_DP3},
   {D3D10_SB_OPCODE_DP4, OF_FLOAT, TGSI_OPCODE_DP4},
   {D3D10_SB_OPCODE_ELSE, OF_FLOAT, TGSI_OPCODE_ELSE},
   {D3D10_SB_OPCODE_EMIT, OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_EMITTHENCUT, OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_ENDIF, OF_FLOAT, TGSI_OPCODE_ENDIF},
   {D3D10_SB_OPCODE_ENDLOOP, OF_FLOAT, TGSI_OPCODE_ENDLOOP},
   {D3D10_SB_OPCODE_ENDSWITCH, OF_FLOAT, TGSI_OPCODE_ENDSWITCH},
   {D3D10_SB_OPCODE_EQ, OF_FLOAT, TGSI_OPCODE_FSEQ},
   {D3D10_SB_OPCODE_EXP, OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_FRC, OF_FLOAT, TGSI_OPCODE_FRC},
   {D3D10_SB_OPCODE_FTOI, OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_FTOU, OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_GE, OF_FLOAT, TGSI_OPCODE_FSGE},
   {D3D10_SB_OPCODE_IADD, OF_INT, TGSI_OPCODE_UADD},
   {D3D10_SB_OPCODE_IF, OF_UINT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_IEQ, OF_INT, TGSI_OPCODE_USEQ},
   {D3D10_SB_OPCODE_IGE, OF_INT, TGSI_OPCODE_ISGE},
   {D3D10_SB_OPCODE_ILT, OF_INT, TGSI_OPCODE_ISLT},
   {D3D10_SB_OPCODE_IMAD, OF_INT, TGSI_OPCODE_UMAD},
   {D3D10_SB_OPCODE_IMAX, OF_INT, TGSI_OPCODE_IMAX},
   {D3D10_SB_OPCODE_IMIN, OF_INT, TGSI_OPCODE_IMIN},
   {D3D10_SB_OPCODE_IMUL, OF_INT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_INE, OF_INT, TGSI_OPCODE_USNE},
   {D3D10_SB_OPCODE_INEG, OF_INT, TGSI_OPCODE_INEG},
   {D3D10_SB_OPCODE_ISHL, OF_INT, TGSI_OPCODE_SHL},
   {D3D10_SB_OPCODE_ISHR, OF_INT, TGSI_OPCODE_ISHR},
   {D3D10_SB_OPCODE_ITOF, OF_INT, TGSI_OPCODE_I2F},
   {D3D10_SB_OPCODE_LABEL, OF_INT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_LD, OF_UINT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_LD_MS, OF_UINT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_LOG, OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_LOOP, OF_FLOAT, TGSI_OPCODE_BGNLOOP},
   {D3D10_SB_OPCODE_LT, OF_FLOAT, TGSI_OPCODE_FSLT},
   {D3D10_SB_OPCODE_MAD, OF_FLOAT, TGSI_OPCODE_MAD},
   {D3D10_SB_OPCODE_MIN, OF_FLOAT, TGSI_OPCODE_MIN},
   {D3D10_SB_OPCODE_MAX, OF_FLOAT, TGSI_OPCODE_MAX},
   {D3D10_SB_OPCODE_CUSTOMDATA, OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_MOV, OF_UINT, TGSI_OPCODE_MOV},
   {D3D10_SB_OPCODE_MOVC, OF_UINT, TGSI_OPCODE_UCMP},
   {D3D10_SB_OPCODE_MUL, OF_FLOAT, TGSI_OPCODE_MUL},
   {D3D10_SB_OPCODE_NE, OF_FLOAT, TGSI_OPCODE_FSNE},
   {D3D10_SB_OPCODE_NOP, OF_FLOAT, TGSI_OPCODE_NOP},
   {D3D10_SB_OPCODE_NOT, OF_UINT, TGSI_OPCODE_NOT},
   {D3D10_SB_OPCODE_OR, OF_UINT, TGSI_OPCODE_OR},
   {D3D10_SB_OPCODE_RESINFO, OF_UINT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_RET, OF_FLOAT, TGSI_OPCODE_RET},
   {D3D10_SB_OPCODE_RETC, OF_UINT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_ROUND_NE, OF_FLOAT, TGSI_OPCODE_ROUND},
   {D3D10_SB_OPCODE_ROUND_NI, OF_FLOAT, TGSI_OPCODE_FLR},
   {D3D10_SB_OPCODE_ROUND_PI, OF_FLOAT, TGSI_OPCODE_CEIL},
   {D3D10_SB_OPCODE_ROUND_Z, OF_FLOAT, TGSI_OPCODE_TRUNC},
   {D3D10_SB_OPCODE_RSQ, OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_SAMPLE, OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_SAMPLE_C, OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_SAMPLE_C_LZ, OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_SAMPLE_L, OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_SAMPLE_D, OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_SAMPLE_B, OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_SQRT, OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_SWITCH, OF_UINT, TGSI_OPCODE_SWITCH},
   {D3D10_SB_OPCODE_SINCOS, OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_UDIV, OF_UINT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_ULT, OF_UINT, TGSI_OPCODE_USLT},
   {D3D10_SB_OPCODE_UGE, OF_UINT, TGSI_OPCODE_USGE},
   {D3D10_SB_OPCODE_UMUL, OF_UINT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_UMAD, OF_UINT, TGSI_OPCODE_UMAD},
   {D3D10_SB_OPCODE_UMAX, OF_UINT, TGSI_OPCODE_UMAX},
   {D3D10_SB_OPCODE_UMIN, OF_UINT, TGSI_OPCODE_UMIN},
   {D3D10_SB_OPCODE_USHR, OF_UINT, TGSI_OPCODE_USHR},
   {D3D10_SB_OPCODE_UTOF, OF_UINT, TGSI_OPCODE_U2F},
   {D3D10_SB_OPCODE_XOR, OF_UINT, TGSI_OPCODE_XOR},
   /* Declaration opcodes. */
   {D3D10_SB_OPCODE_DCL_RESOURCE, OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_DCL_CONSTANT_BUFFER, OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_DCL_SAMPLER, OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_DCL_INDEX_RANGE, OF_FLOAT, TGSI_LOG_UNSUPPORTED},
   {D3D10_SB_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY, OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_DCL_GS_INPUT_PRIMITIVE, OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT, OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_DCL_INPUT, OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_DCL_INPUT_SGV, OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_DCL_INPUT_SIV, OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_DCL_INPUT_PS, OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_DCL_INPUT_PS_SGV, OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_DCL_INPUT_PS_SIV, OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_DCL_OUTPUT, OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_DCL_OUTPUT_SGV, OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_DCL_OUTPUT_SIV, OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_DCL_TEMPS, OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_DCL_INDEXABLE_TEMP, OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_DCL_GLOBAL_FLAGS, OF_FLOAT, TGSI_LOG_UNSUPPORTED},
   {D3D10_SB_OPCODE_RESERVED0, OF_FLOAT, TGSI_LOG_UNSUPPORTED},
   /* D3D10.1 opcodes: none are implemented. */
   {D3D10_1_SB_OPCODE_LOD, OF_FLOAT, TGSI_LOG_UNSUPPORTED},
   {D3D10_1_SB_OPCODE_GATHER4, OF_FLOAT, TGSI_LOG_UNSUPPORTED},
   {D3D10_1_SB_OPCODE_SAMPLE_POS, OF_FLOAT, TGSI_LOG_UNSUPPORTED},
   {D3D10_1_SB_OPCODE_SAMPLE_INFO, OF_FLOAT, TGSI_LOG_UNSUPPORTED}
};
179
/*
 * Fixed capacities of the per-shader translation state (struct Shader_xlate)
 * and the bounds used by the asserts in the declaration/operand helpers.
 */
#define SHADER_MAX_TEMPS 4096             /* entries in Shader_xlate::temps */
#define SHADER_MAX_INPUTS 32              /* entries in Shader_xlate::inputs */
#define SHADER_MAX_OUTPUTS 32             /* entries in Shader_xlate::outputs */
#define SHADER_MAX_CONSTS 4096
#define SHADER_MAX_RESOURCES PIPE_MAX_SHADER_SAMPLER_VIEWS
#define SHADER_MAX_SAMPLERS PIPE_MAX_SAMPLERS
#define SHADER_MAX_INDEXABLE_TEMPS 4096   /* entries in Shader_xlate::indexable_temp_offsets */
187
/*
 * Bookkeeping record for a call instruction (stored in Shader_xlate::calls).
 *
 * NOTE(review): field meanings are inferred from their names; the code that
 * fills and consumes this record is outside the visible part of the file.
 */
struct Shader_call {
   unsigned d3d_label;         /* presumably the D3D10 label being called */
   unsigned tgsi_label_token;  /* presumably the TGSI label token to patch -- TODO confirm */
};
192
/*
 * Bookkeeping record for a label definition (stored in Shader_xlate::labels).
 *
 * NOTE(review): field meanings are inferred from their names; the code that
 * fills and consumes this record is outside the visible part of the file.
 */
struct Shader_label {
   unsigned d3d_label;     /* presumably the D3D10 label number */
   unsigned tgsi_insn_no;  /* presumably the TGSI instruction index of the label -- TODO confirm */
};
197
/*
 * Per-resource translation state (one entry of Shader_xlate::resources).
 */
struct Shader_resource {
   uint target; /* TGSI_TEXTURE_x */
};
201
/*
 * State carried through the translation of one D3D10 shader to TGSI.
 *
 * Comments marked "presumably" describe fields whose users are outside the
 * visible part of the file.
 */
struct Shader_xlate {
   struct ureg_program *ureg;   /* TGSI program being built */

   /* Input vertex count; 0 until declare_vertices_in() records a value. */
   uint vertices_in;
   uint declared_temps;

   /* Temporaries; indexed by temp_offset + N for plain temps and by
    * indexable_temp_offsets[A] + N for indexable temp array A. */
   struct ureg_dst temps[SHADER_MAX_TEMPS];
   struct ureg_dst output_depth;   /* register returned for OUTPUT_DEPTH operands */
   struct Shader_resource resources[SHADER_MAX_RESOURCES];
   struct ureg_src sv[SHADER_MAX_RESOURCES];       /* presumably sampler views */
   struct ureg_src samplers[SHADER_MAX_SAMPLERS];
   struct ureg_src imms;
   struct ureg_src prim_id;        /* primitive id system value (see dcl_gs_input) */

   uint temp_offset;
   uint indexable_temp_offsets[SHADER_MAX_INDEXABLE_TEMPS];

   /* Per input register state, maintained by dcl_base_input(). */
   struct {
      bool declared;      /* a declaration has been recorded */
      uint writemask;     /* union of the component masks declared so far */
      uint siv_name;      /* D3D10_SB_NAME of the first declaration */
      bool overloaded;    /* several declarations were merged into a temporary */
      struct ureg_src reg;
   } inputs[SHADER_MAX_INPUTS];

   /* Per output register: one destination per component (x, y, z, w),
    * filled in by dcl_base_output(). */
   struct {
      struct ureg_dst reg[4];
   } outputs[SHADER_MAX_OUTPUTS];

   /* D3D register index -> TGSI semantic index pairs.  Only
    * clip_distance_mapping is read by translate_semantic_index(). */
   struct {
      uint d3d;
      uint tgsi;
   } clip_distance_mapping[2], cull_distance_mapping[2];
   uint num_clip_distances_declared;
   uint num_cull_distances_declared;

   /* Call and label records; presumably growable arrays with num_ and max_
    * as used/allocated counts -- TODO confirm. */
   struct Shader_call *calls;
   uint num_calls;
   uint max_calls;
   struct Shader_label *labels;
   uint num_labels;
   uint max_labels;
};
245
246 static uint
translate_interpolation(D3D10_SB_INTERPOLATION_MODE interpolation)247 translate_interpolation(D3D10_SB_INTERPOLATION_MODE interpolation)
248 {
249 switch (interpolation) {
250 case D3D10_SB_INTERPOLATION_UNDEFINED:
251 assert(0);
252 return TGSI_INTERPOLATE_LINEAR;
253
254 case D3D10_SB_INTERPOLATION_CONSTANT:
255 return TGSI_INTERPOLATE_CONSTANT;
256 case D3D10_SB_INTERPOLATION_LINEAR:
257 return TGSI_INTERPOLATE_PERSPECTIVE;
258 case D3D10_SB_INTERPOLATION_LINEAR_NOPERSPECTIVE:
259 return TGSI_INTERPOLATE_LINEAR;
260
261 case D3D10_SB_INTERPOLATION_LINEAR_CENTROID:
262 case D3D10_SB_INTERPOLATION_LINEAR_SAMPLE: // DX10.1
263 LOG_UNSUPPORTED(true);
264 return TGSI_INTERPOLATE_PERSPECTIVE;
265
266 case D3D10_SB_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID:
267 case D3D10_SB_INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE: // DX10.1
268 LOG_UNSUPPORTED(true);
269 return TGSI_INTERPOLATE_LINEAR;
270 }
271
272 assert(0);
273 return TGSI_INTERPOLATE_LINEAR;
274 }
275
276 static uint
translate_system_name(D3D10_SB_NAME name)277 translate_system_name(D3D10_SB_NAME name)
278 {
279 switch (name) {
280 case D3D10_SB_NAME_UNDEFINED:
281 assert(0); /* should not happen */
282 return TGSI_SEMANTIC_GENERIC;
283 case D3D10_SB_NAME_POSITION:
284 return TGSI_SEMANTIC_POSITION;
285 case D3D10_SB_NAME_CLIP_DISTANCE:
286 case D3D10_SB_NAME_CULL_DISTANCE:
287 return TGSI_SEMANTIC_CLIPDIST;
288 case D3D10_SB_NAME_PRIMITIVE_ID:
289 return TGSI_SEMANTIC_PRIMID;
290 case D3D10_SB_NAME_INSTANCE_ID:
291 return TGSI_SEMANTIC_INSTANCEID;
292 case D3D10_SB_NAME_VERTEX_ID:
293 return TGSI_SEMANTIC_VERTEXID_NOBASE;
294 case D3D10_SB_NAME_VIEWPORT_ARRAY_INDEX:
295 return TGSI_SEMANTIC_VIEWPORT_INDEX;
296 case D3D10_SB_NAME_RENDER_TARGET_ARRAY_INDEX:
297 return TGSI_SEMANTIC_LAYER;
298 case D3D10_SB_NAME_IS_FRONT_FACE:
299 return TGSI_SEMANTIC_FACE;
300 case D3D10_SB_NAME_SAMPLE_INDEX:
301 LOG_UNSUPPORTED(true);
302 return TGSI_SEMANTIC_GENERIC;
303 }
304
305 assert(0);
306 return TGSI_SEMANTIC_GENERIC;
307 }
308
309 static uint
translate_semantic_index(struct Shader_xlate * sx,D3D10_SB_NAME name,const struct Shader_dst_operand * operand)310 translate_semantic_index(struct Shader_xlate *sx,
311 D3D10_SB_NAME name,
312 const struct Shader_dst_operand *operand)
313 {
314 unsigned idx;
315 switch (name) {
316 case D3D10_SB_NAME_CLIP_DISTANCE:
317 case D3D10_SB_NAME_CULL_DISTANCE:
318 if (sx->clip_distance_mapping[0].d3d == operand->base.index[0].imm) {
319 idx = sx->clip_distance_mapping[0].tgsi;
320 } else {
321 assert(sx->clip_distance_mapping[1].d3d == operand->base.index[0].imm);
322 idx = sx->clip_distance_mapping[1].tgsi;
323 }
324 break;
325 /* case D3D10_SB_NAME_CULL_DISTANCE:
326 if (sx->cull_distance_mapping[0].d3d == operand->base.index[0].imm) {
327 idx = sx->cull_distance_mapping[0].tgsi;
328 } else {
329 assert(sx->cull_distance_mapping[1].d3d == operand->base.index[0].imm);
330 idx = sx->cull_distance_mapping[1].tgsi;
331 }
332 break;*/
333 default:
334 idx = 0;
335 }
336 return idx;
337 }
338
339 static enum tgsi_return_type
trans_dcl_ret_type(D3D10_SB_RESOURCE_RETURN_TYPE d3drettype)340 trans_dcl_ret_type(D3D10_SB_RESOURCE_RETURN_TYPE d3drettype) {
341 switch (d3drettype) {
342 case D3D10_SB_RETURN_TYPE_UNORM:
343 return TGSI_RETURN_TYPE_UNORM;
344 case D3D10_SB_RETURN_TYPE_SNORM:
345 return TGSI_RETURN_TYPE_SNORM;
346 case D3D10_SB_RETURN_TYPE_SINT:
347 return TGSI_RETURN_TYPE_SINT;
348 case D3D10_SB_RETURN_TYPE_UINT:
349 return TGSI_RETURN_TYPE_UINT;
350 case D3D10_SB_RETURN_TYPE_FLOAT:
351 return TGSI_RETURN_TYPE_FLOAT;
352 case D3D10_SB_RETURN_TYPE_MIXED:
353 default:
354 LOG_UNSUPPORTED(true);
355 return TGSI_RETURN_TYPE_FLOAT;
356 }
357 }
358
359 static void
declare_vertices_in(struct Shader_xlate * sx,unsigned in)360 declare_vertices_in(struct Shader_xlate *sx,
361 unsigned in)
362 {
363 /* Make sure vertices_in is consistent with input primitive
364 * and other input declarations.
365 */
366 if (sx->vertices_in) {
367 assert(sx->vertices_in == in);
368 } else {
369 sx->vertices_in = in;
370 }
371 }
372
/*
 * A four-component source swizzle: the TGSI_SWIZZLE_x to read for each of
 * the x, y, z and w channels (passed to ureg_swizzle() by swizzle_reg()).
 */
struct swizzle_mapping {
   unsigned x;
   unsigned y;
   unsigned z;
   unsigned w;
};
379
380 /* mapping of writmask to swizzles */
381 static const struct swizzle_mapping writemask_to_swizzle[] = {
382 { TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X }, //TGSI_WRITEMASK_NONE
383 { TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X }, //TGSI_WRITEMASK_X
384 { TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y }, //TGSI_WRITEMASK_Y
385 { TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y }, //TGSI_WRITEMASK_XY
386 { TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z }, //TGSI_WRITEMASK_Z
387 { TGSI_SWIZZLE_X, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X, TGSI_SWIZZLE_Z }, //TGSI_WRITEMASK_XZ
388 { TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z }, //TGSI_WRITEMASK_YZ
389 { TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X }, //TGSI_WRITEMASK_XYZ
390 { TGSI_SWIZZLE_W, TGSI_SWIZZLE_W, TGSI_SWIZZLE_W, TGSI_SWIZZLE_W }, //TGSI_WRITEMASK_W
391 { TGSI_SWIZZLE_X, TGSI_SWIZZLE_W, TGSI_SWIZZLE_X, TGSI_SWIZZLE_W }, //TGSI_WRITEMASK_XW
392 { TGSI_SWIZZLE_Y, TGSI_SWIZZLE_W, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_W }, //TGSI_WRITEMASK_YW
393 { TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_W, TGSI_SWIZZLE_W }, //TGSI_WRITEMASK_XYW
394 { TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W }, //TGSI_WRITEMASK_ZW
395 { TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W }, //TGSI_WRITEMASK_XZW
396 { TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W }, //TGSI_WRITEMASK_YZW
397 { TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W }, //TGSI_WRITEMASK_XYZW
398 };
399
400 static struct ureg_src
swizzle_reg(struct ureg_src src,uint writemask,unsigned siv_name)401 swizzle_reg(struct ureg_src src, uint writemask,
402 unsigned siv_name)
403 {
404 switch (siv_name) {
405 case D3D10_SB_NAME_PRIMITIVE_ID:
406 case D3D10_SB_NAME_INSTANCE_ID:
407 case D3D10_SB_NAME_VERTEX_ID:
408 case D3D10_SB_NAME_VIEWPORT_ARRAY_INDEX:
409 case D3D10_SB_NAME_RENDER_TARGET_ARRAY_INDEX:
410 case D3D10_SB_NAME_IS_FRONT_FACE:
411 return ureg_scalar(src, TGSI_SWIZZLE_X);
412 default: {
413 const struct swizzle_mapping *swizzle =
414 &writemask_to_swizzle[writemask];
415 return ureg_swizzle(src, swizzle->x, swizzle->y,
416 swizzle->z, swizzle->w);
417 }
418 }
419 }
420
421 static void
dcl_base_output(struct Shader_xlate * sx,struct ureg_program * ureg,struct ureg_dst reg,const struct Shader_dst_operand * operand)422 dcl_base_output(struct Shader_xlate *sx,
423 struct ureg_program *ureg,
424 struct ureg_dst reg,
425 const struct Shader_dst_operand *operand)
426 {
427 unsigned writemask =
428 operand->mask >> D3D10_SB_OPERAND_4_COMPONENT_MASK_SHIFT;
429 unsigned idx = operand->base.index[0].imm;
430 unsigned i;
431
432 if (!writemask) {
433 sx->outputs[idx].reg[0] = reg;
434 sx->outputs[idx].reg[1] = reg;
435 sx->outputs[idx].reg[2] = reg;
436 sx->outputs[idx].reg[3] = reg;
437 return;
438 }
439
440 for (i = 0; i < 4; ++i) {
441 unsigned mask = 1 << i;
442 if ((writemask & mask)) {
443 sx->outputs[idx].reg[i] = reg;
444 }
445 }
446 }
447
448 static void
dcl_base_input(struct Shader_xlate * sx,struct ureg_program * ureg,const struct Shader_dst_operand * operand,struct ureg_src dcl_reg,uint index,uint siv_name)449 dcl_base_input(struct Shader_xlate *sx,
450 struct ureg_program *ureg,
451 const struct Shader_dst_operand *operand,
452 struct ureg_src dcl_reg,
453 uint index,
454 uint siv_name)
455 {
456 unsigned writemask =
457 operand->mask >> D3D10_SB_OPERAND_4_COMPONENT_MASK_SHIFT;
458
459 if (sx->inputs[index].declared && !sx->inputs[index].overloaded) {
460 struct ureg_dst temp = ureg_DECL_temporary(sx->ureg);
461
462 ureg_MOV(ureg,
463 ureg_writemask(temp, sx->inputs[index].writemask),
464 swizzle_reg(sx->inputs[index].reg, sx->inputs[index].writemask,
465 sx->inputs[index].siv_name));
466 ureg_MOV(ureg, ureg_writemask(temp, writemask),
467 swizzle_reg(dcl_reg, writemask, siv_name));
468 sx->inputs[index].reg = ureg_src(temp);
469 sx->inputs[index].overloaded = true;
470 sx->inputs[index].writemask |= writemask;
471 } else if (sx->inputs[index].overloaded) {
472 struct ureg_dst temp = ureg_dst(sx->inputs[index].reg);
473 ureg_MOV(ureg, ureg_writemask(temp, writemask),
474 swizzle_reg(dcl_reg, writemask, siv_name));
475 sx->inputs[index].writemask |= writemask;
476 } else {
477 assert(!sx->inputs[index].declared);
478
479 sx->inputs[index].reg = dcl_reg;
480 sx->inputs[index].declared = true;
481 sx->inputs[index].writemask = writemask;
482 sx->inputs[index].siv_name = siv_name;
483 }
484 }
485
486 static void
dcl_vs_input(struct Shader_xlate * sx,struct ureg_program * ureg,const struct Shader_dst_operand * dst)487 dcl_vs_input(struct Shader_xlate *sx,
488 struct ureg_program *ureg,
489 const struct Shader_dst_operand *dst)
490 {
491 struct ureg_src reg;
492 assert(dst->base.index_dim == 1);
493 assert(dst->base.index[0].imm < SHADER_MAX_INPUTS);
494
495 reg = ureg_DECL_vs_input(ureg, dst->base.index[0].imm);
496
497 dcl_base_input(sx, ureg, dst, reg, dst->base.index[0].imm,
498 D3D10_SB_NAME_UNDEFINED);
499 }
500
501 static void
dcl_gs_input(struct Shader_xlate * sx,struct ureg_program * ureg,const struct Shader_dst_operand * dst)502 dcl_gs_input(struct Shader_xlate *sx,
503 struct ureg_program *ureg,
504 const struct Shader_dst_operand *dst)
505 {
506 if (dst->base.index_dim == 2) {
507 assert(dst->base.index[1].imm < SHADER_MAX_INPUTS);
508
509 declare_vertices_in(sx, dst->base.index[0].imm);
510
511 /* XXX: Implement declaration masks in gallium.
512 */
513 if (!sx->inputs[dst->base.index[1].imm].reg.File) {
514 struct ureg_src reg =
515 ureg_DECL_input(ureg,
516 TGSI_SEMANTIC_GENERIC,
517 dst->base.index[1].imm,
518 0, 1);
519 dcl_base_input(sx, ureg, dst, reg, dst->base.index[1].imm,
520 D3D10_SB_NAME_UNDEFINED);
521 }
522 } else {
523 assert(dst->base.type == D3D10_SB_OPERAND_TYPE_INPUT_PRIMITIVEID);
524 assert(dst->base.index_dim == 0);
525
526 sx->prim_id = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_PRIMID, 0);
527 }
528 }
529
530 static void
dcl_sgv_input(struct Shader_xlate * sx,struct ureg_program * ureg,const struct Shader_dst_operand * dst,uint dcl_siv_name)531 dcl_sgv_input(struct Shader_xlate *sx,
532 struct ureg_program *ureg,
533 const struct Shader_dst_operand *dst,
534 uint dcl_siv_name)
535 {
536 struct ureg_src reg;
537 assert(dst->base.index_dim == 1);
538 assert(dst->base.index[0].imm < SHADER_MAX_INPUTS);
539
540 reg = ureg_DECL_system_value(ureg, translate_system_name(dcl_siv_name), 0);
541
542 dcl_base_input(sx, ureg, dst, reg, dst->base.index[0].imm,
543 dcl_siv_name);
544 }
545
546 static void
dcl_siv_input(struct Shader_xlate * sx,struct ureg_program * ureg,const struct Shader_dst_operand * dst,uint dcl_siv_name)547 dcl_siv_input(struct Shader_xlate *sx,
548 struct ureg_program *ureg,
549 const struct Shader_dst_operand *dst,
550 uint dcl_siv_name)
551 {
552 struct ureg_src reg;
553 assert(dst->base.index_dim == 2);
554 assert(dst->base.index[1].imm < SHADER_MAX_INPUTS);
555
556 declare_vertices_in(sx, dst->base.index[0].imm);
557
558 reg = ureg_DECL_input(ureg,
559 translate_system_name(dcl_siv_name), 0,
560 0, 1);
561
562 dcl_base_input(sx, ureg, dst, reg, dst->base.index[1].imm,
563 dcl_siv_name);
564 }
565
566 static void
dcl_ps_input(struct Shader_xlate * sx,struct ureg_program * ureg,const struct Shader_dst_operand * dst,uint dcl_in_ps_interp)567 dcl_ps_input(struct Shader_xlate *sx,
568 struct ureg_program *ureg,
569 const struct Shader_dst_operand *dst,
570 uint dcl_in_ps_interp)
571 {
572 struct ureg_src reg;
573 assert(dst->base.index_dim == 1);
574 assert(dst->base.index[0].imm < SHADER_MAX_INPUTS);
575
576 reg = ureg_DECL_fs_input(ureg,
577 TGSI_SEMANTIC_GENERIC,
578 dst->base.index[0].imm,
579 translate_interpolation(dcl_in_ps_interp));
580
581 dcl_base_input(sx, ureg, dst, reg, dst->base.index[0].imm,
582 D3D10_SB_NAME_UNDEFINED);
583 }
584
585 static void
dcl_ps_sgv_input(struct Shader_xlate * sx,struct ureg_program * ureg,const struct Shader_dst_operand * dst,uint dcl_siv_name)586 dcl_ps_sgv_input(struct Shader_xlate *sx,
587 struct ureg_program *ureg,
588 const struct Shader_dst_operand *dst,
589 uint dcl_siv_name)
590 {
591 struct ureg_src reg;
592 assert(dst->base.index_dim == 1);
593 assert(dst->base.index[0].imm < SHADER_MAX_INPUTS);
594
595 if (dcl_siv_name == D3D10_SB_NAME_POSITION) {
596 ureg_property(ureg,
597 TGSI_PROPERTY_FS_COORD_ORIGIN,
598 TGSI_FS_COORD_ORIGIN_UPPER_LEFT);
599 ureg_property(ureg,
600 TGSI_PROPERTY_FS_COORD_PIXEL_CENTER,
601 TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER);
602 }
603
604 reg = ureg_DECL_fs_input(ureg,
605 translate_system_name(dcl_siv_name),
606 0,
607 TGSI_INTERPOLATE_CONSTANT);
608
609 if (dcl_siv_name == D3D10_SB_NAME_IS_FRONT_FACE) {
610 /* We need to map gallium's front_face to the one expected
611 * by D3D10 */
612 struct ureg_dst tmp = ureg_DECL_temporary(ureg);
613
614 tmp = ureg_writemask(tmp, TGSI_WRITEMASK_X);
615
616 ureg_CMP(ureg, tmp, reg,
617 ureg_imm1i(ureg, 0), ureg_imm1i(ureg, -1));
618
619 reg = ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X);
620 }
621
622 dcl_base_input(sx, ureg, dst, reg, dst->base.index[0].imm,
623 dcl_siv_name);
624 }
625
626 static void
dcl_ps_siv_input(struct Shader_xlate * sx,struct ureg_program * ureg,const struct Shader_dst_operand * dst,uint dcl_siv_name,uint dcl_in_ps_interp)627 dcl_ps_siv_input(struct Shader_xlate *sx,
628 struct ureg_program *ureg,
629 const struct Shader_dst_operand *dst,
630 uint dcl_siv_name, uint dcl_in_ps_interp)
631 {
632 struct ureg_src reg;
633 assert(dst->base.index_dim == 1);
634 assert(dst->base.index[0].imm < SHADER_MAX_INPUTS);
635
636 reg = ureg_DECL_fs_input(ureg,
637 translate_system_name(dcl_siv_name),
638 0,
639 translate_interpolation(dcl_in_ps_interp));
640
641 if (dcl_siv_name == D3D10_SB_NAME_POSITION) {
642 /* D3D10 expects reciprocal of interpolated 1/w as 4th component,
643 * gallium/GL just interpolated 1/w */
644 struct ureg_dst tmp = ureg_DECL_temporary(ureg);
645
646 ureg_MOV(ureg, tmp, reg);
647 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W),
648 ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_W));
649 reg = ureg_src(tmp);
650 }
651
652 dcl_base_input(sx, ureg, dst, reg, dst->base.index[0].imm,
653 dcl_siv_name);
654 }
655
656 static struct ureg_src
translate_relative_operand(struct Shader_xlate * sx,const struct Shader_relative_operand * operand)657 translate_relative_operand(struct Shader_xlate *sx,
658 const struct Shader_relative_operand *operand)
659 {
660 struct ureg_src reg;
661
662 switch (operand->type) {
663 case D3D10_SB_OPERAND_TYPE_TEMP:
664 assert(operand->index[0].imm < SHADER_MAX_TEMPS);
665
666 reg = ureg_src(sx->temps[sx->temp_offset + operand->index[0].imm]);
667 break;
668
669 case D3D10_SB_OPERAND_TYPE_INPUT_PRIMITIVEID:
670 reg = sx->prim_id;
671 break;
672
673 case D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP:
674 assert(operand->index[1].imm < SHADER_MAX_TEMPS);
675
676 reg = ureg_src(sx->temps[sx->indexable_temp_offsets[operand->index[0].imm] +
677 operand->index[1].imm]);
678 break;
679
680 case D3D10_SB_OPERAND_TYPE_INPUT:
681 case D3D10_SB_OPERAND_TYPE_OUTPUT:
682 case D3D10_SB_OPERAND_TYPE_IMMEDIATE32:
683 case D3D10_SB_OPERAND_TYPE_IMMEDIATE64:
684 case D3D10_SB_OPERAND_TYPE_SAMPLER:
685 case D3D10_SB_OPERAND_TYPE_RESOURCE:
686 case D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER:
687 case D3D10_SB_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER:
688 case D3D10_SB_OPERAND_TYPE_LABEL:
689 case D3D10_SB_OPERAND_TYPE_OUTPUT_DEPTH:
690 case D3D10_SB_OPERAND_TYPE_NULL:
691 case D3D10_SB_OPERAND_TYPE_RASTERIZER:
692 case D3D10_SB_OPERAND_TYPE_OUTPUT_COVERAGE_MASK:
693 LOG_UNSUPPORTED(true);
694 reg = ureg_src(ureg_DECL_temporary(sx->ureg));
695 break;
696
697 default:
698 assert(0); /* should never happen */
699 reg = ureg_src(ureg_DECL_temporary(sx->ureg));
700 }
701
702 reg = ureg_scalar(reg, operand->comp);
703 return reg;
704 }
705
706 static struct ureg_dst
translate_operand(struct Shader_xlate * sx,const struct Shader_operand * operand,unsigned writemask)707 translate_operand(struct Shader_xlate *sx,
708 const struct Shader_operand *operand,
709 unsigned writemask)
710 {
711 struct ureg_dst reg;
712
713 switch (operand->type) {
714 case D3D10_SB_OPERAND_TYPE_TEMP:
715 assert(operand->index_dim == 1);
716 assert(operand->index[0].index_rep == D3D10_SB_OPERAND_INDEX_IMMEDIATE32);
717 assert(operand->index[0].imm < SHADER_MAX_TEMPS);
718
719 reg = sx->temps[sx->temp_offset + operand->index[0].imm];
720 break;
721
722 case D3D10_SB_OPERAND_TYPE_OUTPUT:
723 assert(operand->index_dim == 1);
724 assert(operand->index[0].imm < SHADER_MAX_OUTPUTS);
725
726 if (operand->index[0].index_rep == D3D10_SB_OPERAND_INDEX_IMMEDIATE32) {
727 if (!writemask) {
728 reg = sx->outputs[operand->index[0].imm].reg[0];
729 } else {
730 unsigned i;
731 for (i = 0; i < 4; ++i) {
732 unsigned mask = 1 << i;
733 if ((writemask & mask)) {
734 reg = sx->outputs[operand->index[0].imm].reg[i];
735 break;
736 }
737 }
738 }
739 } else {
740 struct ureg_src addr =
741 translate_relative_operand(sx, &operand->index[0].rel);
742 assert(operand->index[0].index_rep == D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE);
743 reg = ureg_dst_indirect(sx->outputs[operand->index[0].imm].reg[0], addr);
744 }
745 break;
746
747 case D3D10_SB_OPERAND_TYPE_OUTPUT_DEPTH:
748 assert(operand->index_dim == 0);
749
750 reg = sx->output_depth;
751 break;
752
753 case D3D10_SB_OPERAND_TYPE_INPUT_PRIMITIVEID:
754 assert(operand->index_dim == 0);
755
756 reg = ureg_dst(sx->prim_id);
757 break;
758
759 case D3D10_SB_OPERAND_TYPE_INPUT:
760 case D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP:
761 case D3D10_SB_OPERAND_TYPE_IMMEDIATE32:
762 case D3D10_SB_OPERAND_TYPE_IMMEDIATE64:
763 case D3D10_SB_OPERAND_TYPE_SAMPLER:
764 case D3D10_SB_OPERAND_TYPE_RESOURCE:
765 case D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER:
766 case D3D10_SB_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER:
767 case D3D10_SB_OPERAND_TYPE_LABEL:
768 case D3D10_SB_OPERAND_TYPE_NULL:
769 case D3D10_SB_OPERAND_TYPE_RASTERIZER:
770 case D3D10_SB_OPERAND_TYPE_OUTPUT_COVERAGE_MASK:
771 /* XXX: Translate more operands types.
772 */
773 LOG_UNSUPPORTED(true);
774 reg = ureg_DECL_temporary(sx->ureg);
775 }
776
777 return reg;
778 }
779
780 static struct ureg_src
translate_indexable_temp(struct Shader_xlate * sx,const struct Shader_operand * operand)781 translate_indexable_temp(struct Shader_xlate *sx,
782 const struct Shader_operand *operand)
783 {
784 struct ureg_src reg;
785 switch (operand->index[1].index_rep) {
786 case D3D10_SB_OPERAND_INDEX_IMMEDIATE32:
787 reg = ureg_src(
788 sx->temps[sx->indexable_temp_offsets[operand->index[0].imm] +
789 operand->index[1].imm]);
790 break;
791 case D3D10_SB_OPERAND_INDEX_RELATIVE:
792 reg = ureg_src_indirect(
793 ureg_src(sx->temps[
794 sx->indexable_temp_offsets[operand->index[0].imm]]),
795 translate_relative_operand(sx,
796 &operand->index[1].rel));
797 break;
798 case D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE:
799 reg = ureg_src_indirect(
800 ureg_src(sx->temps[
801 operand->index[1].imm +
802 sx->indexable_temp_offsets[operand->index[0].imm]]),
803 translate_relative_operand(sx,
804 &operand->index[1].rel));
805 break;
806 default:
807 /* XXX: Other index representations.
808 */
809 LOG_UNSUPPORTED(true);
810 reg = ureg_src(ureg_DECL_temporary(sx->ureg));
811 }
812 return reg;
813 }
814
815 static struct ureg_dst
translate_dst_operand(struct Shader_xlate * sx,const struct Shader_dst_operand * operand,bool saturate)816 translate_dst_operand(struct Shader_xlate *sx,
817 const struct Shader_dst_operand *operand,
818 bool saturate)
819 {
820 struct ureg_dst reg;
821 unsigned writemask =
822 operand->mask >> D3D10_SB_OPERAND_4_COMPONENT_MASK_SHIFT;
823
824 assert((D3D10_SB_OPERAND_4_COMPONENT_MASK_SHIFT) == 4);
825 assert((D3D10_SB_OPERAND_4_COMPONENT_MASK_X >> 4) == TGSI_WRITEMASK_X);
826 assert((D3D10_SB_OPERAND_4_COMPONENT_MASK_Y >> 4) == TGSI_WRITEMASK_Y);
827 assert((D3D10_SB_OPERAND_4_COMPONENT_MASK_Z >> 4) == TGSI_WRITEMASK_Z);
828 assert((D3D10_SB_OPERAND_4_COMPONENT_MASK_W >> 4) == TGSI_WRITEMASK_W);
829
830 switch (operand->base.type) {
831 case D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP:
832 assert(operand->base.index_dim == 2);
833 assert(operand->base.index[0].index_rep == D3D10_SB_OPERAND_INDEX_IMMEDIATE32);
834 assert(operand->base.index[0].imm < SHADER_MAX_INDEXABLE_TEMPS);
835
836 reg = ureg_dst(translate_indexable_temp(sx, &operand->base));
837 break;
838
839 default:
840 reg = translate_operand(sx, &operand->base, writemask);
841 }
842
843 /* oDepth often has an empty writemask */
844 if (operand->base.type != D3D10_SB_OPERAND_TYPE_OUTPUT_DEPTH) {
845 reg = ureg_writemask(reg, writemask);
846 }
847
848 if (saturate) {
849 reg = ureg_saturate(reg);
850 }
851
852 return reg;
853 }
854
855 static struct ureg_src
translate_src_operand(struct Shader_xlate * sx,const struct Shader_src_operand * operand,const enum dx10_opcode_format format)856 translate_src_operand(struct Shader_xlate *sx,
857 const struct Shader_src_operand *operand,
858 const enum dx10_opcode_format format)
859 {
860 struct ureg_src reg;
861
862 switch (operand->base.type) {
863 case D3D10_SB_OPERAND_TYPE_INPUT:
864 if (operand->base.index_dim == 1) {
865 switch (operand->base.index[0].index_rep) {
866 case D3D10_SB_OPERAND_INDEX_IMMEDIATE32:
867 assert(operand->base.index[0].imm < SHADER_MAX_INPUTS);
868 reg = sx->inputs[operand->base.index[0].imm].reg;
869 break;
870 case D3D10_SB_OPERAND_INDEX_RELATIVE: {
871 struct ureg_src tmp =
872 translate_relative_operand(sx, &operand->base.index[0].rel);
873 reg = ureg_src_indirect(sx->inputs[0].reg, tmp);
874 }
875 break;
876 case D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE: {
877 struct ureg_src tmp =
878 translate_relative_operand(sx, &operand->base.index[0].rel);
879 reg = ureg_src_indirect(sx->inputs[operand->base.index[0].imm].reg, tmp);
880 }
881 break;
882 default:
883 /* XXX: Other index representations.
884 */
885 LOG_UNSUPPORTED(true);
886
887 }
888 } else {
889 assert(operand->base.index_dim == 2);
890 assert(operand->base.index[1].imm < SHADER_MAX_INPUTS);
891
892 switch (operand->base.index[1].index_rep) {
893 case D3D10_SB_OPERAND_INDEX_IMMEDIATE32:
894 reg = sx->inputs[operand->base.index[1].imm].reg;
895 break;
896 case D3D10_SB_OPERAND_INDEX_RELATIVE: {
897 struct ureg_src tmp =
898 translate_relative_operand(sx, &operand->base.index[1].rel);
899 reg = ureg_src_indirect(sx->inputs[0].reg, tmp);
900 }
901 break;
902 case D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE: {
903 struct ureg_src tmp =
904 translate_relative_operand(sx, &operand->base.index[1].rel);
905 reg = ureg_src_indirect(sx->inputs[operand->base.index[1].imm].reg, tmp);
906 }
907 break;
908 default:
909 /* XXX: Other index representations.
910 */
911 LOG_UNSUPPORTED(true);
912 }
913
914 switch (operand->base.index[0].index_rep) {
915 case D3D10_SB_OPERAND_INDEX_IMMEDIATE32:
916 reg = ureg_src_dimension(reg, operand->base.index[0].imm);
917 break;
918 case D3D10_SB_OPERAND_INDEX_RELATIVE:{
919 struct ureg_src tmp =
920 translate_relative_operand(sx, &operand->base.index[0].rel);
921 reg = ureg_src_dimension_indirect(reg, tmp, 0);
922 }
923 break;
924 case D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE: {
925 struct ureg_src tmp =
926 translate_relative_operand(sx, &operand->base.index[0].rel);
927 reg = ureg_src_dimension_indirect(reg, tmp, operand->base.index[0].imm);
928 }
929 break;
930 default:
931 /* XXX: Other index representations.
932 */
933 LOG_UNSUPPORTED(true);
934 }
935 }
936 break;
937
938 case D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP:
939 assert(operand->base.index_dim == 2);
940 assert(operand->base.index[0].index_rep == D3D10_SB_OPERAND_INDEX_IMMEDIATE32);
941 assert(operand->base.index[0].imm < SHADER_MAX_INDEXABLE_TEMPS);
942
943 reg = translate_indexable_temp(sx, &operand->base);
944 break;
945
946 case D3D10_SB_OPERAND_TYPE_IMMEDIATE32:
947 switch (format) {
948 case OF_FLOAT:
949 reg = ureg_imm4f(sx->ureg,
950 operand->imm[0].f32,
951 operand->imm[1].f32,
952 operand->imm[2].f32,
953 operand->imm[3].f32);
954 break;
955 case OF_INT:
956 reg = ureg_imm4i(sx->ureg,
957 operand->imm[0].i32,
958 operand->imm[1].i32,
959 operand->imm[2].i32,
960 operand->imm[3].i32);
961 break;
962 case OF_UINT:
963 reg = ureg_imm4u(sx->ureg,
964 operand->imm[0].u32,
965 operand->imm[1].u32,
966 operand->imm[2].u32,
967 operand->imm[3].u32);
968 break;
969 default:
970 assert(0);
971 reg = ureg_src(ureg_DECL_temporary(sx->ureg));
972 }
973 break;
974
975 case D3D10_SB_OPERAND_TYPE_SAMPLER:
976 assert(operand->base.index_dim == 1);
977 assert(operand->base.index[0].index_rep == D3D10_SB_OPERAND_INDEX_IMMEDIATE32);
978 assert(operand->base.index[0].imm < SHADER_MAX_SAMPLERS);
979
980 reg = sx->samplers[operand->base.index[0].imm];
981 break;
982
983 case D3D10_SB_OPERAND_TYPE_RESOURCE:
984 assert(operand->base.index_dim == 1);
985 assert(operand->base.index[0].index_rep == D3D10_SB_OPERAND_INDEX_IMMEDIATE32);
986 assert(operand->base.index[0].imm < SHADER_MAX_RESOURCES);
987
988 reg = sx->sv[operand->base.index[0].imm];
989 break;
990
991 case D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER:
992 assert(operand->base.index_dim == 2);
993
994 assert(operand->base.index[0].index_rep == D3D10_SB_OPERAND_INDEX_IMMEDIATE32);
995 assert(operand->base.index[0].imm < PIPE_MAX_CONSTANT_BUFFERS);
996
997 switch (operand->base.index[1].index_rep) {
998 case D3D10_SB_OPERAND_INDEX_IMMEDIATE32:
999 assert(operand->base.index[1].imm < SHADER_MAX_CONSTS);
1000
1001 reg = ureg_src_register(TGSI_FILE_CONSTANT, operand->base.index[1].imm);
1002 reg = ureg_src_dimension(reg, operand->base.index[0].imm);
1003 break;
1004 case D3D10_SB_OPERAND_INDEX_RELATIVE:
1005 case D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE:
1006 reg = ureg_src_register(TGSI_FILE_CONSTANT, operand->base.index[1].imm);
1007 reg = ureg_src_indirect(
1008 reg,
1009 translate_relative_operand(sx, &operand->base.index[1].rel));
1010 reg = ureg_src_dimension(reg, operand->base.index[0].imm);
1011 break;
1012 default:
1013 /* XXX: Other index representations.
1014 */
1015 LOG_UNSUPPORTED(true);
1016 }
1017
1018 break;
1019
1020 case D3D10_SB_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER:
1021 assert(operand->base.index_dim == 1);
1022
1023 switch (operand->base.index[0].index_rep) {
1024 case D3D10_SB_OPERAND_INDEX_IMMEDIATE32:
1025 reg = sx->imms;
1026 reg.Index += operand->base.index[0].imm;
1027 break;
1028 case D3D10_SB_OPERAND_INDEX_RELATIVE:
1029 case D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE:
1030 reg = sx->imms;
1031 reg.Index += operand->base.index[0].imm;
1032 reg = ureg_src_indirect(
1033 sx->imms,
1034 translate_relative_operand(sx, &operand->base.index[0].rel));
1035 break;
1036 default:
1037 /* XXX: Other index representations.
1038 */
1039 LOG_UNSUPPORTED(true);
1040 }
1041 break;
1042
1043 case D3D10_SB_OPERAND_TYPE_INPUT_PRIMITIVEID:
1044 reg = sx->prim_id;
1045 break;
1046
1047 default:
1048 reg = ureg_src(translate_operand(sx, &operand->base, 0));
1049 }
1050
1051 reg = ureg_swizzle(reg,
1052 operand->swizzle[0],
1053 operand->swizzle[1],
1054 operand->swizzle[2],
1055 operand->swizzle[3]);
1056
1057 switch (operand->modifier) {
1058 case D3D10_SB_OPERAND_MODIFIER_NONE:
1059 break;
1060 case D3D10_SB_OPERAND_MODIFIER_NEG:
1061 reg = ureg_negate(reg);
1062 break;
1063 case D3D10_SB_OPERAND_MODIFIER_ABS:
1064 reg = ureg_abs(reg);
1065 break;
1066 case D3D10_SB_OPERAND_MODIFIER_ABSNEG:
1067 reg = ureg_negate(ureg_abs(reg));
1068 break;
1069 default:
1070 assert(0);
1071 }
1072
1073 return reg;
1074 }
1075
1076 static uint
translate_resource_dimension(D3D10_SB_RESOURCE_DIMENSION dim)1077 translate_resource_dimension(D3D10_SB_RESOURCE_DIMENSION dim)
1078 {
1079 switch (dim) {
1080 case D3D10_SB_RESOURCE_DIMENSION_UNKNOWN:
1081 return TGSI_TEXTURE_UNKNOWN;
1082 case D3D10_SB_RESOURCE_DIMENSION_BUFFER:
1083 return TGSI_TEXTURE_BUFFER;
1084 case D3D10_SB_RESOURCE_DIMENSION_TEXTURE1D:
1085 return TGSI_TEXTURE_1D;
1086 case D3D10_SB_RESOURCE_DIMENSION_TEXTURE2D:
1087 return TGSI_TEXTURE_2D;
1088 case D3D10_SB_RESOURCE_DIMENSION_TEXTURE2DMS:
1089 return TGSI_TEXTURE_2D_MSAA;
1090 case D3D10_SB_RESOURCE_DIMENSION_TEXTURE3D:
1091 return TGSI_TEXTURE_3D;
1092 case D3D10_SB_RESOURCE_DIMENSION_TEXTURECUBE:
1093 return TGSI_TEXTURE_CUBE;
1094 case D3D10_SB_RESOURCE_DIMENSION_TEXTURE1DARRAY:
1095 return TGSI_TEXTURE_1D_ARRAY;
1096 case D3D10_SB_RESOURCE_DIMENSION_TEXTURE2DARRAY:
1097 return TGSI_TEXTURE_2D_ARRAY;
1098 case D3D10_SB_RESOURCE_DIMENSION_TEXTURE2DMSARRAY:
1099 return TGSI_TEXTURE_2D_ARRAY_MSAA;
1100 case D3D10_SB_RESOURCE_DIMENSION_TEXTURECUBEARRAY:
1101 return TGSI_TEXTURE_CUBE_ARRAY;
1102 default:
1103 assert(0);
1104 return TGSI_TEXTURE_UNKNOWN;
1105 }
1106 }
1107
1108 static uint
texture_dim_from_tgsi_target(unsigned tgsi_target)1109 texture_dim_from_tgsi_target(unsigned tgsi_target)
1110 {
1111 switch (tgsi_target) {
1112 case TGSI_TEXTURE_BUFFER:
1113 case TGSI_TEXTURE_1D:
1114 case TGSI_TEXTURE_1D_ARRAY:
1115 return 1;
1116 case TGSI_TEXTURE_2D:
1117 case TGSI_TEXTURE_2D_MSAA:
1118 case TGSI_TEXTURE_CUBE:
1119 case TGSI_TEXTURE_2D_ARRAY:
1120 case TGSI_TEXTURE_2D_ARRAY_MSAA:
1121 return 2;
1122 case TGSI_TEXTURE_3D:
1123 return 3;
1124 case TGSI_TEXTURE_UNKNOWN:
1125 default:
1126 assert(0);
1127 return 1;
1128 }
1129 }
1130
1131 static bool
operand_is_scalar(const struct Shader_src_operand * operand)1132 operand_is_scalar(const struct Shader_src_operand *operand)
1133 {
1134 return operand->swizzle[0] == operand->swizzle[1] &&
1135 operand->swizzle[1] == operand->swizzle[2] &&
1136 operand->swizzle[2] == operand->swizzle[3];
1137 }
1138
1139 static void
Shader_add_call(struct Shader_xlate * sx,unsigned d3d_label,unsigned tgsi_label_token)1140 Shader_add_call(struct Shader_xlate *sx,
1141 unsigned d3d_label,
1142 unsigned tgsi_label_token)
1143 {
1144 ASSERT(sx->num_calls < sx->max_calls);
1145
1146 sx->calls[sx->num_calls].d3d_label = d3d_label;
1147 sx->calls[sx->num_calls].tgsi_label_token = tgsi_label_token;
1148 sx->num_calls++;
1149 }
1150
1151 static void
Shader_add_label(struct Shader_xlate * sx,unsigned d3d_label,unsigned tgsi_insn_no)1152 Shader_add_label(struct Shader_xlate *sx,
1153 unsigned d3d_label,
1154 unsigned tgsi_insn_no)
1155 {
1156 ASSERT(sx->num_labels < sx->max_labels);
1157
1158 sx->labels[sx->num_labels].d3d_label = d3d_label;
1159 sx->labels[sx->num_labels].tgsi_insn_no = tgsi_insn_no;
1160 sx->num_labels++;
1161 }
1162
1163
1164 static void
sample_ureg_emit(struct ureg_program * ureg,unsigned tgsi_opcode,unsigned num_src,struct Shader_opcode * opcode,struct ureg_dst dst,struct ureg_src * src)1165 sample_ureg_emit(struct ureg_program *ureg,
1166 unsigned tgsi_opcode,
1167 unsigned num_src,
1168 struct Shader_opcode *opcode,
1169 struct ureg_dst dst,
1170 struct ureg_src *src)
1171 {
1172 unsigned num_offsets = 0;
1173 struct tgsi_texture_offset texoffsets;
1174
1175 memset(&texoffsets, 0, sizeof texoffsets);
1176
1177 if (opcode->imm_texel_offset.u ||
1178 opcode->imm_texel_offset.v ||
1179 opcode->imm_texel_offset.w) {
1180 struct ureg_src offsetreg;
1181 num_offsets = 1;
1182 /* don't actually always need all 3 values */
1183 offsetreg = ureg_imm3i(ureg,
1184 opcode->imm_texel_offset.u,
1185 opcode->imm_texel_offset.v,
1186 opcode->imm_texel_offset.w);
1187 texoffsets.File = offsetreg.File;
1188 texoffsets.Index = offsetreg.Index;
1189 texoffsets.SwizzleX = offsetreg.SwizzleX;
1190 texoffsets.SwizzleY = offsetreg.SwizzleY;
1191 texoffsets.SwizzleZ = offsetreg.SwizzleZ;
1192 }
1193
1194 ureg_tex_insn(ureg,
1195 tgsi_opcode,
1196 &dst, 1,
1197 TGSI_TEXTURE_UNKNOWN,
1198 TGSI_RETURN_TYPE_UNKNOWN,
1199 &texoffsets, num_offsets,
1200 src, num_src);
1201 }
1202
1203 typedef void (*unary_ureg_func)(struct ureg_program *ureg, struct ureg_dst dst,
1204 struct ureg_src src);
1205 static void
expand_unary_to_scalarf(struct ureg_program * ureg,unary_ureg_func func,struct Shader_xlate * sx,struct Shader_opcode * opcode)1206 expand_unary_to_scalarf(struct ureg_program *ureg, unary_ureg_func func,
1207 struct Shader_xlate *sx, struct Shader_opcode *opcode)
1208 {
1209 struct ureg_dst tmp = ureg_DECL_temporary(ureg);
1210 struct ureg_dst dst = translate_dst_operand(sx, &opcode->dst[0],
1211 opcode->saturate);
1212 struct ureg_src src = translate_src_operand(sx, &opcode->src[0], OF_FLOAT);
1213 struct ureg_dst scalar_dst;
1214 ureg_MOV(ureg, tmp, src);
1215 src = ureg_src(tmp);
1216
1217 scalar_dst = ureg_writemask(dst, TGSI_WRITEMASK_X);
1218 if (scalar_dst.WriteMask != TGSI_WRITEMASK_NONE) {
1219 func(ureg, scalar_dst,
1220 ureg_scalar(src, TGSI_SWIZZLE_X));
1221 }
1222 scalar_dst = ureg_writemask(dst, TGSI_WRITEMASK_Y);
1223 if (scalar_dst.WriteMask != TGSI_WRITEMASK_NONE) {
1224 func(ureg, scalar_dst,
1225 ureg_scalar(src, TGSI_SWIZZLE_Y));
1226 }
1227 scalar_dst = ureg_writemask(dst, TGSI_WRITEMASK_Z);
1228 if (scalar_dst.WriteMask != TGSI_WRITEMASK_NONE) {
1229 func(ureg, scalar_dst,
1230 ureg_scalar(src, TGSI_SWIZZLE_Z));
1231 }
1232 scalar_dst = ureg_writemask(dst, TGSI_WRITEMASK_W);
1233 if (scalar_dst.WriteMask != TGSI_WRITEMASK_NONE) {
1234 func(ureg, scalar_dst,
1235 ureg_scalar(src, TGSI_SWIZZLE_W));
1236 }
1237 ureg_release_temporary(ureg, tmp);
1238 }
1239
1240 const struct tgsi_token *
Shader_tgsi_translate(const unsigned * code,unsigned * output_mapping)1241 Shader_tgsi_translate(const unsigned *code,
1242 unsigned *output_mapping)
1243 {
1244 struct Shader_xlate sx;
1245 struct Shader_parser parser;
1246 struct ureg_program *ureg = NULL;
1247 struct Shader_opcode opcode;
1248 const struct tgsi_token *tokens = NULL;
1249 uint nr_tokens;
1250 bool shader_dumped = false;
1251 bool inside_sub = false;
1252 uint i, j;
1253
1254 memset(&sx, 0, sizeof sx);
1255
1256 Shader_parse_init(&parser, code);
1257
1258 if (st_debug & ST_DEBUG_TGSI) {
1259 dx10_shader_dump_tokens(code);
1260 shader_dumped = true;
1261 }
1262
1263 sx.max_calls = 64;
1264 sx.calls = (struct Shader_call *)MALLOC(sx.max_calls *
1265 sizeof(struct Shader_call));
1266 sx.num_calls = 0;
1267
1268 sx.max_labels = 64;
1269 sx.labels = (struct Shader_label *)MALLOC(sx.max_labels *
1270 sizeof(struct Shader_call));
1271 sx.num_labels = 0;
1272
1273
1274
1275 /* Header. */
1276 switch (parser.header.type) {
1277 case D3D10_SB_PIXEL_SHADER:
1278 ureg = ureg_create(PIPE_SHADER_FRAGMENT);
1279 break;
1280 case D3D10_SB_VERTEX_SHADER:
1281 ureg = ureg_create(PIPE_SHADER_VERTEX);
1282 break;
1283 case D3D10_SB_GEOMETRY_SHADER:
1284 ureg = ureg_create(PIPE_SHADER_GEOMETRY);
1285 break;
1286 }
1287
1288 assert(ureg);
1289 sx.ureg = ureg;
1290
1291 while (Shader_parse_opcode(&parser, &opcode)) {
1292 const struct dx10_opcode_xlate *ox;
1293
1294 assert(opcode.type < D3D10_SB_NUM_OPCODES);
1295 ox = &opcode_xlate[opcode.type];
1296
1297 switch (opcode.type) {
1298 case D3D10_SB_OPCODE_EXP:
1299 expand_unary_to_scalarf(ureg, ureg_EX2, &sx, &opcode);
1300 break;
1301 case D3D10_SB_OPCODE_SQRT:
1302 expand_unary_to_scalarf(ureg, ureg_SQRT, &sx, &opcode);
1303 break;
1304 case D3D10_SB_OPCODE_RSQ:
1305 expand_unary_to_scalarf(ureg, ureg_RSQ, &sx, &opcode);
1306 break;
1307 case D3D10_SB_OPCODE_LOG:
1308 expand_unary_to_scalarf(ureg, ureg_LG2, &sx, &opcode);
1309 break;
1310 case D3D10_SB_OPCODE_IMUL:
1311 if (opcode.dst[0].base.type != D3D10_SB_OPERAND_TYPE_NULL) {
1312 ureg_IMUL_HI(ureg,
1313 translate_dst_operand(&sx, &opcode.dst[0], opcode.saturate),
1314 translate_src_operand(&sx, &opcode.src[0], OF_INT),
1315 translate_src_operand(&sx, &opcode.src[1], OF_INT));
1316 }
1317
1318 if (opcode.dst[1].base.type != D3D10_SB_OPERAND_TYPE_NULL) {
1319 ureg_UMUL(ureg,
1320 translate_dst_operand(&sx, &opcode.dst[1], opcode.saturate),
1321 translate_src_operand(&sx, &opcode.src[0], OF_INT),
1322 translate_src_operand(&sx, &opcode.src[1], OF_INT));
1323 }
1324
1325 break;
1326
1327 case D3D10_SB_OPCODE_FTOI: {
1328 /* XXX: tgsi (and just about everybody else, c, opencl, glsl) has
1329 * out-of-range (and NaN) values undefined for f2i/f2u, but d3d10
1330 * requires clamping to min and max representable value (as well as 0
1331 * for NaNs) (this applies to both ftoi and ftou). At least the online
1332 * docs state that - this is consistent with generic d3d10 conversion
1333 * rules.
1334 * For FTOI, we cheat a bit here - in particular depending on noone
1335 * caring about NaNs, and depending on the (undefined!) behavior of
1336 * F2I returning 0x80000000 for too negative values (which works with
1337 * x86 sse). Hence only need to clamp too positive values.
1338 * Note that it is impossible to clamp using a float, since 2^31 - 1
1339 * is not exactly representable with a float.
1340 */
1341 struct ureg_dst too_large = ureg_DECL_temporary(ureg);
1342 struct ureg_dst tmp = ureg_DECL_temporary(ureg);
1343 ureg_FSGE(ureg, too_large,
1344 translate_src_operand(&sx, &opcode.src[0], OF_FLOAT),
1345 ureg_imm1f(ureg, 2147483648.0f));
1346 ureg_F2I(ureg, tmp,
1347 translate_src_operand(&sx, &opcode.src[0], OF_FLOAT));
1348 ureg_UCMP(ureg,
1349 translate_dst_operand(&sx, &opcode.dst[0], opcode.saturate),
1350 ureg_src(too_large),
1351 ureg_imm1i(ureg, 0x7fffffff),
1352 ureg_src(tmp));
1353 ureg_release_temporary(ureg, too_large);
1354 ureg_release_temporary(ureg, tmp);
1355 }
1356 break;
1357
1358 case D3D10_SB_OPCODE_FTOU: {
1359 /* For ftou, we need to do both clamps, which as a bonus also
1360 * gets us correct NaN behavior.
1361 * Note that it is impossible to clamp using a float against the upper
1362 * limit, since 2^32 - 1 is not exactly representable with a float,
1363 * but the clamp against 0.0 certainly works just fine.
1364 */
1365 struct ureg_dst too_large = ureg_DECL_temporary(ureg);
1366 struct ureg_dst tmp = ureg_DECL_temporary(ureg);
1367 ureg_FSGE(ureg, too_large,
1368 translate_src_operand(&sx, &opcode.src[0], OF_FLOAT),
1369 ureg_imm1f(ureg, 4294967296.0f));
1370 /* clamp negative values + NaN to zero.
1371 * (Could be done slightly more efficient in llvmpipe due to
1372 * MAX NaN behavior handling.)
1373 */
1374 ureg_MAX(ureg, tmp,
1375 ureg_imm1f(ureg, 0.0f),
1376 translate_src_operand(&sx, &opcode.src[0], OF_FLOAT));
1377 ureg_F2U(ureg, tmp,
1378 ureg_src(tmp));
1379 ureg_UCMP(ureg,
1380 translate_dst_operand(&sx, &opcode.dst[0], opcode.saturate),
1381 ureg_src(too_large),
1382 ureg_imm1u(ureg, 0xffffffff),
1383 ureg_src(tmp));
1384 ureg_release_temporary(ureg, too_large);
1385 ureg_release_temporary(ureg, tmp);
1386 }
1387 break;
1388
1389 case D3D10_SB_OPCODE_LD_MS:
1390 /* XXX: We don't support multi-sampling yet, but we need to parse
1391 * this opcode regardless, so we just ignore sample index operand
1392 * for now */
1393 case D3D10_SB_OPCODE_LD:
1394 if (st_debug & ST_DEBUG_OLD_TEX_OPS) {
1395 unsigned resource = opcode.src[1].base.index[0].imm;
1396 assert(opcode.src[1].base.index_dim == 1);
1397 assert(opcode.src[1].base.index[0].imm < SHADER_MAX_RESOURCES);
1398
1399 if (ureg_src_is_undef(sx.samplers[resource])) {
1400 sx.samplers[resource] =
1401 ureg_DECL_sampler(ureg, resource);
1402 }
1403
1404 ureg_TXF(ureg,
1405 translate_dst_operand(&sx, &opcode.dst[0], opcode.saturate),
1406 sx.resources[resource].target,
1407 translate_src_operand(&sx, &opcode.src[0], OF_FLOAT),
1408 sx.samplers[resource]);
1409 }
1410 else {
1411 struct ureg_src srcreg[2];
1412 srcreg[0] = translate_src_operand(&sx, &opcode.src[0], OF_INT);
1413 srcreg[1] = translate_src_operand(&sx, &opcode.src[1], OF_INT);
1414
1415 sample_ureg_emit(ureg, TGSI_OPCODE_SAMPLE_I, 2, &opcode,
1416 translate_dst_operand(&sx, &opcode.dst[0],
1417 opcode.saturate),
1418 srcreg);
1419 }
1420 break;
1421
1422 case D3D10_SB_OPCODE_CUSTOMDATA:
1423 if (opcode.customdata._class ==
1424 D3D10_SB_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER) {
1425 sx.imms =
1426 ureg_DECL_immediate_block_uint(ureg,
1427 opcode.customdata.u.constbuf.data,
1428 opcode.customdata.u.constbuf.count);
1429 } else {
1430 assert(0);
1431 }
1432 break;
1433
1434 case D3D10_SB_OPCODE_RESINFO:
1435 if (st_debug & ST_DEBUG_OLD_TEX_OPS) {
1436 unsigned resource = opcode.src[1].base.index[0].imm;
1437 assert(opcode.src[1].base.index_dim == 1);
1438 assert(opcode.src[1].base.index[0].imm < SHADER_MAX_RESOURCES);
1439
1440 if (ureg_src_is_undef(sx.samplers[resource])) {
1441 sx.samplers[resource] =
1442 ureg_DECL_sampler(ureg, resource);
1443 }
1444 /* don't bother with swizzle, ret type etc. */
1445 ureg_TXQ(ureg,
1446 translate_dst_operand(&sx, &opcode.dst[0],
1447 opcode.saturate),
1448 sx.resources[resource].target,
1449 translate_src_operand(&sx, &opcode.src[0], OF_UINT),
1450 sx.samplers[resource]);
1451 }
1452 else {
1453 struct ureg_dst r0 = ureg_DECL_temporary(ureg);
1454 struct ureg_src tsrc = translate_src_operand(&sx, &opcode.src[1], OF_UINT);
1455 struct ureg_dst dstreg = translate_dst_operand(&sx, &opcode.dst[0],
1456 opcode.saturate);
1457
1458 /* while specs say swizzle is ignored better safe than sorry */
1459 tsrc.SwizzleX = TGSI_SWIZZLE_X;
1460 tsrc.SwizzleY = TGSI_SWIZZLE_Y;
1461 tsrc.SwizzleZ = TGSI_SWIZZLE_Z;
1462 tsrc.SwizzleW = TGSI_SWIZZLE_W;
1463
1464 ureg_SVIEWINFO(ureg, r0,
1465 translate_src_operand(&sx, &opcode.src[0], OF_UINT),
1466 tsrc);
1467
1468 tsrc = ureg_src(r0);
1469 tsrc.SwizzleX = opcode.src[1].swizzle[0];
1470 tsrc.SwizzleY = opcode.src[1].swizzle[1];
1471 tsrc.SwizzleZ = opcode.src[1].swizzle[2];
1472 tsrc.SwizzleW = opcode.src[1].swizzle[3];
1473
1474 if (opcode.specific.resinfo_ret_type ==
1475 D3D10_SB_RESINFO_INSTRUCTION_RETURN_UINT) {
1476 ureg_MOV(ureg, dstreg, tsrc);
1477 }
1478 else if (opcode.specific.resinfo_ret_type ==
1479 D3D10_SB_RESINFO_INSTRUCTION_RETURN_FLOAT) {
1480 ureg_I2F(ureg, dstreg, tsrc);
1481 }
1482 else { /* D3D10_SB_RESINFO_INSTRUCTION_RETURN_RCPFLOAT */
1483 unsigned i;
1484 /*
1485 * Must apply rcp only to parts determined by dims,
1486 * (width/height/depth) but NOT to array size nor mip levels
1487 * hence need to figure that out here.
1488 * This is one sick modifier if you ask me!
1489 */
1490 unsigned res_index = opcode.src[1].base.index[0].imm;
1491 unsigned target = sx.resources[res_index].target;
1492 unsigned dims = texture_dim_from_tgsi_target(target);
1493
1494 ureg_I2F(ureg, r0, ureg_src(r0));
1495 tsrc = ureg_src(r0);
1496 for (i = 0; i < 4; i++) {
1497 unsigned dst_swizzle = opcode.src[1].swizzle[i];
1498 struct ureg_dst dstregmasked = ureg_writemask(dstreg, 1 << i);
1499 /*
1500 * could do one mov with multiple write mask bits set
1501 * but rcp is scalar anyway.
1502 */
1503 if (dst_swizzle < dims) {
1504 ureg_RCP(ureg, dstregmasked, ureg_scalar(tsrc, dst_swizzle));
1505 }
1506 else {
1507 ureg_MOV(ureg, dstregmasked, ureg_scalar(tsrc, dst_swizzle));
1508 }
1509 }
1510 }
1511 ureg_release_temporary(ureg, r0);
1512 }
1513 break;
1514
1515 case D3D10_SB_OPCODE_SAMPLE:
1516 if (st_debug & ST_DEBUG_OLD_TEX_OPS) {
1517 assert(opcode.src[1].base.index_dim == 1);
1518 assert(opcode.src[1].base.index[0].imm < SHADER_MAX_RESOURCES);
1519
1520 LOG_UNSUPPORTED(opcode.src[1].base.index[0].imm != opcode.src[2].base.index[0].imm);
1521
1522 ureg_TEX(ureg,
1523 translate_dst_operand(&sx, &opcode.dst[0],
1524 opcode.saturate),
1525 sx.resources[opcode.src[1].base.index[0].imm].target,
1526 translate_src_operand(&sx, &opcode.src[0], OF_FLOAT),
1527 translate_src_operand(&sx, &opcode.src[2], OF_FLOAT));
1528 }
1529 else {
1530 struct ureg_src srcreg[3];
1531 srcreg[0] = translate_src_operand(&sx, &opcode.src[0], OF_FLOAT);
1532 srcreg[1] = translate_src_operand(&sx, &opcode.src[1], OF_UINT);
1533 srcreg[2] = translate_src_operand(&sx, &opcode.src[2], OF_UINT);
1534
1535 sample_ureg_emit(ureg, TGSI_OPCODE_SAMPLE, 3, &opcode,
1536 translate_dst_operand(&sx, &opcode.dst[0],
1537 opcode.saturate),
1538 srcreg);
1539 }
1540 break;
1541
1542 case D3D10_SB_OPCODE_SAMPLE_C:
1543 if (st_debug & ST_DEBUG_OLD_TEX_OPS) {
1544 struct ureg_dst r0 = ureg_DECL_temporary(ureg);
1545
1546 /* XXX: Support only 2D texture targets for now.
1547 * Need to figure out how to pack the compare value
1548 * for other dimensions and if there is enough space
1549 * in a single operand for all possible cases.
1550 */
1551 LOG_UNSUPPORTED(sx.resources[opcode.src[1].base.index[0].imm].target !=
1552 TGSI_TEXTURE_2D);
1553
1554 assert(opcode.src[1].base.index_dim == 1);
1555 assert(opcode.src[1].base.index[0].imm < SHADER_MAX_RESOURCES);
1556
1557 /* Insert the compare value into .z component.
1558 */
1559 ureg_MOV(ureg,
1560 ureg_writemask(r0, TGSI_WRITEMASK_XYW),
1561 translate_src_operand(&sx, &opcode.src[0], OF_FLOAT));
1562 ureg_MOV(ureg,
1563 ureg_writemask(r0, TGSI_WRITEMASK_Z),
1564 translate_src_operand(&sx, &opcode.src[3], OF_FLOAT));
1565
1566 /* XXX: Pass explicit Lod=0 in D3D10_SB_OPCODE_SAMPLE_C_LZ case.
1567 */
1568
1569 ureg_TEX(ureg,
1570 translate_dst_operand(&sx, &opcode.dst[0],
1571 opcode.saturate),
1572 sx.resources[opcode.src[1].base.index[0].imm].target,
1573 ureg_src(r0),
1574 translate_src_operand(&sx, &opcode.src[2], OF_FLOAT));
1575
1576 ureg_release_temporary(ureg, r0);
1577 }
1578 else {
1579 struct ureg_src srcreg[4];
1580 srcreg[0] = translate_src_operand(&sx, &opcode.src[0], OF_FLOAT);
1581 srcreg[1] = translate_src_operand(&sx, &opcode.src[1], OF_UINT);
1582 srcreg[2] = translate_src_operand(&sx, &opcode.src[2], OF_UINT);
1583 srcreg[3] = translate_src_operand(&sx, &opcode.src[3], OF_FLOAT);
1584
1585 sample_ureg_emit(ureg, TGSI_OPCODE_SAMPLE_C, 4, &opcode,
1586 translate_dst_operand(&sx, &opcode.dst[0],
1587 opcode.saturate),
1588 srcreg);
1589 }
1590 break;
1591
1592 case D3D10_SB_OPCODE_SAMPLE_C_LZ:
1593 if (st_debug & ST_DEBUG_OLD_TEX_OPS) {
1594 struct ureg_dst r0 = ureg_DECL_temporary(ureg);
1595
1596 assert(opcode.src[1].base.index_dim == 1);
1597 assert(opcode.src[1].base.index[0].imm < SHADER_MAX_RESOURCES);
1598
1599 /* XXX: Support only 2D texture targets for now.
1600 * Need to figure out how to pack the compare value
1601 * for other dimensions and if there is enough space
1602 * in a single operand for all possible cases.
1603 */
1604 LOG_UNSUPPORTED(sx.resources[opcode.src[1].base.index[0].imm].target !=
1605 TGSI_TEXTURE_2D);
1606
1607 /* Insert the compare value into .z component.
1608 * Insert 0 into .w component.
1609 */
1610 ureg_MOV(ureg,
1611 ureg_writemask(r0, TGSI_WRITEMASK_XY),
1612 translate_src_operand(&sx, &opcode.src[0], OF_FLOAT));
1613 ureg_MOV(ureg,
1614 ureg_writemask(r0, TGSI_WRITEMASK_Z),
1615 translate_src_operand(&sx, &opcode.src[3], OF_FLOAT));
1616 ureg_MOV(ureg,
1617 ureg_writemask(r0, TGSI_WRITEMASK_W),
1618 ureg_imm1f(ureg, 0.0f));
1619
1620 ureg_TXL(ureg,
1621 translate_dst_operand(&sx, &opcode.dst[0],
1622 opcode.saturate),
1623 sx.resources[opcode.src[1].base.index[0].imm].target,
1624 ureg_src(r0),
1625 translate_src_operand(&sx, &opcode.src[2], OF_FLOAT));
1626
1627 ureg_release_temporary(ureg, r0);
1628 }
1629 else {
1630 struct ureg_src srcreg[4];
1631 srcreg[0] = translate_src_operand(&sx, &opcode.src[0], OF_FLOAT);
1632 srcreg[1] = translate_src_operand(&sx, &opcode.src[1], OF_UINT);
1633 srcreg[2] = translate_src_operand(&sx, &opcode.src[2], OF_UINT);
1634 srcreg[3] = translate_src_operand(&sx, &opcode.src[3], OF_FLOAT);
1635
1636 sample_ureg_emit(ureg, TGSI_OPCODE_SAMPLE_C_LZ, 4, &opcode,
1637 translate_dst_operand(&sx, &opcode.dst[0],
1638 opcode.saturate),
1639 srcreg);
1640 }
1641 break;
1642
1643 case D3D10_SB_OPCODE_SAMPLE_L:
1644 if (st_debug & ST_DEBUG_OLD_TEX_OPS) {
1645 struct ureg_dst r0 = ureg_DECL_temporary(ureg);
1646
1647 assert(opcode.src[1].base.index_dim == 1);
1648 assert(opcode.src[1].base.index[0].imm < SHADER_MAX_RESOURCES);
1649
1650 /* Insert LOD into .w component.
1651 */
1652 ureg_MOV(ureg,
1653 ureg_writemask(r0, TGSI_WRITEMASK_XYZ),
1654 translate_src_operand(&sx, &opcode.src[0], OF_FLOAT));
1655 ureg_MOV(ureg,
1656 ureg_writemask(r0, TGSI_WRITEMASK_W),
1657 translate_src_operand(&sx, &opcode.src[3], OF_FLOAT));
1658
1659 ureg_TXL(ureg,
1660 translate_dst_operand(&sx, &opcode.dst[0],
1661 opcode.saturate),
1662 sx.resources[opcode.src[1].base.index[0].imm].target,
1663 ureg_src(r0),
1664 translate_src_operand(&sx, &opcode.src[2], OF_FLOAT));
1665
1666 ureg_release_temporary(ureg, r0);
1667 }
1668 else {
1669 struct ureg_src srcreg[4];
1670 srcreg[0] = translate_src_operand(&sx, &opcode.src[0], OF_FLOAT);
1671 srcreg[1] = translate_src_operand(&sx, &opcode.src[1], OF_UINT);
1672 srcreg[2] = translate_src_operand(&sx, &opcode.src[2], OF_UINT);
1673 srcreg[3] = translate_src_operand(&sx, &opcode.src[3], OF_FLOAT);
1674
1675 sample_ureg_emit(ureg, TGSI_OPCODE_SAMPLE_L, 4, &opcode,
1676 translate_dst_operand(&sx, &opcode.dst[0],
1677 opcode.saturate),
1678 srcreg);
1679 }
1680 break;
1681
1682 case D3D10_SB_OPCODE_SAMPLE_D:
1683 if (st_debug & ST_DEBUG_OLD_TEX_OPS) {
1684 assert(opcode.src[1].base.index_dim == 1);
1685 assert(opcode.src[1].base.index[0].imm < SHADER_MAX_RESOURCES);
1686
1687 ureg_TXD(ureg,
1688 translate_dst_operand(&sx, &opcode.dst[0],
1689 opcode.saturate),
1690 sx.resources[opcode.src[1].base.index[0].imm].target,
1691 translate_src_operand(&sx, &opcode.src[0], OF_FLOAT),
1692 translate_src_operand(&sx, &opcode.src[3], OF_FLOAT),
1693 translate_src_operand(&sx, &opcode.src[4], OF_FLOAT),
1694 translate_src_operand(&sx, &opcode.src[2], OF_FLOAT));
1695 }
1696 else {
1697 struct ureg_src srcreg[5];
1698 srcreg[0] = translate_src_operand(&sx, &opcode.src[0], OF_FLOAT);
1699 srcreg[1] = translate_src_operand(&sx, &opcode.src[1], OF_UINT);
1700 srcreg[2] = translate_src_operand(&sx, &opcode.src[2], OF_UINT);
1701 srcreg[3] = translate_src_operand(&sx, &opcode.src[3], OF_FLOAT);
1702 srcreg[4] = translate_src_operand(&sx, &opcode.src[4], OF_FLOAT);
1703
1704 sample_ureg_emit(ureg, TGSI_OPCODE_SAMPLE_D, 5, &opcode,
1705 translate_dst_operand(&sx, &opcode.dst[0],
1706 opcode.saturate),
1707 srcreg);
1708 }
1709 break;
1710
1711 case D3D10_SB_OPCODE_SAMPLE_B:
1712 if (st_debug & ST_DEBUG_OLD_TEX_OPS) {
1713 struct ureg_dst r0 = ureg_DECL_temporary(ureg);
1714
1715 assert(opcode.src[1].base.index_dim == 1);
1716 assert(opcode.src[1].base.index[0].imm < SHADER_MAX_RESOURCES);
1717
1718 /* Insert LOD bias into .w component.
1719 */
1720 ureg_MOV(ureg,
1721 ureg_writemask(r0, TGSI_WRITEMASK_XYZ),
1722 translate_src_operand(&sx, &opcode.src[0], OF_FLOAT));
1723 ureg_MOV(ureg,
1724 ureg_writemask(r0, TGSI_WRITEMASK_W),
1725 translate_src_operand(&sx, &opcode.src[3], OF_FLOAT));
1726
1727 ureg_TXB(ureg,
1728 translate_dst_operand(&sx, &opcode.dst[0],
1729 opcode.saturate),
1730 sx.resources[opcode.src[1].base.index[0].imm].target,
1731 ureg_src(r0),
1732 translate_src_operand(&sx, &opcode.src[2], OF_FLOAT));
1733
1734 ureg_release_temporary(ureg, r0);
1735 }
1736 else {
1737 struct ureg_src srcreg[4];
1738 srcreg[0] = translate_src_operand(&sx, &opcode.src[0], OF_FLOAT);
1739 srcreg[1] = translate_src_operand(&sx, &opcode.src[1], OF_UINT);
1740 srcreg[2] = translate_src_operand(&sx, &opcode.src[2], OF_UINT);
1741 srcreg[3] = translate_src_operand(&sx, &opcode.src[3], OF_FLOAT);
1742
1743 sample_ureg_emit(ureg, TGSI_OPCODE_SAMPLE_B, 4, &opcode,
1744 translate_dst_operand(&sx, &opcode.dst[0],
1745 opcode.saturate),
1746 srcreg);
1747 }
1748 break;
1749
1750 case D3D10_SB_OPCODE_SINCOS: {
1751 struct ureg_dst src0 = ureg_DECL_temporary(ureg);
1752 ureg_MOV(ureg, src0, translate_src_operand(&sx, &opcode.src[0], OF_FLOAT));
1753 if (opcode.dst[0].base.type != D3D10_SB_OPERAND_TYPE_NULL) {
1754 struct ureg_dst dst = translate_dst_operand(&sx, &opcode.dst[0],
1755 opcode.saturate);
1756 struct ureg_src src = ureg_src(src0);
1757 ureg_SIN(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X),
1758 ureg_scalar(src, TGSI_SWIZZLE_X));
1759 ureg_SIN(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y),
1760 ureg_scalar(src, TGSI_SWIZZLE_Y));
1761 ureg_SIN(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z),
1762 ureg_scalar(src, TGSI_SWIZZLE_Z));
1763 ureg_SIN(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W),
1764 ureg_scalar(src, TGSI_SWIZZLE_W));
1765 }
1766 if (opcode.dst[1].base.type != D3D10_SB_OPERAND_TYPE_NULL) {
1767 struct ureg_dst dst = translate_dst_operand(&sx, &opcode.dst[1],
1768 opcode.saturate);
1769 struct ureg_src src = ureg_src(src0);
1770 ureg_COS(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X),
1771 ureg_scalar(src, TGSI_SWIZZLE_X));
1772 ureg_COS(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y),
1773 ureg_scalar(src, TGSI_SWIZZLE_Y));
1774 ureg_COS(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z),
1775 ureg_scalar(src, TGSI_SWIZZLE_Z));
1776 ureg_COS(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W),
1777 ureg_scalar(src, TGSI_SWIZZLE_W));
1778 }
1779 ureg_release_temporary(ureg, src0);
1780 }
1781 break;
1782
1783 case D3D10_SB_OPCODE_UDIV: {
1784 struct ureg_dst src0 = ureg_DECL_temporary(ureg);
1785 struct ureg_dst src1 = ureg_DECL_temporary(ureg);
1786 ureg_MOV(ureg, src0, translate_src_operand(&sx, &opcode.src[0], OF_UINT));
1787 ureg_MOV(ureg, src1, translate_src_operand(&sx, &opcode.src[1], OF_UINT));
1788 if (opcode.dst[0].base.type != D3D10_SB_OPERAND_TYPE_NULL) {
1789 ureg_UDIV(ureg,
1790 translate_dst_operand(&sx, &opcode.dst[0],
1791 opcode.saturate),
1792 ureg_src(src0), ureg_src(src1));
1793 }
1794 if (opcode.dst[1].base.type != D3D10_SB_OPERAND_TYPE_NULL) {
1795 ureg_UMOD(ureg,
1796 translate_dst_operand(&sx, &opcode.dst[1],
1797 opcode.saturate),
1798 ureg_src(src0), ureg_src(src1));
1799 }
1800 ureg_release_temporary(ureg, src0);
1801 ureg_release_temporary(ureg, src1);
1802 }
1803 break;
1804 case D3D10_SB_OPCODE_UMUL: {
1805 if (opcode.dst[0].base.type != D3D10_SB_OPERAND_TYPE_NULL) {
1806 ureg_UMUL_HI(ureg,
1807 translate_dst_operand(&sx, &opcode.dst[0],
1808 opcode.saturate),
1809 translate_src_operand(&sx, &opcode.src[0], OF_UINT),
1810 translate_src_operand(&sx, &opcode.src[1], OF_UINT));
1811 }
1812 if (opcode.dst[1].base.type != D3D10_SB_OPERAND_TYPE_NULL) {
1813 ureg_UMUL(ureg,
1814 translate_dst_operand(&sx, &opcode.dst[1],
1815 opcode.saturate),
1816 translate_src_operand(&sx, &opcode.src[0], OF_UINT),
1817 translate_src_operand(&sx, &opcode.src[1], OF_UINT));
1818 }
1819 }
1820 break;
1821
1822 case D3D10_SB_OPCODE_DCL_RESOURCE:
1823 {
1824 unsigned target;
1825 unsigned res_index = opcode.dst[0].base.index[0].imm;
1826 assert(opcode.dst[0].base.index_dim == 1);
1827 assert(res_index < SHADER_MAX_RESOURCES);
1828
1829 target = translate_resource_dimension(opcode.specific.dcl_resource_dimension);
1830 sx.resources[res_index].target = target;
1831 if (!(st_debug & ST_DEBUG_OLD_TEX_OPS)) {
1832 sx.sv[res_index] =
1833 ureg_DECL_sampler_view(ureg, res_index, target,
1834 trans_dcl_ret_type(opcode.dcl_resource_ret_type[0]),
1835 trans_dcl_ret_type(opcode.dcl_resource_ret_type[1]),
1836 trans_dcl_ret_type(opcode.dcl_resource_ret_type[2]),
1837 trans_dcl_ret_type(opcode.dcl_resource_ret_type[3]));
1838 }
1839 break;
1840 }
1841
1842 case D3D10_SB_OPCODE_DCL_CONSTANT_BUFFER: {
1843 unsigned num_constants = opcode.src[0].base.index[1].imm;
1844
1845 assert(opcode.src[0].base.index[0].imm < PIPE_MAX_CONSTANT_BUFFERS);
1846
1847 if (num_constants == 0) {
1848 num_constants = SHADER_MAX_CONSTS;
1849 } else {
1850 assert(num_constants <= SHADER_MAX_CONSTS);
1851 }
1852
1853 ureg_DECL_constant2D(ureg,
1854 0,
1855 num_constants - 1,
1856 opcode.src[0].base.index[0].imm);
1857 break;
1858 }
1859
1860 case D3D10_SB_OPCODE_DCL_SAMPLER:
1861 assert(opcode.dst[0].base.index_dim == 1);
1862 assert(opcode.dst[0].base.index[0].imm < SHADER_MAX_SAMPLERS);
1863
1864 sx.samplers[opcode.dst[0].base.index[0].imm] =
1865 ureg_DECL_sampler(ureg,
1866 opcode.dst[0].base.index[0].imm);
1867 break;
1868
1869 case D3D10_SB_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY:
1870 assert(parser.header.type == D3D10_SB_GEOMETRY_SHADER);
1871
1872 switch (opcode.specific.dcl_gs_output_primitive_topology) {
1873 case D3D10_SB_PRIMITIVE_TOPOLOGY_POINTLIST:
1874 ureg_property(sx.ureg,
1875 TGSI_PROPERTY_GS_OUTPUT_PRIM,
1876 MESA_PRIM_POINTS);
1877 break;
1878
1879 case D3D10_SB_PRIMITIVE_TOPOLOGY_LINESTRIP:
1880 ureg_property(sx.ureg,
1881 TGSI_PROPERTY_GS_OUTPUT_PRIM,
1882 MESA_PRIM_LINE_STRIP);
1883 break;
1884
1885 case D3D10_SB_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP:
1886 ureg_property(sx.ureg,
1887 TGSI_PROPERTY_GS_OUTPUT_PRIM,
1888 MESA_PRIM_TRIANGLE_STRIP);
1889 break;
1890
1891 default:
1892 assert(0);
1893 }
1894 break;
1895
1896 case D3D10_SB_OPCODE_DCL_GS_INPUT_PRIMITIVE:
1897 assert(parser.header.type == D3D10_SB_GEOMETRY_SHADER);
1898
1899 /* Figure out the second dimension of GS inputs.
1900 */
1901 switch (opcode.specific.dcl_gs_input_primitive) {
1902 case D3D10_SB_PRIMITIVE_POINT:
1903 declare_vertices_in(&sx, 1);
1904 ureg_property(sx.ureg,
1905 TGSI_PROPERTY_GS_INPUT_PRIM,
1906 MESA_PRIM_POINTS);
1907 break;
1908
1909 case D3D10_SB_PRIMITIVE_LINE:
1910 declare_vertices_in(&sx, 2);
1911 ureg_property(sx.ureg,
1912 TGSI_PROPERTY_GS_INPUT_PRIM,
1913 MESA_PRIM_LINES);
1914 break;
1915
1916 case D3D10_SB_PRIMITIVE_TRIANGLE:
1917 declare_vertices_in(&sx, 3);
1918 ureg_property(sx.ureg,
1919 TGSI_PROPERTY_GS_INPUT_PRIM,
1920 MESA_PRIM_TRIANGLES);
1921 break;
1922
1923 case D3D10_SB_PRIMITIVE_LINE_ADJ:
1924 declare_vertices_in(&sx, 4);
1925 ureg_property(sx.ureg,
1926 TGSI_PROPERTY_GS_INPUT_PRIM,
1927 MESA_PRIM_LINES_ADJACENCY);
1928 break;
1929
1930 case D3D10_SB_PRIMITIVE_TRIANGLE_ADJ:
1931 declare_vertices_in(&sx, 6);
1932 ureg_property(sx.ureg,
1933 TGSI_PROPERTY_GS_INPUT_PRIM,
1934 MESA_PRIM_TRIANGLES_ADJACENCY);
1935 break;
1936
1937 default:
1938 assert(0);
1939 }
1940 break;
1941
1942 case D3D10_SB_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT:
1943 assert(parser.header.type == D3D10_SB_GEOMETRY_SHADER);
1944
1945 ureg_property(sx.ureg,
1946 TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES,
1947 opcode.specific.dcl_max_output_vertex_count);
1948 break;
1949
1950 case D3D10_SB_OPCODE_DCL_INPUT:
1951 if (parser.header.type == D3D10_SB_VERTEX_SHADER) {
1952 dcl_vs_input(&sx, ureg, &opcode.dst[0]);
1953 } else {
1954 assert(parser.header.type == D3D10_SB_GEOMETRY_SHADER);
1955 dcl_gs_input(&sx, ureg, &opcode.dst[0]);
1956 }
1957 break;
1958
1959 case D3D10_SB_OPCODE_DCL_INPUT_SGV:
1960 assert(parser.header.type == D3D10_SB_VERTEX_SHADER);
1961 dcl_sgv_input(&sx, ureg, &opcode.dst[0], opcode.dcl_siv_name);
1962 break;
1963
1964 case D3D10_SB_OPCODE_DCL_INPUT_SIV:
1965 assert(parser.header.type == D3D10_SB_GEOMETRY_SHADER);
1966 dcl_siv_input(&sx, ureg, &opcode.dst[0], opcode.dcl_siv_name);
1967 break;
1968
1969 case D3D10_SB_OPCODE_DCL_INPUT_PS:
1970 assert(parser.header.type == D3D10_SB_PIXEL_SHADER);
1971 dcl_ps_input(&sx, ureg, &opcode.dst[0],
1972 opcode.specific.dcl_in_ps_interp);
1973 break;
1974
1975 case D3D10_SB_OPCODE_DCL_INPUT_PS_SGV:
1976 assert(parser.header.type == D3D10_SB_PIXEL_SHADER);
1977 dcl_ps_sgv_input(&sx, ureg, &opcode.dst[0],
1978 opcode.dcl_siv_name);
1979 break;
1980
1981 case D3D10_SB_OPCODE_DCL_INPUT_PS_SIV:
1982 assert(parser.header.type == D3D10_SB_PIXEL_SHADER);
1983 dcl_ps_siv_input(&sx, ureg, &opcode.dst[0],
1984 opcode.dcl_siv_name,
1985 opcode.specific.dcl_in_ps_interp);
1986 break;
1987
1988 case D3D10_SB_OPCODE_DCL_OUTPUT:
1989 if (parser.header.type == D3D10_SB_PIXEL_SHADER) {
1990 /* Pixel shader outputs. */
1991 if (opcode.dst[0].base.type == D3D10_SB_OPERAND_TYPE_OUTPUT_DEPTH) {
1992 /* Depth output. */
1993 assert(opcode.dst[0].base.index_dim == 0);
1994
1995 sx.output_depth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0, TGSI_WRITEMASK_Z, 0, 1);
1996 sx.output_depth = ureg_writemask(sx.output_depth, TGSI_WRITEMASK_Z);
1997 } else {
1998 /* Color outputs. */
1999 assert(opcode.dst[0].base.index_dim == 1);
2000 assert(opcode.dst[0].base.index[0].imm < SHADER_MAX_OUTPUTS);
2001
2002 dcl_base_output(&sx, ureg,
2003 ureg_DECL_output(ureg,
2004 TGSI_SEMANTIC_COLOR,
2005 opcode.dst[0].base.index[0].imm),
2006 &opcode.dst[0]);
2007 }
2008 } else {
2009 assert(opcode.dst[0].base.index_dim == 1);
2010 assert(opcode.dst[0].base.index[0].imm < SHADER_MAX_OUTPUTS);
2011
2012 if (output_mapping) {
2013 unsigned nr_outputs = ureg_get_nr_outputs(ureg);
2014 output_mapping[nr_outputs]
2015 = opcode.dst[0].base.index[0].imm;
2016 }
2017 dcl_base_output(&sx, ureg,
2018 ureg_DECL_output(ureg,
2019 TGSI_SEMANTIC_GENERIC,
2020 opcode.dst[0].base.index[0].imm),
2021 &opcode.dst[0]);
2022 }
2023 break;
2024
2025 case D3D10_SB_OPCODE_DCL_OUTPUT_SIV:
2026 assert(opcode.dst[0].base.index_dim == 1);
2027 assert(opcode.dst[0].base.index[0].imm < SHADER_MAX_OUTPUTS);
2028
2029 if (output_mapping) {
2030 unsigned nr_outputs = ureg_get_nr_outputs(ureg);
2031 output_mapping[nr_outputs]
2032 = opcode.dst[0].base.index[0].imm;
2033 }
2034 if (opcode.dcl_siv_name == D3D10_SB_NAME_CLIP_DISTANCE ||
2035 opcode.dcl_siv_name == D3D10_SB_NAME_CULL_DISTANCE) {
2036 /*
2037 * FIXME: this is quite broken. gallium no longer has separate
2038 * clip/cull dists, using (max 2) combined clipdist/culldist regs
2039 * instead. Unlike d3d10 though, which is clip and which cull is
2040 * simply determined by the number of clip/cull dists (that is,
2041 * all clip dists must come first).
2042 */
2043 unsigned numcliporcull = sx.num_clip_distances_declared +
2044 sx.num_cull_distances_declared;
2045 sx.clip_distance_mapping[numcliporcull].d3d =
2046 opcode.dst[0].base.index[0].imm;
2047 sx.clip_distance_mapping[numcliporcull].tgsi = numcliporcull;
2048 if (opcode.dcl_siv_name == D3D10_SB_NAME_CLIP_DISTANCE) {
2049 ++sx.num_clip_distances_declared;
2050 /* re-emit should be safe... */
2051 ureg_property(ureg, TGSI_PROPERTY_NUM_CLIPDIST_ENABLED,
2052 sx.num_clip_distances_declared);
2053 } else {
2054 ++sx.num_cull_distances_declared;
2055 ureg_property(ureg, TGSI_PROPERTY_NUM_CULLDIST_ENABLED,
2056 sx.num_cull_distances_declared);
2057 }
2058 } else if (0 && opcode.dcl_siv_name == D3D10_SB_NAME_CULL_DISTANCE) {
2059 sx.cull_distance_mapping[sx.num_cull_distances_declared].d3d =
2060 opcode.dst[0].base.index[0].imm;
2061 sx.cull_distance_mapping[sx.num_cull_distances_declared].tgsi =
2062 sx.num_cull_distances_declared;
2063 ++sx.num_cull_distances_declared;
2064 ureg_property(ureg, TGSI_PROPERTY_NUM_CULLDIST_ENABLED,
2065 sx.num_cull_distances_declared);
2066 }
2067
2068 dcl_base_output(&sx, ureg,
2069 ureg_DECL_output_masked(
2070 ureg,
2071 translate_system_name(opcode.dcl_siv_name),
2072 translate_semantic_index(&sx, opcode.dcl_siv_name,
2073 &opcode.dst[0]),
2074 opcode.dst[0].mask >> D3D10_SB_OPERAND_4_COMPONENT_MASK_SHIFT,
2075 0, 1),
2076 &opcode.dst[0]);
2077 break;
2078
2079 case D3D10_SB_OPCODE_DCL_OUTPUT_SGV:
2080 assert(opcode.dst[0].base.index_dim == 1);
2081 assert(opcode.dst[0].base.index[0].imm < SHADER_MAX_OUTPUTS);
2082
2083 if (output_mapping) {
2084 unsigned nr_outputs = ureg_get_nr_outputs(ureg);
2085 output_mapping[nr_outputs]
2086 = opcode.dst[0].base.index[0].imm;
2087 }
2088 dcl_base_output(&sx, ureg,
2089 ureg_DECL_output(ureg,
2090 translate_system_name(opcode.dcl_siv_name),
2091 0),
2092 &opcode.dst[0]);
2093 break;
2094
2095 case D3D10_SB_OPCODE_DCL_TEMPS:
2096 {
2097 uint i;
2098
2099 assert(opcode.specific.dcl_num_temps + sx.declared_temps <=
2100 SHADER_MAX_TEMPS);
2101
2102 sx.temp_offset = sx.declared_temps;
2103
2104 for (i = 0; i < opcode.specific.dcl_num_temps; i++) {
2105 sx.temps[sx.declared_temps + i] = ureg_DECL_temporary(ureg);
2106 }
2107 sx.declared_temps += opcode.specific.dcl_num_temps;
2108 }
2109 break;
2110
2111 case D3D10_SB_OPCODE_DCL_INDEXABLE_TEMP:
2112 {
2113 uint i;
2114
2115 /* XXX: Add true indexable temps to gallium.
2116 */
2117
2118 assert(opcode.specific.dcl_indexable_temp.index <
2119 SHADER_MAX_INDEXABLE_TEMPS);
2120 assert(opcode.specific.dcl_indexable_temp.count + sx.declared_temps <=
2121 SHADER_MAX_TEMPS);
2122
2123 sx.indexable_temp_offsets[opcode.specific.dcl_indexable_temp.index] =
2124 sx.declared_temps;
2125
2126 for (i = 0; i < opcode.specific.dcl_indexable_temp.count; i++) {
2127 sx.temps[sx.declared_temps + i] = ureg_DECL_temporary(ureg);
2128 }
2129 sx.declared_temps += opcode.specific.dcl_indexable_temp.count;
2130 }
2131 break;
2132 case D3D10_SB_OPCODE_IF: {
2133 unsigned label = 0;
2134 if (opcode.specific.test_boolean == D3D10_SB_INSTRUCTION_TEST_ZERO) {
2135 struct ureg_src src =
2136 translate_src_operand(&sx, &opcode.src[0], OF_INT);
2137 struct ureg_dst src_nz = ureg_DECL_temporary(ureg);
2138 ureg_USEQ(ureg, src_nz, src, ureg_imm1u(ureg, 0));
2139 ureg_UIF(ureg, ureg_src(src_nz), &label);
2140 ureg_release_temporary(ureg, src_nz);;
2141 } else {
2142 ureg_UIF(ureg, translate_src_operand(&sx, &opcode.src[0], OF_INT), &label);
2143 }
2144 }
2145 break;
2146 case D3D10_SB_OPCODE_RETC:
2147 case D3D10_SB_OPCODE_CONTINUEC:
2148 case D3D10_SB_OPCODE_CALLC:
2149 case D3D10_SB_OPCODE_DISCARD:
2150 case D3D10_SB_OPCODE_BREAKC:
2151 {
2152 unsigned label = 0;
2153 assert(operand_is_scalar(&opcode.src[0]));
2154 if (opcode.specific.test_boolean == D3D10_SB_INSTRUCTION_TEST_ZERO) {
2155 struct ureg_src src =
2156 translate_src_operand(&sx, &opcode.src[0], OF_INT);
2157 struct ureg_dst src_nz = ureg_DECL_temporary(ureg);
2158 ureg_USEQ(ureg, src_nz, src, ureg_imm1u(ureg, 0));
2159 ureg_UIF(ureg, ureg_src(src_nz), &label);
2160 ureg_release_temporary(ureg, src_nz);
2161 }
2162 else {
2163 ureg_UIF(ureg, translate_src_operand(&sx, &opcode.src[0], OF_INT), &label);
2164 }
2165 switch (opcode.type) {
2166 case D3D10_SB_OPCODE_RETC:
2167 ureg_RET(ureg);
2168 break;
2169 case D3D10_SB_OPCODE_CONTINUEC:
2170 ureg_CONT(ureg);
2171 break;
2172 case D3D10_SB_OPCODE_CALLC: {
2173 unsigned label = opcode.src[1].base.index[0].imm;
2174 unsigned tgsi_token_label = 0;
2175 ureg_CAL(ureg, &tgsi_token_label);
2176 Shader_add_call(&sx, label, tgsi_token_label);
2177 }
2178 break;
2179 case D3D10_SB_OPCODE_DISCARD:
2180 ureg_KILL(ureg);
2181 break;
2182 case D3D10_SB_OPCODE_BREAKC:
2183 ureg_BRK(ureg);
2184 break;
2185 default:
2186 assert(0);
2187 break;
2188 }
2189 ureg_ENDIF(ureg);
2190 }
2191 break;
2192 case D3D10_SB_OPCODE_LABEL: {
2193 unsigned label = opcode.src[0].base.index[0].imm;
2194 unsigned tgsi_inst_no = 0;
2195 if (inside_sub) {
2196 ureg_ENDSUB(ureg);
2197 }
2198 tgsi_inst_no = ureg_get_instruction_number(ureg);
2199 ureg_BGNSUB(ureg);
2200 inside_sub = true;
2201 Shader_add_label(&sx, label, tgsi_inst_no);
2202 }
2203 break;
2204 case D3D10_SB_OPCODE_CALL: {
2205 unsigned label = opcode.src[0].base.index[0].imm;
2206 unsigned tgsi_token_label = 0;
2207 ureg_CAL(ureg, &tgsi_token_label);
2208 Shader_add_call(&sx, label, tgsi_token_label);
2209 }
2210 break;
2211 case D3D10_SB_OPCODE_EMIT:
2212 ureg_EMIT(ureg, ureg_imm1u(ureg, 0));
2213 break;
2214 case D3D10_SB_OPCODE_CUT:
2215 ureg_ENDPRIM(ureg, ureg_imm1u(ureg, 0));
2216 break;
2217 case D3D10_SB_OPCODE_EMITTHENCUT:
2218 ureg_EMIT(ureg, ureg_imm1u(ureg, 0));
2219 ureg_ENDPRIM(ureg, ureg_imm1u(ureg, 0));
2220 break;
2221 case D3D10_SB_OPCODE_DCL_INDEX_RANGE:
2222 case D3D10_SB_OPCODE_DCL_GLOBAL_FLAGS:
2223 /* Ignore */
2224 break;
2225 default:
2226 {
2227 uint i;
2228 struct ureg_dst dst[SHADER_MAX_DST_OPERANDS];
2229 struct ureg_src src[SHADER_MAX_SRC_OPERANDS];
2230
2231 assert(ox->tgsi_opcode != TGSI_EXPAND);
2232
2233 if (ox->tgsi_opcode == TGSI_LOG_UNSUPPORTED) {
2234 if (!shader_dumped) {
2235 dx10_shader_dump_tokens(code);
2236 shader_dumped = true;
2237 }
2238 debug_printf("%s: unsupported opcode %i\n",
2239 __func__, ox->type);
2240 assert(ox->tgsi_opcode != TGSI_LOG_UNSUPPORTED);
2241 }
2242
2243 /* Destination operands. */
2244 for (i = 0; i < opcode.num_dst; i++) {
2245 dst[i] = translate_dst_operand(&sx, &opcode.dst[i],
2246 opcode.saturate);
2247 }
2248
2249 /* Source operands. */
2250 for (i = 0; i < opcode.num_src; i++) {
2251 src[i] = translate_src_operand(&sx, &opcode.src[i], ox->format);
2252 }
2253
2254 /* Try to re-route output depth to Z channel. */
2255 if (opcode.dst[0].base.type == D3D10_SB_OPERAND_TYPE_OUTPUT_DEPTH) {
2256 LOG_UNSUPPORTED(opcode.type != D3D10_SB_OPCODE_MOV);
2257 dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_Z);
2258 src[0] = ureg_scalar(src[0], TGSI_SWIZZLE_X);
2259 }
2260
2261 ureg_insn(ureg,
2262 ox->tgsi_opcode,
2263 dst,
2264 opcode.num_dst,
2265 src,
2266 opcode.num_src, 0);
2267 }
2268 }
2269
2270 Shader_opcode_free(&opcode);
2271 }
2272
2273 if (inside_sub) {
2274 ureg_ENDSUB(ureg);
2275 }
2276
2277 ureg_END(ureg);
2278
2279 for (i = 0; i < sx.num_calls; ++i) {
2280 for (j = 0; j < sx.num_labels; ++j) {
2281 if (sx.calls[i].d3d_label == sx.labels[j].d3d_label) {
2282 ureg_fixup_label(sx.ureg,
2283 sx.calls[i].tgsi_label_token,
2284 sx.labels[j].tgsi_insn_no);
2285 break;
2286 }
2287 }
2288 ASSERT(j < sx.num_labels);
2289 }
2290 FREE(sx.labels);
2291 FREE(sx.calls);
2292
2293 tokens = ureg_get_tokens(ureg, &nr_tokens);
2294 assert(tokens);
2295 ureg_destroy(ureg);
2296
2297 if (st_debug & ST_DEBUG_TGSI) {
2298 tgsi_dump(tokens, 0);
2299 }
2300
2301 return tokens;
2302 }
2303