1 /**************************************************************************
2 *
3 * Copyright 2010-2021 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
18 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
19 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20 * USE OR OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * The above copyright notice and this permission notice (including the
23 * next paragraph) shall be included in all copies or substantial portions
24 * of the Software.
25 *
26 **************************************************************************/
27
28
29 #include "util/detect.h"
30
31 #include "util/u_math.h"
32 #include "util/u_cpu_detect.h"
33 #include "util/u_pack_color.h"
34 #include "util/u_surface.h"
35 #include "util/u_sse.h"
36
37 #include "lp_jit.h"
38 #include "lp_rast.h"
39 #include "lp_debug.h"
40 #include "lp_state_fs.h"
41 #include "lp_linear_priv.h"
42
43
/* This file contains various special-case fastpaths which implement
 * the entire linear pipeline in a single function.
 *
 * These include simple blits and some debug code.
 *
 * These functions fully implement the linear path and do not need to
 * be combined with blending, interpolation or sampling routines.
 */
52
53
54 #if DETECT_ARCH_SSE
55
56 /* Linear shader which implements the BLIT_RGBA shader with the
57 * additional constraints imposed by lp_setup_is_blit().
58 */
59 static bool
lp_linear_blit_rgba_blit(const struct lp_rast_state * state,unsigned x,unsigned y,unsigned width,unsigned height,const float (* a0)[4],const float (* dadx)[4],const float (* dady)[4],uint8_t * color,unsigned stride)60 lp_linear_blit_rgba_blit(const struct lp_rast_state *state,
61 unsigned x, unsigned y,
62 unsigned width, unsigned height,
63 const float (*a0)[4],
64 const float (*dadx)[4],
65 const float (*dady)[4],
66 uint8_t *color,
67 unsigned stride)
68 {
69 const struct lp_jit_resources *resources = &state->jit_resources;
70 const struct lp_jit_texture *texture = &resources->textures[0];
71
72 LP_DBG(DEBUG_RAST, "%s\n", __func__);
73
74 /* Require w==1.0:
75 */
76 if (a0[0][3] != 1.0 ||
77 dadx[0][3] != 0.0 ||
78 dady[0][3] != 0.0)
79 return false;
80
81 const int src_x = x + util_iround(a0[1][0]*texture->width - 0.5f);
82 const int src_y = y + util_iround(a0[1][1]*texture->height - 0.5f);
83
84 const uint8_t *src = texture->base;
85 const unsigned src_stride = texture->row_stride[0];
86
87 /* Fall back to blit_rgba() if clamping required:
88 */
89 if (src_x < 0 ||
90 src_y < 0 ||
91 src_x + width > texture->width ||
92 src_y + height > texture->height)
93 return false;
94
95 util_copy_rect(color, PIPE_FORMAT_B8G8R8A8_UNORM, stride,
96 x, y,
97 width, height,
98 src, src_stride,
99 src_x, src_y);
100
101 return true;
102 }
103
104
105 /* Linear shader which implements the BLIT_RGB1 shader, with the
106 * additional constraints imposed by lp_setup_is_blit().
107 */
108 static bool
lp_linear_blit_rgb1_blit(const struct lp_rast_state * state,unsigned x,unsigned y,unsigned width,unsigned height,const float (* a0)[4],const float (* dadx)[4],const float (* dady)[4],uint8_t * color,unsigned stride)109 lp_linear_blit_rgb1_blit(const struct lp_rast_state *state,
110 unsigned x, unsigned y,
111 unsigned width, unsigned height,
112 const float (*a0)[4],
113 const float (*dadx)[4],
114 const float (*dady)[4],
115 uint8_t *color,
116 unsigned stride)
117 {
118 const struct lp_jit_resources *resources = &state->jit_resources;
119 const struct lp_jit_texture *texture = &resources->textures[0];
120
121 LP_DBG(DEBUG_RAST, "%s\n", __func__);
122
123 /* Require w==1.0:
124 */
125 if (a0[0][3] != 1.0 ||
126 dadx[0][3] != 0.0 ||
127 dady[0][3] != 0.0)
128 return false;
129
130 color += x * 4 + y * stride;
131
132 const int src_x = x + util_iround(a0[1][0]*texture->width - 0.5f);
133 const int src_y = y + util_iround(a0[1][1]*texture->height - 0.5f);
134
135 const uint8_t *src = texture->base;
136 const unsigned src_stride = texture->row_stride[0];
137 src += src_x * 4;
138 src += src_y * src_stride;
139
140 if (src_x < 0 ||
141 src_y < 0 ||
142 src_x + width > texture->width ||
143 src_y + height > texture->height)
144 return false;
145
146 for (y = 0; y < height; y++) {
147 const uint32_t *src_row = (const uint32_t *)src;
148 uint32_t *dst_row = (uint32_t *)color;
149
150 for (x = 0; x < width; x++) {
151 *dst_row++ = *src_row++ | 0xff000000;
152 }
153
154 color += stride;
155 src += src_stride;
156 }
157
158 return true;
159 }
160
161
162 /* Linear shader which always emits purple. Used for debugging.
163 */
164 static bool
lp_linear_purple(const struct lp_rast_state * state,unsigned x,unsigned y,unsigned width,unsigned height,const float (* a0)[4],const float (* dadx)[4],const float (* dady)[4],uint8_t * color,unsigned stride)165 lp_linear_purple(const struct lp_rast_state *state,
166 unsigned x, unsigned y,
167 unsigned width, unsigned height,
168 const float (*a0)[4],
169 const float (*dadx)[4],
170 const float (*dady)[4],
171 uint8_t *color,
172 unsigned stride)
173 {
174 union util_color uc;
175
176 util_pack_color_ub(0xff, 0, 0xff, 0xff,
177 PIPE_FORMAT_B8G8R8A8_UNORM, &uc);
178
179 util_fill_rect(color,
180 PIPE_FORMAT_B8G8R8A8_UNORM,
181 stride,
182 x,
183 y,
184 width,
185 height,
186 &uc);
187
188 return true;
189 }
190
191
192 /* Examine the fragment shader variant and determine whether we can
193 * substitute a fastpath linear shader implementation.
194 */
195 bool
lp_linear_check_fastpath(struct lp_fragment_shader_variant * variant)196 lp_linear_check_fastpath(struct lp_fragment_shader_variant *variant)
197 {
198 struct lp_sampler_static_state *samp0 =
199 lp_fs_variant_key_sampler_idx(&variant->key, 0);
200
201 if (!samp0)
202 return false;
203
204 const enum pipe_format tex_format = samp0->texture_state.format;
205 if (variant->shader->kind == LP_FS_KIND_BLIT_RGBA &&
206 tex_format == PIPE_FORMAT_B8G8R8A8_UNORM &&
207 is_nearest_clamp_sampler(samp0) &&
208 variant->opaque) {
209 variant->jit_linear_blit = lp_linear_blit_rgba_blit;
210 }
211
212 if (variant->shader->kind == LP_FS_KIND_BLIT_RGB1 &&
213 variant->opaque &&
214 (tex_format == PIPE_FORMAT_B8G8R8A8_UNORM ||
215 tex_format == PIPE_FORMAT_B8G8R8X8_UNORM) &&
216 is_nearest_clamp_sampler(samp0)) {
217 variant->jit_linear_blit = lp_linear_blit_rgb1_blit;
218 }
219
220 if (0) {
221 variant->jit_linear = lp_linear_purple;
222 }
223
224
225 /* Stop now if jit_linear has been initialized. Otherwise keep
226 * searching - even if jit_linear_blit has been instantiated.
227 */
228 return variant->jit_linear != NULL;
229 }
230
231 #else
232
/* Variant used when DETECT_ARCH_SSE is not set: none of the fastpaths
 * are compiled in, so never report one.
 */
bool
lp_linear_check_fastpath(struct lp_fragment_shader_variant *variant)
{
   (void)variant;   /* nothing to inspect in this configuration */

   return false;
}
238
239 #endif
240