xref: /aosp_15_r20/external/mesa3d/src/gallium/drivers/vc4/vc4_nir_lower_blend.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2015 Broadcom
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 /**
25  * Implements most of the fixed function fragment pipeline in shader code.
26  *
27  * VC4 doesn't have any hardware support for blending, alpha test, logic ops,
28  * or color mask.  Instead, you read the current contents of the destination
29  * from the tile buffer after having waited for the scoreboard (which is
30  * handled by vc4_qpu_emit.c), then do math using your output color and that
31  * destination value, and update the output color appropriately.
32  *
33  * Once this pass is done, the color write will either have one component (for
34  * single sample) with packed argb8888, or 4 components with the per-sample
35  * argb8888 result.
36  */
37 
38 /**
39  * Lowers fixed-function blending to a load of the destination color and a
40  * series of ALU operations before the store of the output.
41  */
42 #include "util/format/u_format.h"
43 #include "vc4_qir.h"
44 #include "compiler/nir/nir_builder.h"
45 #include "compiler/nir/nir_format_convert.h"
46 #include "vc4_context.h"
47 
48 static bool
blend_depends_on_dst_color(struct vc4_compile * c)49 blend_depends_on_dst_color(struct vc4_compile *c)
50 {
51         return (c->fs_key->blend.blend_enable ||
52                 c->fs_key->blend.colormask != PIPE_MASK_RGBA ||
53                 c->fs_key->logicop_func != PIPE_LOGICOP_COPY);
54 }
55 
56 /** Emits a load of the previous fragment color from the tile buffer. */
57 static nir_def *
vc4_nir_get_dst_color(nir_builder * b,int sample)58 vc4_nir_get_dst_color(nir_builder *b, int sample)
59 {
60         return nir_load_tlb_color_brcm(b, 1, 32, nir_imm_int(b, 0),
61                                        .base = sample);
62 }
63 
64 static nir_def *
vc4_blend_channel_f(nir_builder * b,nir_def ** src,nir_def ** dst,unsigned factor,int channel)65 vc4_blend_channel_f(nir_builder *b,
66                     nir_def **src,
67                     nir_def **dst,
68                     unsigned factor,
69                     int channel)
70 {
71         switch(factor) {
72         case PIPE_BLENDFACTOR_ONE:
73                 return nir_imm_float(b, 1.0);
74         case PIPE_BLENDFACTOR_SRC_COLOR:
75                 return src[channel];
76         case PIPE_BLENDFACTOR_SRC_ALPHA:
77                 return src[3];
78         case PIPE_BLENDFACTOR_DST_ALPHA:
79                 return dst[3];
80         case PIPE_BLENDFACTOR_DST_COLOR:
81                 return dst[channel];
82         case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
83                 if (channel != 3) {
84                         return nir_fmin(b,
85                                         src[3],
86                                         nir_fsub_imm(b, 1.0,
87                                                      dst[3]));
88                 } else {
89                         return nir_imm_float(b, 1.0);
90                 }
91         case PIPE_BLENDFACTOR_CONST_COLOR:
92                 return nir_load_system_value(b,
93                                              nir_intrinsic_load_blend_const_color_r_float +
94                                              channel,
95                                              0, 1, 32);
96         case PIPE_BLENDFACTOR_CONST_ALPHA:
97                 return nir_load_blend_const_color_a_float(b);
98         case PIPE_BLENDFACTOR_ZERO:
99                 return nir_imm_float(b, 0.0);
100         case PIPE_BLENDFACTOR_INV_SRC_COLOR:
101                 return nir_fsub_imm(b, 1.0, src[channel]);
102         case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
103                 return nir_fsub_imm(b, 1.0, src[3]);
104         case PIPE_BLENDFACTOR_INV_DST_ALPHA:
105                 return nir_fsub_imm(b, 1.0, dst[3]);
106         case PIPE_BLENDFACTOR_INV_DST_COLOR:
107                 return nir_fsub_imm(b, 1.0, dst[channel]);
108         case PIPE_BLENDFACTOR_INV_CONST_COLOR:
109                 return nir_fsub_imm(b, 1.0,
110                                     nir_load_system_value(b,
111                                                           nir_intrinsic_load_blend_const_color_r_float +
112                                                           channel,
113                                                           0, 1, 32));
114         case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
115                 return nir_fsub_imm(b, 1.0,
116                                     nir_load_blend_const_color_a_float(b));
117 
118         default:
119         case PIPE_BLENDFACTOR_SRC1_COLOR:
120         case PIPE_BLENDFACTOR_SRC1_ALPHA:
121         case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
122         case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
123                 /* Unsupported. */
124                 fprintf(stderr, "Unknown blend factor %d\n", factor);
125                 return nir_imm_float(b, 1.0);
126         }
127 }
128 
129 static nir_def *
vc4_nir_set_packed_chan(nir_builder * b,nir_def * src0,nir_def * src1,int chan)130 vc4_nir_set_packed_chan(nir_builder *b, nir_def *src0, nir_def *src1,
131                         int chan)
132 {
133         unsigned chan_mask = 0xffu << (chan * 8);
134         return nir_ior(b,
135                        nir_iand_imm(b, src0, ~chan_mask),
136                        nir_iand_imm(b, src1, chan_mask));
137 }
138 
139 static nir_def *
vc4_blend_channel_i(nir_builder * b,nir_def * src,nir_def * dst,nir_def * src_a,nir_def * dst_a,unsigned factor,int a_chan)140 vc4_blend_channel_i(nir_builder *b,
141                     nir_def *src,
142                     nir_def *dst,
143                     nir_def *src_a,
144                     nir_def *dst_a,
145                     unsigned factor,
146                     int a_chan)
147 {
148         switch (factor) {
149         case PIPE_BLENDFACTOR_ONE:
150                 return nir_imm_int(b, ~0);
151         case PIPE_BLENDFACTOR_SRC_COLOR:
152                 return src;
153         case PIPE_BLENDFACTOR_SRC_ALPHA:
154                 return src_a;
155         case PIPE_BLENDFACTOR_DST_ALPHA:
156                 return dst_a;
157         case PIPE_BLENDFACTOR_DST_COLOR:
158                 return dst;
159         case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
160                 return vc4_nir_set_packed_chan(b,
161                                                nir_umin_4x8_vc4(b,
162                                                             src_a,
163                                                             nir_inot(b, dst_a)),
164                                                nir_imm_int(b, ~0),
165                                                a_chan);
166         case PIPE_BLENDFACTOR_CONST_COLOR:
167                 return nir_load_blend_const_color_rgba8888_unorm(b);
168         case PIPE_BLENDFACTOR_CONST_ALPHA:
169                 return nir_load_blend_const_color_aaaa8888_unorm(b);
170         case PIPE_BLENDFACTOR_ZERO:
171                 return nir_imm_int(b, 0);
172         case PIPE_BLENDFACTOR_INV_SRC_COLOR:
173                 return nir_inot(b, src);
174         case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
175                 return nir_inot(b, src_a);
176         case PIPE_BLENDFACTOR_INV_DST_ALPHA:
177                 return nir_inot(b, dst_a);
178         case PIPE_BLENDFACTOR_INV_DST_COLOR:
179                 return nir_inot(b, dst);
180         case PIPE_BLENDFACTOR_INV_CONST_COLOR:
181                 return nir_inot(b,
182                                 nir_load_blend_const_color_rgba8888_unorm(b));
183         case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
184                 return nir_inot(b,
185                                 nir_load_blend_const_color_aaaa8888_unorm(b));
186 
187         default:
188         case PIPE_BLENDFACTOR_SRC1_COLOR:
189         case PIPE_BLENDFACTOR_SRC1_ALPHA:
190         case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
191         case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
192                 /* Unsupported. */
193                 fprintf(stderr, "Unknown blend factor %d\n", factor);
194                 return nir_imm_int(b, ~0);
195         }
196 }
197 
198 static nir_def *
vc4_blend_func_f(nir_builder * b,nir_def * src,nir_def * dst,unsigned func)199 vc4_blend_func_f(nir_builder *b, nir_def *src, nir_def *dst,
200                  unsigned func)
201 {
202         switch (func) {
203         case PIPE_BLEND_ADD:
204                 return nir_fadd(b, src, dst);
205         case PIPE_BLEND_SUBTRACT:
206                 return nir_fsub(b, src, dst);
207         case PIPE_BLEND_REVERSE_SUBTRACT:
208                 return nir_fsub(b, dst, src);
209         case PIPE_BLEND_MIN:
210                 return nir_fmin(b, src, dst);
211         case PIPE_BLEND_MAX:
212                 return nir_fmax(b, src, dst);
213 
214         default:
215                 /* Unsupported. */
216                 fprintf(stderr, "Unknown blend func %d\n", func);
217                 return src;
218 
219         }
220 }
221 
222 static nir_def *
vc4_blend_func_i(nir_builder * b,nir_def * src,nir_def * dst,unsigned func)223 vc4_blend_func_i(nir_builder *b, nir_def *src, nir_def *dst,
224                  unsigned func)
225 {
226         switch (func) {
227         case PIPE_BLEND_ADD:
228                 return nir_usadd_4x8_vc4(b, src, dst);
229         case PIPE_BLEND_SUBTRACT:
230                 return nir_ussub_4x8_vc4(b, src, dst);
231         case PIPE_BLEND_REVERSE_SUBTRACT:
232                 return nir_ussub_4x8_vc4(b, dst, src);
233         case PIPE_BLEND_MIN:
234                 return nir_umin_4x8_vc4(b, src, dst);
235         case PIPE_BLEND_MAX:
236                 return nir_umax_4x8_vc4(b, src, dst);
237 
238         default:
239                 /* Unsupported. */
240                 fprintf(stderr, "Unknown blend func %d\n", func);
241                 return src;
242 
243         }
244 }
245 
246 static void
vc4_do_blending_f(struct vc4_compile * c,nir_builder * b,nir_def ** result,nir_def ** src_color,nir_def ** dst_color)247 vc4_do_blending_f(struct vc4_compile *c, nir_builder *b, nir_def **result,
248                   nir_def **src_color, nir_def **dst_color)
249 {
250         struct pipe_rt_blend_state *blend = &c->fs_key->blend;
251 
252         if (!blend->blend_enable) {
253                 for (int i = 0; i < 4; i++)
254                         result[i] = src_color[i];
255                 return;
256         }
257 
258         /* Clamp the src color to [0, 1].  Dest is already clamped. */
259         for (int i = 0; i < 4; i++)
260                 src_color[i] = nir_fsat(b, src_color[i]);
261 
262         nir_def *src_blend[4], *dst_blend[4];
263         for (int i = 0; i < 4; i++) {
264                 int src_factor = ((i != 3) ? blend->rgb_src_factor :
265                                   blend->alpha_src_factor);
266                 int dst_factor = ((i != 3) ? blend->rgb_dst_factor :
267                                   blend->alpha_dst_factor);
268                 src_blend[i] = nir_fmul(b, src_color[i],
269                                         vc4_blend_channel_f(b,
270                                                             src_color, dst_color,
271                                                             src_factor, i));
272                 dst_blend[i] = nir_fmul(b, dst_color[i],
273                                         vc4_blend_channel_f(b,
274                                                             src_color, dst_color,
275                                                             dst_factor, i));
276         }
277 
278         for (int i = 0; i < 4; i++) {
279                 result[i] = vc4_blend_func_f(b, src_blend[i], dst_blend[i],
280                                              ((i != 3) ? blend->rgb_func :
281                                               blend->alpha_func));
282         }
283 }
284 
285 static nir_def *
vc4_nir_splat(nir_builder * b,nir_def * src)286 vc4_nir_splat(nir_builder *b, nir_def *src)
287 {
288         nir_def *or1 = nir_ior(b, src, nir_ishl_imm(b, src, 8));
289         return nir_ior(b, or1, nir_ishl_imm(b, or1, 16));
290 }
291 
292 static nir_def *
vc4_do_blending_i(struct vc4_compile * c,nir_builder * b,nir_def * src_color,nir_def * dst_color,nir_def * src_float_a)293 vc4_do_blending_i(struct vc4_compile *c, nir_builder *b,
294                   nir_def *src_color, nir_def *dst_color,
295                   nir_def *src_float_a)
296 {
297         struct pipe_rt_blend_state *blend = &c->fs_key->blend;
298 
299         if (!blend->blend_enable)
300                 return src_color;
301 
302         enum pipe_format color_format = c->fs_key->color_format;
303         const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);
304         nir_def *src_a = nir_pack_unorm_4x8(b, src_float_a);
305         nir_def *dst_a;
306         int alpha_chan;
307         for (alpha_chan = 0; alpha_chan < 4; alpha_chan++) {
308                 if (format_swiz[alpha_chan] == 3)
309                         break;
310         }
311         if (alpha_chan != 4) {
312                 dst_a = vc4_nir_splat(b, nir_iand_imm(b, nir_ushr_imm(b, dst_color,
313                                                                      alpha_chan * 8),
314                                                       0xff));
315         } else {
316                 dst_a = nir_imm_int(b, ~0);
317         }
318 
319         nir_def *src_factor = vc4_blend_channel_i(b,
320                                                       src_color, dst_color,
321                                                       src_a, dst_a,
322                                                       blend->rgb_src_factor,
323                                                       alpha_chan);
324         nir_def *dst_factor = vc4_blend_channel_i(b,
325                                                       src_color, dst_color,
326                                                       src_a, dst_a,
327                                                       blend->rgb_dst_factor,
328                                                       alpha_chan);
329 
330         if (alpha_chan != 4 &&
331             blend->alpha_src_factor != blend->rgb_src_factor) {
332                 nir_def *src_alpha_factor =
333                         vc4_blend_channel_i(b,
334                                             src_color, dst_color,
335                                             src_a, dst_a,
336                                             blend->alpha_src_factor,
337                                             alpha_chan);
338                 src_factor = vc4_nir_set_packed_chan(b, src_factor,
339                                                      src_alpha_factor,
340                                                      alpha_chan);
341         }
342         if (alpha_chan != 4 &&
343             blend->alpha_dst_factor != blend->rgb_dst_factor) {
344                 nir_def *dst_alpha_factor =
345                         vc4_blend_channel_i(b,
346                                             src_color, dst_color,
347                                             src_a, dst_a,
348                                             blend->alpha_dst_factor,
349                                             alpha_chan);
350                 dst_factor = vc4_nir_set_packed_chan(b, dst_factor,
351                                                      dst_alpha_factor,
352                                                      alpha_chan);
353         }
354         nir_def *src_blend = nir_umul_unorm_4x8_vc4(b, src_color, src_factor);
355         nir_def *dst_blend = nir_umul_unorm_4x8_vc4(b, dst_color, dst_factor);
356 
357         nir_def *result =
358                 vc4_blend_func_i(b, src_blend, dst_blend, blend->rgb_func);
359         if (alpha_chan != 4 && blend->alpha_func != blend->rgb_func) {
360                 nir_def *result_a = vc4_blend_func_i(b,
361                                                          src_blend,
362                                                          dst_blend,
363                                                          blend->alpha_func);
364                 result = vc4_nir_set_packed_chan(b, result, result_a,
365                                                  alpha_chan);
366         }
367         return result;
368 }
369 
370 static nir_def *
vc4_logicop(nir_builder * b,int logicop_func,nir_def * src,nir_def * dst)371 vc4_logicop(nir_builder *b, int logicop_func,
372             nir_def *src, nir_def *dst)
373 {
374         switch (logicop_func) {
375         case PIPE_LOGICOP_CLEAR:
376                 return nir_imm_int(b, 0);
377         case PIPE_LOGICOP_NOR:
378                 return nir_inot(b, nir_ior(b, src, dst));
379         case PIPE_LOGICOP_AND_INVERTED:
380                 return nir_iand(b, nir_inot(b, src), dst);
381         case PIPE_LOGICOP_COPY_INVERTED:
382                 return nir_inot(b, src);
383         case PIPE_LOGICOP_AND_REVERSE:
384                 return nir_iand(b, src, nir_inot(b, dst));
385         case PIPE_LOGICOP_INVERT:
386                 return nir_inot(b, dst);
387         case PIPE_LOGICOP_XOR:
388                 return nir_ixor(b, src, dst);
389         case PIPE_LOGICOP_NAND:
390                 return nir_inot(b, nir_iand(b, src, dst));
391         case PIPE_LOGICOP_AND:
392                 return nir_iand(b, src, dst);
393         case PIPE_LOGICOP_EQUIV:
394                 return nir_inot(b, nir_ixor(b, src, dst));
395         case PIPE_LOGICOP_NOOP:
396                 return dst;
397         case PIPE_LOGICOP_OR_INVERTED:
398                 return nir_ior(b, nir_inot(b, src), dst);
399         case PIPE_LOGICOP_OR_REVERSE:
400                 return nir_ior(b, src, nir_inot(b, dst));
401         case PIPE_LOGICOP_OR:
402                 return nir_ior(b, src, dst);
403         case PIPE_LOGICOP_SET:
404                 return nir_imm_int(b, ~0);
405         default:
406                 fprintf(stderr, "Unknown logic op %d\n", logicop_func);
407                 FALLTHROUGH;
408         case PIPE_LOGICOP_COPY:
409                 return src;
410         }
411 }
412 
413 static nir_def *
vc4_nir_swizzle_and_pack(struct vc4_compile * c,nir_builder * b,nir_def ** colors)414 vc4_nir_swizzle_and_pack(struct vc4_compile *c, nir_builder *b,
415                          nir_def **colors)
416 {
417         enum pipe_format color_format = c->fs_key->color_format;
418         const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);
419 
420         nir_def *swizzled[4];
421         for (int i = 0; i < 4; i++) {
422                 swizzled[i] = vc4_nir_get_swizzled_channel(b, colors,
423                                                            format_swiz[i]);
424         }
425 
426         return nir_pack_unorm_4x8(b,
427                                   nir_vec4(b,
428                                            swizzled[0], swizzled[1],
429                                            swizzled[2], swizzled[3]));
430 
431 }
432 
433 static nir_def *
vc4_nir_blend_pipeline(struct vc4_compile * c,nir_builder * b,nir_def * src,int sample)434 vc4_nir_blend_pipeline(struct vc4_compile *c, nir_builder *b, nir_def *src,
435                        int sample)
436 {
437         enum pipe_format color_format = c->fs_key->color_format;
438         const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);
439         bool srgb = util_format_is_srgb(color_format);
440 
441         /* Pull out the float src/dst color components. */
442         nir_def *packed_dst_color = vc4_nir_get_dst_color(b, sample);
443         nir_def *dst_vec4 = nir_unpack_unorm_4x8(b, packed_dst_color);
444         nir_def *src_color[4], *unpacked_dst_color[4];
445         for (unsigned i = 0; i < 4; i++) {
446                 src_color[i] = nir_channel(b, src, i);
447                 unpacked_dst_color[i] = nir_channel(b, dst_vec4, i);
448         }
449 
450         if (c->fs_key->sample_alpha_to_one && c->fs_key->msaa)
451                 src_color[3] = nir_imm_float(b, 1.0);
452 
453         nir_def *packed_color;
454         if (srgb) {
455                 /* Unswizzle the destination color. */
456                 nir_def *dst_color[4];
457                 for (unsigned i = 0; i < 4; i++) {
458                         dst_color[i] = vc4_nir_get_swizzled_channel(b,
459                                                                     unpacked_dst_color,
460                                                                     format_swiz[i]);
461                 }
462 
463                 /* Turn dst color to linear. */
464                 for (int i = 0; i < 3; i++)
465                         dst_color[i] = nir_format_srgb_to_linear(b, dst_color[i]);
466 
467                 nir_def *blend_color[4];
468                 vc4_do_blending_f(c, b, blend_color, src_color, dst_color);
469 
470                 /* sRGB encode the output color */
471                 for (int i = 0; i < 3; i++)
472                         blend_color[i] = nir_format_linear_to_srgb(b, blend_color[i]);
473 
474                 packed_color = vc4_nir_swizzle_and_pack(c, b, blend_color);
475         } else {
476                 nir_def *packed_src_color =
477                         vc4_nir_swizzle_and_pack(c, b, src_color);
478 
479                 packed_color =
480                         vc4_do_blending_i(c, b,
481                                           packed_src_color, packed_dst_color,
482                                           src_color[3]);
483         }
484 
485         packed_color = vc4_logicop(b, c->fs_key->logicop_func,
486                                    packed_color, packed_dst_color);
487 
488         /* If the bit isn't set in the color mask, then just return the
489          * original dst color, instead.
490          */
491         uint32_t colormask = 0xffffffff;
492         for (int i = 0; i < 4; i++) {
493                 if (format_swiz[i] < 4 &&
494                     !(c->fs_key->blend.colormask & (1 << format_swiz[i]))) {
495                         colormask &= ~(0xffu << (i * 8));
496                 }
497         }
498 
499         return nir_ior(b,
500                        nir_iand_imm(b, packed_color, colormask),
501                        nir_iand_imm(b, packed_dst_color, ~colormask));
502 }
503 
504 static void
vc4_nir_store_sample_mask(struct vc4_compile * c,nir_builder * b,nir_def * val)505 vc4_nir_store_sample_mask(struct vc4_compile *c, nir_builder *b,
506                           nir_def *val)
507 {
508         nir_variable *sample_mask = nir_variable_create(c->s, nir_var_shader_out,
509                                                         glsl_uint_type(),
510                                                         "sample_mask");
511         sample_mask->data.driver_location = c->s->num_outputs++;
512         sample_mask->data.location = FRAG_RESULT_SAMPLE_MASK;
513 
514         nir_store_output(b, val, nir_imm_int(b, 0),
515                          .base = sample_mask->data.driver_location,
516                          .src_type = nir_type_uint | val->bit_size);
517 }
518 
519 static void
vc4_nir_lower_blend_instr(struct vc4_compile * c,nir_builder * b,nir_intrinsic_instr * intr)520 vc4_nir_lower_blend_instr(struct vc4_compile *c, nir_builder *b,
521                           nir_intrinsic_instr *intr)
522 {
523         nir_def *frag_color = intr->src[0].ssa;
524 
525         if (c->fs_key->sample_alpha_to_coverage) {
526                 nir_def *a = nir_channel(b, frag_color, 3);
527 
528                 /* XXX: We should do a nice dither based on the fragment
529                  * coordinate, instead.
530                  */
531                 nir_def *num_bits = nir_f2i32(b, nir_fmul_imm(b, a, VC4_MAX_SAMPLES));
532                 nir_def *bitmask = nir_iadd_imm(b,
533                                                     nir_ishl(b,
534                                                              nir_imm_int(b, 1),
535                                                              num_bits),
536                                                     -1);
537                 vc4_nir_store_sample_mask(c, b, bitmask);
538         }
539 
540         /* The TLB color read returns each sample in turn, so if our blending
541          * depends on the destination color, we're going to have to run the
542          * blending function separately for each destination sample value, and
543          * then output the per-sample color using TLB_COLOR_MS.
544          */
545         nir_def *blend_output;
546         if (c->fs_key->msaa && blend_depends_on_dst_color(c)) {
547                 c->msaa_per_sample_output = true;
548 
549                 nir_def *samples[4];
550                 for (int i = 0; i < VC4_MAX_SAMPLES; i++)
551                         samples[i] = vc4_nir_blend_pipeline(c, b, frag_color, i);
552                 blend_output = nir_vec4(b,
553                                         samples[0], samples[1],
554                                         samples[2], samples[3]);
555         } else {
556                 blend_output = vc4_nir_blend_pipeline(c, b, frag_color, 0);
557         }
558 
559         nir_src_rewrite(&intr->src[0], blend_output);
560         if (intr->num_components != blend_output->num_components) {
561                 unsigned component_mask = BITFIELD_MASK(blend_output->num_components);
562                 nir_intrinsic_set_write_mask(intr, component_mask);
563                 intr->num_components = blend_output->num_components;
564         }
565 }
566 
567 static bool
vc4_nir_lower_blend_block(nir_block * block,struct vc4_compile * c)568 vc4_nir_lower_blend_block(nir_block *block, struct vc4_compile *c)
569 {
570         nir_foreach_instr_safe(instr, block) {
571                 if (instr->type != nir_instr_type_intrinsic)
572                         continue;
573                 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
574                 if (intr->intrinsic != nir_intrinsic_store_output)
575                         continue;
576 
577                 unsigned loc = nir_intrinsic_io_semantics(intr).location;
578 
579                 if (loc != FRAG_RESULT_COLOR &&
580                     loc != FRAG_RESULT_DATA0) {
581                         continue;
582                 }
583 
584                 nir_builder b = nir_builder_at(nir_before_instr(&intr->instr));
585                 vc4_nir_lower_blend_instr(c, &b, intr);
586         }
587         return true;
588 }
589 
590 void
vc4_nir_lower_blend(nir_shader * s,struct vc4_compile * c)591 vc4_nir_lower_blend(nir_shader *s, struct vc4_compile *c)
592 {
593         nir_foreach_function_impl(impl, s) {
594                 nir_foreach_block(block, impl) {
595                         vc4_nir_lower_blend_block(block, c);
596                 }
597 
598                 nir_metadata_preserve(impl,
599                                       nir_metadata_control_flow);
600         }
601 
602         /* If we didn't do alpha-to-coverage on the output color, we still
603          * need to pass glSampleMask() through.
604          */
605         if (c->fs_key->sample_coverage && !c->fs_key->sample_alpha_to_coverage) {
606                 nir_function_impl *impl = nir_shader_get_entrypoint(s);
607                 nir_builder b = nir_builder_at(nir_after_impl(impl));
608 
609                 vc4_nir_store_sample_mask(c, &b, nir_load_sample_mask_in(&b));
610         }
611 }
612