xref: /aosp_15_r20/external/mesa3d/src/compiler/nir/nir_lower_blend.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright (C) 2019-2021 Collabora, Ltd.
3  * Copyright (C) 2019 Alyssa Rosenzweig
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22  * IN THE SOFTWARE.
23  */
24 
25 /**
26  * @file
27  *
28  * Implements the fragment pipeline (blending and writeout) in software, to be
29  * run as a dedicated "blend shader" stage on Midgard/Bifrost, or as a fragment
30  * shader variant on typical GPUs. This pass is useful if hardware lacks
31  * fixed-function blending in part or in full.
32  */
33 
34 #include "nir_lower_blend.h"
35 #include "compiler/nir/nir.h"
36 #include "compiler/nir/nir_builder.h"
37 #include "compiler/nir/nir_format_convert.h"
38 #include "util/blend.h"
39 
40 struct ctx {
41    const nir_lower_blend_options *options;
42    nir_def *src1[8];
43 };
44 
45 /* Given processed factors, combine them per a blend function */
46 
47 static nir_def *
nir_blend_func(nir_builder * b,enum pipe_blend_func func,nir_def * src,nir_def * dst)48 nir_blend_func(
49    nir_builder *b,
50    enum pipe_blend_func func,
51    nir_def *src, nir_def *dst)
52 {
53    switch (func) {
54    case PIPE_BLEND_ADD:
55       return nir_fadd(b, src, dst);
56    case PIPE_BLEND_SUBTRACT:
57       return nir_fsub(b, src, dst);
58    case PIPE_BLEND_REVERSE_SUBTRACT:
59       return nir_fsub(b, dst, src);
60    case PIPE_BLEND_MIN:
61       return nir_fmin(b, src, dst);
62    case PIPE_BLEND_MAX:
63       return nir_fmax(b, src, dst);
64    }
65 
66    unreachable("Invalid blend function");
67 }
68 
69 /* Does this blend function multiply by a blend factor? */
70 
71 static bool
nir_blend_factored(enum pipe_blend_func func)72 nir_blend_factored(enum pipe_blend_func func)
73 {
74    switch (func) {
75    case PIPE_BLEND_ADD:
76    case PIPE_BLEND_SUBTRACT:
77    case PIPE_BLEND_REVERSE_SUBTRACT:
78       return true;
79    default:
80       return false;
81    }
82 }
83 
84 /* Compute a src_alpha_saturate factor */
85 static nir_def *
nir_alpha_saturate(nir_builder * b,nir_def * src,nir_def * dst,unsigned chan)86 nir_alpha_saturate(
87    nir_builder *b,
88    nir_def *src, nir_def *dst,
89    unsigned chan)
90 {
91    nir_def *Asrc = nir_channel(b, src, 3);
92    nir_def *Adst = nir_channel(b, dst, 3);
93    nir_def *one = nir_imm_floatN_t(b, 1.0, src->bit_size);
94    nir_def *Adsti = nir_fsub(b, one, Adst);
95 
96    return (chan < 3) ? nir_fmin(b, Asrc, Adsti) : one;
97 }
98 
99 /* Returns a scalar single factor, unmultiplied */
100 
101 static nir_def *
nir_blend_factor_value(nir_builder * b,nir_def * src,nir_def * src1,nir_def * dst,nir_def * bconst,unsigned chan,enum pipe_blendfactor factor_without_invert)102 nir_blend_factor_value(
103    nir_builder *b,
104    nir_def *src, nir_def *src1, nir_def *dst, nir_def *bconst,
105    unsigned chan,
106    enum pipe_blendfactor factor_without_invert)
107 {
108    switch (factor_without_invert) {
109    case PIPE_BLENDFACTOR_ONE:
110       return nir_imm_floatN_t(b, 1.0, src->bit_size);
111    case PIPE_BLENDFACTOR_SRC_COLOR:
112       return nir_channel(b, src, chan);
113    case PIPE_BLENDFACTOR_SRC1_COLOR:
114       return nir_channel(b, src1, chan);
115    case PIPE_BLENDFACTOR_DST_COLOR:
116       return nir_channel(b, dst, chan);
117    case PIPE_BLENDFACTOR_SRC_ALPHA:
118       return nir_channel(b, src, 3);
119    case PIPE_BLENDFACTOR_SRC1_ALPHA:
120       return nir_channel(b, src1, 3);
121    case PIPE_BLENDFACTOR_DST_ALPHA:
122       return nir_channel(b, dst, 3);
123    case PIPE_BLENDFACTOR_CONST_COLOR:
124       return nir_channel(b, bconst, chan);
125    case PIPE_BLENDFACTOR_CONST_ALPHA:
126       return nir_channel(b, bconst, 3);
127    case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
128       return nir_alpha_saturate(b, src, dst, chan);
129    default:
130       assert(util_blendfactor_is_inverted(factor_without_invert));
131       unreachable("Unexpected inverted factor");
132    }
133 }
134 
135 static nir_def *
nir_fsat_signed(nir_builder * b,nir_def * x)136 nir_fsat_signed(nir_builder *b, nir_def *x)
137 {
138    return nir_fclamp(b, x, nir_imm_floatN_t(b, -1.0, x->bit_size),
139                      nir_imm_floatN_t(b, +1.0, x->bit_size));
140 }
141 
142 static nir_def *
nir_fsat_to_format(nir_builder * b,nir_def * x,enum pipe_format format)143 nir_fsat_to_format(nir_builder *b, nir_def *x, enum pipe_format format)
144 {
145    if (util_format_is_unorm(format))
146       return nir_fsat(b, x);
147    else if (util_format_is_snorm(format))
148       return nir_fsat_signed(b, x);
149    else
150       return x;
151 }
152 
153 /*
154  * The spec says we need to clamp blend factors. However, we don't want to clamp
155  * unnecessarily, as the clamp might not be optimized out. Check whether
156  * clamping a blend factor is needed.
157  */
158 static bool
should_clamp_factor(enum pipe_blendfactor factor,bool snorm)159 should_clamp_factor(enum pipe_blendfactor factor, bool snorm)
160 {
161    switch (util_blendfactor_without_invert(factor)) {
162    case PIPE_BLENDFACTOR_ONE:
163       /* 0, 1 are in [0, 1] and [-1, 1] */
164       return false;
165 
166    case PIPE_BLENDFACTOR_SRC_COLOR:
167    case PIPE_BLENDFACTOR_SRC1_COLOR:
168    case PIPE_BLENDFACTOR_DST_COLOR:
169    case PIPE_BLENDFACTOR_SRC_ALPHA:
170    case PIPE_BLENDFACTOR_SRC1_ALPHA:
171    case PIPE_BLENDFACTOR_DST_ALPHA:
172       /* Colours are already clamped. For unorm, the complement of something
173        * clamped is still clamped. But for snorm, this is not true. Clamp for
174        * snorm only.
175        */
176       return util_blendfactor_is_inverted(factor) && snorm;
177 
178    case PIPE_BLENDFACTOR_CONST_COLOR:
179    case PIPE_BLENDFACTOR_CONST_ALPHA:
180       /* Constant colours are not yet clamped */
181       return true;
182 
183    case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
184       /* For unorm, this is in bounds (and hence so is its complement). For
185        * snorm, it may not be.
186        */
187       return snorm;
188 
189    default:
190       unreachable("invalid blend factor");
191    }
192 }
193 
194 static bool
channel_uses_dest(nir_lower_blend_channel chan)195 channel_uses_dest(nir_lower_blend_channel chan)
196 {
197    /* If blend factors are ignored, dest is used (min/max) */
198    if (!nir_blend_factored(chan.func))
199       return true;
200 
201    /* If dest has a nonzero factor, it is used */
202    if (chan.dst_factor != PIPE_BLENDFACTOR_ZERO)
203       return true;
204 
205    /* Else, check the source factor */
206    switch (util_blendfactor_without_invert(chan.src_factor)) {
207    case PIPE_BLENDFACTOR_DST_COLOR:
208    case PIPE_BLENDFACTOR_DST_ALPHA:
209    case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
210       return true;
211    default:
212       return false;
213    }
214 }
215 
216 static nir_def *
nir_blend_factor(nir_builder * b,nir_def * raw_scalar,nir_def * src,nir_def * src1,nir_def * dst,nir_def * bconst,unsigned chan,enum pipe_blendfactor factor,enum pipe_format format)217 nir_blend_factor(
218    nir_builder *b,
219    nir_def *raw_scalar,
220    nir_def *src, nir_def *src1, nir_def *dst, nir_def *bconst,
221    unsigned chan,
222    enum pipe_blendfactor factor,
223    enum pipe_format format)
224 {
225    nir_def *f =
226       nir_blend_factor_value(b, src, src1, dst, bconst, chan,
227                              util_blendfactor_without_invert(factor));
228 
229    if (util_blendfactor_is_inverted(factor))
230       f = nir_fadd_imm(b, nir_fneg(b, f), 1.0);
231 
232    if (should_clamp_factor(factor, util_format_is_snorm(format)))
233       f = nir_fsat_to_format(b, f, format);
234 
235    return nir_fmul(b, raw_scalar, f);
236 }
237 
238 /* Given a colormask, "blend" with the destination */
239 
240 static nir_def *
nir_color_mask(nir_builder * b,unsigned mask,nir_def * src,nir_def * dst)241 nir_color_mask(
242    nir_builder *b,
243    unsigned mask,
244    nir_def *src,
245    nir_def *dst)
246 {
247    return nir_vec4(b,
248                    nir_channel(b, (mask & (1 << 0)) ? src : dst, 0),
249                    nir_channel(b, (mask & (1 << 1)) ? src : dst, 1),
250                    nir_channel(b, (mask & (1 << 2)) ? src : dst, 2),
251                    nir_channel(b, (mask & (1 << 3)) ? src : dst, 3));
252 }
253 
254 static nir_def *
nir_logicop_func(nir_builder * b,enum pipe_logicop func,nir_def * src,nir_def * dst,nir_def * bitmask)255 nir_logicop_func(
256    nir_builder *b,
257    enum pipe_logicop func,
258    nir_def *src, nir_def *dst, nir_def *bitmask)
259 {
260    switch (func) {
261    case PIPE_LOGICOP_CLEAR:
262       return nir_imm_ivec4(b, 0, 0, 0, 0);
263    case PIPE_LOGICOP_NOR:
264       return nir_ixor(b, nir_ior(b, src, dst), bitmask);
265    case PIPE_LOGICOP_AND_INVERTED:
266       return nir_iand(b, nir_ixor(b, src, bitmask), dst);
267    case PIPE_LOGICOP_COPY_INVERTED:
268       return nir_ixor(b, src, bitmask);
269    case PIPE_LOGICOP_AND_REVERSE:
270       return nir_iand(b, src, nir_ixor(b, dst, bitmask));
271    case PIPE_LOGICOP_INVERT:
272       return nir_ixor(b, dst, bitmask);
273    case PIPE_LOGICOP_XOR:
274       return nir_ixor(b, src, dst);
275    case PIPE_LOGICOP_NAND:
276       return nir_ixor(b, nir_iand(b, src, dst), bitmask);
277    case PIPE_LOGICOP_AND:
278       return nir_iand(b, src, dst);
279    case PIPE_LOGICOP_EQUIV:
280       return nir_ixor(b, nir_ixor(b, src, dst), bitmask);
281    case PIPE_LOGICOP_NOOP:
282       unreachable("optimized out");
283    case PIPE_LOGICOP_OR_INVERTED:
284       return nir_ior(b, nir_ixor(b, src, bitmask), dst);
285    case PIPE_LOGICOP_COPY:
286       return src;
287    case PIPE_LOGICOP_OR_REVERSE:
288       return nir_ior(b, src, nir_ixor(b, dst, bitmask));
289    case PIPE_LOGICOP_OR:
290       return nir_ior(b, src, dst);
291    case PIPE_LOGICOP_SET:
292       return nir_imm_ivec4(b, ~0, ~0, ~0, ~0);
293    }
294 
295    unreachable("Invalid logciop function");
296 }
297 
298 static nir_def *
nir_blend_logicop(nir_builder * b,const nir_lower_blend_options * options,unsigned rt,nir_def * src,nir_def * dst)299 nir_blend_logicop(
300    nir_builder *b,
301    const nir_lower_blend_options *options,
302    unsigned rt,
303    nir_def *src, nir_def *dst)
304 {
305    unsigned bit_size = src->bit_size;
306 
307    enum pipe_format format = options->format[rt];
308    const struct util_format_description *format_desc =
309       util_format_description(format);
310 
311    /* From section 17.3.9 ("Logical Operation") of the OpenGL 4.6 core spec:
312     *
313     *    Logical operation has no effect on a floating-point destination color
314     *    buffer, or when FRAMEBUFFER_SRGB is enabled and the value of
315     *    FRAMEBUFFER_ATTACHMENT_COLOR_ENCODING for the framebuffer attachment
316     *    corresponding to the destination buffer is SRGB (see section 9.2.3).
317     *    However, if logical operation is enabled, blending is still disabled.
318     */
319    if (util_format_is_float(format) || util_format_is_srgb(format))
320       return src;
321 
322    nir_alu_type type =
323       util_format_is_pure_integer(format) ? nir_type_uint : nir_type_float;
324 
325    if (bit_size != 32) {
326       src = nir_convert_to_bit_size(b, src, type, 32);
327       dst = nir_convert_to_bit_size(b, dst, type, 32);
328    }
329 
330    assert(src->num_components <= 4);
331    assert(dst->num_components <= 4);
332 
333    unsigned bits[4];
334    for (int i = 0; i < 4; ++i)
335       bits[i] = format_desc->channel[i].size;
336 
337    if (util_format_is_unorm(format)) {
338       src = nir_format_float_to_unorm(b, src, bits);
339       dst = nir_format_float_to_unorm(b, dst, bits);
340    } else if (util_format_is_snorm(format)) {
341       src = nir_format_float_to_snorm(b, src, bits);
342       dst = nir_format_float_to_snorm(b, dst, bits);
343    } else {
344       assert(util_format_is_pure_integer(format));
345    }
346 
347    nir_const_value mask[4];
348    for (int i = 0; i < 4; ++i)
349       mask[i] = nir_const_value_for_uint(BITFIELD_MASK(bits[i]), 32);
350 
351    nir_def *out = nir_logicop_func(b, options->logicop_func, src, dst,
352                                    nir_build_imm(b, 4, 32, mask));
353 
354    if (util_format_is_unorm(format)) {
355       out = nir_format_unorm_to_float(b, out, bits);
356    } else if (util_format_is_snorm(format)) {
357       /* Sign extend before converting so the i2f in snorm_to_float works */
358       out = nir_format_sign_extend_ivec(b, out, bits);
359       out = nir_format_snorm_to_float(b, out, bits);
360    } else {
361       assert(util_format_is_pure_integer(format));
362    }
363 
364    if (bit_size != 32)
365       out = nir_convert_to_bit_size(b, out, type, bit_size);
366 
367    return out;
368 }
369 
370 static bool
channel_exists(const struct util_format_description * desc,unsigned i)371 channel_exists(const struct util_format_description *desc, unsigned i)
372 {
373    return (i < desc->nr_channels) &&
374           desc->channel[i].type != UTIL_FORMAT_TYPE_VOID;
375 }
376 
377 /* Given a blend state, the source color, and the destination color,
378  * return the blended color
379  */
380 
381 static nir_def *
nir_blend(nir_builder * b,const nir_lower_blend_options * options,unsigned rt,nir_def * src,nir_def * src1,nir_def * dst)382 nir_blend(
383    nir_builder *b,
384    const nir_lower_blend_options *options,
385    unsigned rt,
386    nir_def *src, nir_def *src1, nir_def *dst)
387 {
388    /* Don't crash if src1 isn't written. It doesn't matter what dual colour we
389     * blend with in that case, as long as we don't dereference NULL.
390     */
391    if (!src1)
392       src1 = nir_imm_zero(b, 4, src->bit_size);
393 
394    /* Grab the blend constant ahead of time */
395    nir_def *bconst;
396    if (options->scalar_blend_const) {
397       bconst = nir_vec4(b,
398                         nir_load_blend_const_color_r_float(b),
399                         nir_load_blend_const_color_g_float(b),
400                         nir_load_blend_const_color_b_float(b),
401                         nir_load_blend_const_color_a_float(b));
402    } else {
403       bconst = nir_load_blend_const_color_rgba(b);
404    }
405 
406    if (src->bit_size == 16) {
407       bconst = nir_f2f16(b, bconst);
408       src1 = nir_f2f16(b, src1);
409    }
410 
411    /* Fixed-point framebuffers require their inputs clamped. */
412    enum pipe_format format = options->format[rt];
413 
414    /* From section 17.3.6 "Blending" of the OpenGL 4.5 spec:
415     *
416     *     If the color buffer is fixed-point, the components of the source and
417     *     destination values and blend factors are each clamped to [0, 1] or
418     *     [-1, 1] respectively for an unsigned normalized or signed normalized
419     *     color buffer prior to evaluating the blend equation. If the color
420     *     buffer is floating-point, no clamping occurs.
421     *
422     * Blend factors are clamped at the time of their use to ensure we properly
423     * clamp negative constant colours with signed normalized formats and
424     * ONE_MINUS_CONSTANT_* factors. Notice that -1 is in [-1, 1] but 1 - (-1) =
425     * 2 is not in [-1, 1] and should be clamped to 1.
426     */
427    src = nir_fsat_to_format(b, src, format);
428 
429    if (src1)
430       src1 = nir_fsat_to_format(b, src1, format);
431 
432    /* DST_ALPHA reads back 1.0 if there is no alpha channel */
433    const struct util_format_description *desc =
434       util_format_description(format);
435 
436    nir_def *zero = nir_imm_floatN_t(b, 0.0, dst->bit_size);
437    nir_def *one = nir_imm_floatN_t(b, 1.0, dst->bit_size);
438 
439    dst = nir_vec4(b,
440                   channel_exists(desc, 0) ? nir_channel(b, dst, 0) : zero,
441                   channel_exists(desc, 1) ? nir_channel(b, dst, 1) : zero,
442                   channel_exists(desc, 2) ? nir_channel(b, dst, 2) : zero,
443                   channel_exists(desc, 3) ? nir_channel(b, dst, 3) : one);
444 
445    /* We blend per channel and recombine later */
446    nir_def *channels[4];
447 
448    for (unsigned c = 0; c < 4; ++c) {
449       /* Decide properties based on channel */
450       nir_lower_blend_channel chan =
451          (c < 3) ? options->rt[rt].rgb : options->rt[rt].alpha;
452 
453       nir_def *psrc = nir_channel(b, src, c);
454       nir_def *pdst = nir_channel(b, dst, c);
455 
456       if (nir_blend_factored(chan.func)) {
457          psrc = nir_blend_factor(
458             b, psrc,
459             src, src1, dst, bconst, c,
460             chan.src_factor, format);
461 
462          pdst = nir_blend_factor(
463             b, pdst,
464             src, src1, dst, bconst, c,
465             chan.dst_factor, format);
466       }
467 
468       channels[c] = nir_blend_func(b, chan.func, psrc, pdst);
469    }
470 
471    return nir_vec(b, channels, 4);
472 }
473 
474 static int
color_index_for_location(unsigned location)475 color_index_for_location(unsigned location)
476 {
477    assert(location != FRAG_RESULT_COLOR &&
478           "gl_FragColor must be lowered before nir_lower_blend");
479 
480    if (location < FRAG_RESULT_DATA0)
481       return -1;
482    else
483       return location - FRAG_RESULT_DATA0;
484 }
485 
486 /*
487  * Test if the blending options for a given channel encode the "replace" blend
488  * mode: dest = source. In this case, blending may be specially optimized.
489  */
490 static bool
nir_blend_replace_channel(const nir_lower_blend_channel * c)491 nir_blend_replace_channel(const nir_lower_blend_channel *c)
492 {
493    return (c->func == PIPE_BLEND_ADD) &&
494           (c->src_factor == PIPE_BLENDFACTOR_ONE) &&
495           (c->dst_factor == PIPE_BLENDFACTOR_ZERO);
496 }
497 
498 static bool
nir_blend_replace_rt(const nir_lower_blend_rt * rt)499 nir_blend_replace_rt(const nir_lower_blend_rt *rt)
500 {
501    return nir_blend_replace_channel(&rt->rgb) &&
502           nir_blend_replace_channel(&rt->alpha);
503 }
504 
505 static bool
nir_lower_blend_instr(nir_builder * b,nir_intrinsic_instr * store,void * data)506 nir_lower_blend_instr(nir_builder *b, nir_intrinsic_instr *store, void *data)
507 {
508    struct ctx *ctx = data;
509    const nir_lower_blend_options *options = ctx->options;
510    if (store->intrinsic != nir_intrinsic_store_output)
511       return false;
512 
513    nir_io_semantics sem = nir_intrinsic_io_semantics(store);
514    int rt = color_index_for_location(sem.location);
515 
516    /* No blend lowering requested on this RT */
517    if (rt < 0 || options->format[rt] == PIPE_FORMAT_NONE)
518       return false;
519 
520    /* Only process stores once. Pass flags are cleared by consume_dual_stores */
521    if (store->instr.pass_flags)
522       return false;
523 
524    store->instr.pass_flags = 1;
525 
526    /* Store are sunk to the bottom of the block to ensure that the dual
527     * source colour is already written.
528     */
529    b->cursor = nir_after_block(store->instr.block);
530 
531    /* Don't bother copying the destination to the source for disabled RTs */
532    if (options->rt[rt].colormask == 0 ||
533        (options->logicop_enable && options->logicop_func == PIPE_LOGICOP_NOOP)) {
534 
535       nir_instr_remove(&store->instr);
536       return true;
537    }
538 
539    /* Grab the input color.  We always want 4 channels during blend.  Dead
540     * code will clean up any channels we don't need.
541     */
542    nir_def *src = nir_pad_vector(b, store->src[0].ssa, 4);
543 
544    assert(nir_src_as_uint(store->src[1]) == 0 && "store_output invariant");
545 
546    /* Grab the previous fragment color if we need it */
547    nir_def *dst;
548 
549    if (channel_uses_dest(options->rt[rt].rgb) ||
550        channel_uses_dest(options->rt[rt].alpha) ||
551        options->logicop_enable ||
552        options->rt[rt].colormask != BITFIELD_MASK(4)) {
553 
554       b->shader->info.outputs_read |= BITFIELD64_BIT(sem.location);
555       b->shader->info.fs.uses_fbfetch_output = true;
556       b->shader->info.fs.uses_sample_shading = true;
557       sem.fb_fetch_output = true;
558 
559       dst = nir_load_output(b, 4, nir_src_bit_size(store->src[0]),
560                             nir_imm_int(b, 0),
561                             .dest_type = nir_intrinsic_src_type(store),
562                             .io_semantics = sem);
563    } else {
564       dst = nir_undef(b, 4, nir_src_bit_size(store->src[0]));
565    }
566 
567    /* Blend the two colors per the passed options. We only call nir_blend if
568     * blending is enabled with a blend mode other than replace (independent of
569     * the color mask). That avoids unnecessary fsat instructions in the common
570     * case where blending is disabled at an API level, but the driver calls
571     * nir_blend (possibly for color masking).
572     */
573    nir_def *blended = src;
574 
575    if (options->logicop_enable) {
576       blended = nir_blend_logicop(b, options, rt, src, dst);
577    } else if (!util_format_is_pure_integer(options->format[rt]) &&
578               !nir_blend_replace_rt(&options->rt[rt])) {
579       assert(!util_format_is_scaled(options->format[rt]));
580       blended = nir_blend(b, options, rt, src, ctx->src1[rt], dst);
581    }
582 
583    /* Apply a colormask if necessary */
584    if (options->rt[rt].colormask != BITFIELD_MASK(4))
585       blended = nir_color_mask(b, options->rt[rt].colormask, blended, dst);
586 
587    const unsigned num_components =
588       util_format_get_nr_components(options->format[rt]);
589 
590    /* Shave off any components we don't want to store */
591    blended = nir_trim_vector(b, blended, num_components);
592 
593    /* Grow or shrink the store destination as needed */
594    store->num_components = num_components;
595    nir_intrinsic_set_write_mask(store, nir_intrinsic_write_mask(store) &
596                                           nir_component_mask(num_components));
597 
598    /* Write out the final color instead of the input */
599    nir_src_rewrite(&store->src[0], blended);
600 
601    /* Sink to bottom */
602    nir_instr_remove(&store->instr);
603    nir_builder_instr_insert(b, &store->instr);
604    return true;
605 }
606 
607 /*
608  * Dual-source colours are only for blending, so when nir_lower_blend is used,
609  * the dual source store_output is for us (only). Remove dual stores so the
610  * backend doesn't have to deal with them, collecting the sources for blending.
611  */
612 static bool
consume_dual_stores(nir_builder * b,nir_intrinsic_instr * store,void * data)613 consume_dual_stores(nir_builder *b, nir_intrinsic_instr *store, void *data)
614 {
615    nir_def **outputs = data;
616    if (store->intrinsic != nir_intrinsic_store_output)
617       return false;
618 
619    /* While we're here, clear the pass flags for store_outputs, since we'll set
620     * them later.
621     */
622    store->instr.pass_flags = 0;
623 
624    nir_io_semantics sem = nir_intrinsic_io_semantics(store);
625    if (sem.dual_source_blend_index == 0)
626       return false;
627 
628    int rt = color_index_for_location(sem.location);
629    assert(rt >= 0 && rt < 8 && "bounds for dual-source blending");
630 
631    outputs[rt] = store->src[0].ssa;
632    nir_instr_remove(&store->instr);
633    return true;
634 }
635 
636 /** Lower blending to framebuffer fetch and some math
637  *
638  * This pass requires that shader I/O is lowered to explicit load/store
639  * instructions using nir_lower_io.
640  */
641 bool
nir_lower_blend(nir_shader * shader,const nir_lower_blend_options * options)642 nir_lower_blend(nir_shader *shader, const nir_lower_blend_options *options)
643 {
644    assert(shader->info.stage == MESA_SHADER_FRAGMENT);
645 
646    struct ctx ctx = { .options = options };
647    bool progress = nir_shader_intrinsics_pass(shader, consume_dual_stores,
648                                               nir_metadata_control_flow,
649                                               ctx.src1);
650 
651    progress |= nir_shader_intrinsics_pass(shader, nir_lower_blend_instr,
652                                           nir_metadata_control_flow,
653                                           &ctx);
654    return progress;
655 }
656