// xref: /aosp_15_r20/external/skia/src/sksl/codegen/SkSLRasterPipelineBuilder.h
// (revision c8dee2aa9b3f27cf6c858bd81872bdeb2c07ed17)
/*
 * Copyright 2022 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */
7 
8 #ifndef SKSL_RASTERPIPELINEBUILDER
9 #define SKSL_RASTERPIPELINEBUILDER
10 
11 #include "include/core/SkTypes.h"
12 
13 #include "include/core/SkSpan.h"
14 #include "include/core/SkTypes.h"
15 #include "include/private/base/SkTArray.h"
16 #include "src/base/SkUtils.h"
17 #include "src/core/SkRasterPipelineOpList.h"
18 
19 #include <cstddef>
20 #include <cstdint>
21 #include <memory>
22 #include <optional>
23 
24 class SkArenaAlloc;
25 class SkRasterPipeline;
26 class SkWStream;
27 using SkRPOffset = uint32_t;
28 
29 namespace SkSL {
30 
31 class DebugTracePriv;
32 class TraceHook;
33 
34 namespace RP {
35 
// A single scalar in our program consumes one slot. Slots are identified by a plain index;
// `NA` marks "no slot".
using Slot = int;
constexpr Slot NA = -1;

// Scalars, vectors, and matrices can be represented as a range of slot indices.
// `index` is the first slot in the range; `count` is the number of consecutive slots.
struct SlotRange {
    Slot index = 0;
    int count = 0;
};
45 
// X-macro listing the ops that extend the native Raster Pipeline op list. Invoke with a macro
// `M(name)` to stamp out one entry per extended op (used below by ProgramOp and BuilderOp so the
// two enums stay in sync).
#define SKRP_EXTENDED_OPS(M)     \
    /* branch targets */         \
    M(label)                     \
                                 \
    /* child programs */         \
    M(invoke_shader)             \
    M(invoke_color_filter)       \
    M(invoke_blender)            \
                                 \
    /* color space transforms */ \
    M(invoke_to_linear_srgb)     \
    M(invoke_from_linear_srgb)
58 
59 // An RP::Program will consist entirely of ProgramOps. The ProgramOps list is a superset of the
60 // native SkRasterPipelineOps op-list. It also has a few extra ops to indicate child-effect
61 // invocation, and a `label` op to indicate branch targets.
62 enum class ProgramOp {
63     #define M(stage) stage,
64         // A finished program can contain any native Raster Pipeline op...
65         SK_RASTER_PIPELINE_OPS_ALL(M)
66 
67         // ... as well as our extended ops.
68         SKRP_EXTENDED_OPS(M)
69     #undef M
70 };
71 
72 // BuilderOps are a superset of ProgramOps. They are used by the RP::Builder, which works in terms
73 // of Instructions; Instructions are slightly more expressive than raw SkRasterPipelineOps. In
74 // particular, the Builder supports stacks for pushing and popping scratch values.
75 // RP::Program::makeStages is responsible for rewriting Instructions/BuilderOps into an array of
76 // RP::Program::Stages, which will contain only native SkRasterPipelineOps and (optionally)
77 // child-effect invocations.
78 enum class BuilderOp {
79     #define M(stage) stage,
80         // An in-flight program can contain all the native Raster Pipeline ops...
81         SK_RASTER_PIPELINE_OPS_ALL(M)
82 
83         // ... and our extended ops...
84         SKRP_EXTENDED_OPS(M)
85     #undef M
86 
87     // ... and also has Builder-specific ops. These ops generally interface with the stack, and are
88     // converted into ProgramOps during `makeStages`.
89     push_clone,
90     push_clone_from_stack,
91     push_clone_indirect_from_stack,
92     push_constant,
93     push_immutable,
94     push_immutable_indirect,
95     push_slots,
96     push_slots_indirect,
97     push_uniform,
98     push_uniform_indirect,
99     copy_stack_to_slots,
100     copy_stack_to_slots_unmasked,
101     copy_stack_to_slots_indirect,
102     copy_uniform_to_slots_unmasked,
103     store_immutable_value,
104     swizzle_copy_stack_to_slots,
105     swizzle_copy_stack_to_slots_indirect,
106     discard_stack,
107     pad_stack,
108     select,
109     push_condition_mask,
110     pop_condition_mask,
111     push_loop_mask,
112     pop_loop_mask,
113     pop_and_reenable_loop_mask,
114     push_return_mask,
115     pop_return_mask,
116     push_src_rgba,
117     push_dst_rgba,
118     push_device_xy01,
119     pop_src_rgba,
120     pop_dst_rgba,
121     trace_var_indirect,
122     branch_if_no_active_lanes_on_stack_top_equal,
123     unsupported
124 };
125 
126 // If the extended ops are not in sync between enums, program creation will not work.
127 static_assert((int)ProgramOp::label == (int)BuilderOp::label);
128 
129 // Represents a single raster-pipeline SkSL instruction.
130 struct Instruction {
131     BuilderOp fOp;
132     Slot      fSlotA = NA;
133     Slot      fSlotB = NA;
134     int       fImmA = 0;
135     int       fImmB = 0;
136     int       fImmC = 0;
137     int       fImmD = 0;
138     int       fStackID = 0;
139 };
140 
// Host-provided hooks invoked when a program reaches one of the extended ops: the `append*`
// methods correspond to child-effect invocation (invoke_shader / invoke_color_filter /
// invoke_blender), and the *LinearSrgb methods to the color-space-transform ops. The `append*`
// methods return false on failure.
class Callbacks {
public:
    virtual ~Callbacks() = default;

    virtual bool appendShader(int index) = 0;
    virtual bool appendColorFilter(int index) = 0;
    virtual bool appendBlender(int index) = 0;

    virtual void toLinearSrgb(const void* color) = 0;
    virtual void fromLinearSrgb(const void* color) = 0;
};
152 
153 class Program {
154 public:
155     Program(skia_private::TArray<Instruction> instrs,
156             int numValueSlots,
157             int numUniformSlots,
158             int numImmutableSlots,
159             int numLabels,
160             DebugTracePriv* debugTrace);
161     ~Program();
162 
163     bool appendStages(SkRasterPipeline* pipeline,
164                       SkArenaAlloc* alloc,
165                       Callbacks* callbacks,
166                       SkSpan<const float> uniforms) const;
167 
168     void dump(SkWStream* out, bool writeInstructionCount = false) const;
169 
numUniforms()170     int numUniforms() const { return fNumUniformSlots; }
171 
172 private:
173     using StackDepths = skia_private::TArray<int>; // [stack index] = depth of stack
174 
175     struct SlotData {
176         SkSpan<float> values;
177         SkSpan<float> stack;
178         SkSpan<float> immutable;
179     };
180     std::optional<SlotData> allocateSlotData(SkArenaAlloc* alloc) const;
181 
182     struct Stage {
183         ProgramOp op;
184         void*     ctx;
185     };
186     void makeStages(skia_private::TArray<Stage>* pipeline,
187                     SkArenaAlloc* alloc,
188                     SkSpan<const float> uniforms,
189                     const SlotData& slots) const;
190     StackDepths tempStackMaxDepths() const;
191 
192     // These methods are used to split up multi-slot copies into multiple ops as needed.
193     void appendCopy(skia_private::TArray<Stage>* pipeline,
194                     SkArenaAlloc* alloc,
195                     std::byte* basePtr,
196                     ProgramOp baseStage,
197                     SkRPOffset dst, int dstStride,
198                     SkRPOffset src, int srcStride,
199                     int numSlots) const;
200     void appendCopyImmutableUnmasked(skia_private::TArray<Stage>* pipeline,
201                                      SkArenaAlloc* alloc,
202                                      std::byte* basePtr,
203                                      SkRPOffset dst,
204                                      SkRPOffset src,
205                                      int numSlots) const;
206     void appendCopySlotsUnmasked(skia_private::TArray<Stage>* pipeline,
207                                  SkArenaAlloc* alloc,
208                                  SkRPOffset dst,
209                                  SkRPOffset src,
210                                  int numSlots) const;
211     void appendCopySlotsMasked(skia_private::TArray<Stage>* pipeline,
212                                SkArenaAlloc* alloc,
213                                SkRPOffset dst,
214                                SkRPOffset src,
215                                int numSlots) const;
216 
217     // Appends a single-slot single-input math operation to the pipeline. The op `stage` will
218     // appended `numSlots` times, starting at position `dst` and advancing one slot for each
219     // subsequent invocation.
220     void appendSingleSlotUnaryOp(skia_private::TArray<Stage>* pipeline, ProgramOp stage,
221                                  float* dst, int numSlots) const;
222 
223     // Appends a multi-slot single-input math operation to the pipeline. `baseStage` must refer to
224     // a single-slot "apply_op" stage, which must be immediately followed by specializations for
225     // 2-4 slots. For instance, {`ceil_float`, `ceil_2_floats`, `ceil_3_floats`, `ceil_4_floats`}
226     // must be contiguous ops in the stage list, listed in that order; pass `ceil_float` and we
227     // pick the appropriate op based on `numSlots`.
228     void appendMultiSlotUnaryOp(skia_private::TArray<Stage>* pipeline, ProgramOp baseStage,
229                                 float* dst, int numSlots) const;
230 
231     // Appends an immediate-mode binary operation to the pipeline. `baseStage` must refer to
232     // a single-slot, immediate-mode "apply-imm" stage, which must be immediately preceded by
233     // specializations for 2-4 slots if numSlots is greater than 1. For instance, {`add_imm_4_ints`,
234     // `add_imm_3_ints`, `add_imm_2_ints`, `add_imm_int`} must be contiguous ops in the stage list,
235     // listed in that order; pass `add_imm_int` and we pick the appropriate op based on `numSlots`.
236     // Some immediate-mode binary ops are single-slot only in the interest of code size; in this
237     // case, the multi-slot ops can be absent, but numSlots must be 1.
238     void appendImmediateBinaryOp(skia_private::TArray<Stage>* pipeline, SkArenaAlloc* alloc,
239                                  ProgramOp baseStage,
240                                  SkRPOffset dst, int32_t value, int numSlots) const;
241 
242     // Appends a two-input math operation to the pipeline. `src` must be _immediately_ after `dst`
243     // in memory. `baseStage` must refer to an unbounded "apply_to_n_slots" stage. A BinaryOpCtx
244     // will be used to pass pointers to the destination and source; the delta between the two
245     // pointers implicitly gives the number of slots.
246     void appendAdjacentNWayBinaryOp(skia_private::TArray<Stage>* pipeline, SkArenaAlloc* alloc,
247                                     ProgramOp stage,
248                                     SkRPOffset dst, SkRPOffset src, int numSlots) const;
249 
250     // Appends a multi-slot two-input math operation to the pipeline. `src` must be _immediately_
251     // after `dst` in memory. `baseStage` must refer to an unbounded "apply_to_n_slots" stage, which
252     // must be immediately followed by specializations for 1-4 slots. For instance, {`add_n_floats`,
253     // `add_float`, `add_2_floats`, `add_3_floats`, `add_4_floats`} must be contiguous ops in the
254     // stage list, listed in that order; pass `add_n_floats` and we pick the appropriate op based on
255     // `numSlots`.
256     void appendAdjacentMultiSlotBinaryOp(skia_private::TArray<Stage>* pipeline, SkArenaAlloc* alloc,
257                                          ProgramOp baseStage, std::byte* basePtr,
258                                          SkRPOffset dst, SkRPOffset src, int numSlots) const;
259 
260     // Appends a multi-slot math operation having three inputs (dst, src0, src1) and one output
261     // (dst) to the pipeline. The three inputs must be _immediately_ adjacent in memory. `baseStage`
262     // must refer to an unbounded "apply_to_n_slots" stage, which must be immediately followed by
263     // specializations for 1-4 slots.
264     void appendAdjacentMultiSlotTernaryOp(skia_private::TArray<Stage>* pipeline,
265                                           SkArenaAlloc* alloc, ProgramOp baseStage,
266                                           std::byte* basePtr, SkRPOffset dst, SkRPOffset src0,
267                                           SkRPOffset src1, int numSlots) const;
268 
269     // Appends a math operation having three inputs (dst, src0, src1) and one output (dst) to the
270     // pipeline. The three inputs must be _immediately_ adjacent in memory. `baseStage` must refer
271     // to an unbounded "apply_to_n_slots" stage. A TernaryOpCtx will be used to pass pointers to the
272     // destination and sources; the delta between the each pointer implicitly gives the slot count.
273     void appendAdjacentNWayTernaryOp(skia_private::TArray<Stage>* pipeline, SkArenaAlloc* alloc,
274                                      ProgramOp stage, std::byte* basePtr, SkRPOffset dst,
275                                      SkRPOffset src0, SkRPOffset src1, int numSlots) const;
276 
277     // Appends a stack_rewind op on platforms where it is needed (when SK_HAS_MUSTTAIL is not set).
278     void appendStackRewindForNonTailcallers(skia_private::TArray<Stage>* pipeline) const;
279 
280     // Appends a stack_rewind op unilaterally.
281     void appendStackRewind(skia_private::TArray<Stage>* pipeline) const;
282 
283     class Dumper;
284     friend class Dumper;
285 
286     skia_private::TArray<Instruction> fInstructions;
287     int fNumValueSlots = 0;
288     int fNumUniformSlots = 0;
289     int fNumImmutableSlots = 0;
290     int fNumTempStackSlots = 0;
291     int fNumLabels = 0;
292     StackDepths fTempStackMaxDepths;
293     DebugTracePriv* fDebugTrace = nullptr;
294     std::unique_ptr<SkSL::TraceHook> fTraceHook;
295 };
296 
297 class Builder {
298 public:
299     /** Finalizes and returns a completed program. */
300     std::unique_ptr<Program> finish(int numValueSlots,
301                                     int numUniformSlots,
302                                     int numImmutableSlots,
303                                     DebugTracePriv* debugTrace = nullptr);
304     /**
305      * Peels off a label ID for use in the program. Set the label's position in the program with
306      * the `label` instruction. Actually branch to the target with an instruction like
307      * `branch_if_any_lanes_active` or `jump`.
308      */
nextLabelID()309     int nextLabelID() {
310         return fNumLabels++;
311     }
312 
313     /**
314      * The builder keeps track of the state of execution masks; when we know that the execution
315      * mask is unaltered, we can generate simpler code. Code which alters the execution mask is
316      * required to enable this flag.
317      */
enableExecutionMaskWrites()318     void enableExecutionMaskWrites() {
319         ++fExecutionMaskWritesEnabled;
320     }
321 
disableExecutionMaskWrites()322     void disableExecutionMaskWrites() {
323         SkASSERT(this->executionMaskWritesAreEnabled());
324         --fExecutionMaskWritesEnabled;
325     }
326 
executionMaskWritesAreEnabled()327     bool executionMaskWritesAreEnabled() {
328         return fExecutionMaskWritesEnabled > 0;
329     }
330 
331     /** Assemble a program from the Raster Pipeline instructions below. */
init_lane_masks()332     void init_lane_masks() {
333         this->appendInstruction(BuilderOp::init_lane_masks, {});
334     }
335 
store_src_rg(SlotRange slots)336     void store_src_rg(SlotRange slots) {
337         SkASSERT(slots.count == 2);
338         this->appendInstruction(BuilderOp::store_src_rg, {slots.index});
339     }
340 
store_src(SlotRange slots)341     void store_src(SlotRange slots) {
342         SkASSERT(slots.count == 4);
343         this->appendInstruction(BuilderOp::store_src, {slots.index});
344     }
345 
store_dst(SlotRange slots)346     void store_dst(SlotRange slots) {
347         SkASSERT(slots.count == 4);
348         this->appendInstruction(BuilderOp::store_dst, {slots.index});
349     }
350 
store_device_xy01(SlotRange slots)351     void store_device_xy01(SlotRange slots) {
352         SkASSERT(slots.count == 4);
353         this->appendInstruction(BuilderOp::store_device_xy01, {slots.index});
354     }
355 
load_src(SlotRange slots)356     void load_src(SlotRange slots) {
357         SkASSERT(slots.count == 4);
358         this->appendInstruction(BuilderOp::load_src, {slots.index});
359     }
360 
load_dst(SlotRange slots)361     void load_dst(SlotRange slots) {
362         SkASSERT(slots.count == 4);
363         this->appendInstruction(BuilderOp::load_dst, {slots.index});
364     }
365 
set_current_stack(int stackID)366     void set_current_stack(int stackID) {
367         fCurrentStackID = stackID;
368     }
369 
370     // Inserts a label into the instruction stream.
371     void label(int labelID);
372 
373     // Unconditionally branches to a label.
374     void jump(int labelID);
375 
376     // Branches to a label if the execution mask is active in every lane.
377     void branch_if_all_lanes_active(int labelID);
378 
379     // Branches to a label if the execution mask is active in any lane.
380     void branch_if_any_lanes_active(int labelID);
381 
382     // Branches to a label if the execution mask is inactive across all lanes.
383     void branch_if_no_lanes_active(int labelID);
384 
385     // Branches to a label if the top value on the stack is _not_ equal to `value` in any lane.
386     void branch_if_no_active_lanes_on_stack_top_equal(int value, int labelID);
387 
388     // We use the same SkRasterPipeline op regardless of the literal type, and bitcast the value.
389     void push_constant_i(int32_t val, int count = 1);
390 
push_zeros(int count)391     void push_zeros(int count) {
392         this->push_constant_i(/*val=*/0, count);
393     }
394 
push_constant_f(float val)395     void push_constant_f(float val) {
396         this->push_constant_i(sk_bit_cast<int32_t>(val), /*count=*/1);
397     }
398 
399     void push_constant_u(uint32_t val, int count = 1) {
400         this->push_constant_i(sk_bit_cast<int32_t>(val), count);
401     }
402 
403     // Translates into copy_uniforms (from uniforms into temp stack) in Raster Pipeline.
404     void push_uniform(SlotRange src);
405 
406     // Initializes the Raster Pipeline slot with a constant value when the program is first created.
407     // Does not add any instructions to the program.
store_immutable_value_i(Slot slot,int32_t val)408     void store_immutable_value_i(Slot slot, int32_t val) {
409         this->appendInstruction(BuilderOp::store_immutable_value, {slot}, val);
410     }
411 
412     // Translates into copy_uniforms (from uniforms into value-slots) in Raster Pipeline.
413     void copy_uniform_to_slots_unmasked(SlotRange dst, SlotRange src);
414 
415     // Translates into copy_from_indirect_uniform_unmasked (from values into temp stack) in Raster
416     // Pipeline. `fixedRange` denotes a fixed set of slots; this range is pushed forward by the
417     // value at the top of stack `dynamicStack`. Pass the range of the uniform being indexed as
418     // `limitRange`; this is used as a hard cap, to avoid indexing outside of bounds.
419     void push_uniform_indirect(SlotRange fixedRange, int dynamicStack, SlotRange limitRange);
420 
421 
422     // Translates into copy_slots_unmasked (from values into temp stack) in Raster Pipeline.
push_slots(SlotRange src)423     void push_slots(SlotRange src) {
424         this->push_slots_or_immutable(src, BuilderOp::push_slots);
425     }
426 
427     // Translates into copy_immutable_unmasked (from immutables into temp stack) in Raster Pipeline.
push_immutable(SlotRange src)428     void push_immutable(SlotRange src) {
429         this->push_slots_or_immutable(src, BuilderOp::push_immutable);
430     }
431 
432     void push_slots_or_immutable(SlotRange src, BuilderOp op);
433 
434     // Translates into copy_from_indirect_unmasked (from values into temp stack) in Raster Pipeline.
435     // `fixedRange` denotes a fixed set of slots; this range is pushed forward by the value at the
436     // top of stack `dynamicStack`. Pass the slot range of the variable being indexed as
437     // `limitRange`; this is used as a hard cap, to avoid indexing outside of bounds.
push_slots_indirect(SlotRange fixedRange,int dynamicStack,SlotRange limitRange)438     void push_slots_indirect(SlotRange fixedRange, int dynamicStack, SlotRange limitRange) {
439         this->push_slots_or_immutable_indirect(fixedRange, dynamicStack, limitRange,
440                                                BuilderOp::push_slots_indirect);
441     }
442 
push_immutable_indirect(SlotRange fixedRange,int dynamicStack,SlotRange limitRange)443     void push_immutable_indirect(SlotRange fixedRange, int dynamicStack, SlotRange limitRange) {
444         this->push_slots_or_immutable_indirect(fixedRange, dynamicStack, limitRange,
445                                                BuilderOp::push_immutable_indirect);
446     }
447 
448     void push_slots_or_immutable_indirect(SlotRange fixedRange, int dynamicStack,
449                                           SlotRange limitRange, BuilderOp op);
450 
451     // Translates into copy_slots_masked (from temp stack to values) in Raster Pipeline.
452     // Does not discard any values on the temp stack.
copy_stack_to_slots(SlotRange dst)453     void copy_stack_to_slots(SlotRange dst) {
454         this->copy_stack_to_slots(dst, /*offsetFromStackTop=*/dst.count);
455     }
456 
457     void copy_stack_to_slots(SlotRange dst, int offsetFromStackTop);
458 
459     // Translates into swizzle_copy_slots_masked (from temp stack to values) in Raster Pipeline.
460     // Does not discard any values on the temp stack.
461     void swizzle_copy_stack_to_slots(SlotRange dst,
462                                      SkSpan<const int8_t> components,
463                                      int offsetFromStackTop);
464 
465     // Translates into swizzle_copy_to_indirect_masked (from temp stack to values) in Raster
466     // Pipeline. Does not discard any values on the temp stack.
467     void swizzle_copy_stack_to_slots_indirect(SlotRange fixedRange,
468                                               int dynamicStackID,
469                                               SlotRange limitRange,
470                                               SkSpan<const int8_t> components,
471                                               int offsetFromStackTop);
472 
473     // Translates into copy_slots_unmasked (from temp stack to values) in Raster Pipeline.
474     // Does not discard any values on the temp stack.
copy_stack_to_slots_unmasked(SlotRange dst)475     void copy_stack_to_slots_unmasked(SlotRange dst) {
476         this->copy_stack_to_slots_unmasked(dst, /*offsetFromStackTop=*/dst.count);
477     }
478 
479     void copy_stack_to_slots_unmasked(SlotRange dst, int offsetFromStackTop);
480 
481     // Translates into copy_to_indirect_masked (from temp stack into values) in Raster Pipeline.
482     // `fixedRange` denotes a fixed set of slots; this range is pushed forward by the value at the
483     // top of stack `dynamicStack`. Pass the slot range of the variable being indexed as
484     // `limitRange`; this is used as a hard cap, to avoid indexing outside of bounds.
485     void copy_stack_to_slots_indirect(SlotRange fixedRange,
486                                       int dynamicStackID,
487                                       SlotRange limitRange);
488 
489     // Copies from temp stack to slots, including an indirect offset, then shrinks the temp stack.
pop_slots_indirect(SlotRange fixedRange,int dynamicStackID,SlotRange limitRange)490     void pop_slots_indirect(SlotRange fixedRange, int dynamicStackID, SlotRange limitRange) {
491         this->copy_stack_to_slots_indirect(fixedRange, dynamicStackID, limitRange);
492         this->discard_stack(fixedRange.count);
493     }
494 
495     // Performs a unary op (like `bitwise_not`), given a slot count of `slots`. The stack top is
496     // replaced with the result.
497     void unary_op(BuilderOp op, int32_t slots);
498 
499     // Performs a binary op (like `add_n_floats` or `cmpeq_n_ints`), given a slot count of
500     // `slots`. Two n-slot input values are consumed, and the result is pushed onto the stack.
501     void binary_op(BuilderOp op, int32_t slots);
502 
503     // Performs a ternary op (like `mix` or `smoothstep`), given a slot count of
504     // `slots`. Three n-slot input values are consumed, and the result is pushed onto the stack.
505     void ternary_op(BuilderOp op, int32_t slots);
506 
507     // Computes a dot product on the stack. The slots consumed (`slots`) must be between 1 and 4.
508     // Two n-slot input vectors are consumed, and a scalar result is pushed onto the stack.
509     void dot_floats(int32_t slots);
510 
511     // Computes refract(N, I, eta) on the stack. N and I are assumed to be 4-slot vectors, and can
512     // be padded with zeros for smaller inputs. Eta is a scalar. The result is a 4-slot vector.
513     void refract_floats();
514 
515     // Computes inverse(matN) on the stack. Pass 2, 3 or 4 for n to specify matrix size.
516     void inverse_matrix(int32_t n);
517 
518     // Shrinks the temp stack, discarding values on top.
519     void discard_stack(int32_t count, int stackID);
520 
discard_stack(int32_t count)521     void discard_stack(int32_t count) {
522         this->discard_stack(count, fCurrentStackID);
523     }
524 
525     // Grows the temp stack, leaving any preexisting values in place.
526     void pad_stack(int32_t count);
527 
528     // Copies vales from the temp stack into slots, and then shrinks the temp stack.
529     void pop_slots(SlotRange dst);
530 
531     // Creates many clones of the top single-slot item on the temp stack.
532     void push_duplicates(int count);
533 
534     // Creates a single clone of an item on the current temp stack. The cloned item can consist of
535     // any number of slots, and can be copied from an earlier position on the stack.
536     void push_clone(int numSlots, int offsetFromStackTop = 0);
537 
538     // Clones a range of slots from another stack onto this stack.
539     void push_clone_from_stack(SlotRange range, int otherStackID, int offsetFromStackTop);
540 
541     // Translates into copy_from_indirect_unmasked (from one temp stack to another) in Raster
542     // Pipeline. `fixedOffset` denotes a range of slots within the top `offsetFromStackTop` slots of
543     // `otherStackID`. This range is pushed forward by the value at the top of `dynamicStackID`.
544     void push_clone_indirect_from_stack(SlotRange fixedOffset,
545                                         int dynamicStackID,
546                                         int otherStackID,
547                                         int offsetFromStackTop);
548 
549     // Compares the stack top with the passed-in value; if it matches, enables the loop mask.
case_op(int value)550     void case_op(int value) {
551         this->appendInstruction(BuilderOp::case_op, {}, value);
552     }
553 
554     // Performs a `continue` in a loop.
continue_op(int continueMaskStackID)555     void continue_op(int continueMaskStackID) {
556         this->appendInstruction(BuilderOp::continue_op, {}, continueMaskStackID);
557     }
558 
select(int slots)559     void select(int slots) {
560         // Overlays the top two entries on the stack, making one hybrid entry. The execution mask
561         // is used to select which lanes are preserved.
562         SkASSERT(slots > 0);
563         this->appendInstruction(BuilderOp::select, {}, slots);
564     }
565 
566     // The opposite of push_slots; copies values from the temp stack into value slots, then
567     // shrinks the temp stack.
568     void pop_slots_unmasked(SlotRange dst);
569 
copy_slots_masked(SlotRange dst,SlotRange src)570     void copy_slots_masked(SlotRange dst, SlotRange src) {
571         SkASSERT(dst.count == src.count);
572         this->appendInstruction(BuilderOp::copy_slot_masked, {dst.index, src.index}, dst.count);
573     }
574 
575     void copy_slots_unmasked(SlotRange dst, SlotRange src);
576 
577     void copy_immutable_unmasked(SlotRange dst, SlotRange src);
578 
579     // Directly writes a constant value into a slot.
580     void copy_constant(Slot slot, int constantValue);
581 
582     // Stores zeros across the entire slot range.
583     void zero_slots_unmasked(SlotRange dst);
584 
585     // Consumes `consumedSlots` elements on the stack, then generates `components.size()` elements.
586     void swizzle(int consumedSlots, SkSpan<const int8_t> components);
587 
588     // Transposes a matrix of size CxR on the stack (into a matrix of size RxC).
589     void transpose(int columns, int rows);
590 
591     // Generates a CxR diagonal matrix from the top two scalars on the stack. The second scalar is
592     // used as the diagonal value; the first scalar (usually zero) fills in the rest of the slots.
593     void diagonal_matrix(int columns, int rows);
594 
595     // Resizes a CxR matrix at the top of the stack to C'xR'.
596     void matrix_resize(int origColumns, int origRows, int newColumns, int newRows);
597 
598     // Multiplies a CxR matrix/vector against an adjacent CxR matrix/vector on the stack.
599     void matrix_multiply(int leftColumns, int leftRows, int rightColumns, int rightRows);
600 
601     void push_condition_mask();
602 
pop_condition_mask()603     void pop_condition_mask() {
604         SkASSERT(this->executionMaskWritesAreEnabled());
605         this->appendInstruction(BuilderOp::pop_condition_mask, {});
606     }
607 
608     void merge_condition_mask();
609 
merge_inv_condition_mask()610     void merge_inv_condition_mask() {
611         SkASSERT(this->executionMaskWritesAreEnabled());
612         this->appendInstruction(BuilderOp::merge_inv_condition_mask, {});
613     }
614 
push_loop_mask()615     void push_loop_mask() {
616         SkASSERT(this->executionMaskWritesAreEnabled());
617         this->appendInstruction(BuilderOp::push_loop_mask, {});
618     }
619 
pop_loop_mask()620     void pop_loop_mask() {
621         SkASSERT(this->executionMaskWritesAreEnabled());
622         this->appendInstruction(BuilderOp::pop_loop_mask, {});
623     }
624 
625     // Exchanges src.rgba with the four values at the top of the stack.
626     void exchange_src();
627 
push_src_rgba()628     void push_src_rgba() {
629         this->appendInstruction(BuilderOp::push_src_rgba, {});
630     }
631 
push_dst_rgba()632     void push_dst_rgba() {
633         this->appendInstruction(BuilderOp::push_dst_rgba, {});
634     }
635 
push_device_xy01()636     void push_device_xy01() {
637         this->appendInstruction(BuilderOp::push_device_xy01, {});
638     }
639 
640     void pop_src_rgba();
641 
pop_dst_rgba()642     void pop_dst_rgba() {
643         this->appendInstruction(BuilderOp::pop_dst_rgba, {});
644     }
645 
mask_off_loop_mask()646     void mask_off_loop_mask() {
647         SkASSERT(this->executionMaskWritesAreEnabled());
648         this->appendInstruction(BuilderOp::mask_off_loop_mask, {});
649     }
650 
reenable_loop_mask(SlotRange src)651     void reenable_loop_mask(SlotRange src) {
652         SkASSERT(this->executionMaskWritesAreEnabled());
653         SkASSERT(src.count == 1);
654         this->appendInstruction(BuilderOp::reenable_loop_mask, {src.index});
655     }
656 
pop_and_reenable_loop_mask()657     void pop_and_reenable_loop_mask() {
658         SkASSERT(this->executionMaskWritesAreEnabled());
659         this->appendInstruction(BuilderOp::pop_and_reenable_loop_mask, {});
660     }
661 
merge_loop_mask()662     void merge_loop_mask() {
663         SkASSERT(this->executionMaskWritesAreEnabled());
664         this->appendInstruction(BuilderOp::merge_loop_mask, {});
665     }
666 
push_return_mask()667     void push_return_mask() {
668         SkASSERT(this->executionMaskWritesAreEnabled());
669         this->appendInstruction(BuilderOp::push_return_mask, {});
670     }
671 
672     void pop_return_mask();
673 
mask_off_return_mask()674     void mask_off_return_mask() {
675         SkASSERT(this->executionMaskWritesAreEnabled());
676         this->appendInstruction(BuilderOp::mask_off_return_mask, {});
677     }
678 
679     void invoke_shader(int childIdx);
680     void invoke_color_filter(int childIdx);
681     void invoke_blender(int childIdx);
682     void invoke_to_linear_srgb();
683     void invoke_from_linear_srgb();
684 
685     // Writes the current line number to the debug trace.
trace_line(int traceMaskStackID,int line)686     void trace_line(int traceMaskStackID, int line) {
687         this->appendInstruction(BuilderOp::trace_line, {}, traceMaskStackID, line);
688     }
689 
690     // Writes a variable update to the debug trace.
trace_var(int traceMaskStackID,SlotRange r)691     void trace_var(int traceMaskStackID, SlotRange r) {
692         this->appendInstruction(BuilderOp::trace_var, {r.index}, traceMaskStackID, r.count);
693     }
694 
695     // Writes a variable update (via indirection) to the debug trace.
696     void trace_var_indirect(int traceMaskStackID, SlotRange fixedRange,
697                             int dynamicStackID, SlotRange limitRange);
698 
699     // Writes a function-entrance to the debug trace.
trace_enter(int traceMaskStackID,int funcID)700     void trace_enter(int traceMaskStackID, int funcID) {
701         this->appendInstruction(BuilderOp::trace_enter, {}, traceMaskStackID, funcID);
702     }
703 
704     // Writes a function-exit to the debug trace.
trace_exit(int traceMaskStackID,int funcID)705     void trace_exit(int traceMaskStackID, int funcID) {
706         this->appendInstruction(BuilderOp::trace_exit, {}, traceMaskStackID, funcID);
707     }
708 
709     // Writes a scope-level change to the debug trace.
trace_scope(int traceMaskStackID,int delta)710     void trace_scope(int traceMaskStackID, int delta) {
711         this->appendInstruction(BuilderOp::trace_scope, {}, traceMaskStackID, delta);
712     }
713 
714 private:
715     struct SlotList {
fSlotASlotList716         SlotList(Slot a = NA, Slot b = NA) : fSlotA(a), fSlotB(b) {}
717         Slot fSlotA = NA;
718         Slot fSlotB = NA;
719     };
720     void appendInstruction(BuilderOp op, SlotList slots,
721                            int a = 0, int b = 0, int c = 0, int d = 0);
722     Instruction* lastInstruction(int fromBack = 0);
723     Instruction* lastInstructionOnAnyStack(int fromBack = 0);
724     void simplifyPopSlotsUnmasked(SlotRange* dst);
725     bool simplifyImmediateUnmaskedOp();
726 
727     skia_private::TArray<Instruction> fInstructions;
728     int fNumLabels = 0;
729     int fExecutionMaskWritesEnabled = 0;
730     int fCurrentStackID = 0;
731 };
732 
733 }  // namespace RP
734 }  // namespace SkSL
735 
736 #endif  // SKSL_RASTERPIPELINEBUILDER
737