/*
 * Copyright 2022 Google LLC
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#include "src/sksl/codegen/SkSLRasterPipelineCodeGenerator.h"

#include "include/core/SkPoint.h"
#include "include/core/SkSpan.h"
#include "include/private/base/SkTArray.h"
#include "include/private/base/SkTo.h"
#include "src/base/SkEnumBitMask.h"
#include "src/base/SkStringView.h"
#include "src/base/SkUtils.h"
#include "src/core/SkTHash.h"
#include "src/sksl/SkSLAnalysis.h"
#include "src/sksl/SkSLBuiltinTypes.h"
#include "src/sksl/SkSLCompiler.h"
#include "src/sksl/SkSLConstantFolder.h"
#include "src/sksl/SkSLContext.h"
#include "src/sksl/SkSLDefines.h"
#include "src/sksl/SkSLIntrinsicList.h"
#include "src/sksl/SkSLOperator.h"
#include "src/sksl/SkSLPosition.h"
#include "src/sksl/analysis/SkSLProgramUsage.h"
#include "src/sksl/codegen/SkSLRasterPipelineBuilder.h"
#include "src/sksl/ir/SkSLBinaryExpression.h"
#include "src/sksl/ir/SkSLBlock.h"
#include "src/sksl/ir/SkSLBreakStatement.h"
#include "src/sksl/ir/SkSLChildCall.h"
#include "src/sksl/ir/SkSLConstructor.h"
#include "src/sksl/ir/SkSLConstructorDiagonalMatrix.h"
#include "src/sksl/ir/SkSLConstructorMatrixResize.h"
#include "src/sksl/ir/SkSLConstructorSplat.h"
#include "src/sksl/ir/SkSLContinueStatement.h"
#include "src/sksl/ir/SkSLDoStatement.h"
#include "src/sksl/ir/SkSLExpression.h"
#include "src/sksl/ir/SkSLExpressionStatement.h"
#include "src/sksl/ir/SkSLFieldAccess.h"
#include "src/sksl/ir/SkSLForStatement.h"
#include "src/sksl/ir/SkSLFunctionCall.h"
#include "src/sksl/ir/SkSLFunctionDeclaration.h"
#include "src/sksl/ir/SkSLFunctionDefinition.h"
#include "src/sksl/ir/SkSLIRNode.h"
#include "src/sksl/ir/SkSLIfStatement.h"
#include "src/sksl/ir/SkSLIndexExpression.h"
#include "src/sksl/ir/SkSLLayout.h"
#include "src/sksl/ir/SkSLLiteral.h"
#include "src/sksl/ir/SkSLModifierFlags.h"
#include "src/sksl/ir/SkSLPostfixExpression.h"
#include "src/sksl/ir/SkSLPrefixExpression.h"
#include "src/sksl/ir/SkSLProgram.h"
#include "src/sksl/ir/SkSLProgramElement.h"
#include "src/sksl/ir/SkSLReturnStatement.h"
#include "src/sksl/ir/SkSLStatement.h"
#include "src/sksl/ir/SkSLSwitchCase.h"
#include "src/sksl/ir/SkSLSwitchStatement.h"
#include "src/sksl/ir/SkSLSwizzle.h"
#include "src/sksl/ir/SkSLTernaryExpression.h"
#include "src/sksl/ir/SkSLType.h"
#include "src/sksl/ir/SkSLVarDeclarations.h"
#include "src/sksl/ir/SkSLVariable.h"
#include "src/sksl/ir/SkSLVariableReference.h"
#include "src/sksl/tracing/SkSLDebugTracePriv.h"
#include "src/sksl/transform/SkSLTransform.h"

#include <algorithm>
#include <climits>
#include <cstddef>
#include <cstdint>
#include <float.h>
#include <iterator>
#include <optional>
#include <string>
#include <string_view>
#include <utility>
#include <vector>

using namespace skia_private;

namespace SkSL {
namespace RP {

static bool unsupported() {
    // If MakeRasterPipelineProgram returns false, set a breakpoint here for more information.
    return false;
}

class AutoContinueMask;
class Generator;
class LValue;

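/**
 * Assigns value slots to SkSL variables and function return values, and (when debugging) records a
 * matching SlotDebugInfo entry for each created slot.
 */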
class SlotManager {
public:
    SlotManager(std::vector<SlotDebugInfo>* i) : fSlotDebugInfo(i) {}

    /** Used by `createSlots` to add this variable to SlotDebugInfo inside the DebugTrace. */
    void addSlotDebugInfoForGroup(const std::string& varName,
                                  const Type& type,
                                  Position pos,
                                  int* groupIndex,
                                  bool isFunctionReturnValue);
    void addSlotDebugInfo(const std::string& varName,
                          const Type& type,
                          Position pos,
                          bool isFunctionReturnValue);

    /** Creates slots associated with an SkSL variable or return value. */
    SlotRange createSlots(std::string name,
                          const Type& type,
                          Position pos,
                          bool isFunctionReturnValue);

    /**
     * Associates previously-created slots with an SkSL variable; this can allow multiple variables
     * to share overlapping ranges. If the variable was already associated with a slot range,
     * returns the previously associated range.
     */
    std::optional<SlotRange> mapVariableToSlots(const Variable& v, SlotRange range);

    /**
     * Deletes the existing mapping between a variable and its slots; a future call to
     * `getVariableSlots` will see this as a brand new variable and associate new slots.
     */
    void unmapVariableSlots(const Variable& v);

    /** Looks up the slots associated with an SkSL variable; creates the slot if necessary. */
    SlotRange getVariableSlots(const Variable& v);

    /**
     * Looks up the slots associated with an SkSL function's return value; creates the range if
     * necessary. Note that recursion is never supported, so we don't need to maintain return values
     * in a stack; we can just statically allocate one slot per function call-site.
     */
    SlotRange getFunctionSlots(const IRNode& callSite, const FunctionDeclaration& f);

    /** Returns the total number of slots consumed. */
    int slotCount() const { return fSlotCount; }

private:
    THashMap<const IRNode*, SlotRange> fSlotMap;
    int fSlotCount = 0;
    std::vector<SlotDebugInfo>* fSlotDebugInfo;
};

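/**
 * RAII helper which creates a dedicated value stack via `Generator::createStack` and recycles it on
 * destruction; `enter`/`exit` temporarily redirect builder ops onto this stack.
 */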
class AutoStack {
public:
    /**
     * Creates a temporary stack. The caller is responsible for discarding every entry on this
     * stack before ~AutoStack is reached.
     */
    explicit AutoStack(Generator* g);
    ~AutoStack();

    /** Activates the associated stack. */
    void enter();

    /** Undoes a call to `enter`, returning to the previously-active stack. */
    void exit();

    /** Returns the stack ID of this AutoStack. */
    int stackID() { return fStackID; }

    /** Clones values from this stack onto the top of the active stack. */
    void pushClone(int slots);

    /** Clones values from a fixed range of this stack onto the top of the active stack. */
    void pushClone(SlotRange range, int offsetFromStackTop);

    /** Clones values from a dynamic range of this stack onto the top of the active stack. */
    void pushCloneIndirect(SlotRange range, int dynamicStackID, int offsetFromStackTop);

private:
    Generator* fGenerator;
    int fStackID = 0;
    int fParentStackID = 0;
};

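/**
 * Walks the IR of an SkSL program and emits Raster Pipeline builder instructions, managing value
 * slots, temporary stacks, and (optionally) debug-trace ops along the way.
 */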
class Generator {
public:
    Generator(const SkSL::Program& program, DebugTracePriv* debugTrace, bool writeTraceOps)
            : fProgram(program)
            , fContext(fProgram.fContext->fTypes, *fProgram.fContext->fErrors)
            , fDebugTrace(debugTrace)
            , fWriteTraceOps(writeTraceOps)
            , fProgramSlots(debugTrace ? &debugTrace->fSlotInfo : nullptr)
            , fUniformSlots(debugTrace ? &debugTrace->fUniformInfo : nullptr)
            , fImmutableSlots(nullptr) {
        fContext.fConfig = fProgram.fConfig.get();
        fContext.fModule = fProgram.fContext->fModule;
    }

    ~Generator() {
        // ~AutoStack calls into the Generator, so we need to make sure the trace mask is reset
        // before the Generator is destroyed.
        fTraceMask.reset();
    }

    /** Converts the SkSL main() function into a set of Instructions. */
    bool writeProgram(const FunctionDefinition& function);

    /** Returns the generated program. */
    std::unique_ptr<RP::Program> finish();

    /**
     * Converts an SkSL function into a set of Instructions. Returns nullopt if the function
     * contained unsupported statements or expressions.
     */
    std::optional<SlotRange> writeFunction(const IRNode& callSite,
                                           const FunctionDefinition& function,
                                           SkSpan<std::unique_ptr<Expression> const> arguments);

    /**
     * Returns the slot index of this function inside the FunctionDebugInfo array in DebugTracePriv.
     * The FunctionDebugInfo slot will be created if it doesn't already exist.
     */
    int getFunctionDebugInfo(const FunctionDeclaration& decl);

    /** Returns true for variables with slots in fProgramSlots; immutables or uniforms are false. */
    bool hasVariableSlots(const Variable& v) {
        return !IsUniform(v) && !fImmutableVariables.contains(&v);
    }

    /** Looks up the slots associated with an SkSL variable; creates the slots if necessary. */
    SlotRange getVariableSlots(const Variable& v) {
        SkASSERT(this->hasVariableSlots(v));
        return fProgramSlots.getVariableSlots(v);
    }

    /**
     * Looks up the slots associated with an immutable variable; creates the slots if necessary.
     */
    SlotRange getImmutableSlots(const Variable& v) {
        SkASSERT(!IsUniform(v));
        SkASSERT(fImmutableVariables.contains(&v));
        return fImmutableSlots.getVariableSlots(v);
    }

    /** Looks up the slots associated with an SkSL uniform; creates the slots if necessary. */
    SlotRange getUniformSlots(const Variable& v) {
        SkASSERT(IsUniform(v));
        SkASSERT(!fImmutableVariables.contains(&v));
        return fUniformSlots.getVariableSlots(v);
    }

    /**
     * Looks up the slots associated with an SkSL function's return value; creates the range if
     * necessary. Note that recursion is never supported, so we don't need to maintain return values
     * in a stack; we can just statically allocate one slot per function call-site.
     */
    SlotRange getFunctionSlots(const IRNode& callSite, const FunctionDeclaration& f) {
        return fProgramSlots.getFunctionSlots(callSite, f);
    }

    /**
     * Creates an additional stack for the program to push values onto. The stack will not become
     * actively in-use until `setCurrentStack` is called.
     */
    int createStack();

    /** Frees a stack generated by `createStack`. The freed stack must be completely empty. */
    void recycleStack(int stackID);

    /** Redirects builder ops to point to a different stack (created by `createStack`). */
    void setCurrentStack(int stackID);

    /** Reports the currently active stack. */
    int currentStack() {
        return fCurrentStack;
    }

    /**
     * Returns an LValue for the passed-in expression; if the expression isn't supported as an
     * LValue, returns nullptr.
     */
    std::unique_ptr<LValue> makeLValue(const Expression& e, bool allowScratch = false);

    /** Copies the top-of-stack value into this lvalue, without discarding it from the stack. */
    [[nodiscard]] bool store(LValue& lvalue);

    /** Pushes the lvalue onto the top-of-stack. */
    [[nodiscard]] bool push(LValue& lvalue);

    /** The Builder stitches our instructions together into Raster Pipeline code. */
    Builder* builder() { return &fBuilder; }

    /** Appends a statement to the program. */
    [[nodiscard]] bool writeStatement(const Statement& s);
    [[nodiscard]] bool writeBlock(const Block& b);
    [[nodiscard]] bool writeBreakStatement(const BreakStatement& b);
    [[nodiscard]] bool writeContinueStatement(const ContinueStatement& b);
    [[nodiscard]] bool writeDoStatement(const DoStatement& d);
    [[nodiscard]] bool writeExpressionStatement(const ExpressionStatement& e);
    [[nodiscard]] bool writeMasklessForStatement(const ForStatement& f);
    [[nodiscard]] bool writeForStatement(const ForStatement& f);
    [[nodiscard]] bool writeGlobals();
    [[nodiscard]] bool writeIfStatement(const IfStatement& i);
    [[nodiscard]] bool writeDynamicallyUniformIfStatement(const IfStatement& i);
    [[nodiscard]] bool writeReturnStatement(const ReturnStatement& r);
    [[nodiscard]] bool writeSwitchStatement(const SwitchStatement& s);
    [[nodiscard]] bool writeVarDeclaration(const VarDeclaration& v);
    [[nodiscard]] bool writeImmutableVarDeclaration(const VarDeclaration& d);

    /** Pushes an expression to the value stack. */
    [[nodiscard]] bool pushBinaryExpression(const BinaryExpression& e);
    [[nodiscard]] bool pushBinaryExpression(const Expression& left,
                                            Operator op,
                                            const Expression& right);
    [[nodiscard]] bool pushChildCall(const ChildCall& c);
    [[nodiscard]] bool pushConstructorCast(const AnyConstructor& c);
    [[nodiscard]] bool pushConstructorCompound(const AnyConstructor& c);
    [[nodiscard]] bool pushConstructorDiagonalMatrix(const ConstructorDiagonalMatrix& c);
    [[nodiscard]] bool pushConstructorMatrixResize(const ConstructorMatrixResize& c);
    [[nodiscard]] bool pushConstructorSplat(const ConstructorSplat& c);
    [[nodiscard]] bool pushExpression(const Expression& e, bool usesResult = true);
    [[nodiscard]] bool pushFieldAccess(const FieldAccess& f);
    [[nodiscard]] bool pushFunctionCall(const FunctionCall& c);
    [[nodiscard]] bool pushIndexExpression(const IndexExpression& i);
    [[nodiscard]] bool pushIntrinsic(const FunctionCall& c);
    [[nodiscard]] bool pushIntrinsic(IntrinsicKind intrinsic, const Expression& arg0);
    [[nodiscard]] bool pushIntrinsic(IntrinsicKind intrinsic,
                                     const Expression& arg0,
                                     const Expression& arg1);
    [[nodiscard]] bool pushIntrinsic(IntrinsicKind intrinsic,
                                     const Expression& arg0,
                                     const Expression& arg1,
                                     const Expression& arg2);
    [[nodiscard]] bool pushLiteral(const Literal& l);
    [[nodiscard]] bool pushPostfixExpression(const PostfixExpression& p, bool usesResult);
    [[nodiscard]] bool pushPrefixExpression(const PrefixExpression& p);
    [[nodiscard]] bool pushPrefixExpression(Operator op, const Expression& expr);
    [[nodiscard]] bool pushSwizzle(const Swizzle& s);
    [[nodiscard]] bool pushTernaryExpression(const TernaryExpression& t);
    [[nodiscard]] bool pushTernaryExpression(const Expression& test,
                                             const Expression& ifTrue,
                                             const Expression& ifFalse);
    [[nodiscard]] bool pushDynamicallyUniformTernaryExpression(const Expression& test,
                                                               const Expression& ifTrue,
                                                               const Expression& ifFalse);
    [[nodiscard]] bool pushVariableReference(const VariableReference& v);

    /** Support methods for immutable data, which trade more slots for smaller code size. */
    using ImmutableBits = int32_t;

    [[nodiscard]] bool pushImmutableData(const Expression& e);
    [[nodiscard]] std::optional<SlotRange> findPreexistingImmutableData(
            const TArray<ImmutableBits>& immutableValues);
    [[nodiscard]] std::optional<ImmutableBits> getImmutableBitsForSlot(const Expression& expr,
                                                                       size_t slot);
    [[nodiscard]] bool getImmutableValueForExpression(const Expression& expr,
                                                      TArray<ImmutableBits>* immutableValues);
    void storeImmutableValueToSlots(const TArray<ImmutableBits>& immutableValues, SlotRange slots);

    /** Pops an expression from the value stack and copies it into slots. */
    void popToSlotRange(SlotRange r) {
        fBuilder.pop_slots(r);
        if (this->shouldWriteTraceOps()) {
            fBuilder.trace_var(fTraceMask->stackID(), r);
        }
    }
    void popToSlotRangeUnmasked(SlotRange r) {
        fBuilder.pop_slots_unmasked(r);
        if (this->shouldWriteTraceOps()) {
            fBuilder.trace_var(fTraceMask->stackID(), r);
        }
    }

    /** Pops an expression from the value stack and discards it. */
    void discardExpression(int slots) { fBuilder.discard_stack(slots); }

    /** Zeroes out a range of slots. */
    void zeroSlotRangeUnmasked(SlotRange r) {
        fBuilder.zero_slots_unmasked(r);
        if (this->shouldWriteTraceOps()) {
            fBuilder.trace_var(fTraceMask->stackID(), r);
        }
    }

    /**
     * Emits a trace_line opcode. writeStatement does this, and statements that alter control flow
     * may need to explicitly add additional traces.
     */
    void emitTraceLine(Position pos);

    /**
     * Emits a trace_scope opcode, which alters the SkSL variable-scope depth.
     * Unlike the other trace ops, trace_scope takes a dedicated mask instead of the trace-scope
     * mask. Call `pushTraceScopeMask` to synthesize this mask; discard it when you're done.
     */
    void pushTraceScopeMask();
    void discardTraceScopeMask();
    void emitTraceScope(int delta);

    /** Prepares our position-to-line-offset conversion table (stored in `fLineOffsets`). */
    void calculateLineOffsets();

    bool shouldWriteTraceOps() { return fDebugTrace && fWriteTraceOps; }
    int traceMaskStackID() { return fTraceMask->stackID(); }

    /** Expression utilities. */
    struct TypedOps {
        BuilderOp fFloatOp;
        BuilderOp fSignedOp;
        BuilderOp fUnsignedOp;
        BuilderOp fBooleanOp;
    };

    static BuilderOp GetTypedOp(const SkSL::Type& type, const TypedOps& ops);

    [[nodiscard]] bool unaryOp(const SkSL::Type& type, const TypedOps& ops);
    [[nodiscard]] bool binaryOp(const SkSL::Type& type, const TypedOps& ops);
    [[nodiscard]] bool ternaryOp(const SkSL::Type& type, const TypedOps& ops);
    [[nodiscard]] bool pushIntrinsic(const TypedOps& ops, const Expression& arg0);
    [[nodiscard]] bool pushIntrinsic(const TypedOps& ops,
                                     const Expression& arg0,
                                     const Expression& arg1);
    [[nodiscard]] bool pushIntrinsic(BuilderOp builderOp, const Expression& arg0);
    [[nodiscard]] bool pushIntrinsic(BuilderOp builderOp,
                                     const Expression& arg0,
                                     const Expression& arg1);
    [[nodiscard]] bool pushAbsFloatIntrinsic(int slots);
    [[nodiscard]] bool pushLengthIntrinsic(int slotCount);
    [[nodiscard]] bool pushVectorizedExpression(const Expression& expr, const Type& vectorType);
    [[nodiscard]] bool pushVariableReferencePartial(const VariableReference& v, SlotRange subset);
    [[nodiscard]] bool pushLValueOrExpression(LValue* lvalue, const Expression& expr);
    [[nodiscard]] bool pushMatrixMultiply(LValue* lvalue,
                                          const Expression& left,
                                          const Expression& right,
                                          int leftColumns, int leftRows,
                                          int rightColumns, int rightRows);
    [[nodiscard]] bool pushStructuredComparison(LValue* left,
                                                Operator op,
                                                LValue* right,
                                                const Type& type);

    void foldWithMultiOp(BuilderOp op, int elements);
    void foldComparisonOp(Operator op, int elements);

    BuilderOp getTypedOp(const SkSL::Type& type, const TypedOps& ops) const;

    Analysis::ReturnComplexity returnComplexity(const FunctionDefinition* func) {
        Analysis::ReturnComplexity* complexity = fReturnComplexityMap.find(func);
        if (!complexity) {
            complexity = fReturnComplexityMap.set(fCurrentFunction,
                                                  Analysis::GetReturnComplexity(*func));
        }
        return *complexity;
    }

    bool needsReturnMask(const FunctionDefinition* func) {
        return this->returnComplexity(func) >= Analysis::ReturnComplexity::kEarlyReturns;
    }

    bool needsFunctionResultSlots(const FunctionDefinition* func) {
        return this->shouldWriteTraceOps() || (this->returnComplexity(func) >
                                               Analysis::ReturnComplexity::kSingleSafeReturn);
    }

    static bool IsUniform(const Variable& var) {
       return var.modifierFlags().isUniform();
    }

    static bool IsOutParameter(const Variable& var) {
        return (var.modifierFlags() & (ModifierFlag::kIn | ModifierFlag::kOut)) ==
               ModifierFlag::kOut;
    }

    static bool IsInoutParameter(const Variable& var) {
        return (var.modifierFlags() & (ModifierFlag::kIn | ModifierFlag::kOut)) ==
               (ModifierFlag::kIn | ModifierFlag::kOut);
    }

private:
    const SkSL::Program& fProgram;
    SkSL::Context fContext;
    Builder fBuilder;
    DebugTracePriv* fDebugTrace = nullptr;
    bool fWriteTraceOps = false;
    THashMap<const Variable*, int> fChildEffectMap;

    SlotManager fProgramSlots;
    SlotManager fUniformSlots;
    SlotManager fImmutableSlots;

    std::optional<AutoStack> fTraceMask;
    const FunctionDefinition* fCurrentFunction = nullptr;
    SlotRange fCurrentFunctionResult;
    AutoContinueMask* fCurrentContinueMask = nullptr;
    int fCurrentBreakTarget = -1;
    int fCurrentStack = 0;
    int fNextStackID = 0;
    TArray<int> fRecycledStacks;

    THashMap<const FunctionDefinition*, Analysis::ReturnComplexity> fReturnComplexityMap;

    THashMap<ImmutableBits, THashSet<Slot>> fImmutableSlotMap;
    THashSet<const Variable*> fImmutableVariables;

    // `fInsideCompoundStatement` will be nonzero if we are currently writing statements inside of a
    // compound-statement Block. (Conceptually those statements should all count as one.)
    int fInsideCompoundStatement = 0;

    // `fLineOffsets` contains the position of each newline in the source, plus a zero at the
    // beginning, and the total source length at the end, as sentinels.
    TArray<int> fLineOffsets;

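    // Per-type op tables for the arithmetic and comparison operators; `unsupported` marks a type
    // category which has no corresponding builder op.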
    static constexpr auto kAddOps = TypedOps{BuilderOp::add_n_floats,
                                             BuilderOp::add_n_ints,
                                             BuilderOp::add_n_ints,
                                             BuilderOp::unsupported};
    static constexpr auto kSubtractOps = TypedOps{BuilderOp::sub_n_floats,
                                                  BuilderOp::sub_n_ints,
                                                  BuilderOp::sub_n_ints,
                                                  BuilderOp::unsupported};
    static constexpr auto kMultiplyOps = TypedOps{BuilderOp::mul_n_floats,
                                                  BuilderOp::mul_n_ints,
                                                  BuilderOp::mul_n_ints,
                                                  BuilderOp::unsupported};
    static constexpr auto kDivideOps = TypedOps{BuilderOp::div_n_floats,
                                                BuilderOp::div_n_ints,
                                                BuilderOp::div_n_uints,
                                                BuilderOp::unsupported};
    static constexpr auto kLessThanOps = TypedOps{BuilderOp::cmplt_n_floats,
                                                  BuilderOp::cmplt_n_ints,
                                                  BuilderOp::cmplt_n_uints,
                                                  BuilderOp::unsupported};
    static constexpr auto kLessThanEqualOps = TypedOps{BuilderOp::cmple_n_floats,
                                                       BuilderOp::cmple_n_ints,
                                                       BuilderOp::cmple_n_uints,
                                                       BuilderOp::unsupported};
    static constexpr auto kEqualOps = TypedOps{BuilderOp::cmpeq_n_floats,
                                               BuilderOp::cmpeq_n_ints,
                                               BuilderOp::cmpeq_n_ints,
                                               BuilderOp::cmpeq_n_ints};
    static constexpr auto kNotEqualOps = TypedOps{BuilderOp::cmpne_n_floats,
                                                  BuilderOp::cmpne_n_ints,
                                                  BuilderOp::cmpne_n_ints,
                                                  BuilderOp::cmpne_n_ints};
    static constexpr auto kModOps = TypedOps{BuilderOp::mod_n_floats,
                                             BuilderOp::unsupported,
                                             BuilderOp::unsupported,
                                             BuilderOp::unsupported};
    static constexpr auto kMinOps = TypedOps{BuilderOp::min_n_floats,
                                             BuilderOp::min_n_ints,
                                             BuilderOp::min_n_uints,
                                             BuilderOp::min_n_uints};
    static constexpr auto kMaxOps = TypedOps{BuilderOp::max_n_floats,
                                             BuilderOp::max_n_ints,
                                             BuilderOp::max_n_uints,
                                             BuilderOp::max_n_uints};
    static constexpr auto kMixOps = TypedOps{BuilderOp::mix_n_floats,
                                             BuilderOp::unsupported,
                                             BuilderOp::unsupported,
                                             BuilderOp::unsupported};
    static constexpr auto kInverseSqrtOps = TypedOps{BuilderOp::invsqrt_float,
                                                     BuilderOp::unsupported,
                                                     BuilderOp::unsupported,
                                                     BuilderOp::unsupported};
    friend class AutoContinueMask;
};

AutoStack::AutoStack(Generator* g)
        : fGenerator(g)
        , fStackID(g->createStack()) {}

AutoStack::~AutoStack() {
    fGenerator->recycleStack(fStackID);
}

void AutoStack::enter() {
    fParentStackID = fGenerator->currentStack();
    fGenerator->setCurrentStack(fStackID);
}

void AutoStack::exit() {
    SkASSERT(fGenerator->currentStack() == fStackID);
    fGenerator->setCurrentStack(fParentStackID);
}

void AutoStack::pushClone(int slots) {
    this->pushClone(SlotRange{0, slots}, /*offsetFromStackTop=*/slots);
}

void AutoStack::pushClone(SlotRange range, int offsetFromStackTop) {
    fGenerator->builder()->push_clone_from_stack(range, fStackID, offsetFromStackTop);
}

void AutoStack::pushCloneIndirect(SlotRange range, int dynamicStackID, int offsetFromStackTop) {
    fGenerator->builder()->push_clone_indirect_from_stack(
            range, dynamicStackID, /*otherStackID=*/fStackID, offsetFromStackTop);
}

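/**
 * Tracks the stack which holds the continue mask for the innermost loop. The mask stack is only
 * created once `enable` is called; `enterLoopBody` pushes a zeroed mask at the top of each
 * iteration, and `exitLoopBody` pops it, re-enabling any lanes that executed `continue`.
 */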
class AutoContinueMask {
public:
    AutoContinueMask(Generator* gen) : fGenerator(gen) {}

    ~AutoContinueMask() {
        if (fPreviousContinueMask) {
            fGenerator->fCurrentContinueMask = fPreviousContinueMask;
        }
    }

    void enable() {
        SkASSERT(!fContinueMaskStack.has_value());

        fContinueMaskStack.emplace(fGenerator);
        fPreviousContinueMask = fGenerator->fCurrentContinueMask;
        fGenerator->fCurrentContinueMask = this;
    }

    void enter() {
        SkASSERT(fContinueMaskStack.has_value());
        fContinueMaskStack->enter();
    }

    void exit() {
        SkASSERT(fContinueMaskStack.has_value());
        fContinueMaskStack->exit();
    }

    void enterLoopBody() {
        if (fContinueMaskStack.has_value()) {
            fContinueMaskStack->enter();
            fGenerator->builder()->push_constant_i(0);
            fContinueMaskStack->exit();
        }
    }

    void exitLoopBody() {
        if (fContinueMaskStack.has_value()) {
            fContinueMaskStack->enter();
            fGenerator->builder()->pop_and_reenable_loop_mask();
            fContinueMaskStack->exit();
        }
    }

    int stackID() {
        SkASSERT(fContinueMaskStack.has_value());
        return fContinueMaskStack->stackID();
    }

private:
    std::optional<AutoStack> fContinueMaskStack;
    Generator* fGenerator = nullptr;
    AutoContinueMask* fPreviousContinueMask = nullptr;
};

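/**
 * Assigns a fresh label ID as the current loop target (e.g. the break target) for the duration of
 * a scope, restoring the previous target on destruction.
 */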
class AutoLoopTarget {
public:
    AutoLoopTarget(Generator* gen, int* targetPtr) : fGenerator(gen), fLoopTargetPtr(targetPtr) {
        fLabelID = fGenerator->builder()->nextLabelID();
        fPreviousLoopTarget = *fLoopTargetPtr;
        *fLoopTargetPtr = fLabelID;
    }

    ~AutoLoopTarget() {
        *fLoopTargetPtr = fPreviousLoopTarget;
    }

    int labelID() {
        return fLabelID;
    }

private:
    Generator* fGenerator = nullptr;
    int* fLoopTargetPtr = nullptr;
    int fPreviousLoopTarget;
    int fLabelID;
};

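/**
 * Abstract representation of an addressable storage location: a fixed slot range, an optional
 * dynamic offset, and an optional swizzle.
 */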
class LValue {
public:
    virtual ~LValue() = default;

    /** Returns true if this lvalue is actually writable--temporaries and uniforms are not. */
    virtual bool isWritable() const = 0;

    /**
     * Returns the fixed slot range of the lvalue, after it is winnowed down to the selected
     * field/index. The range is calculated assuming every dynamic index will evaluate to zero.
     */
    virtual SlotRange fixedSlotRange(Generator* gen) = 0;

    /**
     * Returns a stack which holds a single integer, representing the dynamic offset of the lvalue.
     * This value does not incorporate the fixed offset. If null is returned, the lvalue doesn't
     * have a dynamic offset. `evaluateDynamicIndices` must be called before this is used.
     */
    virtual AutoStack* dynamicSlotRange() = 0;

    /** Returns the swizzle components of the lvalue, or an empty span for non-swizzle LValues. */
    virtual SkSpan<const int8_t> swizzle() { return {}; }

    /** Pushes values directly onto the stack. */
    [[nodiscard]] virtual bool push(Generator* gen,
                                    SlotRange fixedOffset,
                                    AutoStack* dynamicOffset,
                                    SkSpan<const int8_t> swizzle) = 0;

    /** Stores topmost values from the stack directly into the lvalue. */
    [[nodiscard]] virtual bool store(Generator* gen,
                                     SlotRange fixedOffset,
                                     AutoStack* dynamicOffset,
                                     SkSpan<const int8_t> swizzle) = 0;
    /**
     * Some lvalues refer to a temporary expression; these temps can be held in the
     * scratch-expression field to ensure that they exist for the lifetime of the lvalue.
     */
    std::unique_ptr<Expression> fScratchExpression;
};

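/**
 * A read-only lvalue wrapping a temporary expression. The expression is evaluated onto a dedicated
 * stack the first time it is pushed, and discarded when the lvalue is destroyed.
 */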
class ScratchLValue final : public LValue {
public:
    explicit ScratchLValue(const Expression& e)
            : fExpression(&e)
            , fNumSlots(e.type().slotCount()) {}

    ~ScratchLValue() override {
        if (fGenerator && fDedicatedStack.has_value()) {
            // Jettison the scratch expression.
            fDedicatedStack->enter();
            fGenerator->discardExpression(fNumSlots);
            fDedicatedStack->exit();
        }
    }

    bool isWritable() const override {
        return false;
    }

    SlotRange fixedSlotRange(Generator* gen) override {
        return SlotRange{0, fNumSlots};
    }

    AutoStack* dynamicSlotRange() override {
        return nullptr;
    }

    [[nodiscard]] bool push(Generator* gen,
                            SlotRange fixedOffset,
                            AutoStack* dynamicOffset,
                            SkSpan<const int8_t> swizzle) override {
        if (!fDedicatedStack.has_value()) {
            // Push the scratch expression onto a dedicated stack.
            fGenerator = gen;
            fDedicatedStack.emplace(fGenerator);
            fDedicatedStack->enter();
            if (!fGenerator->pushExpression(*fExpression)) {
                return unsupported();
            }
            fDedicatedStack->exit();
        }

        if (dynamicOffset) {
            fDedicatedStack->pushCloneIndirect(fixedOffset, dynamicOffset->stackID(), fNumSlots);
        } else {
            fDedicatedStack->pushClone(fixedOffset, fNumSlots);
        }
        if (!swizzle.empty()) {
            gen->builder()->swizzle(fixedOffset.count, swizzle);
        }
        return true;
    }

    [[nodiscard]] bool store(Generator*, SlotRange, AutoStack*, SkSpan<const int8_t>) override {
        SkDEBUGFAIL("scratch lvalues cannot be stored into");
        return unsupported();
    }

private:
    Generator* fGenerator = nullptr;
    const Expression* fExpression = nullptr;
    std::optional<AutoStack> fDedicatedStack;
    int fNumSlots = 0;
};

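/** An lvalue which addresses an ordinary variable, or a uniform (pushable but not writable). */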
class VariableLValue final : public LValue {
public:
    explicit VariableLValue(const Variable* v) : fVariable(v) {}

    bool isWritable() const override {
        return !Generator::IsUniform(*fVariable);
    }

    SlotRange fixedSlotRange(Generator* gen) override {
        return Generator::IsUniform(*fVariable) ? gen->getUniformSlots(*fVariable)
                                                : gen->getVariableSlots(*fVariable);
    }

    AutoStack* dynamicSlotRange() override {
        return nullptr;
    }

    [[nodiscard]] bool push(Generator* gen,
                            SlotRange fixedOffset,
                            AutoStack* dynamicOffset,
                            SkSpan<const int8_t> swizzle) override {
        if (Generator::IsUniform(*fVariable)) {
            if (dynamicOffset) {
                gen->builder()->push_uniform_indirect(fixedOffset, dynamicOffset->stackID(),
                                                      this->fixedSlotRange(gen));
            } else {
                gen->builder()->push_uniform(fixedOffset);
            }
        } else {
            if (dynamicOffset) {
                gen->builder()->push_slots_indirect(fixedOffset, dynamicOffset->stackID(),
                                                    this->fixedSlotRange(gen));
            } else {
                gen->builder()->push_slots(fixedOffset);
            }
        }
        if (!swizzle.empty()) {
            gen->builder()->swizzle(fixedOffset.count, swizzle);
        }
        return true;
    }

    [[nodiscard]] bool store(Generator* gen,
                             SlotRange fixedOffset,
                             AutoStack* dynamicOffset,
                             SkSpan<const int8_t> swizzle) override {
        SkASSERT(!Generator::IsUniform(*fVariable));

        if (swizzle.empty()) {
            if (dynamicOffset) {
                gen->builder()->copy_stack_to_slots_indirect(fixedOffset, dynamicOffset->stackID(),
                                                             this->fixedSlotRange(gen));
            } else {
                gen->builder()->copy_stack_to_slots(fixedOffset);
            }
        } else {
            if (dynamicOffset) {
                gen->builder()->swizzle_copy_stack_to_slots_indirect(fixedOffset,
                                                                     dynamicOffset->stackID(),
                                                                     this->fixedSlotRange(gen),
                                                                     swizzle,
                                                                     swizzle.size());
            } else {
                gen->builder()->swizzle_copy_stack_to_slots(fixedOffset, swizzle, swizzle.size());
            }
        }
        if (gen->shouldWriteTraceOps()) {
            if (dynamicOffset) {
                gen->builder()->trace_var_indirect(gen->traceMaskStackID(),
                                                   fixedOffset,
                                                   dynamicOffset->stackID(),
                                                   this->fixedSlotRange(gen));
            } else {
                gen->builder()->trace_var(gen->traceMaskStackID(), fixedOffset);
            }
        }
        return true;
    }

private:
    const Variable* fVariable;
};

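/** A read-only lvalue which addresses a variable stored in immutable-data slots. */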
class ImmutableLValue final : public LValue {
public:
    explicit ImmutableLValue(const Variable* v) : fVariable(v) {}

    bool isWritable() const override {
        return false;
    }

    SlotRange fixedSlotRange(Generator* gen) override {
        return gen->getImmutableSlots(*fVariable);
    }

    AutoStack* dynamicSlotRange() override {
        return nullptr;
    }

    [[nodiscard]] bool push(Generator* gen,
                            SlotRange fixedOffset,
                            AutoStack* dynamicOffset,
                            SkSpan<const int8_t> swizzle) override {
        if (dynamicOffset) {
            gen->builder()->push_immutable_indirect(fixedOffset, dynamicOffset->stackID(),
                                                    this->fixedSlotRange(gen));
        } else {
            gen->builder()->push_immutable(fixedOffset);
        }
        if (!swizzle.empty()) {
            gen->builder()->swizzle(fixedOffset.count, swizzle);
        }
        return true;
    }

    [[nodiscard]] bool store(Generator* gen,
                             SlotRange fixedOffset,
                             AutoStack* dynamicOffset,
                             SkSpan<const int8_t> swizzle) override {
        SkDEBUGFAIL("immutable values cannot be stored into");
        return unsupported();
    }

private:
    const Variable* fVariable;
};

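/**
 * An lvalue which applies a swizzle to a parent lvalue; pushes and stores forward the swizzle
 * components to the parent.
 */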
class SwizzleLValue final : public LValue {
public:
    explicit SwizzleLValue(std::unique_ptr<LValue> p, const ComponentArray& c)
            : fParent(std::move(p))
            , fComponents(c) {
        SkASSERT(!fComponents.empty() && fComponents.size() <= 4);
    }

    bool isWritable() const override {
        return fParent->isWritable();
    }

    SlotRange fixedSlotRange(Generator* gen) override {
        return fParent->fixedSlotRange(gen);
    }

    AutoStack* dynamicSlotRange() override {
        return fParent->dynamicSlotRange();
    }

    SkSpan<const int8_t> swizzle() override {
        return fComponents;
    }

    [[nodiscard]] bool push(Generator* gen,
                            SlotRange fixedOffset,
                            AutoStack* dynamicOffset,
                            SkSpan<const int8_t> swizzle) override {
        if (!swizzle.empty()) {
            SkDEBUGFAIL("swizzle-of-a-swizzle should have been folded out in front end");
            return unsupported();
        }
        return fParent->push(gen, fixedOffset, dynamicOffset, fComponents);
    }

    [[nodiscard]] bool store(Generator* gen,
                             SlotRange fixedOffset,
                             AutoStack* dynamicOffset,
                             SkSpan<const int8_t> swizzle) override {
        if (!swizzle.empty()) {
            SkDEBUGFAIL("swizzle-of-a-swizzle should have been folded out in front end");
            return unsupported();
        }
        return fParent->store(gen, fixedOffset, dynamicOffset, fComponents);
    }

private:
    std::unique_ptr<LValue> fParent;
    const ComponentArray& fComponents;
};

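/**
 * An lvalue which exposes a fixed sub-range of its parent's slots (e.g. a struct field or a
 * sliceable swizzle). It does not take ownership of the parent lvalue.
 */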
class UnownedLValueSlice : public LValue {
public:
    explicit UnownedLValueSlice(LValue* p, int initialSlot, int numSlots)
            : fParent(p)
            , fInitialSlot(initialSlot)
            , fNumSlots(numSlots) {
        SkASSERT(fInitialSlot >= 0);
        SkASSERT(fNumSlots > 0);
    }

    bool isWritable() const override {
        return fParent->isWritable();
    }

    SlotRange fixedSlotRange(Generator* gen) override {
        SlotRange range = fParent->fixedSlotRange(gen);
        SlotRange adjusted = range;
        adjusted.index += fInitialSlot;
        adjusted.count = fNumSlots;
        SkASSERT((adjusted.index + adjusted.count) <= (range.index + range.count));
        return adjusted;
    }

    AutoStack* dynamicSlotRange() override {
        return fParent->dynamicSlotRange();
    }

    [[nodiscard]] bool push(Generator* gen,
                            SlotRange fixedOffset,
                            AutoStack* dynamicOffset,
                            SkSpan<const int8_t> swizzle) override {
        return fParent->push(gen, fixedOffset, dynamicOffset, swizzle);
    }

    [[nodiscard]] bool store(Generator* gen,
                             SlotRange fixedOffset,
                             AutoStack* dynamicOffset,
                             SkSpan<const int8_t> swizzle) override {
        return fParent->store(gen, fixedOffset, dynamicOffset, swizzle);
    }

protected:
    LValue* fParent;

private:
    int fInitialSlot = 0;
    int fNumSlots = 0;
};

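/** A slice lvalue which takes ownership of its parent lvalue and deletes it on destruction. */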
class LValueSlice final : public UnownedLValueSlice {
public:
    explicit LValueSlice(std::unique_ptr<LValue> p, int initialSlot, int numSlots)
            : UnownedLValueSlice(p.release(), initialSlot, numSlots) {}

    ~LValueSlice() override {
        delete fParent;
    }
};

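/**
 * An lvalue for an index expression whose index is not a compile-time constant.
 * `evaluateDynamicIndices` computes the offset once onto a dedicated stack, folding in any dynamic
 * offset from the parent lvalue.
 */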
1016 class DynamicIndexLValue final : public LValue {
1017 public:
DynamicIndexLValue(std::unique_ptr<LValue> p,const IndexExpression & i)1018     explicit DynamicIndexLValue(std::unique_ptr<LValue> p, const IndexExpression& i)
1019             : fParent(std::move(p))
1020             , fIndexExpr(&i) {
1021         SkASSERT(fIndexExpr->index()->type().isInteger());
1022     }
1023 
~DynamicIndexLValue()1024     ~DynamicIndexLValue() override {
1025         if (fDedicatedStack.has_value()) {
1026             SkASSERT(fGenerator);
1027 
1028             // Jettison the index expression.
1029             fDedicatedStack->enter();
1030             fGenerator->discardExpression(/*slots=*/1);
1031             fDedicatedStack->exit();
1032         }
1033     }
1034 
isWritable() const1035     bool isWritable() const override {
1036         return fParent->isWritable();
1037     }
1038 
evaluateDynamicIndices(Generator * gen)1039     [[nodiscard]] bool evaluateDynamicIndices(Generator* gen) {
1040         // The index must only be computed once; the index-expression could have side effects.
1041         // Once it has been computed, the offset lives on `fDedicatedStack`.
1042         SkASSERT(!fDedicatedStack.has_value());
1043         SkASSERT(!fGenerator);
1044         fGenerator = gen;
1045         fDedicatedStack.emplace(fGenerator);
1046 
1047         if (!fParent->swizzle().empty()) {
1048             SkDEBUGFAIL("an indexed-swizzle should have been handled by RewriteIndexedSwizzle");
1049             return unsupported();
1050         }
1051 
1052         // Push the index expression onto the dedicated stack.
1053         fDedicatedStack->enter();
1054         if (!fGenerator->pushExpression(*fIndexExpr->index())) {
1055             return unsupported();
1056         }
1057 
1058         // Multiply the index-expression result by the per-value slot count.
1059         int slotCount = fIndexExpr->type().slotCount();
1060         if (slotCount != 1) {
1061             fGenerator->builder()->push_constant_i(fIndexExpr->type().slotCount());
1062             fGenerator->builder()->binary_op(BuilderOp::mul_n_ints, 1);
1063         }
1064 
1065         // Check to see if a parent LValue already has a dynamic index. If so, we need to
1066         // incorporate its value into our own.
1067         if (AutoStack* parentDynamicIndexStack = fParent->dynamicSlotRange()) {
1068             parentDynamicIndexStack->pushClone(/*slots=*/1);
1069             fGenerator->builder()->binary_op(BuilderOp::add_n_ints, 1);
1070         }
1071         fDedicatedStack->exit();
1072         return true;
1073     }
1074 
fixedSlotRange(Generator * gen)1075     SlotRange fixedSlotRange(Generator* gen) override {
1076         // Compute the fixed slot range as if we are indexing into position zero.
1077         SlotRange range = fParent->fixedSlotRange(gen);
1078         range.count = fIndexExpr->type().slotCount();
1079         return range;
1080     }
1081 
dynamicSlotRange()1082     AutoStack* dynamicSlotRange() override {
1083         // We incorporated any parent dynamic offsets when `evaluateDynamicIndices` was called.
1084         SkASSERT(fDedicatedStack.has_value());
1085         return &*fDedicatedStack;
1086     }
1087 
push(Generator * gen,SlotRange fixedOffset,AutoStack * dynamicOffset,SkSpan<const int8_t> swizzle)1088     [[nodiscard]] bool push(Generator* gen,
1089                             SlotRange fixedOffset,
1090                             AutoStack* dynamicOffset,
1091                             SkSpan<const int8_t> swizzle) override {
1092         return fParent->push(gen, fixedOffset, dynamicOffset, swizzle);
1093     }
1094 
store(Generator * gen,SlotRange fixedOffset,AutoStack * dynamicOffset,SkSpan<const int8_t> swizzle)1095     [[nodiscard]] bool store(Generator* gen,
1096                              SlotRange fixedOffset,
1097                              AutoStack* dynamicOffset,
1098                              SkSpan<const int8_t> swizzle) override {
1099         return fParent->store(gen, fixedOffset, dynamicOffset, swizzle);
1100     }
1101 
1102 private:
1103     Generator* fGenerator = nullptr;
1104     std::unique_ptr<LValue> fParent;
1105     std::optional<AutoStack> fDedicatedStack;
1106     const IndexExpression* fIndexExpr = nullptr;
1107 };
1108 
addSlotDebugInfoForGroup(const std::string & varName,const Type & type,Position pos,int * groupIndex,bool isFunctionReturnValue)1109 void SlotManager::addSlotDebugInfoForGroup(const std::string& varName,
1110                                            const Type& type,
1111                                            Position pos,
1112                                            int* groupIndex,
1113                                            bool isFunctionReturnValue) {
1114     SkASSERT(fSlotDebugInfo);
1115     switch (type.typeKind()) {
1116         case Type::TypeKind::kArray: {
1117             int nslots = type.columns();
1118             const Type& elemType = type.componentType();
1119             for (int slot = 0; slot < nslots; ++slot) {
1120                 this->addSlotDebugInfoForGroup(varName + "[" + std::to_string(slot) + "]", elemType,
1121                                                pos, groupIndex, isFunctionReturnValue);
1122             }
1123             break;
1124         }
1125         case Type::TypeKind::kStruct: {
1126             for (const Field& field : type.fields()) {
1127                 this->addSlotDebugInfoForGroup(varName + "." + std::string(field.fName),
1128                                                *field.fType, pos, groupIndex,
1129                                                isFunctionReturnValue);
1130             }
1131             break;
1132         }
1133         default:
1134             SkASSERTF(0, "unsupported slot type %d", (int)type.typeKind());
1135             [[fallthrough]];
1136 
1137         case Type::TypeKind::kScalar:
1138         case Type::TypeKind::kVector:
1139         case Type::TypeKind::kMatrix: {
1140             Type::NumberKind numberKind = type.componentType().numberKind();
1141             int nslots = type.slotCount();
1142 
1143             for (int slot = 0; slot < nslots; ++slot) {
1144                 SlotDebugInfo slotInfo;
1145                 slotInfo.name = varName;
1146                 slotInfo.columns = type.columns();
1147                 slotInfo.rows = type.rows();
1148                 slotInfo.componentIndex = slot;
1149                 slotInfo.groupIndex = (*groupIndex)++;
1150                 slotInfo.numberKind = numberKind;
1151                 slotInfo.pos = pos;
1152                 slotInfo.fnReturnValue = isFunctionReturnValue ? 1 : -1;
1153                 fSlotDebugInfo->push_back(std::move(slotInfo));
1154             }
1155             break;
1156         }
1157     }
1158 }
1159 
addSlotDebugInfo(const std::string & varName,const Type & type,Position pos,bool isFunctionReturnValue)1160 void SlotManager::addSlotDebugInfo(const std::string& varName,
1161                                    const Type& type,
1162                                    Position pos,
1163                                    bool isFunctionReturnValue) {
1164     int groupIndex = 0;
1165     this->addSlotDebugInfoForGroup(varName, type, pos, &groupIndex, isFunctionReturnValue);
1166     SkASSERT((size_t)groupIndex == type.slotCount());
1167 }
1168 
createSlots(std::string name,const Type & type,Position pos,bool isFunctionReturnValue)1169 SlotRange SlotManager::createSlots(std::string name,
1170                                    const Type& type,
1171                                    Position pos,
1172                                    bool isFunctionReturnValue) {
1173     size_t nslots = type.slotCount();
1174     if (nslots == 0) {
1175         return {};
1176     }
1177     if (fSlotDebugInfo) {
1178         // Our debug slot-info table should have the same length as the actual slot table.
1179         SkASSERT(fSlotDebugInfo->size() == (size_t)fSlotCount);
1180 
1181         // Append slot names and types to our debug slot-info table.
1182         fSlotDebugInfo->reserve(fSlotCount + nslots);
1183         this->addSlotDebugInfo(name, type, pos, isFunctionReturnValue);
1184 
1185         // Confirm that we added the expected number of slots.
1186         SkASSERT(fSlotDebugInfo->size() == (size_t)(fSlotCount + nslots));
1187     }
1188 
1189     SlotRange result = {fSlotCount, (int)nslots};
1190     fSlotCount += nslots;
1191     return result;
1192 }
1193 
mapVariableToSlots(const Variable & v,SlotRange range)1194 std::optional<SlotRange> SlotManager::mapVariableToSlots(const Variable& v, SlotRange range) {
1195     SkASSERT(v.type().slotCount() == SkToSizeT(range.count));
1196     const SlotRange* existingEntry = fSlotMap.find(&v);
1197     std::optional<SlotRange> originalRange = existingEntry ? std::optional(*existingEntry)
1198                                                            : std::nullopt;
1199     fSlotMap.set(&v, range);
1200     return originalRange;
1201 }
1202 
unmapVariableSlots(const Variable & v)1203 void SlotManager::unmapVariableSlots(const Variable& v) {
1204     fSlotMap.remove(&v);
1205 }
1206 
getVariableSlots(const Variable & v)1207 SlotRange SlotManager::getVariableSlots(const Variable& v) {
1208     SlotRange* entry = fSlotMap.find(&v);
1209     if (entry != nullptr) {
1210         return *entry;
1211     }
1212     SlotRange range = this->createSlots(std::string(v.name()),
1213                                         v.type(),
1214                                         v.fPosition,
1215                                         /*isFunctionReturnValue=*/false);
1216     this->mapVariableToSlots(v, range);
1217     return range;
1218 }
1219 
1220 SlotRange SlotManager::getFunctionSlots(const IRNode& callSite, const FunctionDeclaration& f) {
1221     SlotRange* entry = fSlotMap.find(&callSite);
1222     if (entry != nullptr) {
1223         return *entry;
1224     }
1225     SlotRange range = this->createSlots("[" + std::string(f.name()) + "].result",
1226                                         f.returnType(),
1227                                         f.fPosition,
1228                                         /*isFunctionReturnValue=*/true);
1229     fSlotMap.set(&callSite, range);
1230     return range;
1231 }
1232 
1233 static bool is_sliceable_swizzle(SkSpan<const int8_t> components) {
1234     // Determine if the swizzle rearranges its elements, or if it's a simple subset of its elements.
1235     // Determine whether the swizzle rearranges its elements or is just a simple subset of them.
1236     // (A simple subset is a sequential, non-repeating range of components, like `.xyz`, `.yzw`, or
1237     // `.z`; those can be accessed as a contiguous slice of the variable, unlike `.xx` or `.xz`.)
1238         if (components[index] != int8_t(components[0] + index)) {
1239             return false;
1240         }
1241     }
1242     return true;
1243 }
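// Illustration (not part of the original source): `.yzw` has components {1, 2, 3}, a sequential
// run starting at 1, so it is sliceable; `.xz` has components {0, 2}, and `.xx` repeats a
// component, so neither of those can be expressed as a contiguous slice.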
1244 
1245 std::unique_ptr<LValue> Generator::makeLValue(const Expression& e, bool allowScratch) {
1246     if (e.is<VariableReference>()) {
1247         const Variable* variable = e.as<VariableReference>().variable();
1248         if (fImmutableVariables.contains(variable)) {
1249             return std::make_unique<ImmutableLValue>(variable);
1250         }
1251         return std::make_unique<VariableLValue>(variable);
1252     }
1253     if (e.is<Swizzle>()) {
1254         const Swizzle& swizzleExpr = e.as<Swizzle>();
1255         if (std::unique_ptr<LValue> base = this->makeLValue(*swizzleExpr.base(),
1256                                                             allowScratch)) {
1257             const ComponentArray& components = swizzleExpr.components();
1258             if (is_sliceable_swizzle(components)) {
1259                 // If the swizzle is a contiguous subset, we can represent it with a fixed slice.
1260                 return std::make_unique<LValueSlice>(std::move(base), components[0],
1261                                                      components.size());
1262             }
1263             return std::make_unique<SwizzleLValue>(std::move(base), components);
1264         }
1265         return nullptr;
1266     }
1267     if (e.is<FieldAccess>()) {
1268         const FieldAccess& fieldExpr = e.as<FieldAccess>();
1269         if (std::unique_ptr<LValue> base = this->makeLValue(*fieldExpr.base(),
1270                                                             allowScratch)) {
1271             // Represent field access with a slice.
1272             return std::make_unique<LValueSlice>(std::move(base), fieldExpr.initialSlot(),
1273                                                  fieldExpr.type().slotCount());
1274         }
1275         return nullptr;
1276     }
1277     if (e.is<IndexExpression>()) {
1278         const IndexExpression& indexExpr = e.as<IndexExpression>();
1279 
1280         // If the index base is swizzled (`vec.zyx[idx]`), rewrite it into an equivalent
1281         // non-swizzled form (`vec[uint3(2,1,0)[idx]]`).
1282         if (std::unique_ptr<Expression> rewritten = Transform::RewriteIndexedSwizzle(fContext,
1283                                                                                      indexExpr)) {
1284             // Convert the rewritten expression into an lvalue.
1285             std::unique_ptr<LValue> lvalue = this->makeLValue(*rewritten, allowScratch);
1286             if (!lvalue) {
1287                 return nullptr;
1288             }
1289             // We need to hold onto the rewritten expression for the lifetime of the lvalue.
1290             lvalue->fScratchExpression = std::move(rewritten);
1291             return lvalue;
1292         }
1293         if (std::unique_ptr<LValue> base = this->makeLValue(*indexExpr.base(),
1294                                                             allowScratch)) {
1295             // If the index is a compile-time constant, we can represent it with a fixed slice.
1296             SKSL_INT indexValue;
1297             if (ConstantFolder::GetConstantInt(*indexExpr.index(), &indexValue)) {
1298                 int numSlots = indexExpr.type().slotCount();
1299                 return std::make_unique<LValueSlice>(std::move(base), numSlots * indexValue,
1300                                                      numSlots);
1301             }
1302 
1303             // Represent non-constant indexing via a dynamic index.
1304             auto dynLValue = std::make_unique<DynamicIndexLValue>(std::move(base), indexExpr);
1305             return dynLValue->evaluateDynamicIndices(this) ? std::move(dynLValue)
1306                                                            : nullptr;
1307         }
1308         return nullptr;
1309     }
1310     if (allowScratch) {
1311         // This path allows us to perform field- and index-accesses on an expression as if it were
1312         // an lvalue; the result is a temporary, though, and shouldn't be written back to.
1313         return std::make_unique<ScratchLValue>(e);
1314     }
1315     return nullptr;
1316 }
1317 
1318 bool Generator::push(LValue& lvalue) {
1319     return lvalue.push(this,
1320                        lvalue.fixedSlotRange(this),
1321                        lvalue.dynamicSlotRange(),
1322                        /*swizzle=*/{});
1323 }
1324 
1325 bool Generator::store(LValue& lvalue) {
1326     SkASSERT(lvalue.isWritable());
1327     return lvalue.store(this,
1328                         lvalue.fixedSlotRange(this),
1329                         lvalue.dynamicSlotRange(),
1330                         /*swizzle=*/{});
1331 }
1332 
1333 int Generator::getFunctionDebugInfo(const FunctionDeclaration& decl) {
1334     SkASSERT(fDebugTrace);
1335 
1336     std::string name = decl.description();
1337 
1338     // When generating a debug trace, we typically mark every function as `noinline`. That modifier
1339     // isn't part of the original source program, so strip it from the name to keep the trace readable.
1340     static constexpr std::string_view kNoInline = "noinline ";
1341     if (skstd::starts_with(name, kNoInline)) {
1342         name = name.substr(kNoInline.size());
1343     }
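    // Illustration (not from the original source): a declaration described as something like
    // `noinline half4 main(float2 xy)` would be recorded in the trace as `half4 main(float2 xy)`
    // once this prefix has been stripped.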
1344 
1345     // Look for a matching FunctionDebugInfo slot.
1346     for (size_t index = 0; index < fDebugTrace->fFuncInfo.size(); ++index) {
1347         if (fDebugTrace->fFuncInfo[index].name == name) {
1348             return index;
1349         }
1350     }
1351 
1352     // We've never called this function before; create a new slot to hold its information.
1353     int slot = (int)fDebugTrace->fFuncInfo.size();
1354     fDebugTrace->fFuncInfo.push_back(FunctionDebugInfo{std::move(name)});
1355     return slot;
1356 }
1357 
1358 int Generator::createStack() {
1359     if (!fRecycledStacks.empty()) {
1360         int stackID = fRecycledStacks.back();
1361         fRecycledStacks.pop_back();
1362         return stackID;
1363     }
1364     return ++fNextStackID;
1365 }
1366 
1367 void Generator::recycleStack(int stackID) {
1368     fRecycledStacks.push_back(stackID);
1369 }
1370 
1371 void Generator::setCurrentStack(int stackID) {
1372     if (fCurrentStack != stackID) {
1373         fCurrentStack = stackID;
1374         fBuilder.set_current_stack(stackID);
1375     }
1376 }
1377 
1378 std::optional<SlotRange> Generator::writeFunction(
1379         const IRNode& callSite,
1380         const FunctionDefinition& function,
1381         SkSpan<std::unique_ptr<Expression> const> arguments) {
1382     // Generate debug information and emit a trace-enter op.
1383     int funcIndex = -1;
1384     if (fDebugTrace) {
1385         funcIndex = this->getFunctionDebugInfo(function.declaration());
1386         SkASSERT(funcIndex >= 0);
1387         if (this->shouldWriteTraceOps()) {
1388             fBuilder.trace_enter(fTraceMask->stackID(), funcIndex);
1389         }
1390     }
1391 
1392     // Handle parameter lvalues.
1393     struct RemappedSlotRange {
1394         const Variable* fVariable;
1395         std::optional<SlotRange> fSlotRange;
1396     };
1397     SkSpan<Variable* const> parameters = function.declaration().parameters();
1398     TArray<std::unique_ptr<LValue>> lvalues;
1399     TArray<RemappedSlotRange> remappedSlotRanges;
1400 
1401     if (function.declaration().isMain()) {
1402         // For main(), the parameter slots have already been populated by `writeProgram`, but we
1403         // still need to explicitly emit trace ops for the variables in main(), since they are
1404         // initialized before it is safe to use trace-var. (We can't invoke init-lane-masks until
1405         // after we've copied the inputs from main into slots, because dst.rgba is used to pass in a
1406         // blend-destination color, but we clobber it with the execution mask instead.)
1407         if (this->shouldWriteTraceOps()) {
1408             for (const Variable* var : parameters) {
1409                 fBuilder.trace_var(fTraceMask->stackID(), this->getVariableSlots(*var));
1410             }
1411         }
1412     } else {
1413         // Write all the arguments into their parameter's variable slots. Because we never allow
1414         // recursion, we don't need to worry about overwriting any existing values in those slots.
1415         // (In fact, we don't even need to apply the write mask.)
1416         lvalues.resize(arguments.size());
1417 
1418         for (size_t index = 0; index < arguments.size(); ++index) {
1419             const Expression& arg = *arguments[index];
1420             const Variable& param = *parameters[index];
1421 
1422             // If we are passing a child effect to a function, we need to add its mapping to our
1423             // child map.
1424             if (arg.type().isEffectChild()) {
1425                 if (int* childIndex = fChildEffectMap.find(arg.as<VariableReference>()
1426                                                               .variable())) {
1427                     SkASSERT(!fChildEffectMap.find(&param));
1428                     fChildEffectMap[&param] = *childIndex;
1429                 }
1430                 continue;
1431             }
1432 
1433             // Use LValues for out-parameters and inout-parameters, so we can store back to them
1434             // later.
1435             if (IsInoutParameter(param) || IsOutParameter(param)) {
1436                 lvalues[index] = this->makeLValue(arg);
1437                 if (!lvalues[index]) {
1438                     return std::nullopt;
1439                 }
1440                 // There are no guarantees on the starting value of an out-parameter, so we only
1441                 // need to store the lvalues associated with an inout parameter.
1442                 if (IsInoutParameter(param)) {
1443                     if (!this->push(*lvalues[index])) {
1444                         return std::nullopt;
1445                     }
1446                     this->popToSlotRangeUnmasked(this->getVariableSlots(param));
1447                 }
1448                 continue;
1449             }
1450 
1451             // If a parameter is never read by the function, we don't need to populate its slots.
1452             ProgramUsage::VariableCounts paramCounts = fProgram.fUsage->get(param);
1453             if (paramCounts.fRead == 0) {
1454                 // Honor the expression's side effects, if any.
1455                 if (Analysis::HasSideEffects(arg)) {
1456                     if (!this->pushExpression(arg, /*usesResult=*/false)) {
1457                         return std::nullopt;
1458                     }
1459                     this->discardExpression(arg.type().slotCount());
1460                 }
1461                 continue;
1462             }
1463 
1464             // If the expression is a plain variable and the parameter is never written to, we don't
1465             // need to copy it; we can just share the slots from the existing variable.
1466             if (paramCounts.fWrite == 0 && arg.is<VariableReference>()) {
1467                 const Variable& var = *arg.as<VariableReference>().variable();
1468                 if (this->hasVariableSlots(var)) {
1469                     std::optional<SlotRange> originalRange =
1470                             fProgramSlots.mapVariableToSlots(param, this->getVariableSlots(var));
1471                     remappedSlotRanges.push_back({&param, originalRange});
1472                     continue;
1473                 }
1474             }
1475 
1476             // Copy input arguments into their respective parameter slots.
1477             if (!this->pushExpression(arg)) {
1478                 return std::nullopt;
1479             }
1480             this->popToSlotRangeUnmasked(this->getVariableSlots(param));
1481         }
1482     }
1483 
1484     // Set up a slot range dedicated to this function's return value.
1485     SlotRange lastFunctionResult = fCurrentFunctionResult;
1486     fCurrentFunctionResult = this->getFunctionSlots(callSite, function.declaration());
1487 
1488     // Save off the return mask.
1489     if (this->needsReturnMask(&function)) {
1490         fBuilder.enableExecutionMaskWrites();
1491         if (!function.declaration().isMain()) {
1492             fBuilder.push_return_mask();
1493         }
1494     }
1495 
1496     // Emit the function body.
1497     if (!this->writeStatement(*function.body())) {
1498         return std::nullopt;
1499     }
1500 
1501     // Restore the original return mask.
1502     if (this->needsReturnMask(&function)) {
1503         if (!function.declaration().isMain()) {
1504             fBuilder.pop_return_mask();
1505         }
1506         fBuilder.disableExecutionMaskWrites();
1507     }
1508 
1509     // Restore the function-result slot range.
1510     SlotRange functionResult = fCurrentFunctionResult;
1511     fCurrentFunctionResult = lastFunctionResult;
1512 
1513     // Emit a trace-exit op.
1514     if (fDebugTrace && fWriteTraceOps) {
1515         fBuilder.trace_exit(fTraceMask->stackID(), funcIndex);
1516     }
1517 
1518     // Copy out-parameters and inout-parameters back to their homes.
1519     for (int index = 0; index < lvalues.size(); ++index) {
1520         if (lvalues[index]) {
1521             // Only out- and inout-parameters should have an associated lvalue.
1522             const Variable& param = *parameters[index];
1523             SkASSERT(IsInoutParameter(param) || IsOutParameter(param));
1524 
1525             // Copy the parameter's slots directly into the lvalue.
1526             fBuilder.push_slots(this->getVariableSlots(param));
1527             if (!this->store(*lvalues[index])) {
1528                 return std::nullopt;
1529             }
1530             this->discardExpression(param.type().slotCount());
1531         }
1532     }
1533 
1534     // Restore any remapped parameter slot ranges to their original values.
1535     for (const RemappedSlotRange& remapped : remappedSlotRanges) {
1536         if (remapped.fSlotRange.has_value()) {
1537             fProgramSlots.mapVariableToSlots(*remapped.fVariable, *remapped.fSlotRange);
1538         } else {
1539             fProgramSlots.unmapVariableSlots(*remapped.fVariable);
1540         }
1541     }
1542 
1543     // Remove any child-effect mappings that were made for this call.
1544     for (size_t index = 0; index < arguments.size(); ++index) {
1545         const Expression& arg = *arguments[index];
1546         if (arg.type().isEffectChild()) {
1547             fChildEffectMap.remove(parameters[index]);
1548         }
1549     }
1550 
1551     return functionResult;
1552 }
1553 
1554 void Generator::emitTraceLine(Position pos) {
1555     if (fDebugTrace && fWriteTraceOps && pos.valid() && fInsideCompoundStatement == 0) {
1556         // Binary search within fLineOffsets to convert the position into a line number.
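        // Worked example (illustrative, not from the original source): for the source "ab\ncd\ne",
        // calculateLineOffsets produces {0, 2, 5, 7}. An offset of 4 (the 'd' on line 2) makes
        // upper_bound return the iterator to 5, at index 2, so lineNumber is 2.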
1557         SkASSERT(fLineOffsets.size() >= 2);
1558         SkASSERT(fLineOffsets[0] == 0);
1559         SkASSERT(fLineOffsets.back() == (int)fProgram.fSource->length());
1560         int lineNumber = std::distance(
1561                 fLineOffsets.begin(),
1562                 std::upper_bound(fLineOffsets.begin(), fLineOffsets.end(), pos.startOffset()));
1563 
1564         fBuilder.trace_line(fTraceMask->stackID(), lineNumber);
1565     }
1566 }
1567 
1568 void Generator::pushTraceScopeMask() {
1569     if (this->shouldWriteTraceOps()) {
1570         // Take the intersection of the trace mask and the execution mask. To do this, start with an
1571         // all-zero mask, then use select to overwrite those zeros with the trace mask across all
1572         // executing lanes. We'll get the trace mask in executing lanes, and zero in dead lanes.
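        // Illustration (not from the original source): if the execution mask is {~0, 0, ~0, 0} and
        // the trace mask is all-on, the select leaves {~0, 0, ~0, 0} on the stack, so tracing stays
        // enabled only in lanes that are actually executing this scope.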
1573         fBuilder.push_constant_i(0);
1574         fTraceMask->pushClone(/*slots=*/1);
1575         fBuilder.select(/*slots=*/1);
1576     }
1577 }
1578 
1579 void Generator::discardTraceScopeMask() {
1580     if (this->shouldWriteTraceOps()) {
1581         this->discardExpression(/*slots=*/1);
1582     }
1583 }
1584 
1585 void Generator::emitTraceScope(int delta) {
1586     if (this->shouldWriteTraceOps()) {
1587         fBuilder.trace_scope(this->currentStack(), delta);
1588     }
1589 }
1590 
1591 void Generator::calculateLineOffsets() {
1592     SkASSERT(fLineOffsets.empty());
1593     fLineOffsets.push_back(0);
1594     for (size_t i = 0; i < fProgram.fSource->length(); ++i) {
1595         if ((*fProgram.fSource)[i] == '\n') {
1596             fLineOffsets.push_back(i);
1597         }
1598     }
1599     fLineOffsets.push_back(fProgram.fSource->length());
1600 }
1601 
1602 bool Generator::writeGlobals() {
1603     for (const ProgramElement* e : fProgram.elements()) {
1604         if (e->is<GlobalVarDeclaration>()) {
1605             const GlobalVarDeclaration& gvd = e->as<GlobalVarDeclaration>();
1606             const VarDeclaration& decl = gvd.varDeclaration();
1607             const Variable* var = decl.var();
1608 
1609             if (var->type().isEffectChild()) {
1610                 // Associate each child effect variable with its numeric index.
1611                 SkASSERT(!fChildEffectMap.find(var));
1612                 int childEffectIndex = fChildEffectMap.count();
1613                 fChildEffectMap[var] = childEffectIndex;
1614                 continue;
1615             }
1616 
1617             // Opaque types include child processors and GL objects (samplers, textures, etc).
1618             // Of those, only child processors are legal variables.
1619             SkASSERT(!var->type().isVoid());
1620             SkASSERT(!var->type().isOpaque());
1621 
1622             // Builtin variables are system-defined, with special semantics.
1623             if (int builtin = var->layout().fBuiltin; builtin >= 0) {
1624                 if (builtin == SK_FRAGCOORD_BUILTIN) {
1625                     fBuilder.store_device_xy01(this->getVariableSlots(*var));
1626                     continue;
1627                 }
1628                 // The only builtin variable exposed to runtime effects is sk_FragCoord.
1629                 return unsupported();
1630             }
1631 
1632             if (IsUniform(*var)) {
1633                 // Create the uniform slot map in first-to-last order.
1634                 SlotRange uniformSlotRange = this->getUniformSlots(*var);
1635 
1636                 if (this->shouldWriteTraceOps()) {
1637                     // We expect uniform values to show up in the debug trace. To make this happen
1638                     // without updating the file format, we synthesize a value-slot range for the
1639                     // uniform here, and copy the uniform data into the value slots. This allows
1640                     // trace_var to work naturally. This wastes a bit of memory, but debug traces
1641                     // don't need to be hyper-efficient.
1642                     SlotRange copyRange = fProgramSlots.getVariableSlots(*var);
1643                     fBuilder.push_uniform(uniformSlotRange);
1644                     this->popToSlotRangeUnmasked(copyRange);
1645                 }
1646 
1647                 continue;
1648             }
1649 
1650             // Other globals are treated as normal variable declarations.
1651             if (!this->writeVarDeclaration(decl)) {
1652                 return unsupported();
1653             }
1654         }
1655     }
1656 
1657     return true;
1658 }
1659 
1660 bool Generator::writeStatement(const Statement& s) {
1661     switch (s.kind()) {
1662         case Statement::Kind::kBlock:
1663             // The debugger will stop on statements inside Blocks; there's no need for an additional
1664             // stop on the block's initial open-brace.
1665         case Statement::Kind::kFor:
1666             // The debugger will stop on the init-statement of a for statement, so we don't need to
1667             // stop on the outer for-statement itself as well.
1668             break;
1669 
1670         default:
1671             // The debugger should stop on other statements.
1672             this->emitTraceLine(s.fPosition);
1673             break;
1674     }
1675 
1676     switch (s.kind()) {
1677         case Statement::Kind::kBlock:
1678             return this->writeBlock(s.as<Block>());
1679 
1680         case Statement::Kind::kBreak:
1681             return this->writeBreakStatement(s.as<BreakStatement>());
1682 
1683         case Statement::Kind::kContinue:
1684             return this->writeContinueStatement(s.as<ContinueStatement>());
1685 
1686         case Statement::Kind::kDo:
1687             return this->writeDoStatement(s.as<DoStatement>());
1688 
1689         case Statement::Kind::kExpression:
1690             return this->writeExpressionStatement(s.as<ExpressionStatement>());
1691 
1692         case Statement::Kind::kFor:
1693             return this->writeForStatement(s.as<ForStatement>());
1694 
1695         case Statement::Kind::kIf:
1696             return this->writeIfStatement(s.as<IfStatement>());
1697 
1698         case Statement::Kind::kNop:
1699             return true;
1700 
1701         case Statement::Kind::kReturn:
1702             return this->writeReturnStatement(s.as<ReturnStatement>());
1703 
1704         case Statement::Kind::kSwitch:
1705             return this->writeSwitchStatement(s.as<SwitchStatement>());
1706 
1707         case Statement::Kind::kVarDeclaration:
1708             return this->writeVarDeclaration(s.as<VarDeclaration>());
1709 
1710         default:
1711             return unsupported();
1712     }
1713 }
1714 
1715 bool Generator::writeBlock(const Block& b) {
1716     if (b.blockKind() == Block::Kind::kCompoundStatement) {
1717         this->emitTraceLine(b.fPosition);
1718         ++fInsideCompoundStatement;
1719     } else {
1720         this->pushTraceScopeMask();
1721         this->emitTraceScope(+1);
1722     }
1723 
1724     for (const std::unique_ptr<Statement>& stmt : b.children()) {
1725         if (!this->writeStatement(*stmt)) {
1726             return unsupported();
1727         }
1728     }
1729 
1730     if (b.blockKind() == Block::Kind::kCompoundStatement) {
1731         --fInsideCompoundStatement;
1732     } else {
1733         this->emitTraceScope(-1);
1734         this->discardTraceScopeMask();
1735     }
1736 
1737     return true;
1738 }
1739 
1740 bool Generator::writeBreakStatement(const BreakStatement&) {
1741     // If all lanes have reached this break, we can just branch straight to the break target instead
1742     // of updating masks.
1743     fBuilder.branch_if_all_lanes_active(fCurrentBreakTarget);
1744     fBuilder.mask_off_loop_mask();
1745     return true;
1746 }
1747 
1748 bool Generator::writeContinueStatement(const ContinueStatement&) {
1749     fBuilder.continue_op(fCurrentContinueMask->stackID());
1750     return true;
1751 }
1752 
1753 bool Generator::writeDoStatement(const DoStatement& d) {
1754     // Set up a break target.
1755     AutoLoopTarget breakTarget(this, &fCurrentBreakTarget);
1756 
1757     // Save off the original loop mask.
1758     fBuilder.enableExecutionMaskWrites();
1759     fBuilder.push_loop_mask();
1760 
1761     // If `continue` is used in the loop...
1762     Analysis::LoopControlFlowInfo loopInfo = Analysis::GetLoopControlFlowInfo(*d.statement());
1763     AutoContinueMask autoContinueMask(this);
1764     if (loopInfo.fHasContinue) {
1765         // ... create a temporary slot for continue-mask storage.
1766         autoContinueMask.enable();
1767     }
1768 
1769     // Write the do-loop body.
1770     int labelID = fBuilder.nextLabelID();
1771     fBuilder.label(labelID);
1772 
1773     autoContinueMask.enterLoopBody();
1774 
1775     if (!this->writeStatement(*d.statement())) {
1776         return false;
1777     }
1778 
1779     autoContinueMask.exitLoopBody();
1780 
1781     // Point the debugger at the do-statement's test-expression before we run it.
1782     this->emitTraceLine(d.test()->fPosition);
1783 
1784     // Emit the test-expression, in order to combine it with the loop mask.
1785     if (!this->pushExpression(*d.test())) {
1786         return false;
1787     }
1788 
1789     // Mask off any lanes in the loop mask where the test-expression is false; this breaks the loop.
1790     // We don't use the test expression for anything else, so jettison it.
1791     fBuilder.merge_loop_mask();
1792     this->discardExpression(/*slots=*/1);
1793 
1794     // If any lanes are still running, go back to the top and run the loop body again.
1795     fBuilder.branch_if_any_lanes_active(labelID);
1796 
1797     // If we hit a break statement on all lanes, we will branch here to escape from the loop.
1798     fBuilder.label(breakTarget.labelID());
1799 
1800     // Restore the loop mask.
1801     fBuilder.pop_loop_mask();
1802     fBuilder.disableExecutionMaskWrites();
1803 
1804     return true;
1805 }
1806 
1807 bool Generator::writeMasklessForStatement(const ForStatement& f) {
1808     SkASSERT(f.unrollInfo());
1809     SkASSERT(f.unrollInfo()->fCount > 0);
1810     SkASSERT(f.initializer());
1811     SkASSERT(f.test());
1812     SkASSERT(f.next());
1813 
1814     // We want the loop index to disappear at the end of the loop, so wrap the for statement in a
1815     // trace scope.
1816     this->pushTraceScopeMask();
1817     this->emitTraceScope(+1);
1818 
1819     // If no lanes are active, skip over the loop entirely. This guards against looping forever;
1820     // with no lanes active, we wouldn't be able to write the loop variable back to its slot, so
1821     // we'd never make forward progress.
1822     int loopExitID = fBuilder.nextLabelID();
1823     int loopBodyID = fBuilder.nextLabelID();
1824     fBuilder.branch_if_no_lanes_active(loopExitID);
1825 
1826     // Run the loop initializer.
1827     if (!this->writeStatement(*f.initializer())) {
1828         return unsupported();
1829     }
1830 
1831     // Write the for-loop body. We know the for-loop has a standard ES2 unrollable structure, and
1832     // that it runs for at least one iteration, so we can plow straight ahead into the loop body
1833     // instead of running the loop-test first.
1834     fBuilder.label(loopBodyID);
1835 
1836     if (!this->writeStatement(*f.statement())) {
1837         return unsupported();
1838     }
1839 
1840     // Point the debugger at the for-statement's next-expression before we run it, or as close as we
1841     // can reasonably get.
1842     if (f.next()) {
1843         this->emitTraceLine(f.next()->fPosition);
1844     } else if (f.test()) {
1845         this->emitTraceLine(f.test()->fPosition);
1846     } else {
1847         this->emitTraceLine(f.fPosition);
1848     }
1849 
1850     // If the loop only runs for a single iteration, we are already done. If not...
1851     if (f.unrollInfo()->fCount > 1) {
1852         // ... run the next-expression, and immediately discard its result.
1853         if (!this->pushExpression(*f.next(), /*usesResult=*/false)) {
1854             return unsupported();
1855         }
1856         this->discardExpression(f.next()->type().slotCount());
1857 
1858         // Run the test-expression, and repeat the loop until the test-expression evaluates false.
1859         if (!this->pushExpression(*f.test())) {
1860             return unsupported();
1861         }
1862         fBuilder.branch_if_no_active_lanes_on_stack_top_equal(0, loopBodyID);
1863 
1864         // Jettison the test-expression.
1865         this->discardExpression(/*slots=*/1);
1866     }
1867 
1868     fBuilder.label(loopExitID);
1869 
1870     this->emitTraceScope(-1);
1871     this->discardTraceScopeMask();
1872     return true;
1873 }
1874 
1875 bool Generator::writeForStatement(const ForStatement& f) {
1876     // If we've determined that the loop does not run, omit its code entirely.
1877     if (f.unrollInfo() && f.unrollInfo()->fCount == 0) {
1878         return true;
1879     }
1880 
1881     // If the loop doesn't escape early due to a `continue`, `break` or `return`, and the loop
1882     // conforms to ES2 structure, we know that we will run the full number of iterations across all
1883     // lanes and don't need to use a loop mask.
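    // Illustration (not from the original source): a hypothetical loop like
    // `for (int i = 0; i < 4; ++i) { sum += 0.25; }` has a known trip count and no
    // break/continue/return, so it takes the maskless path below.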
1884     Analysis::LoopControlFlowInfo loopInfo = Analysis::GetLoopControlFlowInfo(*f.statement());
1885     if (!loopInfo.fHasContinue && !loopInfo.fHasBreak && !loopInfo.fHasReturn && f.unrollInfo()) {
1886         return this->writeMasklessForStatement(f);
1887     }
1888 
1889     // We want the loop index to disappear at the end of the loop, so wrap the for statement in a
1890     // trace scope.
1891     this->pushTraceScopeMask();
1892     this->emitTraceScope(+1);
1893 
1894     // Set up a break target.
1895     AutoLoopTarget breakTarget(this, &fCurrentBreakTarget);
1896 
1897     // Run the loop initializer.
1898     if (f.initializer()) {
1899         if (!this->writeStatement(*f.initializer())) {
1900             return unsupported();
1901         }
1902     } else {
1903         this->emitTraceLine(f.fPosition);
1904     }
1905 
1906     AutoContinueMask autoContinueMask(this);
1907     if (loopInfo.fHasContinue) {
1908         // Acquire a temporary slot for continue-mask storage.
1909         autoContinueMask.enable();
1910     }
1911 
1912     // Save off the original loop mask.
1913     fBuilder.enableExecutionMaskWrites();
1914     fBuilder.push_loop_mask();
1915 
1916     int loopTestID = fBuilder.nextLabelID();
1917     int loopBodyID = fBuilder.nextLabelID();
1918 
1919     // Jump down to the loop test so we can fall out of the loop immediately if it's zero-iteration.
1920     fBuilder.jump(loopTestID);
1921 
1922     // Write the for-loop body.
1923     fBuilder.label(loopBodyID);
1924 
1925     autoContinueMask.enterLoopBody();
1926 
1927     if (!this->writeStatement(*f.statement())) {
1928         return unsupported();
1929     }
1930 
1931     autoContinueMask.exitLoopBody();
1932 
1933     // Point the debugger at the for-statement's next-expression before we run it, or as close as we
1934     // can reasonably get.
1935     if (f.next()) {
1936         this->emitTraceLine(f.next()->fPosition);
1937     } else if (f.test()) {
1938         this->emitTraceLine(f.test()->fPosition);
1939     } else {
1940         this->emitTraceLine(f.fPosition);
1941     }
1942 
1943     // Run the next-expression. Immediately discard its result.
1944     if (f.next()) {
1945         if (!this->pushExpression(*f.next(), /*usesResult=*/false)) {
1946             return unsupported();
1947         }
1948         this->discardExpression(f.next()->type().slotCount());
1949     }
1950 
1951     fBuilder.label(loopTestID);
1952     if (f.test()) {
1953         // Emit the test-expression, in order to combine it with the loop mask.
1954         if (!this->pushExpression(*f.test())) {
1955             return unsupported();
1956         }
1957         // Mask off any lanes in the loop mask where the test-expression is false; this breaks the
1958         // loop. We don't use the test expression for anything else, so jettison it.
1959         fBuilder.merge_loop_mask();
1960         this->discardExpression(/*slots=*/1);
1961     }
1962 
1963     // If any lanes are still running, go back to the top and run the loop body again.
1964     fBuilder.branch_if_any_lanes_active(loopBodyID);
1965 
1966     // If we hit a break statement on all lanes, we will branch here to escape from the loop.
1967     fBuilder.label(breakTarget.labelID());
1968 
1969     // Restore the loop mask.
1970     fBuilder.pop_loop_mask();
1971     fBuilder.disableExecutionMaskWrites();
1972 
1973     this->emitTraceScope(-1);
1974     this->discardTraceScopeMask();
1975     return true;
1976 }
1977 
1978 bool Generator::writeExpressionStatement(const ExpressionStatement& e) {
1979     if (!this->pushExpression(*e.expression(), /*usesResult=*/false)) {
1980         return unsupported();
1981     }
1982     this->discardExpression(e.expression()->type().slotCount());
1983     return true;
1984 }
1985 
1986 bool Generator::writeDynamicallyUniformIfStatement(const IfStatement& i) {
1987     SkASSERT(Analysis::IsDynamicallyUniformExpression(*i.test()));
1988 
1989     int falseLabelID = fBuilder.nextLabelID();
1990     int exitLabelID = fBuilder.nextLabelID();
1991 
1992     if (!this->pushExpression(*i.test())) {
1993         return unsupported();
1994     }
1995 
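    // Since the test is dynamically uniform, every active lane holds the same value; if no active
    // lane holds true (~0), we can branch past the if-true block entirely.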
1996     fBuilder.branch_if_no_active_lanes_on_stack_top_equal(~0, falseLabelID);
1997 
1998     if (!this->writeStatement(*i.ifTrue())) {
1999         return unsupported();
2000     }
2001 
2002     if (!i.ifFalse()) {
2003         // We don't have an if-false condition at all.
2004         fBuilder.label(falseLabelID);
2005     } else {
2006         // We do have an if-false condition. We've just completed the if-true block, so we need to
2007         // jump past the if-false block to avoid executing it.
2008         fBuilder.jump(exitLabelID);
2009 
2010         // The if-false block starts here.
2011         fBuilder.label(falseLabelID);
2012 
2013         if (!this->writeStatement(*i.ifFalse())) {
2014             return unsupported();
2015         }
2016 
2017         fBuilder.label(exitLabelID);
2018     }
2019 
2020     // Jettison the test-expression.
2021     this->discardExpression(/*slots=*/1);
2022     return true;
2023 }
2024 
2025 bool Generator::writeIfStatement(const IfStatement& i) {
2026     // If the test condition is known to be uniform, we can skip over the untrue portion entirely.
2027     if (Analysis::IsDynamicallyUniformExpression(*i.test())) {
2028         return this->writeDynamicallyUniformIfStatement(i);
2029     }
2030 
2031     // Save the current condition-mask.
2032     fBuilder.enableExecutionMaskWrites();
2033     fBuilder.push_condition_mask();
2034 
2035     // Push the test condition mask.
2036     if (!this->pushExpression(*i.test())) {
2037         return unsupported();
2038     }
2039 
2040     // Merge the current condition-mask with the test condition, then run the if-true branch.
2041     fBuilder.merge_condition_mask();
2042     if (!this->writeStatement(*i.ifTrue())) {
2043         return unsupported();
2044     }
2045 
2046     if (i.ifFalse()) {
2047         // Apply the inverse condition-mask. Then run the if-false branch.
2048         fBuilder.merge_inv_condition_mask();
2049         if (!this->writeStatement(*i.ifFalse())) {
2050             return unsupported();
2051         }
2052     }
2053 
2054     // Jettison the test-expression, and restore the condition-mask.
2055     this->discardExpression(/*slots=*/1);
2056     fBuilder.pop_condition_mask();
2057     fBuilder.disableExecutionMaskWrites();
2058 
2059     return true;
2060 }
2061 
2062 bool Generator::writeReturnStatement(const ReturnStatement& r) {
2063     if (r.expression()) {
2064         if (!this->pushExpression(*r.expression())) {
2065             return unsupported();
2066         }
2067         if (this->needsFunctionResultSlots(fCurrentFunction)) {
2068             this->popToSlotRange(fCurrentFunctionResult);
2069         }
2070     }
2071     if (fBuilder.executionMaskWritesAreEnabled() && this->needsReturnMask(fCurrentFunction)) {
2072         fBuilder.mask_off_return_mask();
2073     }
2074     return true;
2075 }
2076 
2077 bool Generator::writeSwitchStatement(const SwitchStatement& s) {
2078     const StatementArray& cases = s.cases();
2079     SkASSERT(std::all_of(cases.begin(), cases.end(), [](const std::unique_ptr<Statement>& stmt) {
2080         return stmt->is<SwitchCase>();
2081     }));
2082 
2083     // Set up a break target.
2084     AutoLoopTarget breakTarget(this, &fCurrentBreakTarget);
2085 
2086     // Save off the original loop mask.
2087     fBuilder.enableExecutionMaskWrites();
2088     fBuilder.push_loop_mask();
2089 
2090     // Push the switch-case value, and write a default-mask that enables every lane which already
2091     // has an active loop mask. As we match cases, the default mask will get pared down.
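    // Illustration (not from the original source): for `switch (x)` with lanes x = {1, 2, 3, 1} and
    // cases 1, 2, default, the case-op for 1 re-enables lanes 0 and 3 and removes them from the
    // default mask; after all case-ops, only lane 2 remains in the default mask for the default
    // block to re-enable.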
2092     if (!this->pushExpression(*s.value())) {
2093         return unsupported();
2094     }
2095     fBuilder.push_loop_mask();
2096 
2097     // Zero out the loop mask; each case op will re-enable it as we go.
2098     fBuilder.mask_off_loop_mask();
2099 
2100     // Write each switch-case.
2101     bool foundDefaultCase = false;
2102     for (const std::unique_ptr<Statement>& stmt : cases) {
2103         int skipLabelID = fBuilder.nextLabelID();
2104 
2105         const SwitchCase& sc = stmt->as<SwitchCase>();
2106         if (sc.isDefault()) {
2107             foundDefaultCase = true;
2108             if (stmt.get() != cases.back().get()) {
2109                 // We only support a default case when it is the very last case. If that changes,
2110                 // this logic will need to be updated.
2111                 return unsupported();
2112             }
2113             // Keep whatever lanes are executing now, and also enable any lanes in the default mask.
2114             fBuilder.pop_and_reenable_loop_mask();
2115             // Execute the switch-case block, if any lanes are alive to see it.
2116             fBuilder.branch_if_no_lanes_active(skipLabelID);
2117             if (!this->writeStatement(*sc.statement())) {
2118                 return unsupported();
2119             }
2120         } else {
2121             // The case-op will enable the loop mask if the switch-value matches, and mask off lanes
2122             // from the default-mask.
2123             fBuilder.case_op(sc.value());
2124             // Execute the switch-case block, if any lanes are alive to see it.
2125             fBuilder.branch_if_no_lanes_active(skipLabelID);
2126             if (!this->writeStatement(*sc.statement())) {
2127                 return unsupported();
2128             }
2129         }
2130         fBuilder.label(skipLabelID);
2131     }
2132 
2133     // Jettison the switch value, and the default case mask if it was never consumed above.
2134     this->discardExpression(/*slots=*/foundDefaultCase ? 1 : 2);
2135 
2136     // If we hit a break statement on all lanes, we will branch here to escape from the switch.
2137     fBuilder.label(breakTarget.labelID());
2138 
2139     // Restore the loop mask.
2140     fBuilder.pop_loop_mask();
2141     fBuilder.disableExecutionMaskWrites();
2142     return true;
2143 }
2144 
2145 bool Generator::writeImmutableVarDeclaration(const VarDeclaration& d) {
2146     // In a debugging session, we expect debug traces for a variable declaration to appear, even if
2147     // it's constant, so we don't use immutable slots for variables when tracing is on.
2148     if (this->shouldWriteTraceOps()) {
2149         return false;
2150     }
2151 
2152     // Find the constant value for this variable.
2153     const Expression* initialValue = ConstantFolder::GetConstantValueForVariable(*d.value());
2154     SkASSERT(initialValue);
2155 
2156     // For a variable to be immutable, it cannot be written-to besides its initial declaration.
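    // Illustration (not from the original source): a hypothetical `const half3 kGray = half3(0.5);`
    // that is never reassigned has a write count of 1 and can live entirely in immutable slots.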
2157     ProgramUsage::VariableCounts counts = fProgram.fUsage->get(*d.var());
2158     if (counts.fWrite != 1) {
2159         return false;
2160     }
2161 
2162     STArray<16, ImmutableBits> immutableValues;
2163     if (!this->getImmutableValueForExpression(*initialValue, &immutableValues)) {
2164         return false;
2165     }
2166 
2167     fImmutableVariables.add(d.var());
2168 
2169     std::optional<SlotRange> preexistingSlots = this->findPreexistingImmutableData(immutableValues);
2170     if (preexistingSlots.has_value()) {
2171         // Associate this variable with a preexisting range of immutable data (no new data or code).
2172         fImmutableSlots.mapVariableToSlots(*d.var(), *preexistingSlots);
2173     } else {
2174         // Write the constant value out to immutable slots. (This generates data, but no
2175         // runtime code.)
2176         SlotRange slots = this->getImmutableSlots(*d.var());
2177         this->storeImmutableValueToSlots(immutableValues, slots);
2178     }
2179 
2180     return true;
2181 }
2182 
2183 bool Generator::writeVarDeclaration(const VarDeclaration& v) {
2184     if (v.value()) {
2185         // If a variable never actually changes, we can make it immutable.
2186         if (this->writeImmutableVarDeclaration(v)) {
2187             return true;
2188         }
2189         // This is a real variable which can change over the course of execution.
2190         if (!this->pushExpression(*v.value())) {
2191             return unsupported();
2192         }
2193         this->popToSlotRangeUnmasked(this->getVariableSlots(*v.var()));
2194     } else {
2195         this->zeroSlotRangeUnmasked(this->getVariableSlots(*v.var()));
2196     }
2197     return true;
2198 }
2199 
2200 bool Generator::pushExpression(const Expression& e, bool usesResult) {
2201     switch (e.kind()) {
2202         case Expression::Kind::kBinary:
2203             return this->pushBinaryExpression(e.as<BinaryExpression>());
2204 
2205         case Expression::Kind::kChildCall:
2206             return this->pushChildCall(e.as<ChildCall>());
2207 
2208         case Expression::Kind::kConstructorArray:
2209         case Expression::Kind::kConstructorArrayCast:
2210         case Expression::Kind::kConstructorCompound:
2211         case Expression::Kind::kConstructorStruct:
2212             return this->pushConstructorCompound(e.asAnyConstructor());
2213 
2214         case Expression::Kind::kConstructorCompoundCast:
2215         case Expression::Kind::kConstructorScalarCast:
2216             return this->pushConstructorCast(e.asAnyConstructor());
2217 
2218         case Expression::Kind::kConstructorDiagonalMatrix:
2219             return this->pushConstructorDiagonalMatrix(e.as<ConstructorDiagonalMatrix>());
2220 
2221         case Expression::Kind::kConstructorMatrixResize:
2222             return this->pushConstructorMatrixResize(e.as<ConstructorMatrixResize>());
2223 
2224         case Expression::Kind::kConstructorSplat:
2225             return this->pushConstructorSplat(e.as<ConstructorSplat>());
2226 
2227         case Expression::Kind::kEmpty:
2228             return true;
2229 
2230         case Expression::Kind::kFieldAccess:
2231             return this->pushFieldAccess(e.as<FieldAccess>());
2232 
2233         case Expression::Kind::kFunctionCall:
2234             return this->pushFunctionCall(e.as<FunctionCall>());
2235 
2236         case Expression::Kind::kIndex:
2237             return this->pushIndexExpression(e.as<IndexExpression>());
2238 
2239         case Expression::Kind::kLiteral:
2240             return this->pushLiteral(e.as<Literal>());
2241 
2242         case Expression::Kind::kPrefix:
2243             return this->pushPrefixExpression(e.as<PrefixExpression>());
2244 
2245         case Expression::Kind::kPostfix:
2246             return this->pushPostfixExpression(e.as<PostfixExpression>(), usesResult);
2247 
2248         case Expression::Kind::kSwizzle:
2249             return this->pushSwizzle(e.as<Swizzle>());
2250 
2251         case Expression::Kind::kTernary:
2252             return this->pushTernaryExpression(e.as<TernaryExpression>());
2253 
2254         case Expression::Kind::kVariableReference:
2255             return this->pushVariableReference(e.as<VariableReference>());
2256 
2257         default:
2258             return unsupported();
2259     }
2260 }
2261 
2262 BuilderOp Generator::GetTypedOp(const SkSL::Type& type, const TypedOps& ops) {
2263     switch (type.componentType().numberKind()) {
2264         case Type::NumberKind::kFloat:    return ops.fFloatOp;
2265         case Type::NumberKind::kSigned:   return ops.fSignedOp;
2266         case Type::NumberKind::kUnsigned: return ops.fUnsignedOp;
2267         case Type::NumberKind::kBoolean:  return ops.fBooleanOp;
2268         default:                          return BuilderOp::unsupported;
2269     }
2270 }
2271 
2272 bool Generator::unaryOp(const SkSL::Type& type, const TypedOps& ops) {
2273     BuilderOp op = GetTypedOp(type, ops);
2274     if (op == BuilderOp::unsupported) {
2275         return unsupported();
2276     }
2277     fBuilder.unary_op(op, type.slotCount());
2278     return true;
2279 }
2280 
2281 bool Generator::binaryOp(const SkSL::Type& type, const TypedOps& ops) {
2282     BuilderOp op = GetTypedOp(type, ops);
2283     if (op == BuilderOp::unsupported) {
2284         return unsupported();
2285     }
2286     fBuilder.binary_op(op, type.slotCount());
2287     return true;
2288 }
2289 
2290 bool Generator::ternaryOp(const SkSL::Type& type, const TypedOps& ops) {
2291     BuilderOp op = GetTypedOp(type, ops);
2292     if (op == BuilderOp::unsupported) {
2293         return unsupported();
2294     }
2295     fBuilder.ternary_op(op, type.slotCount());
2296     return true;
2297 }
2298 
2299 void Generator::foldWithMultiOp(BuilderOp op, int elements) {
2300     // Fold the top N elements on the stack using an op that supports multiple slots, e.g.:
2301     // (A + B + C + D) -> add_2_floats $0..1 += $2..3
2302     //                    add_float    $0    += $1
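    // Illustrative trace (not from the original source): with 9 elements, the loops below emit
    // binary ops of 4, 2, 1, and 1 slots, reducing the stack to a single folded value.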
2303     for (; elements >= 8; elements -= 4) {
2304         fBuilder.binary_op(op, /*slots=*/4);
2305     }
2306     for (; elements >= 6; elements -= 3) {
2307         fBuilder.binary_op(op, /*slots=*/3);
2308     }
2309     for (; elements >= 4; elements -= 2) {
2310         fBuilder.binary_op(op, /*slots=*/2);
2311     }
2312     for (; elements >= 2; elements -= 1) {
2313         fBuilder.binary_op(op, /*slots=*/1);
2314     }
2315 }
2316 
2317 bool Generator::pushLValueOrExpression(LValue* lvalue, const Expression& expr) {
2318     return lvalue ? this->push(*lvalue)
2319                   : this->pushExpression(expr);
2320 }
2321 
2322 bool Generator::pushMatrixMultiply(LValue* lvalue,
2323                                    const Expression& left,
2324                                    const Expression& right,
2325                                    int leftColumns,
2326                                    int leftRows,
2327                                    int rightColumns,
2328                                    int rightRows) {
2329     SkASSERT(left.type().isMatrix() || left.type().isVector());
2330     SkASSERT(right.type().isMatrix() || right.type().isVector());
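    // Illustration (not from the original source): for `float2x2 m; float2 v; m * v`, the caller
    // passes leftColumns=2, leftRows=2, rightColumns=1, rightRows=2, so two slots of padding are
    // reserved below to hold the float2 result.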
2331 
2332     // Insert padding space on the stack to hold the result.
2333     fBuilder.pad_stack(rightColumns * leftRows);
2334 
2335     // Push the left and right matrices onto the stack.
2336     if (!this->pushLValueOrExpression(lvalue, left) || !this->pushExpression(right)) {
2337         return unsupported();
2338     }
2339 
2340     fBuilder.matrix_multiply(leftColumns, leftRows, rightColumns, rightRows);
2341 
2342     // If this multiply was actually an assignment (via *=), write the result back to the lvalue.
2343     return lvalue ? this->store(*lvalue)
2344                   : true;
2345 }
2346 
2347 void Generator::foldComparisonOp(Operator op, int elements) {
2348     switch (op.kind()) {
2349         case OperatorKind::EQEQ:
2350             // equal(x,y) returns a vector; use & to fold into a scalar.
2351             this->foldWithMultiOp(BuilderOp::bitwise_and_n_ints, elements);
2352             break;
2353 
2354         case OperatorKind::NEQ:
2355             // notEqual(x,y) returns a vector; use | to fold into a scalar.
2356             this->foldWithMultiOp(BuilderOp::bitwise_or_n_ints, elements);
2357             break;
2358 
2359         default:
2360             SkDEBUGFAIL("comparison only allows == and !=");
2361             break;
2362     }
2363 }
2364 
2365 bool Generator::pushStructuredComparison(LValue* left,
2366                                          Operator op,
2367                                          LValue* right,
2368                                          const Type& type) {
2369     if (type.isStruct()) {
2370         // Compare every field in the struct.
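        // Illustration (not from the original source): comparing two values of a hypothetical
        // `struct S { float2 a; int b; }` with `==` compares slices {0,1} and {2} recursively,
        // then folds the two per-field results into one value below.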
2371         SkSpan<const Field> fields = type.fields();
2372         int currentSlot = 0;
2373         for (size_t index = 0; index < fields.size(); ++index) {
2374             const Type& fieldType = *fields[index].fType;
2375             const int   fieldSlotCount = fieldType.slotCount();
2376             UnownedLValueSlice fieldLeft {left,  currentSlot, fieldSlotCount};
2377             UnownedLValueSlice fieldRight{right, currentSlot, fieldSlotCount};
2378             if (!this->pushStructuredComparison(&fieldLeft, op, &fieldRight, fieldType)) {
2379                 return unsupported();
2380             }
2381             currentSlot += fieldSlotCount;
2382         }
2383 
2384         this->foldComparisonOp(op, fields.size());
2385         return true;
2386     }
2387 
2388     if (type.isArray()) {
2389         const Type& indexedType = type.componentType();
2390         if (indexedType.numberKind() == Type::NumberKind::kNonnumeric) {
2391             // Compare every element in the array.
2392             const int indexedSlotCount = indexedType.slotCount();
2393             int       currentSlot = 0;
2394             for (int index = 0; index < type.columns(); ++index) {
2395                 UnownedLValueSlice indexedLeft {left,  currentSlot, indexedSlotCount};
2396                 UnownedLValueSlice indexedRight{right, currentSlot, indexedSlotCount};
2397                 if (!this->pushStructuredComparison(&indexedLeft, op, &indexedRight, indexedType)) {
2398                     return unsupported();
2399                 }
2400                 currentSlot += indexedSlotCount;
2401             }
2402 
2403             this->foldComparisonOp(op, type.columns());
2404             return true;
2405         }
2406     }
2407 
2408     // We've winnowed down to a single element, or an array of homogeneous numeric elements.
2409     // Push the elements onto the stack, then compare them.
2410     if (!this->push(*left) || !this->push(*right)) {
2411         return unsupported();
2412     }
2413     switch (op.kind()) {
2414         case OperatorKind::EQEQ:
2415             if (!this->binaryOp(type, kEqualOps)) {
2416                 return unsupported();
2417             }
2418             break;
2419 
2420         case OperatorKind::NEQ:
2421             if (!this->binaryOp(type, kNotEqualOps)) {
2422                 return unsupported();
2423             }
2424             break;
2425 
2426         default:
2427             SkDEBUGFAIL("comparison only allows == and !=");
2428             break;
2429     }
2430 
2431     this->foldComparisonOp(op, type.slotCount());
2432     return true;
2433 }
2434 
2435 bool Generator::pushBinaryExpression(const BinaryExpression& e) {
2436     return this->pushBinaryExpression(*e.left(), e.getOperator(), *e.right());
2437 }
2438 
2439 bool Generator::pushBinaryExpression(const Expression& left, Operator op, const Expression& right) {
2440     switch (op.kind()) {
2441         // Rewrite greater-than ops as their less-than equivalents.
2442         case OperatorKind::GT:
2443             return this->pushBinaryExpression(right, OperatorKind::LT, left);
2444 
2445         case OperatorKind::GTEQ:
2446             return this->pushBinaryExpression(right, OperatorKind::LTEQ, left);
2447 
2448         // Handle struct and array comparisons.
2449         case OperatorKind::EQEQ:
2450         case OperatorKind::NEQ:
2451             if (left.type().isStruct() || left.type().isArray()) {
2452                 SkASSERT(left.type().matches(right.type()));
2453                 std::unique_ptr<LValue> lvLeft = this->makeLValue(left, /*allowScratch=*/true);
2454                 std::unique_ptr<LValue> lvRight = this->makeLValue(right, /*allowScratch=*/true);
2455                 return this->pushStructuredComparison(lvLeft.get(), op, lvRight.get(), left.type());
2456             }
2457             [[fallthrough]];
2458 
2459         // Rewrite commutative ops so that the literal is on the right-hand side. This gives the
2460         // Builder more opportunities to use immediate-mode ops.
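        // Illustration (not from the original source): `2.0 + x` is reordered to `x + 2.0`, which
        // gives the Builder a chance to use an immediate-mode op instead of pushing the constant
        // separately.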
2461         case OperatorKind::PLUS:
2462         case OperatorKind::STAR:
2463         case OperatorKind::BITWISEAND:
2464         case OperatorKind::BITWISEXOR:
2465         case OperatorKind::LOGICALXOR: {
2466             double unused;
2467             if (ConstantFolder::GetConstantValue(left, &unused) &&
2468                 !ConstantFolder::GetConstantValue(right, &unused)) {
2469                 return this->pushBinaryExpression(right, op, left);
2470             }
2471             break;
2472         }
2473         // Emit comma expressions.
2474         case OperatorKind::COMMA:
2475             if (Analysis::HasSideEffects(left)) {
2476                 if (!this->pushExpression(left, /*usesResult=*/false)) {
2477                     return unsupported();
2478                 }
2479                 this->discardExpression(left.type().slotCount());
2480             }
2481             return this->pushExpression(right);
2482 
2483         default:
2484             break;
2485     }
2486 
2487     // Handle binary expressions with mismatched types.
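    // Illustration (not from the original source): in `float3 v; v * 2.0`, the scalar right-hand
    // side is vectorized (splatted across three slots) so both operands occupy float3-sized slots.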
2488     bool vectorizeLeft = false, vectorizeRight = false;
2489     if (!left.type().matches(right.type())) {
2490         if (left.type().componentType().numberKind() != right.type().componentType().numberKind()) {
2491             return unsupported();
2492         }
2493         if (left.type().isScalar() && (right.type().isVector() || right.type().isMatrix())) {
2494             vectorizeLeft = true;
2495         } else if ((left.type().isVector() || left.type().isMatrix()) && right.type().isScalar()) {
2496             vectorizeRight = true;
2497         }
2498     }
2499 
2500     const Type& type = vectorizeLeft ? right.type() : left.type();
2501 
2502     // If this is an assignment...
2503     std::unique_ptr<LValue> lvalue;
2504     if (op.isAssignment()) {
2505         // ... turn the left side into an lvalue.
2506         lvalue = this->makeLValue(left);
2507         if (!lvalue) {
2508             return unsupported();
2509         }
2510 
2511         // Handle simple assignment (`var = expr`).
2512         if (op.kind() == OperatorKind::EQ) {
2513             return this->pushExpression(right) &&
2514                    this->store(*lvalue);
2515         }
2516 
2517         // Strip off the assignment from the op (turning += into +).
2518         op = op.removeAssignment();
2519     }
2520 
2521     // Handle matrix multiplication (MxM/MxV/VxM).
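         // (A floatCxR matrix has C columns and R rows. A vector operand is treated as a one-row
         // matrix for V*M and as a one-column matrix for M*V, which is why the dimensions passed
         // below differ between those two cases.)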
2522     if (op.kind() == OperatorKind::STAR) {
2523         // Matrix * matrix:
2524         if (left.type().isMatrix() && right.type().isMatrix()) {
2525             return this->pushMatrixMultiply(lvalue.get(), left, right,
2526                                             left.type().columns(), left.type().rows(),
2527                                             right.type().columns(), right.type().rows());
2528         }
2529 
2530         // Vector * matrix:
2531         if (left.type().isVector() && right.type().isMatrix()) {
2532             return this->pushMatrixMultiply(lvalue.get(), left, right,
2533                                             left.type().columns(), 1,
2534                                             right.type().columns(), right.type().rows());
2535         }
2536 
2537         // Matrix * vector:
2538         if (left.type().isMatrix() && right.type().isVector()) {
2539             return this->pushMatrixMultiply(lvalue.get(), left, right,
2540                                             left.type().columns(), left.type().rows(),
2541                                             1, right.type().columns());
2542         }
2543     }
2544 
2545     if (!vectorizeLeft && !vectorizeRight && !type.matches(right.type())) {
2546         // We have mismatched types but don't know how to handle them.
2547         return unsupported();
2548     }
2549 
2550     // Handle binary ops which require short-circuiting.
2551     switch (op.kind()) {
2552         case OperatorKind::LOGICALAND:
2553             if (Analysis::HasSideEffects(right)) {
2554                 // If the RHS has side effects, we rewrite `a && b` as `a ? b : false`. This
2555                 // generates pretty solid code and gives us the required short-circuit behavior.
2556                 SkASSERT(!op.isAssignment());
2557                 SkASSERT(type.componentType().isBoolean());
2558                 SkASSERT(type.slotCount() == 1);  // operator&& only works with scalar types
2559                 Literal falseLiteral{Position{}, 0.0, &right.type()};
2560                 return this->pushTernaryExpression(left, right, falseLiteral);
2561             }
2562             break;
2563 
2564         case OperatorKind::LOGICALOR:
2565             if (Analysis::HasSideEffects(right)) {
2566                 // If the RHS has side effects, we rewrite `a || b` as `a ? true : b`.
2567                 SkASSERT(!op.isAssignment());
2568                 SkASSERT(type.componentType().isBoolean());
2569                 SkASSERT(type.slotCount() == 1);  // operator|| only works with scalar types
2570                 Literal trueLiteral{Position{}, 1.0, &right.type()};
2571                 return this->pushTernaryExpression(left, trueLiteral, right);
2572             }
2573             break;
2574 
2575         default:
2576             break;
2577     }
2578 
2579     // Push the left- and right-expressions onto the stack.
2580     if (!this->pushLValueOrExpression(lvalue.get(), left)) {
2581         return unsupported();
2582     }
2583     if (vectorizeLeft) {
2584         fBuilder.push_duplicates(right.type().slotCount() - 1);
2585     }
2586     if (!this->pushExpression(right)) {
2587         return unsupported();
2588     }
2589     if (vectorizeRight) {
2590         fBuilder.push_duplicates(left.type().slotCount() - 1);
2591     }
2592 
2593     switch (op.kind()) {
2594         case OperatorKind::PLUS:
2595             if (!this->binaryOp(type, kAddOps)) {
2596                 return unsupported();
2597             }
2598             break;
2599 
2600         case OperatorKind::MINUS:
2601             if (!this->binaryOp(type, kSubtractOps)) {
2602                 return unsupported();
2603             }
2604             break;
2605 
2606         case OperatorKind::STAR:
2607             if (!this->binaryOp(type, kMultiplyOps)) {
2608                 return unsupported();
2609             }
2610             break;
2611 
2612         case OperatorKind::SLASH:
2613             if (!this->binaryOp(type, kDivideOps)) {
2614                 return unsupported();
2615             }
2616             break;
2617 
2618         case OperatorKind::LT:
2619         case OperatorKind::GT:
2620             if (!this->binaryOp(type, kLessThanOps)) {
2621                 return unsupported();
2622             }
2623             SkASSERT(type.slotCount() == 1);  // operator< only works with scalar types
2624             break;
2625 
2626         case OperatorKind::LTEQ:
2627         case OperatorKind::GTEQ:
2628             if (!this->binaryOp(type, kLessThanEqualOps)) {
2629                 return unsupported();
2630             }
2631             SkASSERT(type.slotCount() == 1);  // operator<= only works with scalar types
2632             break;
2633 
2634         case OperatorKind::EQEQ:
2635             if (!this->binaryOp(type, kEqualOps)) {
2636                 return unsupported();
2637             }
2638             this->foldComparisonOp(op, type.slotCount());
2639             break;
2640 
2641         case OperatorKind::NEQ:
2642             if (!this->binaryOp(type, kNotEqualOps)) {
2643                 return unsupported();
2644             }
2645             this->foldComparisonOp(op, type.slotCount());
2646             break;
2647 
2648         case OperatorKind::LOGICALAND:
2649         case OperatorKind::BITWISEAND:
2650             // For logical-and, we verified above that the RHS does not have side effects, so we
2651             // don't need to worry about short-circuiting side effects.
2652             fBuilder.binary_op(BuilderOp::bitwise_and_n_ints, type.slotCount());
2653             break;
2654 
2655         case OperatorKind::LOGICALOR:
2656         case OperatorKind::BITWISEOR:
2657             // For logical-or, we verified above that the RHS does not have side effects.
2658             fBuilder.binary_op(BuilderOp::bitwise_or_n_ints, type.slotCount());
2659             break;
2660 
2661         case OperatorKind::LOGICALXOR:
2662         case OperatorKind::BITWISEXOR:
2663             // Logical-xor does not short circuit.
2664             fBuilder.binary_op(BuilderOp::bitwise_xor_n_ints, type.slotCount());
2665             break;
2666 
2667         default:
2668             return unsupported();
2669     }
2670 
2671     // If we have an lvalue, we need to write the result back into it.
2672     return lvalue ? this->store(*lvalue)
2673                   : true;
2674 }
2675 
2676 std::optional<Generator::ImmutableBits> Generator::getImmutableBitsForSlot(const Expression& expr,
2677                                                                            size_t slot) {
2678     // Determine the constant-value of the slot; bail if it isn't constant.
2679     std::optional<double> v = expr.getConstantValue(slot);
2680     if (!v.has_value()) {
2681         return std::nullopt;
2682     }
2683     // Determine the number-kind of the slot, and convert the value to its bit-representation.
2684     Type::NumberKind kind = expr.type().slotType(slot).numberKind();
2685     double value = *v;
2686     switch (kind) {
2687         case Type::NumberKind::kFloat:
2688             return sk_bit_cast<ImmutableBits>((float)value);
2689 
2690         case Type::NumberKind::kSigned:
2691             return sk_bit_cast<ImmutableBits>((int32_t)value);
2692 
2693         case Type::NumberKind::kUnsigned:
2694             return sk_bit_cast<ImmutableBits>((uint32_t)value);
2695 
2696         case Type::NumberKind::kBoolean:
2697             return value ? ~0 : 0;
2698 
2699         default:
2700             return std::nullopt;
2701     }
2702 }
2703 
2704 bool Generator::getImmutableValueForExpression(const Expression& expr,
2705                                                TArray<ImmutableBits>* immutableValues) {
2706     if (!expr.supportsConstantValues()) {
2707         return false;
2708     }
2709     size_t numSlots = expr.type().slotCount();
2710     immutableValues->reserve_exact(numSlots);
2711     for (size_t index = 0; index < numSlots; ++index) {
2712         std::optional<ImmutableBits> bits = this->getImmutableBitsForSlot(expr, index);
2713         if (!bits.has_value()) {
2714             return false;
2715         }
2716         immutableValues->push_back(*bits);
2717     }
2718     return true;
2719 }
2720 
2721 void Generator::storeImmutableValueToSlots(const TArray<ImmutableBits>& immutableValues,
2722                                            SlotRange slots) {
2723     for (int index = 0; index < slots.count; ++index) {
2724         // Store the immutable value in its slot.
2725         const Slot slot = slots.index++;
2726         const ImmutableBits bits = immutableValues[index];
2727         fBuilder.store_immutable_value_i(slot, bits);
2728 
2729         // Keep track of every stored immutable value for potential later reuse.
2730         fImmutableSlotMap[bits].add(slot);
2731     }
2732 }
2733 
2734 std::optional<SlotRange> Generator::findPreexistingImmutableData(
2735         const TArray<ImmutableBits>& immutableValues) {
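         // For example, if we need the constant vector {1.0, 2.0, 0.5} and three consecutive
         // immutable slots already hold exactly those bit patterns in that order, we can reuse the
         // existing slot range instead of allocating new immutable slots.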
2736     STArray<16, const THashSet<Slot>*> slotArray;
2737     slotArray.reserve_exact(immutableValues.size());
2738 
2739     // Find all the slots associated with each immutable-value bit representation.
2740     // If a given bit-pattern doesn't exist anywhere in our program yet, we can stop searching.
2741     for (const ImmutableBits& immutableValue : immutableValues) {
2742         const THashSet<Slot>* slotsForValue = fImmutableSlotMap.find(immutableValue);
2743         if (!slotsForValue) {
2744             return std::nullopt;
2745         }
2746         slotArray.push_back(slotsForValue);
2747     }
2748 
2749     // Look for the group with the fewest entries, since it can be searched with the least
2750     // effort.
2751     int leastSlotIndex = 0, leastSlotCount = INT_MAX;
2752     for (int index = 0; index < slotArray.size(); ++index) {
2753         int currentCount = slotArray[index]->count();
2754         if (currentCount < leastSlotCount) {
2755             leastSlotIndex = index;
2756             leastSlotCount = currentCount;
2757         }
2758     }
2759 
2760     // See if we can reconstitute the value that we want with any of the data we've already got.
2761     for (int slot : *slotArray[leastSlotIndex]) {
2762         int firstSlot = slot - leastSlotIndex;
2763         bool found = true;
2764         for (int index = 0; index < slotArray.size(); ++index) {
2765             if (!slotArray[index]->contains(firstSlot + index)) {
2766                 found = false;
2767                 break;
2768             }
2769         }
2770         if (found) {
2771             // We've found an exact match for the input value; return its slot-range.
2772             return SlotRange{firstSlot, slotArray.size()};
2773         }
2774     }
2775 
2776     // We didn't find any reusable slot ranges.
2777     return std::nullopt;
2778 }
2779 
2780 bool Generator::pushImmutableData(const Expression& e) {
2781     STArray<16, ImmutableBits> immutableValues;
2782     if (!this->getImmutableValueForExpression(e, &immutableValues)) {
2783         return false;
2784     }
2785     std::optional<SlotRange> preexistingData = this->findPreexistingImmutableData(immutableValues);
2786     if (preexistingData.has_value()) {
2787         fBuilder.push_immutable(*preexistingData);
2788         return true;
2789     }
2790     SlotRange range = fImmutableSlots.createSlots(e.description(),
2791                                                   e.type(),
2792                                                   e.fPosition,
2793                                                   /*isFunctionReturnValue=*/false);
2794     this->storeImmutableValueToSlots(immutableValues, range);
2795     fBuilder.push_immutable(range);
2796     return true;
2797 }
2798 
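     // A compound constructor whose arguments are all compile-time constants (e.g.
     // `half4(1, 0, 0, 1)`) can be served directly from immutable slots; otherwise, each argument
     // is pushed onto the stack in order.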
2799 bool Generator::pushConstructorCompound(const AnyConstructor& c) {
2800     if (c.type().slotCount() > 1 && this->pushImmutableData(c)) {
2801         return true;
2802     }
2803     for (const std::unique_ptr<Expression> &arg : c.argumentSpan()) {
2804         if (!this->pushExpression(*arg)) {
2805             return unsupported();
2806         }
2807     }
2808     return true;
2809 }
2810 
2811 bool Generator::pushChildCall(const ChildCall& c) {
2812     int* childIdx = fChildEffectMap.find(&c.child());
2813     SkASSERT(childIdx != nullptr);
2814     SkASSERT(!c.arguments().empty());
2815 
2816     // All child calls have at least one argument.
2817     const Expression* arg = c.arguments()[0].get();
2818     if (!this->pushExpression(*arg)) {
2819         return unsupported();
2820     }
2821 
2822     // Copy arguments from the stack into src/dst as required by this particular child-call.
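         // (`exchange_src` swaps the top four stack values with src.rgba; it is used both to move
         // the argument into src and, once the child returns, to recover the saved execution mask.)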
2823     switch (c.child().type().typeKind()) {
2824         case Type::TypeKind::kShader: {
2825             // The argument must be a float2.
2826             SkASSERT(c.arguments().size() == 1);
2827             SkASSERT(arg->type().matches(*fContext.fTypes.fFloat2));
2828 
2829             // `exchange_src` will use the top four values on the stack, but we don't care what goes
2830             // into the blue/alpha components. We inject padding here to balance the stack.
2831             fBuilder.pad_stack(2);
2832 
2833             // Move the argument into src.rgba while also preserving the execution mask.
2834             fBuilder.exchange_src();
2835             fBuilder.invoke_shader(*childIdx);
2836             break;
2837         }
2838         case Type::TypeKind::kColorFilter: {
2839             // The argument must be a half4/float4.
2840             SkASSERT(c.arguments().size() == 1);
2841             SkASSERT(arg->type().matches(*fContext.fTypes.fHalf4) ||
2842                      arg->type().matches(*fContext.fTypes.fFloat4));
2843 
2844             // Move the argument into src.rgba while also preserving the execution mask.
2845             fBuilder.exchange_src();
2846             fBuilder.invoke_color_filter(*childIdx);
2847             break;
2848         }
2849         case Type::TypeKind::kBlender: {
2850             // Both arguments must be half4/float4.
2851             SkASSERT(c.arguments().size() == 2);
2852             SkASSERT(c.arguments()[0]->type().matches(*fContext.fTypes.fHalf4) ||
2853                      c.arguments()[0]->type().matches(*fContext.fTypes.fFloat4));
2854             SkASSERT(c.arguments()[1]->type().matches(*fContext.fTypes.fHalf4) ||
2855                      c.arguments()[1]->type().matches(*fContext.fTypes.fFloat4));
2856 
2857             // Move the second argument into dst.rgba, and the first argument into src.rgba, while
2858             // simultaneously preserving the execution mask.
2859             if (!this->pushExpression(*c.arguments()[1])) {
2860                 return unsupported();
2861             }
2862             fBuilder.pop_dst_rgba();
2863             fBuilder.exchange_src();
2864             fBuilder.invoke_blender(*childIdx);
2865             break;
2866         }
2867         default: {
2868             SkDEBUGFAILF("cannot sample from type '%s'", c.child().type().description().c_str());
2869         }
2870     }
2871 
2872     // The child call has returned the result color via src.rgba, and the SkRP execution mask is
2873     // on top of the stack. Swapping the two puts the result color on top of the stack, and also
2874     // restores our execution masks.
2875     fBuilder.exchange_src();
2876     return true;
2877 }
2878 
2879 bool Generator::pushConstructorCast(const AnyConstructor& c) {
2880     SkASSERT(c.argumentSpan().size() == 1);
2881     const Expression& inner = *c.argumentSpan().front();
2882     SkASSERT(inner.type().slotCount() == c.type().slotCount());
2883 
2884     if (!this->pushExpression(inner)) {
2885         return unsupported();
2886     }
2887     const Type::NumberKind innerKind = inner.type().componentType().numberKind();
2888     const Type::NumberKind outerKind = c.type().componentType().numberKind();
2889 
2890     if (innerKind == outerKind) {
2891         // Since we ignore type precision, this cast is effectively a no-op.
2892         return true;
2893     }
2894 
2895     switch (innerKind) {
2896         case Type::NumberKind::kSigned:
2897             if (outerKind == Type::NumberKind::kUnsigned) {
2898                 // Treat uint(int) as a no-op.
2899                 return true;
2900             }
2901             if (outerKind == Type::NumberKind::kFloat) {
2902                 fBuilder.unary_op(BuilderOp::cast_to_float_from_int, c.type().slotCount());
2903                 return true;
2904             }
2905             break;
2906 
2907         case Type::NumberKind::kUnsigned:
2908             if (outerKind == Type::NumberKind::kSigned) {
2909                 // Treat int(uint) as a no-op.
2910                 return true;
2911             }
2912             if (outerKind == Type::NumberKind::kFloat) {
2913                 fBuilder.unary_op(BuilderOp::cast_to_float_from_uint, c.type().slotCount());
2914                 return true;
2915             }
2916             break;
2917 
2918         case Type::NumberKind::kBoolean:
2919             // Converting boolean to int or float can be accomplished via bitwise-and.
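                 // (Booleans live on the stack as ~0/0 masks, so ANDing them against the bit
                 // pattern of 1 or 1.0f yields exactly 1 or 0 in the destination type.)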
2920             if (outerKind == Type::NumberKind::kFloat) {
2921                 fBuilder.push_constant_f(1.0f);
2922             } else if (outerKind == Type::NumberKind::kSigned ||
2923                        outerKind == Type::NumberKind::kUnsigned) {
2924                 fBuilder.push_constant_i(1);
2925             } else {
2926                 SkDEBUGFAILF("unexpected cast from bool to %s", c.type().description().c_str());
2927                 return unsupported();
2928             }
2929             fBuilder.push_duplicates(c.type().slotCount() - 1);
2930             fBuilder.binary_op(BuilderOp::bitwise_and_n_ints, c.type().slotCount());
2931             return true;
2932 
2933         case Type::NumberKind::kFloat:
2934             if (outerKind == Type::NumberKind::kSigned) {
2935                 fBuilder.unary_op(BuilderOp::cast_to_int_from_float, c.type().slotCount());
2936                 return true;
2937             }
2938             if (outerKind == Type::NumberKind::kUnsigned) {
2939                 fBuilder.unary_op(BuilderOp::cast_to_uint_from_float, c.type().slotCount());
2940                 return true;
2941             }
2942             break;
2943 
2944         case Type::NumberKind::kNonnumeric:
2945             break;
2946     }
2947 
2948     if (outerKind == Type::NumberKind::kBoolean) {
2949         // Converting int or float to boolean can be accomplished via `notEqual(x, 0)`.
2950         fBuilder.push_zeros(c.type().slotCount());
2951         return this->binaryOp(inner.type(), kNotEqualOps);
2952     }
2953 
2954     SkDEBUGFAILF("unexpected cast from %s to %s",
2955                  inner.type().description().c_str(), c.type().description().c_str());
2956     return unsupported();
2957 }
2958 
2959 bool Generator::pushConstructorDiagonalMatrix(const ConstructorDiagonalMatrix& c) {
2960     if (this->pushImmutableData(c)) {
2961         return true;
2962     }
2963     fBuilder.push_zeros(1);
2964     if (!this->pushExpression(*c.argument())) {
2965         return unsupported();
2966     }
2967     fBuilder.diagonal_matrix(c.type().columns(), c.type().rows());
2968 
2969     return true;
2970 }
2971 
2972 bool Generator::pushConstructorMatrixResize(const ConstructorMatrixResize& c) {
2973     if (!this->pushExpression(*c.argument())) {
2974         return unsupported();
2975     }
2976     fBuilder.matrix_resize(c.argument()->type().columns(),
2977                            c.argument()->type().rows(),
2978                            c.type().columns(),
2979                            c.type().rows());
2980     return true;
2981 }
2982 
2983 bool Generator::pushConstructorSplat(const ConstructorSplat& c) {
2984     if (!this->pushExpression(*c.argument())) {
2985         return unsupported();
2986     }
2987     fBuilder.push_duplicates(c.type().slotCount() - 1);
2988     return true;
2989 }
2990 
2991 bool Generator::pushFieldAccess(const FieldAccess& f) {
2992     // If possible, get direct field access via the lvalue.
2993     std::unique_ptr<LValue> lvalue = this->makeLValue(f, /*allowScratch=*/true);
2994     return lvalue && this->push(*lvalue);
2995 }
2996 
2997 bool Generator::pushFunctionCall(const FunctionCall& c) {
2998     if (c.function().isIntrinsic()) {
2999         return this->pushIntrinsic(c);
3000     }
3001 
3002     // Keep track of the current function.
3003     const FunctionDefinition* lastFunction = fCurrentFunction;
3004     fCurrentFunction = c.function().definition();
3005 
3006     // Skip over the function body entirely if there are no active lanes.
3007     // (If the function call was trivial, it would likely have been inlined in the frontend, so we
3008     // assume here that function calls generally represent a significant amount of work.)
3009     int skipLabelID = fBuilder.nextLabelID();
3010     fBuilder.branch_if_no_lanes_active(skipLabelID);
3011 
3012     // Emit the function body.
3013     std::optional<SlotRange> r = this->writeFunction(c, *fCurrentFunction, c.arguments());
3014     if (!r.has_value()) {
3015         return unsupported();
3016     }
3017 
3018     // If the function uses result slots, move its result from slots onto the stack.
3019     if (this->needsFunctionResultSlots(fCurrentFunction)) {
3020         fBuilder.push_slots(*r);
3021     }
3022 
3023     // We've returned back to the last function.
3024     fCurrentFunction = lastFunction;
3025 
3026     // Emit the label that `branch_if_no_lanes_active` above jumps to when every lane was inactive.
3027     fBuilder.label(skipLabelID);
3028     return true;
3029 }
3030 
3031 bool Generator::pushIndexExpression(const IndexExpression& i) {
3032     std::unique_ptr<LValue> lvalue = this->makeLValue(i, /*allowScratch=*/true);
3033     return lvalue && this->push(*lvalue);
3034 }
3035 
3036 bool Generator::pushIntrinsic(const FunctionCall& c) {
3037     const ExpressionArray& args = c.arguments();
3038     switch (args.size()) {
3039         case 1:
3040             return this->pushIntrinsic(c.function().intrinsicKind(), *args[0]);
3041 
3042         case 2:
3043             return this->pushIntrinsic(c.function().intrinsicKind(), *args[0], *args[1]);
3044 
3045         case 3:
3046             return this->pushIntrinsic(c.function().intrinsicKind(), *args[0], *args[1], *args[2]);
3047 
3048         default:
3049             break;
3050     }
3051 
3052     return unsupported();
3053 }
3054 
3055 bool Generator::pushLengthIntrinsic(int slotCount) {
3056     if (slotCount == 1) {
3057         // `length(scalar)` is `sqrt(x^2)`, which is equivalent to `abs(x)`.
3058         return this->pushAbsFloatIntrinsic(/*slots=*/1);
3059     }
3060     // Implement `length(vec)` as `sqrt(dot(x, x))`.
3061     fBuilder.push_clone(slotCount);
3062     fBuilder.dot_floats(slotCount);
3063     fBuilder.unary_op(BuilderOp::sqrt_float, 1);
3064     return true;
3065 }
3066 
3067 bool Generator::pushAbsFloatIntrinsic(int slots) {
3068     // Perform abs(float) by masking off the sign bit.
3069     fBuilder.push_constant_u(0x7FFFFFFF, slots);
3070     fBuilder.binary_op(BuilderOp::bitwise_and_n_ints, slots);
3071     return true;
3072 }
3073 
3074 bool Generator::pushVectorizedExpression(const Expression& expr, const Type& vectorType) {
3075     if (!this->pushExpression(expr)) {
3076         return unsupported();
3077     }
3078     if (vectorType.slotCount() > expr.type().slotCount()) {
3079         SkASSERT(expr.type().slotCount() == 1);
3080         fBuilder.push_duplicates(vectorType.slotCount() - expr.type().slotCount());
3081     }
3082     return true;
3083 }
3084 
3085 bool Generator::pushIntrinsic(const TypedOps& ops, const Expression& arg0) {
3086     if (!this->pushExpression(arg0)) {
3087         return unsupported();
3088     }
3089     return this->unaryOp(arg0.type(), ops);
3090 }
3091 
3092 bool Generator::pushIntrinsic(BuilderOp builderOp, const Expression& arg0) {
3093     if (!this->pushExpression(arg0)) {
3094         return unsupported();
3095     }
3096     fBuilder.unary_op(builderOp, arg0.type().slotCount());
3097     return true;
3098 }
3099 
3100 bool Generator::pushIntrinsic(IntrinsicKind intrinsic, const Expression& arg0) {
3101     switch (intrinsic) {
3102         case IntrinsicKind::k_abs_IntrinsicKind:
3103             if (arg0.type().componentType().isFloat()) {
3104                 // Perform abs(float) by masking off the sign bit.
3105                 if (!this->pushExpression(arg0)) {
3106                     return unsupported();
3107                 }
3108                 return this->pushAbsFloatIntrinsic(arg0.type().slotCount());
3109             }
3110             // We have a dedicated op for abs(int).
3111             return this->pushIntrinsic(BuilderOp::abs_int, arg0);
3112 
3113         case IntrinsicKind::k_any_IntrinsicKind:
3114             if (!this->pushExpression(arg0)) {
3115                 return unsupported();
3116             }
3117             this->foldWithMultiOp(BuilderOp::bitwise_or_n_ints, arg0.type().slotCount());
3118             return true;
3119 
3120         case IntrinsicKind::k_all_IntrinsicKind:
3121             if (!this->pushExpression(arg0)) {
3122                 return unsupported();
3123             }
3124             this->foldWithMultiOp(BuilderOp::bitwise_and_n_ints, arg0.type().slotCount());
3125             return true;
3126 
3127         case IntrinsicKind::k_acos_IntrinsicKind:
3128             return this->pushIntrinsic(BuilderOp::acos_float, arg0);
3129 
3130         case IntrinsicKind::k_asin_IntrinsicKind:
3131             return this->pushIntrinsic(BuilderOp::asin_float, arg0);
3132 
3133         case IntrinsicKind::k_atan_IntrinsicKind:
3134             return this->pushIntrinsic(BuilderOp::atan_float, arg0);
3135 
3136         case IntrinsicKind::k_ceil_IntrinsicKind:
3137             return this->pushIntrinsic(BuilderOp::ceil_float, arg0);
3138 
3139         case IntrinsicKind::k_cos_IntrinsicKind:
3140             return this->pushIntrinsic(BuilderOp::cos_float, arg0);
3141 
3142         case IntrinsicKind::k_degrees_IntrinsicKind: {
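                 // Implement degrees(x) as `x * (180 / pi)`; 57.2957795131 is 180/pi.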
3143             Literal lit180OverPi{Position{}, 57.2957795131f, &arg0.type().componentType()};
3144             return this->pushBinaryExpression(arg0, OperatorKind::STAR, lit180OverPi);
3145         }
3146         case IntrinsicKind::k_floatBitsToInt_IntrinsicKind:
3147         case IntrinsicKind::k_floatBitsToUint_IntrinsicKind:
3148         case IntrinsicKind::k_intBitsToFloat_IntrinsicKind:
3149         case IntrinsicKind::k_uintBitsToFloat_IntrinsicKind:
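                 // These bit-reinterpretation casts are no-ops: stack slots hold raw 32-bit values,
                 // so the bits can be pushed through unchanged.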
3150             return this->pushExpression(arg0);
3151 
3152         case IntrinsicKind::k_exp_IntrinsicKind:
3153             return this->pushIntrinsic(BuilderOp::exp_float, arg0);
3154 
3155         case IntrinsicKind::k_exp2_IntrinsicKind:
3156             return this->pushIntrinsic(BuilderOp::exp2_float, arg0);
3157 
3158         case IntrinsicKind::k_floor_IntrinsicKind:
3159             return this->pushIntrinsic(BuilderOp::floor_float, arg0);
3160 
3161         case IntrinsicKind::k_fract_IntrinsicKind:
3162             // Implement fract as `x - floor(x)`.
3163             if (!this->pushExpression(arg0)) {
3164                 return unsupported();
3165             }
3166             fBuilder.push_clone(arg0.type().slotCount());
3167             fBuilder.unary_op(BuilderOp::floor_float, arg0.type().slotCount());
3168             return this->binaryOp(arg0.type(), kSubtractOps);
3169 
3170         case IntrinsicKind::k_inverse_IntrinsicKind:
3171             SkASSERT(arg0.type().isMatrix());
3172             SkASSERT(arg0.type().rows() == arg0.type().columns());
3173             if (!this->pushExpression(arg0)) {
3174                 return unsupported();
3175             }
3176             fBuilder.inverse_matrix(arg0.type().rows());
3177             return true;
3178 
3179         case IntrinsicKind::k_inversesqrt_IntrinsicKind:
3180             return this->pushIntrinsic(kInverseSqrtOps, arg0);
3181 
3182         case IntrinsicKind::k_length_IntrinsicKind:
3183             return this->pushExpression(arg0) &&
3184                    this->pushLengthIntrinsic(arg0.type().slotCount());
3185 
3186         case IntrinsicKind::k_log_IntrinsicKind:
3187             if (!this->pushExpression(arg0)) {
3188                 return unsupported();
3189             }
3190             fBuilder.unary_op(BuilderOp::log_float, arg0.type().slotCount());
3191             return true;
3192 
3193         case IntrinsicKind::k_log2_IntrinsicKind:
3194             if (!this->pushExpression(arg0)) {
3195                 return unsupported();
3196             }
3197             fBuilder.unary_op(BuilderOp::log2_float, arg0.type().slotCount());
3198             return true;
3199 
3200         case IntrinsicKind::k_normalize_IntrinsicKind: {
3201             // Implement normalize as `x / length(x)`. First, push the expression.
3202             if (!this->pushExpression(arg0)) {
3203                 return unsupported();
3204             }
3205             int slotCount = arg0.type().slotCount();
3206             if (slotCount > 1) {
3207 #if defined(SK_USE_RSQRT_IN_RP_NORMALIZE)
3208                 // Instead of `x / sqrt(dot(x, x))`, we can get roughly the same result in less time
3209                 // by computing `x * invsqrt(dot(x, x))`.
3210                 fBuilder.push_clone(slotCount);
3211                 fBuilder.push_clone(slotCount);
3212                 fBuilder.dot_floats(slotCount);
3213 
3214                 // Compute `vec(inversesqrt(dot(x, x)))`.
3215                 fBuilder.unary_op(BuilderOp::invsqrt_float, 1);
3216                 fBuilder.push_duplicates(slotCount - 1);
3217 
3218                 // Return `x * vec(inversesqrt(dot(x, x)))`.
3219                 return this->binaryOp(arg0.type(), kMultiplyOps);
3220 #else
3221                 // TODO: We can get roughly the same result in less time by using `invsqrt`, but
3222                 // that leads to more variance across architectures, which Chromium layout tests do
3223                 // not handle nicely.
3224                 fBuilder.push_clone(slotCount);
3225                 fBuilder.push_clone(slotCount);
3226                 fBuilder.dot_floats(slotCount);
3227 
3228                 // Compute `vec(sqrt(dot(x, x)))`.
3229                 fBuilder.unary_op(BuilderOp::sqrt_float, 1);
3230                 fBuilder.push_duplicates(slotCount - 1);
3231 
3232                 // Return `x / vec(sqrt(dot(x, x)))`.
3233                 return this->binaryOp(arg0.type(), kDivideOps);
3234 #endif
3235             } else {
3236                 // For single-slot normalization, we can simplify `sqrt(x * x)` into `abs(x)`.
3237                 fBuilder.push_clone(slotCount);
3238                 return this->pushAbsFloatIntrinsic(/*slots=*/1) &&
3239                        this->binaryOp(arg0.type(), kDivideOps);
3240             }
3241         }
3242         case IntrinsicKind::k_not_IntrinsicKind:
3243             return this->pushPrefixExpression(OperatorKind::LOGICALNOT, arg0);
3244 
3245         case IntrinsicKind::k_radians_IntrinsicKind: {
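                 // Implement radians(x) as `x * (pi / 180)`; 0.01745329251 is pi/180.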
3246             Literal litPiOver180{Position{}, 0.01745329251f, &arg0.type().componentType()};
3247             return this->pushBinaryExpression(arg0, OperatorKind::STAR, litPiOver180);
3248         }
3249         case IntrinsicKind::k_saturate_IntrinsicKind: {
3250             // Implement saturate as clamp(arg, 0, 1).
3251             Literal zeroLiteral{Position{}, 0.0, &arg0.type().componentType()};
3252             Literal oneLiteral{Position{}, 1.0, &arg0.type().componentType()};
3253             return this->pushIntrinsic(k_clamp_IntrinsicKind, arg0, zeroLiteral, oneLiteral);
3254         }
3255         case IntrinsicKind::k_sign_IntrinsicKind: {
3256             // Implement floating-point sign() as `clamp(arg * FLT_MAX, -1, 1)`.
3257             // FLT_MIN * FLT_MAX evaluates to 4, so multiplying any float value against FLT_MAX is
3258             // sufficient to ensure that |value| is always 1 or greater (excluding zero and nan).
3259             // Integer sign() doesn't need to worry about fractional values or nans, and can simply
3260             // be `clamp(arg, -1, 1)`.
3261             if (!this->pushExpression(arg0)) {
3262                 return unsupported();
3263             }
3264             if (arg0.type().componentType().isFloat()) {
3265                 Literal fltMaxLiteral{Position{}, FLT_MAX, &arg0.type().componentType()};
3266                 if (!this->pushVectorizedExpression(fltMaxLiteral, arg0.type())) {
3267                     return unsupported();
3268                 }
3269                 if (!this->binaryOp(arg0.type(), kMultiplyOps)) {
3270                     return unsupported();
3271                 }
3272             }
3273             Literal neg1Literal{Position{}, -1.0, &arg0.type().componentType()};
3274             if (!this->pushVectorizedExpression(neg1Literal, arg0.type())) {
3275                 return unsupported();
3276             }
3277             if (!this->binaryOp(arg0.type(), kMaxOps)) {
3278                 return unsupported();
3279             }
3280             Literal pos1Literal{Position{}, 1.0, &arg0.type().componentType()};
3281             if (!this->pushVectorizedExpression(pos1Literal, arg0.type())) {
3282                 return unsupported();
3283             }
3284             return this->binaryOp(arg0.type(), kMinOps);
3285         }
3286         case IntrinsicKind::k_sin_IntrinsicKind:
3287             return this->pushIntrinsic(BuilderOp::sin_float, arg0);
3288 
3289         case IntrinsicKind::k_sqrt_IntrinsicKind:
3290             return this->pushIntrinsic(BuilderOp::sqrt_float, arg0);
3291 
3292         case IntrinsicKind::k_tan_IntrinsicKind:
3293             return this->pushIntrinsic(BuilderOp::tan_float, arg0);
3294 
3295         case IntrinsicKind::k_transpose_IntrinsicKind:
3296             SkASSERT(arg0.type().isMatrix());
3297             if (!this->pushExpression(arg0)) {
3298                 return unsupported();
3299             }
3300             fBuilder.transpose(arg0.type().columns(), arg0.type().rows());
3301             return true;
3302 
3303         case IntrinsicKind::k_trunc_IntrinsicKind:
3304             // Implement trunc as `float(int(x))`, since float-to-int rounds toward zero.
3305             if (!this->pushExpression(arg0)) {
3306                 return unsupported();
3307             }
3308             fBuilder.unary_op(BuilderOp::cast_to_int_from_float, arg0.type().slotCount());
3309             fBuilder.unary_op(BuilderOp::cast_to_float_from_int, arg0.type().slotCount());
3310             return true;
3311 
3312         case IntrinsicKind::k_fromLinearSrgb_IntrinsicKind:
3313         case IntrinsicKind::k_toLinearSrgb_IntrinsicKind:
3314             // The argument must be a half3.
3315             SkASSERT(arg0.type().matches(*fContext.fTypes.fHalf3));
3316             if (!this->pushExpression(arg0)) {
3317                 return unsupported();
3318             }
3319 
3320             if (intrinsic == IntrinsicKind::k_fromLinearSrgb_IntrinsicKind) {
3321                 fBuilder.invoke_from_linear_srgb();
3322             } else {
3323                 fBuilder.invoke_to_linear_srgb();
3324             }
3325             return true;
3326 
3327         default:
3328             break;
3329     }
3330     return unsupported();
3331 }
3332 
3333 bool Generator::pushIntrinsic(const TypedOps& ops, const Expression& arg0, const Expression& arg1) {
3334     if (!this->pushExpression(arg0) || !this->pushVectorizedExpression(arg1, arg0.type())) {
3335         return unsupported();
3336     }
3337     return this->binaryOp(arg0.type(), ops);
3338 }
3339 
3340 bool Generator::pushIntrinsic(BuilderOp builderOp, const Expression& arg0, const Expression& arg1) {
3341     if (!this->pushExpression(arg0) || !this->pushVectorizedExpression(arg1, arg0.type())) {
3342         return unsupported();
3343     }
3344     fBuilder.binary_op(builderOp, arg0.type().slotCount());
3345     return true;
3346 }
3347 
3348 bool Generator::pushIntrinsic(IntrinsicKind intrinsic,
3349                               const Expression& arg0,
3350                               const Expression& arg1) {
3351     switch (intrinsic) {
3352         case IntrinsicKind::k_atan_IntrinsicKind:
3353             return this->pushIntrinsic(BuilderOp::atan2_n_floats, arg0, arg1);
3354 
3355         case IntrinsicKind::k_cross_IntrinsicKind: {
3356             // Implement cross as `arg0.yzx * arg1.zxy - arg0.zxy * arg1.yzx`. We use two stacks so
3357             // that each subexpression can be multiplied separately.
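             // (Expanded, this is the textbook formula: cross(a, b) = (a.y*b.z - a.z*b.y,
             //                                                         a.z*b.x - a.x*b.z,
             //                                                         a.x*b.y - a.y*b.x).)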
3358             SkASSERT(arg0.type().matches(arg1.type()));
3359             SkASSERT(arg0.type().slotCount() == 3);
3360             SkASSERT(arg1.type().slotCount() == 3);
3361 
3362             // Push `arg0.yzx` onto this stack and `arg0.zxy` onto a separate subexpression stack.
3363             AutoStack subexpressionStack(this);
3364             subexpressionStack.enter();
3365             if (!this->pushExpression(arg0)) {
3366                 return unsupported();
3367             }
3368             subexpressionStack.exit();
3369             subexpressionStack.pushClone(/*slots=*/3);
3370 
3371             fBuilder.swizzle(/*consumedSlots=*/3, {1, 2, 0});
3372             subexpressionStack.enter();
3373             fBuilder.swizzle(/*consumedSlots=*/3, {2, 0, 1});
3374             subexpressionStack.exit();
3375 
3376             // Push `arg1.zxy` onto this stack and `arg1.yzx` onto the next stack. Perform the
3377             // multiply on each subexpression (`arg0.yzx * arg1.zxy` on the first stack, and
3378             // `arg0.zxy * arg1.yzx` on the next).
3379             subexpressionStack.enter();
3380             if (!this->pushExpression(arg1)) {
3381                 return unsupported();
3382             }
3383             subexpressionStack.exit();
3384             subexpressionStack.pushClone(/*slots=*/3);
3385 
3386             fBuilder.swizzle(/*consumedSlots=*/3, {2, 0, 1});
3387             fBuilder.binary_op(BuilderOp::mul_n_floats, 3);
3388 
3389             subexpressionStack.enter();
3390             fBuilder.swizzle(/*consumedSlots=*/3, {1, 2, 0});
3391             fBuilder.binary_op(BuilderOp::mul_n_floats, 3);
3392             subexpressionStack.exit();
3393 
3394             // Migrate the result of the second subexpression (`arg0.zxy * arg1.yzx`) back onto the
3395             // main stack and subtract it from the first subexpression (`arg0.yzx * arg1.zxy`).
3396             subexpressionStack.pushClone(/*slots=*/3);
3397             fBuilder.binary_op(BuilderOp::sub_n_floats, 3);
3398 
3399             // Now that the calculation is complete, discard the subexpression on the next stack.
3400             subexpressionStack.enter();
3401             this->discardExpression(/*slots=*/3);
3402             subexpressionStack.exit();
3403             return true;
3404         }
3405         case IntrinsicKind::k_distance_IntrinsicKind:
3406             // Implement distance as `length(a - b)`.
3407             SkASSERT(arg0.type().slotCount() == arg1.type().slotCount());
3408             return this->pushBinaryExpression(arg0, OperatorKind::MINUS, arg1) &&
3409                    this->pushLengthIntrinsic(arg0.type().slotCount());
3410 
3411         case IntrinsicKind::k_dot_IntrinsicKind:
3412             SkASSERT(arg0.type().matches(arg1.type()));
3413             if (!this->pushExpression(arg0) || !this->pushExpression(arg1)) {
3414                 return unsupported();
3415             }
3416             fBuilder.dot_floats(arg0.type().slotCount());
3417             return true;
3418 
3419         case IntrinsicKind::k_equal_IntrinsicKind:
3420             SkASSERT(arg0.type().matches(arg1.type()));
3421             return this->pushIntrinsic(kEqualOps, arg0, arg1);
3422 
3423         case IntrinsicKind::k_notEqual_IntrinsicKind:
3424             SkASSERT(arg0.type().matches(arg1.type()));
3425             return this->pushIntrinsic(kNotEqualOps, arg0, arg1);
3426 
3427         case IntrinsicKind::k_lessThan_IntrinsicKind:
3428             SkASSERT(arg0.type().matches(arg1.type()));
3429             return this->pushIntrinsic(kLessThanOps, arg0, arg1);
3430 
3431         case IntrinsicKind::k_greaterThan_IntrinsicKind:
3432             SkASSERT(arg0.type().matches(arg1.type()));
3433             return this->pushIntrinsic(kLessThanOps, arg1, arg0);
3434 
3435         case IntrinsicKind::k_lessThanEqual_IntrinsicKind:
3436             SkASSERT(arg0.type().matches(arg1.type()));
3437             return this->pushIntrinsic(kLessThanEqualOps, arg0, arg1);
3438 
3439         case IntrinsicKind::k_greaterThanEqual_IntrinsicKind:
3440             SkASSERT(arg0.type().matches(arg1.type()));
3441             return this->pushIntrinsic(kLessThanEqualOps, arg1, arg0);
3442 
3443         case IntrinsicKind::k_min_IntrinsicKind:
3444             SkASSERT(arg0.type().componentType().matches(arg1.type().componentType()));
3445             return this->pushIntrinsic(kMinOps, arg0, arg1);
3446 
3447         case IntrinsicKind::k_matrixCompMult_IntrinsicKind:
3448             SkASSERT(arg0.type().matches(arg1.type()));
3449             return this->pushIntrinsic(kMultiplyOps, arg0, arg1);
3450 
3451         case IntrinsicKind::k_max_IntrinsicKind:
3452             SkASSERT(arg0.type().componentType().matches(arg1.type().componentType()));
3453             return this->pushIntrinsic(kMaxOps, arg0, arg1);
3454 
3455         case IntrinsicKind::k_mod_IntrinsicKind:
3456             SkASSERT(arg0.type().componentType().matches(arg1.type().componentType()));
3457             return this->pushIntrinsic(kModOps, arg0, arg1);
3458 
3459         case IntrinsicKind::k_pow_IntrinsicKind:
3460             SkASSERT(arg0.type().matches(arg1.type()));
3461             return this->pushIntrinsic(BuilderOp::pow_n_floats, arg0, arg1);
3462 
3463         case IntrinsicKind::k_reflect_IntrinsicKind: {
3464             // Implement reflect as `I - (N * dot(I,N) * 2)`.
3465             SkASSERT(arg0.type().matches(arg1.type()));
3466             SkASSERT(arg0.type().slotCount() == arg1.type().slotCount());
3467             SkASSERT(arg0.type().componentType().isFloat());
3468             int slotCount = arg0.type().slotCount();
3469 
3470             // Stack: I, N.
3471             if (!this->pushExpression(arg0) || !this->pushExpression(arg1)) {
3472                 return unsupported();
3473             }
3474             // Stack: I, N, I, N.
3475             fBuilder.push_clone(2 * slotCount);
3476             // Stack: I, N, dot(I,N)
3477             fBuilder.dot_floats(slotCount);
3478             // Stack: I, N, dot(I,N), 2
3479             fBuilder.push_constant_f(2.0);
3480             // Stack: I, N, dot(I,N) * 2
3481             fBuilder.binary_op(BuilderOp::mul_n_floats, 1);
3482             // Stack: I, N * dot(I,N) * 2
3483             fBuilder.push_duplicates(slotCount - 1);
3484             fBuilder.binary_op(BuilderOp::mul_n_floats, slotCount);
3485             // Stack: I - (N * dot(I,N) * 2)
3486             fBuilder.binary_op(BuilderOp::sub_n_floats, slotCount);
3487             return true;
3488         }
3489         case IntrinsicKind::k_step_IntrinsicKind: {
3490             // Compute step as `float(lessThanEqual(edge, x))`. We convert from boolean 0/~0 to
3491             // floating point zero/one by using a bitwise-and against the bit-pattern of 1.0.
3492             SkASSERT(arg0.type().componentType().matches(arg1.type().componentType()));
3493             if (!this->pushVectorizedExpression(arg0, arg1.type()) || !this->pushExpression(arg1)) {
3494                 return unsupported();
3495             }
3496             if (!this->binaryOp(arg1.type(), kLessThanEqualOps)) {
3497                 return unsupported();
3498             }
3499             Literal pos1Literal{Position{}, 1.0, &arg1.type().componentType()};
3500             if (!this->pushVectorizedExpression(pos1Literal, arg1.type())) {
3501                 return unsupported();
3502             }
3503             fBuilder.binary_op(BuilderOp::bitwise_and_n_ints, arg1.type().slotCount());
3504             return true;
3505         }
3506 
3507         default:
3508             break;
3509     }
3510     return unsupported();
3511 }
3512 
3513 bool Generator::pushIntrinsic(IntrinsicKind intrinsic,
3514                               const Expression& arg0,
3515                               const Expression& arg1,
3516                               const Expression& arg2) {
3517     switch (intrinsic) {
3518         case IntrinsicKind::k_clamp_IntrinsicKind:
3519             // Implement clamp as min(max(arg, low), high).
3520             SkASSERT(arg0.type().componentType().matches(arg1.type().componentType()));
3521             SkASSERT(arg0.type().componentType().matches(arg2.type().componentType()));
3522             if (!this->pushExpression(arg0) || !this->pushVectorizedExpression(arg1, arg0.type())) {
3523                 return unsupported();
3524             }
3525             if (!this->binaryOp(arg0.type(), kMaxOps)) {
3526                 return unsupported();
3527             }
3528             if (!this->pushVectorizedExpression(arg2, arg0.type())) {
3529                 return unsupported();
3530             }
3531             if (!this->binaryOp(arg0.type(), kMinOps)) {
3532                 return unsupported();
3533             }
3534             return true;
3535 
3536         case IntrinsicKind::k_faceforward_IntrinsicKind: {
3537             // Implement faceforward as `N ^ ((0 <= dot(I, NRef)) & 0x80000000)`.
3538             // In other words, flip the sign bit of N if `0 <= dot(I, NRef)`.
3539             SkASSERT(arg0.type().matches(arg1.type()));
3540             SkASSERT(arg0.type().matches(arg2.type()));
3541             int slotCount = arg0.type().slotCount();
3542 
3543             // Stack: N, 0, I, Nref
3544             if (!this->pushExpression(arg0)) {
3545                 return unsupported();
3546             }
3547             fBuilder.push_constant_f(0.0);
3548             if (!this->pushExpression(arg1) || !this->pushExpression(arg2)) {
3549                 return unsupported();
3550             }
3551             // Stack: N, 0, dot(I,NRef)
3552             fBuilder.dot_floats(slotCount);
3553             // Stack: N, (0 <= dot(I,NRef))
3554             fBuilder.binary_op(BuilderOp::cmple_n_floats, 1);
3555             // Stack: N, (0 <= dot(I,NRef)), 0x80000000
3556             fBuilder.push_constant_u(0x80000000);
3557             // Stack: N, ((0 <= dot(I,NRef)) & 0x80000000)
3558             fBuilder.binary_op(BuilderOp::bitwise_and_n_ints, 1);
3559             // Stack: N, vec((0 <= dot(I,NRef)) & 0x80000000)
3560             fBuilder.push_duplicates(slotCount - 1);
3561             // Stack: N ^ vec((0 <= dot(I,NRef)) & 0x80000000)
3562             fBuilder.binary_op(BuilderOp::bitwise_xor_n_ints, slotCount);
3563             return true;
3564         }
3565         case IntrinsicKind::k_mix_IntrinsicKind:
3566             // Note: our SkRP mix op takes the interpolation point first, not the interpolants.
3567             SkASSERT(arg0.type().matches(arg1.type()));
3568             if (arg2.type().componentType().isFloat()) {
3569                 SkASSERT(arg0.type().componentType().matches(arg2.type().componentType()));
3570                 if (!this->pushVectorizedExpression(arg2, arg0.type())) {
3571                     return unsupported();
3572                 }
3573                 if (!this->pushExpression(arg0) || !this->pushExpression(arg1)) {
3574                     return unsupported();
3575                 }
3576                 return this->ternaryOp(arg0.type(), kMixOps);
3577             }
3578             if (arg2.type().componentType().isBoolean()) {
3579                 if (!this->pushExpression(arg2)) {
3580                     return unsupported();
3581                 }
3582                 if (!this->pushExpression(arg0) || !this->pushExpression(arg1)) {
3583                     return unsupported();
3584                 }
3585                 // The `mix_int` op isn't doing a lerp; it uses the third argument to select values
3586                 // from the first and second arguments. It's safe for use with any type in arguments
3587                 // 0 and 1.
3588                 fBuilder.ternary_op(BuilderOp::mix_n_ints, arg0.type().slotCount());
3589                 return true;
3590             }
3591             return unsupported();
3592 
3593         case IntrinsicKind::k_refract_IntrinsicKind: {
3594             // We always calculate refraction using vec4s, so we pad out unused N/I slots with zero.
3595             int padding = 4 - arg0.type().slotCount();
3596             if (!this->pushExpression(arg0)) {
3597                 return unsupported();
3598             }
3599             fBuilder.push_zeros(padding);
3600 
3601             if (!this->pushExpression(arg1)) {
3602                 return unsupported();
3603             }
3604             fBuilder.push_zeros(padding);
3605 
3606             // eta is always a scalar and doesn't need padding.
3607             if (!this->pushExpression(arg2)) {
3608                 return unsupported();
3609             }
3610             fBuilder.refract_floats();
3611 
3612             // The result vector was returned as a vec4, so discard the extra columns.
3613             // The result was returned as a vec4, so discard the extra padding components.
3614             return true;
3615         }
3616         case IntrinsicKind::k_smoothstep_IntrinsicKind:
3617             SkASSERT(arg0.type().componentType().isFloat());
3618             SkASSERT(arg1.type().matches(arg0.type()));
3619             SkASSERT(arg2.type().componentType().isFloat());
3620 
3621             if (!this->pushVectorizedExpression(arg0, arg2.type()) ||
3622                 !this->pushVectorizedExpression(arg1, arg2.type()) ||
3623                 !this->pushExpression(arg2)) {
3624                 return unsupported();
3625             }
3626             fBuilder.ternary_op(BuilderOp::smoothstep_n_floats, arg2.type().slotCount());
3627             return true;
3628 
3629         default:
3630             break;
3631     }
3632     return unsupported();
3633 }
3634 
3635 bool Generator::pushLiteral(const Literal& l) {
3636     switch (l.type().numberKind()) {
3637         case Type::NumberKind::kFloat:
3638             fBuilder.push_constant_f(l.floatValue());
3639             return true;
3640 
3641         case Type::NumberKind::kSigned:
3642             fBuilder.push_constant_i(l.intValue());
3643             return true;
3644 
3645         case Type::NumberKind::kUnsigned:
3646             fBuilder.push_constant_u(l.intValue());
3647             return true;
3648 
3649         case Type::NumberKind::kBoolean:
3650             fBuilder.push_constant_i(l.boolValue() ? ~0 : 0);
3651             return true;
3652 
3653         default:
3654             SkUNREACHABLE;
3655     }
3656 }
3657 
3658 bool Generator::pushPostfixExpression(const PostfixExpression& p, bool usesResult) {
3659     // If the result is ignored...
3660     if (!usesResult) {
3661         // ... just emit a prefix expression instead.
3662         return this->pushPrefixExpression(p.getOperator(), *p.operand());
3663     }
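         // Postfix semantics: the expression evaluates to the operand's *original* value even
         // though the operand itself is incremented or decremented. We keep the original on the
         // stack and do the arithmetic on a scratch copy, which is stored back and then discarded.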
3664     // Get the operand as an lvalue, and push it onto the stack as-is.
3665     std::unique_ptr<LValue> lvalue = this->makeLValue(*p.operand());
3666     if (!lvalue || !this->push(*lvalue)) {
3667         return unsupported();
3668     }
3669 
3670     // Push a scratch copy of the operand.
3671     fBuilder.push_clone(p.type().slotCount());
3672 
3673     // Increment or decrement the scratch copy by one.
3674     Literal oneLiteral{Position{}, 1.0, &p.type().componentType()};
3675     if (!this->pushVectorizedExpression(oneLiteral, p.type())) {
3676         return unsupported();
3677     }
3678 
3679     switch (p.getOperator().kind()) {
3680         case OperatorKind::PLUSPLUS:
3681             if (!this->binaryOp(p.type(), kAddOps)) {
3682                 return unsupported();
3683             }
3684             break;
3685 
3686         case OperatorKind::MINUSMINUS:
3687             if (!this->binaryOp(p.type(), kSubtractOps)) {
3688                 return unsupported();
3689             }
3690             break;
3691 
3692         default:
3693             SkUNREACHABLE;
3694     }
3695 
3696     // Write the new value back to the operand.
3697     if (!this->store(*lvalue)) {
3698         return unsupported();
3699     }
3700 
3701     // Discard the scratch copy, leaving only the original value as-is.
3702     this->discardExpression(p.type().slotCount());
3703     return true;
3704 }
3705 
3706 bool Generator::pushPrefixExpression(const PrefixExpression& p) {
3707     return this->pushPrefixExpression(p.getOperator(), *p.operand());
3708 }
3709 
3710 bool Generator::pushPrefixExpression(Operator op, const Expression& expr) {
3711     switch (op.kind()) {
3712         case OperatorKind::BITWISENOT:
3713         case OperatorKind::LOGICALNOT:
3714             // Handle operators ! and ~.
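                 // Both lower to a componentwise XOR against ~0: booleans are stored as
                 // all-ones/all-zero masks, so this inverts them, and for integers this is exactly
                 // a bitwise NOT.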
3715             if (!this->pushExpression(expr)) {
3716                 return unsupported();
3717             }
3718             fBuilder.push_constant_u(~0, expr.type().slotCount());
3719             fBuilder.binary_op(BuilderOp::bitwise_xor_n_ints, expr.type().slotCount());
3720             return true;
3721 
3722         case OperatorKind::MINUS: {
3723             if (!this->pushExpression(expr)) {
3724                 return unsupported();
3725             }
3726             if (expr.type().componentType().isFloat()) {
3727                 // Handle float negation as an integer `x ^ 0x80000000`. This toggles the sign bit.
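                     // For example, 2.0f has bit pattern 0x40000000; XOR with 0x80000000 yields
                     // 0xC0000000, which is -2.0f. This also maps +0.0 to -0.0 and leaves NaN and
                     // infinity magnitudes untouched, matching IEEE 754 negation.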
3728                 fBuilder.push_constant_u(0x80000000, expr.type().slotCount());
3729                 fBuilder.binary_op(BuilderOp::bitwise_xor_n_ints, expr.type().slotCount());
3730             } else {
3731                 // Handle integer negation as a componentwise `expr * -1`.
3732                 fBuilder.push_constant_i(-1, expr.type().slotCount());
3733                 fBuilder.binary_op(BuilderOp::mul_n_ints, expr.type().slotCount());
3734             }
3735             return true;
3736         }
3737         case OperatorKind::PLUSPLUS: {
3738             // Rewrite as `expr += 1`.
3739             Literal oneLiteral{Position{}, 1.0, &expr.type().componentType()};
3740             return this->pushBinaryExpression(expr, OperatorKind::PLUSEQ, oneLiteral);
3741         }
3742         case OperatorKind::MINUSMINUS: {
3743             // Rewrite as `expr += -1`.
3744             Literal minusOneLiteral{expr.fPosition, -1.0, &expr.type().componentType()};
3745             return this->pushBinaryExpression(expr, OperatorKind::PLUSEQ, minusOneLiteral);
3746         }
3747         default:
3748             break;
3749     }
3750 
3751     return unsupported();
3752 }
3753 
3754 bool Generator::pushSwizzle(const Swizzle& s) {
3755     SkASSERT(!s.components().empty() && s.components().size() <= 4);
3756 
3757     // If this is a simple subset of a variable's slots...
3758     bool isSimpleSubset = is_sliceable_swizzle(s.components());
3759     if (isSimpleSubset && s.base()->is<VariableReference>()) {
3760         // ... we can just push part of the variable directly onto the stack, rather than pushing
3761         // the whole expression and then immediately cutting it down. (Either way works, but this
3762         // saves a step.)
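             // For example, a swizzle like `v.yz` on a vec4 variable is a consecutive slice, so it
             // can be pushed as slots 1..2 of `v` directly; the range below starts at the index of
             // the swizzle's first component.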
3763         return this->pushVariableReferencePartial(
3764                 s.base()->as<VariableReference>(),
3765                 SlotRange{/*index=*/s.components()[0], /*count=*/s.components().size()});
3766     }
3767     // Push the base expression.
3768     if (!this->pushExpression(*s.base())) {
3769         return false;
3770     }
3771     // An identity swizzle doesn't rearrange the data; it just (potentially) discards tail elements.
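         // For example, `aFloat4.xy` keeps the first two slots as-is and merely drops the last two.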
3772     if (isSimpleSubset && s.components()[0] == 0) {
3773         int discardedElements = s.base()->type().slotCount() - s.components().size();
3774         SkASSERT(discardedElements >= 0);
3775         fBuilder.discard_stack(discardedElements);
3776         return true;
3777     }
3778     // Perform the swizzle.
3779     fBuilder.swizzle(s.base()->type().slotCount(), s.components());
3780     return true;
3781 }
3782 
3783 bool Generator::pushTernaryExpression(const TernaryExpression& t) {
3784     return this->pushTernaryExpression(*t.test(), *t.ifTrue(), *t.ifFalse());
3785 }
3786 
3787 bool Generator::pushDynamicallyUniformTernaryExpression(const Expression& test,
3788                                                         const Expression& ifTrue,
3789                                                         const Expression& ifFalse) {
3790     SkASSERT(Analysis::IsDynamicallyUniformExpression(test));
3791 
3792     int falseLabelID = fBuilder.nextLabelID();
3793     int exitLabelID = fBuilder.nextLabelID();
3794 
3795     // First, push the test-expression into a separate stack.
3796     AutoStack testStack(this);
3797     testStack.enter();
3798     if (!this->pushExpression(test)) {
3799         return unsupported();
3800     }
3801 
3802     // Branch to the true- or false-expression based on the test-expression. We can skip the
3803     // non-true path entirely since the test is known to be uniform.
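         // As the op name suggests, the branch below is taken only when no active lane holds true
         // (~0); since the test is uniform, exactly one of the two expressions is evaluated at
         // run time.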
3804     fBuilder.branch_if_no_active_lanes_on_stack_top_equal(~0, falseLabelID);
3805     testStack.exit();
3806 
3807     if (!this->pushExpression(ifTrue)) {
3808         return unsupported();
3809     }
3810 
3811     fBuilder.jump(exitLabelID);
3812 
3813     // The builder doesn't understand control flow, and assumes that every push moves the stack-top
3814     // forwards. We need to manually balance out the `pushExpression` from the if-true path by
3815     // moving the stack position backwards, so that the if-false path pushes its expression into the
3816     // same location as the if-true result.
3817     this->discardExpression(/*slots=*/ifTrue.type().slotCount());
3818 
3819     fBuilder.label(falseLabelID);
3820 
3821     if (!this->pushExpression(ifFalse)) {
3822         return unsupported();
3823     }
3824 
3825     fBuilder.label(exitLabelID);
3826 
3827     // Jettison the test-expression from the separate stack.
3828     testStack.enter();
3829     this->discardExpression(/*slots=*/1);
3830     testStack.exit();
3831     return true;
3832 }
3833 
3834 bool Generator::pushTernaryExpression(const Expression& test,
3835                                       const Expression& ifTrue,
3836                                       const Expression& ifFalse) {
3837     // If the test-expression is dynamically-uniform, we can skip over the untaken expression
3838     // entirely, without needing to involve the condition mask.
3839     if (Analysis::IsDynamicallyUniformExpression(test)) {
3840         return this->pushDynamicallyUniformTernaryExpression(test, ifTrue, ifFalse);
3841     }
3842 
3843     // Analyze the ternary to see which corners we can safely cut.
3844     bool ifFalseHasSideEffects = Analysis::HasSideEffects(ifFalse);
3845     bool ifTrueHasSideEffects  = Analysis::HasSideEffects(ifTrue);
3846     bool ifTrueIsTrivial       = Analysis::IsTrivialExpression(ifTrue);
3847     int  cleanupLabelID        = fBuilder.nextLabelID();
3848 
3849     // If the true- and false-expressions both lack side effects, we can safely evaluate both of
3850     // them without masking off their effects. If the true-expression is also trivial, we emit both
3851     // sides and use a boolean mix to select the correct result without using the condition mask.
3852     if (!ifFalseHasSideEffects && !ifTrueHasSideEffects && ifTrueIsTrivial) {
3853         // Push all of the arguments to mix.
3854         if (!this->pushVectorizedExpression(test, ifTrue.type())) {
3855             return unsupported();
3856         }
3857         if (!this->pushExpression(ifFalse)) {
3858             return unsupported();
3859         }
3860         if (!this->pushExpression(ifTrue)) {
3861             return unsupported();
3862         }
3863         // Use boolean mix to select the true- or false-expression via the test-expression.
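             // At this point the stack holds [vectorized test, false-value, true-value]; the mix op
             // selects, per lane, between the two values based on the boolean test mask.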
3864         fBuilder.ternary_op(BuilderOp::mix_n_ints, ifTrue.type().slotCount());
3865         return true;
3866     }
3867 
3868     // First, push the current condition-mask and the test-expression into a separate stack.
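         // (A separate stack keeps the saved mask and the test value from interleaving with the
         // ternary's result values, which are assembled on the primary stack below.)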
3869     fBuilder.enableExecutionMaskWrites();
3870     AutoStack testStack(this);
3871     testStack.enter();
3872     fBuilder.push_condition_mask();
3873     if (!this->pushExpression(test)) {
3874         return unsupported();
3875     }
3876     testStack.exit();
3877 
3878     // We can take some shortcuts with condition-mask handling if the false-expression is entirely
3879     // side-effect free. (We can evaluate it without masking off its effects.) We always handle the
3880     // condition mask properly for the test-expression and the true-expression.
3881     if (!ifFalseHasSideEffects) {
3882         // Push the false-expression onto the primary stack.
3883         if (!this->pushExpression(ifFalse)) {
3884             return unsupported();
3885         }
3886 
3887         // Next, merge the condition mask (on the separate stack) with the test expression.
3888         testStack.enter();
3889         fBuilder.merge_condition_mask();
3890         testStack.exit();
3891 
3892         // If no lanes are active, we can skip the true-expression entirely. This isn't super likely
3893         // to happen, so it's probably only a win for non-trivial true-expressions.
3894         if (!ifTrueIsTrivial) {
3895             fBuilder.branch_if_no_lanes_active(cleanupLabelID);
3896         }
3897 
3898         // Push the true-expression onto the primary stack, immediately after the false-expression.
3899         if (!this->pushExpression(ifTrue)) {
3900             return unsupported();
3901         }
3902 
3903         // Use a select to conditionally mask-merge the true-expression and false-expression lanes.
3904         fBuilder.select(/*slots=*/ifTrue.type().slotCount());
3905         fBuilder.label(cleanupLabelID);
3906     } else {
3907         // Merge the condition mask (on the separate stack) with the test expression.
3908         testStack.enter();
3909         fBuilder.merge_condition_mask();
3910         testStack.exit();
3911 
3912         // Push the true-expression onto the primary stack.
3913         if (!this->pushExpression(ifTrue)) {
3914             return unsupported();
3915         }
3916 
3917         // Switch back to the test-expression stack and apply the inverted test condition.
3918         testStack.enter();
3919         fBuilder.merge_inv_condition_mask();
3920         testStack.exit();
3921 
3922         // Push the false-expression onto the primary stack, immediately after the true-expression.
3923         if (!this->pushExpression(ifFalse)) {
3924             return unsupported();
3925         }
3926 
3927         // Use a select to conditionally mask-merge the true-expression and false-expression lanes;
3928         // the mask is already set up for this.
3929         fBuilder.select(/*slots=*/ifTrue.type().slotCount());
3930     }
3931 
3932     // Restore the condition-mask to its original state and jettison the test-expression.
3933     testStack.enter();
3934     this->discardExpression(/*slots=*/1);
3935     fBuilder.pop_condition_mask();
3936     testStack.exit();
3937 
3938     fBuilder.disableExecutionMaskWrites();
3939     return true;
3940 }
3941 
3942 bool Generator::pushVariableReference(const VariableReference& var) {
3943     // If we are pushing a constant-value variable, push the value directly; literal values are more
3944     // amenable to optimization.
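         // For instance, a variable declared as `const half4 kColor = half4(1, 0, 0, 1);` can
         // typically be pushed as four literal constants instead of being reloaded from its slots.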
3945     if (var.type().isScalar() || var.type().isVector()) {
3946         if (const Expression* expr = ConstantFolder::GetConstantValueOrNull(var)) {
3947             return this->pushExpression(*expr);
3948         }
3949         if (fImmutableVariables.contains(var.variable())) {
3950             return this->pushExpression(*var.variable()->initialValue());
3951         }
3952     }
3953     return this->pushVariableReferencePartial(var, SlotRange{0, (int)var.type().slotCount()});
3954 }
3955 
3956 bool Generator::pushVariableReferencePartial(const VariableReference& v, SlotRange subset) {
3957     const Variable& var = *v.variable();
3958     SlotRange r;
3959     if (IsUniform(var)) {
3960         // Push a uniform.
3961         r = this->getUniformSlots(var);
3962         SkASSERT(r.count == (int)var.type().slotCount());
3963         r.index += subset.index;
3964         r.count = subset.count;
3965         fBuilder.push_uniform(r);
3966     } else if (fImmutableVariables.contains(&var)) {
3967         // If we only need a single slot, we can push a constant. This saves a lookup, and can
3968         // occasionally permit the use of an immediate-mode op.
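             // (Illustration, not guaranteed: a later operation on this value might then be emitted
             // in a form that embeds the constant directly, rather than as a slot load followed by a
             // regular op, assuming the builder recognizes a constant on the stack top.)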
3969         if (subset.count == 1) {
3970             const Expression& expr = *v.variable()->initialValue();
3971             std::optional<ImmutableBits> bits = this->getImmutableBitsForSlot(expr, subset.index);
3972             if (bits.has_value()) {
3973                 fBuilder.push_constant_i(*bits);
3974                 return true;
3975             }
3976         }
3977         // Push the immutable slot range.
3978         r = this->getImmutableSlots(var);
3979         SkASSERT(r.count == (int)var.type().slotCount());
3980         r.index += subset.index;
3981         r.count = subset.count;
3982         fBuilder.push_immutable(r);
3983     } else {
3984         // Push the variable.
3985         r = this->getVariableSlots(var);
3986         SkASSERT(r.count == (int)var.type().slotCount());
3987         r.index += subset.index;
3988         r.count = subset.count;
3989         fBuilder.push_slots(r);
3990     }
3991     return true;
3992 }
3993 
3994 bool Generator::writeProgram(const FunctionDefinition& function) {
3995     fCurrentFunction = &function;
3996 
3997     if (fDebugTrace) {
3998         // Copy the program source into the debug info so that it will be written in the trace file.
3999         fDebugTrace->setSource(*fProgram.fSource);
4000 
4001         if (fWriteTraceOps) {
4002             // The Raster Pipeline blitter generates centered pixel coordinates (0.5, 1.5, 2.5,
4003             // etc.). Add 0.5 to the requested trace coordinate to match this, then compare it
4004             // against the device coordinates pushed below. We keep the resulting comparison mask
4005             // in a dedicated trace-mask stack.
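                 // For example, with fTraceCoord == (10, 20), the mask computed below is true only
                 // for the fragment whose device coordinates are (10.5, 20.5).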
4006             fTraceMask.emplace(this);
4007             fTraceMask->enter();
4008             fBuilder.push_device_xy01();
4009             fBuilder.discard_stack(2);
4010             fBuilder.push_constant_f(fDebugTrace->fTraceCoord.fX + 0.5f);
4011             fBuilder.push_constant_f(fDebugTrace->fTraceCoord.fY + 0.5f);
4012             fBuilder.binary_op(BuilderOp::cmpeq_n_floats, 2);
4013             fBuilder.binary_op(BuilderOp::bitwise_and_n_ints, 1);
4014             fTraceMask->exit();
4015 
4016             // Assemble a position-to-line-number mapping for the debugger.
4017             this->calculateLineOffsets();
4018         }
4019     }
4020 
4021     // Assign slots to the parameters of main; copy src and dst into those slots as appropriate.
4022     const SkSL::Variable* mainCoordsParam = function.declaration().getMainCoordsParameter();
4023     const SkSL::Variable* mainInputColorParam = function.declaration().getMainInputColorParameter();
4024     const SkSL::Variable* mainDestColorParam = function.declaration().getMainDestColorParameter();
4025 
4026     for (const SkSL::Variable* param : function.declaration().parameters()) {
4027         if (param == mainCoordsParam) {
4028             // Coordinates are passed via RG.
4029             SlotRange fragCoord = this->getVariableSlots(*param);
4030             SkASSERT(fragCoord.count == 2);
4031             fBuilder.store_src_rg(fragCoord);
4032         } else if (param == mainInputColorParam) {
4033             // Input colors are passed via RGBA.
4034             SlotRange srcColor = this->getVariableSlots(*param);
4035             SkASSERT(srcColor.count == 4);
4036             fBuilder.store_src(srcColor);
4037         } else if (param == mainDestColorParam) {
4038             // Dest colors are passed via dRGBA.
4039             SlotRange destColor = this->getVariableSlots(*param);
4040             SkASSERT(destColor.count == 4);
4041             fBuilder.store_dst(destColor);
4042         } else {
4043             SkDEBUGFAIL("Invalid parameter to main()");
4044             return unsupported();
4045         }
4046     }
4047 
4048     // Initialize the program.
4049     fBuilder.init_lane_masks();
4050 
4051     // Emit global variables.
4052     if (!this->writeGlobals()) {
4053         return unsupported();
4054     }
4055 
4056     // Invoke main().
4057     std::optional<SlotRange> mainResult = this->writeFunction(function, function, /*arguments=*/{});
4058     if (!mainResult.has_value()) {
4059         return unsupported();
4060     }
4061 
4062     // Move the result of main() from slots into RGBA.
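         // (If main() required dedicated result slots, load them into src; otherwise the return value
         // is still on the value stack and can be popped straight into src.rgba.)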
4063     SkASSERT(mainResult->count == 4);
4064     if (this->needsFunctionResultSlots(fCurrentFunction)) {
4065         fBuilder.load_src(*mainResult);
4066     } else {
4067         fBuilder.pop_src_rgba();
4068     }
4069 
4070     // Discard the trace mask.
4071     if (fTraceMask.has_value()) {
4072         fTraceMask->enter();
4073         fBuilder.discard_stack(1);
4074         fTraceMask->exit();
4075     }
4076 
4077     return true;
4078 }
4079 
4080 std::unique_ptr<RP::Program> Generator::finish() {
4081     return fBuilder.finish(fProgramSlots.slotCount(),
4082                            fUniformSlots.slotCount(),
4083                            fImmutableSlots.slotCount(),
4084                            fDebugTrace);
4085 }
4086 
4087 }  // namespace RP
4088 
4089 std::unique_ptr<RP::Program> MakeRasterPipelineProgram(const SkSL::Program& program,
4090                                                        const FunctionDefinition& function,
4091                                                        DebugTracePriv* debugTrace,
4092                                                        bool writeTraceOps) {
4093     RP::Generator generator(program, debugTrace, writeTraceOps);
4094     if (!generator.writeProgram(function)) {
4095         return nullptr;
4096     }
4097     return generator.finish();
4098 }
4099 
4100 }  // namespace SkSL
4101