1 /*
2 * Copyright 2022 Google LLC
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8 #include "src/sksl/codegen/SkSLRasterPipelineCodeGenerator.h"
9
10 #include "include/core/SkPoint.h"
11 #include "include/core/SkSpan.h"
12 #include "include/private/base/SkTArray.h"
13 #include "include/private/base/SkTo.h"
14 #include "src/base/SkEnumBitMask.h"
15 #include "src/base/SkStringView.h"
16 #include "src/base/SkUtils.h"
17 #include "src/core/SkTHash.h"
18 #include "src/sksl/SkSLAnalysis.h"
19 #include "src/sksl/SkSLBuiltinTypes.h"
20 #include "src/sksl/SkSLCompiler.h"
21 #include "src/sksl/SkSLConstantFolder.h"
22 #include "src/sksl/SkSLContext.h"
23 #include "src/sksl/SkSLDefines.h"
24 #include "src/sksl/SkSLIntrinsicList.h"
25 #include "src/sksl/SkSLOperator.h"
26 #include "src/sksl/SkSLPosition.h"
27 #include "src/sksl/analysis/SkSLProgramUsage.h"
28 #include "src/sksl/codegen/SkSLRasterPipelineBuilder.h"
29 #include "src/sksl/ir/SkSLBinaryExpression.h"
30 #include "src/sksl/ir/SkSLBlock.h"
31 #include "src/sksl/ir/SkSLBreakStatement.h"
32 #include "src/sksl/ir/SkSLChildCall.h"
33 #include "src/sksl/ir/SkSLConstructor.h"
34 #include "src/sksl/ir/SkSLConstructorDiagonalMatrix.h"
35 #include "src/sksl/ir/SkSLConstructorMatrixResize.h"
36 #include "src/sksl/ir/SkSLConstructorSplat.h"
37 #include "src/sksl/ir/SkSLContinueStatement.h"
38 #include "src/sksl/ir/SkSLDoStatement.h"
39 #include "src/sksl/ir/SkSLExpression.h"
40 #include "src/sksl/ir/SkSLExpressionStatement.h"
41 #include "src/sksl/ir/SkSLFieldAccess.h"
42 #include "src/sksl/ir/SkSLForStatement.h"
43 #include "src/sksl/ir/SkSLFunctionCall.h"
44 #include "src/sksl/ir/SkSLFunctionDeclaration.h"
45 #include "src/sksl/ir/SkSLFunctionDefinition.h"
46 #include "src/sksl/ir/SkSLIRNode.h"
47 #include "src/sksl/ir/SkSLIfStatement.h"
48 #include "src/sksl/ir/SkSLIndexExpression.h"
49 #include "src/sksl/ir/SkSLLayout.h"
50 #include "src/sksl/ir/SkSLLiteral.h"
51 #include "src/sksl/ir/SkSLModifierFlags.h"
52 #include "src/sksl/ir/SkSLPostfixExpression.h"
53 #include "src/sksl/ir/SkSLPrefixExpression.h"
54 #include "src/sksl/ir/SkSLProgram.h"
55 #include "src/sksl/ir/SkSLProgramElement.h"
56 #include "src/sksl/ir/SkSLReturnStatement.h"
57 #include "src/sksl/ir/SkSLStatement.h"
58 #include "src/sksl/ir/SkSLSwitchCase.h"
59 #include "src/sksl/ir/SkSLSwitchStatement.h"
60 #include "src/sksl/ir/SkSLSwizzle.h"
61 #include "src/sksl/ir/SkSLTernaryExpression.h"
62 #include "src/sksl/ir/SkSLType.h"
63 #include "src/sksl/ir/SkSLVarDeclarations.h"
64 #include "src/sksl/ir/SkSLVariable.h"
65 #include "src/sksl/ir/SkSLVariableReference.h"
66 #include "src/sksl/tracing/SkSLDebugTracePriv.h"
67 #include "src/sksl/transform/SkSLTransform.h"
68
69 #include <algorithm>
70 #include <climits>
71 #include <cstddef>
72 #include <cstdint>
73 #include <float.h>
74 #include <iterator>
75 #include <optional>
76 #include <string>
77 #include <string_view>
78 #include <utility>
79 #include <vector>
80
81 using namespace skia_private;
82
83 namespace SkSL {
84 namespace RP {
85
/**
 * Shared failure result for code generation. Failure paths in this file `return unsupported();`
 * so that a single breakpoint placed here catches the point where generation gave up (useful
 * when MakeRasterPipelineProgram returns false).
 */
static bool unsupported() {
    constexpr bool kIsSupported = false;
    return kIsSupported;
}
90
91 class AutoContinueMask;
92 class Generator;
93 class LValue;
94
/**
 * Tracks which slot ranges belong to which IR nodes (variables and function call-sites) and
 * counts the total number of slots handed out. When constructed with a non-null SlotDebugInfo
 * vector, that vector is retained in `fSlotDebugInfo` for the debug-info helpers below.
 */
class SlotManager {
public:
    /** `i` may be null; the pointer is stored as-is and is not owned by the SlotManager. */
    SlotManager(std::vector<SlotDebugInfo>* i) : fSlotDebugInfo(i) {}

    /** Used by `createSlots` to add this variable to SlotDebugInfo inside the DebugTrace. */
    void addSlotDebugInfoForGroup(const std::string& varName,
                                  const Type& type,
                                  Position pos,
                                  int* groupIndex,
                                  bool isFunctionReturnValue);
    void addSlotDebugInfo(const std::string& varName,
                          const Type& type,
                          Position pos,
                          bool isFunctionReturnValue);

    /** Creates slots associated with an SkSL variable or return value. */
    SlotRange createSlots(std::string name,
                          const Type& type,
                          Position pos,
                          bool isFunctionReturnValue);

    /**
     * Associates previously-created slots with an SkSL variable; this can allow multiple variables
     * to share overlapping ranges. If the variable was already associated with a slot range,
     * returns the previously associated range.
     */
    std::optional<SlotRange> mapVariableToSlots(const Variable& v, SlotRange range);

    /**
     * Deletes the existing mapping between a variable and its slots; a future call to
     * `getVariableSlots` will see this as a brand new variable and associate new slots.
     */
    void unmapVariableSlots(const Variable& v);

    /** Looks up the slots associated with an SkSL variable; creates the slot if necessary. */
    SlotRange getVariableSlots(const Variable& v);

    /**
     * Looks up the slots associated with an SkSL function's return value; creates the range if
     * necessary. Note that recursion is never supported, so we don't need to maintain return values
     * in a stack; we can just statically allocate one slot per function call-site.
     */
    SlotRange getFunctionSlots(const IRNode& callSite, const FunctionDeclaration& f);

    /** Returns the total number of slots consumed. */
    int slotCount() const { return fSlotCount; }

private:
    // Maps an IR node (a variable, or a call-site for function results) to its slot range.
    THashMap<const IRNode*, SlotRange> fSlotMap;
    // Running total reported by `slotCount`.
    int fSlotCount = 0;
    // Optional debug-info sink supplied at construction; not owned.
    std::vector<SlotDebugInfo>* fSlotDebugInfo;
};
147
/**
 * Scoped handle to one of the Generator's temporary stacks. The constructor obtains a stack ID
 * from `Generator::createStack` and the destructor hands it back via `Generator::recycleStack`.
 * `enter`/`exit` switch the Generator's active stack to this one and back.
 */
class AutoStack {
public:
    /**
     * Creates a temporary stack. The caller is responsible for discarding every entry on this
     * stack before ~AutoStack is reached.
     */
    explicit AutoStack(Generator* g);
    ~AutoStack();

    /** Activates the associated stack. */
    void enter();

    /** Undoes a call to `enter`, returning to the previously-active stack. */
    void exit();

    /** Returns the stack ID of this AutoStack. */
    int stackID() { return fStackID; }

    /** Clones values from this stack onto the top of the active stack. */
    void pushClone(int slots);

    /** Clones values from a fixed range of this stack onto the top of the active stack. */
    void pushClone(SlotRange range, int offsetFromStackTop);

    /** Clones values from a dynamic range of this stack onto the top of the active stack. */
    void pushCloneIndirect(SlotRange range, int dynamicStackID, int offsetFromStackTop);

private:
    // The generator that issued `fStackID`; not owned.
    Generator* fGenerator;
    // ID of the stack managed by this object.
    int fStackID = 0;
    // Stack that was active when `enter` was last called; restored by `exit`.
    int fParentStackID = 0;
};
180
181 class Generator {
182 public:
Generator(const SkSL::Program & program,DebugTracePriv * debugTrace,bool writeTraceOps)183 Generator(const SkSL::Program& program, DebugTracePriv* debugTrace, bool writeTraceOps)
184 : fProgram(program)
185 , fContext(fProgram.fContext->fTypes, *fProgram.fContext->fErrors)
186 , fDebugTrace(debugTrace)
187 , fWriteTraceOps(writeTraceOps)
188 , fProgramSlots(debugTrace ? &debugTrace->fSlotInfo : nullptr)
189 , fUniformSlots(debugTrace ? &debugTrace->fUniformInfo : nullptr)
190 , fImmutableSlots(nullptr) {
191 fContext.fConfig = fProgram.fConfig.get();
192 fContext.fModule = fProgram.fContext->fModule;
193 }
194
~Generator()195 ~Generator() {
196 // ~AutoStack calls into the Generator, so we need to make sure the trace mask is reset
197 // before the Generator is destroyed.
198 fTraceMask.reset();
199 }
200
201 /** Converts the SkSL main() function into a set of Instructions. */
202 bool writeProgram(const FunctionDefinition& function);
203
204 /** Returns the generated program. */
205 std::unique_ptr<RP::Program> finish();
206
207 /**
208 * Converts an SkSL function into a set of Instructions. Returns nullopt if the function
209 * contained unsupported statements or expressions.
210 */
211 std::optional<SlotRange> writeFunction(const IRNode& callSite,
212 const FunctionDefinition& function,
213 SkSpan<std::unique_ptr<Expression> const> arguments);
214
215 /**
216 * Returns the slot index of this function inside the FunctionDebugInfo array in DebugTracePriv.
217 * The FunctionDebugInfo slot will be created if it doesn't already exist.
218 */
219 int getFunctionDebugInfo(const FunctionDeclaration& decl);
220
221 /** Returns true for variables with slots in fProgramSlots; immutables or uniforms are false. */
hasVariableSlots(const Variable & v)222 bool hasVariableSlots(const Variable& v) {
223 return !IsUniform(v) && !fImmutableVariables.contains(&v);
224 }
225
226 /** Looks up the slots associated with an SkSL variable; creates the slots if necessary. */
getVariableSlots(const Variable & v)227 SlotRange getVariableSlots(const Variable& v) {
228 SkASSERT(this->hasVariableSlots(v));
229 return fProgramSlots.getVariableSlots(v);
230 }
231
232 /**
233 * Looks up the slots associated with an immutable variable; creates the slots if necessary.
234 */
getImmutableSlots(const Variable & v)235 SlotRange getImmutableSlots(const Variable& v) {
236 SkASSERT(!IsUniform(v));
237 SkASSERT(fImmutableVariables.contains(&v));
238 return fImmutableSlots.getVariableSlots(v);
239 }
240
241 /** Looks up the slots associated with an SkSL uniform; creates the slots if necessary. */
getUniformSlots(const Variable & v)242 SlotRange getUniformSlots(const Variable& v) {
243 SkASSERT(IsUniform(v));
244 SkASSERT(!fImmutableVariables.contains(&v));
245 return fUniformSlots.getVariableSlots(v);
246 }
247
248 /**
249 * Looks up the slots associated with an SkSL function's return value; creates the range if
250 * necessary. Note that recursion is never supported, so we don't need to maintain return values
251 * in a stack; we can just statically allocate one slot per function call-site.
252 */
getFunctionSlots(const IRNode & callSite,const FunctionDeclaration & f)253 SlotRange getFunctionSlots(const IRNode& callSite, const FunctionDeclaration& f) {
254 return fProgramSlots.getFunctionSlots(callSite, f);
255 }
256
257 /**
258 * Creates an additional stack for the program to push values onto. The stack will not become
259 * actively in-use until `setCurrentStack` is called.
260 */
261 int createStack();
262
263 /** Frees a stack generated by `createStack`. The freed stack must be completely empty. */
264 void recycleStack(int stackID);
265
266 /** Redirects builder ops to point to a different stack (created by `createStack`). */
267 void setCurrentStack(int stackID);
268
269 /** Reports the currently active stack. */
currentStack()270 int currentStack() {
271 return fCurrentStack;
272 }
273
274 /**
275 * Returns an LValue for the passed-in expression; if the expression isn't supported as an
276 * LValue, returns nullptr.
277 */
278 std::unique_ptr<LValue> makeLValue(const Expression& e, bool allowScratch = false);
279
280 /** Copies the top-of-stack value into this lvalue, without discarding it from the stack. */
281 [[nodiscard]] bool store(LValue& lvalue);
282
283 /** Pushes the lvalue onto the top-of-stack. */
284 [[nodiscard]] bool push(LValue& lvalue);
285
286 /** The Builder stitches our instructions together into Raster Pipeline code. */
builder()287 Builder* builder() { return &fBuilder; }
288
289 /** Appends a statement to the program. */
290 [[nodiscard]] bool writeStatement(const Statement& s);
291 [[nodiscard]] bool writeBlock(const Block& b);
292 [[nodiscard]] bool writeBreakStatement(const BreakStatement& b);
293 [[nodiscard]] bool writeContinueStatement(const ContinueStatement& b);
294 [[nodiscard]] bool writeDoStatement(const DoStatement& d);
295 [[nodiscard]] bool writeExpressionStatement(const ExpressionStatement& e);
296 [[nodiscard]] bool writeMasklessForStatement(const ForStatement& f);
297 [[nodiscard]] bool writeForStatement(const ForStatement& f);
298 [[nodiscard]] bool writeGlobals();
299 [[nodiscard]] bool writeIfStatement(const IfStatement& i);
300 [[nodiscard]] bool writeDynamicallyUniformIfStatement(const IfStatement& i);
301 [[nodiscard]] bool writeReturnStatement(const ReturnStatement& r);
302 [[nodiscard]] bool writeSwitchStatement(const SwitchStatement& s);
303 [[nodiscard]] bool writeVarDeclaration(const VarDeclaration& v);
304 [[nodiscard]] bool writeImmutableVarDeclaration(const VarDeclaration& d);
305
306 /** Pushes an expression to the value stack. */
307 [[nodiscard]] bool pushBinaryExpression(const BinaryExpression& e);
308 [[nodiscard]] bool pushBinaryExpression(const Expression& left,
309 Operator op,
310 const Expression& right);
311 [[nodiscard]] bool pushChildCall(const ChildCall& c);
312 [[nodiscard]] bool pushConstructorCast(const AnyConstructor& c);
313 [[nodiscard]] bool pushConstructorCompound(const AnyConstructor& c);
314 [[nodiscard]] bool pushConstructorDiagonalMatrix(const ConstructorDiagonalMatrix& c);
315 [[nodiscard]] bool pushConstructorMatrixResize(const ConstructorMatrixResize& c);
316 [[nodiscard]] bool pushConstructorSplat(const ConstructorSplat& c);
317 [[nodiscard]] bool pushExpression(const Expression& e, bool usesResult = true);
318 [[nodiscard]] bool pushFieldAccess(const FieldAccess& f);
319 [[nodiscard]] bool pushFunctionCall(const FunctionCall& c);
320 [[nodiscard]] bool pushIndexExpression(const IndexExpression& i);
321 [[nodiscard]] bool pushIntrinsic(const FunctionCall& c);
322 [[nodiscard]] bool pushIntrinsic(IntrinsicKind intrinsic, const Expression& arg0);
323 [[nodiscard]] bool pushIntrinsic(IntrinsicKind intrinsic,
324 const Expression& arg0,
325 const Expression& arg1);
326 [[nodiscard]] bool pushIntrinsic(IntrinsicKind intrinsic,
327 const Expression& arg0,
328 const Expression& arg1,
329 const Expression& arg2);
330 [[nodiscard]] bool pushLiteral(const Literal& l);
331 [[nodiscard]] bool pushPostfixExpression(const PostfixExpression& p, bool usesResult);
332 [[nodiscard]] bool pushPrefixExpression(const PrefixExpression& p);
333 [[nodiscard]] bool pushPrefixExpression(Operator op, const Expression& expr);
334 [[nodiscard]] bool pushSwizzle(const Swizzle& s);
335 [[nodiscard]] bool pushTernaryExpression(const TernaryExpression& t);
336 [[nodiscard]] bool pushTernaryExpression(const Expression& test,
337 const Expression& ifTrue,
338 const Expression& ifFalse);
339 [[nodiscard]] bool pushDynamicallyUniformTernaryExpression(const Expression& test,
340 const Expression& ifTrue,
341 const Expression& ifFalse);
342 [[nodiscard]] bool pushVariableReference(const VariableReference& v);
343
344 /** Support methods for immutable data, which trade more slots for smaller code size. */
345 using ImmutableBits = int32_t;
346
347 [[nodiscard]] bool pushImmutableData(const Expression& e);
348 [[nodiscard]] std::optional<SlotRange> findPreexistingImmutableData(
349 const TArray<ImmutableBits>& immutableValues);
350 [[nodiscard]] std::optional<ImmutableBits> getImmutableBitsForSlot(const Expression& expr,
351 size_t slot);
352 [[nodiscard]] bool getImmutableValueForExpression(const Expression& expr,
353 TArray<ImmutableBits>* immutableValues);
354 void storeImmutableValueToSlots(const TArray<ImmutableBits>& immutableValues, SlotRange slots);
355
356 /** Pops an expression from the value stack and copies it into slots. */
popToSlotRange(SlotRange r)357 void popToSlotRange(SlotRange r) {
358 fBuilder.pop_slots(r);
359 if (this->shouldWriteTraceOps()) {
360 fBuilder.trace_var(fTraceMask->stackID(), r);
361 }
362 }
popToSlotRangeUnmasked(SlotRange r)363 void popToSlotRangeUnmasked(SlotRange r) {
364 fBuilder.pop_slots_unmasked(r);
365 if (this->shouldWriteTraceOps()) {
366 fBuilder.trace_var(fTraceMask->stackID(), r);
367 }
368 }
369
370 /** Pops an expression from the value stack and discards it. */
discardExpression(int slots)371 void discardExpression(int slots) { fBuilder.discard_stack(slots); }
372
373 /** Zeroes out a range of slots. */
zeroSlotRangeUnmasked(SlotRange r)374 void zeroSlotRangeUnmasked(SlotRange r) {
375 fBuilder.zero_slots_unmasked(r);
376 if (this->shouldWriteTraceOps()) {
377 fBuilder.trace_var(fTraceMask->stackID(), r);
378 }
379 }
380
381 /**
382 * Emits a trace_line opcode. writeStatement does this, and statements that alter control flow
383 * may need to explicitly add additional traces.
384 */
385 void emitTraceLine(Position pos);
386
387 /**
388 * Emits a trace_scope opcode, which alters the SkSL variable-scope depth.
389 * Unlike the other trace ops, trace_scope takes a dedicated mask instead of the trace-scope
390 * mask. Call `pushTraceScopeMask` to synthesize this mask; discard it when you're done.
391 */
392 void pushTraceScopeMask();
393 void discardTraceScopeMask();
394 void emitTraceScope(int delta);
395
396 /** Prepares our position-to-line-offset conversion table (stored in `fLineOffsets`). */
397 void calculateLineOffsets();
398
shouldWriteTraceOps()399 bool shouldWriteTraceOps() { return fDebugTrace && fWriteTraceOps; }
traceMaskStackID()400 int traceMaskStackID() { return fTraceMask->stackID(); }
401
402 /** Expression utilities. */
403 struct TypedOps {
404 BuilderOp fFloatOp;
405 BuilderOp fSignedOp;
406 BuilderOp fUnsignedOp;
407 BuilderOp fBooleanOp;
408 };
409
410 static BuilderOp GetTypedOp(const SkSL::Type& type, const TypedOps& ops);
411
412 [[nodiscard]] bool unaryOp(const SkSL::Type& type, const TypedOps& ops);
413 [[nodiscard]] bool binaryOp(const SkSL::Type& type, const TypedOps& ops);
414 [[nodiscard]] bool ternaryOp(const SkSL::Type& type, const TypedOps& ops);
415 [[nodiscard]] bool pushIntrinsic(const TypedOps& ops, const Expression& arg0);
416 [[nodiscard]] bool pushIntrinsic(const TypedOps& ops,
417 const Expression& arg0,
418 const Expression& arg1);
419 [[nodiscard]] bool pushIntrinsic(BuilderOp builderOp, const Expression& arg0);
420 [[nodiscard]] bool pushIntrinsic(BuilderOp builderOp,
421 const Expression& arg0,
422 const Expression& arg1);
423 [[nodiscard]] bool pushAbsFloatIntrinsic(int slots);
424 [[nodiscard]] bool pushLengthIntrinsic(int slotCount);
425 [[nodiscard]] bool pushVectorizedExpression(const Expression& expr, const Type& vectorType);
426 [[nodiscard]] bool pushVariableReferencePartial(const VariableReference& v, SlotRange subset);
427 [[nodiscard]] bool pushLValueOrExpression(LValue* lvalue, const Expression& expr);
428 [[nodiscard]] bool pushMatrixMultiply(LValue* lvalue,
429 const Expression& left,
430 const Expression& right,
431 int leftColumns, int leftRows,
432 int rightColumns, int rightRows);
433 [[nodiscard]] bool pushStructuredComparison(LValue* left,
434 Operator op,
435 LValue* right,
436 const Type& type);
437
438 void foldWithMultiOp(BuilderOp op, int elements);
439 void foldComparisonOp(Operator op, int elements);
440
441 BuilderOp getTypedOp(const SkSL::Type& type, const TypedOps& ops) const;
442
returnComplexity(const FunctionDefinition * func)443 Analysis::ReturnComplexity returnComplexity(const FunctionDefinition* func) {
444 Analysis::ReturnComplexity* complexity = fReturnComplexityMap.find(func);
445 if (!complexity) {
446 complexity = fReturnComplexityMap.set(fCurrentFunction,
447 Analysis::GetReturnComplexity(*func));
448 }
449 return *complexity;
450 }
451
needsReturnMask(const FunctionDefinition * func)452 bool needsReturnMask(const FunctionDefinition* func) {
453 return this->returnComplexity(func) >= Analysis::ReturnComplexity::kEarlyReturns;
454 }
455
needsFunctionResultSlots(const FunctionDefinition * func)456 bool needsFunctionResultSlots(const FunctionDefinition* func) {
457 return this->shouldWriteTraceOps() || (this->returnComplexity(func) >
458 Analysis::ReturnComplexity::kSingleSafeReturn);
459 }
460
IsUniform(const Variable & var)461 static bool IsUniform(const Variable& var) {
462 return var.modifierFlags().isUniform();
463 }
464
IsOutParameter(const Variable & var)465 static bool IsOutParameter(const Variable& var) {
466 return (var.modifierFlags() & (ModifierFlag::kIn | ModifierFlag::kOut)) ==
467 ModifierFlag::kOut;
468 }
469
IsInoutParameter(const Variable & var)470 static bool IsInoutParameter(const Variable& var) {
471 return (var.modifierFlags() & (ModifierFlag::kIn | ModifierFlag::kOut)) ==
472 (ModifierFlag::kIn | ModifierFlag::kOut);
473 }
474
475 private:
476 const SkSL::Program& fProgram;
477 SkSL::Context fContext;
478 Builder fBuilder;
479 DebugTracePriv* fDebugTrace = nullptr;
480 bool fWriteTraceOps = false;
481 THashMap<const Variable*, int> fChildEffectMap;
482
483 SlotManager fProgramSlots;
484 SlotManager fUniformSlots;
485 SlotManager fImmutableSlots;
486
487 std::optional<AutoStack> fTraceMask;
488 const FunctionDefinition* fCurrentFunction = nullptr;
489 SlotRange fCurrentFunctionResult;
490 AutoContinueMask* fCurrentContinueMask = nullptr;
491 int fCurrentBreakTarget = -1;
492 int fCurrentStack = 0;
493 int fNextStackID = 0;
494 TArray<int> fRecycledStacks;
495
496 THashMap<const FunctionDefinition*, Analysis::ReturnComplexity> fReturnComplexityMap;
497
498 THashMap<ImmutableBits, THashSet<Slot>> fImmutableSlotMap;
499 THashSet<const Variable*> fImmutableVariables;
500
501 // `fInsideCompoundStatement` will be nonzero if we are currently writing statements inside of a
502 // compound-statement Block. (Conceptually those statements should all count as one.)
503 int fInsideCompoundStatement = 0;
504
505 // `fLineOffsets` contains the position of each newline in the source, plus a zero at the
506 // beginning, and the total source length at the end, as sentinels.
507 TArray<int> fLineOffsets;
508
509 static constexpr auto kAddOps = TypedOps{BuilderOp::add_n_floats,
510 BuilderOp::add_n_ints,
511 BuilderOp::add_n_ints,
512 BuilderOp::unsupported};
513 static constexpr auto kSubtractOps = TypedOps{BuilderOp::sub_n_floats,
514 BuilderOp::sub_n_ints,
515 BuilderOp::sub_n_ints,
516 BuilderOp::unsupported};
517 static constexpr auto kMultiplyOps = TypedOps{BuilderOp::mul_n_floats,
518 BuilderOp::mul_n_ints,
519 BuilderOp::mul_n_ints,
520 BuilderOp::unsupported};
521 static constexpr auto kDivideOps = TypedOps{BuilderOp::div_n_floats,
522 BuilderOp::div_n_ints,
523 BuilderOp::div_n_uints,
524 BuilderOp::unsupported};
525 static constexpr auto kLessThanOps = TypedOps{BuilderOp::cmplt_n_floats,
526 BuilderOp::cmplt_n_ints,
527 BuilderOp::cmplt_n_uints,
528 BuilderOp::unsupported};
529 static constexpr auto kLessThanEqualOps = TypedOps{BuilderOp::cmple_n_floats,
530 BuilderOp::cmple_n_ints,
531 BuilderOp::cmple_n_uints,
532 BuilderOp::unsupported};
533 static constexpr auto kEqualOps = TypedOps{BuilderOp::cmpeq_n_floats,
534 BuilderOp::cmpeq_n_ints,
535 BuilderOp::cmpeq_n_ints,
536 BuilderOp::cmpeq_n_ints};
537 static constexpr auto kNotEqualOps = TypedOps{BuilderOp::cmpne_n_floats,
538 BuilderOp::cmpne_n_ints,
539 BuilderOp::cmpne_n_ints,
540 BuilderOp::cmpne_n_ints};
541 static constexpr auto kModOps = TypedOps{BuilderOp::mod_n_floats,
542 BuilderOp::unsupported,
543 BuilderOp::unsupported,
544 BuilderOp::unsupported};
545 static constexpr auto kMinOps = TypedOps{BuilderOp::min_n_floats,
546 BuilderOp::min_n_ints,
547 BuilderOp::min_n_uints,
548 BuilderOp::min_n_uints};
549 static constexpr auto kMaxOps = TypedOps{BuilderOp::max_n_floats,
550 BuilderOp::max_n_ints,
551 BuilderOp::max_n_uints,
552 BuilderOp::max_n_uints};
553 static constexpr auto kMixOps = TypedOps{BuilderOp::mix_n_floats,
554 BuilderOp::unsupported,
555 BuilderOp::unsupported,
556 BuilderOp::unsupported};
557 static constexpr auto kInverseSqrtOps = TypedOps{BuilderOp::invsqrt_float,
558 BuilderOp::unsupported,
559 BuilderOp::unsupported,
560 BuilderOp::unsupported};
561 friend class AutoContinueMask;
562 };
563
AutoStack(Generator * g)564 AutoStack::AutoStack(Generator* g)
565 : fGenerator(g)
566 , fStackID(g->createStack()) {}
567
~AutoStack()568 AutoStack::~AutoStack() {
569 fGenerator->recycleStack(fStackID);
570 }
571
enter()572 void AutoStack::enter() {
573 fParentStackID = fGenerator->currentStack();
574 fGenerator->setCurrentStack(fStackID);
575 }
576
exit()577 void AutoStack::exit() {
578 SkASSERT(fGenerator->currentStack() == fStackID);
579 fGenerator->setCurrentStack(fParentStackID);
580 }
581
pushClone(int slots)582 void AutoStack::pushClone(int slots) {
583 this->pushClone(SlotRange{0, slots}, /*offsetFromStackTop=*/slots);
584 }
585
pushClone(SlotRange range,int offsetFromStackTop)586 void AutoStack::pushClone(SlotRange range, int offsetFromStackTop) {
587 fGenerator->builder()->push_clone_from_stack(range, fStackID, offsetFromStackTop);
588 }
589
pushCloneIndirect(SlotRange range,int dynamicStackID,int offsetFromStackTop)590 void AutoStack::pushCloneIndirect(SlotRange range, int dynamicStackID, int offsetFromStackTop) {
591 fGenerator->builder()->push_clone_indirect_from_stack(
592 range, dynamicStackID, /*otherStackID=*/fStackID, offsetFromStackTop);
593 }
594
595 class AutoContinueMask {
596 public:
AutoContinueMask(Generator * gen)597 AutoContinueMask(Generator* gen) : fGenerator(gen) {}
598
~AutoContinueMask()599 ~AutoContinueMask() {
600 if (fPreviousContinueMask) {
601 fGenerator->fCurrentContinueMask = fPreviousContinueMask;
602 }
603 }
604
enable()605 void enable() {
606 SkASSERT(!fContinueMaskStack.has_value());
607
608 fContinueMaskStack.emplace(fGenerator);
609 fPreviousContinueMask = fGenerator->fCurrentContinueMask;
610 fGenerator->fCurrentContinueMask = this;
611 }
612
enter()613 void enter() {
614 SkASSERT(fContinueMaskStack.has_value());
615 fContinueMaskStack->enter();
616 }
617
exit()618 void exit() {
619 SkASSERT(fContinueMaskStack.has_value());
620 fContinueMaskStack->exit();
621 }
622
enterLoopBody()623 void enterLoopBody() {
624 if (fContinueMaskStack.has_value()) {
625 fContinueMaskStack->enter();
626 fGenerator->builder()->push_constant_i(0);
627 fContinueMaskStack->exit();
628 }
629 }
630
exitLoopBody()631 void exitLoopBody() {
632 if (fContinueMaskStack.has_value()) {
633 fContinueMaskStack->enter();
634 fGenerator->builder()->pop_and_reenable_loop_mask();
635 fContinueMaskStack->exit();
636 }
637 }
638
stackID()639 int stackID() {
640 SkASSERT(fContinueMaskStack.has_value());
641 return fContinueMaskStack->stackID();
642 }
643
644 private:
645 std::optional<AutoStack> fContinueMaskStack;
646 Generator* fGenerator = nullptr;
647 AutoContinueMask* fPreviousContinueMask = nullptr;
648 };
649
650 class AutoLoopTarget {
651 public:
AutoLoopTarget(Generator * gen,int * targetPtr)652 AutoLoopTarget(Generator* gen, int* targetPtr) : fGenerator(gen), fLoopTargetPtr(targetPtr) {
653 fLabelID = fGenerator->builder()->nextLabelID();
654 fPreviousLoopTarget = *fLoopTargetPtr;
655 *fLoopTargetPtr = fLabelID;
656 }
657
~AutoLoopTarget()658 ~AutoLoopTarget() {
659 *fLoopTargetPtr = fPreviousLoopTarget;
660 }
661
labelID()662 int labelID() {
663 return fLabelID;
664 }
665
666 private:
667 Generator* fGenerator = nullptr;
668 int* fLoopTargetPtr = nullptr;
669 int fPreviousLoopTarget;
670 int fLabelID;
671 };
672
/**
 * Abstract interface for something that can be pushed onto the stack and (when writable) stored
 * into. Implementations in this file include ScratchLValue, VariableLValue and ImmutableLValue.
 */
class LValue {
public:
    virtual ~LValue() = default;

    /** Returns true if this lvalue is actually writable--temporaries and uniforms are not. */
    virtual bool isWritable() const = 0;

    /**
     * Returns the fixed slot range of the lvalue, after it is winnowed down to the selected
     * field/index. The range is calculated assuming every dynamic index will evaluate to zero.
     */
    virtual SlotRange fixedSlotRange(Generator* gen) = 0;

    /**
     * Returns a stack which holds a single integer, representing the dynamic offset of the lvalue.
     * This value does not incorporate the fixed offset. If null is returned, the lvalue doesn't
     * have a dynamic offset. `evaluateDynamicIndices` must be called before this is used.
     */
    virtual AutoStack* dynamicSlotRange() = 0;

    /** Returns the swizzle components of the lvalue, or an empty span for non-swizzle LValues. */
    virtual SkSpan<const int8_t> swizzle() { return {}; }

    /**
     * Pushes values directly onto the stack. Returns false if the push is unsupported.
     * `dynamicOffset` may be null (no dynamic offset); `swizzle` may be empty (no swizzle).
     */
    [[nodiscard]] virtual bool push(Generator* gen,
                                    SlotRange fixedOffset,
                                    AutoStack* dynamicOffset,
                                    SkSpan<const int8_t> swizzle) = 0;

    /**
     * Stores topmost values from the stack directly into the lvalue. Returns false if the store
     * is unsupported. `dynamicOffset` may be null; `swizzle` may be empty.
     */
    [[nodiscard]] virtual bool store(Generator* gen,
                                     SlotRange fixedOffset,
                                     AutoStack* dynamicOffset,
                                     SkSpan<const int8_t> swizzle) = 0;
    /**
     * Some lvalues refer to a temporary expression; these temps can be held in the
     * scratch-expression field to ensure that they exist for the lifetime of the lvalue.
     */
    std::unique_ptr<Expression> fScratchExpression;
};
713
714 class ScratchLValue final : public LValue {
715 public:
ScratchLValue(const Expression & e)716 explicit ScratchLValue(const Expression& e)
717 : fExpression(&e)
718 , fNumSlots(e.type().slotCount()) {}
719
~ScratchLValue()720 ~ScratchLValue() override {
721 if (fGenerator && fDedicatedStack.has_value()) {
722 // Jettison the scratch expression.
723 fDedicatedStack->enter();
724 fGenerator->discardExpression(fNumSlots);
725 fDedicatedStack->exit();
726 }
727 }
728
isWritable() const729 bool isWritable() const override {
730 return false;
731 }
732
fixedSlotRange(Generator * gen)733 SlotRange fixedSlotRange(Generator* gen) override {
734 return SlotRange{0, fNumSlots};
735 }
736
dynamicSlotRange()737 AutoStack* dynamicSlotRange() override {
738 return nullptr;
739 }
740
push(Generator * gen,SlotRange fixedOffset,AutoStack * dynamicOffset,SkSpan<const int8_t> swizzle)741 [[nodiscard]] bool push(Generator* gen,
742 SlotRange fixedOffset,
743 AutoStack* dynamicOffset,
744 SkSpan<const int8_t> swizzle) override {
745 if (!fDedicatedStack.has_value()) {
746 // Push the scratch expression onto a dedicated stack.
747 fGenerator = gen;
748 fDedicatedStack.emplace(fGenerator);
749 fDedicatedStack->enter();
750 if (!fGenerator->pushExpression(*fExpression)) {
751 return unsupported();
752 }
753 fDedicatedStack->exit();
754 }
755
756 if (dynamicOffset) {
757 fDedicatedStack->pushCloneIndirect(fixedOffset, dynamicOffset->stackID(), fNumSlots);
758 } else {
759 fDedicatedStack->pushClone(fixedOffset, fNumSlots);
760 }
761 if (!swizzle.empty()) {
762 gen->builder()->swizzle(fixedOffset.count, swizzle);
763 }
764 return true;
765 }
766
store(Generator *,SlotRange,AutoStack *,SkSpan<const int8_t>)767 [[nodiscard]] bool store(Generator*, SlotRange, AutoStack*, SkSpan<const int8_t>) override {
768 SkDEBUGFAIL("scratch lvalues cannot be stored into");
769 return unsupported();
770 }
771
772 private:
773 Generator* fGenerator = nullptr;
774 const Expression* fExpression = nullptr;
775 std::optional<AutoStack> fDedicatedStack;
776 int fNumSlots = 0;
777 };
778
779 class VariableLValue final : public LValue {
780 public:
VariableLValue(const Variable * v)781 explicit VariableLValue(const Variable* v) : fVariable(v) {}
782
isWritable() const783 bool isWritable() const override {
784 return !Generator::IsUniform(*fVariable);
785 }
786
fixedSlotRange(Generator * gen)787 SlotRange fixedSlotRange(Generator* gen) override {
788 return Generator::IsUniform(*fVariable) ? gen->getUniformSlots(*fVariable)
789 : gen->getVariableSlots(*fVariable);
790 }
791
dynamicSlotRange()792 AutoStack* dynamicSlotRange() override {
793 return nullptr;
794 }
795
push(Generator * gen,SlotRange fixedOffset,AutoStack * dynamicOffset,SkSpan<const int8_t> swizzle)796 [[nodiscard]] bool push(Generator* gen,
797 SlotRange fixedOffset,
798 AutoStack* dynamicOffset,
799 SkSpan<const int8_t> swizzle) override {
800 if (Generator::IsUniform(*fVariable)) {
801 if (dynamicOffset) {
802 gen->builder()->push_uniform_indirect(fixedOffset, dynamicOffset->stackID(),
803 this->fixedSlotRange(gen));
804 } else {
805 gen->builder()->push_uniform(fixedOffset);
806 }
807 } else {
808 if (dynamicOffset) {
809 gen->builder()->push_slots_indirect(fixedOffset, dynamicOffset->stackID(),
810 this->fixedSlotRange(gen));
811 } else {
812 gen->builder()->push_slots(fixedOffset);
813 }
814 }
815 if (!swizzle.empty()) {
816 gen->builder()->swizzle(fixedOffset.count, swizzle);
817 }
818 return true;
819 }
820
store(Generator * gen,SlotRange fixedOffset,AutoStack * dynamicOffset,SkSpan<const int8_t> swizzle)821 [[nodiscard]] bool store(Generator* gen,
822 SlotRange fixedOffset,
823 AutoStack* dynamicOffset,
824 SkSpan<const int8_t> swizzle) override {
825 SkASSERT(!Generator::IsUniform(*fVariable));
826
827 if (swizzle.empty()) {
828 if (dynamicOffset) {
829 gen->builder()->copy_stack_to_slots_indirect(fixedOffset, dynamicOffset->stackID(),
830 this->fixedSlotRange(gen));
831 } else {
832 gen->builder()->copy_stack_to_slots(fixedOffset);
833 }
834 } else {
835 if (dynamicOffset) {
836 gen->builder()->swizzle_copy_stack_to_slots_indirect(fixedOffset,
837 dynamicOffset->stackID(),
838 this->fixedSlotRange(gen),
839 swizzle,
840 swizzle.size());
841 } else {
842 gen->builder()->swizzle_copy_stack_to_slots(fixedOffset, swizzle, swizzle.size());
843 }
844 }
845 if (gen->shouldWriteTraceOps()) {
846 if (dynamicOffset) {
847 gen->builder()->trace_var_indirect(gen->traceMaskStackID(),
848 fixedOffset,
849 dynamicOffset->stackID(),
850 this->fixedSlotRange(gen));
851 } else {
852 gen->builder()->trace_var(gen->traceMaskStackID(), fixedOffset);
853 }
854 }
855 return true;
856 }
857
858 private:
859 const Variable* fVariable;
860 };
861
862 class ImmutableLValue final : public LValue {
863 public:
ImmutableLValue(const Variable * v)864 explicit ImmutableLValue(const Variable* v) : fVariable(v) {}
865
isWritable() const866 bool isWritable() const override {
867 return false;
868 }
869
fixedSlotRange(Generator * gen)870 SlotRange fixedSlotRange(Generator* gen) override {
871 return gen->getImmutableSlots(*fVariable);
872 }
873
dynamicSlotRange()874 AutoStack* dynamicSlotRange() override {
875 return nullptr;
876 }
877
push(Generator * gen,SlotRange fixedOffset,AutoStack * dynamicOffset,SkSpan<const int8_t> swizzle)878 [[nodiscard]] bool push(Generator* gen,
879 SlotRange fixedOffset,
880 AutoStack* dynamicOffset,
881 SkSpan<const int8_t> swizzle) override {
882 if (dynamicOffset) {
883 gen->builder()->push_immutable_indirect(fixedOffset, dynamicOffset->stackID(),
884 this->fixedSlotRange(gen));
885 } else {
886 gen->builder()->push_immutable(fixedOffset);
887 }
888 if (!swizzle.empty()) {
889 gen->builder()->swizzle(fixedOffset.count, swizzle);
890 }
891 return true;
892 }
893
store(Generator * gen,SlotRange fixedOffset,AutoStack * dynamicOffset,SkSpan<const int8_t> swizzle)894 [[nodiscard]] bool store(Generator* gen,
895 SlotRange fixedOffset,
896 AutoStack* dynamicOffset,
897 SkSpan<const int8_t> swizzle) override {
898 SkDEBUGFAIL("immutable values cannot be stored into");
899 return unsupported();
900 }
901
902 private:
903 const Variable* fVariable;
904 };
905
906 class SwizzleLValue final : public LValue {
907 public:
SwizzleLValue(std::unique_ptr<LValue> p,const ComponentArray & c)908 explicit SwizzleLValue(std::unique_ptr<LValue> p, const ComponentArray& c)
909 : fParent(std::move(p))
910 , fComponents(c) {
911 SkASSERT(!fComponents.empty() && fComponents.size() <= 4);
912 }
913
isWritable() const914 bool isWritable() const override {
915 return fParent->isWritable();
916 }
917
fixedSlotRange(Generator * gen)918 SlotRange fixedSlotRange(Generator* gen) override {
919 return fParent->fixedSlotRange(gen);
920 }
921
dynamicSlotRange()922 AutoStack* dynamicSlotRange() override {
923 return fParent->dynamicSlotRange();
924 }
925
swizzle()926 SkSpan<const int8_t> swizzle() override {
927 return fComponents;
928 }
929
push(Generator * gen,SlotRange fixedOffset,AutoStack * dynamicOffset,SkSpan<const int8_t> swizzle)930 [[nodiscard]] bool push(Generator* gen,
931 SlotRange fixedOffset,
932 AutoStack* dynamicOffset,
933 SkSpan<const int8_t> swizzle) override {
934 if (!swizzle.empty()) {
935 SkDEBUGFAIL("swizzle-of-a-swizzle should have been folded out in front end");
936 return unsupported();
937 }
938 return fParent->push(gen, fixedOffset, dynamicOffset, fComponents);
939 }
940
store(Generator * gen,SlotRange fixedOffset,AutoStack * dynamicOffset,SkSpan<const int8_t> swizzle)941 [[nodiscard]] bool store(Generator* gen,
942 SlotRange fixedOffset,
943 AutoStack* dynamicOffset,
944 SkSpan<const int8_t> swizzle) override {
945 if (!swizzle.empty()) {
946 SkDEBUGFAIL("swizzle-of-a-swizzle should have been folded out in front end");
947 return unsupported();
948 }
949 return fParent->store(gen, fixedOffset, dynamicOffset, fComponents);
950 }
951
952 private:
953 std::unique_ptr<LValue> fParent;
954 const ComponentArray& fComponents;
955 };
956
957 class UnownedLValueSlice : public LValue {
958 public:
UnownedLValueSlice(LValue * p,int initialSlot,int numSlots)959 explicit UnownedLValueSlice(LValue* p, int initialSlot, int numSlots)
960 : fParent(p)
961 , fInitialSlot(initialSlot)
962 , fNumSlots(numSlots) {
963 SkASSERT(fInitialSlot >= 0);
964 SkASSERT(fNumSlots > 0);
965 }
966
isWritable() const967 bool isWritable() const override {
968 return fParent->isWritable();
969 }
970
fixedSlotRange(Generator * gen)971 SlotRange fixedSlotRange(Generator* gen) override {
972 SlotRange range = fParent->fixedSlotRange(gen);
973 SlotRange adjusted = range;
974 adjusted.index += fInitialSlot;
975 adjusted.count = fNumSlots;
976 SkASSERT((adjusted.index + adjusted.count) <= (range.index + range.count));
977 return adjusted;
978 }
979
dynamicSlotRange()980 AutoStack* dynamicSlotRange() override {
981 return fParent->dynamicSlotRange();
982 }
983
push(Generator * gen,SlotRange fixedOffset,AutoStack * dynamicOffset,SkSpan<const int8_t> swizzle)984 [[nodiscard]] bool push(Generator* gen,
985 SlotRange fixedOffset,
986 AutoStack* dynamicOffset,
987 SkSpan<const int8_t> swizzle) override {
988 return fParent->push(gen, fixedOffset, dynamicOffset, swizzle);
989 }
990
store(Generator * gen,SlotRange fixedOffset,AutoStack * dynamicOffset,SkSpan<const int8_t> swizzle)991 [[nodiscard]] bool store(Generator* gen,
992 SlotRange fixedOffset,
993 AutoStack* dynamicOffset,
994 SkSpan<const int8_t> swizzle) override {
995 return fParent->store(gen, fixedOffset, dynamicOffset, swizzle);
996 }
997
998 protected:
999 LValue* fParent;
1000
1001 private:
1002 int fInitialSlot = 0;
1003 int fNumSlots = 0;
1004 };
1005
// A slice that owns its parent LValue. The constructor releases the incoming unique_ptr into the
// base class's raw `fParent` pointer, and the destructor deletes it.
class LValueSlice final : public UnownedLValueSlice {
public:
    explicit LValueSlice(std::unique_ptr<LValue> p, int initialSlot, int numSlots)
            : UnownedLValueSlice(p.release(), initialSlot, numSlots) {}

    ~LValueSlice() override {
        // Ownership was taken via `release()` in the constructor, so it must be freed here.
        delete fParent;
    }
};
1015
// An LValue for an index expression whose index is not handled as a fixed slice. The index is
// evaluated once by `evaluateDynamicIndices`, which leaves the resulting slot offset on a
// dedicated stack; that stack is then reported via `dynamicSlotRange`.
class DynamicIndexLValue final : public LValue {
public:
    explicit DynamicIndexLValue(std::unique_ptr<LValue> p, const IndexExpression& i)
            : fParent(std::move(p))
            , fIndexExpr(&i) {
        SkASSERT(fIndexExpr->index()->type().isInteger());
    }

    ~DynamicIndexLValue() override {
        // Nothing to clean up unless `evaluateDynamicIndices` created the dedicated stack.
        if (fDedicatedStack.has_value()) {
            SkASSERT(fGenerator);

            // Jettison the index expression.
            fDedicatedStack->enter();
            fGenerator->discardExpression(/*slots=*/1);
            fDedicatedStack->exit();
        }
    }

    bool isWritable() const override {
        return fParent->isWritable();
    }

    // Evaluates the index expression onto a newly created dedicated stack, scales it by the slot
    // count of the indexed type, and adds in the parent's dynamic offset if the parent has one.
    // Returns false (via `unsupported()`) if the parent is swizzled or the index expression cannot
    // be pushed. Must be called at most once per object.
    [[nodiscard]] bool evaluateDynamicIndices(Generator* gen) {
        // The index must only be computed once; the index-expression could have side effects.
        // Once it has been computed, the offset lives on `fDedicatedStack`.
        SkASSERT(!fDedicatedStack.has_value());
        SkASSERT(!fGenerator);
        fGenerator = gen;
        fDedicatedStack.emplace(fGenerator);

        if (!fParent->swizzle().empty()) {
            SkDEBUGFAIL("an indexed-swizzle should have been handled by RewriteIndexedSwizzle");
            return unsupported();
        }

        // Push the index expression onto the dedicated stack.
        fDedicatedStack->enter();
        if (!fGenerator->pushExpression(*fIndexExpr->index())) {
            return unsupported();
        }

        // Multiply the index-expression result by the per-value slot count.
        int slotCount = fIndexExpr->type().slotCount();
        if (slotCount != 1) {
            fGenerator->builder()->push_constant_i(fIndexExpr->type().slotCount());
            fGenerator->builder()->binary_op(BuilderOp::mul_n_ints, 1);
        }

        // Check to see if a parent LValue already has a dynamic index. If so, we need to
        // incorporate its value into our own.
        if (AutoStack* parentDynamicIndexStack = fParent->dynamicSlotRange()) {
            parentDynamicIndexStack->pushClone(/*slots=*/1);
            fGenerator->builder()->binary_op(BuilderOp::add_n_ints, 1);
        }
        fDedicatedStack->exit();
        return true;
    }

    // Returns the parent's fixed range with its count replaced by the slot count of the indexed
    // type; the index itself is not applied here.
    SlotRange fixedSlotRange(Generator* gen) override {
        // Compute the fixed slot range as if we are indexing into position zero.
        SlotRange range = fParent->fixedSlotRange(gen);
        range.count = fIndexExpr->type().slotCount();
        return range;
    }

    // Returns the dedicated stack; `evaluateDynamicIndices` must already have been called.
    AutoStack* dynamicSlotRange() override {
        // We incorporated any parent dynamic offsets when `evaluateDynamicIndices` was called.
        SkASSERT(fDedicatedStack.has_value());
        return &*fDedicatedStack;
    }

    // Pushes are forwarded to the parent unchanged.
    [[nodiscard]] bool push(Generator* gen,
                            SlotRange fixedOffset,
                            AutoStack* dynamicOffset,
                            SkSpan<const int8_t> swizzle) override {
        return fParent->push(gen, fixedOffset, dynamicOffset, swizzle);
    }

    // Stores are forwarded to the parent unchanged.
    [[nodiscard]] bool store(Generator* gen,
                             SlotRange fixedOffset,
                             AutoStack* dynamicOffset,
                             SkSpan<const int8_t> swizzle) override {
        return fParent->store(gen, fixedOffset, dynamicOffset, swizzle);
    }

private:
    // Set by `evaluateDynamicIndices`; used by the destructor to discard the index value.
    Generator* fGenerator = nullptr;
    std::unique_ptr<LValue> fParent;
    // Holds the computed slot offset once `evaluateDynamicIndices` has run.
    std::optional<AutoStack> fDedicatedStack;
    const IndexExpression* fIndexExpr = nullptr;
};
1108
addSlotDebugInfoForGroup(const std::string & varName,const Type & type,Position pos,int * groupIndex,bool isFunctionReturnValue)1109 void SlotManager::addSlotDebugInfoForGroup(const std::string& varName,
1110 const Type& type,
1111 Position pos,
1112 int* groupIndex,
1113 bool isFunctionReturnValue) {
1114 SkASSERT(fSlotDebugInfo);
1115 switch (type.typeKind()) {
1116 case Type::TypeKind::kArray: {
1117 int nslots = type.columns();
1118 const Type& elemType = type.componentType();
1119 for (int slot = 0; slot < nslots; ++slot) {
1120 this->addSlotDebugInfoForGroup(varName + "[" + std::to_string(slot) + "]", elemType,
1121 pos, groupIndex, isFunctionReturnValue);
1122 }
1123 break;
1124 }
1125 case Type::TypeKind::kStruct: {
1126 for (const Field& field : type.fields()) {
1127 this->addSlotDebugInfoForGroup(varName + "." + std::string(field.fName),
1128 *field.fType, pos, groupIndex,
1129 isFunctionReturnValue);
1130 }
1131 break;
1132 }
1133 default:
1134 SkASSERTF(0, "unsupported slot type %d", (int)type.typeKind());
1135 [[fallthrough]];
1136
1137 case Type::TypeKind::kScalar:
1138 case Type::TypeKind::kVector:
1139 case Type::TypeKind::kMatrix: {
1140 Type::NumberKind numberKind = type.componentType().numberKind();
1141 int nslots = type.slotCount();
1142
1143 for (int slot = 0; slot < nslots; ++slot) {
1144 SlotDebugInfo slotInfo;
1145 slotInfo.name = varName;
1146 slotInfo.columns = type.columns();
1147 slotInfo.rows = type.rows();
1148 slotInfo.componentIndex = slot;
1149 slotInfo.groupIndex = (*groupIndex)++;
1150 slotInfo.numberKind = numberKind;
1151 slotInfo.pos = pos;
1152 slotInfo.fnReturnValue = isFunctionReturnValue ? 1 : -1;
1153 fSlotDebugInfo->push_back(std::move(slotInfo));
1154 }
1155 break;
1156 }
1157 }
1158 }
1159
addSlotDebugInfo(const std::string & varName,const Type & type,Position pos,bool isFunctionReturnValue)1160 void SlotManager::addSlotDebugInfo(const std::string& varName,
1161 const Type& type,
1162 Position pos,
1163 bool isFunctionReturnValue) {
1164 int groupIndex = 0;
1165 this->addSlotDebugInfoForGroup(varName, type, pos, &groupIndex, isFunctionReturnValue);
1166 SkASSERT((size_t)groupIndex == type.slotCount());
1167 }
1168
createSlots(std::string name,const Type & type,Position pos,bool isFunctionReturnValue)1169 SlotRange SlotManager::createSlots(std::string name,
1170 const Type& type,
1171 Position pos,
1172 bool isFunctionReturnValue) {
1173 size_t nslots = type.slotCount();
1174 if (nslots == 0) {
1175 return {};
1176 }
1177 if (fSlotDebugInfo) {
1178 // Our debug slot-info table should have the same length as the actual slot table.
1179 SkASSERT(fSlotDebugInfo->size() == (size_t)fSlotCount);
1180
1181 // Append slot names and types to our debug slot-info table.
1182 fSlotDebugInfo->reserve(fSlotCount + nslots);
1183 this->addSlotDebugInfo(name, type, pos, isFunctionReturnValue);
1184
1185 // Confirm that we added the expected number of slots.
1186 SkASSERT(fSlotDebugInfo->size() == (size_t)(fSlotCount + nslots));
1187 }
1188
1189 SlotRange result = {fSlotCount, (int)nslots};
1190 fSlotCount += nslots;
1191 return result;
1192 }
1193
mapVariableToSlots(const Variable & v,SlotRange range)1194 std::optional<SlotRange> SlotManager::mapVariableToSlots(const Variable& v, SlotRange range) {
1195 SkASSERT(v.type().slotCount() == SkToSizeT(range.count));
1196 const SlotRange* existingEntry = fSlotMap.find(&v);
1197 std::optional<SlotRange> originalRange = existingEntry ? std::optional(*existingEntry)
1198 : std::nullopt;
1199 fSlotMap.set(&v, range);
1200 return originalRange;
1201 }
1202
// Removes the slot-map entry keyed on `v`.
void SlotManager::unmapVariableSlots(const Variable& v) {
    fSlotMap.remove(&v);
}
1206
getVariableSlots(const Variable & v)1207 SlotRange SlotManager::getVariableSlots(const Variable& v) {
1208 SlotRange* entry = fSlotMap.find(&v);
1209 if (entry != nullptr) {
1210 return *entry;
1211 }
1212 SlotRange range = this->createSlots(std::string(v.name()),
1213 v.type(),
1214 v.fPosition,
1215 /*isFunctionReturnValue=*/false);
1216 this->mapVariableToSlots(v, range);
1217 return range;
1218 }
1219
getFunctionSlots(const IRNode & callSite,const FunctionDeclaration & f)1220 SlotRange SlotManager::getFunctionSlots(const IRNode& callSite, const FunctionDeclaration& f) {
1221 SlotRange* entry = fSlotMap.find(&callSite);
1222 if (entry != nullptr) {
1223 return *entry;
1224 }
1225 SlotRange range = this->createSlots("[" + std::string(f.name()) + "].result",
1226 f.returnType(),
1227 f.fPosition,
1228 /*isFunctionReturnValue=*/true);
1229 fSlotMap.set(&callSite, range);
1230 return range;
1231 }
1232
is_sliceable_swizzle(SkSpan<const int8_t> components)1233 static bool is_sliceable_swizzle(SkSpan<const int8_t> components) {
1234 // Determine if the swizzle rearranges its elements, or if it's a simple subset of its elements.
1235 // (A simple subset would be a sequential non-repeating range of components, like `.xyz` or
1236 // `.yzw` or `.z`, but not `.xx` or `.xz`, which can be accessed as a slice of the variable.)
1237 for (size_t index = 1; index < components.size(); ++index) {
1238 if (components[index] != int8_t(components[0] + index)) {
1239 return false;
1240 }
1241 }
1242 return true;
1243 }
1244
makeLValue(const Expression & e,bool allowScratch)1245 std::unique_ptr<LValue> Generator::makeLValue(const Expression& e, bool allowScratch) {
1246 if (e.is<VariableReference>()) {
1247 const Variable* variable = e.as<VariableReference>().variable();
1248 if (fImmutableVariables.contains(variable)) {
1249 return std::make_unique<ImmutableLValue>(variable);
1250 }
1251 return std::make_unique<VariableLValue>(variable);
1252 }
1253 if (e.is<Swizzle>()) {
1254 const Swizzle& swizzleExpr = e.as<Swizzle>();
1255 if (std::unique_ptr<LValue> base = this->makeLValue(*swizzleExpr.base(),
1256 allowScratch)) {
1257 const ComponentArray& components = swizzleExpr.components();
1258 if (is_sliceable_swizzle(components)) {
1259 // If the swizzle is a contiguous subset, we can represent it with a fixed slice.
1260 return std::make_unique<LValueSlice>(std::move(base), components[0],
1261 components.size());
1262 }
1263 return std::make_unique<SwizzleLValue>(std::move(base), components);
1264 }
1265 return nullptr;
1266 }
1267 if (e.is<FieldAccess>()) {
1268 const FieldAccess& fieldExpr = e.as<FieldAccess>();
1269 if (std::unique_ptr<LValue> base = this->makeLValue(*fieldExpr.base(),
1270 allowScratch)) {
1271 // Represent field access with a slice.
1272 return std::make_unique<LValueSlice>(std::move(base), fieldExpr.initialSlot(),
1273 fieldExpr.type().slotCount());
1274 }
1275 return nullptr;
1276 }
1277 if (e.is<IndexExpression>()) {
1278 const IndexExpression& indexExpr = e.as<IndexExpression>();
1279
1280 // If the index base is swizzled (`vec.zyx[idx]`), rewrite it into an equivalent
1281 // non-swizzled form (`vec[uint3(2,1,0)[idx]]`).
1282 if (std::unique_ptr<Expression> rewritten = Transform::RewriteIndexedSwizzle(fContext,
1283 indexExpr)) {
1284 // Convert the rewritten expression into an lvalue.
1285 std::unique_ptr<LValue> lvalue = this->makeLValue(*rewritten, allowScratch);
1286 if (!lvalue) {
1287 return nullptr;
1288 }
1289 // We need to hold onto the rewritten expression for the lifetime of the lvalue.
1290 lvalue->fScratchExpression = std::move(rewritten);
1291 return lvalue;
1292 }
1293 if (std::unique_ptr<LValue> base = this->makeLValue(*indexExpr.base(),
1294 allowScratch)) {
1295 // If the index is a compile-time constant, we can represent it with a fixed slice.
1296 SKSL_INT indexValue;
1297 if (ConstantFolder::GetConstantInt(*indexExpr.index(), &indexValue)) {
1298 int numSlots = indexExpr.type().slotCount();
1299 return std::make_unique<LValueSlice>(std::move(base), numSlots * indexValue,
1300 numSlots);
1301 }
1302
1303 // Represent non-constant indexing via a dynamic index.
1304 auto dynLValue = std::make_unique<DynamicIndexLValue>(std::move(base), indexExpr);
1305 return dynLValue->evaluateDynamicIndices(this) ? std::move(dynLValue)
1306 : nullptr;
1307 }
1308 return nullptr;
1309 }
1310 if (allowScratch) {
1311 // This path allows us to perform field- and index-accesses on an expression as if it were
1312 // an lvalue, but is a temporary and shouldn't be written back to.
1313 return std::make_unique<ScratchLValue>(e);
1314 }
1315 return nullptr;
1316 }
1317
push(LValue & lvalue)1318 bool Generator::push(LValue& lvalue) {
1319 return lvalue.push(this,
1320 lvalue.fixedSlotRange(this),
1321 lvalue.dynamicSlotRange(),
1322 /*swizzle=*/{});
1323 }
1324
store(LValue & lvalue)1325 bool Generator::store(LValue& lvalue) {
1326 SkASSERT(lvalue.isWritable());
1327 return lvalue.store(this,
1328 lvalue.fixedSlotRange(this),
1329 lvalue.dynamicSlotRange(),
1330 /*swizzle=*/{});
1331 }
1332
getFunctionDebugInfo(const FunctionDeclaration & decl)1333 int Generator::getFunctionDebugInfo(const FunctionDeclaration& decl) {
1334 SkASSERT(fDebugTrace);
1335
1336 std::string name = decl.description();
1337
1338 // When generating the debug trace, we typically mark every function as `noinline`. This makes
1339 // the trace more confusing, since this isn't in the source program, so remove it.
1340 static constexpr std::string_view kNoInline = "noinline ";
1341 if (skstd::starts_with(name, kNoInline)) {
1342 name = name.substr(kNoInline.size());
1343 }
1344
1345 // Look for a matching FunctionDebugInfo slot.
1346 for (size_t index = 0; index < fDebugTrace->fFuncInfo.size(); ++index) {
1347 if (fDebugTrace->fFuncInfo[index].name == name) {
1348 return index;
1349 }
1350 }
1351
1352 // We've never called this function before; create a new slot to hold its information.
1353 int slot = (int)fDebugTrace->fFuncInfo.size();
1354 fDebugTrace->fFuncInfo.push_back(FunctionDebugInfo{std::move(name)});
1355 return slot;
1356 }
1357
createStack()1358 int Generator::createStack() {
1359 if (!fRecycledStacks.empty()) {
1360 int stackID = fRecycledStacks.back();
1361 fRecycledStacks.pop_back();
1362 return stackID;
1363 }
1364 return ++fNextStackID;
1365 }
1366
// Records `stackID` as available for reuse by a later `createStack` call.
void Generator::recycleStack(int stackID) {
    fRecycledStacks.push_back(stackID);
}
1370
setCurrentStack(int stackID)1371 void Generator::setCurrentStack(int stackID) {
1372 if (fCurrentStack != stackID) {
1373 fCurrentStack = stackID;
1374 fBuilder.set_current_stack(stackID);
1375 }
1376 }
1377
writeFunction(const IRNode & callSite,const FunctionDefinition & function,SkSpan<std::unique_ptr<Expression> const> arguments)1378 std::optional<SlotRange> Generator::writeFunction(
1379 const IRNode& callSite,
1380 const FunctionDefinition& function,
1381 SkSpan<std::unique_ptr<Expression> const> arguments) {
1382 // Generate debug information and emit a trace-enter op.
1383 int funcIndex = -1;
1384 if (fDebugTrace) {
1385 funcIndex = this->getFunctionDebugInfo(function.declaration());
1386 SkASSERT(funcIndex >= 0);
1387 if (this->shouldWriteTraceOps()) {
1388 fBuilder.trace_enter(fTraceMask->stackID(), funcIndex);
1389 }
1390 }
1391
1392 // Handle parameter lvalues.
1393 struct RemappedSlotRange {
1394 const Variable* fVariable;
1395 std::optional<SlotRange> fSlotRange;
1396 };
1397 SkSpan<Variable* const> parameters = function.declaration().parameters();
1398 TArray<std::unique_ptr<LValue>> lvalues;
1399 TArray<RemappedSlotRange> remappedSlotRanges;
1400
1401 if (function.declaration().isMain()) {
1402 // For main(), the parameter slots have already been populated by `writeProgram`, but we
1403 // still need to explicitly emit trace ops for the variables in main(), since they are
1404 // initialized before it is safe to use trace-var. (We can't invoke init-lane-masks until
1405 // after we've copied the inputs from main into slots, because dst.rgba is used to pass in a
1406 // blend-destination color, but we clobber it and put in the execution mask instead.)
1407 if (this->shouldWriteTraceOps()) {
1408 for (const Variable* var : parameters) {
1409 fBuilder.trace_var(fTraceMask->stackID(), this->getVariableSlots(*var));
1410 }
1411 }
1412 } else {
1413 // Write all the arguments into their parameter's variable slots. Because we never allow
1414 // recursion, we don't need to worry about overwriting any existing values in those slots.
1415 // (In fact, we don't even need to apply the write mask.)
1416 lvalues.resize(arguments.size());
1417
1418 for (size_t index = 0; index < arguments.size(); ++index) {
1419 const Expression& arg = *arguments[index];
1420 const Variable& param = *parameters[index];
1421
1422 // If we are passing a child effect to a function, we need to add its mapping to our
1423 // child map.
1424 if (arg.type().isEffectChild()) {
1425 if (int* childIndex = fChildEffectMap.find(arg.as<VariableReference>()
1426 .variable())) {
1427 SkASSERT(!fChildEffectMap.find(¶m));
1428 fChildEffectMap[¶m] = *childIndex;
1429 }
1430 continue;
1431 }
1432
1433 // Use LValues for out-parameters and inout-parameters, so we can store back to them
1434 // later.
1435 if (IsInoutParameter(param) || IsOutParameter(param)) {
1436 lvalues[index] = this->makeLValue(arg);
1437 if (!lvalues[index]) {
1438 return std::nullopt;
1439 }
1440 // There are no guarantees on the starting value of an out-parameter, so we only
1441 // need to store the lvalues associated with an inout parameter.
1442 if (IsInoutParameter(param)) {
1443 if (!this->push(*lvalues[index])) {
1444 return std::nullopt;
1445 }
1446 this->popToSlotRangeUnmasked(this->getVariableSlots(param));
1447 }
1448 continue;
1449 }
1450
1451 // If a parameter is never read by the function, we don't need to populate its slots.
1452 ProgramUsage::VariableCounts paramCounts = fProgram.fUsage->get(param);
1453 if (paramCounts.fRead == 0) {
1454 // Honor the expression's side effects, if any.
1455 if (Analysis::HasSideEffects(arg)) {
1456 if (!this->pushExpression(arg, /*usesResult=*/false)) {
1457 return std::nullopt;
1458 }
1459 this->discardExpression(arg.type().slotCount());
1460 }
1461 continue;
1462 }
1463
1464 // If the expression is a plain variable and the parameter is never written to, we don't
1465 // need to copy it; we can just share the slots from the existing variable.
1466 if (paramCounts.fWrite == 0 && arg.is<VariableReference>()) {
1467 const Variable& var = *arg.as<VariableReference>().variable();
1468 if (this->hasVariableSlots(var)) {
1469 std::optional<SlotRange> originalRange =
1470 fProgramSlots.mapVariableToSlots(param, this->getVariableSlots(var));
1471 remappedSlotRanges.push_back({¶m, originalRange});
1472 continue;
1473 }
1474 }
1475
1476 // Copy input arguments into their respective parameter slots.
1477 if (!this->pushExpression(arg)) {
1478 return std::nullopt;
1479 }
1480 this->popToSlotRangeUnmasked(this->getVariableSlots(param));
1481 }
1482 }
1483
1484 // Set up a slot range dedicated to this function's return value.
1485 SlotRange lastFunctionResult = fCurrentFunctionResult;
1486 fCurrentFunctionResult = this->getFunctionSlots(callSite, function.declaration());
1487
1488 // Save off the return mask.
1489 if (this->needsReturnMask(&function)) {
1490 fBuilder.enableExecutionMaskWrites();
1491 if (!function.declaration().isMain()) {
1492 fBuilder.push_return_mask();
1493 }
1494 }
1495
1496 // Emit the function body.
1497 if (!this->writeStatement(*function.body())) {
1498 return std::nullopt;
1499 }
1500
1501 // Restore the original return mask.
1502 if (this->needsReturnMask(&function)) {
1503 if (!function.declaration().isMain()) {
1504 fBuilder.pop_return_mask();
1505 }
1506 fBuilder.disableExecutionMaskWrites();
1507 }
1508
1509 // Restore the function-result slot range.
1510 SlotRange functionResult = fCurrentFunctionResult;
1511 fCurrentFunctionResult = lastFunctionResult;
1512
1513 // Emit a trace-exit op.
1514 if (fDebugTrace && fWriteTraceOps) {
1515 fBuilder.trace_exit(fTraceMask->stackID(), funcIndex);
1516 }
1517
1518 // Copy out-parameters and inout-parameters back to their homes.
1519 for (int index = 0; index < lvalues.size(); ++index) {
1520 if (lvalues[index]) {
1521 // Only out- and inout-parameters should have an associated lvalue.
1522 const Variable& param = *parameters[index];
1523 SkASSERT(IsInoutParameter(param) || IsOutParameter(param));
1524
1525 // Copy the parameter's slots directly into the lvalue.
1526 fBuilder.push_slots(this->getVariableSlots(param));
1527 if (!this->store(*lvalues[index])) {
1528 return std::nullopt;
1529 }
1530 this->discardExpression(param.type().slotCount());
1531 }
1532 }
1533
1534 // Restore any remapped parameter slot ranges to their original values.
1535 for (const RemappedSlotRange& remapped : remappedSlotRanges) {
1536 if (remapped.fSlotRange.has_value()) {
1537 fProgramSlots.mapVariableToSlots(*remapped.fVariable, *remapped.fSlotRange);
1538 } else {
1539 fProgramSlots.unmapVariableSlots(*remapped.fVariable);
1540 }
1541 }
1542
1543 // Remove any child-effect mappings that were made for this call.
1544 for (size_t index = 0; index < arguments.size(); ++index) {
1545 const Expression& arg = *arguments[index];
1546 if (arg.type().isEffectChild()) {
1547 fChildEffectMap.remove(parameters[index]);
1548 }
1549 }
1550
1551 return functionResult;
1552 }
1553
emitTraceLine(Position pos)1554 void Generator::emitTraceLine(Position pos) {
1555 if (fDebugTrace && fWriteTraceOps && pos.valid() && fInsideCompoundStatement == 0) {
1556 // Binary search within fLineOffets to convert the position into a line number.
1557 SkASSERT(fLineOffsets.size() >= 2);
1558 SkASSERT(fLineOffsets[0] == 0);
1559 SkASSERT(fLineOffsets.back() == (int)fProgram.fSource->length());
1560 int lineNumber = std::distance(
1561 fLineOffsets.begin(),
1562 std::upper_bound(fLineOffsets.begin(), fLineOffsets.end(), pos.startOffset()));
1563
1564 fBuilder.trace_line(fTraceMask->stackID(), lineNumber);
1565 }
1566 }
1567
pushTraceScopeMask()1568 void Generator::pushTraceScopeMask() {
1569 if (this->shouldWriteTraceOps()) {
1570 // Take the intersection of the trace mask and the execution mask. To do this, start with an
1571 // all-zero mask, then use select to overwrite those zeros with the trace mask across all
1572 // executing lanes. We'll get the trace mask in executing lanes, and zero in dead lanes.
1573 fBuilder.push_constant_i(0);
1574 fTraceMask->pushClone(/*slots=*/1);
1575 fBuilder.select(/*slots=*/1);
1576 }
1577 }
1578
discardTraceScopeMask()1579 void Generator::discardTraceScopeMask() {
1580 if (this->shouldWriteTraceOps()) {
1581 this->discardExpression(/*slots=*/1);
1582 }
1583 }
1584
emitTraceScope(int delta)1585 void Generator::emitTraceScope(int delta) {
1586 if (this->shouldWriteTraceOps()) {
1587 fBuilder.trace_scope(this->currentStack(), delta);
1588 }
1589 }
1590
calculateLineOffsets()1591 void Generator::calculateLineOffsets() {
1592 SkASSERT(fLineOffsets.empty());
1593 fLineOffsets.push_back(0);
1594 for (size_t i = 0; i < fProgram.fSource->length(); ++i) {
1595 if ((*fProgram.fSource)[i] == '\n') {
1596 fLineOffsets.push_back(i);
1597 }
1598 }
1599 fLineOffsets.push_back(fProgram.fSource->length());
1600 }
1601
writeGlobals()1602 bool Generator::writeGlobals() {
1603 for (const ProgramElement* e : fProgram.elements()) {
1604 if (e->is<GlobalVarDeclaration>()) {
1605 const GlobalVarDeclaration& gvd = e->as<GlobalVarDeclaration>();
1606 const VarDeclaration& decl = gvd.varDeclaration();
1607 const Variable* var = decl.var();
1608
1609 if (var->type().isEffectChild()) {
1610 // Associate each child effect variable with its numeric index.
1611 SkASSERT(!fChildEffectMap.find(var));
1612 int childEffectIndex = fChildEffectMap.count();
1613 fChildEffectMap[var] = childEffectIndex;
1614 continue;
1615 }
1616
1617 // Opaque types include child processors and GL objects (samplers, textures, etc).
1618 // Of those, only child processors are legal variables.
1619 SkASSERT(!var->type().isVoid());
1620 SkASSERT(!var->type().isOpaque());
1621
1622 // Builtin variables are system-defined, with special semantics.
1623 if (int builtin = var->layout().fBuiltin; builtin >= 0) {
1624 if (builtin == SK_FRAGCOORD_BUILTIN) {
1625 fBuilder.store_device_xy01(this->getVariableSlots(*var));
1626 continue;
1627 }
1628 // The only builtin variable exposed to runtime effects is sk_FragCoord.
1629 return unsupported();
1630 }
1631
1632 if (IsUniform(*var)) {
1633 // Create the uniform slot map in first-to-last order.
1634 SlotRange uniformSlotRange = this->getUniformSlots(*var);
1635
1636 if (this->shouldWriteTraceOps()) {
1637 // We expect uniform values to show up in the debug trace. To make this happen
1638 // without updating the file format, we synthesize a value-slot range for the
1639 // uniform here, and copy the uniform data into the value slots. This allows
1640 // trace_var to work naturally. This wastes a bit of memory, but debug traces
1641 // don't need to be hyper-efficient.
1642 SlotRange copyRange = fProgramSlots.getVariableSlots(*var);
1643 fBuilder.push_uniform(uniformSlotRange);
1644 this->popToSlotRangeUnmasked(copyRange);
1645 }
1646
1647 continue;
1648 }
1649
1650 // Other globals are treated as normal variable declarations.
1651 if (!this->writeVarDeclaration(decl)) {
1652 return unsupported();
1653 }
1654 }
1655 }
1656
1657 return true;
1658 }
1659
writeStatement(const Statement & s)1660 bool Generator::writeStatement(const Statement& s) {
1661 switch (s.kind()) {
1662 case Statement::Kind::kBlock:
1663 // The debugger will stop on statements inside Blocks; there's no need for an additional
1664 // stop on the block's initial open-brace.
1665 case Statement::Kind::kFor:
1666 // The debugger will stop on the init-statement of a for statement, so we don't need to
1667 // stop on the outer for-statement itself as well.
1668 break;
1669
1670 default:
1671 // The debugger should stop on other statements.
1672 this->emitTraceLine(s.fPosition);
1673 break;
1674 }
1675
1676 switch (s.kind()) {
1677 case Statement::Kind::kBlock:
1678 return this->writeBlock(s.as<Block>());
1679
1680 case Statement::Kind::kBreak:
1681 return this->writeBreakStatement(s.as<BreakStatement>());
1682
1683 case Statement::Kind::kContinue:
1684 return this->writeContinueStatement(s.as<ContinueStatement>());
1685
1686 case Statement::Kind::kDo:
1687 return this->writeDoStatement(s.as<DoStatement>());
1688
1689 case Statement::Kind::kExpression:
1690 return this->writeExpressionStatement(s.as<ExpressionStatement>());
1691
1692 case Statement::Kind::kFor:
1693 return this->writeForStatement(s.as<ForStatement>());
1694
1695 case Statement::Kind::kIf:
1696 return this->writeIfStatement(s.as<IfStatement>());
1697
1698 case Statement::Kind::kNop:
1699 return true;
1700
1701 case Statement::Kind::kReturn:
1702 return this->writeReturnStatement(s.as<ReturnStatement>());
1703
1704 case Statement::Kind::kSwitch:
1705 return this->writeSwitchStatement(s.as<SwitchStatement>());
1706
1707 case Statement::Kind::kVarDeclaration:
1708 return this->writeVarDeclaration(s.as<VarDeclaration>());
1709
1710 default:
1711 return unsupported();
1712 }
1713 }
1714
writeBlock(const Block & b)1715 bool Generator::writeBlock(const Block& b) {
1716 if (b.blockKind() == Block::Kind::kCompoundStatement) {
1717 this->emitTraceLine(b.fPosition);
1718 ++fInsideCompoundStatement;
1719 } else {
1720 this->pushTraceScopeMask();
1721 this->emitTraceScope(+1);
1722 }
1723
1724 for (const std::unique_ptr<Statement>& stmt : b.children()) {
1725 if (!this->writeStatement(*stmt)) {
1726 return unsupported();
1727 }
1728 }
1729
1730 if (b.blockKind() == Block::Kind::kCompoundStatement) {
1731 --fInsideCompoundStatement;
1732 } else {
1733 this->emitTraceScope(-1);
1734 this->discardTraceScopeMask();
1735 }
1736
1737 return true;
1738 }
1739
writeBreakStatement(const BreakStatement &)1740 bool Generator::writeBreakStatement(const BreakStatement&) {
1741 // If all lanes have reached this break, we can just branch straight to the break target instead
1742 // of updating masks.
1743 fBuilder.branch_if_all_lanes_active(fCurrentBreakTarget);
1744 fBuilder.mask_off_loop_mask();
1745 return true;
1746 }
1747
writeContinueStatement(const ContinueStatement &)1748 bool Generator::writeContinueStatement(const ContinueStatement&) {
1749 fBuilder.continue_op(fCurrentContinueMask->stackID());
1750 return true;
1751 }
1752
writeDoStatement(const DoStatement & d)1753 bool Generator::writeDoStatement(const DoStatement& d) {
1754 // Set up a break target.
1755 AutoLoopTarget breakTarget(this, &fCurrentBreakTarget);
1756
1757 // Save off the original loop mask.
1758 fBuilder.enableExecutionMaskWrites();
1759 fBuilder.push_loop_mask();
1760
1761 // If `continue` is used in the loop...
1762 Analysis::LoopControlFlowInfo loopInfo = Analysis::GetLoopControlFlowInfo(*d.statement());
1763 AutoContinueMask autoContinueMask(this);
1764 if (loopInfo.fHasContinue) {
1765 // ... create a temporary slot for continue-mask storage.
1766 autoContinueMask.enable();
1767 }
1768
1769 // Write the do-loop body.
1770 int labelID = fBuilder.nextLabelID();
1771 fBuilder.label(labelID);
1772
1773 autoContinueMask.enterLoopBody();
1774
1775 if (!this->writeStatement(*d.statement())) {
1776 return false;
1777 }
1778
1779 autoContinueMask.exitLoopBody();
1780
1781 // Point the debugger at the do-statement's test-expression before we run it.
1782 this->emitTraceLine(d.test()->fPosition);
1783
1784 // Emit the test-expression, in order to combine it with the loop mask.
1785 if (!this->pushExpression(*d.test())) {
1786 return false;
1787 }
1788
1789 // Mask off any lanes in the loop mask where the test-expression is false; this breaks the loop.
1790 // We don't use the test expression for anything else, so jettison it.
1791 fBuilder.merge_loop_mask();
1792 this->discardExpression(/*slots=*/1);
1793
1794 // If any lanes are still running, go back to the top and run the loop body again.
1795 fBuilder.branch_if_any_lanes_active(labelID);
1796
1797 // If we hit a break statement on all lanes, we will branch here to escape from the loop.
1798 fBuilder.label(breakTarget.labelID());
1799
1800 // Restore the loop mask.
1801 fBuilder.pop_loop_mask();
1802 fBuilder.disableExecutionMaskWrites();
1803
1804 return true;
1805 }
1806
writeMasklessForStatement(const ForStatement & f)1807 bool Generator::writeMasklessForStatement(const ForStatement& f) {
1808 SkASSERT(f.unrollInfo());
1809 SkASSERT(f.unrollInfo()->fCount > 0);
1810 SkASSERT(f.initializer());
1811 SkASSERT(f.test());
1812 SkASSERT(f.next());
1813
1814 // We want the loop index to disappear at the end of the loop, so wrap the for statement in a
1815 // trace scope.
1816 this->pushTraceScopeMask();
1817 this->emitTraceScope(+1);
1818
1819 // If no lanes are active, skip over the loop entirely. This guards against looping forever;
1820 // with no lanes active, we wouldn't be able to write the loop variable back to its slot, so
1821 // we'd never make forward progress.
1822 int loopExitID = fBuilder.nextLabelID();
1823 int loopBodyID = fBuilder.nextLabelID();
1824 fBuilder.branch_if_no_lanes_active(loopExitID);
1825
1826 // Run the loop initializer.
1827 if (!this->writeStatement(*f.initializer())) {
1828 return unsupported();
1829 }
1830
1831 // Write the for-loop body. We know the for-loop has a standard ES2 unrollable structure, and
1832 // that it runs for at least one iteration, so we can plow straight ahead into the loop body
1833 // instead of running the loop-test first.
1834 fBuilder.label(loopBodyID);
1835
1836 if (!this->writeStatement(*f.statement())) {
1837 return unsupported();
1838 }
1839
1840 // Point the debugger at the for-statement's next-expression before we run it, or as close as we
1841 // can reasonably get.
1842 if (f.next()) {
1843 this->emitTraceLine(f.next()->fPosition);
1844 } else if (f.test()) {
1845 this->emitTraceLine(f.test()->fPosition);
1846 } else {
1847 this->emitTraceLine(f.fPosition);
1848 }
1849
1850 // If the loop only runs for a single iteration, we are already done. If not...
1851 if (f.unrollInfo()->fCount > 1) {
1852 // ... run the next-expression, and immediately discard its result.
1853 if (!this->pushExpression(*f.next(), /*usesResult=*/false)) {
1854 return unsupported();
1855 }
1856 this->discardExpression(f.next()->type().slotCount());
1857
1858 // Run the test-expression, and repeat the loop until the test-expression evaluates false.
1859 if (!this->pushExpression(*f.test())) {
1860 return unsupported();
1861 }
1862 fBuilder.branch_if_no_active_lanes_on_stack_top_equal(0, loopBodyID);
1863
1864 // Jettison the test-expression.
1865 this->discardExpression(/*slots=*/1);
1866 }
1867
1868 fBuilder.label(loopExitID);
1869
1870 this->emitTraceScope(-1);
1871 this->discardTraceScopeMask();
1872 return true;
1873 }
1874
writeForStatement(const ForStatement & f)1875 bool Generator::writeForStatement(const ForStatement& f) {
1876 // If we've determined that the loop does not run, omit its code entirely.
1877 if (f.unrollInfo() && f.unrollInfo()->fCount == 0) {
1878 return true;
1879 }
1880
1881 // If the loop doesn't escape early due to a `continue`, `break` or `return`, and the loop
1882 // conforms to ES2 structure, we know that we will run the full number of iterations across all
1883 // lanes and don't need to use a loop mask.
1884 Analysis::LoopControlFlowInfo loopInfo = Analysis::GetLoopControlFlowInfo(*f.statement());
1885 if (!loopInfo.fHasContinue && !loopInfo.fHasBreak && !loopInfo.fHasReturn && f.unrollInfo()) {
1886 return this->writeMasklessForStatement(f);
1887 }
1888
1889 // We want the loop index to disappear at the end of the loop, so wrap the for statement in a
1890 // trace scope.
1891 this->pushTraceScopeMask();
1892 this->emitTraceScope(+1);
1893
1894 // Set up a break target.
1895 AutoLoopTarget breakTarget(this, &fCurrentBreakTarget);
1896
1897 // Run the loop initializer.
1898 if (f.initializer()) {
1899 if (!this->writeStatement(*f.initializer())) {
1900 return unsupported();
1901 }
1902 } else {
1903 this->emitTraceLine(f.fPosition);
1904 }
1905
1906 AutoContinueMask autoContinueMask(this);
1907 if (loopInfo.fHasContinue) {
1908 // Acquire a temporary slot for continue-mask storage.
1909 autoContinueMask.enable();
1910 }
1911
1912 // Save off the original loop mask.
1913 fBuilder.enableExecutionMaskWrites();
1914 fBuilder.push_loop_mask();
1915
1916 int loopTestID = fBuilder.nextLabelID();
1917 int loopBodyID = fBuilder.nextLabelID();
1918
1919 // Jump down to the loop test so we can fall out of the loop immediately if it's zero-iteration.
1920 fBuilder.jump(loopTestID);
1921
1922 // Write the for-loop body.
1923 fBuilder.label(loopBodyID);
1924
1925 autoContinueMask.enterLoopBody();
1926
1927 if (!this->writeStatement(*f.statement())) {
1928 return unsupported();
1929 }
1930
1931 autoContinueMask.exitLoopBody();
1932
1933 // Point the debugger at the for-statement's next-expression before we run it, or as close as we
1934 // can reasonably get.
1935 if (f.next()) {
1936 this->emitTraceLine(f.next()->fPosition);
1937 } else if (f.test()) {
1938 this->emitTraceLine(f.test()->fPosition);
1939 } else {
1940 this->emitTraceLine(f.fPosition);
1941 }
1942
1943 // Run the next-expression. Immediately discard its result.
1944 if (f.next()) {
1945 if (!this->pushExpression(*f.next(), /*usesResult=*/false)) {
1946 return unsupported();
1947 }
1948 this->discardExpression(f.next()->type().slotCount());
1949 }
1950
1951 fBuilder.label(loopTestID);
1952 if (f.test()) {
1953 // Emit the test-expression, in order to combine it with the loop mask.
1954 if (!this->pushExpression(*f.test())) {
1955 return unsupported();
1956 }
1957 // Mask off any lanes in the loop mask where the test-expression is false; this breaks the
1958 // loop. We don't use the test expression for anything else, so jettison it.
1959 fBuilder.merge_loop_mask();
1960 this->discardExpression(/*slots=*/1);
1961 }
1962
1963 // If any lanes are still running, go back to the top and run the loop body again.
1964 fBuilder.branch_if_any_lanes_active(loopBodyID);
1965
1966 // If we hit a break statement on all lanes, we will branch here to escape from the loop.
1967 fBuilder.label(breakTarget.labelID());
1968
1969 // Restore the loop mask.
1970 fBuilder.pop_loop_mask();
1971 fBuilder.disableExecutionMaskWrites();
1972
1973 this->emitTraceScope(-1);
1974 this->discardTraceScopeMask();
1975 return true;
1976 }
1977
writeExpressionStatement(const ExpressionStatement & e)1978 bool Generator::writeExpressionStatement(const ExpressionStatement& e) {
1979 if (!this->pushExpression(*e.expression(), /*usesResult=*/false)) {
1980 return unsupported();
1981 }
1982 this->discardExpression(e.expression()->type().slotCount());
1983 return true;
1984 }
1985
writeDynamicallyUniformIfStatement(const IfStatement & i)1986 bool Generator::writeDynamicallyUniformIfStatement(const IfStatement& i) {
1987 SkASSERT(Analysis::IsDynamicallyUniformExpression(*i.test()));
1988
1989 int falseLabelID = fBuilder.nextLabelID();
1990 int exitLabelID = fBuilder.nextLabelID();
1991
1992 if (!this->pushExpression(*i.test())) {
1993 return unsupported();
1994 }
1995
1996 fBuilder.branch_if_no_active_lanes_on_stack_top_equal(~0, falseLabelID);
1997
1998 if (!this->writeStatement(*i.ifTrue())) {
1999 return unsupported();
2000 }
2001
2002 if (!i.ifFalse()) {
2003 // We don't have an if-false condition at all.
2004 fBuilder.label(falseLabelID);
2005 } else {
2006 // We do have an if-false condition. We've just completed the if-true block, so we need to
2007 // jump past the if-false block to avoid executing it.
2008 fBuilder.jump(exitLabelID);
2009
2010 // The if-false block starts here.
2011 fBuilder.label(falseLabelID);
2012
2013 if (!this->writeStatement(*i.ifFalse())) {
2014 return unsupported();
2015 }
2016
2017 fBuilder.label(exitLabelID);
2018 }
2019
2020 // Jettison the test-expression.
2021 this->discardExpression(/*slots=*/1);
2022 return true;
2023 }
2024
writeIfStatement(const IfStatement & i)2025 bool Generator::writeIfStatement(const IfStatement& i) {
2026 // If the test condition is known to be uniform, we can skip over the untrue portion entirely.
2027 if (Analysis::IsDynamicallyUniformExpression(*i.test())) {
2028 return this->writeDynamicallyUniformIfStatement(i);
2029 }
2030
2031 // Save the current condition-mask.
2032 fBuilder.enableExecutionMaskWrites();
2033 fBuilder.push_condition_mask();
2034
2035 // Push the test condition mask.
2036 if (!this->pushExpression(*i.test())) {
2037 return unsupported();
2038 }
2039
2040 // Merge the current condition-mask with the test condition, then run the if-true branch.
2041 fBuilder.merge_condition_mask();
2042 if (!this->writeStatement(*i.ifTrue())) {
2043 return unsupported();
2044 }
2045
2046 if (i.ifFalse()) {
2047 // Apply the inverse condition-mask. Then run the if-false branch.
2048 fBuilder.merge_inv_condition_mask();
2049 if (!this->writeStatement(*i.ifFalse())) {
2050 return unsupported();
2051 }
2052 }
2053
2054 // Jettison the test-expression, and restore the the condition-mask.
2055 this->discardExpression(/*slots=*/1);
2056 fBuilder.pop_condition_mask();
2057 fBuilder.disableExecutionMaskWrites();
2058
2059 return true;
2060 }
2061
writeReturnStatement(const ReturnStatement & r)2062 bool Generator::writeReturnStatement(const ReturnStatement& r) {
2063 if (r.expression()) {
2064 if (!this->pushExpression(*r.expression())) {
2065 return unsupported();
2066 }
2067 if (this->needsFunctionResultSlots(fCurrentFunction)) {
2068 this->popToSlotRange(fCurrentFunctionResult);
2069 }
2070 }
2071 if (fBuilder.executionMaskWritesAreEnabled() && this->needsReturnMask(fCurrentFunction)) {
2072 fBuilder.mask_off_return_mask();
2073 }
2074 return true;
2075 }
2076
writeSwitchStatement(const SwitchStatement & s)2077 bool Generator::writeSwitchStatement(const SwitchStatement& s) {
2078 const StatementArray& cases = s.cases();
2079 SkASSERT(std::all_of(cases.begin(), cases.end(), [](const std::unique_ptr<Statement>& stmt) {
2080 return stmt->is<SwitchCase>();
2081 }));
2082
2083 // Set up a break target.
2084 AutoLoopTarget breakTarget(this, &fCurrentBreakTarget);
2085
2086 // Save off the original loop mask.
2087 fBuilder.enableExecutionMaskWrites();
2088 fBuilder.push_loop_mask();
2089
2090 // Push the switch-case value, and write a default-mask that enables every lane which already
2091 // has an active loop mask. As we match cases, the default mask will get pared down.
2092 if (!this->pushExpression(*s.value())) {
2093 return unsupported();
2094 }
2095 fBuilder.push_loop_mask();
2096
2097 // Zero out the loop mask; each case op will re-enable it as we go.
2098 fBuilder.mask_off_loop_mask();
2099
2100 // Write each switch-case.
2101 bool foundDefaultCase = false;
2102 for (const std::unique_ptr<Statement>& stmt : cases) {
2103 int skipLabelID = fBuilder.nextLabelID();
2104
2105 const SwitchCase& sc = stmt->as<SwitchCase>();
2106 if (sc.isDefault()) {
2107 foundDefaultCase = true;
2108 if (stmt.get() != cases.back().get()) {
2109 // We only support a default case when it is the very last case. If that changes,
2110 // this logic will need to be updated.
2111 return unsupported();
2112 }
2113 // Keep whatever lanes are executing now, and also enable any lanes in the default mask.
2114 fBuilder.pop_and_reenable_loop_mask();
2115 // Execute the switch-case block, if any lanes are alive to see it.
2116 fBuilder.branch_if_no_lanes_active(skipLabelID);
2117 if (!this->writeStatement(*sc.statement())) {
2118 return unsupported();
2119 }
2120 } else {
2121 // The case-op will enable the loop mask if the switch-value matches, and mask off lanes
2122 // from the default-mask.
2123 fBuilder.case_op(sc.value());
2124 // Execute the switch-case block, if any lanes are alive to see it.
2125 fBuilder.branch_if_no_lanes_active(skipLabelID);
2126 if (!this->writeStatement(*sc.statement())) {
2127 return unsupported();
2128 }
2129 }
2130 fBuilder.label(skipLabelID);
2131 }
2132
2133 // Jettison the switch value, and the default case mask if it was never consumed above.
2134 this->discardExpression(/*slots=*/foundDefaultCase ? 1 : 2);
2135
2136 // If we hit a break statement on all lanes, we will branch here to escape from the switch.
2137 fBuilder.label(breakTarget.labelID());
2138
2139 // Restore the loop mask.
2140 fBuilder.pop_loop_mask();
2141 fBuilder.disableExecutionMaskWrites();
2142 return true;
2143 }
2144
writeImmutableVarDeclaration(const VarDeclaration & d)2145 bool Generator::writeImmutableVarDeclaration(const VarDeclaration& d) {
2146 // In a debugging session, we expect debug traces for a variable declaration to appear, even if
2147 // it's constant, so we don't use immutable slots for variables when tracing is on.
2148 if (this->shouldWriteTraceOps()) {
2149 return false;
2150 }
2151
2152 // Find the constant value for this variable.
2153 const Expression* initialValue = ConstantFolder::GetConstantValueForVariable(*d.value());
2154 SkASSERT(initialValue);
2155
2156 // For a variable to be immutable, it cannot be written-to besides its initial declaration.
2157 ProgramUsage::VariableCounts counts = fProgram.fUsage->get(*d.var());
2158 if (counts.fWrite != 1) {
2159 return false;
2160 }
2161
2162 STArray<16, ImmutableBits> immutableValues;
2163 if (!this->getImmutableValueForExpression(*initialValue, &immutableValues)) {
2164 return false;
2165 }
2166
2167 fImmutableVariables.add(d.var());
2168
2169 std::optional<SlotRange> preexistingSlots = this->findPreexistingImmutableData(immutableValues);
2170 if (preexistingSlots.has_value()) {
2171 // Associate this variable with a preexisting range of immutable data (no new data or code).
2172 fImmutableSlots.mapVariableToSlots(*d.var(), *preexistingSlots);
2173 } else {
2174 // Write out the constant value back to immutable slots. (This generates data, but no
2175 // runtime code.)
2176 SlotRange slots = this->getImmutableSlots(*d.var());
2177 this->storeImmutableValueToSlots(immutableValues, slots);
2178 }
2179
2180 return true;
2181 }
2182
writeVarDeclaration(const VarDeclaration & v)2183 bool Generator::writeVarDeclaration(const VarDeclaration& v) {
2184 if (v.value()) {
2185 // If a variable never actually changes, we can make it immutable.
2186 if (this->writeImmutableVarDeclaration(v)) {
2187 return true;
2188 }
2189 // This is a real variable which can change over the course of execution.
2190 if (!this->pushExpression(*v.value())) {
2191 return unsupported();
2192 }
2193 this->popToSlotRangeUnmasked(this->getVariableSlots(*v.var()));
2194 } else {
2195 this->zeroSlotRangeUnmasked(this->getVariableSlots(*v.var()));
2196 }
2197 return true;
2198 }
2199
pushExpression(const Expression & e,bool usesResult)2200 bool Generator::pushExpression(const Expression& e, bool usesResult) {
2201 switch (e.kind()) {
2202 case Expression::Kind::kBinary:
2203 return this->pushBinaryExpression(e.as<BinaryExpression>());
2204
2205 case Expression::Kind::kChildCall:
2206 return this->pushChildCall(e.as<ChildCall>());
2207
2208 case Expression::Kind::kConstructorArray:
2209 case Expression::Kind::kConstructorArrayCast:
2210 case Expression::Kind::kConstructorCompound:
2211 case Expression::Kind::kConstructorStruct:
2212 return this->pushConstructorCompound(e.asAnyConstructor());
2213
2214 case Expression::Kind::kConstructorCompoundCast:
2215 case Expression::Kind::kConstructorScalarCast:
2216 return this->pushConstructorCast(e.asAnyConstructor());
2217
2218 case Expression::Kind::kConstructorDiagonalMatrix:
2219 return this->pushConstructorDiagonalMatrix(e.as<ConstructorDiagonalMatrix>());
2220
2221 case Expression::Kind::kConstructorMatrixResize:
2222 return this->pushConstructorMatrixResize(e.as<ConstructorMatrixResize>());
2223
2224 case Expression::Kind::kConstructorSplat:
2225 return this->pushConstructorSplat(e.as<ConstructorSplat>());
2226
2227 case Expression::Kind::kEmpty:
2228 return true;
2229
2230 case Expression::Kind::kFieldAccess:
2231 return this->pushFieldAccess(e.as<FieldAccess>());
2232
2233 case Expression::Kind::kFunctionCall:
2234 return this->pushFunctionCall(e.as<FunctionCall>());
2235
2236 case Expression::Kind::kIndex:
2237 return this->pushIndexExpression(e.as<IndexExpression>());
2238
2239 case Expression::Kind::kLiteral:
2240 return this->pushLiteral(e.as<Literal>());
2241
2242 case Expression::Kind::kPrefix:
2243 return this->pushPrefixExpression(e.as<PrefixExpression>());
2244
2245 case Expression::Kind::kPostfix:
2246 return this->pushPostfixExpression(e.as<PostfixExpression>(), usesResult);
2247
2248 case Expression::Kind::kSwizzle:
2249 return this->pushSwizzle(e.as<Swizzle>());
2250
2251 case Expression::Kind::kTernary:
2252 return this->pushTernaryExpression(e.as<TernaryExpression>());
2253
2254 case Expression::Kind::kVariableReference:
2255 return this->pushVariableReference(e.as<VariableReference>());
2256
2257 default:
2258 return unsupported();
2259 }
2260 }
2261
GetTypedOp(const SkSL::Type & type,const TypedOps & ops)2262 BuilderOp Generator::GetTypedOp(const SkSL::Type& type, const TypedOps& ops) {
2263 switch (type.componentType().numberKind()) {
2264 case Type::NumberKind::kFloat: return ops.fFloatOp;
2265 case Type::NumberKind::kSigned: return ops.fSignedOp;
2266 case Type::NumberKind::kUnsigned: return ops.fUnsignedOp;
2267 case Type::NumberKind::kBoolean: return ops.fBooleanOp;
2268 default: return BuilderOp::unsupported;
2269 }
2270 }
2271
unaryOp(const SkSL::Type & type,const TypedOps & ops)2272 bool Generator::unaryOp(const SkSL::Type& type, const TypedOps& ops) {
2273 BuilderOp op = GetTypedOp(type, ops);
2274 if (op == BuilderOp::unsupported) {
2275 return unsupported();
2276 }
2277 fBuilder.unary_op(op, type.slotCount());
2278 return true;
2279 }
2280
binaryOp(const SkSL::Type & type,const TypedOps & ops)2281 bool Generator::binaryOp(const SkSL::Type& type, const TypedOps& ops) {
2282 BuilderOp op = GetTypedOp(type, ops);
2283 if (op == BuilderOp::unsupported) {
2284 return unsupported();
2285 }
2286 fBuilder.binary_op(op, type.slotCount());
2287 return true;
2288 }
2289
ternaryOp(const SkSL::Type & type,const TypedOps & ops)2290 bool Generator::ternaryOp(const SkSL::Type& type, const TypedOps& ops) {
2291 BuilderOp op = GetTypedOp(type, ops);
2292 if (op == BuilderOp::unsupported) {
2293 return unsupported();
2294 }
2295 fBuilder.ternary_op(op, type.slotCount());
2296 return true;
2297 }
2298
foldWithMultiOp(BuilderOp op,int elements)2299 void Generator::foldWithMultiOp(BuilderOp op, int elements) {
2300 // Fold the top N elements on the stack using an op that supports multiple slots, e.g.:
2301 // (A + B + C + D) -> add_2_floats $0..1 += $2..3
2302 // add_float $0 += $1
2303 for (; elements >= 8; elements -= 4) {
2304 fBuilder.binary_op(op, /*slots=*/4);
2305 }
2306 for (; elements >= 6; elements -= 3) {
2307 fBuilder.binary_op(op, /*slots=*/3);
2308 }
2309 for (; elements >= 4; elements -= 2) {
2310 fBuilder.binary_op(op, /*slots=*/2);
2311 }
2312 for (; elements >= 2; elements -= 1) {
2313 fBuilder.binary_op(op, /*slots=*/1);
2314 }
2315 }
2316
pushLValueOrExpression(LValue * lvalue,const Expression & expr)2317 bool Generator::pushLValueOrExpression(LValue* lvalue, const Expression& expr) {
2318 return lvalue ? this->push(*lvalue)
2319 : this->pushExpression(expr);
2320 }
2321
pushMatrixMultiply(LValue * lvalue,const Expression & left,const Expression & right,int leftColumns,int leftRows,int rightColumns,int rightRows)2322 bool Generator::pushMatrixMultiply(LValue* lvalue,
2323 const Expression& left,
2324 const Expression& right,
2325 int leftColumns,
2326 int leftRows,
2327 int rightColumns,
2328 int rightRows) {
2329 SkASSERT(left.type().isMatrix() || left.type().isVector());
2330 SkASSERT(right.type().isMatrix() || right.type().isVector());
2331
2332 // Insert padding space on the stack to hold the result.
2333 fBuilder.pad_stack(rightColumns * leftRows);
2334
2335 // Push the left and right matrices onto the stack.
2336 if (!this->pushLValueOrExpression(lvalue, left) || !this->pushExpression(right)) {
2337 return unsupported();
2338 }
2339
2340 fBuilder.matrix_multiply(leftColumns, leftRows, rightColumns, rightRows);
2341
2342 // If this multiply was actually an assignment (via *=), write the result back to the lvalue.
2343 return lvalue ? this->store(*lvalue)
2344 : true;
2345 }
2346
foldComparisonOp(Operator op,int elements)2347 void Generator::foldComparisonOp(Operator op, int elements) {
2348 switch (op.kind()) {
2349 case OperatorKind::EQEQ:
2350 // equal(x,y) returns a vector; use & to fold into a scalar.
2351 this->foldWithMultiOp(BuilderOp::bitwise_and_n_ints, elements);
2352 break;
2353
2354 case OperatorKind::NEQ:
2355 // notEqual(x,y) returns a vector; use | to fold into a scalar.
2356 this->foldWithMultiOp(BuilderOp::bitwise_or_n_ints, elements);
2357 break;
2358
2359 default:
2360 SkDEBUGFAIL("comparison only allows == and !=");
2361 break;
2362 }
2363 }
2364
pushStructuredComparison(LValue * left,Operator op,LValue * right,const Type & type)2365 bool Generator::pushStructuredComparison(LValue* left,
2366 Operator op,
2367 LValue* right,
2368 const Type& type) {
2369 if (type.isStruct()) {
2370 // Compare every field in the struct.
2371 SkSpan<const Field> fields = type.fields();
2372 int currentSlot = 0;
2373 for (size_t index = 0; index < fields.size(); ++index) {
2374 const Type& fieldType = *fields[index].fType;
2375 const int fieldSlotCount = fieldType.slotCount();
2376 UnownedLValueSlice fieldLeft {left, currentSlot, fieldSlotCount};
2377 UnownedLValueSlice fieldRight{right, currentSlot, fieldSlotCount};
2378 if (!this->pushStructuredComparison(&fieldLeft, op, &fieldRight, fieldType)) {
2379 return unsupported();
2380 }
2381 currentSlot += fieldSlotCount;
2382 }
2383
2384 this->foldComparisonOp(op, fields.size());
2385 return true;
2386 }
2387
2388 if (type.isArray()) {
2389 const Type& indexedType = type.componentType();
2390 if (indexedType.numberKind() == Type::NumberKind::kNonnumeric) {
2391 // Compare every element in the array.
2392 const int indexedSlotCount = indexedType.slotCount();
2393 int currentSlot = 0;
2394 for (int index = 0; index < type.columns(); ++index) {
2395 UnownedLValueSlice indexedLeft {left, currentSlot, indexedSlotCount};
2396 UnownedLValueSlice indexedRight{right, currentSlot, indexedSlotCount};
2397 if (!this->pushStructuredComparison(&indexedLeft, op, &indexedRight, indexedType)) {
2398 return unsupported();
2399 }
2400 currentSlot += indexedSlotCount;
2401 }
2402
2403 this->foldComparisonOp(op, type.columns());
2404 return true;
2405 }
2406 }
2407
2408 // We've winnowed down to a single element, or an array of homogeneous numeric elements.
2409 // Push the elements onto the stack, then compare them.
2410 if (!this->push(*left) || !this->push(*right)) {
2411 return unsupported();
2412 }
2413 switch (op.kind()) {
2414 case OperatorKind::EQEQ:
2415 if (!this->binaryOp(type, kEqualOps)) {
2416 return unsupported();
2417 }
2418 break;
2419
2420 case OperatorKind::NEQ:
2421 if (!this->binaryOp(type, kNotEqualOps)) {
2422 return unsupported();
2423 }
2424 break;
2425
2426 default:
2427 SkDEBUGFAIL("comparison only allows == and !=");
2428 break;
2429 }
2430
2431 this->foldComparisonOp(op, type.slotCount());
2432 return true;
2433 }
2434
pushBinaryExpression(const BinaryExpression & e)2435 bool Generator::pushBinaryExpression(const BinaryExpression& e) {
2436 return this->pushBinaryExpression(*e.left(), e.getOperator(), *e.right());
2437 }
2438
pushBinaryExpression(const Expression & left,Operator op,const Expression & right)2439 bool Generator::pushBinaryExpression(const Expression& left, Operator op, const Expression& right) {
2440 switch (op.kind()) {
2441 // Rewrite greater-than ops as their less-than equivalents.
2442 case OperatorKind::GT:
2443 return this->pushBinaryExpression(right, OperatorKind::LT, left);
2444
2445 case OperatorKind::GTEQ:
2446 return this->pushBinaryExpression(right, OperatorKind::LTEQ, left);
2447
2448 // Handle struct and array comparisons.
2449 case OperatorKind::EQEQ:
2450 case OperatorKind::NEQ:
2451 if (left.type().isStruct() || left.type().isArray()) {
2452 SkASSERT(left.type().matches(right.type()));
2453 std::unique_ptr<LValue> lvLeft = this->makeLValue(left, /*allowScratch=*/true);
2454 std::unique_ptr<LValue> lvRight = this->makeLValue(right, /*allowScratch=*/true);
2455 return this->pushStructuredComparison(lvLeft.get(), op, lvRight.get(), left.type());
2456 }
2457 [[fallthrough]];
2458
2459 // Rewrite commutative ops so that the literal is on the right-hand side. This gives the
2460 // Builder more opportunities to use immediate-mode ops.
2461 case OperatorKind::PLUS:
2462 case OperatorKind::STAR:
2463 case OperatorKind::BITWISEAND:
2464 case OperatorKind::BITWISEXOR:
2465 case OperatorKind::LOGICALXOR: {
2466 double unused;
2467 if (ConstantFolder::GetConstantValue(left, &unused) &&
2468 !ConstantFolder::GetConstantValue(right, &unused)) {
2469 return this->pushBinaryExpression(right, op, left);
2470 }
2471 break;
2472 }
2473 // Emit comma expressions.
2474 case OperatorKind::COMMA:
2475 if (Analysis::HasSideEffects(left)) {
2476 if (!this->pushExpression(left, /*usesResult=*/false)) {
2477 return unsupported();
2478 }
2479 this->discardExpression(left.type().slotCount());
2480 }
2481 return this->pushExpression(right);
2482
2483 default:
2484 break;
2485 }
2486
2487 // Handle binary expressions with mismatched types.
2488 bool vectorizeLeft = false, vectorizeRight = false;
2489 if (!left.type().matches(right.type())) {
2490 if (left.type().componentType().numberKind() != right.type().componentType().numberKind()) {
2491 return unsupported();
2492 }
2493 if (left.type().isScalar() && (right.type().isVector() || right.type().isMatrix())) {
2494 vectorizeLeft = true;
2495 } else if ((left.type().isVector() || left.type().isMatrix()) && right.type().isScalar()) {
2496 vectorizeRight = true;
2497 }
2498 }
2499
2500 const Type& type = vectorizeLeft ? right.type() : left.type();
2501
2502 // If this is an assignment...
2503 std::unique_ptr<LValue> lvalue;
2504 if (op.isAssignment()) {
2505 // ... turn the left side into an lvalue.
2506 lvalue = this->makeLValue(left);
2507 if (!lvalue) {
2508 return unsupported();
2509 }
2510
2511 // Handle simple assignment (`var = expr`).
2512 if (op.kind() == OperatorKind::EQ) {
2513 return this->pushExpression(right) &&
2514 this->store(*lvalue);
2515 }
2516
2517 // Strip off the assignment from the op (turning += into +).
2518 op = op.removeAssignment();
2519 }
2520
2521 // Handle matrix multiplication (MxM/MxV/VxM).
2522 if (op.kind() == OperatorKind::STAR) {
2523 // Matrix * matrix:
2524 if (left.type().isMatrix() && right.type().isMatrix()) {
2525 return this->pushMatrixMultiply(lvalue.get(), left, right,
2526 left.type().columns(), left.type().rows(),
2527 right.type().columns(), right.type().rows());
2528 }
2529
2530 // Vector * matrix:
2531 if (left.type().isVector() && right.type().isMatrix()) {
2532 return this->pushMatrixMultiply(lvalue.get(), left, right,
2533 left.type().columns(), 1,
2534 right.type().columns(), right.type().rows());
2535 }
2536
2537 // Matrix * vector:
2538 if (left.type().isMatrix() && right.type().isVector()) {
2539 return this->pushMatrixMultiply(lvalue.get(), left, right,
2540 left.type().columns(), left.type().rows(),
2541 1, right.type().columns());
2542 }
2543 }
2544
2545 if (!vectorizeLeft && !vectorizeRight && !type.matches(right.type())) {
2546 // We have mismatched types but don't know how to handle them.
2547 return unsupported();
2548 }
2549
2550 // Handle binary ops which require short-circuiting.
2551 switch (op.kind()) {
2552 case OperatorKind::LOGICALAND:
2553 if (Analysis::HasSideEffects(right)) {
2554 // If the RHS has side effects, we rewrite `a && b` as `a ? b : false`. This
2555 // generates pretty solid code and gives us the required short-circuit behavior.
2556 SkASSERT(!op.isAssignment());
2557 SkASSERT(type.componentType().isBoolean());
2558 SkASSERT(type.slotCount() == 1); // operator&& only works with scalar types
2559 Literal falseLiteral{Position{}, 0.0, &right.type()};
2560 return this->pushTernaryExpression(left, right, falseLiteral);
2561 }
2562 break;
2563
2564 case OperatorKind::LOGICALOR:
2565 if (Analysis::HasSideEffects(right)) {
2566 // If the RHS has side effects, we rewrite `a || b` as `a ? true : b`.
2567 SkASSERT(!op.isAssignment());
2568 SkASSERT(type.componentType().isBoolean());
2569 SkASSERT(type.slotCount() == 1); // operator|| only works with scalar types
2570 Literal trueLiteral{Position{}, 1.0, &right.type()};
2571 return this->pushTernaryExpression(left, trueLiteral, right);
2572 }
2573 break;
2574
2575 default:
2576 break;
2577 }
2578
2579 // Push the left- and right-expressions onto the stack.
2580 if (!this->pushLValueOrExpression(lvalue.get(), left)) {
2581 return unsupported();
2582 }
2583 if (vectorizeLeft) {
2584 fBuilder.push_duplicates(right.type().slotCount() - 1);
2585 }
2586 if (!this->pushExpression(right)) {
2587 return unsupported();
2588 }
2589 if (vectorizeRight) {
2590 fBuilder.push_duplicates(left.type().slotCount() - 1);
2591 }
2592
2593 switch (op.kind()) {
2594 case OperatorKind::PLUS:
2595 if (!this->binaryOp(type, kAddOps)) {
2596 return unsupported();
2597 }
2598 break;
2599
2600 case OperatorKind::MINUS:
2601 if (!this->binaryOp(type, kSubtractOps)) {
2602 return unsupported();
2603 }
2604 break;
2605
2606 case OperatorKind::STAR:
2607 if (!this->binaryOp(type, kMultiplyOps)) {
2608 return unsupported();
2609 }
2610 break;
2611
2612 case OperatorKind::SLASH:
2613 if (!this->binaryOp(type, kDivideOps)) {
2614 return unsupported();
2615 }
2616 break;
2617
2618 case OperatorKind::LT:
2619 case OperatorKind::GT:
2620 if (!this->binaryOp(type, kLessThanOps)) {
2621 return unsupported();
2622 }
2623 SkASSERT(type.slotCount() == 1); // operator< only works with scalar types
2624 break;
2625
2626 case OperatorKind::LTEQ:
2627 case OperatorKind::GTEQ:
2628 if (!this->binaryOp(type, kLessThanEqualOps)) {
2629 return unsupported();
2630 }
2631 SkASSERT(type.slotCount() == 1); // operator<= only works with scalar types
2632 break;
2633
2634 case OperatorKind::EQEQ:
2635 if (!this->binaryOp(type, kEqualOps)) {
2636 return unsupported();
2637 }
2638 this->foldComparisonOp(op, type.slotCount());
2639 break;
2640
2641 case OperatorKind::NEQ:
2642 if (!this->binaryOp(type, kNotEqualOps)) {
2643 return unsupported();
2644 }
2645 this->foldComparisonOp(op, type.slotCount());
2646 break;
2647
2648 case OperatorKind::LOGICALAND:
2649 case OperatorKind::BITWISEAND:
2650 // For logical-and, we verified above that the RHS does not have side effects, so we
2651 // don't need to worry about short-circuiting side effects.
2652 fBuilder.binary_op(BuilderOp::bitwise_and_n_ints, type.slotCount());
2653 break;
2654
2655 case OperatorKind::LOGICALOR:
2656 case OperatorKind::BITWISEOR:
2657 // For logical-or, we verified above that the RHS does not have side effects.
2658 fBuilder.binary_op(BuilderOp::bitwise_or_n_ints, type.slotCount());
2659 break;
2660
2661 case OperatorKind::LOGICALXOR:
2662 case OperatorKind::BITWISEXOR:
2663 // Logical-xor does not short circuit.
2664 fBuilder.binary_op(BuilderOp::bitwise_xor_n_ints, type.slotCount());
2665 break;
2666
2667 default:
2668 return unsupported();
2669 }
2670
2671 // If we have an lvalue, we need to write the result back into it.
2672 return lvalue ? this->store(*lvalue)
2673 : true;
2674 }
2675
getImmutableBitsForSlot(const Expression & expr,size_t slot)2676 std::optional<Generator::ImmutableBits> Generator::getImmutableBitsForSlot(const Expression& expr,
2677 size_t slot) {
2678 // Determine the constant-value of the slot; bail if it isn't constant.
2679 std::optional<double> v = expr.getConstantValue(slot);
2680 if (!v.has_value()) {
2681 return std::nullopt;
2682 }
2683 // Determine the number-kind of the slot, and convert the value to its bit-representation.
2684 Type::NumberKind kind = expr.type().slotType(slot).numberKind();
2685 double value = *v;
2686 switch (kind) {
2687 case Type::NumberKind::kFloat:
2688 return sk_bit_cast<ImmutableBits>((float)value);
2689
2690 case Type::NumberKind::kSigned:
2691 return sk_bit_cast<ImmutableBits>((int32_t)value);
2692
2693 case Type::NumberKind::kUnsigned:
2694 return sk_bit_cast<ImmutableBits>((uint32_t)value);
2695
2696 case Type::NumberKind::kBoolean:
2697 return value ? ~0 : 0;
2698
2699 default:
2700 return std::nullopt;
2701 }
2702 }
2703
getImmutableValueForExpression(const Expression & expr,TArray<ImmutableBits> * immutableValues)2704 bool Generator::getImmutableValueForExpression(const Expression& expr,
2705 TArray<ImmutableBits>* immutableValues) {
2706 if (!expr.supportsConstantValues()) {
2707 return false;
2708 }
2709 size_t numSlots = expr.type().slotCount();
2710 immutableValues->reserve_exact(numSlots);
2711 for (size_t index = 0; index < numSlots; ++index) {
2712 std::optional<ImmutableBits> bits = this->getImmutableBitsForSlot(expr, index);
2713 if (!bits.has_value()) {
2714 return false;
2715 }
2716 immutableValues->push_back(*bits);
2717 }
2718 return true;
2719 }
2720
storeImmutableValueToSlots(const TArray<ImmutableBits> & immutableValues,SlotRange slots)2721 void Generator::storeImmutableValueToSlots(const TArray<ImmutableBits>& immutableValues,
2722 SlotRange slots) {
2723 for (int index = 0; index < slots.count; ++index) {
2724 // Store the immutable value in its slot.
2725 const Slot slot = slots.index++;
2726 const ImmutableBits bits = immutableValues[index];
2727 fBuilder.store_immutable_value_i(slot, bits);
2728
2729 // Keep track of every stored immutable value for potential later reuse.
2730 fImmutableSlotMap[bits].add(slot);
2731 }
2732 }
2733
findPreexistingImmutableData(const TArray<ImmutableBits> & immutableValues)2734 std::optional<SlotRange> Generator::findPreexistingImmutableData(
2735 const TArray<ImmutableBits>& immutableValues) {
2736 STArray<16, const THashSet<Slot>*> slotArray;
2737 slotArray.reserve_exact(immutableValues.size());
2738
2739 // Find all the slots associated with each immutable-value bit representation.
2740 // If a given bit-pattern doesn't exist anywhere in our program yet, we can stop searching.
2741 for (const ImmutableBits& immutableValue : immutableValues) {
2742 const THashSet<Slot>* slotsForValue = fImmutableSlotMap.find(immutableValue);
2743 if (!slotsForValue) {
2744 return std::nullopt;
2745 }
2746 slotArray.push_back(slotsForValue);
2747 }
2748
2749 // Look for the group with the fewest number of entries, since that can be searched in the
2750 // least amount of effort.
2751 int leastSlotIndex = 0, leastSlotCount = INT_MAX;
2752 for (int index = 0; index < slotArray.size(); ++index) {
2753 int currentCount = slotArray[index]->count();
2754 if (currentCount < leastSlotCount) {
2755 leastSlotIndex = index;
2756 leastSlotCount = currentCount;
2757 }
2758 }
2759
2760 // See if we can reconstitute the value that we want with any of the data we've already got.
2761 for (int slot : *slotArray[leastSlotIndex]) {
2762 int firstSlot = slot - leastSlotIndex;
2763 bool found = true;
2764 for (int index = 0; index < slotArray.size(); ++index) {
2765 if (!slotArray[index]->contains(firstSlot + index)) {
2766 found = false;
2767 break;
2768 }
2769 }
2770 if (found) {
2771 // We've found an exact match for the input value; return its slot-range.
2772 return SlotRange{firstSlot, slotArray.size()};
2773 }
2774 }
2775
2776 // We didn't find any reusable slot ranges.
2777 return std::nullopt;
2778 }
2779
pushImmutableData(const Expression & e)2780 bool Generator::pushImmutableData(const Expression& e) {
2781 STArray<16, ImmutableBits> immutableValues;
2782 if (!this->getImmutableValueForExpression(e, &immutableValues)) {
2783 return false;
2784 }
2785 std::optional<SlotRange> preexistingData = this->findPreexistingImmutableData(immutableValues);
2786 if (preexistingData.has_value()) {
2787 fBuilder.push_immutable(*preexistingData);
2788 return true;
2789 }
2790 SlotRange range = fImmutableSlots.createSlots(e.description(),
2791 e.type(),
2792 e.fPosition,
2793 /*isFunctionReturnValue=*/false);
2794 this->storeImmutableValueToSlots(immutableValues, range);
2795 fBuilder.push_immutable(range);
2796 return true;
2797 }
2798
pushConstructorCompound(const AnyConstructor & c)2799 bool Generator::pushConstructorCompound(const AnyConstructor& c) {
2800 if (c.type().slotCount() > 1 && this->pushImmutableData(c)) {
2801 return true;
2802 }
2803 for (const std::unique_ptr<Expression> &arg : c.argumentSpan()) {
2804 if (!this->pushExpression(*arg)) {
2805 return unsupported();
2806 }
2807 }
2808 return true;
2809 }
2810
pushChildCall(const ChildCall & c)2811 bool Generator::pushChildCall(const ChildCall& c) {
2812 int* childIdx = fChildEffectMap.find(&c.child());
2813 SkASSERT(childIdx != nullptr);
2814 SkASSERT(!c.arguments().empty());
2815
2816 // All child calls have at least one argument.
2817 const Expression* arg = c.arguments()[0].get();
2818 if (!this->pushExpression(*arg)) {
2819 return unsupported();
2820 }
2821
2822 // Copy arguments from the stack into src/dst as required by this particular child-call.
2823 switch (c.child().type().typeKind()) {
2824 case Type::TypeKind::kShader: {
2825 // The argument must be a float2.
2826 SkASSERT(c.arguments().size() == 1);
2827 SkASSERT(arg->type().matches(*fContext.fTypes.fFloat2));
2828
2829 // `exchange_src` will use the top four values on the stack, but we don't care what goes
2830 // into the blue/alpha components. We inject padding here to balance the stack.
2831 fBuilder.pad_stack(2);
2832
2833 // Move the argument into src.rgba while also preserving the execution mask.
2834 fBuilder.exchange_src();
2835 fBuilder.invoke_shader(*childIdx);
2836 break;
2837 }
2838 case Type::TypeKind::kColorFilter: {
2839 // The argument must be a half4/float4.
2840 SkASSERT(c.arguments().size() == 1);
2841 SkASSERT(arg->type().matches(*fContext.fTypes.fHalf4) ||
2842 arg->type().matches(*fContext.fTypes.fFloat4));
2843
2844 // Move the argument into src.rgba while also preserving the execution mask.
2845 fBuilder.exchange_src();
2846 fBuilder.invoke_color_filter(*childIdx);
2847 break;
2848 }
2849 case Type::TypeKind::kBlender: {
2850 // Both arguments must be half4/float4.
2851 SkASSERT(c.arguments().size() == 2);
2852 SkASSERT(c.arguments()[0]->type().matches(*fContext.fTypes.fHalf4) ||
2853 c.arguments()[0]->type().matches(*fContext.fTypes.fFloat4));
2854 SkASSERT(c.arguments()[1]->type().matches(*fContext.fTypes.fHalf4) ||
2855 c.arguments()[1]->type().matches(*fContext.fTypes.fFloat4));
2856
2857 // Move the second argument into dst.rgba, and the first argument into src.rgba, while
2858 // simultaneously preserving the execution mask.
2859 if (!this->pushExpression(*c.arguments()[1])) {
2860 return unsupported();
2861 }
2862 fBuilder.pop_dst_rgba();
2863 fBuilder.exchange_src();
2864 fBuilder.invoke_blender(*childIdx);
2865 break;
2866 }
2867 default: {
2868 SkDEBUGFAILF("cannot sample from type '%s'", c.child().type().description().c_str());
2869 }
2870 }
2871
2872 // The child call has returned the result color via src.rgba, and the SkRP execution mask is
2873 // on top of the stack. Swapping the two puts the result color on top of the stack, and also
2874 // restores our execution masks.
2875 fBuilder.exchange_src();
2876 return true;
2877 }
2878
pushConstructorCast(const AnyConstructor & c)2879 bool Generator::pushConstructorCast(const AnyConstructor& c) {
2880 SkASSERT(c.argumentSpan().size() == 1);
2881 const Expression& inner = *c.argumentSpan().front();
2882 SkASSERT(inner.type().slotCount() == c.type().slotCount());
2883
2884 if (!this->pushExpression(inner)) {
2885 return unsupported();
2886 }
2887 const Type::NumberKind innerKind = inner.type().componentType().numberKind();
2888 const Type::NumberKind outerKind = c.type().componentType().numberKind();
2889
2890 if (innerKind == outerKind) {
2891 // Since we ignore type precision, this cast is effectively a no-op.
2892 return true;
2893 }
2894
2895 switch (innerKind) {
2896 case Type::NumberKind::kSigned:
2897 if (outerKind == Type::NumberKind::kUnsigned) {
2898 // Treat uint(int) as a no-op.
2899 return true;
2900 }
2901 if (outerKind == Type::NumberKind::kFloat) {
2902 fBuilder.unary_op(BuilderOp::cast_to_float_from_int, c.type().slotCount());
2903 return true;
2904 }
2905 break;
2906
2907 case Type::NumberKind::kUnsigned:
2908 if (outerKind == Type::NumberKind::kSigned) {
2909 // Treat int(uint) as a no-op.
2910 return true;
2911 }
2912 if (outerKind == Type::NumberKind::kFloat) {
2913 fBuilder.unary_op(BuilderOp::cast_to_float_from_uint, c.type().slotCount());
2914 return true;
2915 }
2916 break;
2917
2918 case Type::NumberKind::kBoolean:
2919 // Converting boolean to int or float can be accomplished via bitwise-and.
2920 if (outerKind == Type::NumberKind::kFloat) {
2921 fBuilder.push_constant_f(1.0f);
2922 } else if (outerKind == Type::NumberKind::kSigned ||
2923 outerKind == Type::NumberKind::kUnsigned) {
2924 fBuilder.push_constant_i(1);
2925 } else {
2926 SkDEBUGFAILF("unexpected cast from bool to %s", c.type().description().c_str());
2927 return unsupported();
2928 }
2929 fBuilder.push_duplicates(c.type().slotCount() - 1);
2930 fBuilder.binary_op(BuilderOp::bitwise_and_n_ints, c.type().slotCount());
2931 return true;
2932
2933 case Type::NumberKind::kFloat:
2934 if (outerKind == Type::NumberKind::kSigned) {
2935 fBuilder.unary_op(BuilderOp::cast_to_int_from_float, c.type().slotCount());
2936 return true;
2937 }
2938 if (outerKind == Type::NumberKind::kUnsigned) {
2939 fBuilder.unary_op(BuilderOp::cast_to_uint_from_float, c.type().slotCount());
2940 return true;
2941 }
2942 break;
2943
2944 case Type::NumberKind::kNonnumeric:
2945 break;
2946 }
2947
2948 if (outerKind == Type::NumberKind::kBoolean) {
2949 // Converting int or float to boolean can be accomplished via `notEqual(x, 0)`.
2950 fBuilder.push_zeros(c.type().slotCount());
2951 return this->binaryOp(inner.type(), kNotEqualOps);
2952 }
2953
2954 SkDEBUGFAILF("unexpected cast from %s to %s",
2955 c.type().description().c_str(), inner.type().description().c_str());
2956 return unsupported();
2957 }
2958
pushConstructorDiagonalMatrix(const ConstructorDiagonalMatrix & c)2959 bool Generator::pushConstructorDiagonalMatrix(const ConstructorDiagonalMatrix& c) {
2960 if (this->pushImmutableData(c)) {
2961 return true;
2962 }
2963 fBuilder.push_zeros(1);
2964 if (!this->pushExpression(*c.argument())) {
2965 return unsupported();
2966 }
2967 fBuilder.diagonal_matrix(c.type().columns(), c.type().rows());
2968
2969 return true;
2970 }
2971
pushConstructorMatrixResize(const ConstructorMatrixResize & c)2972 bool Generator::pushConstructorMatrixResize(const ConstructorMatrixResize& c) {
2973 if (!this->pushExpression(*c.argument())) {
2974 return unsupported();
2975 }
2976 fBuilder.matrix_resize(c.argument()->type().columns(),
2977 c.argument()->type().rows(),
2978 c.type().columns(),
2979 c.type().rows());
2980 return true;
2981 }
2982
pushConstructorSplat(const ConstructorSplat & c)2983 bool Generator::pushConstructorSplat(const ConstructorSplat& c) {
2984 if (!this->pushExpression(*c.argument())) {
2985 return unsupported();
2986 }
2987 fBuilder.push_duplicates(c.type().slotCount() - 1);
2988 return true;
2989 }
2990
pushFieldAccess(const FieldAccess & f)2991 bool Generator::pushFieldAccess(const FieldAccess& f) {
2992 // If possible, get direct field access via the lvalue.
2993 std::unique_ptr<LValue> lvalue = this->makeLValue(f, /*allowScratch=*/true);
2994 return lvalue && this->push(*lvalue);
2995 }
2996
pushFunctionCall(const FunctionCall & c)2997 bool Generator::pushFunctionCall(const FunctionCall& c) {
2998 if (c.function().isIntrinsic()) {
2999 return this->pushIntrinsic(c);
3000 }
3001
3002 // Keep track of the current function.
3003 const FunctionDefinition* lastFunction = fCurrentFunction;
3004 fCurrentFunction = c.function().definition();
3005
3006 // Skip over the function body entirely if there are no active lanes.
3007 // (If the function call was trivial, it would likely have been inlined in the frontend, so we
3008 // assume here that function calls generally represent a significant amount of work.)
3009 int skipLabelID = fBuilder.nextLabelID();
3010 fBuilder.branch_if_no_lanes_active(skipLabelID);
3011
3012 // Emit the function body.
3013 std::optional<SlotRange> r = this->writeFunction(c, *fCurrentFunction, c.arguments());
3014 if (!r.has_value()) {
3015 return unsupported();
3016 }
3017
3018 // If the function uses result slots, move its result from slots onto the stack.
3019 if (this->needsFunctionResultSlots(fCurrentFunction)) {
3020 fBuilder.push_slots(*r);
3021 }
3022
3023 // We've returned back to the last function.
3024 fCurrentFunction = lastFunction;
3025
3026 // Copy the function result from its slots onto the stack.
3027 fBuilder.label(skipLabelID);
3028 return true;
3029 }
3030
pushIndexExpression(const IndexExpression & i)3031 bool Generator::pushIndexExpression(const IndexExpression& i) {
3032 std::unique_ptr<LValue> lvalue = this->makeLValue(i, /*allowScratch=*/true);
3033 return lvalue && this->push(*lvalue);
3034 }
3035
pushIntrinsic(const FunctionCall & c)3036 bool Generator::pushIntrinsic(const FunctionCall& c) {
3037 const ExpressionArray& args = c.arguments();
3038 switch (args.size()) {
3039 case 1:
3040 return this->pushIntrinsic(c.function().intrinsicKind(), *args[0]);
3041
3042 case 2:
3043 return this->pushIntrinsic(c.function().intrinsicKind(), *args[0], *args[1]);
3044
3045 case 3:
3046 return this->pushIntrinsic(c.function().intrinsicKind(), *args[0], *args[1], *args[2]);
3047
3048 default:
3049 break;
3050 }
3051
3052 return unsupported();
3053 }
3054
pushLengthIntrinsic(int slotCount)3055 bool Generator::pushLengthIntrinsic(int slotCount) {
3056 if (slotCount == 1) {
3057 // `length(scalar)` is `sqrt(x^2)`, which is equivalent to `abs(x)`.
3058 return this->pushAbsFloatIntrinsic(/*slots=*/1);
3059 }
3060 // Implement `length(vec)` as `sqrt(dot(x, x))`.
3061 fBuilder.push_clone(slotCount);
3062 fBuilder.dot_floats(slotCount);
3063 fBuilder.unary_op(BuilderOp::sqrt_float, 1);
3064 return true;
3065 }
3066
pushAbsFloatIntrinsic(int slots)3067 bool Generator::pushAbsFloatIntrinsic(int slots) {
3068 // Perform abs(float) by masking off the sign bit.
3069 fBuilder.push_constant_u(0x7FFFFFFF, slots);
3070 fBuilder.binary_op(BuilderOp::bitwise_and_n_ints, slots);
3071 return true;
3072 }
3073
pushVectorizedExpression(const Expression & expr,const Type & vectorType)3074 bool Generator::pushVectorizedExpression(const Expression& expr, const Type& vectorType) {
3075 if (!this->pushExpression(expr)) {
3076 return unsupported();
3077 }
3078 if (vectorType.slotCount() > expr.type().slotCount()) {
3079 SkASSERT(expr.type().slotCount() == 1);
3080 fBuilder.push_duplicates(vectorType.slotCount() - expr.type().slotCount());
3081 }
3082 return true;
3083 }
3084
pushIntrinsic(const TypedOps & ops,const Expression & arg0)3085 bool Generator::pushIntrinsic(const TypedOps& ops, const Expression& arg0) {
3086 if (!this->pushExpression(arg0)) {
3087 return unsupported();
3088 }
3089 return this->unaryOp(arg0.type(), ops);
3090 }
3091
pushIntrinsic(BuilderOp builderOp,const Expression & arg0)3092 bool Generator::pushIntrinsic(BuilderOp builderOp, const Expression& arg0) {
3093 if (!this->pushExpression(arg0)) {
3094 return unsupported();
3095 }
3096 fBuilder.unary_op(builderOp, arg0.type().slotCount());
3097 return true;
3098 }
3099
pushIntrinsic(IntrinsicKind intrinsic,const Expression & arg0)3100 bool Generator::pushIntrinsic(IntrinsicKind intrinsic, const Expression& arg0) {
3101 switch (intrinsic) {
3102 case IntrinsicKind::k_abs_IntrinsicKind:
3103 if (arg0.type().componentType().isFloat()) {
3104 // Perform abs(float) by masking off the sign bit.
3105 if (!this->pushExpression(arg0)) {
3106 return unsupported();
3107 }
3108 return this->pushAbsFloatIntrinsic(arg0.type().slotCount());
3109 }
3110 // We have a dedicated op for abs(int).
3111 return this->pushIntrinsic(BuilderOp::abs_int, arg0);
3112
3113 case IntrinsicKind::k_any_IntrinsicKind:
3114 if (!this->pushExpression(arg0)) {
3115 return unsupported();
3116 }
3117 this->foldWithMultiOp(BuilderOp::bitwise_or_n_ints, arg0.type().slotCount());
3118 return true;
3119
3120 case IntrinsicKind::k_all_IntrinsicKind:
3121 if (!this->pushExpression(arg0)) {
3122 return unsupported();
3123 }
3124 this->foldWithMultiOp(BuilderOp::bitwise_and_n_ints, arg0.type().slotCount());
3125 return true;
3126
3127 case IntrinsicKind::k_acos_IntrinsicKind:
3128 return this->pushIntrinsic(BuilderOp::acos_float, arg0);
3129
3130 case IntrinsicKind::k_asin_IntrinsicKind:
3131 return this->pushIntrinsic(BuilderOp::asin_float, arg0);
3132
3133 case IntrinsicKind::k_atan_IntrinsicKind:
3134 return this->pushIntrinsic(BuilderOp::atan_float, arg0);
3135
3136 case IntrinsicKind::k_ceil_IntrinsicKind:
3137 return this->pushIntrinsic(BuilderOp::ceil_float, arg0);
3138
3139 case IntrinsicKind::k_cos_IntrinsicKind:
3140 return this->pushIntrinsic(BuilderOp::cos_float, arg0);
3141
3142 case IntrinsicKind::k_degrees_IntrinsicKind: {
3143 Literal lit180OverPi{Position{}, 57.2957795131f, &arg0.type().componentType()};
3144 return this->pushBinaryExpression(arg0, OperatorKind::STAR, lit180OverPi);
3145 }
3146 case IntrinsicKind::k_floatBitsToInt_IntrinsicKind:
3147 case IntrinsicKind::k_floatBitsToUint_IntrinsicKind:
3148 case IntrinsicKind::k_intBitsToFloat_IntrinsicKind:
3149 case IntrinsicKind::k_uintBitsToFloat_IntrinsicKind:
3150 return this->pushExpression(arg0);
3151
3152 case IntrinsicKind::k_exp_IntrinsicKind:
3153 return this->pushIntrinsic(BuilderOp::exp_float, arg0);
3154
3155 case IntrinsicKind::k_exp2_IntrinsicKind:
3156 return this->pushIntrinsic(BuilderOp::exp2_float, arg0);
3157
3158 case IntrinsicKind::k_floor_IntrinsicKind:
3159 return this->pushIntrinsic(BuilderOp::floor_float, arg0);
3160
3161 case IntrinsicKind::k_fract_IntrinsicKind:
3162 // Implement fract as `x - floor(x)`.
3163 if (!this->pushExpression(arg0)) {
3164 return unsupported();
3165 }
3166 fBuilder.push_clone(arg0.type().slotCount());
3167 fBuilder.unary_op(BuilderOp::floor_float, arg0.type().slotCount());
3168 return this->binaryOp(arg0.type(), kSubtractOps);
3169
3170 case IntrinsicKind::k_inverse_IntrinsicKind:
3171 SkASSERT(arg0.type().isMatrix());
3172 SkASSERT(arg0.type().rows() == arg0.type().columns());
3173 if (!this->pushExpression(arg0)) {
3174 return unsupported();
3175 }
3176 fBuilder.inverse_matrix(arg0.type().rows());
3177 return true;
3178
3179 case IntrinsicKind::k_inversesqrt_IntrinsicKind:
3180 return this->pushIntrinsic(kInverseSqrtOps, arg0);
3181
3182 case IntrinsicKind::k_length_IntrinsicKind:
3183 return this->pushExpression(arg0) &&
3184 this->pushLengthIntrinsic(arg0.type().slotCount());
3185
3186 case IntrinsicKind::k_log_IntrinsicKind:
3187 if (!this->pushExpression(arg0)) {
3188 return unsupported();
3189 }
3190 fBuilder.unary_op(BuilderOp::log_float, arg0.type().slotCount());
3191 return true;
3192
3193 case IntrinsicKind::k_log2_IntrinsicKind:
3194 if (!this->pushExpression(arg0)) {
3195 return unsupported();
3196 }
3197 fBuilder.unary_op(BuilderOp::log2_float, arg0.type().slotCount());
3198 return true;
3199
3200 case IntrinsicKind::k_normalize_IntrinsicKind: {
3201 // Implement normalize as `x / length(x)`. First, push the expression.
3202 if (!this->pushExpression(arg0)) {
3203 return unsupported();
3204 }
3205 int slotCount = arg0.type().slotCount();
3206 if (slotCount > 1) {
3207 #if defined(SK_USE_RSQRT_IN_RP_NORMALIZE)
3208 // Instead of `x / sqrt(dot(x, x))`, we can get roughly the same result in less time
3209 // by computing `x * invsqrt(dot(x, x))`.
3210 fBuilder.push_clone(slotCount);
3211 fBuilder.push_clone(slotCount);
3212 fBuilder.dot_floats(slotCount);
3213
3214 // Compute `vec(inversesqrt(dot(x, x)))`.
3215 fBuilder.unary_op(BuilderOp::invsqrt_float, 1);
3216 fBuilder.push_duplicates(slotCount - 1);
3217
3218 // Return `x * vec(inversesqrt(dot(x, x)))`.
3219 return this->binaryOp(arg0.type(), kMultiplyOps);
3220 #else
3221 // TODO: We can get roughly the same result in less time by using `invsqrt`, but
3222 // that leads to more variance across architectures, which Chromium layout tests do
3223 // not handle nicely.
3224 fBuilder.push_clone(slotCount);
3225 fBuilder.push_clone(slotCount);
3226 fBuilder.dot_floats(slotCount);
3227
3228 // Compute `vec(sqrt(dot(x, x)))`.
3229 fBuilder.unary_op(BuilderOp::sqrt_float, 1);
3230 fBuilder.push_duplicates(slotCount - 1);
3231
3232 // Return `x / vec(sqrt(dot(x, x)))`.
3233 return this->binaryOp(arg0.type(), kDivideOps);
3234 #endif
3235 } else {
3236 // For single-slot normalization, we can simplify `sqrt(x * x)` into `abs(x)`.
3237 fBuilder.push_clone(slotCount);
3238 return this->pushAbsFloatIntrinsic(/*slots=*/1) &&
3239 this->binaryOp(arg0.type(), kDivideOps);
3240 }
3241 }
3242 case IntrinsicKind::k_not_IntrinsicKind:
3243 return this->pushPrefixExpression(OperatorKind::LOGICALNOT, arg0);
3244
3245 case IntrinsicKind::k_radians_IntrinsicKind: {
3246 Literal litPiOver180{Position{}, 0.01745329251f, &arg0.type().componentType()};
3247 return this->pushBinaryExpression(arg0, OperatorKind::STAR, litPiOver180);
3248 }
3249 case IntrinsicKind::k_saturate_IntrinsicKind: {
3250 // Implement saturate as clamp(arg, 0, 1).
3251 Literal zeroLiteral{Position{}, 0.0, &arg0.type().componentType()};
3252 Literal oneLiteral{Position{}, 1.0, &arg0.type().componentType()};
3253 return this->pushIntrinsic(k_clamp_IntrinsicKind, arg0, zeroLiteral, oneLiteral);
3254 }
3255 case IntrinsicKind::k_sign_IntrinsicKind: {
3256 // Implement floating-point sign() as `clamp(arg * FLT_MAX, -1, 1)`.
3257 // FLT_MIN * FLT_MAX evaluates to 4, so multiplying any float value against FLT_MAX is
3258 // sufficient to ensure that |value| is always 1 or greater (excluding zero and nan).
3259 // Integer sign() doesn't need to worry about fractional values or nans, and can simply
3260 // be `clamp(arg, -1, 1)`.
3261 if (!this->pushExpression(arg0)) {
3262 return unsupported();
3263 }
3264 if (arg0.type().componentType().isFloat()) {
3265 Literal fltMaxLiteral{Position{}, FLT_MAX, &arg0.type().componentType()};
3266 if (!this->pushVectorizedExpression(fltMaxLiteral, arg0.type())) {
3267 return unsupported();
3268 }
3269 if (!this->binaryOp(arg0.type(), kMultiplyOps)) {
3270 return unsupported();
3271 }
3272 }
3273 Literal neg1Literal{Position{}, -1.0, &arg0.type().componentType()};
3274 if (!this->pushVectorizedExpression(neg1Literal, arg0.type())) {
3275 return unsupported();
3276 }
3277 if (!this->binaryOp(arg0.type(), kMaxOps)) {
3278 return unsupported();
3279 }
3280 Literal pos1Literal{Position{}, 1.0, &arg0.type().componentType()};
3281 if (!this->pushVectorizedExpression(pos1Literal, arg0.type())) {
3282 return unsupported();
3283 }
3284 return this->binaryOp(arg0.type(), kMinOps);
3285 }
3286 case IntrinsicKind::k_sin_IntrinsicKind:
3287 return this->pushIntrinsic(BuilderOp::sin_float, arg0);
3288
3289 case IntrinsicKind::k_sqrt_IntrinsicKind:
3290 return this->pushIntrinsic(BuilderOp::sqrt_float, arg0);
3291
3292 case IntrinsicKind::k_tan_IntrinsicKind:
3293 return this->pushIntrinsic(BuilderOp::tan_float, arg0);
3294
3295 case IntrinsicKind::k_transpose_IntrinsicKind:
3296 SkASSERT(arg0.type().isMatrix());
3297 if (!this->pushExpression(arg0)) {
3298 return unsupported();
3299 }
3300 fBuilder.transpose(arg0.type().columns(), arg0.type().rows());
3301 return true;
3302
3303 case IntrinsicKind::k_trunc_IntrinsicKind:
3304 // Implement trunc as `float(int(x))`, since float-to-int rounds toward zero.
3305 if (!this->pushExpression(arg0)) {
3306 return unsupported();
3307 }
3308 fBuilder.unary_op(BuilderOp::cast_to_int_from_float, arg0.type().slotCount());
3309 fBuilder.unary_op(BuilderOp::cast_to_float_from_int, arg0.type().slotCount());
3310 return true;
3311
3312 case IntrinsicKind::k_fromLinearSrgb_IntrinsicKind:
3313 case IntrinsicKind::k_toLinearSrgb_IntrinsicKind:
3314 // The argument must be a half3.
3315 SkASSERT(arg0.type().matches(*fContext.fTypes.fHalf3));
3316 if (!this->pushExpression(arg0)) {
3317 return unsupported();
3318 }
3319
3320 if (intrinsic == IntrinsicKind::k_fromLinearSrgb_IntrinsicKind) {
3321 fBuilder.invoke_from_linear_srgb();
3322 } else {
3323 fBuilder.invoke_to_linear_srgb();
3324 }
3325 return true;
3326
3327 default:
3328 break;
3329 }
3330 return unsupported();
3331 }
3332
pushIntrinsic(const TypedOps & ops,const Expression & arg0,const Expression & arg1)3333 bool Generator::pushIntrinsic(const TypedOps& ops, const Expression& arg0, const Expression& arg1) {
3334 if (!this->pushExpression(arg0) || !this->pushVectorizedExpression(arg1, arg0.type())) {
3335 return unsupported();
3336 }
3337 return this->binaryOp(arg0.type(), ops);
3338 }
3339
pushIntrinsic(BuilderOp builderOp,const Expression & arg0,const Expression & arg1)3340 bool Generator::pushIntrinsic(BuilderOp builderOp, const Expression& arg0, const Expression& arg1) {
3341 if (!this->pushExpression(arg0) || !this->pushVectorizedExpression(arg1, arg0.type())) {
3342 return unsupported();
3343 }
3344 fBuilder.binary_op(builderOp, arg0.type().slotCount());
3345 return true;
3346 }
3347
pushIntrinsic(IntrinsicKind intrinsic,const Expression & arg0,const Expression & arg1)3348 bool Generator::pushIntrinsic(IntrinsicKind intrinsic,
3349 const Expression& arg0,
3350 const Expression& arg1) {
3351 switch (intrinsic) {
3352 case IntrinsicKind::k_atan_IntrinsicKind:
3353 return this->pushIntrinsic(BuilderOp::atan2_n_floats, arg0, arg1);
3354
3355 case IntrinsicKind::k_cross_IntrinsicKind: {
3356 // Implement cross as `arg0.yzx * arg1.zxy - arg0.zxy * arg1.yzx`. We use two stacks so
3357 // that each subexpression can be multiplied separately.
3358 SkASSERT(arg0.type().matches(arg1.type()));
3359 SkASSERT(arg0.type().slotCount() == 3);
3360 SkASSERT(arg1.type().slotCount() == 3);
3361
3362 // Push `arg0.yzx` onto this stack and `arg0.zxy` onto a separate subexpression stack.
3363 AutoStack subexpressionStack(this);
3364 subexpressionStack.enter();
3365 if (!this->pushExpression(arg0)) {
3366 return unsupported();
3367 }
3368 subexpressionStack.exit();
3369 subexpressionStack.pushClone(/*slots=*/3);
3370
3371 fBuilder.swizzle(/*consumedSlots=*/3, {1, 2, 0});
3372 subexpressionStack.enter();
3373 fBuilder.swizzle(/*consumedSlots=*/3, {2, 0, 1});
3374 subexpressionStack.exit();
3375
3376 // Push `arg1.zxy` onto this stack and `arg1.yzx` onto the next stack. Perform the
3377 // multiply on each subexpression (`arg0.yzx * arg1.zxy` on the first stack, and
3378 // `arg0.zxy * arg1.yzx` on the next).
3379 subexpressionStack.enter();
3380 if (!this->pushExpression(arg1)) {
3381 return unsupported();
3382 }
3383 subexpressionStack.exit();
3384 subexpressionStack.pushClone(/*slots=*/3);
3385
3386 fBuilder.swizzle(/*consumedSlots=*/3, {2, 0, 1});
3387 fBuilder.binary_op(BuilderOp::mul_n_floats, 3);
3388
3389 subexpressionStack.enter();
3390 fBuilder.swizzle(/*consumedSlots=*/3, {1, 2, 0});
3391 fBuilder.binary_op(BuilderOp::mul_n_floats, 3);
3392 subexpressionStack.exit();
3393
3394 // Migrate the result of the second subexpression (`arg0.zxy * arg1.yzx`) back onto the
3395 // main stack and subtract it from the first subexpression (`arg0.yzx * arg1.zxy`).
3396 subexpressionStack.pushClone(/*slots=*/3);
3397 fBuilder.binary_op(BuilderOp::sub_n_floats, 3);
3398
3399 // Now that the calculation is complete, discard the subexpression on the next stack.
3400 subexpressionStack.enter();
3401 this->discardExpression(/*slots=*/3);
3402 subexpressionStack.exit();
3403 return true;
3404 }
3405 case IntrinsicKind::k_distance_IntrinsicKind:
3406 // Implement distance as `length(a - b)`.
3407 SkASSERT(arg0.type().slotCount() == arg1.type().slotCount());
3408 return this->pushBinaryExpression(arg0, OperatorKind::MINUS, arg1) &&
3409 this->pushLengthIntrinsic(arg0.type().slotCount());
3410
3411 case IntrinsicKind::k_dot_IntrinsicKind:
3412 SkASSERT(arg0.type().matches(arg1.type()));
3413 if (!this->pushExpression(arg0) || !this->pushExpression(arg1)) {
3414 return unsupported();
3415 }
3416 fBuilder.dot_floats(arg0.type().slotCount());
3417 return true;
3418
3419 case IntrinsicKind::k_equal_IntrinsicKind:
3420 SkASSERT(arg0.type().matches(arg1.type()));
3421 return this->pushIntrinsic(kEqualOps, arg0, arg1);
3422
3423 case IntrinsicKind::k_notEqual_IntrinsicKind:
3424 SkASSERT(arg0.type().matches(arg1.type()));
3425 return this->pushIntrinsic(kNotEqualOps, arg0, arg1);
3426
3427 case IntrinsicKind::k_lessThan_IntrinsicKind:
3428 SkASSERT(arg0.type().matches(arg1.type()));
3429 return this->pushIntrinsic(kLessThanOps, arg0, arg1);
3430
3431 case IntrinsicKind::k_greaterThan_IntrinsicKind:
3432 SkASSERT(arg0.type().matches(arg1.type()));
3433 return this->pushIntrinsic(kLessThanOps, arg1, arg0);
3434
3435 case IntrinsicKind::k_lessThanEqual_IntrinsicKind:
3436 SkASSERT(arg0.type().matches(arg1.type()));
3437 return this->pushIntrinsic(kLessThanEqualOps, arg0, arg1);
3438
3439 case IntrinsicKind::k_greaterThanEqual_IntrinsicKind:
3440 SkASSERT(arg0.type().matches(arg1.type()));
3441 return this->pushIntrinsic(kLessThanEqualOps, arg1, arg0);
3442
3443 case IntrinsicKind::k_min_IntrinsicKind:
3444 SkASSERT(arg0.type().componentType().matches(arg1.type().componentType()));
3445 return this->pushIntrinsic(kMinOps, arg0, arg1);
3446
3447 case IntrinsicKind::k_matrixCompMult_IntrinsicKind:
3448 SkASSERT(arg0.type().matches(arg1.type()));
3449 return this->pushIntrinsic(kMultiplyOps, arg0, arg1);
3450
3451 case IntrinsicKind::k_max_IntrinsicKind:
3452 SkASSERT(arg0.type().componentType().matches(arg1.type().componentType()));
3453 return this->pushIntrinsic(kMaxOps, arg0, arg1);
3454
3455 case IntrinsicKind::k_mod_IntrinsicKind:
3456 SkASSERT(arg0.type().componentType().matches(arg1.type().componentType()));
3457 return this->pushIntrinsic(kModOps, arg0, arg1);
3458
3459 case IntrinsicKind::k_pow_IntrinsicKind:
3460 SkASSERT(arg0.type().matches(arg1.type()));
3461 return this->pushIntrinsic(BuilderOp::pow_n_floats, arg0, arg1);
3462
3463 case IntrinsicKind::k_reflect_IntrinsicKind: {
3464 // Implement reflect as `I - (N * dot(I,N) * 2)`.
3465 SkASSERT(arg0.type().matches(arg1.type()));
3466 SkASSERT(arg0.type().slotCount() == arg1.type().slotCount());
3467 SkASSERT(arg0.type().componentType().isFloat());
3468 int slotCount = arg0.type().slotCount();
3469
3470 // Stack: I, N.
3471 if (!this->pushExpression(arg0) || !this->pushExpression(arg1)) {
3472 return unsupported();
3473 }
3474 // Stack: I, N, I, N.
3475 fBuilder.push_clone(2 * slotCount);
3476 // Stack: I, N, dot(I,N)
3477 fBuilder.dot_floats(slotCount);
3478 // Stack: I, N, dot(I,N), 2
3479 fBuilder.push_constant_f(2.0);
3480 // Stack: I, N, dot(I,N) * 2
3481 fBuilder.binary_op(BuilderOp::mul_n_floats, 1);
3482 // Stack: I, N * dot(I,N) * 2
3483 fBuilder.push_duplicates(slotCount - 1);
3484 fBuilder.binary_op(BuilderOp::mul_n_floats, slotCount);
3485 // Stack: I - (N * dot(I,N) * 2)
3486 fBuilder.binary_op(BuilderOp::sub_n_floats, slotCount);
3487 return true;
3488 }
3489 case IntrinsicKind::k_step_IntrinsicKind: {
3490 // Compute step as `float(lessThanEqual(edge, x))`. We convert from boolean 0/~0 to
3491 // floating point zero/one by using a bitwise-and against the bit-pattern of 1.0.
3492 SkASSERT(arg0.type().componentType().matches(arg1.type().componentType()));
3493 if (!this->pushVectorizedExpression(arg0, arg1.type()) || !this->pushExpression(arg1)) {
3494 return unsupported();
3495 }
3496 if (!this->binaryOp(arg1.type(), kLessThanEqualOps)) {
3497 return unsupported();
3498 }
3499 Literal pos1Literal{Position{}, 1.0, &arg1.type().componentType()};
3500 if (!this->pushVectorizedExpression(pos1Literal, arg1.type())) {
3501 return unsupported();
3502 }
3503 fBuilder.binary_op(BuilderOp::bitwise_and_n_ints, arg1.type().slotCount());
3504 return true;
3505 }
3506
3507 default:
3508 break;
3509 }
3510 return unsupported();
3511 }
3512
pushIntrinsic(IntrinsicKind intrinsic,const Expression & arg0,const Expression & arg1,const Expression & arg2)3513 bool Generator::pushIntrinsic(IntrinsicKind intrinsic,
3514 const Expression& arg0,
3515 const Expression& arg1,
3516 const Expression& arg2) {
3517 switch (intrinsic) {
3518 case IntrinsicKind::k_clamp_IntrinsicKind:
3519 // Implement clamp as min(max(arg, low), high).
3520 SkASSERT(arg0.type().componentType().matches(arg1.type().componentType()));
3521 SkASSERT(arg0.type().componentType().matches(arg2.type().componentType()));
3522 if (!this->pushExpression(arg0) || !this->pushVectorizedExpression(arg1, arg0.type())) {
3523 return unsupported();
3524 }
3525 if (!this->binaryOp(arg0.type(), kMaxOps)) {
3526 return unsupported();
3527 }
3528 if (!this->pushVectorizedExpression(arg2, arg0.type())) {
3529 return unsupported();
3530 }
3531 if (!this->binaryOp(arg0.type(), kMinOps)) {
3532 return unsupported();
3533 }
3534 return true;
3535
3536 case IntrinsicKind::k_faceforward_IntrinsicKind: {
3537 // Implement faceforward as `N ^ ((0 <= dot(I, NRef)) & 0x80000000)`.
3538 // In other words, flip the sign bit of N if `0 <= dot(I, NRef)`.
3539 SkASSERT(arg0.type().matches(arg1.type()));
3540 SkASSERT(arg0.type().matches(arg2.type()));
3541 int slotCount = arg0.type().slotCount();
3542
3543 // Stack: N, 0, I, Nref
3544 if (!this->pushExpression(arg0)) {
3545 return unsupported();
3546 }
3547 fBuilder.push_constant_f(0.0);
3548 if (!this->pushExpression(arg1) || !this->pushExpression(arg2)) {
3549 return unsupported();
3550 }
3551 // Stack: N, 0, dot(I,NRef)
3552 fBuilder.dot_floats(slotCount);
3553 // Stack: N, (0 <= dot(I,NRef))
3554 fBuilder.binary_op(BuilderOp::cmple_n_floats, 1);
3555 // Stack: N, (0 <= dot(I,NRef)), 0x80000000
3556 fBuilder.push_constant_u(0x80000000);
3557 // Stack: N, (0 <= dot(I,NRef)) & 0x80000000)
3558 fBuilder.binary_op(BuilderOp::bitwise_and_n_ints, 1);
3559 // Stack: N, vec(0 <= dot(I,NRef)) & 0x80000000)
3560 fBuilder.push_duplicates(slotCount - 1);
3561 // Stack: N ^ vec((0 <= dot(I,NRef)) & 0x80000000)
3562 fBuilder.binary_op(BuilderOp::bitwise_xor_n_ints, slotCount);
3563 return true;
3564 }
3565 case IntrinsicKind::k_mix_IntrinsicKind:
3566 // Note: our SkRP mix op takes the interpolation point first, not the interpolants.
3567 SkASSERT(arg0.type().matches(arg1.type()));
3568 if (arg2.type().componentType().isFloat()) {
3569 SkASSERT(arg0.type().componentType().matches(arg2.type().componentType()));
3570 if (!this->pushVectorizedExpression(arg2, arg0.type())) {
3571 return unsupported();
3572 }
3573 if (!this->pushExpression(arg0) || !this->pushExpression(arg1)) {
3574 return unsupported();
3575 }
3576 return this->ternaryOp(arg0.type(), kMixOps);
3577 }
3578 if (arg2.type().componentType().isBoolean()) {
3579 if (!this->pushExpression(arg2)) {
3580 return unsupported();
3581 }
3582 if (!this->pushExpression(arg0) || !this->pushExpression(arg1)) {
3583 return unsupported();
3584 }
3585 // The `mix_int` op isn't doing a lerp; it uses the third argument to select values
3586 // from the first and second arguments. It's safe for use with any type in arguments
3587 // 0 and 1.
3588 fBuilder.ternary_op(BuilderOp::mix_n_ints, arg0.type().slotCount());
3589 return true;
3590 }
3591 return unsupported();
3592
3593 case IntrinsicKind::k_refract_IntrinsicKind: {
3594 // We always calculate refraction using vec4s, so we pad out unused N/I slots with zero.
3595 int padding = 4 - arg0.type().slotCount();
3596 if (!this->pushExpression(arg0)) {
3597 return unsupported();
3598 }
3599 fBuilder.push_zeros(padding);
3600
3601 if (!this->pushExpression(arg1)) {
3602 return unsupported();
3603 }
3604 fBuilder.push_zeros(padding);
3605
3606 // eta is always a scalar and doesn't need padding.
3607 if (!this->pushExpression(arg2)) {
3608 return unsupported();
3609 }
3610 fBuilder.refract_floats();
3611
3612 // The result vector was returned as a vec4, so discard the extra columns.
3613 fBuilder.discard_stack(padding);
3614 return true;
3615 }
3616 case IntrinsicKind::k_smoothstep_IntrinsicKind:
3617 SkASSERT(arg0.type().componentType().isFloat());
3618 SkASSERT(arg1.type().matches(arg0.type()));
3619 SkASSERT(arg2.type().componentType().isFloat());
3620
3621 if (!this->pushVectorizedExpression(arg0, arg2.type()) ||
3622 !this->pushVectorizedExpression(arg1, arg2.type()) ||
3623 !this->pushExpression(arg2)) {
3624 return unsupported();
3625 }
3626 fBuilder.ternary_op(BuilderOp::smoothstep_n_floats, arg2.type().slotCount());
3627 return true;
3628
3629 default:
3630 break;
3631 }
3632 return unsupported();
3633 }
3634
pushLiteral(const Literal & l)3635 bool Generator::pushLiteral(const Literal& l) {
3636 switch (l.type().numberKind()) {
3637 case Type::NumberKind::kFloat:
3638 fBuilder.push_constant_f(l.floatValue());
3639 return true;
3640
3641 case Type::NumberKind::kSigned:
3642 fBuilder.push_constant_i(l.intValue());
3643 return true;
3644
3645 case Type::NumberKind::kUnsigned:
3646 fBuilder.push_constant_u(l.intValue());
3647 return true;
3648
3649 case Type::NumberKind::kBoolean:
3650 fBuilder.push_constant_i(l.boolValue() ? ~0 : 0);
3651 return true;
3652
3653 default:
3654 SkUNREACHABLE;
3655 }
3656 }
3657
pushPostfixExpression(const PostfixExpression & p,bool usesResult)3658 bool Generator::pushPostfixExpression(const PostfixExpression& p, bool usesResult) {
3659 // If the result is ignored...
3660 if (!usesResult) {
3661 // ... just emit a prefix expression instead.
3662 return this->pushPrefixExpression(p.getOperator(), *p.operand());
3663 }
3664 // Get the operand as an lvalue, and push it onto the stack as-is.
3665 std::unique_ptr<LValue> lvalue = this->makeLValue(*p.operand());
3666 if (!lvalue || !this->push(*lvalue)) {
3667 return unsupported();
3668 }
3669
3670 // Push a scratch copy of the operand.
3671 fBuilder.push_clone(p.type().slotCount());
3672
3673 // Increment or decrement the scratch copy by one.
3674 Literal oneLiteral{Position{}, 1.0, &p.type().componentType()};
3675 if (!this->pushVectorizedExpression(oneLiteral, p.type())) {
3676 return unsupported();
3677 }
3678
3679 switch (p.getOperator().kind()) {
3680 case OperatorKind::PLUSPLUS:
3681 if (!this->binaryOp(p.type(), kAddOps)) {
3682 return unsupported();
3683 }
3684 break;
3685
3686 case OperatorKind::MINUSMINUS:
3687 if (!this->binaryOp(p.type(), kSubtractOps)) {
3688 return unsupported();
3689 }
3690 break;
3691
3692 default:
3693 SkUNREACHABLE;
3694 }
3695
3696 // Write the new value back to the operand.
3697 if (!this->store(*lvalue)) {
3698 return unsupported();
3699 }
3700
3701 // Discard the scratch copy, leaving only the original value as-is.
3702 this->discardExpression(p.type().slotCount());
3703 return true;
3704 }
3705
pushPrefixExpression(const PrefixExpression & p)3706 bool Generator::pushPrefixExpression(const PrefixExpression& p) {
3707 return this->pushPrefixExpression(p.getOperator(), *p.operand());
3708 }
3709
pushPrefixExpression(Operator op,const Expression & expr)3710 bool Generator::pushPrefixExpression(Operator op, const Expression& expr) {
3711 switch (op.kind()) {
3712 case OperatorKind::BITWISENOT:
3713 case OperatorKind::LOGICALNOT:
3714 // Handle operators ! and ~.
3715 if (!this->pushExpression(expr)) {
3716 return unsupported();
3717 }
3718 fBuilder.push_constant_u(~0, expr.type().slotCount());
3719 fBuilder.binary_op(BuilderOp::bitwise_xor_n_ints, expr.type().slotCount());
3720 return true;
3721
3722 case OperatorKind::MINUS: {
3723 if (!this->pushExpression(expr)) {
3724 return unsupported();
3725 }
3726 if (expr.type().componentType().isFloat()) {
3727 // Handle float negation as an integer `x ^ 0x80000000`. This toggles the sign bit.
3728 fBuilder.push_constant_u(0x80000000, expr.type().slotCount());
3729 fBuilder.binary_op(BuilderOp::bitwise_xor_n_ints, expr.type().slotCount());
3730 } else {
3731 // Handle integer negation as a componentwise `expr * -1`.
3732 fBuilder.push_constant_i(-1, expr.type().slotCount());
3733 fBuilder.binary_op(BuilderOp::mul_n_ints, expr.type().slotCount());
3734 }
3735 return true;
3736 }
3737 case OperatorKind::PLUSPLUS: {
3738 // Rewrite as `expr += 1`.
3739 Literal oneLiteral{Position{}, 1.0, &expr.type().componentType()};
3740 return this->pushBinaryExpression(expr, OperatorKind::PLUSEQ, oneLiteral);
3741 }
3742 case OperatorKind::MINUSMINUS: {
3743 // Rewrite as `expr += -1`.
3744 Literal minusOneLiteral{expr.fPosition, -1.0, &expr.type().componentType()};
3745 return this->pushBinaryExpression(expr, OperatorKind::PLUSEQ, minusOneLiteral);
3746 }
3747 default:
3748 break;
3749 }
3750
3751 return unsupported();
3752 }
3753
pushSwizzle(const Swizzle & s)3754 bool Generator::pushSwizzle(const Swizzle& s) {
3755 SkASSERT(!s.components().empty() && s.components().size() <= 4);
3756
3757 // If this is a simple subset of a variable's slots...
3758 bool isSimpleSubset = is_sliceable_swizzle(s.components());
3759 if (isSimpleSubset && s.base()->is<VariableReference>()) {
3760 // ... we can just push part of the variable directly onto the stack, rather than pushing
3761 // the whole expression and then immediately cutting it down. (Either way works, but this
3762 // saves a step.)
3763 return this->pushVariableReferencePartial(
3764 s.base()->as<VariableReference>(),
3765 SlotRange{/*index=*/s.components()[0], /*count=*/s.components().size()});
3766 }
3767 // Push the base expression.
3768 if (!this->pushExpression(*s.base())) {
3769 return false;
3770 }
3771 // An identity swizzle doesn't rearrange the data; it just (potentially) discards tail elements.
3772 if (isSimpleSubset && s.components()[0] == 0) {
3773 int discardedElements = s.base()->type().slotCount() - s.components().size();
3774 SkASSERT(discardedElements >= 0);
3775 fBuilder.discard_stack(discardedElements);
3776 return true;
3777 }
3778 // Perform the swizzle.
3779 fBuilder.swizzle(s.base()->type().slotCount(), s.components());
3780 return true;
3781 }
3782
pushTernaryExpression(const TernaryExpression & t)3783 bool Generator::pushTernaryExpression(const TernaryExpression& t) {
3784 return this->pushTernaryExpression(*t.test(), *t.ifTrue(), *t.ifFalse());
3785 }
3786
pushDynamicallyUniformTernaryExpression(const Expression & test,const Expression & ifTrue,const Expression & ifFalse)3787 bool Generator::pushDynamicallyUniformTernaryExpression(const Expression& test,
3788 const Expression& ifTrue,
3789 const Expression& ifFalse) {
3790 SkASSERT(Analysis::IsDynamicallyUniformExpression(test));
3791
3792 int falseLabelID = fBuilder.nextLabelID();
3793 int exitLabelID = fBuilder.nextLabelID();
3794
3795 // First, push the test-expression into a separate stack.
3796 AutoStack testStack(this);
3797 testStack.enter();
3798 if (!this->pushExpression(test)) {
3799 return unsupported();
3800 }
3801
3802 // Branch to the true- or false-expression based on the test-expression. We can skip the
3803 // non-true path entirely since the test is known to be uniform.
3804 fBuilder.branch_if_no_active_lanes_on_stack_top_equal(~0, falseLabelID);
3805 testStack.exit();
3806
3807 if (!this->pushExpression(ifTrue)) {
3808 return unsupported();
3809 }
3810
3811 fBuilder.jump(exitLabelID);
3812
3813 // The builder doesn't understand control flow, and assumes that every push moves the stack-top
3814 // forwards. We need to manually balance out the `pushExpression` from the if-true path by
3815 // moving the stack position backwards, so that the if-false path pushes its expression into the
3816 // same as the if-true result.
3817 this->discardExpression(/*slots=*/ifTrue.type().slotCount());
3818
3819 fBuilder.label(falseLabelID);
3820
3821 if (!this->pushExpression(ifFalse)) {
3822 return unsupported();
3823 }
3824
3825 fBuilder.label(exitLabelID);
3826
3827 // Jettison the text-expression from the separate stack.
3828 testStack.enter();
3829 this->discardExpression(/*slots=*/1);
3830 testStack.exit();
3831 return true;
3832 }
3833
pushTernaryExpression(const Expression & test,const Expression & ifTrue,const Expression & ifFalse)3834 bool Generator::pushTernaryExpression(const Expression& test,
3835 const Expression& ifTrue,
3836 const Expression& ifFalse) {
3837 // If the test-expression is dynamically-uniform, we can skip over the non-true expressions
3838 // entirely, and not need to involve the condition mask.
3839 if (Analysis::IsDynamicallyUniformExpression(test)) {
3840 return this->pushDynamicallyUniformTernaryExpression(test, ifTrue, ifFalse);
3841 }
3842
3843 // Analyze the ternary to see which corners we can safely cut.
3844 bool ifFalseHasSideEffects = Analysis::HasSideEffects(ifFalse);
3845 bool ifTrueHasSideEffects = Analysis::HasSideEffects(ifTrue);
3846 bool ifTrueIsTrivial = Analysis::IsTrivialExpression(ifTrue);
3847 int cleanupLabelID = fBuilder.nextLabelID();
3848
3849 // If the true- and false-expressions both lack side effects, we evaluate both of them safely
3850 // without masking off their effects. In that case, we can emit both sides and use boolean mix
3851 // to select the correct result without using the condition mask at all.
3852 if (!ifFalseHasSideEffects && !ifTrueHasSideEffects && ifTrueIsTrivial) {
3853 // Push all of the arguments to mix.
3854 if (!this->pushVectorizedExpression(test, ifTrue.type())) {
3855 return unsupported();
3856 }
3857 if (!this->pushExpression(ifFalse)) {
3858 return unsupported();
3859 }
3860 if (!this->pushExpression(ifTrue)) {
3861 return unsupported();
3862 }
3863 // Use boolean mix to select the true- or false-expression via the test-expression.
3864 fBuilder.ternary_op(BuilderOp::mix_n_ints, ifTrue.type().slotCount());
3865 return true;
3866 }
3867
3868 // First, push the current condition-mask and the test-expression into a separate stack.
3869 fBuilder.enableExecutionMaskWrites();
3870 AutoStack testStack(this);
3871 testStack.enter();
3872 fBuilder.push_condition_mask();
3873 if (!this->pushExpression(test)) {
3874 return unsupported();
3875 }
3876 testStack.exit();
3877
3878 // We can take some shortcuts with condition-mask handling if the false-expression is entirely
3879 // side-effect free. (We can evaluate it without masking off its effects.) We always handle the
3880 // condition mask properly for the test-expression and true-expression properly.
3881 if (!ifFalseHasSideEffects) {
3882 // Push the false-expression onto the primary stack.
3883 if (!this->pushExpression(ifFalse)) {
3884 return unsupported();
3885 }
3886
3887 // Next, merge the condition mask (on the separate stack) with the test expression.
3888 testStack.enter();
3889 fBuilder.merge_condition_mask();
3890 testStack.exit();
3891
3892 // If no lanes are active, we can skip the true-expression entirely. This isn't super likely
3893 // to happen, so it's probably only a win for non-trivial true-expressions.
3894 if (!ifTrueIsTrivial) {
3895 fBuilder.branch_if_no_lanes_active(cleanupLabelID);
3896 }
3897
3898 // Push the true-expression onto the primary stack, immediately after the false-expression.
3899 if (!this->pushExpression(ifTrue)) {
3900 return unsupported();
3901 }
3902
3903 // Use a select to conditionally mask-merge the true-expression and false-expression lanes.
3904 fBuilder.select(/*slots=*/ifTrue.type().slotCount());
3905 fBuilder.label(cleanupLabelID);
3906 } else {
3907 // Merge the condition mask (on the separate stack) with the test expression.
3908 testStack.enter();
3909 fBuilder.merge_condition_mask();
3910 testStack.exit();
3911
3912 // Push the true-expression onto the primary stack.
3913 if (!this->pushExpression(ifTrue)) {
3914 return unsupported();
3915 }
3916
3917 // Switch back to the test-expression stack and apply the inverted test condition.
3918 testStack.enter();
3919 fBuilder.merge_inv_condition_mask();
3920 testStack.exit();
3921
3922 // Push the false-expression onto the primary stack, immediately after the true-expression.
3923 if (!this->pushExpression(ifFalse)) {
3924 return unsupported();
3925 }
3926
3927 // Use a select to conditionally mask-merge the true-expression and false-expression lanes;
3928 // the mask is already set up for this.
3929 fBuilder.select(/*slots=*/ifTrue.type().slotCount());
3930 }
3931
3932 // Restore the condition-mask to its original state and jettison the test-expression.
3933 testStack.enter();
3934 this->discardExpression(/*slots=*/1);
3935 fBuilder.pop_condition_mask();
3936 testStack.exit();
3937
3938 fBuilder.disableExecutionMaskWrites();
3939 return true;
3940 }
3941
pushVariableReference(const VariableReference & var)3942 bool Generator::pushVariableReference(const VariableReference& var) {
3943 // If we are pushing a constant-value variable, push the value directly; literal values are more
3944 // amenable to optimization.
3945 if (var.type().isScalar() || var.type().isVector()) {
3946 if (const Expression* expr = ConstantFolder::GetConstantValueOrNull(var)) {
3947 return this->pushExpression(*expr);
3948 }
3949 if (fImmutableVariables.contains(var.variable())) {
3950 return this->pushExpression(*var.variable()->initialValue());
3951 }
3952 }
3953 return this->pushVariableReferencePartial(var, SlotRange{0, (int)var.type().slotCount()});
3954 }
3955
pushVariableReferencePartial(const VariableReference & v,SlotRange subset)3956 bool Generator::pushVariableReferencePartial(const VariableReference& v, SlotRange subset) {
3957 const Variable& var = *v.variable();
3958 SlotRange r;
3959 if (IsUniform(var)) {
3960 // Push a uniform.
3961 r = this->getUniformSlots(var);
3962 SkASSERT(r.count == (int)var.type().slotCount());
3963 r.index += subset.index;
3964 r.count = subset.count;
3965 fBuilder.push_uniform(r);
3966 } else if (fImmutableVariables.contains(&var)) {
3967 // If we only need a single slot, we can push a constant. This saves a lookup, and can
3968 // occasionally permit the use of an immediate-mode op.
3969 if (subset.count == 1) {
3970 const Expression& expr = *v.variable()->initialValue();
3971 std::optional<ImmutableBits> bits = this->getImmutableBitsForSlot(expr, subset.index);
3972 if (bits.has_value()) {
3973 fBuilder.push_constant_i(*bits);
3974 return true;
3975 }
3976 }
3977 // Push the immutable slot range.
3978 r = this->getImmutableSlots(var);
3979 SkASSERT(r.count == (int)var.type().slotCount());
3980 r.index += subset.index;
3981 r.count = subset.count;
3982 fBuilder.push_immutable(r);
3983 } else {
3984 // Push the variable.
3985 r = this->getVariableSlots(var);
3986 SkASSERT(r.count == (int)var.type().slotCount());
3987 r.index += subset.index;
3988 r.count = subset.count;
3989 fBuilder.push_slots(r);
3990 }
3991 return true;
3992 }
3993
writeProgram(const FunctionDefinition & function)3994 bool Generator::writeProgram(const FunctionDefinition& function) {
3995 fCurrentFunction = &function;
3996
3997 if (fDebugTrace) {
3998 // Copy the program source into the debug info so that it will be written in the trace file.
3999 fDebugTrace->setSource(*fProgram.fSource);
4000
4001 if (fWriteTraceOps) {
4002 // The Raster Pipeline blitter generates centered pixel coordinates. (0.5, 1.5, 2.5,
4003 // etc.) Add 0.5 to the requested trace coordinate to match this, then compare against
4004 // src.rg, which contains the shader's coordinates. We keep this result in a dedicated
4005 // trace-mask stack.
4006 fTraceMask.emplace(this);
4007 fTraceMask->enter();
4008 fBuilder.push_device_xy01();
4009 fBuilder.discard_stack(2);
4010 fBuilder.push_constant_f(fDebugTrace->fTraceCoord.fX + 0.5f);
4011 fBuilder.push_constant_f(fDebugTrace->fTraceCoord.fY + 0.5f);
4012 fBuilder.binary_op(BuilderOp::cmpeq_n_floats, 2);
4013 fBuilder.binary_op(BuilderOp::bitwise_and_n_ints, 1);
4014 fTraceMask->exit();
4015
4016 // Assemble a position-to-line-number mapping for the debugger.
4017 this->calculateLineOffsets();
4018 }
4019 }
4020
4021 // Assign slots to the parameters of main; copy src and dst into those slots as appropriate.
4022 const SkSL::Variable* mainCoordsParam = function.declaration().getMainCoordsParameter();
4023 const SkSL::Variable* mainInputColorParam = function.declaration().getMainInputColorParameter();
4024 const SkSL::Variable* mainDestColorParam = function.declaration().getMainDestColorParameter();
4025
4026 for (const SkSL::Variable* param : function.declaration().parameters()) {
4027 if (param == mainCoordsParam) {
4028 // Coordinates are passed via RG.
4029 SlotRange fragCoord = this->getVariableSlots(*param);
4030 SkASSERT(fragCoord.count == 2);
4031 fBuilder.store_src_rg(fragCoord);
4032 } else if (param == mainInputColorParam) {
4033 // Input colors are passed via RGBA.
4034 SlotRange srcColor = this->getVariableSlots(*param);
4035 SkASSERT(srcColor.count == 4);
4036 fBuilder.store_src(srcColor);
4037 } else if (param == mainDestColorParam) {
4038 // Dest colors are passed via dRGBA.
4039 SlotRange destColor = this->getVariableSlots(*param);
4040 SkASSERT(destColor.count == 4);
4041 fBuilder.store_dst(destColor);
4042 } else {
4043 SkDEBUGFAIL("Invalid parameter to main()");
4044 return unsupported();
4045 }
4046 }
4047
4048 // Initialize the program.
4049 fBuilder.init_lane_masks();
4050
4051 // Emit global variables.
4052 if (!this->writeGlobals()) {
4053 return unsupported();
4054 }
4055
4056 // Invoke main().
4057 std::optional<SlotRange> mainResult = this->writeFunction(function, function, /*arguments=*/{});
4058 if (!mainResult.has_value()) {
4059 return unsupported();
4060 }
4061
4062 // Move the result of main() from slots into RGBA.
4063 SkASSERT(mainResult->count == 4);
4064 if (this->needsFunctionResultSlots(fCurrentFunction)) {
4065 fBuilder.load_src(*mainResult);
4066 } else {
4067 fBuilder.pop_src_rgba();
4068 }
4069
4070 // Discard the trace mask.
4071 if (fTraceMask.has_value()) {
4072 fTraceMask->enter();
4073 fBuilder.discard_stack(1);
4074 fTraceMask->exit();
4075 }
4076
4077 return true;
4078 }
4079
finish()4080 std::unique_ptr<RP::Program> Generator::finish() {
4081 return fBuilder.finish(fProgramSlots.slotCount(),
4082 fUniformSlots.slotCount(),
4083 fImmutableSlots.slotCount(),
4084 fDebugTrace);
4085 }
4086
4087 } // namespace RP
4088
MakeRasterPipelineProgram(const SkSL::Program & program,const FunctionDefinition & function,DebugTracePriv * debugTrace,bool writeTraceOps)4089 std::unique_ptr<RP::Program> MakeRasterPipelineProgram(const SkSL::Program& program,
4090 const FunctionDefinition& function,
4091 DebugTracePriv* debugTrace,
4092 bool writeTraceOps) {
4093 RP::Generator generator(program, debugTrace, writeTraceOps);
4094 if (!generator.writeProgram(function)) {
4095 return nullptr;
4096 }
4097 return generator.finish();
4098 }
4099
4100 } // namespace SkSL
4101