/*
 * Copyright 2022 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#include "src/sksl/codegen/SkSLRasterPipelineBuilder.h"
#include <cstdint>
#include <optional>

#include "include/core/SkStream.h"
#include "include/private/base/SkMalloc.h"
#include "include/private/base/SkTFitsIn.h"
#include "include/private/base/SkTo.h"
#include "src/base/SkArenaAlloc.h"
#include "src/base/SkSafeMath.h"
#include "src/core/SkOpts.h"
#include "src/core/SkRasterPipelineContextUtils.h"
#include "src/core/SkRasterPipelineOpContexts.h"
#include "src/core/SkRasterPipelineOpList.h"
#include "src/core/SkTHash.h"
#include "src/sksl/SkSLPosition.h"
#include "src/sksl/SkSLString.h"
#include "src/sksl/tracing/SkSLDebugTracePriv.h"
#include "src/sksl/tracing/SkSLTraceHook.h"

#if !defined(SKSL_STANDALONE)
#include "src/core/SkRasterPipeline.h"
#endif

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstring>
#include <iterator>
#include <string>
#include <string_view>
#include <tuple>
#include <utility>
#include <vector>

using namespace skia_private;

namespace SkSL::RP {

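// Each ALL_*_CASES macro below expands to a list of switch-case labels; the first label
// deliberately omits the `case` keyword so that the macro can be used inline, as
// `case ALL_..._CASES:`.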
#define ALL_SINGLE_SLOT_UNARY_OP_CASES  \
         BuilderOp::acos_float:         \
    case BuilderOp::asin_float:         \
    case BuilderOp::atan_float:         \
    case BuilderOp::cos_float:          \
    case BuilderOp::exp_float:          \
    case BuilderOp::exp2_float:         \
    case BuilderOp::log_float:          \
    case BuilderOp::log2_float:         \
    case BuilderOp::sin_float:          \
    case BuilderOp::sqrt_float:         \
    case BuilderOp::tan_float

#define ALL_MULTI_SLOT_UNARY_OP_CASES        \
         BuilderOp::abs_int:                 \
    case BuilderOp::cast_to_float_from_int:  \
    case BuilderOp::cast_to_float_from_uint: \
    case BuilderOp::cast_to_int_from_float:  \
    case BuilderOp::cast_to_uint_from_float: \
    case BuilderOp::ceil_float:              \
    case BuilderOp::floor_float:             \
    case BuilderOp::invsqrt_float

#define ALL_N_WAY_BINARY_OP_CASES   \
         BuilderOp::atan2_n_floats: \
    case BuilderOp::pow_n_floats

#define ALL_MULTI_SLOT_BINARY_OP_CASES  \
         BuilderOp::add_n_floats:       \
    case BuilderOp::add_n_ints:         \
    case BuilderOp::sub_n_floats:       \
    case BuilderOp::sub_n_ints:         \
    case BuilderOp::mul_n_floats:       \
    case BuilderOp::mul_n_ints:         \
    case BuilderOp::div_n_floats:       \
    case BuilderOp::div_n_ints:         \
    case BuilderOp::div_n_uints:        \
    case BuilderOp::bitwise_and_n_ints: \
    case BuilderOp::bitwise_or_n_ints:  \
    case BuilderOp::bitwise_xor_n_ints: \
    case BuilderOp::mod_n_floats:       \
    case BuilderOp::min_n_floats:       \
    case BuilderOp::min_n_ints:         \
    case BuilderOp::min_n_uints:        \
    case BuilderOp::max_n_floats:       \
    case BuilderOp::max_n_ints:         \
    case BuilderOp::max_n_uints:        \
    case BuilderOp::cmple_n_floats:     \
    case BuilderOp::cmple_n_ints:       \
    case BuilderOp::cmple_n_uints:      \
    case BuilderOp::cmplt_n_floats:     \
    case BuilderOp::cmplt_n_ints:       \
    case BuilderOp::cmplt_n_uints:      \
    case BuilderOp::cmpeq_n_floats:     \
    case BuilderOp::cmpeq_n_ints:       \
    case BuilderOp::cmpne_n_floats:     \
    case BuilderOp::cmpne_n_ints

#define ALL_IMMEDIATE_BINARY_OP_CASES    \
         BuilderOp::add_imm_float:       \
    case BuilderOp::add_imm_int:         \
    case BuilderOp::mul_imm_float:       \
    case BuilderOp::mul_imm_int:         \
    case BuilderOp::bitwise_and_imm_int: \
    case BuilderOp::bitwise_xor_imm_int: \
    case BuilderOp::min_imm_float:       \
    case BuilderOp::max_imm_float:       \
    case BuilderOp::cmple_imm_float:     \
    case BuilderOp::cmple_imm_int:       \
    case BuilderOp::cmple_imm_uint:      \
    case BuilderOp::cmplt_imm_float:     \
    case BuilderOp::cmplt_imm_int:       \
    case BuilderOp::cmplt_imm_uint:      \
    case BuilderOp::cmpeq_imm_float:     \
    case BuilderOp::cmpeq_imm_int:       \
    case BuilderOp::cmpne_imm_float:     \
    case BuilderOp::cmpne_imm_int

#define ALL_IMMEDIATE_MULTI_SLOT_BINARY_OP_CASES \
         BuilderOp::bitwise_and_imm_int

#define ALL_N_WAY_TERNARY_OP_CASES \
         BuilderOp::smoothstep_n_floats

#define ALL_MULTI_SLOT_TERNARY_OP_CASES \
         BuilderOp::mix_n_floats:       \
    case BuilderOp::mix_n_ints

static bool is_immediate_op(BuilderOp op) {
    switch (op) {
        case ALL_IMMEDIATE_BINARY_OP_CASES: return true;
        default: return false;
    }
}

static bool is_multi_slot_immediate_op(BuilderOp op) {
    switch (op) {
        case ALL_IMMEDIATE_MULTI_SLOT_BINARY_OP_CASES: return true;
        default: return false;
    }
}

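// Converts an N-way op into its immediate-mode equivalent when one exists. For example,
// `add_n_floats` applied to one or two slots converts to `add_imm_float`, and `sub_n_ints` with
// a constant of 5 converts to `add_imm_int` with a constant of -5.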
static BuilderOp convert_n_way_op_to_immediate(BuilderOp op, int slots, int32_t* constantValue) {
    // We rely on the exact ordering of SkRP ops here; the immediate-mode op must always come
    // directly before the n-way op. (If we have more than one, the increasing-slot variations
    // continue backwards from there.)
    BuilderOp immOp = (BuilderOp)((int)op - 1);

    // Some immediate ops support multiple slots.
    if (is_multi_slot_immediate_op(immOp)) {
        return immOp;
    }

    // Most immediate ops only directly support a single slot. However, it's still faster to
    // execute `add_imm_int, add_imm_int` instead of `splat_2_ints, add_2_ints`, so we allow those
    // conversions as well.
    if (slots <= 2) {
        if (is_immediate_op(immOp)) {
            return immOp;
        }

        // We also allow for immediate-mode subtraction, by adding a negative value.
        switch (op) {
            case BuilderOp::sub_n_ints:
                *constantValue *= -1;
                return BuilderOp::add_imm_int;

            case BuilderOp::sub_n_floats: {
                // This negates the floating-point value by inverting its sign bit.
                *constantValue ^= 0x80000000;
                return BuilderOp::add_imm_float;
            }
            default:
                break;
        }
    }

    // We don't have an immediate-mode version of this op.
    return op;
}

void Builder::appendInstruction(BuilderOp op, SlotList slots,
                                int immA, int immB, int immC, int immD) {
    fInstructions.push_back({op, slots.fSlotA, slots.fSlotB,
                             immA, immB, immC, immD, fCurrentStackID});
}

Instruction* Builder::lastInstruction(int fromBack) {
    if (fInstructions.size() <= fromBack) {
        return nullptr;
    }
    Instruction* inst = &fInstructions.fromBack(fromBack);
    if (inst->fStackID != fCurrentStackID) {
        return nullptr;
    }
    return inst;
}

Instruction* Builder::lastInstructionOnAnyStack(int fromBack) {
    if (fInstructions.size() <= fromBack) {
        return nullptr;
    }
    return &fInstructions.fromBack(fromBack);
}

void Builder::unary_op(BuilderOp op, int32_t slots) {
    switch (op) {
        case ALL_SINGLE_SLOT_UNARY_OP_CASES:
        case ALL_MULTI_SLOT_UNARY_OP_CASES:
            this->appendInstruction(op, {}, slots);
            break;

        default:
            SkDEBUGFAIL("not a unary op");
            break;
    }
}

void Builder::binary_op(BuilderOp op, int32_t slots) {
    if (Instruction* lastInstruction = this->lastInstruction()) {
        // If we just pushed or splatted a constant onto the stack...
        if (lastInstruction->fOp == BuilderOp::push_constant &&
            lastInstruction->fImmA >= slots) {
            // ... and this op has an immediate-mode equivalent...
            int32_t constantValue = lastInstruction->fImmB;
            BuilderOp immOp = convert_n_way_op_to_immediate(op, slots, &constantValue);
            if (immOp != op) {
                // ... discard the constants from the stack, and use an immediate-mode op.
                this->discard_stack(slots);
                this->appendInstruction(immOp, {}, slots, constantValue);
                return;
            }
        }
    }

    switch (op) {
        case ALL_N_WAY_BINARY_OP_CASES:
        case ALL_MULTI_SLOT_BINARY_OP_CASES:
            this->appendInstruction(op, {}, slots);
            break;

        default:
            SkDEBUGFAIL("not a binary op");
            break;
    }
}

void Builder::ternary_op(BuilderOp op, int32_t slots) {
    switch (op) {
        case ALL_N_WAY_TERNARY_OP_CASES:
        case ALL_MULTI_SLOT_TERNARY_OP_CASES:
            this->appendInstruction(op, {}, slots);
            break;

        default:
            SkDEBUGFAIL("not a ternary op");
            break;
    }
}

void Builder::dot_floats(int32_t slots) {
    switch (slots) {
        case 1: this->appendInstruction(BuilderOp::mul_n_floats, {}, slots); break;
        case 2: this->appendInstruction(BuilderOp::dot_2_floats, {}, slots); break;
        case 3: this->appendInstruction(BuilderOp::dot_3_floats, {}, slots); break;
        case 4: this->appendInstruction(BuilderOp::dot_4_floats, {}, slots); break;

        default:
            SkDEBUGFAIL("invalid number of slots");
            break;
    }
}

void Builder::refract_floats() {
    this->appendInstruction(BuilderOp::refract_4_floats, {});
}

void Builder::inverse_matrix(int32_t n) {
    switch (n) {
        case 2: this->appendInstruction(BuilderOp::inverse_mat2, {}, 4);  break;
        case 3: this->appendInstruction(BuilderOp::inverse_mat3, {}, 9);  break;
        case 4: this->appendInstruction(BuilderOp::inverse_mat4, {}, 16); break;
        default: SkUNREACHABLE;
    }
}

void Builder::pad_stack(int32_t count) {
    if (count > 0) {
        this->appendInstruction(BuilderOp::pad_stack, {}, count);
    }
}

bool Builder::simplifyImmediateUnmaskedOp() {
    if (fInstructions.size() < 3) {
        return false;
    }

    // If we detect a pattern of 'push, immediate-op, unmasked pop', then we can
    // convert it into an immediate-op directly onto the value slots and take the
    // stack entirely out of the equation.
    Instruction* popInstruction  = this->lastInstruction(/*fromBack=*/0);
    Instruction* immInstruction  = this->lastInstruction(/*fromBack=*/1);
    Instruction* pushInstruction = this->lastInstruction(/*fromBack=*/2);

    // If the last instruction is an unmasked pop...
    if (popInstruction && immInstruction && pushInstruction &&
        popInstruction->fOp == BuilderOp::copy_stack_to_slots_unmasked) {
        // ... and the prior instruction was an immediate-mode op, with the same number of slots...
        if (is_immediate_op(immInstruction->fOp) &&
            immInstruction->fImmA == popInstruction->fImmA) {
            // ... and we support multiple-slot immediates (if this op calls for it)...
            if (immInstruction->fImmA == 1 || is_multi_slot_immediate_op(immInstruction->fOp)) {
                // ... and the prior instruction was `push_slots` or `push_immutable` of at least
                // that many slots...
                if ((pushInstruction->fOp == BuilderOp::push_slots ||
                     pushInstruction->fOp == BuilderOp::push_immutable) &&
                    pushInstruction->fImmA >= popInstruction->fImmA) {
                    // ... onto the same slot range...
                    Slot immSlot  = popInstruction->fSlotA + popInstruction->fImmA;
                    Slot pushSlot = pushInstruction->fSlotA + pushInstruction->fImmA;
                    if (immSlot == pushSlot) {
                        // ... we can shrink the push, eliminate the pop, and perform the immediate
                        // op in-place instead.
                        pushInstruction->fImmA -= immInstruction->fImmA;
                        immInstruction->fSlotA = immSlot - immInstruction->fImmA;
                        fInstructions.pop_back();
                        return true;
                    }
                }
            }
        }
    }

    return false;
}

void Builder::discard_stack(int32_t count, int stackID) {
    // If we pushed something onto the stack and then immediately discarded part of it, we can
    // shrink or eliminate the push.
    while (count > 0) {
        Instruction* lastInstruction = this->lastInstructionOnAnyStack();
        if (!lastInstruction || lastInstruction->fStackID != stackID) {
            break;
        }

        switch (lastInstruction->fOp) {
            case BuilderOp::discard_stack:
                // Our last op was actually a separate discard_stack; combine the discards.
                lastInstruction->fImmA += count;
                return;

            case BuilderOp::push_clone:
            case BuilderOp::push_clone_from_stack:
            case BuilderOp::push_clone_indirect_from_stack:
            case BuilderOp::push_constant:
            case BuilderOp::push_immutable:
            case BuilderOp::push_immutable_indirect:
            case BuilderOp::push_slots:
            case BuilderOp::push_slots_indirect:
            case BuilderOp::push_uniform:
            case BuilderOp::push_uniform_indirect:
            case BuilderOp::pad_stack: {
                // Our last op was a multi-slot push; these cancel out. Eliminate the op if its
                // count reached zero.
                int cancelOut = std::min(count, lastInstruction->fImmA);
                count                  -= cancelOut;
                lastInstruction->fImmA -= cancelOut;
                if (lastInstruction->fImmA == 0) {
                    fInstructions.pop_back();
                }
                continue;
            }
            case BuilderOp::push_condition_mask:
            case BuilderOp::push_loop_mask:
            case BuilderOp::push_return_mask:
                // Our last op was a single-slot push; cancel out one discard and eliminate the op.
                --count;
                fInstructions.pop_back();
                continue;

            case BuilderOp::copy_stack_to_slots_unmasked: {
                // Look for a pattern of `push, immediate-ops, pop` and simplify it down to an
                // immediate-op directly to the value slot.
                if (count == 1) {
                    if (this->simplifyImmediateUnmaskedOp()) {
                        return;
                    }
                }

                // A `copy_stack_to_slots_unmasked` op, followed immediately by a `discard_stack`
                // op with an equal number of slots, is interpreted as an unmasked stack pop.
                // We can simplify pops in a variety of ways. First, temporarily get rid of
                // `copy_stack_to_slots_unmasked`.
                if (count == lastInstruction->fImmA) {
                    SlotRange dst{lastInstruction->fSlotA, lastInstruction->fImmA};
                    fInstructions.pop_back();

                    // See if we can write this pop in a simpler way.
                    this->simplifyPopSlotsUnmasked(&dst);

                    // If simplification consumed the entire range, we're done!
                    if (dst.count == 0) {
                        return;
                    }

                    // Simplification did not consume the entire range. We are still responsible
                    // for copying-back and discarding any remaining slots.
                    this->copy_stack_to_slots_unmasked(dst);
                    count = dst.count;
                }
                break;
            }
            default:
                break;
        }

        // This instruction wasn't a push.
        break;
    }

    if (count > 0) {
        this->appendInstruction(BuilderOp::discard_stack, {}, count);
    }
}

void Builder::label(int labelID) {
    SkASSERT(labelID >= 0 && labelID < fNumLabels);

    // If the previous instruction was a branch to this label, it's a no-op; jumping to the very
    // next instruction is effectively meaningless.
    while (const Instruction* lastInstruction = this->lastInstructionOnAnyStack()) {
        switch (lastInstruction->fOp) {
            case BuilderOp::jump:
            case BuilderOp::branch_if_all_lanes_active:
            case BuilderOp::branch_if_any_lanes_active:
            case BuilderOp::branch_if_no_lanes_active:
            case BuilderOp::branch_if_no_active_lanes_on_stack_top_equal:
                if (lastInstruction->fImmA == labelID) {
                    fInstructions.pop_back();
                    continue;
                }
                break;

            default:
                break;
        }
        break;
    }
    this->appendInstruction(BuilderOp::label, {}, labelID);
}

void Builder::jump(int labelID) {
    SkASSERT(labelID >= 0 && labelID < fNumLabels);
    if (const Instruction* lastInstruction = this->lastInstructionOnAnyStack()) {
        if (lastInstruction->fOp == BuilderOp::jump) {
            // The previous instruction was also `jump`, so this branch could never possibly occur.
            return;
        }
    }
    this->appendInstruction(BuilderOp::jump, {}, labelID);
}

void Builder::branch_if_any_lanes_active(int labelID) {
    if (!this->executionMaskWritesAreEnabled()) {
        this->jump(labelID);
        return;
    }

    SkASSERT(labelID >= 0 && labelID < fNumLabels);
    if (const Instruction* lastInstruction = this->lastInstructionOnAnyStack()) {
        if (lastInstruction->fOp == BuilderOp::branch_if_any_lanes_active ||
            lastInstruction->fOp == BuilderOp::jump) {
            // The previous instruction was `jump` or `branch_if_any_lanes_active`, so this branch
            // could never possibly occur.
            return;
        }
    }
    this->appendInstruction(BuilderOp::branch_if_any_lanes_active, {}, labelID);
}

void Builder::branch_if_all_lanes_active(int labelID) {
    if (!this->executionMaskWritesAreEnabled()) {
        this->jump(labelID);
        return;
    }

    SkASSERT(labelID >= 0 && labelID < fNumLabels);
    if (const Instruction* lastInstruction = this->lastInstructionOnAnyStack()) {
        if (lastInstruction->fOp == BuilderOp::branch_if_all_lanes_active ||
            lastInstruction->fOp == BuilderOp::jump) {
            // The previous instruction was `jump` or `branch_if_all_lanes_active`, so this branch
            // could never possibly occur.
            return;
        }
    }
    this->appendInstruction(BuilderOp::branch_if_all_lanes_active, {}, labelID);
}

void Builder::branch_if_no_lanes_active(int labelID) {
    if (!this->executionMaskWritesAreEnabled()) {
        return;
    }

    SkASSERT(labelID >= 0 && labelID < fNumLabels);
    if (const Instruction* lastInstruction = this->lastInstructionOnAnyStack()) {
        if (lastInstruction->fOp == BuilderOp::branch_if_no_lanes_active ||
            lastInstruction->fOp == BuilderOp::jump) {
            // The previous instruction was `jump` or `branch_if_no_lanes_active`, so this branch
            // could never possibly occur.
            return;
        }
    }
    this->appendInstruction(BuilderOp::branch_if_no_lanes_active, {}, labelID);
}

void Builder::branch_if_no_active_lanes_on_stack_top_equal(int value, int labelID) {
    SkASSERT(labelID >= 0 && labelID < fNumLabels);
    if (const Instruction* lastInstruction = this->lastInstructionOnAnyStack()) {
        if (lastInstruction->fOp == BuilderOp::jump ||
            (lastInstruction->fOp == BuilderOp::branch_if_no_active_lanes_on_stack_top_equal &&
             lastInstruction->fImmB == value)) {
            // The previous instruction was `jump` or
            // `branch_if_no_active_lanes_on_stack_top_equal` (checking against the same value),
            // so this branch could never possibly occur.
            return;
        }
    }
    this->appendInstruction(BuilderOp::branch_if_no_active_lanes_on_stack_top_equal,
                            {}, labelID, value);
}

void Builder::push_slots_or_immutable(SlotRange src, BuilderOp op) {
    SkASSERT(src.count >= 0);
    if (Instruction* lastInstruction = this->lastInstruction()) {
        // If the previous instruction was pushing slots contiguous to this range, we can collapse
        // the two pushes into one larger push.
        if (lastInstruction->fOp == op &&
            lastInstruction->fSlotA + lastInstruction->fImmA == src.index) {
            lastInstruction->fImmA += src.count;
            src.count = 0;
        }
    }

    if (src.count > 0) {
        this->appendInstruction(op, {src.index}, src.count);
    }

    // Look for a sequence of "copy stack to X, discard stack, copy X to stack". This is a common
    // pattern when multiple operations in a row affect the same variable. When we see this, we
    // can eliminate both the discard and the push.
    if (fInstructions.size() >= 3) {
        const Instruction* pushInst        = this->lastInstruction(/*fromBack=*/0);
        const Instruction* discardInst     = this->lastInstruction(/*fromBack=*/1);
        const Instruction* copyToSlotsInst = this->lastInstruction(/*fromBack=*/2);

        if (pushInst && discardInst && copyToSlotsInst && pushInst->fOp == BuilderOp::push_slots) {
            int pushIndex = pushInst->fSlotA;
            int pushCount = pushInst->fImmA;

            // Look for a `discard_stack` matching our push count.
            if (discardInst->fOp == BuilderOp::discard_stack && discardInst->fImmA == pushCount) {
                // Look for a `copy_stack_to_slots` matching our push.
                if ((copyToSlotsInst->fOp == BuilderOp::copy_stack_to_slots ||
                     copyToSlotsInst->fOp == BuilderOp::copy_stack_to_slots_unmasked) &&
                    copyToSlotsInst->fSlotA == pushIndex && copyToSlotsInst->fImmA == pushCount) {
                    // We found a matching sequence. Remove the discard and push.
                    fInstructions.pop_back();
                    fInstructions.pop_back();
                    return;
                }
            }
        }
    }
}

void Builder::push_slots_or_immutable_indirect(SlotRange fixedRange,
                                               int dynamicStackID,
                                               SlotRange limitRange,
                                               BuilderOp op) {
    // SlotA: fixed-range start
    // SlotB: limit-range end
    // immA: number of slots
    // immB: dynamic stack ID
    this->appendInstruction(op,
                            {fixedRange.index, limitRange.index + limitRange.count},
                            fixedRange.count,
                            dynamicStackID);
}

void Builder::push_uniform(SlotRange src) {
    SkASSERT(src.count >= 0);
    if (Instruction* lastInstruction = this->lastInstruction()) {
        // If the previous instruction was pushing uniforms contiguous to this range, we can
        // collapse the two pushes into one larger push.
        if (lastInstruction->fOp == BuilderOp::push_uniform &&
            lastInstruction->fSlotA + lastInstruction->fImmA == src.index) {
            lastInstruction->fImmA += src.count;
            return;
        }
    }

    if (src.count > 0) {
        this->appendInstruction(BuilderOp::push_uniform, {src.index}, src.count);
    }
}

void Builder::push_uniform_indirect(SlotRange fixedRange,
                                    int dynamicStackID,
                                    SlotRange limitRange) {
    // SlotA: fixed-range start
    // SlotB: limit-range end
    // immA: number of slots
    // immB: dynamic stack ID
    this->appendInstruction(BuilderOp::push_uniform_indirect,
                            {fixedRange.index, limitRange.index + limitRange.count},
                            fixedRange.count,
                            dynamicStackID);
}

void Builder::trace_var_indirect(int traceMaskStackID,
                                 SlotRange fixedRange,
                                 int dynamicStackID,
                                 SlotRange limitRange) {
    // SlotA: fixed-range start
    // SlotB: limit-range end
    // immA: trace-mask stack ID
    // immB: number of slots
    // immC: dynamic stack ID
    this->appendInstruction(BuilderOp::trace_var_indirect,
                            {fixedRange.index, limitRange.index + limitRange.count},
                            traceMaskStackID,
                            fixedRange.count,
                            dynamicStackID);
}

void Builder::push_constant_i(int32_t val, int count) {
    SkASSERT(count >= 0);
    if (count > 0) {
        if (Instruction* lastInstruction = this->lastInstruction()) {
            // If the previous op is pushing the same value, we can just push more of them.
            if (lastInstruction->fOp == BuilderOp::push_constant &&
                lastInstruction->fImmB == val) {
                lastInstruction->fImmA += count;
                return;
            }
        }
        this->appendInstruction(BuilderOp::push_constant, {}, count, val);
    }
}

void Builder::push_duplicates(int count) {
    if (Instruction* lastInstruction = this->lastInstruction()) {
        // If the previous op is pushing a constant, we can just push more of them.
        if (lastInstruction->fOp == BuilderOp::push_constant) {
            lastInstruction->fImmA += count;
            return;
        }
    }
    SkASSERT(count >= 0);
    if (count >= 3) {
        // Use a swizzle to splat the input into a 4-slot value.
        this->swizzle(/*consumedSlots=*/1, {0, 0, 0, 0});
        count -= 3;
    }
    for (; count >= 4; count -= 4) {
        // Clone the splatted value four slots at a time.
        this->push_clone(/*numSlots=*/4);
    }
    // Use a swizzle or clone to handle the trailing items.
    switch (count) {
        case 3: this->swizzle(/*consumedSlots=*/1, {0, 0, 0, 0}); break;
        case 2: this->swizzle(/*consumedSlots=*/1, {0, 0, 0}); break;
        case 1: this->push_clone(/*numSlots=*/1); break;
        default: break;
    }
}

void Builder::push_clone(int numSlots, int offsetFromStackTop) {
    // If we are cloning the stack top...
    if (numSlots == 1 && offsetFromStackTop == 0) {
        // ... and the previous op is pushing a constant...
        if (Instruction* lastInstruction = this->lastInstruction()) {
            if (lastInstruction->fOp == BuilderOp::push_constant) {
                // ... we can just push more of them.
                lastInstruction->fImmA += 1;
                return;
            }
        }
    }
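    // immA: number of slots to clone
    // immB: offset from the stack top to the base of the cloned range
    //       (i.e. numSlots + offsetFromStackTop)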
    this->appendInstruction(BuilderOp::push_clone, {}, numSlots, numSlots + offsetFromStackTop);
}

void Builder::push_clone_from_stack(SlotRange range, int otherStackID, int offsetFromStackTop) {
    // immA: number of slots
    // immB: other stack ID
    // immC: offset from stack top
    offsetFromStackTop -= range.index;

    if (Instruction* lastInstruction = this->lastInstruction()) {
        // If the previous op is also pushing a clone...
        if (lastInstruction->fOp == BuilderOp::push_clone_from_stack &&
            // ... from the same stack...
            lastInstruction->fImmB == otherStackID &&
            // ... and this clone starts at the same place that the last clone ends...
            lastInstruction->fImmC - lastInstruction->fImmA == offsetFromStackTop) {
            // ... just extend the existing clone-op.
            lastInstruction->fImmA += range.count;
            return;
        }
    }

    this->appendInstruction(BuilderOp::push_clone_from_stack, {},
                            range.count, otherStackID, offsetFromStackTop);
}

void Builder::push_clone_indirect_from_stack(SlotRange fixedOffset,
                                             int dynamicStackID,
                                             int otherStackID,
                                             int offsetFromStackTop) {
    // immA: number of slots
    // immB: other stack ID
    // immC: offset from stack top
    // immD: dynamic stack ID
    offsetFromStackTop -= fixedOffset.index;

    this->appendInstruction(BuilderOp::push_clone_indirect_from_stack, {},
                            fixedOffset.count, otherStackID, offsetFromStackTop, dynamicStackID);
}

void Builder::pop_slots(SlotRange dst) {
    if (!this->executionMaskWritesAreEnabled()) {
        this->pop_slots_unmasked(dst);
        return;
    }

    this->copy_stack_to_slots(dst);
    this->discard_stack(dst.count);
}

void Builder::simplifyPopSlotsUnmasked(SlotRange* dst) {
    if (!dst->count) {
        // There's nothing left to simplify.
        return;
    }
    Instruction* lastInstruction = this->lastInstruction();
    if (!lastInstruction) {
        // There's nothing left to simplify.
        return;
    }
    BuilderOp lastOp = lastInstruction->fOp;

    // If the last instruction is pushing a constant, we can simplify it by copying the constant
    // directly into the destination slot.
    if (lastOp == BuilderOp::push_constant) {
        // Get the last slot.
        int32_t value = lastInstruction->fImmB;
        lastInstruction->fImmA--;
        if (lastInstruction->fImmA == 0) {
            fInstructions.pop_back();
        }

        // Consume one destination slot.
        dst->count--;
        Slot destinationSlot = dst->index + dst->count;

        // Continue simplifying if possible.
        this->simplifyPopSlotsUnmasked(dst);

        // Write the constant directly to the destination slot.
        this->copy_constant(destinationSlot, value);
        return;
    }

    // If the last instruction is pushing a uniform, we can simplify it by copying the uniform
    // directly into the destination slot.
    if (lastOp == BuilderOp::push_uniform) {
        // Get the last slot.
        Slot sourceSlot = lastInstruction->fSlotA + lastInstruction->fImmA - 1;
        lastInstruction->fImmA--;
        if (lastInstruction->fImmA == 0) {
            fInstructions.pop_back();
        }

        // Consume one destination slot.
        dst->count--;
        Slot destinationSlot = dst->index + dst->count;

        // Continue simplifying if possible.
        this->simplifyPopSlotsUnmasked(dst);

        // Write the uniform directly to the destination slot.
        this->copy_uniform_to_slots_unmasked({destinationSlot, 1}, {sourceSlot, 1});
        return;
    }

    // If the last instruction is pushing a slot or immutable, we can just copy that slot.
    if (lastOp == BuilderOp::push_slots || lastOp == BuilderOp::push_immutable) {
        // Get the last slot.
        Slot sourceSlot = lastInstruction->fSlotA + lastInstruction->fImmA - 1;
        lastInstruction->fImmA--;
        if (lastInstruction->fImmA == 0) {
            fInstructions.pop_back();
        }

        // Consume one destination slot.
        dst->count--;
        Slot destinationSlot = dst->index + dst->count;

        // Try once more.
        this->simplifyPopSlotsUnmasked(dst);

        // Copy the slot directly.
        if (lastOp == BuilderOp::push_slots) {
            if (destinationSlot != sourceSlot) {
                this->copy_slots_unmasked({destinationSlot, 1}, {sourceSlot, 1});
            } else {
                // Copying from a value-slot into the same value-slot is a no-op.
            }
        } else {
            // Copy from immutable data directly to the destination slot.
            this->copy_immutable_unmasked({destinationSlot, 1}, {sourceSlot, 1});
        }
        return;
    }
}

void Builder::pop_slots_unmasked(SlotRange dst) {
    SkASSERT(dst.count >= 0);
    this->copy_stack_to_slots_unmasked(dst);
    this->discard_stack(dst.count);
}

void Builder::exchange_src() {
    if (Instruction* lastInstruction = this->lastInstruction()) {
        // If the previous op is also an exchange-src...
        if (lastInstruction->fOp == BuilderOp::exchange_src) {
            // ... both ops can be eliminated. A double-swap is a no-op.
            fInstructions.pop_back();
            return;
        }
    }

    this->appendInstruction(BuilderOp::exchange_src, {});
}

void Builder::pop_src_rgba() {
    if (Instruction* lastInstruction = this->lastInstruction()) {
        // If the previous op is exchanging src.rgba with the stack...
        if (lastInstruction->fOp == BuilderOp::exchange_src) {
            // ... both ops can be eliminated. It's just sliding the color back and forth.
            fInstructions.pop_back();
            this->discard_stack(4);
            return;
        }
    }

    this->appendInstruction(BuilderOp::pop_src_rgba, {});
}

void Builder::copy_stack_to_slots(SlotRange dst, int offsetFromStackTop) {
    // If the execution mask is known to be all-true, then we can ignore the write mask.
    if (!this->executionMaskWritesAreEnabled()) {
        this->copy_stack_to_slots_unmasked(dst, offsetFromStackTop);
        return;
    }

    // If the last instruction copied the previous stack slots, just extend it.
    if (Instruction* lastInstruction = this->lastInstruction()) {
        // If the last op is copy-stack-to-slots...
        if (lastInstruction->fOp == BuilderOp::copy_stack_to_slots &&
            // and this op's destination is immediately after the last copy-slots-op's destination
            lastInstruction->fSlotA + lastInstruction->fImmA == dst.index &&
            // and this op's source is immediately after the last copy-slots-op's source
            lastInstruction->fImmB - lastInstruction->fImmA == offsetFromStackTop) {
            // then we can just extend the copy!
            lastInstruction->fImmA += dst.count;
            return;
        }
    }

    this->appendInstruction(BuilderOp::copy_stack_to_slots, {dst.index},
                            dst.count, offsetFromStackTop);
}

void Builder::copy_stack_to_slots_indirect(SlotRange fixedRange,
                                           int dynamicStackID,
                                           SlotRange limitRange) {
    // SlotA: fixed-range start
    // SlotB: limit-range end
    // immA: number of slots
    // immB: dynamic stack ID
    this->appendInstruction(BuilderOp::copy_stack_to_slots_indirect,
                            {fixedRange.index, limitRange.index + limitRange.count},
                            fixedRange.count,
                            dynamicStackID);
}

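// Reports whether the two half-open slot ranges [index, index + count) intersect.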
static bool slot_ranges_overlap(SlotRange x, SlotRange y) {
    return x.index < y.index + y.count &&
           y.index < x.index + x.count;
}

void Builder::copy_constant(Slot slot, int constantValue) {
    // If the last instruction copied the same constant, just extend it.
    if (Instruction* lastInstr = this->lastInstruction()) {
        // If the last op is copy-constant...
        if (lastInstr->fOp == BuilderOp::copy_constant &&
            // ... and has the same value...
            lastInstr->fImmB == constantValue &&
            // ... and the slot is immediately after the last copy-constant's destination...
            lastInstr->fSlotA + lastInstr->fImmA == slot) {
            // ... then we can extend the copy!
            lastInstr->fImmA += 1;
            return;
        }
    }

    this->appendInstruction(BuilderOp::copy_constant, {slot}, 1, constantValue);
}

void Builder::copy_slots_unmasked(SlotRange dst, SlotRange src) {
    // If the last instruction copied adjacent slots, just extend it.
    if (Instruction* lastInstr = this->lastInstruction()) {
        // If the last op is a match...
        if (lastInstr->fOp == BuilderOp::copy_slot_unmasked &&
            // and this op's destination is immediately after the last copy-slots-op's destination
            lastInstr->fSlotA + lastInstr->fImmA == dst.index &&
            // and this op's source is immediately after the last copy-slots-op's source
            lastInstr->fSlotB + lastInstr->fImmA == src.index &&
            // and the source/dest ranges will not overlap
            !slot_ranges_overlap({lastInstr->fSlotB, lastInstr->fImmA + dst.count},
                                 {lastInstr->fSlotA, lastInstr->fImmA + dst.count})) {
            // then we can just extend the copy!
            lastInstr->fImmA += dst.count;
            return;
        }
    }

    SkASSERT(dst.count == src.count);
    this->appendInstruction(BuilderOp::copy_slot_unmasked, {dst.index, src.index}, dst.count);
}

void Builder::copy_immutable_unmasked(SlotRange dst, SlotRange src) {
    // If the last instruction copied adjacent immutable data, just extend it.
    if (Instruction* lastInstr = this->lastInstruction()) {
        // If the last op is a match...
        if (lastInstr->fOp == BuilderOp::copy_immutable_unmasked &&
            // and this op's destination is immediately after the last copy-slots-op's destination
            lastInstr->fSlotA + lastInstr->fImmA == dst.index &&
            // and this op's source is immediately after the last copy-slots-op's source
            lastInstr->fSlotB + lastInstr->fImmA == src.index) {
            // then we can just extend the copy!
            lastInstr->fImmA += dst.count;
            return;
        }
    }

    SkASSERT(dst.count == src.count);
    this->appendInstruction(BuilderOp::copy_immutable_unmasked, {dst.index, src.index}, dst.count);
}

void Builder::copy_uniform_to_slots_unmasked(SlotRange dst, SlotRange src) {
    // If the last instruction copied adjacent uniforms, just extend it.
    if (Instruction* lastInstr = this->lastInstruction()) {
        // If the last op is copy-uniform-to-slots-unmasked...
        if (lastInstr->fOp == BuilderOp::copy_uniform_to_slots_unmasked &&
            // and this op's destination is immediately after the last copy's destination
            lastInstr->fSlotB + lastInstr->fImmA == dst.index &&
            // and this op's source is immediately after the last copy's source
            lastInstr->fSlotA + lastInstr->fImmA == src.index) {
            // then we can just extend the copy!
            lastInstr->fImmA += dst.count;
            return;
        }
    }

    SkASSERT(dst.count == src.count);
    this->appendInstruction(BuilderOp::copy_uniform_to_slots_unmasked, {src.index, dst.index},
                            dst.count);
}

void Builder::copy_stack_to_slots_unmasked(SlotRange dst, int offsetFromStackTop) {
    // If the last instruction copied the previous stack slots, just extend it.
    if (Instruction* lastInstr = this->lastInstruction()) {
        // If the last op is copy-stack-to-slots-unmasked...
        if (lastInstr->fOp == BuilderOp::copy_stack_to_slots_unmasked &&
            // and this op's destination is immediately after the last copy-slots-op's destination
            lastInstr->fSlotA + lastInstr->fImmA == dst.index &&
            // and this op's source is immediately after the last copy-slots-op's source
            lastInstr->fImmB - lastInstr->fImmA == offsetFromStackTop) {
            // then we can just extend the copy!
            lastInstr->fImmA += dst.count;
            return;
        }
    }

    this->appendInstruction(BuilderOp::copy_stack_to_slots_unmasked, {dst.index},
                            dst.count, offsetFromStackTop);
}

void Builder::pop_return_mask() {
    SkASSERT(this->executionMaskWritesAreEnabled());

    // This instruction is going to overwrite the return mask. If the previous instruction was
    // masking off the return mask, that's wasted work and it can be eliminated.
    if (Instruction* lastInstruction = this->lastInstructionOnAnyStack()) {
        if (lastInstruction->fOp == BuilderOp::mask_off_return_mask) {
            fInstructions.pop_back();
        }
    }

    this->appendInstruction(BuilderOp::pop_return_mask, {});
}

void Builder::push_condition_mask() {
    SkASSERT(this->executionMaskWritesAreEnabled());

    // If the previous instruction is popping the condition mask, we can restore it onto the stack
    // "for free" instead of copying it.
    if (Instruction* lastInstruction = this->lastInstruction()) {
        if (lastInstruction->fOp == BuilderOp::pop_condition_mask) {
            this->pad_stack(1);
            return;
        }
    }
    this->appendInstruction(BuilderOp::push_condition_mask, {});
}

void Builder::merge_condition_mask() {
    SkASSERT(this->executionMaskWritesAreEnabled());

    // This instruction is going to overwrite the condition mask. If the previous instruction was
    // loading the condition mask, that's wasted work and it can be eliminated.
    if (Instruction* lastInstruction = this->lastInstructionOnAnyStack()) {
        if (lastInstruction->fOp == BuilderOp::pop_condition_mask) {
            int stackID = lastInstruction->fStackID;
            fInstructions.pop_back();
            this->discard_stack(/*count=*/1, stackID);
        }
    }

    this->appendInstruction(BuilderOp::merge_condition_mask, {});
}

void Builder::zero_slots_unmasked(SlotRange dst) {
    if (Instruction* lastInstruction = this->lastInstruction()) {
        if (lastInstruction->fOp == BuilderOp::copy_constant && lastInstruction->fImmB == 0) {
            if (lastInstruction->fSlotA + lastInstruction->fImmA == dst.index) {
                // The previous instruction was zeroing the range immediately before this range.
                // Combine the ranges.
                lastInstruction->fImmA += dst.count;
                return;
            }

            if (lastInstruction->fSlotA == dst.index + dst.count) {
                // The previous instruction was zeroing the range immediately after this range.
                // Combine the ranges.
                lastInstruction->fSlotA = dst.index;
                lastInstruction->fImmA += dst.count;
                return;
            }
        }
    }

    this->appendInstruction(BuilderOp::copy_constant, {dst.index}, dst.count, 0);
}

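// Packs up to eight swizzle components into a single int, four bits apiece; for example,
// components {1, 2, 3} pack into 0x321.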
static int pack_nybbles(SkSpan<const int8_t> components) {
    // Pack up to 8 elements into nybbles, in reverse order.
    int packed = 0;
    for (auto iter = components.rbegin(); iter != components.rend(); ++iter) {
        SkASSERT(*iter >= 0 && *iter <= 0xF);
        packed <<= 4;
        packed |= *iter;
    }
    return packed;
}

template <typename T>
static void unpack_nybbles_to_offsets(uint32_t components, SkSpan<T> offsets) {
    // Unpack component nybbles into byte-offsets pointing at stack slots.
    for (size_t index = 0; index < offsets.size(); ++index) {
        offsets[index] = (components & 0xF) * SkOpts::raster_pipeline_highp_stride * sizeof(float);
        components >>= 4;
    }
}

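// Returns the largest value found in the first `numComponents` packed nybbles.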
static int max_packed_nybble(uint32_t components, size_t numComponents) {
    int largest = 0;
    for (size_t index = 0; index < numComponents; ++index) {
        largest = std::max<int>(largest, components & 0xF);
        components >>= 4;
    }
    return largest;
}

void Builder::swizzle_copy_stack_to_slots(SlotRange dst,
                                          SkSpan<const int8_t> components,
                                          int offsetFromStackTop) {
    // When the execution-mask writes-enabled flag is off, we could squeeze out a little bit of
    // extra speed here by implementing and using an unmasked version of this op.

    // SlotA: fixed-range start
    // immA: number of swizzle components
    // immB: swizzle components
    // immC: offset from stack top
    this->appendInstruction(BuilderOp::swizzle_copy_stack_to_slots, {dst.index},
                            (int)components.size(),
                            pack_nybbles(components),
                            offsetFromStackTop);
}

void Builder::swizzle_copy_stack_to_slots_indirect(SlotRange fixedRange,
                                                   int dynamicStackID,
                                                   SlotRange limitRange,
                                                   SkSpan<const int8_t> components,
                                                   int offsetFromStackTop) {
    // When the execution-mask writes-enabled flag is off, we could squeeze out a little bit of
    // extra speed here by implementing and using an unmasked version of this op.

    // SlotA: fixed-range start
    // SlotB: limit-range end
    // immA: number of swizzle components
    // immB: swizzle components
    // immC: offset from stack top
    // immD: dynamic stack ID
    this->appendInstruction(BuilderOp::swizzle_copy_stack_to_slots_indirect,
                            {fixedRange.index, limitRange.index + limitRange.count},
                            (int)components.size(),
                            pack_nybbles(components),
                            offsetFromStackTop,
                            dynamicStackID);
}

void Builder::swizzle(int consumedSlots, SkSpan<const int8_t> components) {
    // Consumes `consumedSlots` elements on the stack, then generates `components.size()` elements.
    SkASSERT(consumedSlots >= 0);

    // We only allow up to 16 elements, and they can only reach 0-15 slots, due to nybble packing.
    int numElements = components.size();
    SkASSERT(numElements <= 16);
    SkASSERT(std::all_of(components.begin(), components.end(), [](int8_t e){ return e >= 0; }));
    SkASSERT(std::all_of(components.begin(), components.end(), [](int8_t e){ return e <= 0xF; }));

    // Make a local copy of the element array.
    int8_t elements[16] = {};
    std::copy(components.begin(), components.end(), std::begin(elements));

    while (numElements > 0) {
        // If the first element of the swizzle is zero...
        if (elements[0] != 0) {
            break;
        }
        // ...and zero isn't used elsewhere in the swizzle...
        if (std::any_of(&elements[1], &elements[numElements], [](int8_t e) { return e == 0; })) {
            break;
        }
        // We can omit the first slot from the swizzle entirely.
        // Slide everything forward by one slot, and reduce the element index by one.
        for (int index = 1; index < numElements; ++index) {
            elements[index - 1] = elements[index] - 1;
        }
        elements[numElements - 1] = 0;
        --consumedSlots;
        --numElements;
    }

    // A completely empty swizzle is a discard.
    if (numElements == 0) {
        this->discard_stack(consumedSlots);
        return;
    }

    if (consumedSlots <= 4 && numElements <= 4) {
        // We can fit everything into a little swizzle.
        int op = (int)BuilderOp::swizzle_1 + numElements - 1;
        this->appendInstruction((BuilderOp)op, {}, consumedSlots,
                                pack_nybbles(SkSpan(elements, numElements)));
        return;
    }

    // This is a big swizzle. We use the `shuffle` op to handle these. immA counts the consumed
    // slots. immB counts the generated slots. immC and immD hold packed-nybble shuffle values.
    this->appendInstruction(BuilderOp::shuffle, {},
                            consumedSlots, numElements,
                            pack_nybbles(SkSpan(&elements[0], 8)),
                            pack_nybbles(SkSpan(&elements[8], 8)));
}

void Builder::transpose(int columns, int rows) {
    // Transposes a matrix of size CxR on the stack (into a matrix of size RxC).
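    // For example, a 2-column x 3-row matrix generates the swizzle {0, 3, 1, 4, 2, 5}.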
    int8_t elements[16] = {};
    size_t index = 0;
    for (int r = 0; r < rows; ++r) {
        for (int c = 0; c < columns; ++c) {
            elements[index++] = (c * rows) + r;
        }
    }
    this->swizzle(/*consumedSlots=*/columns * rows, SkSpan(elements, index));
}

void Builder::diagonal_matrix(int columns, int rows) {
    // Generates a CxR diagonal matrix from the top two scalars on the stack.
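    // Swizzle index 1 selects the scalar on top of the stack and index 0 the scalar beneath it,
    // so the caller is expected to have pushed 0.0 followed by 1.0.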
    int8_t elements[16] = {};
    size_t index = 0;
    for (int c = 0; c < columns; ++c) {
        for (int r = 0; r < rows; ++r) {
            elements[index++] = (c == r) ? 1 : 0;
        }
    }
    this->swizzle(/*consumedSlots=*/2, SkSpan(elements, index));
}

void Builder::matrix_resize(int origColumns, int origRows, int newColumns, int newRows) {
    // Resizes a CxR matrix at the top of the stack to C'xR'.
    int8_t elements[16] = {};
    size_t index = 0;

    size_t consumedSlots = origColumns * origRows;
    size_t zeroOffset = 0, oneOffset = 0;

    for (int c = 0; c < newColumns; ++c) {
        for (int r = 0; r < newRows; ++r) {
            if (c < origColumns && r < origRows) {
                // Push an element from the original matrix.
                elements[index++] = (c * origRows) + r;
            } else {
                // This element is outside the original matrix; push 1 or 0.
                if (c == r) {
                    // We need to synthesize a literal 1.
                    if (oneOffset == 0) {
                        this->push_constant_f(1.0f);
                        oneOffset = consumedSlots++;
                    }
                    elements[index++] = oneOffset;
                } else {
                    // We need to synthesize a literal 0.
                    if (zeroOffset == 0) {
                        this->push_constant_f(0.0f);
                        zeroOffset = consumedSlots++;
                    }
                    elements[index++] = zeroOffset;
                }
            }
        }
    }
    this->swizzle(consumedSlots, SkSpan(elements, index));
}

void Builder::matrix_multiply(int leftColumns, int leftRows, int rightColumns, int rightRows) {
    BuilderOp op;
    switch (leftColumns) {
        case 2: op = BuilderOp::matrix_multiply_2; break;
        case 3: op = BuilderOp::matrix_multiply_3; break;
        case 4: op = BuilderOp::matrix_multiply_4; break;
        default: SkDEBUGFAIL("unsupported matrix dimensions"); return;
    }

    this->appendInstruction(op, {}, leftColumns, leftRows, rightColumns, rightRows);
}

std::unique_ptr<Program> Builder::finish(int numValueSlots,
                                         int numUniformSlots,
                                         int numImmutableSlots,
                                         DebugTracePriv* debugTrace) {
    // Verify that calls to enableExecutionMaskWrites and disableExecutionMaskWrites are balanced.
    SkASSERT(fExecutionMaskWritesEnabled == 0);

    return std::make_unique<Program>(std::move(fInstructions), numValueSlots, numUniformSlots,
                                     numImmutableSlots, fNumLabels, debugTrace);
}

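// Returns the net effect of an instruction on the depth of its temp stack: positive for pushes,
// negative for pops, and zero for ops which leave the stack depth unchanged.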
static int stack_usage(const Instruction& inst) {
    switch (inst.fOp) {
        case BuilderOp::push_condition_mask:
        case BuilderOp::push_loop_mask:
        case BuilderOp::push_return_mask:
            return 1;

        case BuilderOp::push_src_rgba:
        case BuilderOp::push_dst_rgba:
        case BuilderOp::push_device_xy01:
            return 4;

        case BuilderOp::push_immutable:
        case BuilderOp::push_immutable_indirect:
        case BuilderOp::push_constant:
        case BuilderOp::push_slots:
        case BuilderOp::push_slots_indirect:
        case BuilderOp::push_uniform:
        case BuilderOp::push_uniform_indirect:
        case BuilderOp::push_clone:
        case BuilderOp::push_clone_from_stack:
        case BuilderOp::push_clone_indirect_from_stack:
        case BuilderOp::pad_stack:
            return inst.fImmA;

        case BuilderOp::pop_condition_mask:
        case BuilderOp::pop_loop_mask:
        case BuilderOp::pop_and_reenable_loop_mask:
        case BuilderOp::pop_return_mask:
            return -1;

        case BuilderOp::pop_src_rgba:
        case BuilderOp::pop_dst_rgba:
            return -4;

        case ALL_N_WAY_BINARY_OP_CASES:
        case ALL_MULTI_SLOT_BINARY_OP_CASES:
        case BuilderOp::discard_stack:
        case BuilderOp::select:
            return -inst.fImmA;

        case ALL_N_WAY_TERNARY_OP_CASES:
        case ALL_MULTI_SLOT_TERNARY_OP_CASES:
            return 2 * -inst.fImmA;

        case BuilderOp::swizzle_1:
            return 1 - inst.fImmA;  // consumes immA slots and emits a scalar
        case BuilderOp::swizzle_2:
            return 2 - inst.fImmA;  // consumes immA slots and emits a 2-slot vector
        case BuilderOp::swizzle_3:
            return 3 - inst.fImmA;  // consumes immA slots and emits a 3-slot vector
        case BuilderOp::swizzle_4:
            return 4 - inst.fImmA;  // consumes immA slots and emits a 4-slot vector

        case BuilderOp::dot_2_floats:
            return -3;  // consumes two 2-slot vectors and emits one scalar
        case BuilderOp::dot_3_floats:
            return -5;  // consumes two 3-slot vectors and emits one scalar
        case BuilderOp::dot_4_floats:
            return -7;  // consumes two 4-slot vectors and emits one scalar

        case BuilderOp::refract_4_floats:
            return -5;  // consumes nine slots (N + I + eta) and emits a 4-slot vector (R)

        case BuilderOp::matrix_multiply_2:
        case BuilderOp::matrix_multiply_3:
        case BuilderOp::matrix_multiply_4:
            // consumes the left- and right-matrices; emits result over existing padding slots
            return -(inst.fImmA * inst.fImmB + inst.fImmC * inst.fImmD);

        case BuilderOp::shuffle: {
            int consumed = inst.fImmA;
            int generated = inst.fImmB;
            return generated - consumed;
        }
        case ALL_SINGLE_SLOT_UNARY_OP_CASES:
        case ALL_MULTI_SLOT_UNARY_OP_CASES:
        case ALL_IMMEDIATE_BINARY_OP_CASES:
        default:
            return 0;
    }
}

Program::StackDepths Program::tempStackMaxDepths() const {
    // Count the number of separate temp stacks that the program uses.
    int numStacks = 1;
    for (const Instruction& inst : fInstructions) {
        numStacks = std::max(numStacks, inst.fStackID + 1);
    }

    // Walk the program and calculate how deep each stack can potentially get.
    StackDepths largest, current;
    largest.push_back_n(numStacks, 0);
    current.push_back_n(numStacks, 0);

    for (const Instruction& inst : fInstructions) {
        int stackID = inst.fStackID;
        current[stackID] += stack_usage(inst);
        largest[stackID] = std::max(current[stackID], largest[stackID]);
        // If we assert here, the generated program has popped off the top of the stack.
        SkASSERTF(current[stackID] >= 0, "unbalanced temp stack push/pop on stack %d", stackID);
    }

    // Ensure that when the program is complete, our stacks are fully balanced.
    for (int stackID = 0; stackID < numStacks; ++stackID) {
        // If we assert here, the generated program has pushed more data than it has popped.
        SkASSERTF(current[stackID] == 0, "unbalanced temp stack push/pop on stack %d", stackID);
    }

    return largest;
}

Program::Program(TArray<Instruction> instrs,
                 int numValueSlots,
                 int numUniformSlots,
                 int numImmutableSlots,
                 int numLabels,
                 DebugTracePriv* debugTrace)
        : fInstructions(std::move(instrs))
        , fNumValueSlots(numValueSlots)
        , fNumUniformSlots(numUniformSlots)
        , fNumImmutableSlots(numImmutableSlots)
        , fNumLabels(numLabels)
        , fDebugTrace(debugTrace) {
    fTempStackMaxDepths = this->tempStackMaxDepths();

    fNumTempStackSlots = 0;
    for (const int depth : fTempStackMaxDepths) {
        fNumTempStackSlots += depth;
    }

    if (fDebugTrace) {
        fTraceHook = SkSL::Tracer::Make(&fDebugTrace->fTraceInfo);
    }
}

Program::~Program() = default;

static bool immutable_data_is_splattable(int32_t* immutablePtr, int numSlots) {
    // If every value in `immutablePtr[0 .. numSlots-1]` is bit-identical, we can use a splat.
1418 for (int index = 1; index < numSlots; ++index) {
1419 if (immutablePtr[0] != immutablePtr[index]) {
1420 return false;
1421 }
1422 }
1423 return true;
1424 }
1425
appendCopy(TArray<Stage> * pipeline,SkArenaAlloc * alloc,std::byte * basePtr,ProgramOp baseStage,SkRPOffset dst,int dstStride,SkRPOffset src,int srcStride,int numSlots) const1426 void Program::appendCopy(TArray<Stage>* pipeline,
1427 SkArenaAlloc* alloc,
1428 std::byte* basePtr, // only used for immutable-value copies
1429 ProgramOp baseStage,
1430 SkRPOffset dst, int dstStride,
1431 SkRPOffset src, int srcStride,
1432 int numSlots) const {
1433 SkASSERT(numSlots >= 0);
1434 while (numSlots > 4) {
1435 // If we are appending a large copy, split it up into groups of four at a time.
1436 this->appendCopy(pipeline, alloc, basePtr,
1437 baseStage,
1438 dst, dstStride,
1439 src, srcStride,
1440 /*numSlots=*/4);
1441 dst += 4 * dstStride * sizeof(float);
1442 src += 4 * srcStride * sizeof(float);
1443 numSlots -= 4;
1444 }
1445
1446 SkASSERT(numSlots <= 4);
1447
1448 if (numSlots > 0) {
1449 // If we are copying immutable data, it might be representable by a splat; this is
1450 // preferable, since splats are a tiny bit faster than regular copies.
1451 if (basePtr) {
1452 SkASSERT(srcStride == 1);
1453 int32_t* immutablePtr = reinterpret_cast<int32_t*>(basePtr + src);
1454 if (immutable_data_is_splattable(immutablePtr, numSlots)) {
1455 auto stage = (ProgramOp)((int)ProgramOp::copy_constant + numSlots - 1);
1456 SkRasterPipeline_ConstantCtx ctx;
1457 ctx.dst = dst;
1458 ctx.value = *immutablePtr;
1459 pipeline->push_back({stage, SkRPCtxUtils::Pack(ctx, alloc)});
1460 return;
1461 }
1462 }
1463
1464 // We can't use a splat, so emit the requested copy op.
1465 auto stage = (ProgramOp)((int)baseStage + numSlots - 1);
1466 SkRasterPipeline_BinaryOpCtx ctx;
1467 ctx.dst = dst;
1468 ctx.src = src;
1469 pipeline->push_back({stage, SkRPCtxUtils::Pack(ctx, alloc)});
1470 }
1471 }
1472
void Program::appendCopySlotsUnmasked(TArray<Stage>* pipeline,
                                      SkArenaAlloc* alloc,
                                      SkRPOffset dst,
                                      SkRPOffset src,
                                      int numSlots) const {
    this->appendCopy(pipeline, alloc, /*basePtr=*/nullptr,
                     ProgramOp::copy_slot_unmasked,
                     dst, SkOpts::raster_pipeline_highp_stride,
                     src, SkOpts::raster_pipeline_highp_stride,
                     numSlots);
}

void Program::appendCopyImmutableUnmasked(TArray<Stage>* pipeline,
                                          SkArenaAlloc* alloc,
                                          std::byte* basePtr,
                                          SkRPOffset dst,
                                          SkRPOffset src,
                                          int numSlots) const {
    this->appendCopy(pipeline, alloc, basePtr,
                     ProgramOp::copy_immutable_unmasked,
                     dst, SkOpts::raster_pipeline_highp_stride,
                     src, 1,
                     numSlots);
}

void Program::appendCopySlotsMasked(TArray<Stage>* pipeline,
                                    SkArenaAlloc* alloc,
                                    SkRPOffset dst,
                                    SkRPOffset src,
                                    int numSlots) const {
    this->appendCopy(pipeline, alloc, /*basePtr=*/nullptr,
                     ProgramOp::copy_slot_masked,
                     dst, SkOpts::raster_pipeline_highp_stride,
                     src, SkOpts::raster_pipeline_highp_stride,
                     numSlots);
}

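// Emits a single-slot math op once per slot, advancing the destination by one vector-width each
// time.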
void Program::appendSingleSlotUnaryOp(TArray<Stage>* pipeline, ProgramOp stage,
                                      float* dst, int numSlots) const {
    SkASSERT(numSlots >= 0);
    while (numSlots--) {
        pipeline->push_back({stage, dst});
        dst += SkOpts::raster_pipeline_highp_stride;
    }
}

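// Emits a multi-slot math op, selecting the widest available specialization (up to four slots per
// stage) until the requested slot count is exhausted.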
void Program::appendMultiSlotUnaryOp(TArray<Stage>* pipeline, ProgramOp baseStage,
                                     float* dst, int numSlots) const {
    SkASSERT(numSlots >= 0);
    while (numSlots > 0) {
        int currentSlots = std::min(numSlots, 4);
        auto stage = (ProgramOp)((int)baseStage + currentSlots - 1);
        pipeline->push_back({stage, dst});

        dst += 4 * SkOpts::raster_pipeline_highp_stride;
        numSlots -= 4;
    }
}

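// Emits a binary op which combines a slot range with an immediate value. Note that the wider
// specializations sit at enum values *below* the base stage, so the specialized stage is found by
// subtraction here, unlike the other append methods.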
void Program::appendImmediateBinaryOp(TArray<Stage>* pipeline, SkArenaAlloc* alloc,
                                      ProgramOp baseStage,
                                      SkRPOffset dst, int32_t value, int numSlots) const {
    SkASSERT(is_immediate_op((BuilderOp)baseStage));
    int slotsPerStage = is_multi_slot_immediate_op((BuilderOp)baseStage) ? 4 : 1;

    SkRasterPipeline_ConstantCtx ctx;
    ctx.dst = dst;
    ctx.value = value;

    SkASSERT(numSlots >= 0);
    while (numSlots > 0) {
        int currentSlots = std::min(numSlots, slotsPerStage);
        auto stage = (ProgramOp)((int)baseStage - (currentSlots - 1));
        pipeline->push_back({stage, SkRPCtxUtils::Pack(ctx, alloc)});

        ctx.dst += slotsPerStage * SkOpts::raster_pipeline_highp_stride * sizeof(float);
        numSlots -= slotsPerStage;
    }
}

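// Emits a binary op which reads from two adjacent slot ranges (`dst` immediately followed by
// `src`) and writes the result over `dst`.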
void Program::appendAdjacentNWayBinaryOp(TArray<Stage>* pipeline, SkArenaAlloc* alloc,
                                         ProgramOp stage,
                                         SkRPOffset dst, SkRPOffset src, int numSlots) const {
    // The source and destination must be directly next to one another.
    SkASSERT(numSlots >= 0);
    SkASSERT((dst + SkOpts::raster_pipeline_highp_stride * numSlots * sizeof(float)) == src);

    if (numSlots > 0) {
        SkRasterPipeline_BinaryOpCtx ctx;
        ctx.dst = dst;
        ctx.src = src;
        pipeline->push_back({stage, SkRPCtxUtils::Pack(ctx, alloc)});
    }
}

void Program::appendAdjacentMultiSlotBinaryOp(TArray<Stage>* pipeline, SkArenaAlloc* alloc,
                                              ProgramOp baseStage, std::byte* basePtr,
                                              SkRPOffset dst, SkRPOffset src, int numSlots) const {
    // The source and destination must be directly next to one another.
    SkASSERT(numSlots >= 0);
    SkASSERT((dst + SkOpts::raster_pipeline_highp_stride * numSlots * sizeof(float)) == src);

    if (numSlots > 4) {
        this->appendAdjacentNWayBinaryOp(pipeline, alloc, baseStage, dst, src, numSlots);
        return;
    }
    if (numSlots > 0) {
        auto specializedStage = (ProgramOp)((int)baseStage + numSlots);
        pipeline->push_back({specializedStage, basePtr + dst});
    }
}

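// Emits a ternary op which reads from three equally-spaced adjacent slot ranges (`dst`, `src0`,
// `src1`) and writes the result over `dst`; only the delta between ranges needs to be stored in
// the context, since the spacing is uniform.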
void Program::appendAdjacentNWayTernaryOp(TArray<Stage>* pipeline, SkArenaAlloc* alloc,
                                          ProgramOp stage, std::byte* basePtr, SkRPOffset dst,
                                          SkRPOffset src0, SkRPOffset src1, int numSlots) const {
    // The float pointers must all be immediately adjacent to each other.
    SkASSERT(numSlots >= 0);
    SkASSERT((dst + SkOpts::raster_pipeline_highp_stride * numSlots * sizeof(float)) == src0);
    SkASSERT((src0 + SkOpts::raster_pipeline_highp_stride * numSlots * sizeof(float)) == src1);

    if (numSlots > 0) {
        SkRasterPipeline_TernaryOpCtx ctx;
        ctx.dst = dst;
        ctx.delta = src0 - dst;
        pipeline->push_back({stage, SkRPCtxUtils::Pack(ctx, alloc)});
    }
}

void Program::appendAdjacentMultiSlotTernaryOp(TArray<Stage>* pipeline, SkArenaAlloc* alloc,
                                               ProgramOp baseStage, std::byte* basePtr,
                                               SkRPOffset dst, SkRPOffset src0, SkRPOffset src1,
                                               int numSlots) const {
    // The float pointers must all be immediately adjacent to each other.
    SkASSERT(numSlots >= 0);
    SkASSERT((dst + SkOpts::raster_pipeline_highp_stride * numSlots * sizeof(float)) == src0);
    SkASSERT((src0 + SkOpts::raster_pipeline_highp_stride * numSlots * sizeof(float)) == src1);

    if (numSlots > 4) {
        this->appendAdjacentNWayTernaryOp(pipeline, alloc, baseStage, basePtr,
                                          dst, src0, src1, numSlots);
        return;
    }
    if (numSlots > 0) {
        auto specializedStage = (ProgramOp)((int)baseStage + numSlots);
        pipeline->push_back({specializedStage, basePtr + dst});
    }
}

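// Emits a stack-rewind op only when stages cannot tail-call (standalone builds, or when
// SK_HAS_MUSTTAIL is unavailable); with tail-calling, the stack does not grow between stages, so
// a rewind would be wasted work.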
void Program::appendStackRewindForNonTailcallers(TArray<Stage>* pipeline) const {
#if defined(SKSL_STANDALONE) || !SK_HAS_MUSTTAIL
    // When SK_HAS_MUSTTAIL is not enabled, stack rewinds are critical because the stack may grow
    // after every single SkSL stage.
    this->appendStackRewind(pipeline);
#endif
}

void Program::appendStackRewind(TArray<Stage>* pipeline) const {
    pipeline->push_back({ProgramOp::stack_rewind, nullptr});
}

void Builder::invoke_shader(int childIdx) {
    this->appendInstruction(BuilderOp::invoke_shader, {}, childIdx);
}

void Builder::invoke_color_filter(int childIdx) {
    this->appendInstruction(BuilderOp::invoke_color_filter, {}, childIdx);
}

void Builder::invoke_blender(int childIdx) {
    this->appendInstruction(BuilderOp::invoke_blender, {}, childIdx);
}

void Builder::invoke_to_linear_srgb() {
    // The intrinsics accept a three-component value; add a fourth padding element (which will be
    // ignored) since our RP ops deal in RGBA colors.
    this->pad_stack(1);
    this->appendInstruction(BuilderOp::invoke_to_linear_srgb, {});
    this->discard_stack(1);
}

void Builder::invoke_from_linear_srgb() {
    // The intrinsics accept a three-component value; add a fourth padding element (which will be
    // ignored) since our RP ops deal in RGBA colors.
    this->pad_stack(1);
    this->appendInstruction(BuilderOp::invoke_from_linear_srgb, {});
    this->discard_stack(1);
}

static void* context_bit_pun(intptr_t val) {
    return sk_bit_cast<void*>(val);
}

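// Allocates the slab backing all of a program's slots. The layout is [values | stack | immutable]:
// value and stack slots are vector-width (N floats apiece), while immutable slots are scalar.
// Returns nullopt if the computed allocation size overflows.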
std::optional<Program::SlotData> Program::allocateSlotData(SkArenaAlloc* alloc) const {
    // Allocate a contiguous slab of slot data for immutables, values, and stack entries.
    const int N = SkOpts::raster_pipeline_highp_stride;
    const int scalarWidth = 1 * sizeof(float);
    const int vectorWidth = N * sizeof(float);
    SkSafeMath safe;
    size_t allocSize = safe.add(safe.mul(vectorWidth, safe.add(fNumValueSlots, fNumTempStackSlots)),
                                safe.mul(scalarWidth, fNumImmutableSlots));
    if (!safe || !SkTFitsIn<int>(allocSize)) {
        return std::nullopt;
    }
    float* slotPtr = static_cast<float*>(alloc->makeBytesAlignedTo(allocSize, vectorWidth));
    sk_bzero(slotPtr, allocSize);

    // Store the temp stack immediately after the values, and immutable data after the stack.
    SlotData s;
    s.values    = SkSpan<float>{slotPtr, N * fNumValueSlots};
    s.stack     = SkSpan<float>{s.values.end(), N * fNumTempStackSlots};
    s.immutable = SkSpan<float>{s.stack.end(), 1 * fNumImmutableSlots};
    return s;
}

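// Converts the program into an executable SkRasterPipeline in two passes: first every stage is
// appended and each label's absolute pipeline position is recorded; afterwards, each branch
// context's offset is rewritten from a label ID into a relative stage offset.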
bool Program::appendStages(SkRasterPipeline* pipeline,
                           SkArenaAlloc* alloc,
                           RP::Callbacks* callbacks,
                           SkSpan<const float> uniforms) const {
#if defined(SKSL_STANDALONE)
    return false;
#else
    // Convert our Instruction list to an array of ProgramOps.
    TArray<Stage> stages;
    std::optional<SlotData> slotData = this->allocateSlotData(alloc);
    if (!slotData) {
        return false;
    }
    this->makeStages(&stages, alloc, uniforms, *slotData);

    // Allocate buffers for branch targets and labels; these are needed to convert labels into
    // actual offsets into the pipeline and fix up branches.
    TArray<SkRasterPipeline_BranchCtx*> branchContexts;
    branchContexts.reserve_exact(fNumLabels);
    TArray<int> labelOffsets;
    labelOffsets.push_back_n(fNumLabels, -1);
    TArray<int> branchGoesToLabel;
    branchGoesToLabel.reserve_exact(fNumLabels);

    auto resetBasePointer = [&]() {
        // Whenever we hand off control to another shader, we have to assume that it might
        // overwrite the base pointer (if it uses SkSL, it will!), so we reset it on return.
        pipeline->append(SkRasterPipelineOp::set_base_pointer, (*slotData).values.data());
    };

    resetBasePointer();

    for (const Stage& stage : stages) {
        switch (stage.op) {
            case ProgramOp::stack_rewind:
                pipeline->appendStackRewind();
                break;

            case ProgramOp::invoke_shader:
                if (!callbacks || !callbacks->appendShader(sk_bit_cast<intptr_t>(stage.ctx))) {
                    return false;
                }
                resetBasePointer();
                break;

            case ProgramOp::invoke_color_filter:
                if (!callbacks || !callbacks->appendColorFilter(sk_bit_cast<intptr_t>(stage.ctx))) {
                    return false;
                }
                resetBasePointer();
                break;

            case ProgramOp::invoke_blender:
                if (!callbacks || !callbacks->appendBlender(sk_bit_cast<intptr_t>(stage.ctx))) {
                    return false;
                }
                resetBasePointer();
                break;

            case ProgramOp::invoke_to_linear_srgb:
                if (!callbacks) {
                    return false;
                }
                callbacks->toLinearSrgb(stage.ctx);
                // A ColorSpaceXform shouldn't ever alter the base pointer, so we don't need to
                // call resetBasePointer here.
                break;

            case ProgramOp::invoke_from_linear_srgb:
                if (!callbacks) {
                    return false;
                }
                callbacks->fromLinearSrgb(stage.ctx);
                // A ColorSpaceXform shouldn't ever alter the base pointer, so we don't need to
                // call resetBasePointer here.
                break;

            case ProgramOp::label: {
                // Remember the absolute pipeline position of this label.
                int labelID = sk_bit_cast<intptr_t>(stage.ctx);
                SkASSERT(labelID >= 0 && labelID < fNumLabels);
                labelOffsets[labelID] = pipeline->getNumStages();
                break;
            }
            case ProgramOp::jump:
            case ProgramOp::branch_if_all_lanes_active:
            case ProgramOp::branch_if_any_lanes_active:
            case ProgramOp::branch_if_no_lanes_active:
            case ProgramOp::branch_if_no_active_lanes_eq: {
                // The branch context contains a valid label ID at this point.
                auto* branchCtx = static_cast<SkRasterPipeline_BranchCtx*>(stage.ctx);
                int labelID = branchCtx->offset;
                SkASSERT(labelID >= 0 && labelID < fNumLabels);

                // Replace the label ID in the branch context with the absolute pipeline position.
                // We will go back over the branch targets at the end and fix them up. Then fall
                // through to the default case, which appends the branch op itself.
                branchCtx->offset = pipeline->getNumStages();

                SkASSERT(branchContexts.size() == branchGoesToLabel.size());
                branchContexts.push_back(branchCtx);
                branchGoesToLabel.push_back(labelID);
                [[fallthrough]];
            }
            default:
                // Append a regular op to the program.
                SkASSERT((int)stage.op < kNumRasterPipelineHighpOps);
                pipeline->append((SkRasterPipelineOp)stage.op, stage.ctx);
                break;
        }
    }

    // Now that we have assembled the program and know the pipeline positions of each label and
    // branch, fix up every branch target.
    SkASSERT(branchContexts.size() == branchGoesToLabel.size());
    for (int index = 0; index < branchContexts.size(); ++index) {
        int branchFromIdx = branchContexts[index]->offset;
        int branchToIdx = labelOffsets[branchGoesToLabel[index]];
        branchContexts[index]->offset = branchToIdx - branchFromIdx;
    }

    return true;
#endif
}

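// Lowers each BuilderOp instruction into raster-pipeline stages. This assigns concrete pointers
// to every temporary stack, pre-populates the immutable slots, and inserts stack rewinds where
// backwards branches or long stretches of instructions require them.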
void Program::makeStages(TArray<Stage>* pipeline,
                         SkArenaAlloc* alloc,
                         SkSpan<const float> uniforms,
                         const SlotData& slots) const {
    SkASSERT(fNumUniformSlots == SkToInt(uniforms.size()));

    const int N = SkOpts::raster_pipeline_highp_stride;
    int mostRecentRewind = 0;

    // Assemble a map holding the current stack-top for each temporary stack. Position each temp
    // stack immediately after the previous temp stack; temp stacks are never allowed to overlap.
    int pos = 0;
    TArray<float*> tempStackMap;
    tempStackMap.resize(fTempStackMaxDepths.size());
    for (int idx = 0; idx < fTempStackMaxDepths.size(); ++idx) {
        tempStackMap[idx] = slots.stack.begin() + (pos * N);
        pos += fTempStackMaxDepths[idx];
    }

    // Track labels that we have reached in processing.
    TArray<int> labelToInstructionIndex;
    labelToInstructionIndex.push_back_n(fNumLabels, -1);

    int mostRecentInvocationInstructionIdx = 0;

    auto EmitStackRewindForBackwardsBranch = [&](int labelID) {
        // If we have already encountered the label associated with this branch, this is a
        // backwards branch. Add a stack-rewind immediately before the branch to ensure that
        // long-running loops don't use an unbounded amount of stack space.
        int labelInstructionIdx = labelToInstructionIndex[labelID];
        if (labelInstructionIdx >= 0) {
            if (mostRecentInvocationInstructionIdx > labelInstructionIdx) {
                // The backwards-branch range includes an external invocation to another shader,
                // color filter, blender, or colorspace conversion. In this case, we always emit a
                // stack rewind, since the non-tailcall stages may exist on the stack.
                this->appendStackRewind(pipeline);
            } else {
                // The backwards-branch range only includes SkSL ops. If tailcalling is supported,
                // stack rewinding isn't needed. If the platform cannot tailcall, we need to rewind.
                this->appendStackRewindForNonTailcallers(pipeline);
            }
            mostRecentRewind = pipeline->size();
        }
    };

    auto* const basePtr = (std::byte*)slots.values.data();
    auto OffsetFromBase = [&](const void* ptr) -> SkRPOffset {
        return (SkRPOffset)((const std::byte*)ptr - basePtr);
    };

    // Copy all immutable values into the immutable slots.
    for (const Instruction& inst : fInstructions) {
        if (inst.fOp == BuilderOp::store_immutable_value) {
            slots.immutable[inst.fSlotA] = sk_bit_cast<float>(inst.fImmA);
        }
    }

    // Write each BuilderOp to the pipeline array.
    pipeline->reserve_exact(pipeline->size() + fInstructions.size());
    for (int instructionIdx = 0; instructionIdx < fInstructions.size(); ++instructionIdx) {
        const Instruction& inst = fInstructions[instructionIdx];

        auto ImmutableA = [&]() { return &slots.immutable[1 * inst.fSlotA]; };
        auto ImmutableB = [&]() { return &slots.immutable[1 * inst.fSlotB]; };
        auto SlotA = [&]() { return &slots.values[N * inst.fSlotA]; };
        auto SlotB = [&]() { return &slots.values[N * inst.fSlotB]; };
        auto UniformA = [&]() { return &uniforms[inst.fSlotA]; };
        auto AllocTraceContext = [&](auto* ctx) {
            // We pass `ctx` solely for its type; the value is unused.
            using ContextType = typename std::remove_reference<decltype(*ctx)>::type;
            ctx = alloc->make<ContextType>();
            ctx->traceMask = reinterpret_cast<int*>(tempStackMap[inst.fImmA] - N);
            ctx->traceHook = fTraceHook.get();
            return ctx;
        };
        float*& tempStackPtr = tempStackMap[inst.fStackID];

        switch (inst.fOp) {
            case BuilderOp::label: {
                intptr_t labelID = inst.fImmA;
                SkASSERT(labelID >= 0 && labelID < fNumLabels);
                SkASSERT(labelToInstructionIndex[labelID] == -1);
                labelToInstructionIndex[labelID] = instructionIdx;
                pipeline->push_back({ProgramOp::label, context_bit_pun(labelID)});
                break;
            }
            case BuilderOp::jump:
            case BuilderOp::branch_if_any_lanes_active:
            case BuilderOp::branch_if_no_lanes_active: {
                SkASSERT(inst.fImmA >= 0 && inst.fImmA < fNumLabels);
                EmitStackRewindForBackwardsBranch(inst.fImmA);

                auto* ctx = alloc->make<SkRasterPipeline_BranchCtx>();
                ctx->offset = inst.fImmA;
                pipeline->push_back({(ProgramOp)inst.fOp, ctx});
                break;
            }
            case BuilderOp::branch_if_all_lanes_active: {
                SkASSERT(inst.fImmA >= 0 && inst.fImmA < fNumLabels);
                EmitStackRewindForBackwardsBranch(inst.fImmA);

                auto* ctx = alloc->make<SkRasterPipeline_BranchIfAllLanesActiveCtx>();
                ctx->offset = inst.fImmA;
                pipeline->push_back({ProgramOp::branch_if_all_lanes_active, ctx});
                break;
            }
            case BuilderOp::branch_if_no_active_lanes_on_stack_top_equal: {
                SkASSERT(inst.fImmA >= 0 && inst.fImmA < fNumLabels);
                EmitStackRewindForBackwardsBranch(inst.fImmA);

                auto* ctx = alloc->make<SkRasterPipeline_BranchIfEqualCtx>();
                ctx->offset = inst.fImmA;
                ctx->value = inst.fImmB;
                ctx->ptr = reinterpret_cast<int*>(tempStackPtr - N);
                pipeline->push_back({ProgramOp::branch_if_no_active_lanes_eq, ctx});
                break;
            }
            case BuilderOp::init_lane_masks: {
                auto* ctx = alloc->make<SkRasterPipeline_InitLaneMasksCtx>();
                pipeline->push_back({ProgramOp::init_lane_masks, ctx});
                break;
            }
            case BuilderOp::store_src_rg:
                pipeline->push_back({ProgramOp::store_src_rg, SlotA()});
                break;

            case BuilderOp::store_src:
                pipeline->push_back({ProgramOp::store_src, SlotA()});
                break;

            case BuilderOp::store_dst:
                pipeline->push_back({ProgramOp::store_dst, SlotA()});
                break;

            case BuilderOp::store_device_xy01:
                pipeline->push_back({ProgramOp::store_device_xy01, SlotA()});
                break;

            case BuilderOp::store_immutable_value:
                // The immutable slots were populated in an earlier pass.
                break;

            case BuilderOp::load_src:
                pipeline->push_back({ProgramOp::load_src, SlotA()});
                break;

            case BuilderOp::load_dst:
                pipeline->push_back({ProgramOp::load_dst, SlotA()});
                break;

            case ALL_SINGLE_SLOT_UNARY_OP_CASES: {
                float* dst = tempStackPtr - (inst.fImmA * N);
                this->appendSingleSlotUnaryOp(pipeline, (ProgramOp)inst.fOp, dst, inst.fImmA);
                break;
            }
            case ALL_MULTI_SLOT_UNARY_OP_CASES: {
                float* dst = tempStackPtr - (inst.fImmA * N);
                this->appendMultiSlotUnaryOp(pipeline, (ProgramOp)inst.fOp, dst, inst.fImmA);
                break;
            }
            case ALL_IMMEDIATE_BINARY_OP_CASES: {
                float* dst = (inst.fSlotA == NA) ? tempStackPtr - (inst.fImmA * N)
                                                 : SlotA();

                this->appendImmediateBinaryOp(pipeline, alloc, (ProgramOp)inst.fOp,
                                              OffsetFromBase(dst), inst.fImmB, inst.fImmA);
                break;
            }
            case ALL_N_WAY_BINARY_OP_CASES: {
                float* src = tempStackPtr - (inst.fImmA * N);
                float* dst = tempStackPtr - (inst.fImmA * 2 * N);
                this->appendAdjacentNWayBinaryOp(pipeline, alloc, (ProgramOp)inst.fOp,
                                                 OffsetFromBase(dst), OffsetFromBase(src),
                                                 inst.fImmA);
                break;
            }
            case ALL_MULTI_SLOT_BINARY_OP_CASES: {
                float* src = tempStackPtr - (inst.fImmA * N);
                float* dst = tempStackPtr - (inst.fImmA * 2 * N);
                this->appendAdjacentMultiSlotBinaryOp(pipeline, alloc, (ProgramOp)inst.fOp,
                                                      basePtr,
                                                      OffsetFromBase(dst),
                                                      OffsetFromBase(src),
                                                      inst.fImmA);
                break;
            }
            case ALL_N_WAY_TERNARY_OP_CASES: {
                float* src1 = tempStackPtr - (inst.fImmA * N);
                float* src0 = tempStackPtr - (inst.fImmA * 2 * N);
                float* dst  = tempStackPtr - (inst.fImmA * 3 * N);
                this->appendAdjacentNWayTernaryOp(pipeline, alloc, (ProgramOp)inst.fOp, basePtr,
                                                  OffsetFromBase(dst),
                                                  OffsetFromBase(src0),
                                                  OffsetFromBase(src1),
                                                  inst.fImmA);
                break;
            }
            case ALL_MULTI_SLOT_TERNARY_OP_CASES: {
                float* src1 = tempStackPtr - (inst.fImmA * N);
                float* src0 = tempStackPtr - (inst.fImmA * 2 * N);
                float* dst  = tempStackPtr - (inst.fImmA * 3 * N);
                this->appendAdjacentMultiSlotTernaryOp(pipeline, alloc, (ProgramOp)inst.fOp,
                                                       basePtr,
                                                       OffsetFromBase(dst),
                                                       OffsetFromBase(src0),
                                                       OffsetFromBase(src1),
                                                       inst.fImmA);
                break;
            }
            case BuilderOp::select: {
                float* src = tempStackPtr - (inst.fImmA * N);
                float* dst = tempStackPtr - (inst.fImmA * 2 * N);
                this->appendCopySlotsMasked(pipeline, alloc,
                                            OffsetFromBase(dst),
                                            OffsetFromBase(src),
                                            inst.fImmA);
                break;
            }
            case BuilderOp::copy_slot_masked:
                this->appendCopySlotsMasked(pipeline, alloc,
                                            OffsetFromBase(SlotA()),
                                            OffsetFromBase(SlotB()),
                                            inst.fImmA);
                break;

            case BuilderOp::copy_slot_unmasked:
                this->appendCopySlotsUnmasked(pipeline, alloc,
                                              OffsetFromBase(SlotA()),
                                              OffsetFromBase(SlotB()),
                                              inst.fImmA);
                break;

            case BuilderOp::copy_immutable_unmasked:
                this->appendCopyImmutableUnmasked(pipeline, alloc, basePtr,
                                                  OffsetFromBase(SlotA()),
                                                  OffsetFromBase(ImmutableB()),
                                                  inst.fImmA);
                break;

            case BuilderOp::refract_4_floats: {
                float* dst = tempStackPtr - (9 * N);
                pipeline->push_back({ProgramOp::refract_4_floats, dst});
                break;
            }
            case BuilderOp::inverse_mat2:
            case BuilderOp::inverse_mat3:
            case BuilderOp::inverse_mat4: {
                float* dst = tempStackPtr - (inst.fImmA * N);
                pipeline->push_back({(ProgramOp)inst.fOp, dst});
                break;
            }
            case BuilderOp::dot_2_floats:
            case BuilderOp::dot_3_floats:
            case BuilderOp::dot_4_floats: {
                float* dst = tempStackPtr - (inst.fImmA * 2 * N);
                pipeline->push_back({(ProgramOp)inst.fOp, dst});
                break;
            }
            case BuilderOp::swizzle_1: {
                // A single-component swizzle just copies a slot and shrinks the stack; we can
                // slightly improve codegen by making that simplification here.
                int offset = inst.fImmB;
                SkASSERT(offset >= 0 && offset <= 15);
                float* dst = tempStackPtr - (inst.fImmA * N);
                float* src = dst + (offset * N);
                if (src != dst) {
                    this->appendCopySlotsUnmasked(pipeline, alloc,
                                                  OffsetFromBase(dst),
                                                  OffsetFromBase(src),
                                                  /*numSlots=*/1);
                }
                break;
            }
            case BuilderOp::swizzle_2:
            case BuilderOp::swizzle_3:
            case BuilderOp::swizzle_4: {
                SkRasterPipeline_SwizzleCtx ctx;
                ctx.dst = OffsetFromBase(tempStackPtr - (N * inst.fImmA));
                // Unpack component nybbles into byte-offsets pointing at stack slots.
                unpack_nybbles_to_offsets(inst.fImmB, SkSpan(ctx.offsets));
                pipeline->push_back({(ProgramOp)inst.fOp, SkRPCtxUtils::Pack(ctx, alloc)});
                break;
            }
            case BuilderOp::shuffle: {
                int consumed = inst.fImmA;
                int generated = inst.fImmB;

                auto* ctx = alloc->make<SkRasterPipeline_ShuffleCtx>();
                ctx->ptr = reinterpret_cast<int32_t*>(tempStackPtr) - (N * consumed);
                ctx->count = generated;
                // Unpack immC and immD from nybble form into the offset array.
                unpack_nybbles_to_offsets(inst.fImmC, SkSpan(&ctx->offsets[0], 8));
                unpack_nybbles_to_offsets(inst.fImmD, SkSpan(&ctx->offsets[8], 8));
                pipeline->push_back({ProgramOp::shuffle, ctx});
                break;
            }
            case BuilderOp::matrix_multiply_2:
            case BuilderOp::matrix_multiply_3:
            case BuilderOp::matrix_multiply_4: {
                int consumed = (inst.fImmB * inst.fImmC) +  // result
                               (inst.fImmA * inst.fImmB) +  // left-matrix
                               (inst.fImmC * inst.fImmD);   // right-matrix

                SkRasterPipeline_MatrixMultiplyCtx ctx;
                ctx.dst = OffsetFromBase(tempStackPtr - (N * consumed));
                ctx.leftColumns = inst.fImmA;
                ctx.leftRows = inst.fImmB;
                ctx.rightColumns = inst.fImmC;
                ctx.rightRows = inst.fImmD;
                pipeline->push_back({(ProgramOp)inst.fOp, SkRPCtxUtils::Pack(ctx, alloc)});
                break;
            }
            case BuilderOp::exchange_src: {
                float* dst = tempStackPtr - (4 * N);
                pipeline->push_back({ProgramOp::exchange_src, dst});
                break;
            }
            case BuilderOp::push_src_rgba: {
                float* dst = tempStackPtr;
                pipeline->push_back({ProgramOp::store_src, dst});
                break;
            }
            case BuilderOp::push_dst_rgba: {
                float* dst = tempStackPtr;
                pipeline->push_back({ProgramOp::store_dst, dst});
                break;
            }
            case BuilderOp::push_device_xy01: {
                float* dst = tempStackPtr;
                pipeline->push_back({ProgramOp::store_device_xy01, dst});
                break;
            }
            case BuilderOp::pop_src_rgba: {
                float* src = tempStackPtr - (4 * N);
                pipeline->push_back({ProgramOp::load_src, src});
                break;
            }
            case BuilderOp::pop_dst_rgba: {
                float* src = tempStackPtr - (4 * N);
                pipeline->push_back({ProgramOp::load_dst, src});
                break;
            }
            case BuilderOp::push_slots: {
                float* dst = tempStackPtr;
                this->appendCopySlotsUnmasked(pipeline, alloc,
                                              OffsetFromBase(dst),
                                              OffsetFromBase(SlotA()),
                                              inst.fImmA);
                break;
            }
            case BuilderOp::push_immutable: {
                float* dst = tempStackPtr;
                this->appendCopyImmutableUnmasked(pipeline, alloc, basePtr,
                                                  OffsetFromBase(dst),
                                                  OffsetFromBase(ImmutableA()),
                                                  inst.fImmA);
                break;
            }
            case BuilderOp::copy_stack_to_slots_indirect:
            case BuilderOp::push_immutable_indirect:
            case BuilderOp::push_slots_indirect:
            case BuilderOp::push_uniform_indirect: {
                // SlotA: fixed-range start
                // SlotB: limit-range end
                // immA: number of slots to copy
                // immB: dynamic stack ID
                ProgramOp op;
                auto* ctx = alloc->make<SkRasterPipeline_CopyIndirectCtx>();
                ctx->indirectOffset =
                        reinterpret_cast<const uint32_t*>(tempStackMap[inst.fImmB]) - (1 * N);
                ctx->indirectLimit = inst.fSlotB - inst.fSlotA - inst.fImmA;
                ctx->slots = inst.fImmA;
                if (inst.fOp == BuilderOp::push_slots_indirect) {
                    op = ProgramOp::copy_from_indirect_unmasked;
                    ctx->src = reinterpret_cast<const int32_t*>(SlotA());
                    ctx->dst = reinterpret_cast<int32_t*>(tempStackPtr);
                } else if (inst.fOp == BuilderOp::push_immutable_indirect) {
                    // We reuse the indirect-uniform op for indirect copies of immutable data.
                    op = ProgramOp::copy_from_indirect_uniform_unmasked;
                    ctx->src = reinterpret_cast<const int32_t*>(ImmutableA());
                    ctx->dst = reinterpret_cast<int32_t*>(tempStackPtr);
                } else if (inst.fOp == BuilderOp::push_uniform_indirect) {
                    op = ProgramOp::copy_from_indirect_uniform_unmasked;
                    ctx->src = reinterpret_cast<const int32_t*>(UniformA());
                    ctx->dst = reinterpret_cast<int32_t*>(tempStackPtr);
                } else {
                    op = ProgramOp::copy_to_indirect_masked;
                    ctx->src = reinterpret_cast<const int32_t*>(tempStackPtr) - (ctx->slots * N);
                    ctx->dst = reinterpret_cast<int32_t*>(SlotA());
                }
                pipeline->push_back({op, ctx});
                break;
            }
            case BuilderOp::push_uniform:
            case BuilderOp::copy_uniform_to_slots_unmasked: {
                const float* src = UniformA();
                float* dst = (inst.fOp == BuilderOp::push_uniform) ? tempStackPtr : SlotB();

                for (int remaining = inst.fImmA; remaining > 0; remaining -= 4) {
                    auto ctx = alloc->make<SkRasterPipeline_UniformCtx>();
                    ctx->dst = reinterpret_cast<int32_t*>(dst);
                    ctx->src = reinterpret_cast<const int32_t*>(src);
                    switch (remaining) {
                        case 1:  pipeline->push_back({ProgramOp::copy_uniform,    ctx}); break;
                        case 2:  pipeline->push_back({ProgramOp::copy_2_uniforms, ctx}); break;
                        case 3:  pipeline->push_back({ProgramOp::copy_3_uniforms, ctx}); break;
                        default: pipeline->push_back({ProgramOp::copy_4_uniforms, ctx}); break;
                    }
                    dst += 4 * N;
                    src += 4;
                }
                break;
            }
            case BuilderOp::push_condition_mask: {
                float* dst = tempStackPtr;
                pipeline->push_back({ProgramOp::store_condition_mask, dst});
                break;
            }
            case BuilderOp::pop_condition_mask: {
                float* src = tempStackPtr - (1 * N);
                pipeline->push_back({ProgramOp::load_condition_mask, src});
                break;
            }
            case BuilderOp::merge_condition_mask:
            case BuilderOp::merge_inv_condition_mask: {
                float* ptr = tempStackPtr - (2 * N);
                pipeline->push_back({(ProgramOp)inst.fOp, ptr});
                break;
            }
            case BuilderOp::push_loop_mask: {
                float* dst = tempStackPtr;
                pipeline->push_back({ProgramOp::store_loop_mask, dst});
                break;
            }
            case BuilderOp::pop_loop_mask: {
                float* src = tempStackPtr - (1 * N);
                pipeline->push_back({ProgramOp::load_loop_mask, src});
                break;
            }
            case BuilderOp::pop_and_reenable_loop_mask: {
                float* src = tempStackPtr - (1 * N);
                pipeline->push_back({ProgramOp::reenable_loop_mask, src});
                break;
            }
            case BuilderOp::reenable_loop_mask:
                pipeline->push_back({ProgramOp::reenable_loop_mask, SlotA()});
                break;

            case BuilderOp::mask_off_loop_mask:
                pipeline->push_back({ProgramOp::mask_off_loop_mask, nullptr});
                break;

            case BuilderOp::merge_loop_mask: {
                float* src = tempStackPtr - (1 * N);
                pipeline->push_back({ProgramOp::merge_loop_mask, src});
                break;
            }
            case BuilderOp::push_return_mask: {
                float* dst = tempStackPtr;
                pipeline->push_back({ProgramOp::store_return_mask, dst});
                break;
            }
            case BuilderOp::pop_return_mask: {
                float* src = tempStackPtr - (1 * N);
                pipeline->push_back({ProgramOp::load_return_mask, src});
                break;
            }
            case BuilderOp::mask_off_return_mask:
                pipeline->push_back({ProgramOp::mask_off_return_mask, nullptr});
                break;

            case BuilderOp::copy_constant:
            case BuilderOp::push_constant: {
                float* dst = (inst.fOp == BuilderOp::copy_constant) ? SlotA() : tempStackPtr;
                // Splat constant values into the destination (value slots or the stack).
                for (int remaining = inst.fImmA; remaining > 0; remaining -= 4) {
                    SkRasterPipeline_ConstantCtx ctx;
                    ctx.dst = OffsetFromBase(dst);
                    ctx.value = inst.fImmB;
                    void* ptr = SkRPCtxUtils::Pack(ctx, alloc);
                    switch (remaining) {
                        case 1:  pipeline->push_back({ProgramOp::copy_constant,     ptr}); break;
                        case 2:  pipeline->push_back({ProgramOp::splat_2_constants, ptr}); break;
                        case 3:  pipeline->push_back({ProgramOp::splat_3_constants, ptr}); break;
                        default: pipeline->push_back({ProgramOp::splat_4_constants, ptr}); break;
                    }
                    dst += 4 * N;
                }
                break;
            }
            case BuilderOp::copy_stack_to_slots: {
                float* src = tempStackPtr - (inst.fImmB * N);
                this->appendCopySlotsMasked(pipeline, alloc,
                                            OffsetFromBase(SlotA()),
                                            OffsetFromBase(src),
                                            inst.fImmA);
                break;
            }
            case BuilderOp::copy_stack_to_slots_unmasked: {
                float* src = tempStackPtr - (inst.fImmB * N);
                this->appendCopySlotsUnmasked(pipeline, alloc,
                                              OffsetFromBase(SlotA()),
                                              OffsetFromBase(src),
                                              inst.fImmA);
                break;
            }
            case BuilderOp::swizzle_copy_stack_to_slots: {
                // SlotA: fixed-range start
                // immA: number of swizzle components
                // immB: swizzle components
                // immC: offset from stack top
                auto stage = (ProgramOp)((int)ProgramOp::swizzle_copy_slot_masked + inst.fImmA - 1);
                auto* ctx = alloc->make<SkRasterPipeline_SwizzleCopyCtx>();
                ctx->src = reinterpret_cast<const int32_t*>(tempStackPtr) - (inst.fImmC * N);
                ctx->dst = reinterpret_cast<int32_t*>(SlotA());
                unpack_nybbles_to_offsets(inst.fImmB, SkSpan(ctx->offsets));
                pipeline->push_back({stage, ctx});
                break;
            }
            case BuilderOp::push_clone: {
                float* src = tempStackPtr - (inst.fImmB * N);
                float* dst = tempStackPtr;
                this->appendCopySlotsUnmasked(pipeline, alloc,
                                              OffsetFromBase(dst),
                                              OffsetFromBase(src),
                                              inst.fImmA);
                break;
            }
            case BuilderOp::push_clone_from_stack: {
                // immA: number of slots
                // immB: other stack ID
                // immC: offset from stack top
                float* sourceStackPtr = tempStackMap[inst.fImmB];
                float* src = sourceStackPtr - (inst.fImmC * N);
                float* dst = tempStackPtr;
                this->appendCopySlotsUnmasked(pipeline, alloc,
                                              OffsetFromBase(dst),
                                              OffsetFromBase(src),
                                              inst.fImmA);
                break;
            }
            case BuilderOp::push_clone_indirect_from_stack: {
                // immA: number of slots
                // immB: other stack ID
                // immC: offset from stack top
                // immD: dynamic stack ID
                float* sourceStackPtr = tempStackMap[inst.fImmB];

                auto* ctx = alloc->make<SkRasterPipeline_CopyIndirectCtx>();
                ctx->dst = reinterpret_cast<int32_t*>(tempStackPtr);
                ctx->src = reinterpret_cast<const int32_t*>(sourceStackPtr) - (inst.fImmC * N);
                ctx->indirectOffset =
                        reinterpret_cast<const uint32_t*>(tempStackMap[inst.fImmD]) - (1 * N);
                ctx->indirectLimit = inst.fImmC - inst.fImmA;
                ctx->slots = inst.fImmA;
                pipeline->push_back({ProgramOp::copy_from_indirect_unmasked, ctx});
                break;
            }
            case BuilderOp::swizzle_copy_stack_to_slots_indirect: {
                // SlotA: fixed-range start
                // SlotB: limit-range end
                // immA: number of swizzle components
                // immB: swizzle components
                // immC: offset from stack top
                // immD: dynamic stack ID
                auto* ctx = alloc->make<SkRasterPipeline_SwizzleCopyIndirectCtx>();
                ctx->src = reinterpret_cast<const int32_t*>(tempStackPtr) - (inst.fImmC * N);
                ctx->dst = reinterpret_cast<int32_t*>(SlotA());
                ctx->indirectOffset =
                        reinterpret_cast<const uint32_t*>(tempStackMap[inst.fImmD]) - (1 * N);
                ctx->indirectLimit =
                        inst.fSlotB - inst.fSlotA - (max_packed_nybble(inst.fImmB, inst.fImmA) + 1);
                ctx->slots = inst.fImmA;
                unpack_nybbles_to_offsets(inst.fImmB, SkSpan(ctx->offsets));
                pipeline->push_back({ProgramOp::swizzle_copy_to_indirect_masked, ctx});
                break;
            }
            case BuilderOp::case_op: {
                SkRasterPipeline_CaseOpCtx ctx;
                ctx.expectedValue = inst.fImmA;
                ctx.offset = OffsetFromBase(tempStackPtr - (2 * N));
                pipeline->push_back({ProgramOp::case_op, SkRPCtxUtils::Pack(ctx, alloc)});
                break;
            }
            case BuilderOp::continue_op:
                pipeline->push_back({ProgramOp::continue_op, tempStackMap[inst.fImmA] - (1 * N)});
                break;

            case BuilderOp::pad_stack:
            case BuilderOp::discard_stack:
                break;

            case BuilderOp::invoke_shader:
            case BuilderOp::invoke_color_filter:
            case BuilderOp::invoke_blender:
                pipeline->push_back({(ProgramOp)inst.fOp, context_bit_pun(inst.fImmA)});
                mostRecentInvocationInstructionIdx = instructionIdx;
                break;

            case BuilderOp::invoke_to_linear_srgb:
            case BuilderOp::invoke_from_linear_srgb:
                pipeline->push_back({(ProgramOp)inst.fOp, tempStackMap[inst.fImmA] - (4 * N)});
                mostRecentInvocationInstructionIdx = instructionIdx;
                break;

            case BuilderOp::trace_line: {
                auto* ctx = AllocTraceContext((SkRasterPipeline_TraceLineCtx*)nullptr);
                ctx->lineNumber = inst.fImmB;
                pipeline->push_back({ProgramOp::trace_line, ctx});
                break;
            }
            case BuilderOp::trace_scope: {
                auto* ctx = AllocTraceContext((SkRasterPipeline_TraceScopeCtx*)nullptr);
                ctx->delta = inst.fImmB;
                pipeline->push_back({ProgramOp::trace_scope, ctx});
                break;
            }
            case BuilderOp::trace_enter:
            case BuilderOp::trace_exit: {
                auto* ctx = AllocTraceContext((SkRasterPipeline_TraceFuncCtx*)nullptr);
                ctx->funcIdx = inst.fImmB;
                pipeline->push_back({(ProgramOp)inst.fOp, ctx});
                break;
            }
            case BuilderOp::trace_var:
            case BuilderOp::trace_var_indirect: {
                // SlotA: fixed-range start
                // SlotB: limit-range end
                // immA: trace-mask stack ID
                // immB: number of slots
                // immC: dynamic stack ID
                auto* ctx = AllocTraceContext((SkRasterPipeline_TraceVarCtx*)nullptr);
                ctx->slotIdx = inst.fSlotA;
                ctx->numSlots = inst.fImmB;
                ctx->data = reinterpret_cast<int*>(SlotA());
                if (inst.fOp == BuilderOp::trace_var_indirect) {
                    ctx->indirectOffset =
                            reinterpret_cast<const uint32_t*>(tempStackMap[inst.fImmC]) - (1 * N);
                    ctx->indirectLimit = inst.fSlotB - inst.fSlotA - inst.fImmB;
                } else {
                    ctx->indirectOffset = nullptr;
                    ctx->indirectLimit = 0;
                }
                pipeline->push_back({ProgramOp::trace_var, ctx});
                break;
            }
            default:
                SkDEBUGFAILF("Raster Pipeline: unsupported instruction %d", (int)inst.fOp);
                break;
        }

        int stackUsage = stack_usage(inst);
        if (stackUsage != 0) {
            tempStackPtr += stackUsage * N;
            SkASSERT(tempStackPtr >= slots.stack.begin());
            SkASSERT(tempStackPtr <= slots.stack.end());
        }

        // Rewind the stack if it has been more than 500 instructions since the last rewind. When
        // SK_HAS_MUSTTAIL is set, rewinds are not actually used; the appendStackRewind call
        // becomes a no-op. On platforms that don't support SK_HAS_MUSTTAIL, rewinding the stack
        // periodically can prevent a potential stack overflow when running a long program.
        int numPipelineStages = pipeline->size();
        if (numPipelineStages - mostRecentRewind > 500) {
            this->appendStackRewindForNonTailcallers(pipeline);
            mostRecentRewind = numPipelineStages;
        }
    }
}

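// Helper class which renders a Program as a human-readable instruction listing; see dump() below.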
class Program::Dumper {
public:
    Dumper(const Program& p) : fProgram(p) {}

    void dump(SkWStream* out, bool writeInstructionCount);

    // Finds the labels in the program, and keeps track of their offsets.
    void buildLabelToStageMap() {
        for (int index = 0; index < fStages.size(); ++index) {
            if (fStages[index].op == ProgramOp::label) {
                int labelID = sk_bit_cast<intptr_t>(fStages[index].ctx);
                SkASSERT(!fLabelToStageMap.find(labelID));
                fLabelToStageMap[labelID] = index;
            }
        }
    }

    // Assign unique names to each variable slot; our trace might have multiple variables with the
    // same name, which can make a dump hard to read. We disambiguate them with subscripts.
    void buildUniqueSlotNameList() {
        if (fProgram.fDebugTrace) {
            fSlotNameList.reserve_exact(fProgram.fDebugTrace->fSlotInfo.size());

            // The map consists of <variable name, <source position, unique name>>.
            THashMap<std::string_view, THashMap<int, std::string>> uniqueNameMap;

            for (const SlotDebugInfo& slotInfo : fProgram.fDebugTrace->fSlotInfo) {
                // Look up this variable by its name and source position.
                int pos = slotInfo.pos.valid() ? slotInfo.pos.startOffset() : 0;
                THashMap<int, std::string>& positionMap = uniqueNameMap[slotInfo.name];
                std::string& uniqueName = positionMap[pos];

                // Have we seen this variable name/position combination before?
                if (uniqueName.empty()) {
                    // This is a unique name/position pair.
                    uniqueName = slotInfo.name;

                    // But if it's not a unique _name_, it deserves a subscript to disambiguate it.
                    int subscript = positionMap.count() - 1;
                    if (subscript > 0) {
                        for (char digit : std::to_string(subscript)) {
                            // U+2080 through U+2089 (₀₁₂₃₄₅₆₇₈₉) in UTF8:
                            uniqueName.push_back((char)0xE2);
                            uniqueName.push_back((char)0x82);
                            uniqueName.push_back((char)(0x80 + digit - '0'));
                        }
                    }
                }

                fSlotNameList.push_back(uniqueName);
            }
        }
    }

    // Interprets the context value as a branch offset.
    std::string branchOffset(const SkRasterPipeline_BranchCtx* ctx, int index) const {
        // The context's offset field contains a label ID.
        int labelID = ctx->offset;
        const int* targetIndex = fLabelToStageMap.find(labelID);
        SkASSERT(targetIndex);
        return SkSL::String::printf("%+d (label %d at #%d)", *targetIndex - index, labelID,
                                    *targetIndex + 1);
    }

    // Prints a 32-bit immediate value of unknown type (int/float).
    std::string imm(float immFloat, bool showAsFloat = true) const {
        // Special case exact zero as "0" for readability (vs `0x00000000 (0.0)`).
        if (sk_bit_cast<int32_t>(immFloat) == 0) {
            return "0";
        }
        // Start with the hex representation (e.g. `0x3F800000`) as a baseline.
        uint32_t immUnsigned;
        memcpy(&immUnsigned, &immFloat, sizeof(uint32_t));
        auto text = SkSL::String::printf("0x%08X", immUnsigned);

        // Extend it to `0x3F800000 (1.0)` for finite floating point values.
        if (showAsFloat && std::isfinite(immFloat)) {
            text += " (";
            text += skstd::to_string(immFloat);
            text += ')';
        }
        return text;
    }

    // Interprets the context pointer as a 32-bit immediate value of unknown type (int/float).
    std::string immCtx(const void* ctx, bool showAsFloat = true) const {
        float f;
        memcpy(&f, &ctx, sizeof(float));
        return this->imm(f, showAsFloat);
    }

    // Prints `1` for single slots and `1..3` for ranges of slots.
    std::string asRange(int first, int count) const {
        std::string text = std::to_string(first);
        if (count > 1) {
            text += ".." + std::to_string(first + count - 1);
        }
        return text;
    }

    // Generates a reasonable name for a range of slots or uniforms, e.g.:
    // `val`: slot range points at one variable, named val
    // `val(0..1)`: slot range points at the first and second slot of val (which has 3+ slots)
    // `foo, bar`: slot range fully covers two variables, named foo and bar
    // `foo(3), bar(0)`: slot range covers the fourth slot of foo and the first slot of bar
    std::string slotOrUniformName(SkSpan<const SlotDebugInfo> debugInfo,
                                  SkSpan<const std::string> names,
                                  SlotRange range) const {
        SkASSERT(range.index >= 0 && (range.index + range.count) <= (int)debugInfo.size());

        std::string text;
        auto separator = SkSL::String::Separator();
        while (range.count > 0) {
            const SlotDebugInfo& slotInfo = debugInfo[range.index];
            text += separator();
            text += names.empty() ? slotInfo.name : names[range.index];

            // Figure out how many slots we can chomp in this iteration.
            int entireVariable = slotInfo.columns * slotInfo.rows;
            int slotsToChomp = std::min(range.count, entireVariable - slotInfo.componentIndex);
            // If we aren't consuming an entire variable, from first slot to last...
            if (slotsToChomp != entireVariable) {
                // ... decorate it with a range suffix.
                text += '(' + this->asRange(slotInfo.componentIndex, slotsToChomp) + ')';
            }
            range.index += slotsToChomp;
            range.count -= slotsToChomp;
        }

        return text;
    }

    // Generates a reasonable name for a range of slots.
    std::string slotName(SlotRange range) const {
        return this->slotOrUniformName(fProgram.fDebugTrace->fSlotInfo, fSlotNameList, range);
    }

    // Generates a reasonable name for a range of uniforms.
    std::string uniformName(SlotRange range) const {
        return this->slotOrUniformName(fProgram.fDebugTrace->fUniformInfo, /*names=*/{}, range);
    }

    // Attempts to interpret the passed-in pointer as a uniform range.
    std::string uniformPtrCtx(const float* ptr, int numSlots) const {
        const float* end = ptr + numSlots;
        if (ptr >= fUniforms.begin() && end <= fUniforms.end()) {
            int uniformIdx = ptr - fUniforms.begin();
            if (fProgram.fDebugTrace) {
                // Handle pointers to named uniform slots.
                std::string name = this->uniformName({uniformIdx, numSlots});
                if (!name.empty()) {
                    return name;
                }
            }
            // Handle pointers to uniforms (when no debug info exists).
            return 'u' + this->asRange(uniformIdx, numSlots);
        }
        return {};
    }

    // Attempts to interpret the passed-in pointer as a value slot range.
    std::string valuePtrCtx(const float* ptr, int numSlots) const {
        const float* end = ptr + (N * numSlots);
        if (ptr >= fSlots.values.begin() && end <= fSlots.values.end()) {
            int valueIdx = ptr - fSlots.values.begin();
            SkASSERT((valueIdx % N) == 0);
            valueIdx /= N;
            if (fProgram.fDebugTrace) {
                // Handle pointers to named value slots.
                std::string name = this->slotName({valueIdx, numSlots});
                if (!name.empty()) {
                    return name;
                }
            }
            // Handle pointers to value slots (when no debug info exists).
            return 'v' + this->asRange(valueIdx, numSlots);
        }
        return {};
    }

    // Attempts to interpret the passed-in pointer as an immutable slot range.
    std::string immutablePtrCtx(const float* ptr, int numSlots) const {
        const float* end = ptr + numSlots;
        if (ptr >= fSlots.immutable.begin() && end <= fSlots.immutable.end()) {
            int index = ptr - fSlots.immutable.begin();
            return 'i' + this->asRange(index, numSlots) + ' ' +
                   this->multiImmCtx(ptr, numSlots);
        }
        return {};
    }

    // Interprets the context value as a pointer to `count` immediate values.
    std::string multiImmCtx(const float* ptr, int count) const {
        // If this is a uniform, print it by name.
        if (std::string text = this->uniformPtrCtx(ptr, count); !text.empty()) {
            return text;
        }
        // Emit a single bracketed immediate.
        if (count == 1) {
            return '[' + this->imm(*ptr) + ']';
        }
        // Emit a list like `[0x00000000 (0.0), 0x3F800000 (1.0)]`.
        std::string text = "[";
        auto separator = SkSL::String::Separator();
        while (count--) {
            text += separator();
            text += this->imm(*ptr++);
        }
        return text + ']';
    }

    // Interprets the context value as a generic pointer.
    std::string ptrCtx(const void* ctx, int numSlots) const {
        const float* ctxAsSlot = static_cast<const float*>(ctx);
        // Check for uniform, value, and immutable pointers.
        if (std::string uniform = this->uniformPtrCtx(ctxAsSlot, numSlots); !uniform.empty()) {
            return uniform;
        }
        if (std::string value = this->valuePtrCtx(ctxAsSlot, numSlots); !value.empty()) {
            return value;
        }
        if (std::string value = this->immutablePtrCtx(ctxAsSlot, numSlots); !value.empty()) {
            return value;
        }
        // Handle pointers to temporary stack slots.
        if (ctxAsSlot >= fSlots.stack.begin() && ctxAsSlot < fSlots.stack.end()) {
            int stackIdx = ctxAsSlot - fSlots.stack.begin();
            SkASSERT((stackIdx % N) == 0);
            return '$' + this->asRange(stackIdx / N, numSlots);
        }
        // This pointer is out of our expected bounds; this generally isn't expected to happen.
        return "ExternalPtr(" + this->asRange(0, numSlots) + ")";
    }

    // Converts an SkRPOffset to a pointer into the value-slot range.
    std::byte* offsetToPtr(SkRPOffset offset) const {
        return (std::byte*)fSlots.values.data() + offset;
    }

    // Interprets a slab offset as a slot range.
    std::string offsetCtx(SkRPOffset offset, int numSlots) const {
        return this->ptrCtx(this->offsetToPtr(offset), numSlots);
    }

    // Interprets the context value as a packed ConstantCtx structure.
    std::tuple<std::string, std::string> constantCtx(const void* v,
                                                     int slots,
                                                     bool showAsFloat = true) const {
        auto ctx = SkRPCtxUtils::Unpack((const SkRasterPipeline_ConstantCtx*)v);
        return {this->offsetCtx(ctx.dst, slots),
                this->imm(sk_bit_cast<float>(ctx.value), showAsFloat)};
    }

    // Interprets the context value as a BinaryOp structure for copy_n_slots (numSlots is dictated
    // by the op itself).
    std::tuple<std::string, std::string> binaryOpCtx(const void* v, int numSlots) const {
        auto ctx = SkRPCtxUtils::Unpack((const SkRasterPipeline_BinaryOpCtx*)v);
        return {this->offsetCtx(ctx.dst, numSlots),
                this->offsetCtx(ctx.src, numSlots)};
    }

    // Interprets the context value as a BinaryOp structure for copy_n_uniforms (numSlots is
    // dictated by the op itself).
    std::tuple<std::string, std::string> copyUniformCtx(const void* v, int numSlots) const {
        const auto* ctx = static_cast<const SkRasterPipeline_UniformCtx*>(v);
        return {this->ptrCtx(ctx->dst, numSlots),
                this->multiImmCtx(reinterpret_cast<const float*>(ctx->src), numSlots)};
    }

    // Interprets the context value as a pointer to two adjacent values.
    std::tuple<std::string, std::string> adjacentPtrCtx(const void* ctx, int numSlots) const {
        const float* ctxAsSlot = static_cast<const float*>(ctx);
        return std::make_tuple(this->ptrCtx(ctxAsSlot, numSlots),
                               this->ptrCtx(ctxAsSlot + (N * numSlots), numSlots));
    }

    // Interprets a slab offset as two adjacent slot ranges.
    std::tuple<std::string, std::string> adjacentOffsetCtx(SkRPOffset offset, int numSlots) const {
        return this->adjacentPtrCtx((std::byte*)fSlots.values.data() + offset, numSlots);
    }

    // Interprets the context value as a BinaryOp structure (numSlots is inferred from the distance
    // between pointers).
    std::tuple<std::string, std::string> adjacentBinaryOpCtx(const void* v) const {
        auto ctx = SkRPCtxUtils::Unpack((const SkRasterPipeline_BinaryOpCtx*)v);
        int numSlots = (ctx.src - ctx.dst) / (N * sizeof(float));
        return this->adjacentOffsetCtx(ctx.dst, numSlots);
    }

    // Interprets the context value as a pointer to three adjacent values.
    std::tuple<std::string, std::string, std::string> adjacent3PtrCtx(const void* ctx,
                                                                      int numSlots) const {
        const float* ctxAsSlot = static_cast<const float*>(ctx);
        return {this->ptrCtx(ctxAsSlot, numSlots),
                this->ptrCtx(ctxAsSlot + (N * numSlots), numSlots),
                this->ptrCtx(ctxAsSlot + (2 * N * numSlots), numSlots)};
    }

    // Interprets a slab offset as three adjacent slot ranges.
    std::tuple<std::string, std::string, std::string> adjacent3OffsetCtx(SkRPOffset offset,
                                                                         int numSlots) const {
        return this->adjacent3PtrCtx((std::byte*)fSlots.values.data() + offset, numSlots);
    }

    // Interprets the context value as a TernaryOp structure (numSlots is inferred from `delta`).
    std::tuple<std::string, std::string, std::string> adjacentTernaryOpCtx(const void* v) const {
        auto ctx = SkRPCtxUtils::Unpack((const SkRasterPipeline_TernaryOpCtx*)v);
        int numSlots = ctx.delta / (sizeof(float) * N);
        return this->adjacent3OffsetCtx(ctx.dst, numSlots);
    }

    // Stringizes a span of swizzle offsets to the textual equivalent (`xyzw`).
    template <typename T>
    std::string swizzleOffsetSpan(SkSpan<T> offsets) const {
        std::string src;
        for (uint16_t offset : offsets) {
            if (offset == (0 * N * sizeof(float))) {
                src.push_back('x');
            } else if (offset == (1 * N * sizeof(float))) {
                src.push_back('y');
            } else if (offset == (2 * N * sizeof(float))) {
                src.push_back('z');
            } else if (offset == (3 * N * sizeof(float))) {
                src.push_back('w');
            } else {
                src.push_back('?');
            }
        }
        return src;
    }

    // Determines the effective width of a swizzle op. When we decode a swizzle, we don't know the
    // slot width of the original value; that's not preserved in the instruction encoding. (e.g.,
    // myFloat4.y would be indistinguishable from myFloat2.y.) We do our best to make a readable
    // dump using the data we have.
    template <typename T>
    size_t swizzleWidth(SkSpan<T> offsets) const {
        size_t highestComponent = *std::max_element(offsets.begin(), offsets.end()) /
                                  (N * sizeof(float));
        size_t swizzleWidth = offsets.size();
        return std::max(swizzleWidth, highestComponent + 1);
    }

    // Stringizes a swizzled pointer.
    template <typename T>
    std::string swizzlePtr(const void* ptr, SkSpan<T> offsets) const {
        return "(" + this->ptrCtx(ptr, this->swizzleWidth(SkSpan(offsets))) + ")." +
               this->swizzleOffsetSpan(SkSpan(offsets));
    }

    // Interprets the context value as a SwizzleCtx structure.
    std::tuple<std::string, std::string> swizzleCtx(ProgramOp op, const void* v) const {
        auto ctx = SkRPCtxUtils::Unpack((const SkRasterPipeline_SwizzleCtx*)v);
        int destSlots = (int)op - (int)BuilderOp::swizzle_1 + 1;
        return {this->offsetCtx(ctx.dst, destSlots),
                this->swizzlePtr(this->offsetToPtr(ctx.dst), SkSpan(ctx.offsets, destSlots))};
    }

    // Interprets the context value as a SwizzleCopyCtx structure.
    std::tuple<std::string, std::string> swizzleCopyCtx(ProgramOp op, const void* v) const {
        const auto* ctx = static_cast<const SkRasterPipeline_SwizzleCopyCtx*>(v);
        int destSlots = (int)op - (int)BuilderOp::swizzle_copy_slot_masked + 1;

        return {this->swizzlePtr(ctx->dst, SkSpan(ctx->offsets, destSlots)),
                this->ptrCtx(ctx->src, destSlots)};
    }

    // Interprets the context value as a ShuffleCtx structure.
    std::tuple<std::string, std::string> shuffleCtx(const void* v) const {
        const auto* ctx = static_cast<const SkRasterPipeline_ShuffleCtx*>(v);

        std::string dst = this->ptrCtx(ctx->ptr, ctx->count);
        std::string src = "(" + dst + ")[";
        for (int index = 0; index < ctx->count; ++index) {
            if (ctx->offsets[index] % (N * sizeof(float))) {
                src.push_back('?');
            } else {
                src += std::to_string(ctx->offsets[index] / (N * sizeof(float)));
            }
            src.push_back(' ');
        }
        src.back() = ']';
        return std::make_tuple(dst, src);
    }

    // Interprets the context value as a packed MatrixMultiplyCtx structure.
    std::tuple<std::string, std::string, std::string> matrixMultiply(const void* v) const {
        auto ctx = SkRPCtxUtils::Unpack((const SkRasterPipeline_MatrixMultiplyCtx*)v);
        int leftMatrix = ctx.leftColumns * ctx.leftRows;
        int rightMatrix = ctx.rightColumns * ctx.rightRows;
        int resultMatrix = ctx.rightColumns * ctx.leftRows;
        SkRPOffset leftOffset = ctx.dst + (ctx.rightColumns * ctx.leftRows * sizeof(float) * N);
        SkRPOffset rightOffset = leftOffset + (ctx.leftColumns * ctx.leftRows * sizeof(float) * N);
        return {SkSL::String::printf("mat%dx%d(%s)",
                                     ctx.rightColumns,
                                     ctx.leftRows,
                                     this->offsetCtx(ctx.dst, resultMatrix).c_str()),
                SkSL::String::printf("mat%dx%d(%s)",
                                     ctx.leftColumns,
                                     ctx.leftRows,
                                     this->offsetCtx(leftOffset, leftMatrix).c_str()),
                SkSL::String::printf("mat%dx%d(%s)",
                                     ctx.rightColumns,
                                     ctx.rightRows,
                                     this->offsetCtx(rightOffset, rightMatrix).c_str())};
    }

private:
    const int N = SkOpts::raster_pipeline_highp_stride;
    const Program& fProgram;
    TArray<Stage> fStages;
    TArray<std::string> fSlotNameList;
    THashMap<int, int> fLabelToStageMap;  // <label ID, stage index>
    SlotData fSlots;
    SkSpan<float> fUniforms;
};

2897 void Program::Dumper::dump(SkWStream* out, bool writeInstructionCount) {
2898 using POp = ProgramOp;
2899
2900 // Allocate memory for the slot and uniform data, even though the program won't ever be
2901 // executed. The program requires pointer ranges for managing its data, and ASAN will report
2902 // errors if those pointers are pointing at unallocated memory.
2903 SkArenaAlloc alloc(/*firstHeapAllocation=*/1000);
2904 fSlots = fProgram.allocateSlotData(&alloc).value();
2905 float* uniformPtr = alloc.makeArray<float>(fProgram.fNumUniformSlots);
2906 fUniforms = SkSpan(uniformPtr, fProgram.fNumUniformSlots);
2907
2908 // Turn this program into an array of Raster Pipeline stages.
2909 fProgram.makeStages(&fStages, &alloc, fUniforms, fSlots);
2910
2911 // Assemble lookup tables for program labels and slot names.
2912 this->buildLabelToStageMap();
2913 this->buildUniqueSlotNameList();
2914
2915 // Emit the program's instruction count.
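    // (For a hypothetical program this might print "23 instructions, 2
    // invocations"; labels count toward neither tally.)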
2916 if (writeInstructionCount) {
2917 int invocationCount = 0, instructionCount = 0;
2918 for (const Stage& stage : fStages) {
2919 switch (stage.op) {
2920 case POp::label:
2921 // consumes zero instructions
2922 break;
2923
2924 case POp::invoke_shader:
2925 case POp::invoke_color_filter:
2926 case POp::invoke_blender:
2927 case POp::invoke_to_linear_srgb:
2928 case POp::invoke_from_linear_srgb:
2929 ++invocationCount;
2930 break;
2931
2932 default:
2933 ++instructionCount;
2934 break;
2935 }
2936 }
2937
2938 out->writeText(std::to_string(instructionCount).c_str());
2939 out->writeText(" instructions");
2940 if (invocationCount > 0) {
2941 out->writeText(", ");
2942 out->writeText(std::to_string(invocationCount).c_str());
2943 out->writeText(" invocations");
2944 }
2945 out->writeText("\n\n");
2946 }
2947
2948 // Emit all of the program's immutable data.
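    // A hypothetical dump of two immutable slots might look roughly like:
    //   [immutable slots]
    //   i1 = 0x3F800000 (1.0)
    //   i2 = 0x40000000 (2.0)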
2949 const char* header = "[immutable slots]\n";
2950 const char* footer = "";
2951 for (const Instruction& inst : fProgram.fInstructions) {
2952 if (inst.fOp == BuilderOp::store_immutable_value) {
2953 out->writeText(header);
2954 out->writeText("i");
2955 out->writeText(std::to_string(inst.fSlotA).c_str());
2956 out->writeText(" = ");
2957 out->writeText(this->imm(sk_bit_cast<float>(inst.fImmA)).c_str());
2958 out->writeText("\n");
2959
2960 header = "";
2961 footer = "\n";
2962 }
2963 }
2964 out->writeText(footer);
2965
2966 // Emit the program's instruction list.
2967 for (int index = 0; index < fStages.size(); ++index) {
2968 const Stage& stage = fStages[index];
2969
2970 std::string opArg1, opArg2, opArg3, opSwizzle;
2971 switch (stage.op) {
2972 case POp::label:
2973 case POp::invoke_shader:
2974 case POp::invoke_color_filter:
2975 case POp::invoke_blender:
2976 opArg1 = this->immCtx(stage.ctx, /*showAsFloat=*/false);
2977 break;
2978
2979 case POp::case_op: {
2980 auto ctx = SkRPCtxUtils::Unpack((const SkRasterPipeline_CaseOpCtx*)stage.ctx);
2981 opArg1 = this->offsetCtx(ctx.offset, 1);
2982 opArg2 = this->offsetCtx(ctx.offset + sizeof(int32_t) * N, 1);
2983 opArg3 = this->imm(sk_bit_cast<float>(ctx.expectedValue), /*showAsFloat=*/false);
2984 break;
2985 }
2986 case POp::swizzle_1:
2987 case POp::swizzle_2:
2988 case POp::swizzle_3:
2989 case POp::swizzle_4:
2990 std::tie(opArg1, opArg2) = this->swizzleCtx(stage.op, stage.ctx);
2991 break;
2992
2993 case POp::swizzle_copy_slot_masked:
2994 case POp::swizzle_copy_2_slots_masked:
2995 case POp::swizzle_copy_3_slots_masked:
2996 case POp::swizzle_copy_4_slots_masked:
2997 std::tie(opArg1, opArg2) = this->swizzleCopyCtx(stage.op, stage.ctx);
2998 break;
2999
3000 case POp::refract_4_floats:
3001 std::tie(opArg1, opArg2) = this->adjacentPtrCtx(stage.ctx, 4);
3002 opArg3 = this->ptrCtx((const float*)(stage.ctx) + (8 * N), 1);
3003 break;
3004
3005 case POp::dot_2_floats:
3006 opArg1 = this->ptrCtx(stage.ctx, 1);
3007 std::tie(opArg2, opArg3) = this->adjacentPtrCtx(stage.ctx, 2);
3008 break;
3009
3010 case POp::dot_3_floats:
3011 opArg1 = this->ptrCtx(stage.ctx, 1);
3012 std::tie(opArg2, opArg3) = this->adjacentPtrCtx(stage.ctx, 3);
3013 break;
3014
3015 case POp::dot_4_floats:
3016 opArg1 = this->ptrCtx(stage.ctx, 1);
3017 std::tie(opArg2, opArg3) = this->adjacentPtrCtx(stage.ctx, 4);
3018 break;
3019
3020 case POp::shuffle:
3021 std::tie(opArg1, opArg2) = this->shuffleCtx(stage.ctx);
3022 break;
3023
3024 case POp::matrix_multiply_2:
3025 case POp::matrix_multiply_3:
3026 case POp::matrix_multiply_4:
3027 std::tie(opArg1, opArg2, opArg3) = this->matrixMultiply(stage.ctx);
3028 break;
3029
3030 case POp::load_condition_mask:
3031 case POp::store_condition_mask:
3032 case POp::load_loop_mask:
3033 case POp::store_loop_mask:
3034 case POp::merge_loop_mask:
3035 case POp::reenable_loop_mask:
3036 case POp::load_return_mask:
3037 case POp::store_return_mask:
3038 case POp::continue_op:
3039 case POp::cast_to_float_from_int: case POp::cast_to_float_from_uint:
3040 case POp::cast_to_int_from_float: case POp::cast_to_uint_from_float:
3041 case POp::abs_int:
3042 case POp::acos_float:
3043 case POp::asin_float:
3044 case POp::atan_float:
3045 case POp::ceil_float:
3046 case POp::cos_float:
3047 case POp::exp_float:
3048 case POp::exp2_float:
3049 case POp::log_float:
3050 case POp::log2_float:
3051 case POp::floor_float:
3052 case POp::invsqrt_float:
3053 case POp::sin_float:
3054 case POp::sqrt_float:
3055 case POp::tan_float:
3056 opArg1 = this->ptrCtx(stage.ctx, 1);
3057 break;
3058
3059 case POp::store_src_rg:
3060 case POp::cast_to_float_from_2_ints: case POp::cast_to_float_from_2_uints:
3061 case POp::cast_to_int_from_2_floats: case POp::cast_to_uint_from_2_floats:
3062 case POp::abs_2_ints:
3063 case POp::ceil_2_floats:
3064 case POp::floor_2_floats:
3065 case POp::invsqrt_2_floats:
3066 opArg1 = this->ptrCtx(stage.ctx, 2);
3067 break;
3068
3069 case POp::cast_to_float_from_3_ints: case POp::cast_to_float_from_3_uints:
3070 case POp::cast_to_int_from_3_floats: case POp::cast_to_uint_from_3_floats:
3071 case POp::abs_3_ints:
3072 case POp::ceil_3_floats:
3073 case POp::floor_3_floats:
3074 case POp::invsqrt_3_floats:
3075 opArg1 = this->ptrCtx(stage.ctx, 3);
3076 break;
3077
3078 case POp::load_src:
3079 case POp::load_dst:
3080 case POp::exchange_src:
3081 case POp::store_src:
3082 case POp::store_dst:
3083 case POp::store_device_xy01:
3084 case POp::invoke_to_linear_srgb:
3085 case POp::invoke_from_linear_srgb:
3086 case POp::cast_to_float_from_4_ints: case POp::cast_to_float_from_4_uints:
3087 case POp::cast_to_int_from_4_floats: case POp::cast_to_uint_from_4_floats:
3088 case POp::abs_4_ints:
3089 case POp::ceil_4_floats:
3090 case POp::floor_4_floats:
3091 case POp::invsqrt_4_floats:
3092 case POp::inverse_mat2:
3093 opArg1 = this->ptrCtx(stage.ctx, 4);
3094 break;
3095
3096 case POp::inverse_mat3:
3097 opArg1 = this->ptrCtx(stage.ctx, 9);
3098 break;
3099
3100 case POp::inverse_mat4:
3101 opArg1 = this->ptrCtx(stage.ctx, 16);
3102 break;
3103
3104 case POp::copy_constant:
3105 case POp::add_imm_float:
3106 case POp::mul_imm_float:
3107 case POp::cmple_imm_float:
3108 case POp::cmplt_imm_float:
3109 case POp::cmpeq_imm_float:
3110 case POp::cmpne_imm_float:
3111 case POp::min_imm_float:
3112 case POp::max_imm_float:
3113 std::tie(opArg1, opArg2) = this->constantCtx(stage.ctx, 1);
3114 break;
3115
3116 case POp::add_imm_int:
3117 case POp::mul_imm_int:
3118 case POp::bitwise_and_imm_int:
3119 case POp::bitwise_xor_imm_int:
3120 case POp::cmple_imm_int:
3121 case POp::cmple_imm_uint:
3122 case POp::cmplt_imm_int:
3123 case POp::cmplt_imm_uint:
3124 case POp::cmpeq_imm_int:
3125 case POp::cmpne_imm_int:
3126 std::tie(opArg1, opArg2) = this->constantCtx(stage.ctx, 1, /*showAsFloat=*/false);
3127 break;
3128
3129 case POp::splat_2_constants:
3130 case POp::bitwise_and_imm_2_ints:
3131 std::tie(opArg1, opArg2) = this->constantCtx(stage.ctx, 2);
3132 break;
3133
3134 case POp::splat_3_constants:
3135 case POp::bitwise_and_imm_3_ints:
3136 std::tie(opArg1, opArg2) = this->constantCtx(stage.ctx, 3);
3137 break;
3138
3139 case POp::splat_4_constants:
3140 case POp::bitwise_and_imm_4_ints:
3141 std::tie(opArg1, opArg2) = this->constantCtx(stage.ctx, 4);
3142 break;
3143
3144 case POp::copy_uniform:
3145 std::tie(opArg1, opArg2) = this->copyUniformCtx(stage.ctx, 1);
3146 break;
3147
3148 case POp::copy_2_uniforms:
3149 std::tie(opArg1, opArg2) = this->copyUniformCtx(stage.ctx, 2);
3150 break;
3151
3152 case POp::copy_3_uniforms:
3153 std::tie(opArg1, opArg2) = this->copyUniformCtx(stage.ctx, 3);
3154 break;
3155
3156 case POp::copy_4_uniforms:
3157 std::tie(opArg1, opArg2) = this->copyUniformCtx(stage.ctx, 4);
3158 break;
3159
3160 case POp::copy_slot_masked:
3161 case POp::copy_slot_unmasked:
3162 case POp::copy_immutable_unmasked:
3163 std::tie(opArg1, opArg2) = this->binaryOpCtx(stage.ctx, 1);
3164 break;
3165
3166 case POp::copy_2_slots_masked:
3167 case POp::copy_2_slots_unmasked:
3168 case POp::copy_2_immutables_unmasked:
3169 std::tie(opArg1, opArg2) = this->binaryOpCtx(stage.ctx, 2);
3170 break;
3171
3172 case POp::copy_3_slots_masked:
3173 case POp::copy_3_slots_unmasked:
3174 case POp::copy_3_immutables_unmasked:
3175 std::tie(opArg1, opArg2) = this->binaryOpCtx(stage.ctx, 3);
3176 break;
3177
3178 case POp::copy_4_slots_masked:
3179 case POp::copy_4_slots_unmasked:
3180 case POp::copy_4_immutables_unmasked:
3181 std::tie(opArg1, opArg2) = this->binaryOpCtx(stage.ctx, 4);
3182 break;
3183
3184 case POp::copy_from_indirect_uniform_unmasked:
3185 case POp::copy_from_indirect_unmasked:
3186 case POp::copy_to_indirect_masked: {
3187 const auto* ctx = static_cast<SkRasterPipeline_CopyIndirectCtx*>(stage.ctx);
3188                 // We don't incorporate the indirect-limit into the output.
3189 opArg1 = this->ptrCtx(ctx->dst, ctx->slots);
3190 opArg2 = this->ptrCtx(ctx->src, ctx->slots);
3191 opArg3 = this->ptrCtx(ctx->indirectOffset, 1);
3192 break;
3193 }
3194 case POp::swizzle_copy_to_indirect_masked: {
3195 const auto* ctx = static_cast<SkRasterPipeline_SwizzleCopyIndirectCtx*>(stage.ctx);
3196 opArg1 = this->ptrCtx(ctx->dst, this->swizzleWidth(SkSpan(ctx->offsets,
3197 ctx->slots)));
3198 opArg2 = this->ptrCtx(ctx->src, ctx->slots);
3199 opArg3 = this->ptrCtx(ctx->indirectOffset, 1);
3200 opSwizzle = this->swizzleOffsetSpan(SkSpan(ctx->offsets, ctx->slots));
3201 break;
3202 }
3203 case POp::merge_condition_mask:
3204 case POp::merge_inv_condition_mask:
3205 case POp::add_float: case POp::add_int:
3206 case POp::sub_float: case POp::sub_int:
3207 case POp::mul_float: case POp::mul_int:
3208 case POp::div_float: case POp::div_int: case POp::div_uint:
3209 case POp::bitwise_and_int:
3210 case POp::bitwise_or_int:
3211 case POp::bitwise_xor_int:
3212 case POp::mod_float:
3213 case POp::min_float: case POp::min_int: case POp::min_uint:
3214 case POp::max_float: case POp::max_int: case POp::max_uint:
3215 case POp::cmplt_float: case POp::cmplt_int: case POp::cmplt_uint:
3216 case POp::cmple_float: case POp::cmple_int: case POp::cmple_uint:
3217 case POp::cmpeq_float: case POp::cmpeq_int:
3218 case POp::cmpne_float: case POp::cmpne_int:
3219 std::tie(opArg1, opArg2) = this->adjacentPtrCtx(stage.ctx, 1);
3220 break;
3221
3222 case POp::mix_float: case POp::mix_int:
3223 std::tie(opArg1, opArg2, opArg3) = this->adjacent3PtrCtx(stage.ctx, 1);
3224 break;
3225
3226 case POp::add_2_floats: case POp::add_2_ints:
3227 case POp::sub_2_floats: case POp::sub_2_ints:
3228 case POp::mul_2_floats: case POp::mul_2_ints:
3229 case POp::div_2_floats: case POp::div_2_ints: case POp::div_2_uints:
3230 case POp::bitwise_and_2_ints:
3231 case POp::bitwise_or_2_ints:
3232 case POp::bitwise_xor_2_ints:
3233 case POp::mod_2_floats:
3234 case POp::min_2_floats: case POp::min_2_ints: case POp::min_2_uints:
3235 case POp::max_2_floats: case POp::max_2_ints: case POp::max_2_uints:
3236 case POp::cmplt_2_floats: case POp::cmplt_2_ints: case POp::cmplt_2_uints:
3237 case POp::cmple_2_floats: case POp::cmple_2_ints: case POp::cmple_2_uints:
3238 case POp::cmpeq_2_floats: case POp::cmpeq_2_ints:
3239 case POp::cmpne_2_floats: case POp::cmpne_2_ints:
3240 std::tie(opArg1, opArg2) = this->adjacentPtrCtx(stage.ctx, 2);
3241 break;
3242
3243 case POp::mix_2_floats: case POp::mix_2_ints:
3244 std::tie(opArg1, opArg2, opArg3) = this->adjacent3PtrCtx(stage.ctx, 2);
3245 break;
3246
3247 case POp::add_3_floats: case POp::add_3_ints:
3248 case POp::sub_3_floats: case POp::sub_3_ints:
3249 case POp::mul_3_floats: case POp::mul_3_ints:
3250 case POp::div_3_floats: case POp::div_3_ints: case POp::div_3_uints:
3251 case POp::bitwise_and_3_ints:
3252 case POp::bitwise_or_3_ints:
3253 case POp::bitwise_xor_3_ints:
3254 case POp::mod_3_floats:
3255 case POp::min_3_floats: case POp::min_3_ints: case POp::min_3_uints:
3256 case POp::max_3_floats: case POp::max_3_ints: case POp::max_3_uints:
3257 case POp::cmplt_3_floats: case POp::cmplt_3_ints: case POp::cmplt_3_uints:
3258 case POp::cmple_3_floats: case POp::cmple_3_ints: case POp::cmple_3_uints:
3259 case POp::cmpeq_3_floats: case POp::cmpeq_3_ints:
3260 case POp::cmpne_3_floats: case POp::cmpne_3_ints:
3261 std::tie(opArg1, opArg2) = this->adjacentPtrCtx(stage.ctx, 3);
3262 break;
3263
3264 case POp::mix_3_floats: case POp::mix_3_ints:
3265 std::tie(opArg1, opArg2, opArg3) = this->adjacent3PtrCtx(stage.ctx, 3);
3266 break;
3267
3268 case POp::add_4_floats: case POp::add_4_ints:
3269 case POp::sub_4_floats: case POp::sub_4_ints:
3270 case POp::mul_4_floats: case POp::mul_4_ints:
3271 case POp::div_4_floats: case POp::div_4_ints: case POp::div_4_uints:
3272 case POp::bitwise_and_4_ints:
3273 case POp::bitwise_or_4_ints:
3274 case POp::bitwise_xor_4_ints:
3275 case POp::mod_4_floats:
3276 case POp::min_4_floats: case POp::min_4_ints: case POp::min_4_uints:
3277 case POp::max_4_floats: case POp::max_4_ints: case POp::max_4_uints:
3278 case POp::cmplt_4_floats: case POp::cmplt_4_ints: case POp::cmplt_4_uints:
3279 case POp::cmple_4_floats: case POp::cmple_4_ints: case POp::cmple_4_uints:
3280 case POp::cmpeq_4_floats: case POp::cmpeq_4_ints:
3281 case POp::cmpne_4_floats: case POp::cmpne_4_ints:
3282 std::tie(opArg1, opArg2) = this->adjacentPtrCtx(stage.ctx, 4);
3283 break;
3284
3285 case POp::mix_4_floats: case POp::mix_4_ints:
3286 std::tie(opArg1, opArg2, opArg3) = this->adjacent3PtrCtx(stage.ctx, 4);
3287 break;
3288
3289 case POp::add_n_floats: case POp::add_n_ints:
3290 case POp::sub_n_floats: case POp::sub_n_ints:
3291 case POp::mul_n_floats: case POp::mul_n_ints:
3292 case POp::div_n_floats: case POp::div_n_ints: case POp::div_n_uints:
3293 case POp::bitwise_and_n_ints:
3294 case POp::bitwise_or_n_ints:
3295 case POp::bitwise_xor_n_ints:
3296 case POp::mod_n_floats:
3297 case POp::min_n_floats: case POp::min_n_ints: case POp::min_n_uints:
3298 case POp::max_n_floats: case POp::max_n_ints: case POp::max_n_uints:
3299 case POp::cmplt_n_floats: case POp::cmplt_n_ints: case POp::cmplt_n_uints:
3300 case POp::cmple_n_floats: case POp::cmple_n_ints: case POp::cmple_n_uints:
3301 case POp::cmpeq_n_floats: case POp::cmpeq_n_ints:
3302 case POp::cmpne_n_floats: case POp::cmpne_n_ints:
3303 case POp::atan2_n_floats:
3304 case POp::pow_n_floats:
3305 std::tie(opArg1, opArg2) = this->adjacentBinaryOpCtx(stage.ctx);
3306 break;
3307
3308 case POp::mix_n_floats: case POp::mix_n_ints:
3309 case POp::smoothstep_n_floats:
3310 std::tie(opArg1, opArg2, opArg3) = this->adjacentTernaryOpCtx(stage.ctx);
3311 break;
3312
3313 case POp::jump:
3314 case POp::branch_if_all_lanes_active:
3315 case POp::branch_if_any_lanes_active:
3316 case POp::branch_if_no_lanes_active:
3317 opArg1 = this->branchOffset(static_cast<SkRasterPipeline_BranchCtx*>(stage.ctx),
3318 index);
3319 break;
3320
3321 case POp::branch_if_no_active_lanes_eq: {
3322 const auto* ctx = static_cast<SkRasterPipeline_BranchIfEqualCtx*>(stage.ctx);
3323 opArg1 = this->branchOffset(ctx, index);
3324 opArg2 = this->ptrCtx(ctx->ptr, 1);
3325 opArg3 = this->imm(sk_bit_cast<float>(ctx->value));
3326 break;
3327 }
3328 case POp::trace_var: {
3329 const auto* ctx = static_cast<SkRasterPipeline_TraceVarCtx*>(stage.ctx);
3330 opArg1 = this->ptrCtx(ctx->traceMask, 1);
3331 opArg2 = this->ptrCtx(ctx->data, ctx->numSlots);
3332 if (ctx->indirectOffset != nullptr) {
3333 opArg3 = " + " + this->ptrCtx(ctx->indirectOffset, 1);
3334 }
3335 break;
3336 }
3337 case POp::trace_line: {
3338 const auto* ctx = static_cast<SkRasterPipeline_TraceLineCtx*>(stage.ctx);
3339 opArg1 = this->ptrCtx(ctx->traceMask, 1);
3340 opArg2 = std::to_string(ctx->lineNumber);
3341 break;
3342 }
3343 case POp::trace_enter:
3344 case POp::trace_exit: {
3345 const auto* ctx = static_cast<SkRasterPipeline_TraceFuncCtx*>(stage.ctx);
3346 opArg1 = this->ptrCtx(ctx->traceMask, 1);
3347 opArg2 = (fProgram.fDebugTrace &&
3348 ctx->funcIdx >= 0 &&
3349 ctx->funcIdx < (int)fProgram.fDebugTrace->fFuncInfo.size())
3350 ? fProgram.fDebugTrace->fFuncInfo[ctx->funcIdx].name
3351 : "???";
3352 break;
3353 }
3354 case POp::trace_scope: {
3355 const auto* ctx = static_cast<SkRasterPipeline_TraceScopeCtx*>(stage.ctx);
3356 opArg1 = this->ptrCtx(ctx->traceMask, 1);
3357 opArg2 = SkSL::String::printf("%+d", ctx->delta);
3358 break;
3359 }
3360 default:
3361 break;
3362 }
3363
3364 std::string_view opName;
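        // SK_RASTER_PIPELINE_OPS_ALL and SKRP_EXTENDED_OPS are X-macro op lists;
        // expanding M below stringizes each op's enum name for display.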
3365 switch (stage.op) {
3366 #define M(x) case POp::x: opName = #x; break;
3367 SK_RASTER_PIPELINE_OPS_ALL(M)
3368 SKRP_EXTENDED_OPS(M)
3369 #undef M
3370 }
3371
3372 std::string opText;
3373 switch (stage.op) {
3374 case POp::trace_var:
3375 opText = "TraceVar(" + opArg2 + opArg3 + ") when " + opArg1 + " is true";
3376 break;
3377
3378 case POp::trace_line:
3379 opText = "TraceLine(" + opArg2 + ") when " + opArg1 + " is true";
3380 break;
3381
3382 case POp::trace_enter:
3383 opText = "TraceEnter(" + opArg2 + ") when " + opArg1 + " is true";
3384 break;
3385
3386 case POp::trace_exit:
3387 opText = "TraceExit(" + opArg2 + ") when " + opArg1 + " is true";
3388 break;
3389
3390 case POp::trace_scope:
3391 opText = "TraceScope(" + opArg2 + ") when " + opArg1 + " is true";
3392 break;
3393
3394 case POp::init_lane_masks:
3395 opText = "CondMask = LoopMask = RetMask = true";
3396 break;
3397
3398 case POp::load_condition_mask:
3399 opText = "CondMask = " + opArg1;
3400 break;
3401
3402 case POp::store_condition_mask:
3403 opText = opArg1 + " = CondMask";
3404 break;
3405
3406 case POp::merge_condition_mask:
3407 opText = "CondMask = " + opArg1 + " & " + opArg2;
3408 break;
3409
3410 case POp::merge_inv_condition_mask:
3411 opText = "CondMask = " + opArg1 + " & ~" + opArg2;
3412 break;
3413
3414 case POp::load_loop_mask:
3415 opText = "LoopMask = " + opArg1;
3416 break;
3417
3418 case POp::store_loop_mask:
3419 opText = opArg1 + " = LoopMask";
3420 break;
3421
3422 case POp::mask_off_loop_mask:
3423 opText = "LoopMask &= ~(CondMask & LoopMask & RetMask)";
3424 break;
3425
3426 case POp::reenable_loop_mask:
3427 opText = "LoopMask |= " + opArg1;
3428 break;
3429
3430 case POp::merge_loop_mask:
3431 opText = "LoopMask &= " + opArg1;
3432 break;
3433
3434 case POp::load_return_mask:
3435 opText = "RetMask = " + opArg1;
3436 break;
3437
3438 case POp::store_return_mask:
3439 opText = opArg1 + " = RetMask";
3440 break;
3441
3442 case POp::mask_off_return_mask:
3443 opText = "RetMask &= ~(CondMask & LoopMask & RetMask)";
3444 break;
3445
3446 case POp::store_src_rg:
3447 opText = opArg1 + " = src.rg";
3448 break;
3449
3450 case POp::exchange_src:
3451 opText = "swap(src.rgba, " + opArg1 + ")";
3452 break;
3453
3454 case POp::store_src:
3455 opText = opArg1 + " = src.rgba";
3456 break;
3457
3458 case POp::store_dst:
3459 opText = opArg1 + " = dst.rgba";
3460 break;
3461
3462 case POp::store_device_xy01:
3463 opText = opArg1 + " = DeviceCoords.xy01";
3464 break;
3465
3466 case POp::load_src:
3467 opText = "src.rgba = " + opArg1;
3468 break;
3469
3470 case POp::load_dst:
3471 opText = "dst.rgba = " + opArg1;
3472 break;
3473
3474 case POp::bitwise_and_int:
3475 case POp::bitwise_and_2_ints:
3476 case POp::bitwise_and_3_ints:
3477 case POp::bitwise_and_4_ints:
3478 case POp::bitwise_and_n_ints:
3479 case POp::bitwise_and_imm_int:
3480 case POp::bitwise_and_imm_2_ints:
3481 case POp::bitwise_and_imm_3_ints:
3482 case POp::bitwise_and_imm_4_ints:
3483 opText = opArg1 + " &= " + opArg2;
3484 break;
3485
3486 case POp::bitwise_or_int:
3487 case POp::bitwise_or_2_ints:
3488 case POp::bitwise_or_3_ints:
3489 case POp::bitwise_or_4_ints:
3490 case POp::bitwise_or_n_ints:
3491 opText = opArg1 + " |= " + opArg2;
3492 break;
3493
3494 case POp::bitwise_xor_int:
3495 case POp::bitwise_xor_2_ints:
3496 case POp::bitwise_xor_3_ints:
3497 case POp::bitwise_xor_4_ints:
3498 case POp::bitwise_xor_n_ints:
3499 case POp::bitwise_xor_imm_int:
3500 opText = opArg1 + " ^= " + opArg2;
3501 break;
3502
3503 case POp::cast_to_float_from_int:
3504 case POp::cast_to_float_from_2_ints:
3505 case POp::cast_to_float_from_3_ints:
3506 case POp::cast_to_float_from_4_ints:
3507 opText = opArg1 + " = IntToFloat(" + opArg1 + ")";
3508 break;
3509
3510 case POp::cast_to_float_from_uint:
3511 case POp::cast_to_float_from_2_uints:
3512 case POp::cast_to_float_from_3_uints:
3513 case POp::cast_to_float_from_4_uints:
3514 opText = opArg1 + " = UintToFloat(" + opArg1 + ")";
3515 break;
3516
3517 case POp::cast_to_int_from_float:
3518 case POp::cast_to_int_from_2_floats:
3519 case POp::cast_to_int_from_3_floats:
3520 case POp::cast_to_int_from_4_floats:
3521 opText = opArg1 + " = FloatToInt(" + opArg1 + ")";
3522 break;
3523
3524 case POp::cast_to_uint_from_float:
3525 case POp::cast_to_uint_from_2_floats:
3526 case POp::cast_to_uint_from_3_floats:
3527 case POp::cast_to_uint_from_4_floats:
3528 opText = opArg1 + " = FloatToUint(" + opArg1 + ")";
3529 break;
3530
3531 case POp::copy_slot_masked: case POp::copy_2_slots_masked:
3532 case POp::copy_3_slots_masked: case POp::copy_4_slots_masked:
3533 case POp::swizzle_copy_slot_masked: case POp::swizzle_copy_2_slots_masked:
3534 case POp::swizzle_copy_3_slots_masked: case POp::swizzle_copy_4_slots_masked:
3535 opText = opArg1 + " = Mask(" + opArg2 + ")";
3536 break;
3537
3538 case POp::copy_uniform: case POp::copy_2_uniforms:
3539 case POp::copy_3_uniforms: case POp::copy_4_uniforms:
3540 case POp::copy_slot_unmasked: case POp::copy_2_slots_unmasked:
3541 case POp::copy_3_slots_unmasked: case POp::copy_4_slots_unmasked:
3542 case POp::copy_immutable_unmasked: case POp::copy_2_immutables_unmasked:
3543 case POp::copy_3_immutables_unmasked: case POp::copy_4_immutables_unmasked:
3544 case POp::copy_constant: case POp::splat_2_constants:
3545 case POp::splat_3_constants: case POp::splat_4_constants:
3546 case POp::swizzle_1: case POp::swizzle_2:
3547 case POp::swizzle_3: case POp::swizzle_4:
3548 case POp::shuffle:
3549 opText = opArg1 + " = " + opArg2;
3550 break;
3551
3552 case POp::copy_from_indirect_unmasked:
3553 case POp::copy_from_indirect_uniform_unmasked:
3554 opText = opArg1 + " = Indirect(" + opArg2 + " + " + opArg3 + ")";
3555 break;
3556
3557 case POp::copy_to_indirect_masked:
3558 opText = "Indirect(" + opArg1 + " + " + opArg3 + ") = Mask(" + opArg2 + ")";
3559 break;
3560
3561 case POp::swizzle_copy_to_indirect_masked:
3562 opText = "Indirect(" + opArg1 + " + " + opArg3 + ")." + opSwizzle + " = Mask(" +
3563 opArg2 + ")";
3564 break;
3565
3566 case POp::abs_int:
3567 case POp::abs_2_ints:
3568 case POp::abs_3_ints:
3569 case POp::abs_4_ints:
3570 opText = opArg1 + " = abs(" + opArg1 + ")";
3571 break;
3572
3573 case POp::acos_float:
3574 opText = opArg1 + " = acos(" + opArg1 + ")";
3575 break;
3576
3577 case POp::asin_float:
3578 opText = opArg1 + " = asin(" + opArg1 + ")";
3579 break;
3580
3581 case POp::atan_float:
3582 opText = opArg1 + " = atan(" + opArg1 + ")";
3583 break;
3584
3585 case POp::atan2_n_floats:
3586 opText = opArg1 + " = atan2(" + opArg1 + ", " + opArg2 + ")";
3587 break;
3588
3589 case POp::ceil_float:
3590 case POp::ceil_2_floats:
3591 case POp::ceil_3_floats:
3592 case POp::ceil_4_floats:
3593 opText = opArg1 + " = ceil(" + opArg1 + ")";
3594 break;
3595
3596 case POp::cos_float:
3597 opText = opArg1 + " = cos(" + opArg1 + ")";
3598 break;
3599
3600 case POp::refract_4_floats:
3601 opText = opArg1 + " = refract(" + opArg1 + ", " + opArg2 + ", " + opArg3 + ")";
3602 break;
3603
3604 case POp::dot_2_floats:
3605 case POp::dot_3_floats:
3606 case POp::dot_4_floats:
3607 opText = opArg1 + " = dot(" + opArg2 + ", " + opArg3 + ")";
3608 break;
3609
3610 case POp::exp_float:
3611 opText = opArg1 + " = exp(" + opArg1 + ")";
3612 break;
3613
3614 case POp::exp2_float:
3615 opText = opArg1 + " = exp2(" + opArg1 + ")";
3616 break;
3617
3618 case POp::log_float:
3619 opText = opArg1 + " = log(" + opArg1 + ")";
3620 break;
3621
3622 case POp::log2_float:
3623 opText = opArg1 + " = log2(" + opArg1 + ")";
3624 break;
3625
3626 case POp::pow_n_floats:
3627 opText = opArg1 + " = pow(" + opArg1 + ", " + opArg2 + ")";
3628 break;
3629
3630 case POp::sin_float:
3631 opText = opArg1 + " = sin(" + opArg1 + ")";
3632 break;
3633
3634 case POp::sqrt_float:
3635 opText = opArg1 + " = sqrt(" + opArg1 + ")";
3636 break;
3637
3638 case POp::tan_float:
3639 opText = opArg1 + " = tan(" + opArg1 + ")";
3640 break;
3641
3642 case POp::floor_float:
3643 case POp::floor_2_floats:
3644 case POp::floor_3_floats:
3645 case POp::floor_4_floats:
3646 opText = opArg1 + " = floor(" + opArg1 + ")";
3647 break;
3648
3649 case POp::invsqrt_float:
3650 case POp::invsqrt_2_floats:
3651 case POp::invsqrt_3_floats:
3652 case POp::invsqrt_4_floats:
3653 opText = opArg1 + " = inversesqrt(" + opArg1 + ")";
3654 break;
3655
3656 case POp::inverse_mat2:
3657 case POp::inverse_mat3:
3658 case POp::inverse_mat4:
3659 opText = opArg1 + " = inverse(" + opArg1 + ")";
3660 break;
3661
3662 case POp::add_float: case POp::add_int:
3663 case POp::add_2_floats: case POp::add_2_ints:
3664 case POp::add_3_floats: case POp::add_3_ints:
3665 case POp::add_4_floats: case POp::add_4_ints:
3666 case POp::add_n_floats: case POp::add_n_ints:
3667 case POp::add_imm_float: case POp::add_imm_int:
3668 opText = opArg1 + " += " + opArg2;
3669 break;
3670
3671 case POp::sub_float: case POp::sub_int:
3672 case POp::sub_2_floats: case POp::sub_2_ints:
3673 case POp::sub_3_floats: case POp::sub_3_ints:
3674 case POp::sub_4_floats: case POp::sub_4_ints:
3675 case POp::sub_n_floats: case POp::sub_n_ints:
3676 opText = opArg1 + " -= " + opArg2;
3677 break;
3678
3679 case POp::mul_float: case POp::mul_int:
3680 case POp::mul_2_floats: case POp::mul_2_ints:
3681 case POp::mul_3_floats: case POp::mul_3_ints:
3682 case POp::mul_4_floats: case POp::mul_4_ints:
3683 case POp::mul_n_floats: case POp::mul_n_ints:
3684 case POp::mul_imm_float: case POp::mul_imm_int:
3685 opText = opArg1 + " *= " + opArg2;
3686 break;
3687
3688 case POp::div_float: case POp::div_int: case POp::div_uint:
3689 case POp::div_2_floats: case POp::div_2_ints: case POp::div_2_uints:
3690 case POp::div_3_floats: case POp::div_3_ints: case POp::div_3_uints:
3691 case POp::div_4_floats: case POp::div_4_ints: case POp::div_4_uints:
3692 case POp::div_n_floats: case POp::div_n_ints: case POp::div_n_uints:
3693 opText = opArg1 + " /= " + opArg2;
3694 break;
3695
3696 case POp::matrix_multiply_2:
3697 case POp::matrix_multiply_3:
3698 case POp::matrix_multiply_4:
3699 opText = opArg1 + " = " + opArg2 + " * " + opArg3;
3700 break;
3701
3702 case POp::mod_float:
3703 case POp::mod_2_floats:
3704 case POp::mod_3_floats:
3705 case POp::mod_4_floats:
3706 case POp::mod_n_floats:
3707 opText = opArg1 + " = mod(" + opArg1 + ", " + opArg2 + ")";
3708 break;
3709
3710 case POp::min_float: case POp::min_int: case POp::min_uint:
3711 case POp::min_2_floats: case POp::min_2_ints: case POp::min_2_uints:
3712 case POp::min_3_floats: case POp::min_3_ints: case POp::min_3_uints:
3713 case POp::min_4_floats: case POp::min_4_ints: case POp::min_4_uints:
3714 case POp::min_n_floats: case POp::min_n_ints: case POp::min_n_uints:
3715 case POp::min_imm_float:
3716 opText = opArg1 + " = min(" + opArg1 + ", " + opArg2 + ")";
3717 break;
3718
3719 case POp::max_float: case POp::max_int: case POp::max_uint:
3720 case POp::max_2_floats: case POp::max_2_ints: case POp::max_2_uints:
3721 case POp::max_3_floats: case POp::max_3_ints: case POp::max_3_uints:
3722 case POp::max_4_floats: case POp::max_4_ints: case POp::max_4_uints:
3723 case POp::max_n_floats: case POp::max_n_ints: case POp::max_n_uints:
3724 case POp::max_imm_float:
3725 opText = opArg1 + " = max(" + opArg1 + ", " + opArg2 + ")";
3726 break;
3727
3728 case POp::cmplt_float: case POp::cmplt_int: case POp::cmplt_uint:
3729 case POp::cmplt_2_floats: case POp::cmplt_2_ints: case POp::cmplt_2_uints:
3730 case POp::cmplt_3_floats: case POp::cmplt_3_ints: case POp::cmplt_3_uints:
3731 case POp::cmplt_4_floats: case POp::cmplt_4_ints: case POp::cmplt_4_uints:
3732 case POp::cmplt_n_floats: case POp::cmplt_n_ints: case POp::cmplt_n_uints:
3733 case POp::cmplt_imm_float: case POp::cmplt_imm_int: case POp::cmplt_imm_uint:
3734 opText = opArg1 + " = lessThan(" + opArg1 + ", " + opArg2 + ")";
3735 break;
3736
3737 case POp::cmple_float: case POp::cmple_int: case POp::cmple_uint:
3738 case POp::cmple_2_floats: case POp::cmple_2_ints: case POp::cmple_2_uints:
3739 case POp::cmple_3_floats: case POp::cmple_3_ints: case POp::cmple_3_uints:
3740 case POp::cmple_4_floats: case POp::cmple_4_ints: case POp::cmple_4_uints:
3741 case POp::cmple_n_floats: case POp::cmple_n_ints: case POp::cmple_n_uints:
3742 case POp::cmple_imm_float: case POp::cmple_imm_int: case POp::cmple_imm_uint:
3743 opText = opArg1 + " = lessThanEqual(" + opArg1 + ", " + opArg2 + ")";
3744 break;
3745
3746 case POp::cmpeq_float: case POp::cmpeq_int:
3747 case POp::cmpeq_2_floats: case POp::cmpeq_2_ints:
3748 case POp::cmpeq_3_floats: case POp::cmpeq_3_ints:
3749 case POp::cmpeq_4_floats: case POp::cmpeq_4_ints:
3750 case POp::cmpeq_n_floats: case POp::cmpeq_n_ints:
3751 case POp::cmpeq_imm_float: case POp::cmpeq_imm_int:
3752 opText = opArg1 + " = equal(" + opArg1 + ", " + opArg2 + ")";
3753 break;
3754
3755 case POp::cmpne_float: case POp::cmpne_int:
3756 case POp::cmpne_2_floats: case POp::cmpne_2_ints:
3757 case POp::cmpne_3_floats: case POp::cmpne_3_ints:
3758 case POp::cmpne_4_floats: case POp::cmpne_4_ints:
3759 case POp::cmpne_n_floats: case POp::cmpne_n_ints:
3760 case POp::cmpne_imm_float: case POp::cmpne_imm_int:
3761 opText = opArg1 + " = notEqual(" + opArg1 + ", " + opArg2 + ")";
3762 break;
3763
3764 case POp::mix_float: case POp::mix_int:
3765 case POp::mix_2_floats: case POp::mix_2_ints:
3766 case POp::mix_3_floats: case POp::mix_3_ints:
3767 case POp::mix_4_floats: case POp::mix_4_ints:
3768 case POp::mix_n_floats: case POp::mix_n_ints:
3769 opText = opArg1 + " = mix(" + opArg2 + ", " + opArg3 + ", " + opArg1 + ")";
3770 break;
3771
3772 case POp::smoothstep_n_floats:
3773 opText = opArg1 + " = smoothstep(" + opArg1 + ", " + opArg2 + ", " + opArg3 + ")";
3774 break;
3775
3776 case POp::jump:
3777 case POp::branch_if_all_lanes_active:
3778 case POp::branch_if_any_lanes_active:
3779 case POp::branch_if_no_lanes_active:
3780 case POp::invoke_shader:
3781 case POp::invoke_color_filter:
3782 case POp::invoke_blender:
3783 opText = std::string(opName) + " " + opArg1;
3784 break;
3785
3786 case POp::invoke_to_linear_srgb:
3787 opText = opArg1 + " = toLinearSrgb(" + opArg1 + ")";
3788 break;
3789
3790 case POp::invoke_from_linear_srgb:
3791 opText = opArg1 + " = fromLinearSrgb(" + opArg1 + ")";
3792 break;
3793
3794 case POp::branch_if_no_active_lanes_eq:
3795 opText = "branch " + opArg1 + " if no lanes of " + opArg2 + " == " + opArg3;
3796 break;
3797
3798 case POp::label:
3799 opText = "label " + opArg1;
3800 break;
3801
3802 case POp::case_op:
3803 opText = "if (" + opArg1 + " == " + opArg3 +
3804 ") { LoopMask = true; " + opArg2 + " = false; }";
3805 break;
3806
3807 case POp::continue_op:
3808 opText = opArg1 +
3809 " |= Mask(0xFFFFFFFF); LoopMask &= ~(CondMask & LoopMask & RetMask)";
3810 break;
3811
3812 default:
3813 break;
3814 }
3815
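            // Truncate the op name so it fits the 30-character column used in
            // the two-column output below.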
3816 opName = opName.substr(0, 30);
3817 if (!opText.empty()) {
3818 out->writeText(SkSL::String::printf("%-30.*s %s\n",
3819 (int)opName.size(), opName.data(),
3820 opText.c_str()).c_str());
3821 } else {
3822 out->writeText(SkSL::String::printf("%.*s\n",
3823 (int)opName.size(), opName.data()).c_str());
3824 }
3825 }
3826 }
3827
3828 void Program::dump(SkWStream* out, bool writeInstructionCount) const {
3829 Dumper(*this).dump(out, writeInstructionCount);
3830 }
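
// A usage sketch (hypothetical; assumes `program` is an already-built
// SkSL::RP::Program):
//
//     SkDynamicMemoryWStream stream;
//     program.dump(&stream, /*writeInstructionCount=*/true);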
3831
3832 } // namespace SkSL::RP
3833