/*
 * Copyright 2022 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#ifndef SKSL_RASTERPIPELINEBUILDER
#define SKSL_RASTERPIPELINEBUILDER

#include "include/core/SkSpan.h"
#include "include/core/SkTypes.h"
#include "include/private/base/SkTArray.h"
#include "src/base/SkUtils.h"
#include "src/core/SkRasterPipelineOpList.h"

#include <cstddef>
#include <cstdint>
#include <memory>
#include <optional>

class SkArenaAlloc;
class SkRasterPipeline;
class SkWStream;
using SkRPOffset = uint32_t;

namespace SkSL {

class DebugTracePriv;
class TraceHook;

namespace RP {

// A single scalar in our program consumes one slot.
using Slot = int;
constexpr Slot NA = -1;

// Scalars, vectors, and matrices can be represented as a range of slot indices.
struct SlotRange {
    Slot index = 0;
    int count = 0;
};
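
// For instance (illustrative slot assignments, not taken from any real program): a scalar uses
// one slot, a float4 uses four consecutive slots, and a 3x3 matrix uses nine:
//
//     SlotRange scalar = {/*index=*/0, /*count=*/1};  // slot 0
//     SlotRange vec4   = {/*index=*/1, /*count=*/4};  // slots 1-4
//     SlotRange mat3   = {/*index=*/5, /*count=*/9};  // slots 5-13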

#define SKRP_EXTENDED_OPS(M)     \
    /* branch targets */         \
    M(label)                     \
                                 \
    /* child programs */         \
    M(invoke_shader)             \
    M(invoke_color_filter)       \
    M(invoke_blender)            \
                                 \
    /* color space transforms */ \
    M(invoke_to_linear_srgb)     \
    M(invoke_from_linear_srgb)

// An RP::Program will consist entirely of ProgramOps. The ProgramOps list is a superset of the
// native SkRasterPipelineOps op-list. It also has a few extra ops to indicate child-effect
// invocation, and a `label` op to indicate branch targets.
enum class ProgramOp {
    #define M(stage) stage,
        // A finished program can contain any native Raster Pipeline op...
        SK_RASTER_PIPELINE_OPS_ALL(M)

        // ... as well as our extended ops.
        SKRP_EXTENDED_OPS(M)
    #undef M
};

// BuilderOps are a superset of ProgramOps. They are used by the RP::Builder, which works in terms
// of Instructions; Instructions are slightly more expressive than raw SkRasterPipelineOps. In
// particular, the Builder supports stacks for pushing and popping scratch values.
// RP::Program::makeStages is responsible for rewriting Instructions/BuilderOps into an array of
// RP::Program::Stages, which will contain only native SkRasterPipelineOps and (optionally)
// child-effect invocations.
enum class BuilderOp {
    #define M(stage) stage,
        // An in-flight program can contain all the native Raster Pipeline ops...
        SK_RASTER_PIPELINE_OPS_ALL(M)

        // ... and our extended ops...
        SKRP_EXTENDED_OPS(M)
    #undef M

    // ... and also has Builder-specific ops. These ops generally interface with the stack, and
    // are converted into ProgramOps during `makeStages`.
    push_clone,
    push_clone_from_stack,
    push_clone_indirect_from_stack,
    push_constant,
    push_immutable,
    push_immutable_indirect,
    push_slots,
    push_slots_indirect,
    push_uniform,
    push_uniform_indirect,
    copy_stack_to_slots,
    copy_stack_to_slots_unmasked,
    copy_stack_to_slots_indirect,
    copy_uniform_to_slots_unmasked,
    store_immutable_value,
    swizzle_copy_stack_to_slots,
    swizzle_copy_stack_to_slots_indirect,
    discard_stack,
    pad_stack,
    select,
    push_condition_mask,
    pop_condition_mask,
    push_loop_mask,
    pop_loop_mask,
    pop_and_reenable_loop_mask,
    push_return_mask,
    pop_return_mask,
    push_src_rgba,
    push_dst_rgba,
    push_device_xy01,
    pop_src_rgba,
    pop_dst_rgba,
    trace_var_indirect,
    branch_if_no_active_lanes_on_stack_top_equal,
    unsupported
};

// If the extended ops are not in sync between enums, program creation will not work.
static_assert((int)ProgramOp::label == (int)BuilderOp::label);

// Represents a single raster-pipeline SkSL instruction.
struct Instruction {
    BuilderOp fOp;
    Slot fSlotA = NA;
    Slot fSlotB = NA;
    int fImmA = 0;
    int fImmB = 0;
    int fImmC = 0;
    int fImmD = 0;
    int fStackID = 0;
};
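
// As a rough example of the encoding (field values are illustrative; see the Builder methods
// below for the authoritative packing): a masked copy of three slots from slot 4 into slot 0 is
// one Instruction, with the slot count carried in the first immediate:
//
//     Instruction copy = {BuilderOp::copy_slot_masked,
//                         /*fSlotA=*/0,   // destination slot range starts at slot 0
//                         /*fSlotB=*/4,   // source slot range starts at slot 4
//                         /*fImmA=*/3};   // three slots are copied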

class Callbacks {
public:
    virtual ~Callbacks() = default;

    virtual bool appendShader(int index) = 0;
    virtual bool appendColorFilter(int index) = 0;
    virtual bool appendBlender(int index) = 0;

    virtual void toLinearSrgb(const void* color) = 0;
    virtual void fromLinearSrgb(const void* color) = 0;
};

class Program {
public:
    Program(skia_private::TArray<Instruction> instrs,
            int numValueSlots,
            int numUniformSlots,
            int numImmutableSlots,
            int numLabels,
            DebugTracePriv* debugTrace);
    ~Program();

    bool appendStages(SkRasterPipeline* pipeline,
                      SkArenaAlloc* alloc,
                      Callbacks* callbacks,
                      SkSpan<const float> uniforms) const;

    void dump(SkWStream* out, bool writeInstructionCount = false) const;

    int numUniforms() const { return fNumUniformSlots; }

private:
    using StackDepths = skia_private::TArray<int>;  // [stack index] = depth of stack

    struct SlotData {
        SkSpan<float> values;
        SkSpan<float> stack;
        SkSpan<float> immutable;
    };
    std::optional<SlotData> allocateSlotData(SkArenaAlloc* alloc) const;

    struct Stage {
        ProgramOp op;
        void* ctx;
    };
    void makeStages(skia_private::TArray<Stage>* pipeline,
                    SkArenaAlloc* alloc,
                    SkSpan<const float> uniforms,
                    const SlotData& slots) const;
    StackDepths tempStackMaxDepths() const;

    // These methods are used to split up multi-slot copies into multiple ops as needed.
    void appendCopy(skia_private::TArray<Stage>* pipeline,
                    SkArenaAlloc* alloc,
                    std::byte* basePtr,
                    ProgramOp baseStage,
                    SkRPOffset dst, int dstStride,
                    SkRPOffset src, int srcStride,
                    int numSlots) const;
    void appendCopyImmutableUnmasked(skia_private::TArray<Stage>* pipeline,
                                     SkArenaAlloc* alloc,
                                     std::byte* basePtr,
                                     SkRPOffset dst,
                                     SkRPOffset src,
                                     int numSlots) const;
    void appendCopySlotsUnmasked(skia_private::TArray<Stage>* pipeline,
                                 SkArenaAlloc* alloc,
                                 SkRPOffset dst,
                                 SkRPOffset src,
                                 int numSlots) const;
    void appendCopySlotsMasked(skia_private::TArray<Stage>* pipeline,
                               SkArenaAlloc* alloc,
                               SkRPOffset dst,
                               SkRPOffset src,
                               int numSlots) const;

    // Appends a single-slot single-input math operation to the pipeline. The op `stage` will be
    // appended `numSlots` times, starting at position `dst` and advancing one slot for each
    // subsequent invocation.
    void appendSingleSlotUnaryOp(skia_private::TArray<Stage>* pipeline, ProgramOp stage,
                                 float* dst, int numSlots) const;

    // Appends a multi-slot single-input math operation to the pipeline. `baseStage` must refer to
    // a single-slot "apply_op" stage, which must be immediately followed by specializations for
    // 2-4 slots. For instance, {`ceil_float`, `ceil_2_floats`, `ceil_3_floats`, `ceil_4_floats`}
    // must be contiguous ops in the stage list, listed in that order; pass `ceil_float` and we
    // pick the appropriate op based on `numSlots`.
    void appendMultiSlotUnaryOp(skia_private::TArray<Stage>* pipeline, ProgramOp baseStage,
                                float* dst, int numSlots) const;
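
    // Since the specializations are contiguous, that selection amounts to offsetting into the op
    // list (a conceptual sketch, not the literal implementation):
    //
    //     // 1 slot selects `baseStage` itself; 2-4 slots select the adjacent specializations.
    //     ProgramOp op = (ProgramOp)((int)baseStage + numSlots - 1);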

    // Appends an immediate-mode binary operation to the pipeline. `baseStage` must refer to
    // a single-slot, immediate-mode "apply-imm" stage, which must be immediately preceded by
    // specializations for 2-4 slots if numSlots is greater than 1. For instance,
    // {`add_imm_4_ints`, `add_imm_3_ints`, `add_imm_2_ints`, `add_imm_int`} must be contiguous
    // ops in the stage list, listed in that order; pass `add_imm_int` and we pick the appropriate
    // op based on `numSlots`. Some immediate-mode binary ops are single-slot only in the interest
    // of code size; in this case, the multi-slot ops can be absent, but numSlots must be 1.
    void appendImmediateBinaryOp(skia_private::TArray<Stage>* pipeline, SkArenaAlloc* alloc,
                                 ProgramOp baseStage,
                                 SkRPOffset dst, int32_t value, int numSlots) const;

    // Appends a two-input math operation to the pipeline. `src` must be _immediately_ after `dst`
    // in memory. `stage` must refer to an unbounded "apply_to_n_slots" stage. A BinaryOpCtx will
    // be used to pass pointers to the destination and source; the delta between the two pointers
    // implicitly gives the number of slots.
    void appendAdjacentNWayBinaryOp(skia_private::TArray<Stage>* pipeline, SkArenaAlloc* alloc,
                                    ProgramOp stage,
                                    SkRPOffset dst, SkRPOffset src, int numSlots) const;

    // Appends a multi-slot two-input math operation to the pipeline. `src` must be _immediately_
    // after `dst` in memory. `baseStage` must refer to an unbounded "apply_to_n_slots" stage,
    // which must be immediately followed by specializations for 1-4 slots. For instance,
    // {`add_n_floats`, `add_float`, `add_2_floats`, `add_3_floats`, `add_4_floats`} must be
    // contiguous ops in the stage list, listed in that order; pass `add_n_floats` and we pick the
    // appropriate op based on `numSlots`.
    void appendAdjacentMultiSlotBinaryOp(skia_private::TArray<Stage>* pipeline,
                                         SkArenaAlloc* alloc,
                                         ProgramOp baseStage, std::byte* basePtr,
                                         SkRPOffset dst, SkRPOffset src, int numSlots) const;

    // Appends a multi-slot math operation having three inputs (dst, src0, src1) and one output
    // (dst) to the pipeline. The three inputs must be _immediately_ adjacent in memory.
    // `baseStage` must refer to an unbounded "apply_to_n_slots" stage, which must be immediately
    // followed by specializations for 1-4 slots.
    void appendAdjacentMultiSlotTernaryOp(skia_private::TArray<Stage>* pipeline,
                                          SkArenaAlloc* alloc, ProgramOp baseStage,
                                          std::byte* basePtr, SkRPOffset dst, SkRPOffset src0,
                                          SkRPOffset src1, int numSlots) const;

    // Appends a math operation having three inputs (dst, src0, src1) and one output (dst) to the
    // pipeline. The three inputs must be _immediately_ adjacent in memory. `stage` must refer to
    // an unbounded "apply_to_n_slots" stage. A TernaryOpCtx will be used to pass pointers to the
    // destination and sources; the delta between each pointer implicitly gives the slot count.
    void appendAdjacentNWayTernaryOp(skia_private::TArray<Stage>* pipeline, SkArenaAlloc* alloc,
                                     ProgramOp stage, std::byte* basePtr, SkRPOffset dst,
                                     SkRPOffset src0, SkRPOffset src1, int numSlots) const;

    // Appends a stack_rewind op on platforms where it is needed (when SK_HAS_MUSTTAIL is not
    // set).
    void appendStackRewindForNonTailcallers(skia_private::TArray<Stage>* pipeline) const;

    // Appends a stack_rewind op unilaterally.
    void appendStackRewind(skia_private::TArray<Stage>* pipeline) const;

    class Dumper;
    friend class Dumper;

    skia_private::TArray<Instruction> fInstructions;
    int fNumValueSlots = 0;
    int fNumUniformSlots = 0;
    int fNumImmutableSlots = 0;
    int fNumTempStackSlots = 0;
    int fNumLabels = 0;
    StackDepths fTempStackMaxDepths;
    DebugTracePriv* fDebugTrace = nullptr;
    std::unique_ptr<SkSL::TraceHook> fTraceHook;
};

class Builder {
public:
    /** Finalizes and returns a completed program. */
    std::unique_ptr<Program> finish(int numValueSlots,
                                    int numUniformSlots,
                                    int numImmutableSlots,
                                    DebugTracePriv* debugTrace = nullptr);

    /**
     * Peels off a label ID for use in the program. Set the label's position in the program with
     * the `label` instruction. Actually branch to the target with an instruction like
     * `branch_if_any_lanes_active` or `jump`.
     */
    int nextLabelID() {
        return fNumLabels++;
    }
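
    // A minimal sketch of that pattern (these are real Builder methods declared below; the
    // control flow shown is only an illustration):
    //
    //     int skipLabel = builder.nextLabelID();
    //     builder.branch_if_no_lanes_active(skipLabel);  // forward branch to the label...
    //     /* ...emit instructions that run only when some lanes are active... */
    //     builder.label(skipLabel);                      // ...which is positioned here.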

    /**
     * The builder keeps track of the state of the execution mask; when we know that the execution
     * mask is unaltered, we can generate simpler code. Code which alters the execution mask is
     * required to enable this flag.
     */
    void enableExecutionMaskWrites() {
        ++fExecutionMaskWritesEnabled;
    }

    void disableExecutionMaskWrites() {
        SkASSERT(this->executionMaskWritesAreEnabled());
        --fExecutionMaskWritesEnabled;
    }

    bool executionMaskWritesAreEnabled() {
        return fExecutionMaskWritesEnabled > 0;
    }
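
    // The enable/disable calls nest like a counter, so a mask-altering construct simply brackets
    // its code generation. A sketch (the elided codegen in the middle is hypothetical):
    //
    //     builder.enableExecutionMaskWrites();
    //     /* ...emit an if-statement or loop, which writes to the execution mask... */
    //     builder.disableExecutionMaskWrites();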

    /** Assemble a program from the Raster Pipeline instructions below. */
    void init_lane_masks() {
        this->appendInstruction(BuilderOp::init_lane_masks, {});
    }

    void store_src_rg(SlotRange slots) {
        SkASSERT(slots.count == 2);
        this->appendInstruction(BuilderOp::store_src_rg, {slots.index});
    }

    void store_src(SlotRange slots) {
        SkASSERT(slots.count == 4);
        this->appendInstruction(BuilderOp::store_src, {slots.index});
    }

    void store_dst(SlotRange slots) {
        SkASSERT(slots.count == 4);
        this->appendInstruction(BuilderOp::store_dst, {slots.index});
    }

    void store_device_xy01(SlotRange slots) {
        SkASSERT(slots.count == 4);
        this->appendInstruction(BuilderOp::store_device_xy01, {slots.index});
    }

    void load_src(SlotRange slots) {
        SkASSERT(slots.count == 4);
        this->appendInstruction(BuilderOp::load_src, {slots.index});
    }

    void load_dst(SlotRange slots) {
        SkASSERT(slots.count == 4);
        this->appendInstruction(BuilderOp::load_dst, {slots.index});
    }

    void set_current_stack(int stackID) {
        fCurrentStackID = stackID;
    }

    // Inserts a label into the instruction stream.
    void label(int labelID);

    // Unconditionally branches to a label.
    void jump(int labelID);

    // Branches to a label if the execution mask is active in every lane.
    void branch_if_all_lanes_active(int labelID);

    // Branches to a label if the execution mask is active in any lane.
    void branch_if_any_lanes_active(int labelID);

    // Branches to a label if the execution mask is inactive across all lanes.
    void branch_if_no_lanes_active(int labelID);

    // Branches to a label if the top value on the stack is _not_ equal to `value` in any active
    // lane.
    void branch_if_no_active_lanes_on_stack_top_equal(int value, int labelID);

    // We use the same SkRasterPipeline op regardless of the literal type, and bitcast the value.
    void push_constant_i(int32_t val, int count = 1);

    void push_zeros(int count) {
        this->push_constant_i(/*val=*/0, count);
    }

    void push_constant_f(float val) {
        this->push_constant_i(sk_bit_cast<int32_t>(val), /*count=*/1);
    }

    void push_constant_u(uint32_t val, int count = 1) {
        this->push_constant_i(sk_bit_cast<int32_t>(val), count);
    }

    // Translates into copy_uniforms (from uniforms into temp stack) in Raster Pipeline.
    void push_uniform(SlotRange src);

    // Initializes a Raster Pipeline slot with a constant value when the program is first created.
    // Does not add any instructions to the program.
    void store_immutable_value_i(Slot slot, int32_t val) {
        this->appendInstruction(BuilderOp::store_immutable_value, {slot}, val);
    }

    // Translates into copy_uniforms (from uniforms into value-slots) in Raster Pipeline.
    void copy_uniform_to_slots_unmasked(SlotRange dst, SlotRange src);

    // Translates into copy_from_indirect_uniform_unmasked (from uniforms into temp stack) in
    // Raster Pipeline. `fixedRange` denotes a fixed set of slots; this range is pushed forward by
    // the value at the top of stack `dynamicStack`. Pass the range of the uniform being indexed
    // as `limitRange`; this is used as a hard cap, to avoid indexing outside of bounds.
    void push_uniform_indirect(SlotRange fixedRange, int dynamicStack, SlotRange limitRange);

    // Translates into copy_slots_unmasked (from values into temp stack) in Raster Pipeline.
    void push_slots(SlotRange src) {
        this->push_slots_or_immutable(src, BuilderOp::push_slots);
    }

    // Translates into copy_immutable_unmasked (from immutables into temp stack) in Raster
    // Pipeline.
    void push_immutable(SlotRange src) {
        this->push_slots_or_immutable(src, BuilderOp::push_immutable);
    }

    void push_slots_or_immutable(SlotRange src, BuilderOp op);

    // Translates into copy_from_indirect_unmasked (from values into temp stack) in Raster
    // Pipeline. `fixedRange` denotes a fixed set of slots; this range is pushed forward by the
    // value at the top of stack `dynamicStack`. Pass the slot range of the variable being indexed
    // as `limitRange`; this is used as a hard cap, to avoid indexing outside of bounds.
    void push_slots_indirect(SlotRange fixedRange, int dynamicStack, SlotRange limitRange) {
        this->push_slots_or_immutable_indirect(fixedRange, dynamicStack, limitRange,
                                               BuilderOp::push_slots_indirect);
    }

    void push_immutable_indirect(SlotRange fixedRange, int dynamicStack, SlotRange limitRange) {
        this->push_slots_or_immutable_indirect(fixedRange, dynamicStack, limitRange,
                                               BuilderOp::push_immutable_indirect);
    }

    void push_slots_or_immutable_indirect(SlotRange fixedRange, int dynamicStack,
                                          SlotRange limitRange, BuilderOp op);
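
    // A worked example of the indirect-push parameters (slot numbers are hypothetical): suppose
    // array `arr` occupies slots 10-17 (eight slots) and we want to push `arr[i]`, where `i` sits
    // on top of stack `idxStack`:
    //
    //     //                  fixedRange: arr[0]  dynamicStack  limitRange: all of arr
    //     builder.push_slots_indirect({10, 1},    idxStack,     {10, 8});
    //
    // The slot actually read is 10 + i, and the limit range prevents reads past slot 17.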

    // Translates into copy_slots_masked (from temp stack to values) in Raster Pipeline.
    // Does not discard any values on the temp stack.
    void copy_stack_to_slots(SlotRange dst) {
        this->copy_stack_to_slots(dst, /*offsetFromStackTop=*/dst.count);
    }

    void copy_stack_to_slots(SlotRange dst, int offsetFromStackTop);

    // Translates into swizzle_copy_slots_masked (from temp stack to values) in Raster Pipeline.
    // Does not discard any values on the temp stack.
    void swizzle_copy_stack_to_slots(SlotRange dst,
                                     SkSpan<const int8_t> components,
                                     int offsetFromStackTop);

    // Translates into swizzle_copy_to_indirect_masked (from temp stack to values) in Raster
    // Pipeline. Does not discard any values on the temp stack.
    void swizzle_copy_stack_to_slots_indirect(SlotRange fixedRange,
                                              int dynamicStackID,
                                              SlotRange limitRange,
                                              SkSpan<const int8_t> components,
                                              int offsetFromStackTop);

    // Translates into copy_slots_unmasked (from temp stack to values) in Raster Pipeline.
    // Does not discard any values on the temp stack.
    void copy_stack_to_slots_unmasked(SlotRange dst) {
        this->copy_stack_to_slots_unmasked(dst, /*offsetFromStackTop=*/dst.count);
    }

    void copy_stack_to_slots_unmasked(SlotRange dst, int offsetFromStackTop);

    // Translates into copy_to_indirect_masked (from temp stack into values) in Raster Pipeline.
    // `fixedRange` denotes a fixed set of slots; this range is pushed forward by the value at the
    // top of stack `dynamicStack`. Pass the slot range of the variable being indexed as
    // `limitRange`; this is used as a hard cap, to avoid indexing outside of bounds.
    void copy_stack_to_slots_indirect(SlotRange fixedRange,
                                      int dynamicStackID,
                                      SlotRange limitRange);

    // Copies from temp stack to slots, including an indirect offset, then shrinks the temp stack.
    void pop_slots_indirect(SlotRange fixedRange, int dynamicStackID, SlotRange limitRange) {
        this->copy_stack_to_slots_indirect(fixedRange, dynamicStackID, limitRange);
        this->discard_stack(fixedRange.count);
    }

    // Performs a unary op (like `bitwise_not`), given a slot count of `slots`. The stack top is
    // replaced with the result.
    void unary_op(BuilderOp op, int32_t slots);

    // Performs a binary op (like `add_n_floats` or `cmpeq_n_ints`), given a slot count of
    // `slots`. Two n-slot input values are consumed, and the result is pushed onto the stack.
    void binary_op(BuilderOp op, int32_t slots);

    // Performs a ternary op (like `mix` or `smoothstep`), given a slot count of `slots`. Three
    // n-slot input values are consumed, and the result is pushed onto the stack.
    void ternary_op(BuilderOp op, int32_t slots);
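
    // For example, adding two float3 values might look like this (the slot ranges are
    // hypothetical; the stack behavior follows the comments above):
    //
    //     builder.push_slots({0, 3});                     // stack: a.xyz
    //     builder.push_slots({3, 3});                     // stack: a.xyz b.xyz
    //     builder.binary_op(BuilderOp::add_n_floats, 3);  // stack: (a+b).xyz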

    // Computes a dot product on the stack. The slots consumed (`slots`) must be between 1 and 4.
    // Two n-slot input vectors are consumed, and a scalar result is pushed onto the stack.
    void dot_floats(int32_t slots);

    // Computes refract(N, I, eta) on the stack. N and I are assumed to be 4-slot vectors, and can
    // be padded with zeros for smaller inputs. Eta is a scalar. The result is a 4-slot vector.
    void refract_floats();

    // Computes inverse(matN) on the stack. Pass 2, 3 or 4 for `n` to specify the matrix size.
    void inverse_matrix(int32_t n);

    // Shrinks the temp stack, discarding values on top.
    void discard_stack(int32_t count, int stackID);

    void discard_stack(int32_t count) {
        this->discard_stack(count, fCurrentStackID);
    }

    // Grows the temp stack, leaving any preexisting values in place.
    void pad_stack(int32_t count);

    // Copies values from the temp stack into slots, and then shrinks the temp stack.
    void pop_slots(SlotRange dst);

    // Creates many clones of the top single-slot item on the temp stack.
    void push_duplicates(int count);

    // Creates a single clone of an item on the current temp stack. The cloned item can consist of
    // any number of slots, and can be copied from an earlier position on the stack.
    void push_clone(int numSlots, int offsetFromStackTop = 0);

    // Clones a range of slots from another stack onto this stack.
    void push_clone_from_stack(SlotRange range, int otherStackID, int offsetFromStackTop);

    // Translates into copy_from_indirect_unmasked (from one temp stack to another) in Raster
    // Pipeline. `fixedOffset` denotes a range of slots within the top `offsetFromStackTop` slots
    // of `otherStackID`. This range is pushed forward by the value at the top of
    // `dynamicStackID`.
    void push_clone_indirect_from_stack(SlotRange fixedOffset,
                                        int dynamicStackID,
                                        int otherStackID,
                                        int offsetFromStackTop);

    // Compares the stack top with the passed-in value; if it matches, enables the loop mask.
    void case_op(int value) {
        this->appendInstruction(BuilderOp::case_op, {}, value);
    }

    // Performs a `continue` in a loop.
    void continue_op(int continueMaskStackID) {
        this->appendInstruction(BuilderOp::continue_op, {}, continueMaskStackID);
    }

    // Overlays the top two entries on the stack, making one hybrid entry. The execution mask
    // is used to select which lanes are preserved.
    void select(int slots) {
        SkASSERT(slots > 0);
        this->appendInstruction(BuilderOp::select, {}, slots);
    }

    // The opposite of push_slots; copies values from the temp stack into value slots, then
    // shrinks the temp stack.
    void pop_slots_unmasked(SlotRange dst);

    void copy_slots_masked(SlotRange dst, SlotRange src) {
        SkASSERT(dst.count == src.count);
        this->appendInstruction(BuilderOp::copy_slot_masked, {dst.index, src.index}, dst.count);
    }

    void copy_slots_unmasked(SlotRange dst, SlotRange src);

    void copy_immutable_unmasked(SlotRange dst, SlotRange src);

    // Directly writes a constant value into a slot.
    void copy_constant(Slot slot, int constantValue);

    // Stores zeros across the entire slot range.
    void zero_slots_unmasked(SlotRange dst);

    // Consumes `consumedSlots` elements on the stack, then generates `components.size()`
    // elements.
    void swizzle(int consumedSlots, SkSpan<const int8_t> components);

    // Transposes a matrix of size CxR on the stack (into a matrix of size RxC).
    void transpose(int columns, int rows);

    // Generates a CxR diagonal matrix from the top two scalars on the stack. The second scalar is
    // used as the diagonal value; the first scalar (usually zero) fills in the rest of the slots.
    void diagonal_matrix(int columns, int rows);

    // Resizes a CxR matrix at the top of the stack to C'xR'.
    void matrix_resize(int origColumns, int origRows, int newColumns, int newRows);

    // Multiplies a CxR matrix/vector against an adjacent CxR matrix/vector on the stack.
    void matrix_multiply(int leftColumns, int leftRows, int rightColumns, int rightRows);

    void push_condition_mask();

    void pop_condition_mask() {
        SkASSERT(this->executionMaskWritesAreEnabled());
        this->appendInstruction(BuilderOp::pop_condition_mask, {});
    }

    void merge_condition_mask();

    void merge_inv_condition_mask() {
        SkASSERT(this->executionMaskWritesAreEnabled());
        this->appendInstruction(BuilderOp::merge_inv_condition_mask, {});
    }
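
    // These mask ops combine to lower structured control flow. A rough sketch of an if/else
    // (the ordering is illustrative; the authoritative lowering lives in the SkSL Raster
    // Pipeline code generator):
    //
    //     builder.push_condition_mask();       // save the current condition mask
    //     /* ...push the test condition's value onto the stack... */
    //     builder.merge_condition_mask();      // AND the condition into the mask
    //     /* ...emit the true-branch... */
    //     builder.merge_inv_condition_mask();  // switch to lanes where the condition was false
    //     /* ...emit the false-branch... */
    //     builder.pop_condition_mask();        // restore the saved mask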

    void push_loop_mask() {
        SkASSERT(this->executionMaskWritesAreEnabled());
        this->appendInstruction(BuilderOp::push_loop_mask, {});
    }

    void pop_loop_mask() {
        SkASSERT(this->executionMaskWritesAreEnabled());
        this->appendInstruction(BuilderOp::pop_loop_mask, {});
    }

    // Exchanges src.rgba with the four values at the top of the stack.
    void exchange_src();

    void push_src_rgba() {
        this->appendInstruction(BuilderOp::push_src_rgba, {});
    }

    void push_dst_rgba() {
        this->appendInstruction(BuilderOp::push_dst_rgba, {});
    }

    void push_device_xy01() {
        this->appendInstruction(BuilderOp::push_device_xy01, {});
    }

    void pop_src_rgba();

    void pop_dst_rgba() {
        this->appendInstruction(BuilderOp::pop_dst_rgba, {});
    }

    void mask_off_loop_mask() {
        SkASSERT(this->executionMaskWritesAreEnabled());
        this->appendInstruction(BuilderOp::mask_off_loop_mask, {});
    }

    void reenable_loop_mask(SlotRange src) {
        SkASSERT(this->executionMaskWritesAreEnabled());
        SkASSERT(src.count == 1);
        this->appendInstruction(BuilderOp::reenable_loop_mask, {src.index});
    }

    void pop_and_reenable_loop_mask() {
        SkASSERT(this->executionMaskWritesAreEnabled());
        this->appendInstruction(BuilderOp::pop_and_reenable_loop_mask, {});
    }

    void merge_loop_mask() {
        SkASSERT(this->executionMaskWritesAreEnabled());
        this->appendInstruction(BuilderOp::merge_loop_mask, {});
    }

    void push_return_mask() {
        SkASSERT(this->executionMaskWritesAreEnabled());
        this->appendInstruction(BuilderOp::push_return_mask, {});
    }

    void pop_return_mask();

    void mask_off_return_mask() {
        SkASSERT(this->executionMaskWritesAreEnabled());
        this->appendInstruction(BuilderOp::mask_off_return_mask, {});
    }

    void invoke_shader(int childIdx);
    void invoke_color_filter(int childIdx);
    void invoke_blender(int childIdx);
    void invoke_to_linear_srgb();
    void invoke_from_linear_srgb();

    // Writes the current line number to the debug trace.
    void trace_line(int traceMaskStackID, int line) {
        this->appendInstruction(BuilderOp::trace_line, {}, traceMaskStackID, line);
    }

    // Writes a variable update to the debug trace.
    void trace_var(int traceMaskStackID, SlotRange r) {
        this->appendInstruction(BuilderOp::trace_var, {r.index}, traceMaskStackID, r.count);
    }

    // Writes a variable update (via indirection) to the debug trace.
    void trace_var_indirect(int traceMaskStackID, SlotRange fixedRange,
                            int dynamicStackID, SlotRange limitRange);

    // Writes a function-entrance to the debug trace.
    void trace_enter(int traceMaskStackID, int funcID) {
        this->appendInstruction(BuilderOp::trace_enter, {}, traceMaskStackID, funcID);
    }

    // Writes a function-exit to the debug trace.
    void trace_exit(int traceMaskStackID, int funcID) {
        this->appendInstruction(BuilderOp::trace_exit, {}, traceMaskStackID, funcID);
    }
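
    // Taken together, tracing a function call might be sequenced like this (the IDs are
    // hypothetical; the real sequencing is decided by the code generator):
    //
    //     builder.trace_enter(traceMask, /*funcID=*/2);  // enter function #2
    //     builder.trace_line(traceMask, /*line=*/14);    // execution reached line 14
    //     builder.trace_var(traceMask, {0, 1});          // slot 0 (a 1-slot variable) changed
    //     builder.trace_exit(traceMask, /*funcID=*/2);   // leave function #2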

    // Writes a scope-level change to the debug trace.
    void trace_scope(int traceMaskStackID, int delta) {
        this->appendInstruction(BuilderOp::trace_scope, {}, traceMaskStackID, delta);
    }

private:
    struct SlotList {
        SlotList(Slot a = NA, Slot b = NA) : fSlotA(a), fSlotB(b) {}
        Slot fSlotA = NA;
        Slot fSlotB = NA;
    };
    void appendInstruction(BuilderOp op, SlotList slots,
                           int a = 0, int b = 0, int c = 0, int d = 0);
    Instruction* lastInstruction(int fromBack = 0);
    Instruction* lastInstructionOnAnyStack(int fromBack = 0);
    void simplifyPopSlotsUnmasked(SlotRange* dst);
    bool simplifyImmediateUnmaskedOp();

    skia_private::TArray<Instruction> fInstructions;
    int fNumLabels = 0;
    int fExecutionMaskWritesEnabled = 0;
    int fCurrentStackID = 0;
};

}  // namespace RP
}  // namespace SkSL

#endif  // SKSL_RASTERPIPELINEBUILDER