xref: /aosp_15_r20/external/skia/tests/SkRasterPipelineTest.cpp (revision c8dee2aa9b3f27cf6c858bd81872bdeb2c07ed17)
/*
 * Copyright 2016 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */
7 
#include "include/private/base/SkTo.h"
#include "src/base/SkHalf.h"
#include "src/base/SkUtils.h"
#include "src/core/SkOpts.h"
#include "src/core/SkRasterPipeline.h"
#include "src/core/SkRasterPipelineContextUtils.h"
#include "src/gpu/Swizzle.h"
#include "src/sksl/tracing/SkSLTraceHook.h"
#include "tests/Test.h"

#include <algorithm>
#include <array>
#include <cmath>
#include <cstdint>
#include <numeric>

using namespace skia_private;
22 
DEF_TEST(SkRasterPipeline, r) {
    // Build and run a simple pipeline to exercise SkRasterPipeline,
    // drawing 50% transparent blue over opaque red in half-floats.
    // Half-float bit patterns used below: 0x3c00 == 1.0h, 0x3800 == 0.5h.
    // Each pixel is packed RGBA, 16 bits per channel, R in the low word.
    uint64_t red  = 0x3c00000000003c00ull,
             blue = 0x3800380000000000ull,
             result;

    SkRasterPipeline_MemoryCtx load_s_ctx = { &blue, 0 },
                               load_d_ctx = { &red, 0 },
                               store_ctx  = { &result, 0 };

    SkRasterPipeline_<256> p;
    p.append(SkRasterPipelineOp::load_f16,     &load_s_ctx);
    p.append(SkRasterPipelineOp::load_f16_dst, &load_d_ctx);
    p.append(SkRasterPipelineOp::srcover);
    p.append(SkRasterPipelineOp::store_f16, &store_ctx);
    p.run(0,0,1,1);

    // We should see half-intensity magenta: R=0.5, G=0, B=0.5, A=1.
    REPORTER_ASSERT(r, ((result >>  0) & 0xffff) == 0x3800);
    REPORTER_ASSERT(r, ((result >> 16) & 0xffff) == 0x0000);
    REPORTER_ASSERT(r, ((result >> 32) & 0xffff) == 0x3800);
    REPORTER_ASSERT(r, ((result >> 48) & 0xffff) == 0x3c00);
}

DEF_TEST(SkRasterPipeline_PackSmallContext, r) {
    // An object no larger than a pointer should be packed directly into the
    // context pointer itself, without touching the arena.
    struct PackableObject {
        std::array<uint8_t, sizeof(void*)> data;
    };

    // Create an arena with storage.
    using StorageArray = std::array<char, 128>;
    StorageArray storage = {};
    SkArenaAllocWithReset alloc(storage.data(), storage.size(), 500);

    // Construct and pack one PackableObject.
    PackableObject object;
    std::fill(object.data.begin(), object.data.end(), 123);

    const void* packed = SkRPCtxUtils::Pack(object, &alloc);

    // The alloc should still be empty.
    REPORTER_ASSERT(r, alloc.isEmpty());

    // `packed` should now contain a bitwise cast of the raw object data.
    uintptr_t objectBits = sk_bit_cast<uintptr_t>(packed);
    for (size_t index = 0; index < sizeof(void*); ++index) {
        REPORTER_ASSERT(r, (objectBits & 0xFF) == 123);
        objectBits >>= 8;
    }

    // Now unpack it.
    auto unpacked = SkRPCtxUtils::Unpack((const PackableObject*)packed);

    // The data should be identical to the original.
    REPORTER_ASSERT(r, unpacked.data == object.data);
}

DEF_TEST(SkRasterPipeline_PackBigContext, r) {
    // An object even one byte larger than a pointer cannot be packed inline,
    // so Pack must fall back to copying it into the arena.
    struct BigObject {
        std::array<uint8_t, sizeof(void*) + 1> data;
    };

    // Create an arena with storage.
    using StorageArray = std::array<char, 128>;
    StorageArray storage = {};
    SkArenaAllocWithReset alloc(storage.data(), storage.size(), 500);

    // Construct and pack one BigObject.
    BigObject object;
    std::fill(object.data.begin(), object.data.end(), 123);

    const void* packed = SkRPCtxUtils::Pack(object, &alloc);

    // The alloc should not be empty any longer.
    REPORTER_ASSERT(r, !alloc.isEmpty());

    // Now unpack it.
    auto unpacked = SkRPCtxUtils::Unpack((const BigObject*)packed);

    // The data should be identical to the original.
    REPORTER_ASSERT(r, unpacked.data == object.data);
}

DEF_TEST(SkRasterPipeline_LoadStoreConditionMask, reporter) {
    // Round-trips a condition mask through load/store ops. The condition mask
    // lives in `r`, and the combined execution mask in `a`.
    alignas(64) int32_t mask[16]  = {~0, 0, ~0, 0, ~0, ~0, ~0, 0, ~0, 0, ~0, 0, ~0, ~0, ~0, 0};
    alignas(64) int32_t maskCopy[SkRasterPipeline_kMaxStride_highp] = {};
    alignas(64) int32_t src[4 * SkRasterPipeline_kMaxStride_highp] = {};

    static_assert(std::size(mask) == SkRasterPipeline_kMaxStride_highp);

    SkRasterPipeline_<256> p;
    SkRasterPipeline_InitLaneMasksCtx initLaneMasksCtx;
    p.append(SkRasterPipelineOp::init_lane_masks, &initLaneMasksCtx);
    p.append(SkRasterPipelineOp::load_condition_mask, mask);
    p.append(SkRasterPipelineOp::store_condition_mask, maskCopy);
    p.append(SkRasterPipelineOp::store_src, src);
    p.run(0,0,SkOpts::raster_pipeline_highp_stride,1);

    {
        // `maskCopy` should be populated with `mask` in the frontmost positions
        // (depending on the architecture that SkRasterPipeline is targeting).
        size_t index = 0;
        for (; index < SkOpts::raster_pipeline_highp_stride; ++index) {
            REPORTER_ASSERT(reporter, maskCopy[index] == mask[index]);
        }

        // The remaining slots should have been left alone.
        for (; index < std::size(maskCopy); ++index) {
            REPORTER_ASSERT(reporter, maskCopy[index] == 0);
        }
    }
    {
        // `r` and `a` should be populated with `mask`.
        // `g` and `b` should remain initialized to true.
        const int r = 0 * SkOpts::raster_pipeline_highp_stride;
        const int g = 1 * SkOpts::raster_pipeline_highp_stride;
        const int b = 2 * SkOpts::raster_pipeline_highp_stride;
        const int a = 3 * SkOpts::raster_pipeline_highp_stride;
        for (size_t index = 0; index < SkOpts::raster_pipeline_highp_stride; ++index) {
            REPORTER_ASSERT(reporter, src[r + index] == mask[index]);
            REPORTER_ASSERT(reporter, src[g + index] == ~0);
            REPORTER_ASSERT(reporter, src[b + index] == ~0);
            REPORTER_ASSERT(reporter, src[a + index] == mask[index]);
        }
    }
}

DEF_TEST(SkRasterPipeline_LoadStoreLoopMask, reporter) {
    // Round-trips a loop mask through load/store ops. The loop mask lives in
    // `g`, and the combined execution mask in `a`.
    alignas(64) int32_t mask[16]  = {~0, 0, ~0, 0, ~0, ~0, ~0, 0, ~0, 0, ~0, 0, ~0, ~0, ~0, 0};
    alignas(64) int32_t maskCopy[SkRasterPipeline_kMaxStride_highp] = {};
    alignas(64) int32_t src[4 * SkRasterPipeline_kMaxStride_highp] = {};

    static_assert(std::size(mask) == SkRasterPipeline_kMaxStride_highp);

    SkRasterPipeline_<256> p;
    SkRasterPipeline_InitLaneMasksCtx initLaneMasksCtx;
    p.append(SkRasterPipelineOp::init_lane_masks, &initLaneMasksCtx);
    p.append(SkRasterPipelineOp::load_loop_mask, mask);
    p.append(SkRasterPipelineOp::store_loop_mask, maskCopy);
    p.append(SkRasterPipelineOp::store_src, src);
    p.run(0,0,SkOpts::raster_pipeline_highp_stride,1);

    {
        // `maskCopy` should be populated with `mask` in the frontmost positions
        // (depending on the architecture that SkRasterPipeline is targeting).
        size_t index = 0;
        for (; index < SkOpts::raster_pipeline_highp_stride; ++index) {
            REPORTER_ASSERT(reporter, maskCopy[index] == mask[index]);
        }

        // The remaining slots should have been left alone.
        for (; index < std::size(maskCopy); ++index) {
            REPORTER_ASSERT(reporter, maskCopy[index] == 0);
        }
    }
    {
        // `g` and `a` should be populated with `mask`.
        // `r` and `b` should remain initialized to true.
        const int r = 0 * SkOpts::raster_pipeline_highp_stride;
        const int g = 1 * SkOpts::raster_pipeline_highp_stride;
        const int b = 2 * SkOpts::raster_pipeline_highp_stride;
        const int a = 3 * SkOpts::raster_pipeline_highp_stride;
        for (size_t index = 0; index < SkOpts::raster_pipeline_highp_stride; ++index) {
            REPORTER_ASSERT(reporter, src[r + index] == ~0);
            REPORTER_ASSERT(reporter, src[g + index] == mask[index]);
            REPORTER_ASSERT(reporter, src[b + index] == ~0);
            REPORTER_ASSERT(reporter, src[a + index] == mask[index]);
        }
    }
}

DEF_TEST(SkRasterPipeline_LoadStoreReturnMask, reporter) {
    // Round-trips a return mask through load/store ops. The return mask lives
    // in `b`, and the combined execution mask in `a`.
    alignas(64) int32_t mask[16]  = {~0, 0, ~0, 0, ~0, ~0, ~0, 0, ~0, 0, ~0, 0, ~0, ~0, ~0, 0};
    alignas(64) int32_t maskCopy[SkRasterPipeline_kMaxStride_highp] = {};
    alignas(64) int32_t src[4 * SkRasterPipeline_kMaxStride_highp] = {};

    static_assert(std::size(mask) == SkRasterPipeline_kMaxStride_highp);

    SkRasterPipeline_<256> p;
    SkRasterPipeline_InitLaneMasksCtx initLaneMasksCtx;
    p.append(SkRasterPipelineOp::init_lane_masks, &initLaneMasksCtx);
    p.append(SkRasterPipelineOp::load_return_mask, mask);
    p.append(SkRasterPipelineOp::store_return_mask, maskCopy);
    p.append(SkRasterPipelineOp::store_src, src);
    p.run(0,0,SkOpts::raster_pipeline_highp_stride,1);

    {
        // `maskCopy` should be populated with `mask` in the frontmost positions
        // (depending on the architecture that SkRasterPipeline is targeting).
        size_t index = 0;
        for (; index < SkOpts::raster_pipeline_highp_stride; ++index) {
            REPORTER_ASSERT(reporter, maskCopy[index] == mask[index]);
        }

        // The remaining slots should have been left alone.
        for (; index < std::size(maskCopy); ++index) {
            REPORTER_ASSERT(reporter, maskCopy[index] == 0);
        }
    }
    {
        // `b` and `a` should be populated with `mask`.
        // `r` and `g` should remain initialized to true.
        const int r = 0 * SkOpts::raster_pipeline_highp_stride;
        const int g = 1 * SkOpts::raster_pipeline_highp_stride;
        const int b = 2 * SkOpts::raster_pipeline_highp_stride;
        const int a = 3 * SkOpts::raster_pipeline_highp_stride;
        for (size_t index = 0; index < SkOpts::raster_pipeline_highp_stride; ++index) {
            REPORTER_ASSERT(reporter, src[r + index] == ~0);
            REPORTER_ASSERT(reporter, src[g + index] == ~0);
            REPORTER_ASSERT(reporter, src[b + index] == mask[index]);
            REPORTER_ASSERT(reporter, src[a + index] == mask[index]);
        }
    }
}

DEF_TEST(SkRasterPipeline_MergeConditionMask, reporter) {
    // `mask` holds two stride-sized mask vectors back to back; merge_condition_mask
    // ANDs them together into the condition mask (`r`).
    alignas(64) int32_t mask[32]  = { 0, 0, ~0, ~0, 0, ~0, 0, ~0,
                                      ~0, ~0, ~0, ~0, 0, 0, 0, 0,
                                      0, 0, ~0, ~0, 0, ~0, 0, ~0,
                                      ~0, ~0, ~0, ~0, 0, 0, 0, 0};
    alignas(64) int32_t src[4 * SkRasterPipeline_kMaxStride_highp] = {};
    static_assert(std::size(mask) == (2 * SkRasterPipeline_kMaxStride_highp));

    SkRasterPipeline_<256> p;
    SkRasterPipeline_InitLaneMasksCtx initLaneMasksCtx;
    p.append(SkRasterPipelineOp::init_lane_masks, &initLaneMasksCtx);
    p.append(SkRasterPipelineOp::merge_condition_mask, mask);
    p.append(SkRasterPipelineOp::store_src, src);
    p.run(0,0,SkOpts::raster_pipeline_highp_stride,1);

    // `r` and `a` should be populated with `mask[x] & mask[y]` in the frontmost positions.
    // `g` and `b` should remain initialized to true.
    const int r = 0 * SkOpts::raster_pipeline_highp_stride;
    const int g = 1 * SkOpts::raster_pipeline_highp_stride;
    const int b = 2 * SkOpts::raster_pipeline_highp_stride;
    const int a = 3 * SkOpts::raster_pipeline_highp_stride;
    for (size_t index = 0; index < SkOpts::raster_pipeline_highp_stride; ++index) {
        int32_t expected = mask[index] & mask[index + SkOpts::raster_pipeline_highp_stride];
        REPORTER_ASSERT(reporter, src[r + index] == expected);
        REPORTER_ASSERT(reporter, src[g + index] == ~0);
        REPORTER_ASSERT(reporter, src[b + index] == ~0);
        REPORTER_ASSERT(reporter, src[a + index] == expected);
    }
}

DEF_TEST(SkRasterPipeline_MergeLoopMask, reporter) {
    // `initial` seeds all four execution-mask channels (r=condition, g=loop,
    // b=return, a=combined); merge_loop_mask ANDs `mask` into `g` only.
    alignas(64) int32_t initial[64]  = {~0, ~0, ~0, ~0, ~0,  0, ~0, ~0,  // r (condition)
                                        ~0,  0, ~0,  0, ~0, ~0, ~0, ~0,
                                        ~0, ~0, ~0, ~0, ~0, ~0,  0, ~0,  // g (loop)
                                        ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0,
                                        ~0, ~0, ~0, ~0, ~0,  0, ~0, ~0,  // b (return)
                                        ~0,  0, ~0,  0, ~0, ~0, ~0, ~0,
                                        ~0, ~0, ~0, ~0, ~0, ~0,  0, ~0,  // a (combined)
                                        ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0};
    alignas(64) int32_t mask[16]     = {0, ~0, ~0, 0, ~0, ~0, ~0, ~0, 0, ~0, ~0, 0, ~0, ~0, ~0, ~0};
    alignas(64) int32_t src[4 * SkRasterPipeline_kMaxStride_highp] = {};
    static_assert(std::size(initial) == (4 * SkRasterPipeline_kMaxStride_highp));

    SkRasterPipeline_<256> p;
    p.append(SkRasterPipelineOp::load_src, initial);
    p.append(SkRasterPipelineOp::merge_loop_mask, mask);
    p.append(SkRasterPipelineOp::store_src, src);
    p.run(0,0,SkOpts::raster_pipeline_highp_stride,1);

    const int r = 0 * SkOpts::raster_pipeline_highp_stride;
    const int g = 1 * SkOpts::raster_pipeline_highp_stride;
    const int b = 2 * SkOpts::raster_pipeline_highp_stride;
    const int a = 3 * SkOpts::raster_pipeline_highp_stride;
    for (size_t index = 0; index < SkOpts::raster_pipeline_highp_stride; ++index) {
        // `g` should contain `g & mask` in each lane.
        REPORTER_ASSERT(reporter, src[g + index] == (initial[g + index] & mask[index]));

        // `r` and `b` should be unchanged.
        REPORTER_ASSERT(reporter, src[r + index] == initial[r + index]);
        REPORTER_ASSERT(reporter, src[b + index] == initial[b + index]);

        // `a` should contain `r & g & b`.
        REPORTER_ASSERT(reporter, src[a + index] == (src[r+index] & src[g+index] & src[b+index]));
    }
}

DEF_TEST(SkRasterPipeline_ReenableLoopMask, reporter) {
    // reenable_loop_mask ORs `mask` back into the loop mask (`g`) — the
    // inverse of merge_loop_mask — and recomputes the combined mask (`a`).
    alignas(64) int32_t initial[64]  = {~0, ~0, ~0, ~0, ~0,  0, ~0, ~0,  // r (condition)
                                        ~0,  0, ~0,  0, ~0, ~0,  0, ~0,
                                         0, ~0, ~0, ~0,  0,  0,  0, ~0,  // g (loop)
                                         0,  0, ~0,  0,  0,  0,  0, ~0,
                                        ~0, ~0, ~0, ~0, ~0,  0, ~0, ~0,  // b (return)
                                        ~0,  0, ~0,  0, ~0, ~0,  0, ~0,
                                         0, ~0, ~0, ~0,  0,  0,  0, ~0,  // a (combined)
                                         0,  0, ~0,  0,  0,  0,  0, ~0};
    alignas(64) int32_t mask[16]     = { 0, ~0, 0, 0, 0, 0, ~0, 0, 0, ~0, 0, 0, 0, 0, ~0, 0};
    alignas(64) int32_t src[4 * SkRasterPipeline_kMaxStride_highp] = {};
    static_assert(std::size(initial) == (4 * SkRasterPipeline_kMaxStride_highp));

    SkRasterPipeline_<256> p;
    p.append(SkRasterPipelineOp::load_src, initial);
    p.append(SkRasterPipelineOp::reenable_loop_mask, mask);
    p.append(SkRasterPipelineOp::store_src, src);
    p.run(0,0,SkOpts::raster_pipeline_highp_stride,1);

    const int r = 0 * SkOpts::raster_pipeline_highp_stride;
    const int g = 1 * SkOpts::raster_pipeline_highp_stride;
    const int b = 2 * SkOpts::raster_pipeline_highp_stride;
    const int a = 3 * SkOpts::raster_pipeline_highp_stride;
    for (size_t index = 0; index < SkOpts::raster_pipeline_highp_stride; ++index) {
        // `g` should contain `g | mask` in each lane.
        REPORTER_ASSERT(reporter, src[g + index] == (initial[g + index] | mask[index]));

        // `r` and `b` should be unchanged.
        REPORTER_ASSERT(reporter, src[r + index] == initial[r + index]);
        REPORTER_ASSERT(reporter, src[b + index] == initial[b + index]);

        // `a` should contain `r & g & b`.
        REPORTER_ASSERT(reporter, src[a + index] == (src[r+index] & src[g+index] & src[b+index]));
    }
}

DEF_TEST(SkRasterPipeline_CaseOp,reporter)341 DEF_TEST(SkRasterPipeline_CaseOp, reporter) {
342     alignas(64) int32_t initial[64]        = {~0, ~0, ~0, ~0, ~0,  0, ~0, ~0,  // r (condition)
343                                                0, ~0, ~0,  0, ~0, ~0,  0, ~0,
344                                               ~0,  0, ~0, ~0,  0,  0,  0, ~0,  // g (loop)
345                                                0,  0, ~0,  0,  0,  0,  0, ~0,
346                                               ~0, ~0, ~0, ~0, ~0,  0, ~0, ~0,  // b (return)
347                                                0, ~0, ~0,  0, ~0, ~0,  0, ~0,
348                                               ~0,  0, ~0, ~0,  0,  0,  0, ~0,  // a (combined)
349                                                0,  0, ~0,  0,  0,  0,  0, ~0};
350     alignas(64) int32_t src[4 * SkRasterPipeline_kMaxStride_highp] = {};
351     static_assert(std::size(initial) == (4 * SkRasterPipeline_kMaxStride_highp));
352 
353     constexpr int32_t actualValues[16] = { 2,  1,  2,  4,  5,  2,  2,  8};
354     static_assert(std::size(actualValues) == SkRasterPipeline_kMaxStride_highp);
355 
356     alignas(64) int32_t caseOpData[2 * SkRasterPipeline_kMaxStride_highp];
357     for (size_t index = 0; index < SkOpts::raster_pipeline_highp_stride; ++index) {
358         caseOpData[0 * SkOpts::raster_pipeline_highp_stride + index] = actualValues[index];
359         caseOpData[1 * SkOpts::raster_pipeline_highp_stride + index] = ~0;
360     }
361 
362     SkRasterPipeline_CaseOpCtx ctx;
363     ctx.offset = 0;
364     ctx.expectedValue = 2;
365 
366     SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
367     SkRasterPipeline p(&alloc);
368     p.append(SkRasterPipelineOp::load_src, initial);
369     p.append(SkRasterPipelineOp::set_base_pointer, &caseOpData[0]);
370     p.append(SkRasterPipelineOp::case_op, SkRPCtxUtils::Pack(ctx, &alloc));
371     p.append(SkRasterPipelineOp::store_src, src);
372     p.run(0,0,SkOpts::raster_pipeline_highp_stride,1);
373 
374     const int r = 0 * SkOpts::raster_pipeline_highp_stride;
375     const int g = 1 * SkOpts::raster_pipeline_highp_stride;
376     const int b = 2 * SkOpts::raster_pipeline_highp_stride;
377     const int a = 3 * SkOpts::raster_pipeline_highp_stride;
378     const int actualValueIdx = 0 * SkOpts::raster_pipeline_highp_stride;
379     const int defaultMaskIdx = 1 * SkOpts::raster_pipeline_highp_stride;
380 
381     for (size_t index = 0; index < SkOpts::raster_pipeline_highp_stride; ++index) {
382         // `g` should have been set to true for each lane containing 2.
383         int32_t expected = (actualValues[index] == 2) ? ~0 : initial[g + index];
384         REPORTER_ASSERT(reporter, src[g + index] == expected);
385 
386         // `r` and `b` should be unchanged.
387         REPORTER_ASSERT(reporter, src[r + index] == initial[r + index]);
388         REPORTER_ASSERT(reporter, src[b + index] == initial[b + index]);
389 
390         // `a` should contain `r & g & b`.
391         REPORTER_ASSERT(reporter, src[a + index] == (src[r+index] & src[g+index] & src[b+index]));
392 
393         // The actual-value part of `caseOpData` should be unchanged from the inputs.
394         REPORTER_ASSERT(reporter, caseOpData[actualValueIdx + index] == actualValues[index]);
395 
396         // The default-mask part of `caseOpData` should have been zeroed where the values matched.
397         expected = (actualValues[index] == 2) ? 0 : ~0;
398         REPORTER_ASSERT(reporter, caseOpData[defaultMaskIdx + index] == expected);
399     }
400 }
401 
DEF_TEST(SkRasterPipeline_MaskOffLoopMask, reporter) {
    // mask_off_loop_mask disables the loop mask (`g`) for every lane that is
    // currently executing (`a`), then recomputes `a = r & g & b`.
    alignas(64) int32_t initial[64]  = {~0, ~0, ~0, ~0, ~0,  0, ~0, ~0,  // r (condition)
                                        ~0,  0, ~0, ~0,  0,  0,  0, ~0,
                                        ~0, ~0,  0, ~0,  0,  0, ~0, ~0,  // g (loop)
                                        ~0,  0,  0, ~0,  0,  0,  0, ~0,
                                        ~0, ~0, ~0, ~0, ~0,  0, ~0, ~0,  // b (return)
                                        ~0,  0, ~0, ~0,  0,  0,  0, ~0,
                                        ~0, ~0,  0, ~0,  0,  0, ~0, ~0,  // a (combined)
                                        ~0,  0,  0, ~0,  0,  0,  0, ~0};
    alignas(64) int32_t src[4 * SkRasterPipeline_kMaxStride_highp] = {};
    static_assert(std::size(initial) == (4 * SkRasterPipeline_kMaxStride_highp));

    SkRasterPipeline_<256> p;
    p.append(SkRasterPipelineOp::load_src, initial);
    p.append(SkRasterPipelineOp::mask_off_loop_mask);
    p.append(SkRasterPipelineOp::store_src, src);
    p.run(0,0,SkOpts::raster_pipeline_highp_stride,1);

    const int r = 0 * SkOpts::raster_pipeline_highp_stride;
    const int g = 1 * SkOpts::raster_pipeline_highp_stride;
    const int b = 2 * SkOpts::raster_pipeline_highp_stride;
    const int a = 3 * SkOpts::raster_pipeline_highp_stride;
    for (size_t index = 0; index < SkOpts::raster_pipeline_highp_stride; ++index) {
        // `g` should have masked off any lanes that are currently executing.
        int32_t expected = initial[g + index] & ~initial[a + index];
        REPORTER_ASSERT(reporter, src[g + index] == expected);

        // `a` should contain `r & g & b`.
        expected = src[r + index] & src[g + index] & src[b + index];
        REPORTER_ASSERT(reporter, src[a + index] == expected);
    }
}

DEF_TEST(SkRasterPipeline_MaskOffReturnMask, reporter) {
    // mask_off_return_mask disables the return mask (`b`) for every lane that
    // is currently executing (`a`), then recomputes `a = r & g & b`.
    alignas(64) int32_t initial[64]  = {~0, ~0, ~0, ~0, ~0,  0, ~0, ~0,  // r (condition)
                                        ~0,  0, ~0, ~0,  0,  0,  0, ~0,
                                        ~0, ~0,  0, ~0,  0,  0, ~0, ~0,  // g (loop)
                                        ~0,  0,  0, ~0,  0,  0,  0, ~0,
                                        ~0, ~0, ~0, ~0, ~0,  0, ~0, ~0,  // b (return)
                                        ~0,  0, ~0, ~0,  0,  0,  0, ~0,
                                        ~0, ~0,  0, ~0,  0,  0, ~0, ~0,  // a (combined)
                                        ~0,  0,  0, ~0,  0,  0,  0, ~0};
    alignas(64) int32_t src[4 * SkRasterPipeline_kMaxStride_highp] = {};
    static_assert(std::size(initial) == (4 * SkRasterPipeline_kMaxStride_highp));

    SkRasterPipeline_<256> p;
    p.append(SkRasterPipelineOp::load_src, initial);
    p.append(SkRasterPipelineOp::mask_off_return_mask);
    p.append(SkRasterPipelineOp::store_src, src);
    p.run(0,0,SkOpts::raster_pipeline_highp_stride,1);

    const int r = 0 * SkOpts::raster_pipeline_highp_stride;
    const int g = 1 * SkOpts::raster_pipeline_highp_stride;
    const int b = 2 * SkOpts::raster_pipeline_highp_stride;
    const int a = 3 * SkOpts::raster_pipeline_highp_stride;
    for (size_t index = 0; index < SkOpts::raster_pipeline_highp_stride; ++index) {
        // `b` should have masked off any lanes that are currently executing.
        int32_t expected = initial[b + index] & ~initial[a + index];
        REPORTER_ASSERT(reporter, src[b + index] == expected);

        // `a` should contain `r & g & b`.
        expected = src[r + index] & src[g + index] & src[b + index];
        REPORTER_ASSERT(reporter, src[a + index] == expected);
    }
}

DEF_TEST(SkRasterPipeline_InitLaneMasks, reporter) {
    // Verifies that init_lane_masks turns on all four mask channels for the
    // first `width` lanes, and leaves trailing lanes untouched.
    for (size_t width = 1; width <= SkOpts::raster_pipeline_highp_stride; ++width) {
        SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
        SkRasterPipeline p(&alloc);

        // Initialize RGBA to unrelated values.
        alignas(64) static constexpr float kArbitraryColor[4] = {0.0f, 0.25f, 0.50f, 0.75f};
        p.appendConstantColor(&alloc, kArbitraryColor);

        // Overwrite RGBA with lane masks up to the tail width.
        SkRasterPipeline_InitLaneMasksCtx ctx;
        p.append(SkRasterPipelineOp::init_lane_masks, &ctx);

        // Use the store_src command to write out RGBA for inspection.
        alignas(64) int32_t RGBA[4 * SkRasterPipeline_kMaxStride_highp] = {};
        p.append(SkRasterPipelineOp::store_src, RGBA);

        // Execute our program.
        p.run(0,0,width,1);

        // Initialized data should look like on/on/on/on (RGBA are all set) and is
        // striped by the raster pipeline stride because we wrote it using store_src.
        size_t index = 0;
        int32_t* channelR = RGBA;
        int32_t* channelG = channelR + SkOpts::raster_pipeline_highp_stride;
        int32_t* channelB = channelG + SkOpts::raster_pipeline_highp_stride;
        int32_t* channelA = channelB + SkOpts::raster_pipeline_highp_stride;
        for (; index < width; ++index) {
            REPORTER_ASSERT(reporter, *channelR++ == ~0);
            REPORTER_ASSERT(reporter, *channelG++ == ~0);
            REPORTER_ASSERT(reporter, *channelB++ == ~0);
            REPORTER_ASSERT(reporter, *channelA++ == ~0);
        }

        // The rest of the output array should be untouched (all zero).
        for (; index < SkOpts::raster_pipeline_highp_stride; ++index) {
            REPORTER_ASSERT(reporter, *channelR++ == 0);
            REPORTER_ASSERT(reporter, *channelG++ == 0);
            REPORTER_ASSERT(reporter, *channelB++ == 0);
            REPORTER_ASSERT(reporter, *channelA++ == 0);
        }
    }
}

// This is the bit pattern of the "largest" signaling NaN. The next integer is a quiet NaN.
// We use this as the starting point for various memory-shuffling tests below, to ensure that our
// code doesn't interpret values as float when they might be integral. Using floats can cause
// signaling NaN values to change (becoming quiet), even with the most innocuous operations
// (particularly on 32-bit x86, where floats are often passed around in the x87 FPU).
static constexpr int kLastSignalingNaN    = 0x7fbfffff;

// Similarly, this is the "smallest" (in magnitude) negative signaling NaN. The next integer is
// a quiet negative NaN. Only used when testing operations that need two distinct integer sequences
// as input, and the logic is asymmetric enough that we want NaNs fed into both sides.
static constexpr int kLastSignalingNegNaN = 0xffbfffff;

DEF_TEST(SkRasterPipeline_CopyFromIndirectUnmasked,r)524 DEF_TEST(SkRasterPipeline_CopyFromIndirectUnmasked, r) {
525     // Allocate space for 5 source slots, and 5 dest slots.
526     alignas(64) int src[5 * SkRasterPipeline_kMaxStride_highp];
527     alignas(64) int dst[5 * SkRasterPipeline_kMaxStride_highp];
528 
529     // Test with various mixes of indirect offsets.
530     static_assert(SkRasterPipeline_kMaxStride_highp == 16);
531     alignas(64) const uint32_t kOffsets1[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
532     alignas(64) const uint32_t kOffsets2[16] = {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2};
533     alignas(64) const uint32_t kOffsets3[16] = {0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2};
534     alignas(64) const uint32_t kOffsets4[16] = {99, 99, 0, 0, 99, 99, 0, 0,
535                                                 99, 99, 0, 0, 99, 99, 0, 0};
536 
537     const int N = SkOpts::raster_pipeline_highp_stride;
538 
539     for (const uint32_t* offsets : {kOffsets1, kOffsets2, kOffsets3, kOffsets4}) {
540         for (int copySize = 1; copySize <= 5; ++copySize) {
541             // Initialize the destination slots to 0,1,2.. and the source slots to various NaNs
542             std::iota(&dst[0], &dst[5 * N], 0);
543             std::iota(&src[0], &src[5 * N], kLastSignalingNaN);
544 
545             // Run `copy_from_indirect_unmasked` over our data.
546             SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
547             SkRasterPipeline p(&alloc);
548             auto* ctx = alloc.make<SkRasterPipeline_CopyIndirectCtx>();
549             ctx->dst = &dst[0];
550             ctx->src = &src[0];
551             ctx->indirectOffset = offsets;
552             ctx->indirectLimit = 5 - copySize;
553             ctx->slots = copySize;
554 
555             p.append(SkRasterPipelineOp::copy_from_indirect_unmasked, ctx);
556             p.run(0,0,N,1);
557 
558             // If the offset plus copy-size would overflow the source data, the results don't
559             // matter; indexing off the end of the buffer is UB, and we don't make any promises
560             // about the values you get. If we didn't crash, that's success. (In practice, we
561             // will have clamped the source pointer so that we don't read past the end.)
562             int maxOffset = *std::max_element(offsets, offsets + N);
563             if (copySize + maxOffset > 5) {
564                 continue;
565             }
566 
567             // Verify that the destination has been overwritten in the mask-on fields, and has
568             // not been overwritten in the mask-off fields, for each destination slot.
569             int expectedUnchanged = 0;
570             int expectedFromZero = src[0 * N], expectedFromTwo = src[2 * N];
571             int* destPtr = dst;
572             for (int checkSlot = 0; checkSlot < 5; ++checkSlot) {
573                 for (int checkLane = 0; checkLane < N; ++checkLane) {
574                     if (checkSlot < copySize) {
575                         if (offsets[checkLane] == 0) {
576                             REPORTER_ASSERT(r, *destPtr == expectedFromZero);
577                         } else if (offsets[checkLane] == 2) {
578                             REPORTER_ASSERT(r, *destPtr == expectedFromTwo);
579                         } else {
580                             ERRORF(r, "unexpected offset value");
581                         }
582                     } else {
583                         REPORTER_ASSERT(r, *destPtr == expectedUnchanged);
584                     }
585 
586                     ++destPtr;
587                     expectedUnchanged += 1;
588                     expectedFromZero += 1;
589                     expectedFromTwo += 1;
590                 }
591             }
592         }
593     }
594 }
595 
DEF_TEST(SkRasterPipeline_CopyFromIndirectUniformUnmasked, r) {
    // Exercises `copy_from_indirect_uniform_unmasked`: each lane reads a per-lane offset from
    // `indirectOffset`, and `slots` consecutive uniform values (starting at that offset) are
    // copied into the destination slots. The checks below encode the expected result:
    //   dst slot S, lane L  ==  src[offsets[L] + S]
    // Allocate space for 5 source uniform values, and 5 dest slots.
    // (Note that unlike slots, uniforms don't use multiple lanes per value.)
    alignas(64) int src[5];
    alignas(64) int dst[5 * SkRasterPipeline_kMaxStride_highp];

    // Test with various mixes of indirect offsets. Only 0 and 2 are treated as valid offsets by
    // the verification below; kOffsets4 deliberately holds wildly out-of-range values, and those
    // runs are only checked for "didn't crash" (see the overflow check after p.run).
    static_assert(SkRasterPipeline_kMaxStride_highp == 16);
    alignas(64) const uint32_t kOffsets1[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
    alignas(64) const uint32_t kOffsets2[16] = {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2};
    alignas(64) const uint32_t kOffsets3[16] = {0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2};
    alignas(64) const uint32_t kOffsets4[16] = {99, ~99u, 0, 0, ~99u, 99, 0, 0,
                                                99, ~99u, 0, 0, ~99u, 99, 0, 0};

    const int N = SkOpts::raster_pipeline_highp_stride;

    for (const uint32_t* offsets : {kOffsets1, kOffsets2, kOffsets3, kOffsets4}) {
        for (int copySize = 1; copySize <= 5; ++copySize) {
            // Initialize the destination slots to 0,1,2.. and the source uniforms to various NaNs
            std::iota(&dst[0], &dst[5 * N], 0);
            std::iota(&src[0], &src[5], kLastSignalingNaN);

            // Run `copy_from_indirect_unmasked` over our data.
            SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
            SkRasterPipeline p(&alloc);
            auto* ctx = alloc.make<SkRasterPipeline_CopyIndirectCtx>();
            ctx->dst = &dst[0];
            ctx->src = &src[0];
            ctx->indirectOffset = offsets;
            // The largest offset that keeps a `copySize`-element copy inside the 5-entry source.
            ctx->indirectLimit = 5 - copySize;
            ctx->slots = copySize;

            p.append(SkRasterPipelineOp::copy_from_indirect_uniform_unmasked, ctx);
            p.run(0,0,N,1);

            // If the offset plus copy-size would overflow the source data, the results don't
            // matter; indexing off the end of the buffer is UB, and we don't make any promises
            // about the values you get. If we didn't crash, that's success. (In practice, we
            // will have clamped the source pointer so that we don't read past the end.)
            uint32_t maxOffset = *std::max_element(offsets, offsets + N);
            if (copySize + maxOffset > 5) {
                continue;
            }

            // Verify that the destination has been overwritten in each slot.
            // Because uniforms hold one value per slot (not one per lane), every lane of a
            // copied slot expects the same value; the expectedFrom* counters therefore advance
            // once per slot (outside the lane loop), while expectedUnchanged advances per lane.
            int expectedUnchanged = 0;
            int expectedFromZero = src[0], expectedFromTwo = src[2];
            int* destPtr = dst;
            for (int checkSlot = 0; checkSlot < 5; ++checkSlot) {
                for (int checkLane = 0; checkLane < N; ++checkLane) {
                    if (checkSlot < copySize) {
                        if (offsets[checkLane] == 0) {
                            REPORTER_ASSERT(r, *destPtr == expectedFromZero);
                        } else if (offsets[checkLane] == 2) {
                            REPORTER_ASSERT(r, *destPtr == expectedFromTwo);
                        } else {
                            ERRORF(r, "unexpected offset value");
                        }
                    } else {
                        // Slots past `copySize` must retain their iota-initialized values.
                        REPORTER_ASSERT(r, *destPtr == expectedUnchanged);
                    }

                    ++destPtr;
                    expectedUnchanged += 1;
                }
                // Advance the uniform expectations per-slot; see comment above.
                expectedFromZero += 1;
                expectedFromTwo += 1;
            }
        }
    }
}
667 
DEF_TEST(SkRasterPipeline_CopyToIndirectMasked, r) {
    // Exercises `copy_to_indirect_masked`: for each lane whose mask is on, `slots` slots of
    // source data are copied into the destination starting at the lane's indirect offset.
    // Allocate space for 5 source slots, and 5 dest slots.
    alignas(64) int src[5 * SkRasterPipeline_kMaxStride_highp];
    alignas(64) int dst[5 * SkRasterPipeline_kMaxStride_highp];

    // Test with various mixes of indirect offsets. Only 0 and 2 are treated as valid offsets by
    // the verification below; kOffsets4 holds wildly out-of-range values, and those runs are
    // only checked for "didn't crash" (see the overflow check after p.run).
    static_assert(SkRasterPipeline_kMaxStride_highp == 16);
    alignas(64) const uint32_t kOffsets1[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
    alignas(64) const uint32_t kOffsets2[16] = {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2};
    alignas(64) const uint32_t kOffsets3[16] = {0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2};
    alignas(64) const uint32_t kOffsets4[16] = {99, ~99u, 0, 0, ~99u, 99, 0, 0,
                                                99, ~99u, 0, 0, ~99u, 99, 0, 0};

    // Test with various masks.
    alignas(64) const int32_t kMask1[16]  = {~0, ~0, ~0, ~0, ~0,  0, ~0, ~0,
                                             ~0, ~0, ~0, ~0, ~0,  0, ~0, ~0};
    alignas(64) const int32_t kMask2[16]  = {~0,  0, ~0, ~0,  0,  0,  0, ~0,
                                             ~0,  0, ~0, ~0,  0,  0,  0, ~0};
    alignas(64) const int32_t kMask3[16]  = {~0, ~0,  0, ~0,  0,  0, ~0, ~0,
                                             ~0, ~0,  0, ~0,  0,  0, ~0, ~0};
    alignas(64) const int32_t kMask4[16]  = { 0,  0,  0,  0,  0,  0,  0,  0,
                                              0,  0,  0,  0,  0,  0,  0,  0};

    const int N = SkOpts::raster_pipeline_highp_stride;

    for (const int32_t* mask : {kMask1, kMask2, kMask3, kMask4}) {
        for (const uint32_t* offsets : {kOffsets1, kOffsets2, kOffsets3, kOffsets4}) {
            for (int copySize = 1; copySize <= 5; ++copySize) {
                // Initialize the destination slots to 0,1,2.. and the source slots to various NaNs
                std::iota(&dst[0], &dst[5 * N], 0);
                std::iota(&src[0], &src[5 * N], kLastSignalingNaN);

                // Run `copy_to_indirect_masked` over our data.
                SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
                SkRasterPipeline p(&alloc);
                auto* ctx = alloc.make<SkRasterPipeline_CopyIndirectCtx>();
                ctx->dst = &dst[0];
                ctx->src = &src[0];
                ctx->indirectOffset = offsets;
                // The largest offset that keeps a `copySize`-slot write inside the 5-slot dest.
                ctx->indirectLimit = 5 - copySize;
                ctx->slots = copySize;

                SkRasterPipeline_InitLaneMasksCtx initLaneMasksCtx;
                p.append(SkRasterPipelineOp::init_lane_masks, &initLaneMasksCtx);
                p.append(SkRasterPipelineOp::load_condition_mask, mask);
                p.append(SkRasterPipelineOp::copy_to_indirect_masked, ctx);
                p.run(0,0,N,1);

                // If the offset plus copy-size would overflow the destination, the results don't
                // matter; indexing off the end of the buffer is UB, and we don't make any promises
                // about the values you get. If we didn't crash, that's success. (In practice, we
                // will have clamped the destination pointer so that we don't read past the end.)
                uint32_t maxOffset = *std::max_element(offsets, offsets + N);
                if (copySize + maxOffset > 5) {
                    continue;
                }

                // Verify that the destination has been overwritten in the mask-on fields, and has
                // not been overwritten in the mask-off fields, for each destination slot.
                // `expectedFromZero` tracks what a lane copying to offset 0 should have written.
                // `expectedFromTwo` is the same series back-dated by two slots (2*N elements):
                // lanes with offset 2 write the source values two slots later in `dst`, so by the
                // time `pos` reaches their write range, the counter has caught up to src[0].
                int expectedUnchanged = 0;
                int expectedFromZero = src[0], expectedFromTwo = src[0] - (2 * N);
                int* destPtr = dst;
                int pos = 0;
                for (int checkSlot = 0; checkSlot < 5; ++checkSlot) {
                    for (int checkLane = 0; checkLane < N; ++checkLane) {
                        // [rangeStart, rangeEnd) is the span of `dst` elements that a lane with
                        // this indirect offset actually writes.
                        int rangeStart = offsets[checkLane] * N;
                        int rangeEnd   = (offsets[checkLane] + copySize) * N;
                        if (mask[checkLane] && pos >= rangeStart && pos < rangeEnd) {
                            if (offsets[checkLane] == 0) {
                                REPORTER_ASSERT(r, *destPtr == expectedFromZero);
                            } else if (offsets[checkLane] == 2) {
                                REPORTER_ASSERT(r, *destPtr == expectedFromTwo);
                            } else {
                                ERRORF(r, "unexpected offset value");
                            }
                        } else {
                            // Mask-off lanes and out-of-range positions keep their iota values.
                            REPORTER_ASSERT(r, *destPtr == expectedUnchanged);
                        }

                        ++pos;
                        ++destPtr;
                        expectedUnchanged += 1;
                        expectedFromZero += 1;
                        expectedFromTwo += 1;
                    }
                }
            }
        }
    }
}
758 
DEF_TEST(SkRasterPipeline_SwizzleCopyToIndirectMasked, r) {
    // Exercises `swizzle_copy_to_indirect_masked`: for each mask-on lane, source slots are
    // scattered into swizzled positions of the destination, starting at the lane's indirect
    // offset. Expected results are encoded in the kExpectations tables below.
    // Allocate space for 5 source slots, and 5 dest slots.
    alignas(64) int src[5 * SkRasterPipeline_kMaxStride_highp];
    alignas(64) int dst[5 * SkRasterPipeline_kMaxStride_highp];

    // Test with various mixes of indirect offsets. Only 0 and 2 are covered by the expectation
    // tables; kOffsets4 holds wildly out-of-range values, and those runs are only checked for
    // "didn't crash" (see the overflow check after p.run).
    static_assert(SkRasterPipeline_kMaxStride_highp == 16);
    alignas(64) const uint32_t kOffsets1[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
    alignas(64) const uint32_t kOffsets2[16] = {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2};
    alignas(64) const uint32_t kOffsets3[16] = {0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2};
    alignas(64) const uint32_t kOffsets4[16] = {99, ~99u, 0, 0, ~99u, 99, 0, 0,
                                                99, ~99u, 0, 0, ~99u, 99, 0, 0};

    // Test with various masks.
    alignas(64) const int32_t kMask1[16]  = {~0, ~0, ~0, ~0, ~0,  0, ~0, ~0,
                                             ~0, ~0, ~0, ~0, ~0,  0, ~0, ~0};
    alignas(64) const int32_t kMask2[16]  = {~0,  0, ~0, ~0,  0,  0,  0, ~0,
                                             ~0,  0, ~0, ~0,  0,  0,  0, ~0};
    alignas(64) const int32_t kMask3[16]  = {~0, ~0,  0, ~0,  0,  0, ~0, ~0,
                                             ~0, ~0,  0, ~0,  0,  0, ~0, ~0};
    alignas(64) const int32_t kMask4[16]  = { 0,  0,  0,  0,  0,  0,  0,  0,
                                              0,  0,  0,  0,  0,  0,  0,  0};

    // Test with various swizzle permutations.
    struct TestPattern {
        int swizzleSize;        // number of components written (and source slots consumed)
        int swizzleUpperBound;  // 1 + highest component index touched; bounds the dest span
        uint16_t swizzle[4];    // dest component index for each source slot, in order
    };

    static const TestPattern kPatterns[] = {
        {1, 4, {3}},          // v.w    = (1)
        {2, 2, {1, 0}},       // v.yx   = (1,2)
        {3, 3, {2, 1, 0}},    // v.zyx  = (1,2,3)
        {4, 4, {3, 0, 1, 2}}, // v.wxyz = (1,2,3,4)
    };

    // What a given destination slot should hold after the copy.
    enum Result {
        kOutOfBounds = 0,  // write would land past the buffer; result is not checked
        kUnchanged = 1,    // slot keeps its iota-initialized value
        S0 = 2,            // slot holds source slot 0 ... through S4 = source slot 4
        S1 = 3,
        S2 = 4,
        S3 = 5,
        S4 = 6,
    };

#define __ kUnchanged
#define XX kOutOfBounds
    // Expected dest contents, per pattern, for lanes whose indirect offset is 0...
    static const Result kExpectationsAtZero[4][5] = {
    //  d[0].w = 1        d[0].yx = (1,2)   d[0].zyx = (1,2,3) d[0].wxyz = (1,2,3,4)
        {__,__,__,S0,__}, {S1,S0,__,__,__}, {S2,S1,S0,__,__},  {S1,S2,S3,S0,__},
    };
    // ...and for lanes whose indirect offset is 2 (patterns with upper bound 4 would write past
    // slot 5, hence all-XX rows).
    static const Result kExpectationsAtTwo[4][5] = {
    //  d[2].w = 1        d[2].yx = (1,2)   d[2].zyx = (1,2,3) d[2].wxyz = (1,2,3,4)
        {XX,XX,XX,XX,XX}, {__,__,S1,S0,__}, {__,__,S2,S1,S0},  {XX,XX,XX,XX,XX},
    };
#undef __
#undef XX

    const int N = SkOpts::raster_pipeline_highp_stride;

    for (const int32_t* mask : {kMask1, kMask2, kMask3, kMask4}) {
        for (const uint32_t* offsets : {kOffsets1, kOffsets2, kOffsets3, kOffsets4}) {
            for (size_t patternIndex = 0; patternIndex < std::size(kPatterns); ++patternIndex) {
                const TestPattern& pattern = kPatterns[patternIndex];

                // Initialize the destination slots to 0,1,2.. and the source slots to various NaNs
                std::iota(&dst[0], &dst[5 * N], 0);
                std::iota(&src[0], &src[5 * N], kLastSignalingNaN);

                // Run `swizzle_copy_to_indirect_masked` over our data.
                SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
                SkRasterPipeline p(&alloc);
                auto* ctx = alloc.make<SkRasterPipeline_SwizzleCopyIndirectCtx>();
                ctx->dst = &dst[0];
                ctx->src = &src[0];
                ctx->indirectOffset = offsets;
                // The largest offset that keeps the swizzled writes inside the 5-slot dest.
                ctx->indirectLimit = 5 - pattern.swizzleUpperBound;
                ctx->slots = pattern.swizzleSize;
                // Convert each swizzle component index into a byte offset within the destination
                // (each slot is N floats wide).
                ctx->offsets[0] = pattern.swizzle[0] * N * sizeof(float);
                ctx->offsets[1] = pattern.swizzle[1] * N * sizeof(float);
                ctx->offsets[2] = pattern.swizzle[2] * N * sizeof(float);
                ctx->offsets[3] = pattern.swizzle[3] * N * sizeof(float);

                SkRasterPipeline_InitLaneMasksCtx initLaneMasksCtx;
                p.append(SkRasterPipelineOp::init_lane_masks, &initLaneMasksCtx);
                p.append(SkRasterPipelineOp::load_condition_mask, mask);
                p.append(SkRasterPipelineOp::swizzle_copy_to_indirect_masked, ctx);
                p.run(0,0,N,1);

                // If the offset plus copy-size would overflow the destination, the results don't
                // matter; indexing off the end of the buffer is UB, and we don't make any promises
                // about the values you get. If we didn't crash, that's success. (In practice, we
                // will have clamped the destination pointer so that we don't read past the end.)
                uint32_t maxOffset = *std::max_element(offsets, offsets + N);
                if (pattern.swizzleUpperBound + maxOffset > 5) {
                    continue;
                }

                // Verify that the destination has been overwritten in the mask-on fields, and has
                // not been overwritten in the mask-off fields, for each destination slot.
                int expectedUnchanged = 0;
                int* destPtr = dst;
                for (int checkSlot = 0; checkSlot < 5; ++checkSlot) {
                    for (int checkLane = 0; checkLane < N; ++checkLane) {
                        // Look up the expectation for this (pattern, slot); mask-off lanes must
                        // always be left untouched regardless of the table.
                        Result expectedType = kUnchanged;
                        if (offsets[checkLane] == 0) {
                            expectedType = kExpectationsAtZero[patternIndex][checkSlot];
                        } else if (offsets[checkLane] == 2) {
                            expectedType = kExpectationsAtTwo[patternIndex][checkSlot];
                        }
                        if (!mask[checkLane]) {
                            expectedType = kUnchanged;
                        }
                        switch (expectedType) {
                            case kOutOfBounds: // out of bounds; ignore result
                                break;
                            case kUnchanged:
                                REPORTER_ASSERT(r, *destPtr == expectedUnchanged);
                                break;
                            case S0: // destination should match source 0
                                REPORTER_ASSERT(r, *destPtr == src[0*N + checkLane]);
                                break;
                            case S1: // destination should match source 1
                                REPORTER_ASSERT(r, *destPtr == src[1*N + checkLane]);
                                break;
                            case S2: // destination should match source 2
                                REPORTER_ASSERT(r, *destPtr == src[2*N + checkLane]);
                                break;
                            case S3: // destination should match source 3
                                REPORTER_ASSERT(r, *destPtr == src[3*N + checkLane]);
                                break;
                            case S4: // destination should match source 4
                                REPORTER_ASSERT(r, *destPtr == src[4*N + checkLane]);
                                break;
                        }

                        ++destPtr;
                        expectedUnchanged += 1;
                    }
                }
            }
        }
    }
}
905 
DEF_TEST(SkRasterPipeline_TraceVar,r)906 DEF_TEST(SkRasterPipeline_TraceVar, r) {
907     const int N = SkOpts::raster_pipeline_highp_stride;
908 
909     class TestTraceHook : public SkSL::TraceHook {
910     public:
911         void line(int) override                  { fBuffer.push_back(-9999999); }
912         void enter(int) override                 { fBuffer.push_back(-9999999); }
913         void exit(int) override                  { fBuffer.push_back(-9999999); }
914         void scope(int) override                 { fBuffer.push_back(-9999999); }
915         void var(int slot, int32_t val) override {
916             fBuffer.push_back(slot);
917             fBuffer.push_back(val);
918         }
919 
920         TArray<int> fBuffer;
921     };
922 
923     static_assert(SkRasterPipeline_kMaxStride_highp == 16);
924     alignas(64) static constexpr int32_t  kMaskOn   [16] = {~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0,
925                                                             ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0};
926     alignas(64) static constexpr int32_t  kMaskOff  [16] = { 0,  0,  0,  0,  0,  0,  0,  0,
927                                                              0,  0,  0,  0,  0,  0,  0,  0};
928     alignas(64) static constexpr uint32_t kIndirect0[16] = { 0,  0,  0,  0,  0,  0,  0,  0,
929                                                              0,  0,  0,  0,  0,  0,  0,  0};
930     alignas(64) static constexpr uint32_t kIndirect1[16] = { 1,  1,  1,  1,  1,  1,  1,  1,
931                                                              1,  1,  1,  1,  1,  1,  1,  1};
932     alignas(64) int32_t kData333[16];
933     alignas(64) int32_t kData555[16];
934     alignas(64) int32_t kData666[16];
935     alignas(64) int32_t kData777[32];
936     alignas(64) int32_t kData999[32];
937     std::fill(kData333,     kData333 + N,   333);
938     std::fill(kData555,     kData555 + N,   555);
939     std::fill(kData666,     kData666 + N,   666);
940     std::fill(kData777,     kData777 + N,   777);
941     std::fill(kData777 + N, kData777 + 2*N, 707);
942     std::fill(kData999,     kData999 + N,   999);
943     std::fill(kData999 + N, kData999 + 2*N, 909);
944 
945     TestTraceHook trace;
946     SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
947     SkRasterPipeline p(&alloc);
948     SkRasterPipeline_InitLaneMasksCtx initLaneMasksCtx;
949     p.append(SkRasterPipelineOp::init_lane_masks, &initLaneMasksCtx);
950     const SkRasterPipeline_TraceVarCtx kTraceVar1 = {/*traceMask=*/kMaskOff,
951                                                      &trace, 2, 1, kData333,
952                                                      /*indirectOffset=*/nullptr,
953                                                      /*indirectLimit=*/0};
954     const SkRasterPipeline_TraceVarCtx kTraceVar2 = {/*traceMask=*/kMaskOn,
955                                                      &trace, 4, 1, kData555,
956                                                      /*indirectOffset=*/nullptr,
957                                                      /*indirectLimit=*/0};
958     const SkRasterPipeline_TraceVarCtx kTraceVar3 = {/*traceMask=*/kMaskOff,
959                                                      &trace, 5, 1, kData666,
960                                                      /*indirectOffset=*/nullptr,
961                                                      /*indirectLimit=*/0};
962     const SkRasterPipeline_TraceVarCtx kTraceVar4 = {/*traceMask=*/kMaskOn,
963                                                      &trace, 6, 2, kData777,
964                                                      /*indirectOffset=*/nullptr,
965                                                      /*indirectLimit=*/0};
966     const SkRasterPipeline_TraceVarCtx kTraceVar5 = {/*traceMask=*/kMaskOn,
967                                                      &trace, 8, 2, kData999,
968                                                      /*indirectOffset=*/nullptr,
969                                                      /*indirectLimit=*/0};
970     const SkRasterPipeline_TraceVarCtx kTraceVar6 = {/*traceMask=*/kMaskOn,
971                                                      &trace, 9, 1, kData999,
972                                                      /*indirectOffset=*/kIndirect0,
973                                                      /*indirectLimit=*/1};
974     const SkRasterPipeline_TraceVarCtx kTraceVar7 = {/*traceMask=*/kMaskOn,
975                                                      &trace, 9, 1, kData999,
976                                                      /*indirectOffset=*/kIndirect1,
977                                                      /*indirectLimit=*/1};
978 
979     p.append(SkRasterPipelineOp::load_condition_mask, kMaskOn);
980     p.append(SkRasterPipelineOp::trace_var, &kTraceVar1);
981     p.append(SkRasterPipelineOp::load_condition_mask, kMaskOn);
982     p.append(SkRasterPipelineOp::trace_var, &kTraceVar2);
983     p.append(SkRasterPipelineOp::load_condition_mask, kMaskOff);
984     p.append(SkRasterPipelineOp::trace_var, &kTraceVar3);
985     p.append(SkRasterPipelineOp::load_condition_mask, kMaskOn);
986     p.append(SkRasterPipelineOp::trace_var, &kTraceVar4);
987     p.append(SkRasterPipelineOp::load_condition_mask, kMaskOff);
988     p.append(SkRasterPipelineOp::trace_var, &kTraceVar5);
989     p.append(SkRasterPipelineOp::load_condition_mask, kMaskOn);
990     p.append(SkRasterPipelineOp::trace_var, &kTraceVar6);
991     p.append(SkRasterPipelineOp::load_condition_mask, kMaskOn);
992     p.append(SkRasterPipelineOp::trace_var, &kTraceVar7);
993     p.run(0,0,N,1);
994 
995     REPORTER_ASSERT(r, (trace.fBuffer == TArray<int>{4, 555, 6, 777, 7, 707, 9, 999, 10, 909}));
996 }
997 
DEF_TEST(SkRasterPipeline_TraceLine, r) {
    const int N = SkOpts::raster_pipeline_highp_stride;

    // A trace hook that records `line` events only; every other callback poisons the log with
    // a sentinel so the final comparison fails if one ever fires.
    class TestTraceHook : public SkSL::TraceHook {
    public:
        void var(int, int32_t) override { this->poison(); }
        void enter(int) override        { this->poison(); }
        void exit(int) override         { this->poison(); }
        void scope(int) override        { this->poison(); }
        void line(int lineNum) override { fBuffer.push_back(lineNum); }
        void poison()                   { fBuffer.push_back(-9999999); }

        TArray<int> fBuffer;
    };

    static_assert(SkRasterPipeline_kMaxStride_highp == 16);
    alignas(64) static constexpr int32_t kMaskOn [16] = {~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0,
                                                         ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0};
    alignas(64) static constexpr int32_t kMaskOff[16] = { 0,  0,  0,  0,  0,  0,  0,  0,
                                                          0,  0,  0,  0,  0,  0,  0,  0};

    TestTraceHook hook;
    SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
    SkRasterPipeline p(&alloc);
    SkRasterPipeline_InitLaneMasksCtx initLaneMasksCtx;
    p.append(SkRasterPipelineOp::init_lane_masks, &initLaneMasksCtx);

    // Contexts are {traceMask, hook, line number}.
    const SkRasterPipeline_TraceLineCtx kTraceLine1 = {kMaskOn,  &hook, 123};
    const SkRasterPipeline_TraceLineCtx kTraceLine2 = {kMaskOff, &hook, 456};
    const SkRasterPipeline_TraceLineCtx kTraceLine3 = {kMaskOn,  &hook, 567};
    const SkRasterPipeline_TraceLineCtx kTraceLine4 = {kMaskOff, &hook, 678};
    const SkRasterPipeline_TraceLineCtx kTraceLine5 = {kMaskOn,  &hook, 789};

    // Each trace_line stage runs under the condition mask loaded immediately before it.
    struct Step {
        const int32_t* conditionMask;
        const SkRasterPipeline_TraceLineCtx* traceCtx;
    };
    const Step kSteps[] = {{kMaskOn,  &kTraceLine1},   // logged: 123
                           {kMaskOn,  &kTraceLine2},   // traceMask off -> not logged
                           {kMaskOff, &kTraceLine3},   // condition mask off -> not logged
                           {kMaskOff, &kTraceLine4},   // both masks off -> not logged
                           {kMaskOn,  &kTraceLine5}};  // logged: 789
    for (const Step& step : kSteps) {
        p.append(SkRasterPipelineOp::load_condition_mask, step.conditionMask);
        p.append(SkRasterPipelineOp::trace_line, step.traceCtx);
    }
    p.run(0,0,N,1);

    REPORTER_ASSERT(r, (hook.fBuffer == TArray<int>{123, 789}));
}
1043 
DEF_TEST(SkRasterPipeline_TraceEnterExit, r) {
    const int N = SkOpts::raster_pipeline_highp_stride;

    // A trace hook that records enter/exit events as (function index, 1-or-0) pairs; every
    // other callback poisons the log with a sentinel so the comparison fails if one fires.
    class TestTraceHook : public SkSL::TraceHook {
    public:
        void line(int) override         { this->poison(); }
        void var(int, int32_t) override { this->poison(); }
        void scope(int) override        { this->poison(); }
        void enter(int fnIdx) override  {
            fBuffer.push_back(fnIdx);
            fBuffer.push_back(1);
        }
        void exit(int fnIdx) override {
            fBuffer.push_back(fnIdx);
            fBuffer.push_back(0);
        }
        void poison()                   { fBuffer.push_back(-9999999); }

        TArray<int> fBuffer;
    };

    static_assert(SkRasterPipeline_kMaxStride_highp == 16);
    alignas(64) static constexpr int32_t kMaskOn [16] = {~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0,
                                                         ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0};
    alignas(64) static constexpr int32_t kMaskOff[16] = { 0,  0,  0,  0,  0,  0,  0,  0,
                                                          0,  0,  0,  0,  0,  0,  0,  0};

    TestTraceHook hook;
    SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
    SkRasterPipeline p(&alloc);
    SkRasterPipeline_InitLaneMasksCtx initLaneMasksCtx;
    p.append(SkRasterPipelineOp::init_lane_masks, &initLaneMasksCtx);

    // Contexts are {traceMask, hook, function index}.
    const SkRasterPipeline_TraceFuncCtx kTraceFunc1 = {kMaskOff, &hook, 99};
    const SkRasterPipeline_TraceFuncCtx kTraceFunc2 = {kMaskOn,  &hook, 12};
    const SkRasterPipeline_TraceFuncCtx kTraceFunc3 = {kMaskOff, &hook, 34};
    const SkRasterPipeline_TraceFuncCtx kTraceFunc4 = {kMaskOn,  &hook, 56};
    const SkRasterPipeline_TraceFuncCtx kTraceFunc5 = {kMaskOn,  &hook, 78};
    const SkRasterPipeline_TraceFuncCtx kTraceFunc6 = {kMaskOff, &hook, 90};

    // A null conditionMask means "keep the previously-loaded condition mask".
    struct Step {
        const int32_t* conditionMask;
        SkRasterPipelineOp op;
        const SkRasterPipeline_TraceFuncCtx* traceCtx;
    };
    const Step kSteps[] = {{kMaskOff, SkRasterPipelineOp::trace_enter, &kTraceFunc1},
                           {kMaskOn,  SkRasterPipelineOp::trace_enter, &kTraceFunc2},
                           {nullptr,  SkRasterPipelineOp::trace_enter, &kTraceFunc3},
                           {nullptr,  SkRasterPipelineOp::trace_exit,  &kTraceFunc4},
                           {kMaskOff, SkRasterPipelineOp::trace_exit,  &kTraceFunc5},
                           {nullptr,  SkRasterPipelineOp::trace_exit,  &kTraceFunc6}};
    for (const Step& step : kSteps) {
        if (step.conditionMask) {
            p.append(SkRasterPipelineOp::load_condition_mask, step.conditionMask);
        }
        p.append(step.op, step.traceCtx);
    }
    p.run(0,0,N,1);

    // Only func 2's enter and func 4's exit had both masks on.
    REPORTER_ASSERT(r, (hook.fBuffer == TArray<int>{12, 1, 56, 0}));
}
1095 
DEF_TEST(SkRasterPipeline_TraceScope, r) {
    const int N = SkOpts::raster_pipeline_highp_stride;

    // A trace hook that records `scope` deltas only; every other callback poisons the log with
    // a sentinel so the final comparison fails if one ever fires.
    class TestTraceHook : public SkSL::TraceHook {
    public:
        void line(int) override         { this->poison(); }
        void var(int, int32_t) override { this->poison(); }
        void enter(int) override        { this->poison(); }
        void exit(int) override         { this->poison(); }
        void scope(int delta) override  { fBuffer.push_back(delta); }
        void poison()                   { fBuffer.push_back(-9999999); }

        TArray<int> fBuffer;
    };

    static_assert(SkRasterPipeline_kMaxStride_highp == 16);
    alignas(64) static constexpr int32_t kMaskOn [16] = {~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0,
                                                         ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0};
    alignas(64) static constexpr int32_t kMaskOff[16] = { 0,  0,  0,  0,  0,  0,  0,  0,
                                                          0,  0,  0,  0,  0,  0,  0,  0};

    TestTraceHook hook;
    SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
    SkRasterPipeline p(&alloc);
    SkRasterPipeline_InitLaneMasksCtx initLaneMasksCtx;
    p.append(SkRasterPipelineOp::init_lane_masks, &initLaneMasksCtx);

    // Contexts are {traceMask, hook, scope delta}.
    const SkRasterPipeline_TraceScopeCtx kTraceScope1 = {kMaskOn,  &hook, +1};
    const SkRasterPipeline_TraceScopeCtx kTraceScope2 = {kMaskOff, &hook, -2};
    const SkRasterPipeline_TraceScopeCtx kTraceScope3 = {kMaskOff, &hook, +3};
    const SkRasterPipeline_TraceScopeCtx kTraceScope4 = {kMaskOn,  &hook, +4};
    const SkRasterPipeline_TraceScopeCtx kTraceScope5 = {kMaskOn,  &hook, -5};

    // A null conditionMask means "keep the previously-loaded condition mask".
    struct Step {
        const int32_t* conditionMask;
        const SkRasterPipeline_TraceScopeCtx* traceCtx;
    };
    const Step kSteps[] = {{kMaskOn,  &kTraceScope1},   // logged: +1
                           {nullptr,  &kTraceScope2},   // traceMask off -> not logged
                           {kMaskOff, &kTraceScope3},   // condition mask off -> not logged
                           {nullptr,  &kTraceScope4},   // logged: +4 (traceMask overrides? no --
                                                        // both masks must be on; see assert)
                           {kMaskOn,  &kTraceScope5}};  // logged: -5
    for (const Step& step : kSteps) {
        if (step.conditionMask) {
            p.append(SkRasterPipelineOp::load_condition_mask, step.conditionMask);
        }
        p.append(SkRasterPipelineOp::trace_scope, step.traceCtx);
    }
    p.run(0,0,N,1);

    REPORTER_ASSERT(r, (hook.fBuffer == TArray<int>{+1, +4, -5}));
}
1139 
// Verifies the copy_N_slots_masked ops: they copy N slots from src to dst, but only in lanes
// where the condition mask is on; mask-off lanes must keep their original dst contents.
DEF_TEST(SkRasterPipeline_CopySlotsMasked, r) {
    // Allocate space for 5 source slots and 5 dest slots.
    alignas(64) int slots[10 * SkRasterPipeline_kMaxStride_highp];
    const int srcIndex = 0, dstIndex = 5;

    // Each test case pairs a masked-copy op with the number of slots that op copies.
    struct CopySlotsOp {
        SkRasterPipelineOp stage;
        int numSlotsAffected;
    };

    static const CopySlotsOp kCopyOps[] = {
        {SkRasterPipelineOp::copy_slot_masked,    1},
        {SkRasterPipelineOp::copy_2_slots_masked, 2},
        {SkRasterPipelineOp::copy_3_slots_masked, 3},
        {SkRasterPipelineOp::copy_4_slots_masked, 4},
    };

    // Condition masks sized for the widest stride: all lanes on, all lanes off, and two mixed
    // patterns. ~0 enables a lane; 0 disables it.
    static_assert(SkRasterPipeline_kMaxStride_highp == 16);
    alignas(64) const int32_t kMask1[16] = {~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0,
                                            ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0};
    alignas(64) const int32_t kMask2[16] = { 0,  0,  0,  0,  0,  0,  0,  0,
                                             0,  0,  0,  0,  0,  0,  0,  0};
    alignas(64) const int32_t kMask3[16] = {~0,  0, ~0, ~0, ~0, ~0,  0, ~0,
                                            ~0,  0, ~0, ~0, ~0, ~0,  0, ~0};
    alignas(64) const int32_t kMask4[16] = { 0, ~0,  0,  0,  0, ~0, ~0,  0,
                                             0, ~0,  0,  0,  0, ~0, ~0,  0};

    const int N = SkOpts::raster_pipeline_highp_stride;

    for (const CopySlotsOp& op : kCopyOps) {
        for (const int32_t* mask : {kMask1, kMask2, kMask3, kMask4}) {
            // Initialize the destination slots to 0,1,2.. and the source slots to various NaNs
            std::iota(&slots[N * dstIndex],  &slots[N * (dstIndex + 5)], 0);
            std::iota(&slots[N * srcIndex],  &slots[N * (srcIndex + 5)], kLastSignalingNaN);

            // Run `copy_slots_masked` over our data.
            SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
            SkRasterPipeline p(&alloc);
            SkRasterPipeline_BinaryOpCtx ctx;
            // src/dst are byte offsets relative to the base pointer set below.
            ctx.dst = N * dstIndex * sizeof(float);
            ctx.src = N * srcIndex * sizeof(float);

            // Set up the lane masks, then overlay the test's condition mask so the copy only
            // lands in mask-on lanes (verified below).
            SkRasterPipeline_InitLaneMasksCtx initLaneMasksCtx;
            p.append(SkRasterPipelineOp::init_lane_masks, &initLaneMasksCtx);
            p.append(SkRasterPipelineOp::set_base_pointer, &slots[0]);
            p.append(SkRasterPipelineOp::load_condition_mask, mask);
            p.append(op.stage, SkRPCtxUtils::Pack(ctx, &alloc));
            p.run(0,0,N,1);  // run N pixels wide so every lane participates

            // Verify that the destination has been overwritten in the mask-on fields, and has not
            // been overwritten in the mask-off fields, for each destination slot.
            // Both expected sequences advance once per element, mirroring the iotas above.
            int expectedUnchanged = 0, expectedChanged = kLastSignalingNaN;
            int* destPtr = &slots[N * dstIndex];
            for (int checkSlot = 0; checkSlot < 5; ++checkSlot) {
                for (int checkMask = 0; checkMask < N; ++checkMask) {
                    if (checkSlot < op.numSlotsAffected && mask[checkMask]) {
                        REPORTER_ASSERT(r, *destPtr == expectedChanged);
                    } else {
                        REPORTER_ASSERT(r, *destPtr == expectedUnchanged);
                    }

                    ++destPtr;
                    expectedUnchanged += 1;
                    expectedChanged += 1;
                }
            }
        }
    }
}
1209 
// Verifies the copy_N_slots_unmasked ops: they copy N whole slots from src to dst,
// leaving every other slot untouched.
DEF_TEST(SkRasterPipeline_CopySlotsUnmasked, r) {
    // Reserve storage for 5 source slots followed by 5 destination slots.
    alignas(64) int slots[10 * SkRasterPipeline_kMaxStride_highp];
    const int srcIndex = 0, dstIndex = 5;
    const int N = SkOpts::raster_pipeline_highp_stride;

    // Each test case pairs an unmasked-copy op with the number of slots it copies.
    struct CopySlotsOp {
        SkRasterPipelineOp stage;
        int numSlotsAffected;
    };

    static const CopySlotsOp kCopyOps[] = {
        {SkRasterPipelineOp::copy_slot_unmasked,    1},
        {SkRasterPipelineOp::copy_2_slots_unmasked, 2},
        {SkRasterPipelineOp::copy_3_slots_unmasked, 3},
        {SkRasterPipelineOp::copy_4_slots_unmasked, 4},
    };

    for (const CopySlotsOp& op : kCopyOps) {
        // Fill the dest slots with 0,1,2... and the source slots with ascending NaN bit patterns.
        std::iota(&slots[N * dstIndex], &slots[N * (dstIndex + 5)], 0);
        std::iota(&slots[N * srcIndex], &slots[N * (srcIndex + 5)], kLastSignalingNaN);

        // Assemble and run a pipeline containing just this copy op. The context's src/dst
        // fields are byte offsets from the base pointer.
        SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
        SkRasterPipeline p(&alloc);
        SkRasterPipeline_BinaryOpCtx ctx;
        ctx.dst = N * dstIndex * sizeof(float);
        ctx.src = N * srcIndex * sizeof(float);
        p.append(SkRasterPipelineOp::set_base_pointer, &slots[0]);
        p.append(op.stage, SkRPCtxUtils::Pack(ctx, &alloc));
        p.run(0,0,1,1);

        // The first `numSlotsAffected` dest slots should now mirror the source values; the
        // remaining dest slots should still hold their original 0,1,2... sequence.
        const int* dstBase = &slots[N * dstIndex];
        for (int slot = 0; slot < 5; ++slot) {
            for (int lane = 0; lane < N; ++lane) {
                const int elem = slot * N + lane;
                if (slot < op.numSlotsAffected) {
                    REPORTER_ASSERT(r, dstBase[elem] == kLastSignalingNaN + elem);
                } else {
                    REPORTER_ASSERT(r, dstBase[elem] == elem);
                }
            }
        }
    }
}
1261 
// Verifies the copy_N_uniforms ops: each op broadcasts N uniform values into N slots,
// replicating each uniform across every lane of its destination slot.
DEF_TEST(SkRasterPipeline_CopyUniforms, r) {
    // Allocate space for 5 dest slots.
    alignas(64) int slots[5 * SkRasterPipeline_kMaxStride_highp];
    int uniforms[5];
    const int N = SkOpts::raster_pipeline_highp_stride;

    // Each test case pairs a uniform-copy op with the number of slots it writes.
    struct CopyUniformsOp {
        SkRasterPipelineOp stage;
        int numSlotsAffected;
    };

    static const CopyUniformsOp kCopyOps[] = {
        {SkRasterPipelineOp::copy_uniform,    1},
        {SkRasterPipelineOp::copy_2_uniforms, 2},
        {SkRasterPipelineOp::copy_3_uniforms, 3},
        {SkRasterPipelineOp::copy_4_uniforms, 4},
    };

    for (const CopyUniformsOp& op : kCopyOps) {
        // Initialize the destination slots to 1,2,3...
        std::iota(&slots[0], &slots[5 * N], 1);
        // Initialize the uniform buffer to various NaNs
        std::iota(&uniforms[0], &uniforms[5], kLastSignalingNaN);

        // Run `copy_n_uniforms` over our data. (Uniform ops use a pointer-based context
        // rather than base-pointer byte offsets.)
        SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
        SkRasterPipeline p(&alloc);
        auto* ctx = alloc.make<SkRasterPipeline_UniformCtx>();
        ctx->dst = slots;
        ctx->src = uniforms;
        p.append(op.stage, ctx);
        p.run(0,0,1,1);

        // Verify that our uniforms have been broadcast into each slot.
        int expectedUnchanged = 1;
        int expectedChanged = kLastSignalingNaN;
        int* destPtr = &slots[0];
        for (int checkSlot = 0; checkSlot < 5; ++checkSlot) {
            for (int checkLane = 0; checkLane < N; ++checkLane) {
                if (checkSlot < op.numSlotsAffected) {
                    // Every lane of an affected slot holds the same broadcast uniform.
                    REPORTER_ASSERT(r, *destPtr == expectedChanged);
                } else {
                    REPORTER_ASSERT(r, *destPtr == expectedUnchanged);
                }

                ++destPtr;
                expectedUnchanged += 1;
            }
            // The expected uniform value advances once per slot (not once per lane).
            expectedChanged += 1;
        }
    }
}
1314 
// Verifies copy_constant: it broadcasts a single immediate value into exactly one slot,
// leaving the other slots untouched.
DEF_TEST(SkRasterPipeline_CopyConstant, r) {
    // Allocate space for 5 dest slots.
    alignas(64) int slots[5 * SkRasterPipeline_kMaxStride_highp];
    const int N = SkOpts::raster_pipeline_highp_stride;

    for (int targetSlot = 0; targetSlot < 5; ++targetSlot) {
        // Fill every dest slot with the sequence 1,2,3...
        std::iota(&slots[0], &slots[5 * N], 1);

        // Build a pipeline that writes a slot-specific NaN constant into `targetSlot`.
        // ctx.dst is a byte offset from the base pointer.
        SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
        SkRasterPipeline p(&alloc);
        SkRasterPipeline_ConstantCtx ctx;
        ctx.dst = N * targetSlot * sizeof(float);
        ctx.value = kLastSignalingNaN + targetSlot;
        p.append(SkRasterPipelineOp::set_base_pointer, &slots[0]);
        p.append(SkRasterPipelineOp::copy_constant, SkRPCtxUtils::Pack(ctx, &alloc));
        p.run(0,0,1,1);

        // Only `targetSlot` should now hold the constant; all other slots keep their
        // iota-assigned values.
        for (int slot = 0; slot < 5; ++slot) {
            for (int lane = 0; lane < N; ++lane) {
                const int actual = slots[slot * N + lane];
                if (slot == targetSlot) {
                    REPORTER_ASSERT(r, actual == ctx.value);
                } else {
                    REPORTER_ASSERT(r, actual == 1 + slot * N + lane);
                }
            }
        }
    }
}
1351 
// Verifies the swizzle_N ops, which rearrange the first N slots in place according to a
// per-component offset table (e.g. `.yx` swaps the first two slots).
DEF_TEST(SkRasterPipeline_Swizzle, r) {
    // Allocate space for 4 dest slots.
    alignas(64) int slots[4 * SkRasterPipeline_kMaxStride_highp];
    const int N = SkOpts::raster_pipeline_highp_stride;

    struct TestPattern {
        SkRasterPipelineOp stage;
        uint8_t swizzle[4];      // source slot read for each written component
        uint8_t expectation[4];  // slot whose original value each dest slot should now hold
    };
    static const TestPattern kPatterns[] = {
        {SkRasterPipelineOp::swizzle_1, {3},          {3, 1, 2, 3}}, // (1,2,3,4).w    = (4)
        {SkRasterPipelineOp::swizzle_2, {1, 0},       {1, 0, 2, 3}}, // (1,2,3,4).yx   = (2,1)
        {SkRasterPipelineOp::swizzle_3, {2, 2, 2},    {2, 2, 2, 3}}, // (1,2,3,4).zzz  = (3,3,3)
        {SkRasterPipelineOp::swizzle_4, {0, 0, 1, 2}, {0, 0, 1, 2}}, // (1,2,3,4).xxyz = (1,1,2,3)
    };
    static_assert(sizeof(TestPattern::swizzle) == sizeof(SkRasterPipeline_SwizzleCtx::offsets));

    for (const TestPattern& pattern : kPatterns) {
        // Initialize the destination slots to various NaNs
        std::iota(&slots[0], &slots[4 * N], kLastSignalingNaN);

        // Apply the test-pattern swizzle.
        SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
        SkRasterPipeline p(&alloc);
        SkRasterPipeline_SwizzleCtx ctx;
        ctx.dst = 0;
        // Each offset is a byte distance from the base pointer; slot i starts at i*N floats.
        for (size_t index = 0; index < std::size(ctx.offsets); ++index) {
            ctx.offsets[index] = pattern.swizzle[index] * N * sizeof(float);
        }
        p.append(SkRasterPipelineOp::set_base_pointer, &slots[0]);
        p.append(pattern.stage, SkRPCtxUtils::Pack(ctx, &alloc));
        p.run(0,0,1,1);

        // Verify that the swizzle has been applied in each slot.
        int* destPtr = &slots[0];
        for (int checkSlot = 0; checkSlot < 4; ++checkSlot) {
            // Expected value: the lane-0 iota value of the slot named by `expectation`.
            int expected = pattern.expectation[checkSlot] * N + kLastSignalingNaN;
            for (int checkLane = 0; checkLane < N; ++checkLane) {
                REPORTER_ASSERT(r, *destPtr == expected);

                ++destPtr;
                expected += 1;
            }
        }
    }
}
1399 
// Verifies swizzle_copy_N_slots_masked: copies N source slots into destination slots chosen
// by a swizzle pattern (e.g. `v.zwy = (...)`), honoring the execution mask.
DEF_TEST(SkRasterPipeline_SwizzleCopy, r) {
    const int N = SkOpts::raster_pipeline_highp_stride;

    struct TestPattern {
        SkRasterPipelineOp op;
        uint16_t swizzle[4];      // dest slot written by each source component; `_` = unused
        uint16_t expectation[4];  // source slot expected in each dest slot; `_` = untouched
    };
    constexpr uint16_t _ = ~0;  // sentinel: "no component here"
    static const TestPattern kPatterns[] = {
        {SkRasterPipelineOp::swizzle_copy_slot_masked,    {3,_,_,_}, {_,_,_,0}},//v.w    = (1)
        {SkRasterPipelineOp::swizzle_copy_2_slots_masked, {1,0,_,_}, {1,0,_,_}},//v.yx   = (1,2)
        {SkRasterPipelineOp::swizzle_copy_3_slots_masked, {2,3,0,_}, {2,_,0,1}},//v.zwy  = (1,2,3)
        {SkRasterPipelineOp::swizzle_copy_4_slots_masked, {3,0,1,2}, {1,2,3,0}},//v.wxyz = (1,2,3,4)
    };
    static_assert(sizeof(TestPattern::swizzle) == sizeof(SkRasterPipeline_SwizzleCopyCtx::offsets));

    for (const TestPattern& pattern : kPatterns) {
        // Allocate space for 4 dest slots, and initialize them to zero.
        alignas(64) int dest[4 * SkRasterPipeline_kMaxStride_highp] = {};

        // Allocate 4 source slots and initialize them to various NaNs
        alignas(64) int source[4 * SkRasterPipeline_kMaxStride_highp] = {};
        std::iota(&source[0 * N], &source[4 * N], kLastSignalingNaN);

        // Apply the dest-swizzle pattern.
        SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
        SkRasterPipeline p(&alloc);
        SkRasterPipeline_InitLaneMasksCtx initLaneMasksCtx;
        SkRasterPipeline_SwizzleCopyCtx ctx = {};
        ctx.src = source;
        ctx.dst = dest;
        // Offsets are byte distances from `dst`; unused components keep their zero offset.
        for (size_t index = 0; index < std::size(ctx.offsets); ++index) {
            if (pattern.swizzle[index] != _) {
                ctx.offsets[index] = pattern.swizzle[index] * N * sizeof(float);
            }
        }
        p.append(SkRasterPipelineOp::init_lane_masks, &initLaneMasksCtx);
        p.append(pattern.op, &ctx);
        p.run(0,0,N,1);  // run N pixels wide so every lane participates

        // Verify that the swizzle has been applied in each slot.
        int* destPtr = &dest[0];
        for (int checkSlot = 0; checkSlot < 4; ++checkSlot) {
            for (int checkLane = 0; checkLane < N; ++checkLane) {
                if (pattern.expectation[checkSlot] == _) {
                    // Slots not named by the swizzle must remain zero-initialized.
                    REPORTER_ASSERT(r, *destPtr == 0);
                } else {
                    int expectedIdx = pattern.expectation[checkSlot] * N + checkLane;
                    REPORTER_ASSERT(r, *destPtr == source[expectedIdx]);
                }

                ++destPtr;
            }
        }
    }
}
1457 
// Verifies the shuffle op: it rewrites the first `count` slots in place, pulling each written
// slot's value from an arbitrary source slot given by an offset table (used e.g. for matrix
// transposes). Slots at or past `count` are untouched.
DEF_TEST(SkRasterPipeline_Shuffle, r) {
    // Allocate space for 16 dest slots.
    alignas(64) int slots[16 * SkRasterPipeline_kMaxStride_highp];
    const int N = SkOpts::raster_pipeline_highp_stride;

    struct TestPattern {
        int count;                 // number of slots the shuffle writes
        uint16_t shuffle[16];      // source slot for each written slot
        uint16_t expectation[16];  // slot whose original value each slot should now hold
    };
    static const TestPattern kPatterns[] = {
        // A 3x3 transpose: only the first 9 slots are written; 9..15 must be unchanged.
        {9,  { 0,  3,  6,
               1,  4,  7,
               2,  5,  8, /* past end: */  0,  0,  0,  0,  0,  0,  0},
             { 0,  3,  6,
               1,  4,  7,
               2,  5,  8, /* unchanged: */ 9, 10, 11, 12, 13, 14, 15}},
        // A full 4x4 transpose covering all 16 slots.
        {16, { 0,  4,  8, 12,
               1,  5,  9, 13,
               2,  6, 10, 14,
               3,  7, 11, 15},
             { 0,  4,  8, 12,
               1,  5,  9, 13,
               2,  6, 10, 14,
               3,  7, 11, 15}},
    };
    static_assert(sizeof(TestPattern::shuffle) == sizeof(SkRasterPipeline_ShuffleCtx::offsets));

    for (const TestPattern& pattern : kPatterns) {
        // Initialize the destination slots to various NaNs
        std::iota(&slots[0], &slots[16 * N], kLastSignalingNaN);

        // Apply the shuffle.
        SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
        SkRasterPipeline p(&alloc);
        SkRasterPipeline_ShuffleCtx ctx;
        ctx.ptr = slots;
        ctx.count = pattern.count;
        // Offsets are byte distances from `ptr`; slot i starts at i*N floats.
        for (size_t index = 0; index < std::size(ctx.offsets); ++index) {
            ctx.offsets[index] = pattern.shuffle[index] * N * sizeof(float);
        }
        p.append(SkRasterPipelineOp::shuffle, &ctx);
        p.run(0,0,1,1);

        // Verify that the shuffle has been applied in each slot.
        int* destPtr = &slots[0];
        for (int checkSlot = 0; checkSlot < 16; ++checkSlot) {
            // Expected value: the lane-0 iota value of the slot named by `expectation`.
            int expected = pattern.expectation[checkSlot] * N + kLastSignalingNaN;
            for (int checkLane = 0; checkLane < N; ++checkLane) {
                REPORTER_ASSERT(r, *destPtr == expected);

                ++destPtr;
                expected += 1;
            }
        }
    }
}
1515 
// Verifies matrix_multiply_2: a 2x2 * 2x2 matrix product. Each matrix element occupies one
// float slot (N lanes wide); matrices are laid out in [column][row] order below.
DEF_TEST(SkRasterPipeline_MatrixMultiply2x2, reporter) {
    alignas(64) float slots[12 * SkRasterPipeline_kMaxStride_highp];
    const int N = SkOpts::raster_pipeline_highp_stride;

    // Populate the left- and right-matrix data. Slots 0-3 hold the result and are left as-is.
    std::iota(&slots[4 * N], &slots[12 * N], 1.0f);

    // Perform a 2x2 matrix multiply.
    SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
    SkRasterPipeline p(&alloc);
    SkRasterPipeline_MatrixMultiplyCtx ctx;
    ctx.dst = 0;  // byte offset of the result, relative to the base pointer
    ctx.leftColumns = ctx.leftRows = ctx.rightColumns = ctx.rightRows = 2;
    p.append(SkRasterPipelineOp::set_base_pointer, &slots[0]);
    p.append(SkRasterPipelineOp::matrix_multiply_2, SkRPCtxUtils::Pack(ctx, &alloc));
    p.run(0,0,1,1);

    // Verify that the result slots hold a 2x2 matrix multiply.
    // These tables index the flat slot array as [column][row] matrices.
    const float* const destPtr[2][2] = {
            {&slots[0 * N], &slots[1 * N]},
            {&slots[2 * N], &slots[3 * N]},
    };
    const float* const leftMtx[2][2] = {
            {&slots[4 * N], &slots[5 * N]},
            {&slots[6 * N], &slots[7 * N]},
    };
    const float* const rightMtx[2][2] = {
            {&slots[8 * N],  &slots[9 * N]},
            {&slots[10 * N], &slots[11 * N]},
    };

    for (int c = 0; c < 2; ++c) {
        for (int r = 0; r < 2; ++r) {
            for (int lane = 0; lane < N; ++lane) {
                // Dot a vector from leftMtx[*][r] with rightMtx[c][*].
                float dot = 0;
                for (int n = 0; n < 2; ++n) {
                    dot += leftMtx[n][r][lane] * rightMtx[c][n][lane];
                }
                REPORTER_ASSERT(reporter, destPtr[c][r][lane] == dot);
            }
        }
    }
}
1560 
// Verifies matrix_multiply_3: a 3x3 * 3x3 matrix product. Each matrix element occupies one
// float slot (N lanes wide); matrices are laid out in [column][row] order below.
DEF_TEST(SkRasterPipeline_MatrixMultiply3x3, reporter) {
    alignas(64) float slots[27 * SkRasterPipeline_kMaxStride_highp];
    const int N = SkOpts::raster_pipeline_highp_stride;

    // Populate the left- and right-matrix data. Slots 0-8 hold the result and are left as-is.
    // To keep results in full-precision float range, we only set values between 0 and 25.
    float value = 0.0f;
    for (int idx = 9 * N; idx < 27 * N; ++idx) {
        slots[idx] = value;
        value = fmodf(value + 1.0f, 25.0f);
    }

    // Perform a 3x3 matrix multiply.
    SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
    SkRasterPipeline p(&alloc);
    SkRasterPipeline_MatrixMultiplyCtx ctx;
    ctx.dst = 0;  // byte offset of the result, relative to the base pointer
    ctx.leftColumns = ctx.leftRows = ctx.rightColumns = ctx.rightRows = 3;
    p.append(SkRasterPipelineOp::set_base_pointer, &slots[0]);
    p.append(SkRasterPipelineOp::matrix_multiply_3, SkRPCtxUtils::Pack(ctx, &alloc));
    p.run(0,0,1,1);

    // Verify that the result slots hold a 3x3 matrix multiply.
    // These tables index the flat slot array as [column][row] matrices.
    const float* const destPtr[3][3] = {
            {&slots[0 * N], &slots[1 * N], &slots[2 * N]},
            {&slots[3 * N], &slots[4 * N], &slots[5 * N]},
            {&slots[6 * N], &slots[7 * N], &slots[8 * N]},
    };
    const float* const leftMtx[3][3] = {
            {&slots[9 * N],  &slots[10 * N], &slots[11 * N]},
            {&slots[12 * N], &slots[13 * N], &slots[14 * N]},
            {&slots[15 * N], &slots[16 * N], &slots[17 * N]},
    };
    const float* const rightMtx[3][3] = {
            {&slots[18 * N], &slots[19 * N], &slots[20 * N]},
            {&slots[21 * N], &slots[22 * N], &slots[23 * N]},
            {&slots[24 * N], &slots[25 * N], &slots[26 * N]},
    };

    for (int c = 0; c < 3; ++c) {
        for (int r = 0; r < 3; ++r) {
            for (int lane = 0; lane < N; ++lane) {
                // Dot a vector from leftMtx[*][r] with rightMtx[c][*].
                float dot = 0;
                for (int n = 0; n < 3; ++n) {
                    dot += leftMtx[n][r][lane] * rightMtx[c][n][lane];
                }
                REPORTER_ASSERT(reporter, destPtr[c][r][lane] == dot);
            }
        }
    }
}
1613 
// Verifies matrix_multiply_4: a 4x4 * 4x4 matrix product. Each matrix element occupies one
// float slot (N lanes wide); matrices are laid out in [column][row] order below.
DEF_TEST(SkRasterPipeline_MatrixMultiply4x4, reporter) {
    alignas(64) float slots[48 * SkRasterPipeline_kMaxStride_highp];
    const int N = SkOpts::raster_pipeline_highp_stride;

    // Populate the left- and right-matrix data. Slots 0-15 hold the result and are left as-is.
    // To keep results in full-precision float range, we only set values between 0 and 25.
    float value = 0.0f;
    for (int idx = 16 * N; idx < 48 * N; ++idx) {
        slots[idx] = value;
        value = fmodf(value + 1.0f, 25.0f);
    }

    // Perform a 4x4 matrix multiply.
    SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
    SkRasterPipeline p(&alloc);
    SkRasterPipeline_MatrixMultiplyCtx ctx;
    ctx.dst = 0;  // byte offset of the result, relative to the base pointer
    ctx.leftColumns = ctx.leftRows = ctx.rightColumns = ctx.rightRows = 4;
    p.append(SkRasterPipelineOp::set_base_pointer, &slots[0]);
    p.append(SkRasterPipelineOp::matrix_multiply_4, SkRPCtxUtils::Pack(ctx, &alloc));
    p.run(0,0,1,1);

    // Verify that the result slots hold a 4x4 matrix multiply.
    // These tables index the flat slot array as [column][row] matrices.
    const float* const destPtr[4][4] = {
            {&slots[0 * N],  &slots[1 * N],  &slots[2 * N],  &slots[3 * N]},
            {&slots[4 * N],  &slots[5 * N],  &slots[6 * N],  &slots[7 * N]},
            {&slots[8 * N],  &slots[9 * N],  &slots[10 * N], &slots[11 * N]},
            {&slots[12 * N], &slots[13 * N], &slots[14 * N], &slots[15 * N]},
    };
    const float* const leftMtx[4][4] = {
            {&slots[16 * N], &slots[17 * N], &slots[18 * N], &slots[19 * N]},
            {&slots[20 * N], &slots[21 * N], &slots[22 * N], &slots[23 * N]},
            {&slots[24 * N], &slots[25 * N], &slots[26 * N], &slots[27 * N]},
            {&slots[28 * N], &slots[29 * N], &slots[30 * N], &slots[31 * N]},
    };
    const float* const rightMtx[4][4] = {
            {&slots[32 * N], &slots[33 * N], &slots[34 * N], &slots[35 * N]},
            {&slots[36 * N], &slots[37 * N], &slots[38 * N], &slots[39 * N]},
            {&slots[40 * N], &slots[41 * N], &slots[42 * N], &slots[43 * N]},
            {&slots[44 * N], &slots[45 * N], &slots[46 * N], &slots[47 * N]},
    };

    for (int c = 0; c < 4; ++c) {
        for (int r = 0; r < 4; ++r) {
            for (int lane = 0; lane < N; ++lane) {
                // Dot a vector from leftMtx[*][r] with rightMtx[c][*].
                float dot = 0;
                for (int n = 0; n < 4; ++n) {
                    dot += leftMtx[n][r][lane] * rightMtx[c][n][lane];
                }
                REPORTER_ASSERT(reporter, destPtr[c][r][lane] == dot);
            }
        }
    }
}
1669 
// Verifies the variable-width float arithmetic ops (add/sub/mul/div_n_floats): each combines
// `numSlotsAffected` dest slots with the same number of source slots, writing into the dest
// slots and leaving everything else untouched.
DEF_TEST(SkRasterPipeline_FloatArithmeticWithNSlots, r) {
    // Allocate space for 5 dest and 5 source slots.
    alignas(64) float slots[10 * SkRasterPipeline_kMaxStride_highp];
    const int N = SkOpts::raster_pipeline_highp_stride;

    // Each case pairs a pipeline op with a lambda computing the expected scalar result.
    struct ArithmeticOp {
        SkRasterPipelineOp stage;
        std::function<float(float, float)> verify;
    };

    static const ArithmeticOp kArithmeticOps[] = {
        {SkRasterPipelineOp::add_n_floats, [](float a, float b) { return a + b; }},
        {SkRasterPipelineOp::sub_n_floats, [](float a, float b) { return a - b; }},
        {SkRasterPipelineOp::mul_n_floats, [](float a, float b) { return a * b; }},
        {SkRasterPipelineOp::div_n_floats, [](float a, float b) { return a / b; }},
    };

    for (const ArithmeticOp& op : kArithmeticOps) {
        for (int numSlotsAffected = 1; numSlotsAffected <= 5; ++numSlotsAffected) {
            // Initialize the slot values to 1,2,3...
            std::iota(&slots[0], &slots[10 * N], 1.0f);

            // Run the arithmetic op over our data.
            SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
            SkRasterPipeline p(&alloc);
            SkRasterPipeline_BinaryOpCtx ctx;
            // dst is slot 0; src begins right after the affected slots. Both are byte
            // offsets from the base pointer.
            ctx.dst = 0;
            ctx.src = numSlotsAffected * N * sizeof(float);
            p.append(SkRasterPipelineOp::set_base_pointer, &slots[0]);
            p.append(op.stage, SkRPCtxUtils::Pack(ctx, &alloc));
            p.run(0,0,1,1);

            // Verify that the affected slots now equal (1,2,3...) op (4,5,6...).
            float leftValue = 1.0f;
            float rightValue = float(numSlotsAffected * N) + 1.0f;
            float* destPtr = &slots[0];
            for (int checkSlot = 0; checkSlot < 10; ++checkSlot) {
                for (int checkLane = 0; checkLane < N; ++checkLane) {
                    if (checkSlot < numSlotsAffected) {
                        REPORTER_ASSERT(r, *destPtr == op.verify(leftValue, rightValue));
                    } else {
                        // Unaffected slots (including the source region) keep their iota values.
                        REPORTER_ASSERT(r, *destPtr == leftValue);
                    }

                    ++destPtr;
                    leftValue += 1.0f;
                    rightValue += 1.0f;
                }
            }
        }
    }
}
1722 
// Verifies the fixed-width float arithmetic ops (add_float .. div_4_floats). Unlike the
// `_n_floats` variants, these take the dest pointer directly; the verification below shows the
// source operands are read from the slots immediately following the affected dest slots.
DEF_TEST(SkRasterPipeline_FloatArithmeticWithHardcodedSlots, r) {
    // Allocate space for 5 dest and 5 source slots.
    alignas(64) float slots[10 * SkRasterPipeline_kMaxStride_highp];
    const int N = SkOpts::raster_pipeline_highp_stride;

    // Each case pairs a pipeline op with its hardcoded slot count and a lambda computing the
    // expected scalar result.
    struct ArithmeticOp {
        SkRasterPipelineOp stage;
        int numSlotsAffected;
        std::function<float(float, float)> verify;
    };

    static const ArithmeticOp kArithmeticOps[] = {
        {SkRasterPipelineOp::add_float,    1, [](float a, float b) { return a + b; }},
        {SkRasterPipelineOp::sub_float,    1, [](float a, float b) { return a - b; }},
        {SkRasterPipelineOp::mul_float,    1, [](float a, float b) { return a * b; }},
        {SkRasterPipelineOp::div_float,    1, [](float a, float b) { return a / b; }},

        {SkRasterPipelineOp::add_2_floats, 2, [](float a, float b) { return a + b; }},
        {SkRasterPipelineOp::sub_2_floats, 2, [](float a, float b) { return a - b; }},
        {SkRasterPipelineOp::mul_2_floats, 2, [](float a, float b) { return a * b; }},
        {SkRasterPipelineOp::div_2_floats, 2, [](float a, float b) { return a / b; }},

        {SkRasterPipelineOp::add_3_floats, 3, [](float a, float b) { return a + b; }},
        {SkRasterPipelineOp::sub_3_floats, 3, [](float a, float b) { return a - b; }},
        {SkRasterPipelineOp::mul_3_floats, 3, [](float a, float b) { return a * b; }},
        {SkRasterPipelineOp::div_3_floats, 3, [](float a, float b) { return a / b; }},

        {SkRasterPipelineOp::add_4_floats, 4, [](float a, float b) { return a + b; }},
        {SkRasterPipelineOp::sub_4_floats, 4, [](float a, float b) { return a - b; }},
        {SkRasterPipelineOp::mul_4_floats, 4, [](float a, float b) { return a * b; }},
        {SkRasterPipelineOp::div_4_floats, 4, [](float a, float b) { return a / b; }},
    };

    for (const ArithmeticOp& op : kArithmeticOps) {
        // Initialize the slot values to 1,2,3...
        std::iota(&slots[0], &slots[10 * N], 1.0f);

        // Run the arithmetic op over our data. (These ops take the dest pointer directly.)
        SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
        SkRasterPipeline p(&alloc);
        p.append(op.stage, &slots[0]);
        p.run(0,0,1,1);

        // Verify that the affected slots now equal (1,2,3...) op (4,5,6...).
        float leftValue = 1.0f;
        float rightValue = float(op.numSlotsAffected * N) + 1.0f;
        float* destPtr = &slots[0];
        for (int checkSlot = 0; checkSlot < 10; ++checkSlot) {
            for (int checkLane = 0; checkLane < N; ++checkLane) {
                if (checkSlot < op.numSlotsAffected) {
                    REPORTER_ASSERT(r, *destPtr == op.verify(leftValue, rightValue));
                } else {
                    // Unaffected slots (including the source region) keep their iota values.
                    REPORTER_ASSERT(r, *destPtr == leftValue);
                }

                ++destPtr;
                leftValue += 1.0f;
                rightValue += 1.0f;
            }
        }
    }
}
1785 
// Divides two ints as though both were unsigned 32-bit values, returning the quotient's bits
// as a signed int. Mirrors the pipeline's div_*_uints semantics for test verification.
static int divide_unsigned(int a, int b) {
    return int(static_cast<uint32_t>(a) / static_cast<uint32_t>(b));
}
// Returns whichever operand is smaller when both are compared as unsigned 32-bit values.
// Mirrors the pipeline's min_*_uints semantics for test verification.
static int min_unsigned(int a, int b) {
    const uint32_t ua = static_cast<uint32_t>(a);
    const uint32_t ub = static_cast<uint32_t>(b);
    return (ua < ub) ? a : b;
}
// Returns whichever operand is larger when both are compared as unsigned 32-bit values.
// Mirrors the pipeline's max_*_uints semantics for test verification.
static int max_unsigned(int a, int b) {
    const uint32_t ua = static_cast<uint32_t>(a);
    const uint32_t ub = static_cast<uint32_t>(b);
    return (ua > ub) ? a : b;
}
1789 
// Verifies the variable-width integer arithmetic ops (add/sub/mul/div/bitwise/min/max over
// `n` slots): each combines `numSlotsAffected` dest slots with the same number of source
// slots, writing into the dest slots and leaving everything else untouched.
DEF_TEST(SkRasterPipeline_IntArithmeticWithNSlots, r) {
    // Allocate space for 5 dest and 5 source slots.
    alignas(64) int slots[10 * SkRasterPipeline_kMaxStride_highp];
    const int N = SkOpts::raster_pipeline_highp_stride;

    // Each case pairs a pipeline op with a lambda (or helper) computing the expected result.
    struct ArithmeticOp {
        SkRasterPipelineOp stage;
        std::function<int(int, int)> verify;
    };

    static const ArithmeticOp kArithmeticOps[] = {
        {SkRasterPipelineOp::add_n_ints,         [](int a, int b) { return a + b; }},
        {SkRasterPipelineOp::sub_n_ints,         [](int a, int b) { return a - b; }},
        {SkRasterPipelineOp::mul_n_ints,         [](int a, int b) { return a * b; }},
        {SkRasterPipelineOp::div_n_ints,         [](int a, int b) { return a / b; }},
        {SkRasterPipelineOp::div_n_uints,        divide_unsigned},
        {SkRasterPipelineOp::bitwise_and_n_ints, [](int a, int b) { return a & b; }},
        {SkRasterPipelineOp::bitwise_or_n_ints,  [](int a, int b) { return a | b; }},
        {SkRasterPipelineOp::bitwise_xor_n_ints, [](int a, int b) { return a ^ b; }},
        {SkRasterPipelineOp::min_n_ints,         [](int a, int b) { return a < b ? a : b; }},
        {SkRasterPipelineOp::min_n_uints,        min_unsigned},
        {SkRasterPipelineOp::max_n_ints,         [](int a, int b) { return a > b ? a : b; }},
        {SkRasterPipelineOp::max_n_uints,        max_unsigned},
    };

    for (const ArithmeticOp& op : kArithmeticOps) {
        for (int numSlotsAffected = 1; numSlotsAffected <= 5; ++numSlotsAffected) {
            // Initialize the slot values to 1,2,3...
            std::iota(&slots[0], &slots[10 * N], 1);
            // Snapshot the starting values of the first dest and first source element.
            int leftValue = slots[0];
            int rightValue = slots[numSlotsAffected * N];

            // Run the op (e.g. `add_n_ints`) over our data.
            SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
            SkRasterPipeline p(&alloc);
            SkRasterPipeline_BinaryOpCtx ctx;
            // dst is slot 0; src begins right after the affected slots. Both are byte offsets
            // from the base pointer (sizeof(float) == sizeof(int) here).
            ctx.dst = 0;
            ctx.src = numSlotsAffected * N * sizeof(float);
            p.append(SkRasterPipelineOp::set_base_pointer, &slots[0]);
            p.append(op.stage, SkRPCtxUtils::Pack(ctx, &alloc));
            p.run(0,0,1,1);

            // Verify that the affected slots now equal (1,2,3...) op (4,5,6...).
            int* destPtr = &slots[0];
            for (int checkSlot = 0; checkSlot < 10; ++checkSlot) {
                for (int checkLane = 0; checkLane < N; ++checkLane) {
                    if (checkSlot < numSlotsAffected) {
                        REPORTER_ASSERT(r, *destPtr == op.verify(leftValue, rightValue));
                    } else {
                        // Unaffected slots (including the source region) keep their iota values.
                        REPORTER_ASSERT(r, *destPtr == leftValue);
                    }

                    ++destPtr;
                    leftValue += 1;
                    rightValue += 1;
                }
            }
        }
    }
}
1850 
// Exercises the fixed-width integer binary-arithmetic stages (1-4 slots). Each stage combines
// `numSlotsAffected` source slots into the matching dest slots in place; every slot past the
// affected range must be left untouched.
DEF_TEST(SkRasterPipeline_IntArithmeticWithHardcodedSlots, r) {
    // Allocate space for 5 dest and 5 source slots.
    alignas(64) int slots[10 * SkRasterPipeline_kMaxStride_highp];
    const int N = SkOpts::raster_pipeline_highp_stride;

    struct ArithmeticOp {
        SkRasterPipelineOp stage;             // pipeline stage under test
        int numSlotsAffected;                 // number of dest slots the stage writes
        std::function<int(int, int)> verify;  // scalar reference implementation
    };

    static const ArithmeticOp kArithmeticOps[] = {
        {SkRasterPipelineOp::add_int,            1, [](int a, int b) { return a + b; }},
        {SkRasterPipelineOp::sub_int,            1, [](int a, int b) { return a - b; }},
        {SkRasterPipelineOp::mul_int,            1, [](int a, int b) { return a * b; }},
        {SkRasterPipelineOp::div_int,            1, [](int a, int b) { return a / b; }},
        {SkRasterPipelineOp::div_uint,           1, divide_unsigned},
        {SkRasterPipelineOp::bitwise_and_int,    1, [](int a, int b) { return a & b; }},
        {SkRasterPipelineOp::bitwise_or_int,     1, [](int a, int b) { return a | b; }},
        {SkRasterPipelineOp::bitwise_xor_int,    1, [](int a, int b) { return a ^ b; }},
        {SkRasterPipelineOp::min_int,            1, [](int a, int b) { return a < b ? a: b; }},
        {SkRasterPipelineOp::min_uint,           1, min_unsigned},
        {SkRasterPipelineOp::max_int,            1, [](int a, int b) { return a > b ? a: b; }},
        {SkRasterPipelineOp::max_uint,           1, max_unsigned},

        {SkRasterPipelineOp::add_2_ints,         2, [](int a, int b) { return a + b; }},
        {SkRasterPipelineOp::sub_2_ints,         2, [](int a, int b) { return a - b; }},
        {SkRasterPipelineOp::mul_2_ints,         2, [](int a, int b) { return a * b; }},
        {SkRasterPipelineOp::div_2_ints,         2, [](int a, int b) { return a / b; }},
        {SkRasterPipelineOp::div_2_uints,        2, divide_unsigned},
        {SkRasterPipelineOp::bitwise_and_2_ints, 2, [](int a, int b) { return a & b; }},
        {SkRasterPipelineOp::bitwise_or_2_ints,  2, [](int a, int b) { return a | b; }},
        {SkRasterPipelineOp::bitwise_xor_2_ints, 2, [](int a, int b) { return a ^ b; }},
        {SkRasterPipelineOp::min_2_ints,         2, [](int a, int b) { return a < b ? a: b; }},
        {SkRasterPipelineOp::min_2_uints,        2, min_unsigned},
        {SkRasterPipelineOp::max_2_ints,         2, [](int a, int b) { return a > b ? a: b; }},
        {SkRasterPipelineOp::max_2_uints,        2, max_unsigned},

        {SkRasterPipelineOp::add_3_ints,         3, [](int a, int b) { return a + b; }},
        {SkRasterPipelineOp::sub_3_ints,         3, [](int a, int b) { return a - b; }},
        {SkRasterPipelineOp::mul_3_ints,         3, [](int a, int b) { return a * b; }},
        {SkRasterPipelineOp::div_3_ints,         3, [](int a, int b) { return a / b; }},
        {SkRasterPipelineOp::div_3_uints,        3, divide_unsigned},
        {SkRasterPipelineOp::bitwise_and_3_ints, 3, [](int a, int b) { return a & b; }},
        {SkRasterPipelineOp::bitwise_or_3_ints,  3, [](int a, int b) { return a | b; }},
        {SkRasterPipelineOp::bitwise_xor_3_ints, 3, [](int a, int b) { return a ^ b; }},
        {SkRasterPipelineOp::min_3_ints,         3, [](int a, int b) { return a < b ? a: b; }},
        {SkRasterPipelineOp::min_3_uints,        3, min_unsigned},
        {SkRasterPipelineOp::max_3_ints,         3, [](int a, int b) { return a > b ? a: b; }},
        {SkRasterPipelineOp::max_3_uints,        3, max_unsigned},

        {SkRasterPipelineOp::add_4_ints,         4, [](int a, int b) { return a + b; }},
        {SkRasterPipelineOp::sub_4_ints,         4, [](int a, int b) { return a - b; }},
        {SkRasterPipelineOp::mul_4_ints,         4, [](int a, int b) { return a * b; }},
        {SkRasterPipelineOp::div_4_ints,         4, [](int a, int b) { return a / b; }},
        {SkRasterPipelineOp::div_4_uints,        4, divide_unsigned},
        {SkRasterPipelineOp::bitwise_and_4_ints, 4, [](int a, int b) { return a & b; }},
        {SkRasterPipelineOp::bitwise_or_4_ints,  4, [](int a, int b) { return a | b; }},
        {SkRasterPipelineOp::bitwise_xor_4_ints, 4, [](int a, int b) { return a ^ b; }},
        {SkRasterPipelineOp::min_4_ints,         4, [](int a, int b) { return a < b ? a: b; }},
        {SkRasterPipelineOp::min_4_uints,        4, min_unsigned},
        {SkRasterPipelineOp::max_4_ints,         4, [](int a, int b) { return a > b ? a: b; }},
        {SkRasterPipelineOp::max_4_uints,        4, max_unsigned},
    };

    for (const ArithmeticOp& op : kArithmeticOps) {
        // Initialize the slot values to 1,2,3...
        std::iota(&slots[0], &slots[10 * N], 1);
        // Because the slots hold consecutive integers, the expected value for every later
        // lane/slot can be derived by incrementing these two starting values in lockstep.
        int leftValue = slots[0];
        int rightValue = slots[op.numSlotsAffected * N];

        // Run the op (e.g. `add_2_ints`) over our data.
        SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
        SkRasterPipeline p(&alloc);
        p.append(op.stage, &slots[0]);
        p.run(0,0,1,1);

        // Verify that the affected slots now equal (1,2,3...) op (4,5,6...).
        int* destPtr = &slots[0];
        for (int checkSlot = 0; checkSlot < 10; ++checkSlot) {
            for (int checkLane = 0; checkLane < N; ++checkLane) {
                if (checkSlot < op.numSlotsAffected) {
                    REPORTER_ASSERT(r, *destPtr == op.verify(leftValue, rightValue));
                } else {
                    // Slots past the affected range must still hold their original values.
                    REPORTER_ASSERT(r, *destPtr == leftValue);
                }

                ++destPtr;
                leftValue += 1;
                rightValue += 1;
            }
        }
    }
}
1945 
DEF_TEST(SkRasterPipeline_CompareFloatsWithNSlots,r)1946 DEF_TEST(SkRasterPipeline_CompareFloatsWithNSlots, r) {
1947     // Allocate space for 5 dest and 5 source slots.
1948     alignas(64) float slots[10 * SkRasterPipeline_kMaxStride_highp];
1949     const int N = SkOpts::raster_pipeline_highp_stride;
1950 
1951     struct CompareOp {
1952         SkRasterPipelineOp stage;
1953         std::function<bool(float, float)> verify;
1954     };
1955 
1956     static const CompareOp kCompareOps[] = {
1957         {SkRasterPipelineOp::cmpeq_n_floats, [](float a, float b) { return a == b; }},
1958         {SkRasterPipelineOp::cmpne_n_floats, [](float a, float b) { return a != b; }},
1959         {SkRasterPipelineOp::cmplt_n_floats, [](float a, float b) { return a <  b; }},
1960         {SkRasterPipelineOp::cmple_n_floats, [](float a, float b) { return a <= b; }},
1961     };
1962 
1963     for (const CompareOp& op : kCompareOps) {
1964         for (int numSlotsAffected = 1; numSlotsAffected <= 5; ++numSlotsAffected) {
1965             // Initialize the slot values to 0,1,2,0,1,2,0,1,2...
1966             for (int index = 0; index < 10 * N; ++index) {
1967                 slots[index] = std::fmod(index, 3.0f);
1968             }
1969 
1970             float leftValue  = slots[0];
1971             float rightValue = slots[numSlotsAffected * N];
1972 
1973             // Run the comparison op over our data.
1974             SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
1975             SkRasterPipeline p(&alloc);
1976             SkRasterPipeline_BinaryOpCtx ctx;
1977             ctx.dst = 0;
1978             ctx.src = numSlotsAffected * N * sizeof(float);
1979             p.append(SkRasterPipelineOp::set_base_pointer, &slots[0]);
1980             p.append(op.stage, SkRPCtxUtils::Pack(ctx, &alloc));
1981             p.run(0, 0, 1, 1);
1982 
1983             // Verify that the affected slots now contain "(0,1,2,0...) op (1,2,0,1...)".
1984             float* destPtr = &slots[0];
1985             for (int checkSlot = 0; checkSlot < 10; ++checkSlot) {
1986                 for (int checkLane = 0; checkLane < N; ++checkLane) {
1987                     if (checkSlot < numSlotsAffected) {
1988                         bool compareIsTrue = op.verify(leftValue, rightValue);
1989                         REPORTER_ASSERT(r, *(int*)destPtr == (compareIsTrue ? ~0 : 0));
1990                     } else {
1991                         REPORTER_ASSERT(r, *destPtr == leftValue);
1992                     }
1993 
1994                     ++destPtr;
1995                     leftValue = std::fmod(leftValue + 1.0f, 3.0f);
1996                     rightValue = std::fmod(rightValue + 1.0f, 3.0f);
1997                 }
1998             }
1999         }
2000     }
2001 }
2002 
// Exercises the fixed-width float comparison stages (1-4 slots). A true comparison writes an
// all-ones lane mask (~0) into the dest slot; a false comparison writes zero. Slots past the
// affected range must be left untouched.
DEF_TEST(SkRasterPipeline_CompareFloatsWithHardcodedSlots, r) {
    // Allocate space for 5 dest and 5 source slots.
    alignas(64) float slots[10 * SkRasterPipeline_kMaxStride_highp];
    const int N = SkOpts::raster_pipeline_highp_stride;

    struct CompareOp {
        SkRasterPipelineOp stage;                  // pipeline stage under test
        int numSlotsAffected;                      // number of dest slots the stage writes
        std::function<bool(float, float)> verify;  // scalar reference predicate
    };

    static const CompareOp kCompareOps[] = {
        {SkRasterPipelineOp::cmpeq_float,    1, [](float a, float b) { return a == b; }},
        {SkRasterPipelineOp::cmpne_float,    1, [](float a, float b) { return a != b; }},
        {SkRasterPipelineOp::cmplt_float,    1, [](float a, float b) { return a <  b; }},
        {SkRasterPipelineOp::cmple_float,    1, [](float a, float b) { return a <= b; }},

        {SkRasterPipelineOp::cmpeq_2_floats, 2, [](float a, float b) { return a == b; }},
        {SkRasterPipelineOp::cmpne_2_floats, 2, [](float a, float b) { return a != b; }},
        {SkRasterPipelineOp::cmplt_2_floats, 2, [](float a, float b) { return a <  b; }},
        {SkRasterPipelineOp::cmple_2_floats, 2, [](float a, float b) { return a <= b; }},

        {SkRasterPipelineOp::cmpeq_3_floats, 3, [](float a, float b) { return a == b; }},
        {SkRasterPipelineOp::cmpne_3_floats, 3, [](float a, float b) { return a != b; }},
        {SkRasterPipelineOp::cmplt_3_floats, 3, [](float a, float b) { return a <  b; }},
        {SkRasterPipelineOp::cmple_3_floats, 3, [](float a, float b) { return a <= b; }},

        {SkRasterPipelineOp::cmpeq_4_floats, 4, [](float a, float b) { return a == b; }},
        {SkRasterPipelineOp::cmpne_4_floats, 4, [](float a, float b) { return a != b; }},
        {SkRasterPipelineOp::cmplt_4_floats, 4, [](float a, float b) { return a <  b; }},
        {SkRasterPipelineOp::cmple_4_floats, 4, [](float a, float b) { return a <= b; }},
    };

    for (const CompareOp& op : kCompareOps) {
        // Initialize the slot values to 0,1,2,0,1,2,0,1,2...
        for (int index = 0; index < 10 * N; ++index) {
            slots[index] = std::fmod(index, 3.0f);
        }

        // Track the first lane's dest/source values; because the pattern repeats with period 3,
        // later expected values are recovered via the same fmod-increment below.
        float leftValue  = slots[0];
        float rightValue = slots[op.numSlotsAffected * N];

        // Run the comparison op over our data.
        SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
        SkRasterPipeline p(&alloc);
        p.append(op.stage, &slots[0]);
        p.run(0, 0, 1, 1);

        // Verify that the affected slots now contain "(0,1,2,0...) op (1,2,0,1...)".
        float* destPtr = &slots[0];
        for (int checkSlot = 0; checkSlot < 10; ++checkSlot) {
            for (int checkLane = 0; checkLane < N; ++checkLane) {
                if (checkSlot < op.numSlotsAffected) {
                    bool compareIsTrue = op.verify(leftValue, rightValue);
                    REPORTER_ASSERT(r, *(int*)destPtr == (compareIsTrue ? ~0 : 0));
                } else {
                    REPORTER_ASSERT(r, *destPtr == leftValue);
                }

                ++destPtr;
                leftValue = std::fmod(leftValue + 1.0f, 3.0f);
                rightValue = std::fmod(rightValue + 1.0f, 3.0f);
            }
        }
    }
}
2069 
// Unsigned less-than on int-typed lanes: reinterprets both operands as uint32 before comparing.
static bool compare_lt_uint(int a, int b) {
    return static_cast<uint32_t>(a) < static_cast<uint32_t>(b);
}
// Unsigned less-or-equal on int-typed lanes: reinterprets both operands as uint32 first.
static bool compare_lteq_uint(int a, int b) {
    return static_cast<uint32_t>(a) <= static_cast<uint32_t>(b);
}
2072 
// Exercises the variable-width integer comparison stages (`cmp*_n_ints` / `cmp*_n_uints`).
// These stages take a packed BinaryOpCtx whose src/dst are byte offsets from the base pointer.
// True comparisons write ~0 into the dest slot, false comparisons write 0; slots beyond the
// affected range must be left untouched.
DEF_TEST(SkRasterPipeline_CompareIntsWithNSlots, r) {
    // Allocate space for 5 dest and 5 source slots.
    alignas(64) int slots[10 * SkRasterPipeline_kMaxStride_highp];
    const int N = SkOpts::raster_pipeline_highp_stride;

    struct CompareOp {
        SkRasterPipelineOp stage;               // pipeline stage under test
        std::function<bool(int, int)> verify;   // scalar reference predicate
    };

    static const CompareOp kCompareOps[] = {
        {SkRasterPipelineOp::cmpeq_n_ints,  [](int a, int b) { return a == b; }},
        {SkRasterPipelineOp::cmpne_n_ints,  [](int a, int b) { return a != b; }},
        {SkRasterPipelineOp::cmplt_n_ints,  [](int a, int b) { return a <  b; }},
        {SkRasterPipelineOp::cmple_n_ints,  [](int a, int b) { return a <= b; }},
        {SkRasterPipelineOp::cmplt_n_uints, compare_lt_uint},
        {SkRasterPipelineOp::cmple_n_uints, compare_lteq_uint},
    };

    for (const CompareOp& op : kCompareOps) {
        for (int numSlotsAffected = 1; numSlotsAffected <= 5; ++numSlotsAffected) {
            // Initialize the slot values to -1,0,1,-1,0,1,-1,0,1,-1...
            // Negative values matter here: they make the signed and unsigned ops disagree.
            for (int index = 0; index < 10 * N; ++index) {
                slots[index] = (index % 3) - 1;
            }

            int leftValue = slots[0];
            int rightValue = slots[numSlotsAffected * N];

            // Run the comparison op over our data.
            SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
            SkRasterPipeline p(&alloc);
            SkRasterPipeline_BinaryOpCtx ctx;
            ctx.dst = 0;
            ctx.src = sizeof(float) * numSlotsAffected * N;
            p.append(SkRasterPipelineOp::set_base_pointer, &slots[0]);
            p.append(op.stage, SkRPCtxUtils::Pack(ctx, &alloc));
            p.run(0, 0, 1, 1);

            // Verify that the affected slots now contain "(-1,0,1,-1...) op (0,1,-1,0...)".
            int* destPtr = &slots[0];
            for (int checkSlot = 0; checkSlot < 10; ++checkSlot) {
                for (int checkLane = 0; checkLane < N; ++checkLane) {
                    if (checkSlot < numSlotsAffected) {
                        bool compareIsTrue = op.verify(leftValue, rightValue);
                        REPORTER_ASSERT(r, *destPtr == (compareIsTrue ? ~0 : 0));
                    } else {
                        REPORTER_ASSERT(r, *destPtr == leftValue);
                    }

                    ++destPtr;
                    // Advance the expected values through the repeating -1,0,1 pattern.
                    if (++leftValue == 2) {
                        leftValue = -1;
                    }
                    if (++rightValue == 2) {
                        rightValue = -1;
                    }
                }
            }
        }
    }
}
2135 
// Exercises the fixed-width integer comparison stages (1-4 slots), including the unsigned
// variants. True comparisons write ~0 into the dest slot, false comparisons write 0; slots
// beyond the affected range must be left untouched.
DEF_TEST(SkRasterPipeline_CompareIntsWithHardcodedSlots, r) {
    // Allocate space for 5 dest and 5 source slots.
    alignas(64) int slots[10 * SkRasterPipeline_kMaxStride_highp];
    const int N = SkOpts::raster_pipeline_highp_stride;

    struct CompareOp {
        SkRasterPipelineOp stage;               // pipeline stage under test
        int numSlotsAffected;                   // number of dest slots the stage writes
        std::function<bool(int, int)> verify;   // scalar reference predicate
    };

    static const CompareOp kCompareOps[] = {
        {SkRasterPipelineOp::cmpeq_int,     1, [](int a, int b) { return a == b; }},
        {SkRasterPipelineOp::cmpne_int,     1, [](int a, int b) { return a != b; }},
        {SkRasterPipelineOp::cmplt_int,     1, [](int a, int b) { return a <  b; }},
        {SkRasterPipelineOp::cmple_int,     1, [](int a, int b) { return a <= b; }},
        {SkRasterPipelineOp::cmplt_uint,    1, compare_lt_uint},
        {SkRasterPipelineOp::cmple_uint,    1, compare_lteq_uint},

        {SkRasterPipelineOp::cmpeq_2_ints,  2, [](int a, int b) { return a == b; }},
        {SkRasterPipelineOp::cmpne_2_ints,  2, [](int a, int b) { return a != b; }},
        {SkRasterPipelineOp::cmplt_2_ints,  2, [](int a, int b) { return a <  b; }},
        {SkRasterPipelineOp::cmple_2_ints,  2, [](int a, int b) { return a <= b; }},
        {SkRasterPipelineOp::cmplt_2_uints, 2, compare_lt_uint},
        {SkRasterPipelineOp::cmple_2_uints, 2, compare_lteq_uint},

        {SkRasterPipelineOp::cmpeq_3_ints,  3, [](int a, int b) { return a == b; }},
        {SkRasterPipelineOp::cmpne_3_ints,  3, [](int a, int b) { return a != b; }},
        {SkRasterPipelineOp::cmplt_3_ints,  3, [](int a, int b) { return a <  b; }},
        {SkRasterPipelineOp::cmple_3_ints,  3, [](int a, int b) { return a <= b; }},
        {SkRasterPipelineOp::cmplt_3_uints, 3, compare_lt_uint},
        {SkRasterPipelineOp::cmple_3_uints, 3, compare_lteq_uint},

        {SkRasterPipelineOp::cmpeq_4_ints,  4, [](int a, int b) { return a == b; }},
        {SkRasterPipelineOp::cmpne_4_ints,  4, [](int a, int b) { return a != b; }},
        {SkRasterPipelineOp::cmplt_4_ints,  4, [](int a, int b) { return a <  b; }},
        {SkRasterPipelineOp::cmple_4_ints,  4, [](int a, int b) { return a <= b; }},
        {SkRasterPipelineOp::cmplt_4_uints, 4, compare_lt_uint},
        {SkRasterPipelineOp::cmple_4_uints, 4, compare_lteq_uint},
    };

    for (const CompareOp& op : kCompareOps) {
        // Initialize the slot values to -1,0,1,-1,0,1,-1,0,1,-1...
        // Negative values matter here: they make the signed and unsigned ops disagree.
        for (int index = 0; index < 10 * N; ++index) {
            slots[index] = (index % 3) - 1;
        }

        int leftValue = slots[0];
        int rightValue = slots[op.numSlotsAffected * N];

        // Run the comparison op over our data.
        SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
        SkRasterPipeline p(&alloc);
        p.append(op.stage, &slots[0]);
        p.run(0, 0, 1, 1);

        // Verify that the affected slots now contain "(-1,0,1,-1...) op (0,1,-1,0...)".
        int* destPtr = &slots[0];
        for (int checkSlot = 0; checkSlot < 10; ++checkSlot) {
            for (int checkLane = 0; checkLane < N; ++checkLane) {
                if (checkSlot < op.numSlotsAffected) {
                    bool compareIsTrue = op.verify(leftValue, rightValue);
                    REPORTER_ASSERT(r, *destPtr == (compareIsTrue ? ~0 : 0));
                } else {
                    REPORTER_ASSERT(r, *destPtr == leftValue);
                }

                ++destPtr;
                // Advance the expected values through the repeating -1,0,1 pattern.
                if (++leftValue == 2) {
                    leftValue = -1;
                }
                if (++rightValue == 2) {
                    rightValue = -1;
                }
            }
        }
    }
}
2214 
to_float(int a)2215 static int to_float(int a) { return sk_bit_cast<int>((float)a); }
2216 
// Exercises the unary integer stages (int->float casts and abs, 1-4 slots). Each stage
// rewrites its affected slots in place; slots past the affected range must be untouched.
DEF_TEST(SkRasterPipeline_UnaryIntOps, r) {
    // Allocate space for 5 slots.
    alignas(64) int slots[5 * SkRasterPipeline_kMaxStride_highp];
    const int N = SkOpts::raster_pipeline_highp_stride;

    struct UnaryOp {
        SkRasterPipelineOp stage;        // pipeline stage under test
        int numSlotsAffected;            // number of slots the stage rewrites
        std::function<int(int)> verify;  // scalar reference implementation
    };

    static const UnaryOp kUnaryOps[] = {
        {SkRasterPipelineOp::cast_to_float_from_int,    1, to_float},
        {SkRasterPipelineOp::cast_to_float_from_2_ints, 2, to_float},
        {SkRasterPipelineOp::cast_to_float_from_3_ints, 3, to_float},
        {SkRasterPipelineOp::cast_to_float_from_4_ints, 4, to_float},

        {SkRasterPipelineOp::abs_int,    1, [](int a) { return a < 0 ? -a : a; }},
        {SkRasterPipelineOp::abs_2_ints, 2, [](int a) { return a < 0 ? -a : a; }},
        {SkRasterPipelineOp::abs_3_ints, 3, [](int a) { return a < 0 ? -a : a; }},
        {SkRasterPipelineOp::abs_4_ints, 4, [](int a) { return a < 0 ? -a : a; }},
    };

    for (const UnaryOp& op : kUnaryOps) {
        // Initialize the slot values to -10,-9,-8... (spanning zero, so abs is exercised on
        // both negative and positive inputs).
        std::iota(&slots[0], &slots[5 * N], -10);
        int inputValue = slots[0];

        // Run the unary op over our data.
        SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
        SkRasterPipeline p(&alloc);
        p.append(op.stage, &slots[0]);
        p.run(0, 0, 1, 1);

        // Verify that the destination slots have been updated.
        int* destPtr = &slots[0];
        for (int checkSlot = 0; checkSlot < 5; ++checkSlot) {
            for (int checkLane = 0; checkLane < N; ++checkLane) {
                if (checkSlot < op.numSlotsAffected) {
                    int expected = op.verify(inputValue);
                    REPORTER_ASSERT(r, *destPtr == expected);
                } else {
                    REPORTER_ASSERT(r, *destPtr == inputValue);
                }

                ++destPtr;
                ++inputValue;
            }
        }
    }
}
2268 
to_int(float a)2269 static float to_int(float a)  { return sk_bit_cast<float>((int)a); }
to_uint(float a)2270 static float to_uint(float a) { return sk_bit_cast<float>((unsigned int)a); }
2271 
// Exercises the unary float stages (float->int/uint casts, floor, ceil; 1-4 slots). Each
// stage rewrites its affected slots in place; slots past the range must be untouched.
DEF_TEST(SkRasterPipeline_UnaryFloatOps, r) {
    // Allocate space for 5 slots.
    alignas(64) float slots[5 * SkRasterPipeline_kMaxStride_highp];
    const int N = SkOpts::raster_pipeline_highp_stride;

    struct UnaryOp {
        SkRasterPipelineOp stage;            // pipeline stage under test
        int numSlotsAffected;                // number of slots the stage rewrites
        std::function<float(float)> verify;  // scalar reference implementation
    };

    static const UnaryOp kUnaryOps[] = {
        {SkRasterPipelineOp::cast_to_int_from_float,    1, to_int},
        {SkRasterPipelineOp::cast_to_int_from_2_floats, 2, to_int},
        {SkRasterPipelineOp::cast_to_int_from_3_floats, 3, to_int},
        {SkRasterPipelineOp::cast_to_int_from_4_floats, 4, to_int},

        {SkRasterPipelineOp::cast_to_uint_from_float,    1, to_uint},
        {SkRasterPipelineOp::cast_to_uint_from_2_floats, 2, to_uint},
        {SkRasterPipelineOp::cast_to_uint_from_3_floats, 3, to_uint},
        {SkRasterPipelineOp::cast_to_uint_from_4_floats, 4, to_uint},

        {SkRasterPipelineOp::floor_float,    1, [](float a) { return floorf(a); }},
        {SkRasterPipelineOp::floor_2_floats, 2, [](float a) { return floorf(a); }},
        {SkRasterPipelineOp::floor_3_floats, 3, [](float a) { return floorf(a); }},
        {SkRasterPipelineOp::floor_4_floats, 4, [](float a) { return floorf(a); }},

        {SkRasterPipelineOp::ceil_float,    1, [](float a) { return ceilf(a); }},
        {SkRasterPipelineOp::ceil_2_floats, 2, [](float a) { return ceilf(a); }},
        {SkRasterPipelineOp::ceil_3_floats, 3, [](float a) { return ceilf(a); }},
        {SkRasterPipelineOp::ceil_4_floats, 4, [](float a) { return ceilf(a); }},
    };

    for (const UnaryOp& op : kUnaryOps) {
        // The result of some ops are undefined with negative inputs, so only test positive values.
        bool positiveOnly = (op.stage == SkRasterPipelineOp::cast_to_uint_from_float ||
                             op.stage == SkRasterPipelineOp::cast_to_uint_from_2_floats ||
                             op.stage == SkRasterPipelineOp::cast_to_uint_from_3_floats ||
                             op.stage == SkRasterPipelineOp::cast_to_uint_from_4_floats);

        // A fractional start (-9.75) ensures floor/ceil actually have something to round.
        float iotaStart = positiveOnly ? 1.0f : -9.75f;
        std::iota(&slots[0], &slots[5 * N], iotaStart);
        float inputValue = slots[0];

        // Run the unary op over our data.
        SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
        SkRasterPipeline p(&alloc);
        p.append(op.stage, &slots[0]);
        p.run(0, 0, 1, 1);

        // Verify that the destination slots have been updated.
        float* destPtr = &slots[0];
        for (int checkSlot = 0; checkSlot < 5; ++checkSlot) {
            for (int checkLane = 0; checkLane < N; ++checkLane) {
                if (checkSlot < op.numSlotsAffected) {
                    float expected = op.verify(inputValue);
                    // The casting tests can generate NaN, depending on the input value, so a value
                    // match (via ==) might not succeed.
                    // The ceil tests can generate negative zeros _sometimes_, depending on the
                    // exact implementation of ceil(), so a bitwise match might not succeed.
                    // Because of this, we allow either a value match or a bitwise match.
                    bool bitwiseMatch = (0 == memcmp(destPtr, &expected, sizeof(float)));
                    bool valueMatch   = (*destPtr == expected);
                    REPORTER_ASSERT(r, valueMatch || bitwiseMatch);
                } else {
                    REPORTER_ASSERT(r, *destPtr == inputValue);
                }

                ++destPtr;
                ++inputValue;
            }
        }
    }
}
2346 
// Maps a positive value onto a mix weight in [0, 1) by taking the fractional part of value/16.
// (Multiplying by the exact reciprocal 1/16 is bit-identical to dividing by 16.)
static float to_mix_weight(float value) {
    const float scaled = value * (1.0f / 16.0f);
    return scaled - std::floor(scaled);
}
2352 
// Exercises the float mix stages (1-4 hardcoded slots plus the packed-ctx `mix_n_floats`).
// The slot array is laid out as three groups: weights, then from-values, then to-values.
DEF_TEST(SkRasterPipeline_MixTest, r) {
    // Allocate space for 5 dest and 10 source slots.
    alignas(64) float slots[15 * SkRasterPipeline_kMaxStride_highp];
    const int N = SkOpts::raster_pipeline_highp_stride;

    struct MixOp {
        int numSlotsAffected;  // number of dest slots the stage writes
        // Appends this op's stage to the pipeline (the n-slot variant needs a packed ctx).
        std::function<void(SkRasterPipeline*, SkArenaAlloc*)> append;
    };

    static const MixOp kMixOps[] = {
        {1, [&](SkRasterPipeline* p, SkArenaAlloc* alloc) {
                p->append(SkRasterPipelineOp::mix_float, slots);
            }},
        {2, [&](SkRasterPipeline* p, SkArenaAlloc* alloc) {
                p->append(SkRasterPipelineOp::mix_2_floats, slots);
            }},
        {3, [&](SkRasterPipeline* p, SkArenaAlloc* alloc) {
                p->append(SkRasterPipelineOp::mix_3_floats, slots);
            }},
        {4, [&](SkRasterPipeline* p, SkArenaAlloc* alloc) {
                p->append(SkRasterPipelineOp::mix_4_floats, slots);
            }},
        {5, [&](SkRasterPipeline* p, SkArenaAlloc* alloc) {
                SkRasterPipeline_TernaryOpCtx ctx;
                ctx.dst = 0;
                ctx.delta = 5 * N * sizeof(float);
                p->append(SkRasterPipelineOp::mix_n_floats, SkRPCtxUtils::Pack(ctx, alloc));
            }},
    };

    for (const MixOp& op : kMixOps) {
        // Initialize the values to 1,2,3...
        std::iota(&slots[0], &slots[15 * N], 1.0f);

        // Remember the first lane's weight, from, and to values before the weights are remapped.
        float weightValue = slots[0];
        float fromValue   = slots[1 * op.numSlotsAffected * N];
        float toValue     = slots[2 * op.numSlotsAffected * N];

        // The first group of values (the weights) must be between zero and one.
        for (int idx = 0; idx < 1 * op.numSlotsAffected * N; ++idx) {
            slots[idx] = to_mix_weight(slots[idx]);
        }

        // Run the mix op over our data.
        SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
        SkRasterPipeline p(&alloc);
        p.append(SkRasterPipelineOp::set_base_pointer, &slots[0]);
        op.append(&p, &alloc);
        p.run(0,0,1,1);

        // Verify that each affected slot now equals mix(weight, from, to),
        // i.e. (to - from) * weight + from.
        float* destPtr = &slots[0];
        for (int checkSlot = 0; checkSlot < op.numSlotsAffected; ++checkSlot) {
            for (int checkLane = 0; checkLane < N; ++checkLane) {
                float checkValue = (toValue - fromValue) * to_mix_weight(weightValue) + fromValue;
                REPORTER_ASSERT(r, *destPtr == checkValue);

                ++destPtr;
                fromValue += 1.0f;
                toValue += 1.0f;
                weightValue += 1.0f;
            }
        }
    }
}
2419 
// Exercises the integer (bit-select) mix stages: a zero weight selects the from-value and a
// nonzero (~0) weight selects the to-value. The values are seeded with NaN bit patterns to
// confirm the selection is a pure bitwise operation with no float arithmetic involved.
DEF_TEST(SkRasterPipeline_MixIntTest, r) {
    // Allocate space for 5 dest and 10 source slots.
    alignas(64) int slots[15 * SkRasterPipeline_kMaxStride_highp];
    const int N = SkOpts::raster_pipeline_highp_stride;

    struct MixOp {
        int numSlotsAffected;  // number of dest slots the stage writes
        // Appends this op's stage to the pipeline (the n-slot variant needs a packed ctx).
        std::function<void(SkRasterPipeline*, SkArenaAlloc*)> append;
    };

    static const MixOp kMixOps[] = {
        {1, [&](SkRasterPipeline* p, SkArenaAlloc* alloc) {
                p->append(SkRasterPipelineOp::mix_int, slots);
            }},
        {2, [&](SkRasterPipeline* p, SkArenaAlloc* alloc) {
                p->append(SkRasterPipelineOp::mix_2_ints, slots);
            }},
        {3, [&](SkRasterPipeline* p, SkArenaAlloc* alloc) {
                p->append(SkRasterPipelineOp::mix_3_ints, slots);
            }},
        {4, [&](SkRasterPipeline* p, SkArenaAlloc* alloc) {
                p->append(SkRasterPipelineOp::mix_4_ints, slots);
            }},
        {5, [&](SkRasterPipeline* p, SkArenaAlloc* alloc) {
                SkRasterPipeline_TernaryOpCtx ctx;
                ctx.dst = 0;
                ctx.delta = 5 * N * sizeof(int);
                p->append(SkRasterPipelineOp::mix_n_ints, SkRPCtxUtils::Pack(ctx, alloc));
            }},
    };

    for (const MixOp& op : kMixOps) {
        // Initialize the selector ("weight") values to alternating masks
        for (int idx = 0; idx < 1 * op.numSlotsAffected * N; ++idx) {
            slots[idx] = (idx & 1) ? ~0 : 0;
        }

        // Initialize the other values to various NaNs
        // (kLastSignalingNaN is defined earlier in this file; presumably chosen so every
        //  iota'd value is a NaN bit pattern — confirm against its declaration.)
        std::iota(&slots[1 * op.numSlotsAffected * N], &slots[15 * N], kLastSignalingNaN);

        int weightValue = slots[0];
        int fromValue   = slots[1 * op.numSlotsAffected * N];
        int toValue     = slots[2 * op.numSlotsAffected * N];

        // Run the mix op over our data.
        SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
        SkRasterPipeline p(&alloc);
        p.append(SkRasterPipelineOp::set_base_pointer, &slots[0]);
        op.append(&p, &alloc);
        p.run(0,0,1,1);

        // Verify that the affected slots now equal either fromValue or toValue, correctly
        int* destPtr = &slots[0];
        for (int checkSlot = 0; checkSlot < op.numSlotsAffected; ++checkSlot) {
            for (int checkLane = 0; checkLane < N; ++checkLane) {
                int checkValue = weightValue ? toValue : fromValue;
                REPORTER_ASSERT(r, *destPtr == checkValue);

                ++destPtr;
                fromValue += 1;
                toValue += 1;
                // Selectors alternate 0, ~0, 0, ~0... so flipping all bits tracks the pattern.
                weightValue = ~weightValue;
            }
        }
    }
}
2486 
DEF_TEST(SkRasterPipeline_Jump, r) {
    // Reserve storage for four slots' worth of output.
    alignas(64) float storage[4 * SkRasterPipeline_kMaxStride_highp] = {};
    const int N = SkOpts::raster_pipeline_highp_stride;

    alignas(64) static constexpr float kColorDarkRed[4] = {0.5f, 0.0f, 0.0f, 0.75f};
    alignas(64) static constexpr float kColorGreen[4]   = {0.0f, 1.0f, 0.0f, 1.0f};
    const int offset = 2;

    // Build a program whose jump op skips past the dark-red color assignment.
    SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
    SkRasterPipeline p(&alloc);
    p.appendConstantColor(&alloc, kColorGreen);        // set the color to green
    p.append(SkRasterPipelineOp::jump, &offset);       // hop over the next stage
    p.appendConstantColor(&alloc, kColorDarkRed);      // (skipped by the jump)
    p.append(SkRasterPipelineOp::store_src, storage);  // capture the result for checking
    p.run(0,0,1,1);

    // Every lane of every slot should still hold green.
    for (int slot = 0; slot < 4; ++slot) {
        for (int lane = 0; lane < N; ++lane) {
            REPORTER_ASSERT(r, storage[slot * N + lane] == kColorGreen[slot]);
        }
    }
}
2514 
DEF_TEST(SkRasterPipeline_ExchangeSrc, r) {
    const int N = SkOpts::raster_pipeline_highp_stride;

    // Fill two buffers with distinct ascending sequences so we can tell them apart.
    alignas(64) int srcBuffer[4 * SkRasterPipeline_kMaxStride_highp] = {};
    alignas(64) int swapBuffer[4 * SkRasterPipeline_kMaxStride_highp] = {};

    std::iota(&srcBuffer[0],  &srcBuffer[4 * N],  kLastSignalingNaN);
    std::iota(&swapBuffer[0], &swapBuffer[4 * N], kLastSignalingNegNaN);

    // This program should swap the contents of `srcBuffer` and `swapBuffer`.
    SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
    SkRasterPipeline p(&alloc);
    p.append(SkRasterPipelineOp::load_src,     srcBuffer);
    p.append(SkRasterPipelineOp::exchange_src, swapBuffer);
    p.append(SkRasterPipelineOp::store_src,    srcBuffer);
    p.run(0,0,N,1);

    // Each buffer should now contain the other's original sequence.
    for (int idx = 0; idx < 4 * N; ++idx) {
        REPORTER_ASSERT(r, srcBuffer[idx]  == kLastSignalingNegNaN + idx);
        REPORTER_ASSERT(r, swapBuffer[idx] == kLastSignalingNaN + idx);
    }
}
2544 
DEF_TEST(SkRasterPipeline_BranchIfAllLanesActive,r)2545 DEF_TEST(SkRasterPipeline_BranchIfAllLanesActive, r) {
2546     const int N = SkOpts::raster_pipeline_highp_stride;
2547 
2548     SkRasterPipeline_BranchIfAllLanesActiveCtx ctx;
2549     ctx.offset = 2;
2550 
2551     // The branch should be taken when lane masks are all-on.
2552     {
2553         alignas(64) int32_t first [SkRasterPipeline_kMaxStride_highp];
2554         alignas(64) int32_t second[SkRasterPipeline_kMaxStride_highp];
2555         std::fill(&first [0], &first [N], 0x12345678);
2556         std::fill(&second[0], &second[N], 0x12345678);
2557 
2558         SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
2559         SkRasterPipeline p(&alloc);
2560         SkRasterPipeline_InitLaneMasksCtx initLaneMasksCtx;
2561         p.append(SkRasterPipelineOp::init_lane_masks, &initLaneMasksCtx);
2562         p.append(SkRasterPipelineOp::branch_if_all_lanes_active, &ctx);
2563         p.append(SkRasterPipelineOp::store_src_a, first);
2564         p.append(SkRasterPipelineOp::store_src_a, second);
2565         p.run(0,0,N,1);
2566 
2567         int32_t* firstPtr = first;
2568         int32_t* secondPtr = second;
2569         for (int checkLane = 0; checkLane < N; ++checkLane) {
2570             REPORTER_ASSERT(r, *firstPtr++  == 0x12345678);
2571             REPORTER_ASSERT(r, *secondPtr++ != 0x12345678);
2572         }
2573     }
2574     // The branch should not be taken when lane masks are all-off.
2575     {
2576         alignas(64) int32_t first [SkRasterPipeline_kMaxStride_highp];
2577         alignas(64) int32_t second[SkRasterPipeline_kMaxStride_highp];
2578         std::fill(&first [0], &first [N], 0x12345678);
2579         std::fill(&second[0], &second[N], 0x12345678);
2580 
2581         alignas(64) constexpr int32_t kNoLanesActive[4 * SkRasterPipeline_kMaxStride_highp] = {};
2582 
2583         SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
2584         SkRasterPipeline p(&alloc);
2585         p.append(SkRasterPipelineOp::load_src, kNoLanesActive);
2586         p.append(SkRasterPipelineOp::branch_if_all_lanes_active, &ctx);
2587         p.append(SkRasterPipelineOp::store_src_a, first);
2588         p.append(SkRasterPipelineOp::store_src_a, second);
2589         p.run(0,0,N,1);
2590 
2591         int32_t* firstPtr = first;
2592         int32_t* secondPtr = second;
2593         for (int checkLane = 0; checkLane < N; ++checkLane) {
2594             REPORTER_ASSERT(r, *firstPtr++  != 0x12345678);
2595             REPORTER_ASSERT(r, *secondPtr++ != 0x12345678);
2596         }
2597     }
2598     // The branch should not be taken when lane masks are partially-on.
2599     if (N > 1) {
2600         alignas(64) int32_t first [SkRasterPipeline_kMaxStride_highp];
2601         alignas(64) int32_t second[SkRasterPipeline_kMaxStride_highp];
2602         std::fill(&first [0], &first [N], 0x12345678);
2603         std::fill(&second[0], &second[N], 0x12345678);
2604 
2605         // An array of ~0s, except for a single zero in the last A slot.
2606         alignas(64) int32_t oneLaneInactive[4 * SkRasterPipeline_kMaxStride_highp] = {};
2607         std::fill(oneLaneInactive, &oneLaneInactive[4*N], ~0);
2608         oneLaneInactive[4*N - 1] = 0;
2609 
2610         SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
2611         SkRasterPipeline p(&alloc);
2612         p.append(SkRasterPipelineOp::load_src, oneLaneInactive);
2613         p.append(SkRasterPipelineOp::branch_if_all_lanes_active, &ctx);
2614         p.append(SkRasterPipelineOp::store_src_a, first);
2615         p.append(SkRasterPipelineOp::store_src_a, second);
2616         p.run(0,0,N,1);
2617 
2618         int32_t* firstPtr = first;
2619         int32_t* secondPtr = second;
2620         for (int checkLane = 0; checkLane < N; ++checkLane) {
2621             REPORTER_ASSERT(r, *firstPtr++  != 0x12345678);
2622             REPORTER_ASSERT(r, *secondPtr++ != 0x12345678);
2623         }
2624     }
2625 }
2626 
DEF_TEST(SkRasterPipeline_BranchIfAnyLanesActive,r)2627 DEF_TEST(SkRasterPipeline_BranchIfAnyLanesActive, r) {
2628     const int N = SkOpts::raster_pipeline_highp_stride;
2629 
2630     SkRasterPipeline_BranchCtx ctx;
2631     ctx.offset = 2;
2632 
2633     // The branch should be taken when lane masks are all-on.
2634     {
2635         alignas(64) int32_t first [SkRasterPipeline_kMaxStride_highp];
2636         alignas(64) int32_t second[SkRasterPipeline_kMaxStride_highp];
2637         std::fill(&first [0], &first [N], 0x12345678);
2638         std::fill(&second[0], &second[N], 0x12345678);
2639 
2640         SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
2641         SkRasterPipeline p(&alloc);
2642         SkRasterPipeline_InitLaneMasksCtx initLaneMasksCtx;
2643         p.append(SkRasterPipelineOp::init_lane_masks, &initLaneMasksCtx);
2644         p.append(SkRasterPipelineOp::branch_if_any_lanes_active, &ctx);
2645         p.append(SkRasterPipelineOp::store_src_a, first);
2646         p.append(SkRasterPipelineOp::store_src_a, second);
2647         p.run(0,0,N,1);
2648 
2649         int32_t* firstPtr = first;
2650         int32_t* secondPtr = second;
2651         for (int checkLane = 0; checkLane < N; ++checkLane) {
2652             REPORTER_ASSERT(r, *firstPtr++  == 0x12345678);
2653             REPORTER_ASSERT(r, *secondPtr++ != 0x12345678);
2654         }
2655     }
2656     // The branch should not be taken when lane masks are all-off.
2657     {
2658         alignas(64) int32_t first [SkRasterPipeline_kMaxStride_highp];
2659         alignas(64) int32_t second[SkRasterPipeline_kMaxStride_highp];
2660         std::fill(&first [0], &first [N], 0x12345678);
2661         std::fill(&second[0], &second[N], 0x12345678);
2662 
2663         alignas(64) constexpr int32_t kNoLanesActive[4 * SkRasterPipeline_kMaxStride_highp] = {};
2664 
2665         SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
2666         SkRasterPipeline p(&alloc);
2667         p.append(SkRasterPipelineOp::load_src, kNoLanesActive);
2668         p.append(SkRasterPipelineOp::branch_if_any_lanes_active, &ctx);
2669         p.append(SkRasterPipelineOp::store_src_a, first);
2670         p.append(SkRasterPipelineOp::store_src_a, second);
2671         p.run(0,0,N,1);
2672 
2673         int32_t* firstPtr = first;
2674         int32_t* secondPtr = second;
2675         for (int checkLane = 0; checkLane < N; ++checkLane) {
2676             REPORTER_ASSERT(r, *firstPtr++  != 0x12345678);
2677             REPORTER_ASSERT(r, *secondPtr++ != 0x12345678);
2678         }
2679     }
2680     // The branch should be taken when lane masks are partially-on.
2681     if (N > 1) {
2682         alignas(64) int32_t first [SkRasterPipeline_kMaxStride_highp];
2683         alignas(64) int32_t second[SkRasterPipeline_kMaxStride_highp];
2684         std::fill(&first [0], &first [N], 0x12345678);
2685         std::fill(&second[0], &second[N], 0x12345678);
2686 
2687         // An array of all zeros, except for a single ~0 in the last A slot.
2688         alignas(64) int32_t oneLaneActive[4 * SkRasterPipeline_kMaxStride_highp] = {};
2689         oneLaneActive[4*N - 1] = ~0;
2690 
2691         SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
2692         SkRasterPipeline p(&alloc);
2693         p.append(SkRasterPipelineOp::load_src, oneLaneActive);
2694         p.append(SkRasterPipelineOp::branch_if_any_lanes_active, &ctx);
2695         p.append(SkRasterPipelineOp::store_src_a, first);
2696         p.append(SkRasterPipelineOp::store_src_a, second);
2697         p.run(0,0,N,1);
2698 
2699         int32_t* firstPtr = first;
2700         int32_t* secondPtr = second;
2701         for (int checkLane = 0; checkLane < N; ++checkLane) {
2702             REPORTER_ASSERT(r, *firstPtr++  == 0x12345678);
2703             REPORTER_ASSERT(r, *secondPtr++ != 0x12345678);
2704         }
2705     }
2706 }
2707 
DEF_TEST(SkRasterPipeline_BranchIfNoLanesActive,r)2708 DEF_TEST(SkRasterPipeline_BranchIfNoLanesActive, r) {
2709     const int N = SkOpts::raster_pipeline_highp_stride;
2710 
2711     SkRasterPipeline_BranchCtx ctx;
2712     ctx.offset = 2;
2713 
2714     // The branch should not be taken when lane masks are all-on.
2715     {
2716         alignas(64) int32_t first [SkRasterPipeline_kMaxStride_highp];
2717         alignas(64) int32_t second[SkRasterPipeline_kMaxStride_highp];
2718         std::fill(&first [0], &first [N], 0x12345678);
2719         std::fill(&second[0], &second[N], 0x12345678);
2720 
2721         SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
2722         SkRasterPipeline p(&alloc);
2723         SkRasterPipeline_InitLaneMasksCtx initLaneMasksCtx;
2724         p.append(SkRasterPipelineOp::init_lane_masks, &initLaneMasksCtx);
2725         p.append(SkRasterPipelineOp::branch_if_no_lanes_active, &ctx);
2726         p.append(SkRasterPipelineOp::store_src_a, first);
2727         p.append(SkRasterPipelineOp::store_src_a, second);
2728         p.run(0,0,N,1);
2729 
2730         int32_t* firstPtr = first;
2731         int32_t* secondPtr = second;
2732         for (int checkLane = 0; checkLane < N; ++checkLane) {
2733             REPORTER_ASSERT(r, *firstPtr++  != 0x12345678);
2734             REPORTER_ASSERT(r, *secondPtr++ != 0x12345678);
2735         }
2736     }
2737     // The branch should be taken when lane masks are all-off.
2738     {
2739         alignas(64) int32_t first [SkRasterPipeline_kMaxStride_highp];
2740         alignas(64) int32_t second[SkRasterPipeline_kMaxStride_highp];
2741         std::fill(&first [0], &first [N], 0x12345678);
2742         std::fill(&second[0], &second[N], 0x12345678);
2743 
2744         alignas(64) constexpr int32_t kNoLanesActive[4 * SkRasterPipeline_kMaxStride_highp] = {};
2745 
2746         SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
2747         SkRasterPipeline p(&alloc);
2748         p.append(SkRasterPipelineOp::load_src, kNoLanesActive);
2749         p.append(SkRasterPipelineOp::branch_if_no_lanes_active, &ctx);
2750         p.append(SkRasterPipelineOp::store_src_a, first);
2751         p.append(SkRasterPipelineOp::store_src_a, second);
2752         p.run(0,0,N,1);
2753 
2754         int32_t* firstPtr = first;
2755         int32_t* secondPtr = second;
2756         for (int checkLane = 0; checkLane < N; ++checkLane) {
2757             REPORTER_ASSERT(r, *firstPtr++  == 0x12345678);
2758             REPORTER_ASSERT(r, *secondPtr++ != 0x12345678);
2759         }
2760     }
2761     // The branch should not be taken when lane masks are partially-on.
2762     if (N > 1) {
2763         alignas(64) int32_t first [SkRasterPipeline_kMaxStride_highp];
2764         alignas(64) int32_t second[SkRasterPipeline_kMaxStride_highp];
2765         std::fill(&first [0], &first [N], 0x12345678);
2766         std::fill(&second[0], &second[N], 0x12345678);
2767 
2768         // An array of all zeros, except for a single ~0 in the last A slot.
2769         alignas(64) int32_t oneLaneActive[4 * SkRasterPipeline_kMaxStride_highp] = {};
2770         oneLaneActive[4*N - 1] = ~0;
2771 
2772         SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
2773         SkRasterPipeline p(&alloc);
2774         p.append(SkRasterPipelineOp::load_src, oneLaneActive);
2775         p.append(SkRasterPipelineOp::branch_if_no_lanes_active, &ctx);
2776         p.append(SkRasterPipelineOp::store_src_a, first);
2777         p.append(SkRasterPipelineOp::store_src_a, second);
2778         p.run(0,0,N,1);
2779 
2780         int32_t* firstPtr = first;
2781         int32_t* secondPtr = second;
2782         for (int checkLane = 0; checkLane < N; ++checkLane) {
2783             REPORTER_ASSERT(r, *firstPtr++  != 0x12345678);
2784             REPORTER_ASSERT(r, *secondPtr++ != 0x12345678);
2785         }
2786     }
2787 }
2788 
DEF_TEST(SkRasterPipeline_BranchIfActiveLanesEqual,r)2789 DEF_TEST(SkRasterPipeline_BranchIfActiveLanesEqual, r) {
2790     // Allocate space for 4 slots.
2791     const int N = SkOpts::raster_pipeline_highp_stride;
2792 
2793     // An array of all 6s.
2794     alignas(64) int allSixes[SkRasterPipeline_kMaxStride_highp] = {};
2795     std::fill(std::begin(allSixes), std::end(allSixes), 6);
2796 
2797     // An array of all 6s, except for a single 5 in one lane.
2798     alignas(64) int mostlySixesWithOneFive[SkRasterPipeline_kMaxStride_highp] = {};
2799     std::fill(std::begin(mostlySixesWithOneFive), std::end(mostlySixesWithOneFive), 6);
2800     mostlySixesWithOneFive[N - 1] = 5;
2801 
2802     SkRasterPipeline_BranchIfEqualCtx matching; // comparing all-six vs five will match
2803     matching.offset = 2;
2804     matching.value = 5;
2805     matching.ptr = allSixes;
2806 
2807     SkRasterPipeline_BranchIfEqualCtx nonmatching;  // comparing mostly-six vs five won't match
2808     nonmatching.offset = 2;
2809     nonmatching.value = 5;
2810     nonmatching.ptr = mostlySixesWithOneFive;
2811 
2812     // The branch should be taken when lane masks are all-on and we're checking 6 ≠ 5.
2813     {
2814         alignas(64) int32_t first [SkRasterPipeline_kMaxStride_highp];
2815         alignas(64) int32_t second[SkRasterPipeline_kMaxStride_highp];
2816         std::fill(&first [0], &first [N], 0x12345678);
2817         std::fill(&second[0], &second[N], 0x12345678);
2818 
2819         SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
2820         SkRasterPipeline p(&alloc);
2821         SkRasterPipeline_InitLaneMasksCtx initLaneMasksCtx;
2822         p.append(SkRasterPipelineOp::init_lane_masks, &initLaneMasksCtx);
2823         p.append(SkRasterPipelineOp::branch_if_no_active_lanes_eq, &matching);
2824         p.append(SkRasterPipelineOp::store_src_a, first);
2825         p.append(SkRasterPipelineOp::store_src_a, second);
2826         p.run(0,0,N,1);
2827 
2828         int32_t* firstPtr = first;
2829         int32_t* secondPtr = second;
2830         for (int checkLane = 0; checkLane < N; ++checkLane) {
2831             REPORTER_ASSERT(r, *firstPtr++  == 0x12345678);
2832             REPORTER_ASSERT(r, *secondPtr++ != 0x12345678);
2833         }
2834     }
2835     // The branch should not be taken when lane masks are all-on and we're checking 5 ≠ 5
2836     {
2837         alignas(64) int32_t first [SkRasterPipeline_kMaxStride_highp];
2838         alignas(64) int32_t second[SkRasterPipeline_kMaxStride_highp];
2839         std::fill(&first [0], &first [N], 0x12345678);
2840         std::fill(&second[0], &second[N], 0x12345678);
2841 
2842         SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
2843         SkRasterPipeline p(&alloc);
2844         SkRasterPipeline_InitLaneMasksCtx initLaneMasksCtx;
2845         p.append(SkRasterPipelineOp::init_lane_masks, &initLaneMasksCtx);
2846         p.append(SkRasterPipelineOp::branch_if_no_active_lanes_eq, &nonmatching);
2847         p.append(SkRasterPipelineOp::store_src_a, first);
2848         p.append(SkRasterPipelineOp::store_src_a, second);
2849         p.run(0,0,N,1);
2850 
2851         int32_t* firstPtr = first;
2852         int32_t* secondPtr = second;
2853         for (int checkLane = 0; checkLane < N; ++checkLane) {
2854             REPORTER_ASSERT(r, *firstPtr++  != 0x12345678);
2855             REPORTER_ASSERT(r, *secondPtr++ != 0x12345678);
2856         }
2857     }
2858     // The branch should be taken when the 5 = 5 lane is dead.
2859     if (N > 1) {
2860         alignas(64) int32_t first [SkRasterPipeline_kMaxStride_highp];
2861         alignas(64) int32_t second[SkRasterPipeline_kMaxStride_highp];
2862         std::fill(&first [0], &first [N], 0x12345678);
2863         std::fill(&second[0], &second[N], 0x12345678);
2864 
2865         // An execution mask with all lanes on except for the five-lane.
2866         alignas(64) int mask[4 * SkRasterPipeline_kMaxStride_highp] = {};
2867         std::fill(std::begin(mask), std::end(mask), ~0);
2868         mask[4*N - 1] = 0;
2869 
2870         SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
2871         SkRasterPipeline p(&alloc);
2872         p.append(SkRasterPipelineOp::load_src, mask);
2873         p.append(SkRasterPipelineOp::branch_if_no_active_lanes_eq, &nonmatching);
2874         p.append(SkRasterPipelineOp::store_src_a, first);
2875         p.append(SkRasterPipelineOp::store_src_a, second);
2876         p.run(0,0,N,1);
2877 
2878         int32_t* firstPtr = first;
2879         int32_t* secondPtr = second;
2880         for (int checkLane = 0; checkLane < N; ++checkLane) {
2881             REPORTER_ASSERT(r, *firstPtr++  == 0x12345678);
2882             REPORTER_ASSERT(r, *secondPtr++ != 0x12345678);
2883         }
2884     }
2885 }
2886 
DEF_TEST(SkRasterPipeline_empty, r) {
    // No assertions; simply verify that running a pipeline with no stages is safe.
    SkRasterPipeline_<256> pipeline;
    pipeline.run(0,0,20,1);
}
2892 
DEF_TEST(SkRasterPipeline_nonsense, r) {
    // No assertions; just verify this is safe to run and terminates.
    // srcover() calls st->next(); this checks there's always something there to call.
    SkRasterPipeline_<256> pipeline;
    pipeline.append(SkRasterPipelineOp::srcover);
    pipeline.run(0,0,20,1);
}
2900 
DEF_TEST(SkRasterPipeline_JIT, r) {
    // This tests a couple odd corners that a JIT backend can stumble over.

    uint32_t buf[72] = {
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12,
        13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    };

    SkRasterPipeline_MemoryCtx src = { buf +  0, 0 },
                               dst = { buf + 36, 0 };

    // Copy buf[x] to buf[x+36] for x in [15,35).
    SkRasterPipeline_<256> p;
    p.append(SkRasterPipelineOp::load_8888,  &src);
    p.append(SkRasterPipelineOp::store_8888, &dst);
    p.run(15,0, 20,1);

    // Pixels outside [15,35) should remain zero; the copied span holds i - 11.
    for (int i = 0; i < 36; i++) {
        const uint32_t expected = (i < 15 || i == 35) ? 0u : (uint32_t)(i - 11);
        REPORTER_ASSERT(r, buf[i+36] == expected);
    }
}
2930 
h(float f)2931 static uint16_t h(float f) {
2932     // Remember, a float is 1-8-23 (sign-exponent-mantissa) with 127 exponent bias.
2933     uint32_t sem;
2934     memcpy(&sem, &f, sizeof(sem));
2935     uint32_t s  = sem & 0x80000000,
2936              em = sem ^ s;
2937 
2938     // Convert to 1-5-10 half with 15 bias, flushing denorm halfs (including zero) to zero.
2939     auto denorm = (int32_t)em < 0x38800000;  // I32 comparison is often quicker, and always safe
2940     // here.
2941     return denorm ? SkTo<uint16_t>(0)
2942                   : SkTo<uint16_t>((s>>16) + (em>>13) - ((127-15)<<10));
2943 }
2944 
// Verifies that partial runs (fewer pixels than the SIMD stride) only read and
// write the first `i` pixels for a variety of load/store formats. Each scope
// fills the destination with a sentinel (0xff bytes), runs a copy pipeline over
// i pixels, then checks that pixels [0,i) were copied and pixels [i,4) were
// left untouched.
DEF_TEST(SkRasterPipeline_tail, r) {
    // f32 -> f32: four-channel float pixels.
    {
        float data[][4] = {
            {00, 01, 02, 03},
            {10, 11, 12, 13},
            {20, 21, 22, 23},
            {30, 31, 32, 33},
        };

        float buffer[4][4];

        SkRasterPipeline_MemoryCtx src = { &data[0][0], 0 },
                           dst = { &buffer[0][0], 0 };

        for (unsigned i = 1; i <= 4; i++) {
            // 0xff-filled float bytes are NaN, so untouched pixels are detectable below.
            memset(buffer, 0xff, sizeof(buffer));
            SkRasterPipeline_<256> p;
            p.append(SkRasterPipelineOp::load_f32, &src);
            p.append(SkRasterPipelineOp::store_f32, &dst);
            p.run(0,0, i,1);
            for (unsigned j = 0; j < i; j++) {
                for (unsigned k = 0; k < 4; k++) {
                    if (buffer[j][k] != data[j][k]) {
                        ERRORF(r, "(%u, %u) - a: %g r: %g\n", j, k, data[j][k], buffer[j][k]);
                    }
                }
            }
            // Pixels past the tail must still hold the NaN sentinel.
            for (int j = i; j < 4; j++) {
                for (auto f : buffer[j]) {
                    REPORTER_ASSERT(r, SkIsNaN(f));
                }
            }
        }
    }

    // f16 -> f16: four-channel half pixels.
    {
        alignas(8) uint16_t data[][4] = {
            {h(00), h(01), h(02), h(03)},
            {h(10), h(11), h(12), h(13)},
            {h(20), h(21), h(22), h(23)},
            {h(30), h(31), h(32), h(33)},
        };
        alignas(8) uint16_t buffer[4][4];
        SkRasterPipeline_MemoryCtx src = { &data[0][0], 0 },
                           dst = { &buffer[0][0], 0 };

        for (unsigned i = 1; i <= 4; i++) {
            memset(buffer, 0xff, sizeof(buffer));
            SkRasterPipeline_<256> p;
            p.append(SkRasterPipelineOp::load_f16, &src);
            p.append(SkRasterPipelineOp::store_f16, &dst);
            p.run(0,0, i,1);
            for (unsigned j = 0; j < i; j++) {
                for (int k = 0; k < 4; k++) {
                    REPORTER_ASSERT(r, buffer[j][k] == data[j][k]);
                }
            }
            // Pixels past the tail must still hold the 0xffff sentinel.
            for (int j = i; j < 4; j++) {
                for (auto f : buffer[j]) {
                    REPORTER_ASSERT(r, f == 0xffff);
                }
            }
        }
    }

    // af16 -> f16: alpha-only half source expands to (0, 0, 0, alpha).
    {
        alignas(8) uint16_t data[]= {
            h(00),
            h(10),
            h(20),
            h(30),
        };
        alignas(8) uint16_t buffer[4][4];
        SkRasterPipeline_MemoryCtx src = { &data[0], 0 },
                dst = { &buffer[0][0], 0 };

        for (unsigned i = 1; i <= 4; i++) {
            memset(buffer, 0xff, sizeof(buffer));
            SkRasterPipeline_<256> p;
            p.append(SkRasterPipelineOp::load_af16, &src);
            p.append(SkRasterPipelineOp::store_f16, &dst);
            p.run(0,0, i,1);
            for (unsigned j = 0; j < i; j++) {
                // load_af16 fills RGB with zero and puts the loaded value in A.
                uint16_t expected[] = {0, 0, 0, data[j]};
                REPORTER_ASSERT(r, !memcmp(expected, &buffer[j][0], sizeof(buffer[j])));
            }
            for (int j = i; j < 4; j++) {
                for (auto f : buffer[j]) {
                    REPORTER_ASSERT(r, f == 0xffff);
                }
            }
        }
    }

    // f16 -> af16: storing alpha-only keeps just the A channel of each pixel.
    {
        alignas(8) uint16_t data[][4] = {
            {h(00), h(01), h(02), h(03)},
            {h(10), h(11), h(12), h(13)},
            {h(20), h(21), h(22), h(23)},
            {h(30), h(31), h(32), h(33)},
        };
        alignas(8) uint16_t buffer[4];
        SkRasterPipeline_MemoryCtx src = { &data[0][0], 0 },
                dst = { &buffer[0], 0 };

        for (unsigned i = 1; i <= 4; i++) {
            memset(buffer, 0xff, sizeof(buffer));
            SkRasterPipeline_<256> p;
            p.append(SkRasterPipelineOp::load_f16, &src);
            p.append(SkRasterPipelineOp::store_af16, &dst);
            p.run(0,0, i,1);
            for (unsigned j = 0; j < i; j++) {
                // Each stored value should match the source pixel's A channel.
                REPORTER_ASSERT(r, !memcmp(&data[j][3], &buffer[j], sizeof(buffer[j])));
            }
            for (int j = i; j < 4; j++) {
                REPORTER_ASSERT(r, buffer[j] == 0xffff);
            }
        }
    }

    // f16 -> rgf16: storing two channels keeps just R and G of each pixel.
    {
        alignas(8) uint16_t data[][4] = {
            {h(00), h(01), h(02), h(03)},
            {h(10), h(11), h(12), h(13)},
            {h(20), h(21), h(22), h(23)},
            {h(30), h(31), h(32), h(33)},
        };
        alignas(8) uint16_t buffer[4][2];
        SkRasterPipeline_MemoryCtx src = { &data[0][0], 0 },
                dst = { &buffer[0][0], 0 };

        for (unsigned i = 1; i <= 4; i++) {
            memset(buffer, 0xff, sizeof(buffer));
            SkRasterPipeline_<256> p;
            p.append(SkRasterPipelineOp::load_f16, &src);
            p.append(SkRasterPipelineOp::store_rgf16, &dst);
            p.run(0,0, i,1);
            for (unsigned j = 0; j < i; j++) {
                // The first two halves of the source pixel should have been stored.
                REPORTER_ASSERT(r, !memcmp(&buffer[j], &data[j], 2 * sizeof(uint16_t)));
            }
            for (int j = i; j < 4; j++) {
                for (auto h : buffer[j]) {
                    REPORTER_ASSERT(r, h == 0xffff);
                }
            }
        }
    }

    // rgf16 -> f16: a two-channel source expands to (r, g, 0, 1).
    {
        alignas(8) uint16_t data[][2] = {
            {h(00), h(01)},
            {h(10), h(11)},
            {h(20), h(21)},
            {h(30), h(31)},
        };
        alignas(8) uint16_t buffer[4][4];
        SkRasterPipeline_MemoryCtx src = { &data[0][0], 0 },
                dst = { &buffer[0][0], 0 };

        for (unsigned i = 1; i <= 4; i++) {
            memset(buffer, 0xff, sizeof(buffer));
            SkRasterPipeline_<256> p;
            p.append(SkRasterPipelineOp::load_rgf16, &src);
            p.append(SkRasterPipelineOp::store_f16, &dst);
            p.run(0,0, i,1);
            for (unsigned j = 0; j < i; j++) {
                // load_rgf16 fills B with zero and A with one.
                uint16_t expected[] = {data[j][0], data[j][1], h(0), h(1)};
                REPORTER_ASSERT(r, !memcmp(&buffer[j], expected, sizeof(expected)));
            }
            for (int j = i; j < 4; j++) {
                for (auto h : buffer[j]) {
                    REPORTER_ASSERT(r, h == 0xffff);
                }
            }
        }
    }
}
3122 
DEF_TEST(SkRasterPipeline_u16,r)3123 DEF_TEST(SkRasterPipeline_u16, r) {
3124     {
3125         alignas(8) uint16_t data[][2] = {
3126             {0x0000, 0x0111},
3127             {0x1010, 0x1111},
3128             {0x2020, 0x2121},
3129             {0x3030, 0x3131},
3130         };
3131         uint8_t buffer[4][4];
3132         SkRasterPipeline_MemoryCtx src = { &data[0][0], 0 },
3133                 dst = { &buffer[0][0], 0 };
3134 
3135         for (unsigned i = 1; i <= 4; i++) {
3136             memset(buffer, 0xab, sizeof(buffer));
3137             SkRasterPipeline_<256> p;
3138             p.append(SkRasterPipelineOp::load_rg1616, &src);
3139             p.append(SkRasterPipelineOp::store_8888, &dst);
3140             p.run(0,0, i,1);
3141             for (unsigned j = 0; j < i; j++) {
3142                 uint8_t expected[] = {
3143                     SkToU8(data[j][0] >> 8),
3144                     SkToU8(data[j][1] >> 8),
3145                     000,
3146                     0xff
3147                 };
3148                 REPORTER_ASSERT(r, !memcmp(&buffer[j], expected, sizeof(expected)));
3149             }
3150             for (int j = i; j < 4; j++) {
3151                 for (auto b : buffer[j]) {
3152                     REPORTER_ASSERT(r, b == 0xab);
3153                 }
3154             }
3155         }
3156     }
3157 
3158     {
3159         alignas(8) uint16_t data[] = {
3160                 0x0000,
3161                 0x1010,
3162                 0x2020,
3163                 0x3030,
3164         };
3165         uint8_t buffer[4][4];
3166         SkRasterPipeline_MemoryCtx src = { &data[0], 0 },
3167                 dst = { &buffer[0][0], 0 };
3168 
3169         for (unsigned i = 1; i <= 4; i++) {
3170             memset(buffer, 0xff, sizeof(buffer));
3171             SkRasterPipeline_<256> p;
3172             p.append(SkRasterPipelineOp::load_a16, &src);
3173             p.append(SkRasterPipelineOp::store_8888, &dst);
3174             p.run(0,0, i,1);
3175             for (unsigned j = 0; j < i; j++) {
3176                 uint8_t expected[] = {0x00, 0x00, 0x00, SkToU8(data[j] >> 8)};
3177                 REPORTER_ASSERT(r, !memcmp(&buffer[j], expected, sizeof(expected)));
3178             }
3179             for (int j = i; j < 4; j++) {
3180                 for (auto b : buffer[j]) {
3181                     REPORTER_ASSERT(r, b == 0xff);
3182                 }
3183             }
3184         }
3185     }
3186 
3187     {
3188         uint8_t data[][4] = {
3189             {0x00, 0x01, 0x02, 0x03},
3190             {0x10, 0x11, 0x12, 0x13},
3191             {0x20, 0x21, 0x22, 0x23},
3192             {0x30, 0x31, 0x32, 0x33},
3193         };
3194         alignas(8) uint16_t buffer[4];
3195         SkRasterPipeline_MemoryCtx src = { &data[0][0], 0 },
3196                 dst = { &buffer[0], 0 };
3197 
3198         for (unsigned i = 1; i <= 4; i++) {
3199             memset(buffer, 0xff, sizeof(buffer));
3200             SkRasterPipeline_<256> p;
3201             p.append(SkRasterPipelineOp::load_8888, &src);
3202             p.append(SkRasterPipelineOp::store_a16, &dst);
3203             p.run(0,0, i,1);
3204             for (unsigned j = 0; j < i; j++) {
3205                 uint16_t expected = (data[j][3] << 8) | data[j][3];
3206                 REPORTER_ASSERT(r, buffer[j] == expected);
3207             }
3208             for (int j = i; j < 4; j++) {
3209                 REPORTER_ASSERT(r, buffer[j] == 0xffff);
3210             }
3211         }
3212     }
3213 
3214     {
3215         alignas(8) uint16_t data[][4] = {
3216             {0x0000, 0x1000, 0x2000, 0x3000},
3217             {0x0001, 0x1001, 0x2001, 0x3001},
3218             {0x0002, 0x1002, 0x2002, 0x3002},
3219             {0x0003, 0x1003, 0x2003, 0x3003},
3220         };
3221         alignas(8) uint16_t buffer[4][4];
3222         SkRasterPipeline_MemoryCtx src = { &data[0][0], 0 },
3223                 dst = { &buffer[0], 0 };
3224 
3225         for (unsigned i = 1; i <= 4; i++) {
3226             memset(buffer, 0xff, sizeof(buffer));
3227             SkRasterPipeline_<256> p;
3228             p.append(SkRasterPipelineOp::load_16161616, &src);
3229             p.append(SkRasterPipelineOp::swap_rb);
3230             p.append(SkRasterPipelineOp::store_16161616, &dst);
3231             p.run(0,0, i,1);
3232             for (unsigned j = 0; j < i; j++) {
3233                 uint16_t expected[4] = {data[j][2], data[j][1], data[j][0], data[j][3]};
3234                 REPORTER_ASSERT(r, !memcmp(&expected[0], &buffer[j], sizeof(expected)));
3235             }
3236             for (int j = i; j < 4; j++) {
3237                 for (uint16_t u16 : buffer[j])
3238                 REPORTER_ASSERT(r, u16 == 0xffff);
3239             }
3240         }
3241     }
3242 }
3243 
DEF_TEST(SkRasterPipeline_lowp, r) {
    // Fill 64 pixels with distinct byte values in every channel, push them
    // through a swap_rb pipeline (which takes the lowp path), and verify that
    // only red and blue traded places.
    uint32_t px[64];
    for (int i = 0; i < 64; i++) {
        const uint32_t base = 4*i;
        px[i] = (base+0) <<  0
              | (base+1) <<  8
              | (base+2) << 16
              | (base+3) << 24;
    }

    SkRasterPipeline_MemoryCtx ptr = { px, 0 };

    SkRasterPipeline_<256> p;
    p.append(SkRasterPipelineOp::load_8888,  &ptr);
    p.append(SkRasterPipelineOp::swap_rb);
    p.append(SkRasterPipelineOp::store_8888, &ptr);
    p.run(0,0,64,1);

    for (int i = 0; i < 64; i++) {
        const uint32_t base = 4*i;
        const uint32_t want = (base+0) << 16   // r and b have swapped...
                            | (base+1) <<  8
                            | (base+2) <<  0
                            | (base+3) << 24;  // ...g and a are untouched.
        if (px[i] != want) {
            ERRORF(r, "got %08x, want %08x\n", px[i], want);
        }
    }
}
3271 
DEF_TEST(SkRasterPipeline_swizzle, r) {
    // Swizzle "g1b1" through the lowp code path: green moves into the red
    // slot and the green slot is forced to full intensity.
    {
        uint16_t rg[64];
        for (int px = 0; px < 64; px++) {
            const uint16_t red   = SkToU16(4*px+0),
                           green = SkToU16(4*px+1);
            rg[px] = red | (green << 8);
        }

        skgpu::Swizzle swizzle("g1b1");

        SkRasterPipeline_MemoryCtx ptr = { rg, 0 };
        SkRasterPipeline_<256> p;
        p.append(SkRasterPipelineOp::load_rg88,  &ptr);
        swizzle.apply(&p);
        p.append(SkRasterPipelineOp::store_rg88, &ptr);
        p.run(0,0,64,1);

        for (int px = 0; px < 64; px++) {
            const uint32_t want = (4*px+1)     // r <- original g
                                | 0xff << 8;   // g <- constant 1
            if (rg[px] != want) {
                ERRORF(r, "got %08x, want %08x\n", rg[px], want);
            }
        }
    }
    // Swizzle "0gra" through the highp code path.
    {
        float rg[64][4];
        for (int px = 0; px < 64; px++) {
            rg[px][0] = px + 1;
            rg[px][1] = 2 * px + 1;
            rg[px][2] = 0;
            rg[px][3] = 1;
        }

        skgpu::Swizzle swizzle("0gra");

        uint16_t buffer[64][4];
        SkRasterPipeline_MemoryCtx src = { rg,     0 },
                                   dst = { buffer, 0};
        SkRasterPipeline_<256> p;
        p.append(SkRasterPipelineOp::load_f32,  &src);
        swizzle.apply(&p);
        p.append(SkRasterPipelineOp::store_f16, &dst);
        p.run(0,0,64,1);

        for (int px = 0; px < 64; px++) {
            const uint16_t want[4] = {
                h(0),           // r <- constant 0
                h(2 * px + 1),  // g <- original g
                h(px + 1),      // b <- original r
                h(1),           // a <- original a
            };
            REPORTER_ASSERT(r, !memcmp(want, buffer[px], sizeof(buffer[px])));
        }
    }
}
3330 
DEF_TEST(SkRasterPipeline_lowp_clamp01, r) {
    // A deliberately odd pipeline: clamping already-in-range lowp values is a
    // no-op, but building and running it certainly shouldn't crash.

    uint32_t pixel = 0xff00ff00;

    SkRasterPipeline_MemoryCtx ctx = { &pixel, 0 };

    SkRasterPipeline_<256> p;
    p.append(SkRasterPipelineOp::load_8888,  &ctx);
    p.append(SkRasterPipelineOp::swap_rb);
    p.append(SkRasterPipelineOp::clamp_01);
    p.append(SkRasterPipelineOp::store_8888, &ctx);
    p.run(0,0,1,1);
}
3346 
3347 // Helper struct that can be used to scrape stack addresses at different points in a pipeline
3348 class StackCheckerCtx : SkRasterPipeline_CallbackCtx {
3349 public:
StackCheckerCtx()3350     StackCheckerCtx() {
3351         this->fn = [](SkRasterPipeline_CallbackCtx* self, int active_pixels) {
3352             auto ctx = (StackCheckerCtx*)self;
3353             ctx->fStackAddrs.push_back(&active_pixels);
3354         };
3355     }
3356 
3357     enum class Behavior {
3358         kGrowth,
3359         kBaseline,
3360         kUnknown,
3361     };
3362 
GrowthBehavior()3363     static Behavior GrowthBehavior() {
3364         // Only some stages use the musttail attribute, so we have no way of knowing what's going to
3365         // happen. In release builds, it's likely that the compiler will apply tail-call
3366         // optimization. Even in some debug builds (on Windows), we don't see stack growth.
3367         return Behavior::kUnknown;
3368     }
3369 
3370     // Call one of these two each time the checker callback is added:
expectGrowth()3371     StackCheckerCtx* expectGrowth() {
3372         fExpectedBehavior.push_back(GrowthBehavior());
3373         return this;
3374     }
3375 
expectBaseline()3376     StackCheckerCtx* expectBaseline() {
3377         fExpectedBehavior.push_back(Behavior::kBaseline);
3378         return this;
3379     }
3380 
validate(skiatest::Reporter * r)3381     void validate(skiatest::Reporter* r) {
3382         REPORTER_ASSERT(r, fStackAddrs.size() == fExpectedBehavior.size());
3383 
3384         // This test is storing and comparing stack pointers (to dead stack frames) as a way of
3385         // measuring stack usage. Unsurprisingly, ASAN doesn't like that. HWASAN actually inserts
3386         // tag bytes in the pointers, causing them not to match. Newer versions of vanilla ASAN
3387         // also appear to salt the stack slightly, causing repeated calls to scrape different
3388         // addresses, even though $rsp is identical on each invocation of the lambda.
3389 #if !defined(SK_SANITIZE_ADDRESS)
3390         void* baseline = fStackAddrs[0];
3391         for (size_t i = 1; i < fStackAddrs.size(); i++) {
3392             if (fExpectedBehavior[i] == Behavior::kGrowth) {
3393                 REPORTER_ASSERT(r, fStackAddrs[i] != baseline);
3394             } else if (fExpectedBehavior[i] == Behavior::kBaseline) {
3395                 REPORTER_ASSERT(r, fStackAddrs[i] == baseline);
3396             } else {
3397                 // Unknown behavior, nothing we can assert here
3398             }
3399         }
3400 #endif
3401     }
3402 
3403 private:
3404     std::vector<void*>    fStackAddrs;
3405     std::vector<Behavior> fExpectedBehavior;
3406 };
3407 
DEF_TEST(SkRasterPipeline_stack_rewind, r) {
    // Verifies that stack_rewind stages let us control stack usage.

    // First, without stack_rewind: growth is possible but not guaranteed
    // (tail-call optimization may keep the stack flat).
    {
        StackCheckerCtx checker;
        uint32_t color = 0xff0000ff;
        SkRasterPipeline_MemoryCtx ctx = { &color, 0 };

        SkRasterPipeline_<256> p;
        p.append(SkRasterPipelineOp::callback, checker.expectBaseline());
        p.append(SkRasterPipelineOp::load_8888,  &ctx);
        p.append(SkRasterPipelineOp::callback, checker.expectGrowth());
        p.append(SkRasterPipelineOp::swap_rb);
        p.append(SkRasterPipelineOp::callback, checker.expectGrowth());
        p.append(SkRasterPipelineOp::store_8888, &ctx);
        p.run(0,0,1,1);

        REPORTER_ASSERT(r, color == 0xffff0000); // Ensure the pipeline worked
        checker.validate(r);
    }

    // Now with stack_rewind: every rewind must land back at the baseline.
    {
        StackCheckerCtx checker;
        uint32_t color = 0xff0000ff;
        SkRasterPipeline_MemoryCtx ctx = { &color, 0 };

        SkRasterPipeline_<256> p;
        p.append(SkRasterPipelineOp::callback, checker.expectBaseline());
        p.append(SkRasterPipelineOp::load_8888,  &ctx);
        p.append(SkRasterPipelineOp::callback, checker.expectGrowth());
        p.appendStackRewind();
        p.append(SkRasterPipelineOp::callback, checker.expectBaseline());
        p.append(SkRasterPipelineOp::swap_rb);
        p.append(SkRasterPipelineOp::callback, checker.expectGrowth());
        p.appendStackRewind();
        p.append(SkRasterPipelineOp::callback, checker.expectBaseline());
        p.append(SkRasterPipelineOp::store_8888, &ctx);
        p.run(0,0,1,1);

        REPORTER_ASSERT(r, color == 0xffff0000); // Ensure the pipeline worked
        checker.validate(r);
    }
}
3453