/*
 * Copyright 2021 Google LLC
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#ifndef skgpu_UniformManager_DEFINED
#define skgpu_UniformManager_DEFINED

#include "include/core/SkM44.h"
#include "include/core/SkMatrix.h"
#include "include/core/SkPoint.h"
#include "include/core/SkPoint3.h"
#include "include/core/SkRect.h"
#include "include/core/SkRefCnt.h"
#include "include/core/SkSize.h"
#include "include/core/SkSpan.h"
#include "include/private/SkColorData.h"
#include "include/private/base/SkAlign.h"
#include "include/private/base/SkTDArray.h"
#include "src/base/SkHalf.h"
#include "src/base/SkMathPriv.h"
#include "src/core/SkMatrixPriv.h"
#include "src/core/SkSLTypeShared.h"
#include "src/gpu/graphite/ResourceTypes.h"
#include "src/gpu/graphite/Uniform.h"

#include <algorithm>
#include <memory>

namespace skgpu::graphite {

class UniformDataBlock;

/**
 * Layout::kStd140
 * ===============
 *
 * From OpenGL Specification Section 7.6.2.2 "Standard Uniform Block Layout"
 * [https://registry.khronos.org/OpenGL/specs/gl/glspec45.core.pdf#page=159]:
 *  1. If the member is a scalar consuming N basic machine units, the base alignment is N.
 *  2. If the member is a two- or four-component vector with components consuming N basic machine
 *     units, the base alignment is 2N or 4N, respectively.
 *  3. If the member is a three-component vector with components consuming N
 *     basic machine units, the base alignment is 4N.
 *  4. If the member is an array of scalars or vectors, the base alignment and array
 *     stride are set to match the base alignment of a single array element, according
 *     to rules (1), (2), and (3), and rounded up to the base alignment of a vec4. The
 *     array may have padding at the end; the base offset of the member following
 *     the array is rounded up to the next multiple of the base alignment.
 *  5. If the member is a column-major matrix with C columns and R rows, the
 *     matrix is stored identically to an array of C column vectors with R components each,
 *     according to rule (4).
 *  6. If the member is an array of S column-major matrices with C columns and
 *     R rows, the matrix is stored identically to a row of S × C column vectors
 *     with R components each, according to rule (4).
 *  7. If the member is a row-major matrix with C columns and R rows, the matrix
 *     is stored identically to an array of R row vectors with C components each,
 *     according to rule (4).
 *  8. If the member is an array of S row-major matrices with C columns and R
 *     rows, the matrix is stored identically to a row of S × R row vectors with C
 *     components each, according to rule (4).
 *  9. If the member is a structure, the base alignment of the structure is N, where
 *     N is the largest base alignment value of any of its members, and rounded
 *     up to the base alignment of a vec4. The individual members of this substructure are then
 *     assigned offsets by applying this set of rules recursively,
 *     where the base offset of the first member of the sub-structure is equal to the
 *     aligned offset of the structure. The structure may have padding at the end;
 *     the base offset of the member following the sub-structure is rounded up to
 *     the next multiple of the base alignment of the structure.
 * 10. If the member is an array of S structures, the S elements of the array are laid
 *     out in order, according to rule (9).
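 *
 * For example (illustrative, applying the rules above), a std140 block declared as
 * { float a; float3 b; float c[2]; } places 'a' at offset 0 (rule 1), 'b' at offset 16 with a
 * size of 12 (rule 3), and 'c' at offset 32 with an array stride of 16 (rule 4), for a total
 * block size of 64 bytes.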
 *
 * Layout::kStd430
 * ===============
 *
 * When using the std430 storage layout, shader storage blocks will be laid out in buffer storage
 * identically to uniform and shader storage blocks using the std140 layout, except that the base
 * alignment and stride of arrays of scalars and vectors in rule 4 and of structures in rule 9 are
 * not rounded up to a multiple of the base alignment of a vec4.
 *
 * NOTE: While not explicitly stated, the layout rules for WebGPU and WGSL are identical to std430
 * for SSBOs and nearly identical to std140 for UBOs. The default mat2x2 type is treated as two
 * float2's (not an array), so its size is 16 and alignment is 8 (vs. a size of 32 and alignment of
 * 16 in std140). When emitting WGSL from SkSL, prepareUniformPolyfillsForInterfaceBlock() defined
 * in WGSLCodeGenerator will modify the type declaration to match std140 exactly. This allows the
 * UniformManager and UniformOffsetCalculator to avoid having WebGPU-specific layout rules
 * (whereas SkSL::MemoryLayout has more complete rules).
 *
 * Layout::kMetal
 * ==============
 *
 * SkSL converts its types to the non-packed SIMD vector types in MSL. The size and alignment rules
 * are equivalent to std430 with the exception of half3 and float3. In std430, the size consumed
 * by non-array uniforms of these types is 3N while Metal consumes 4N (which is equal to the
 * alignment of a vec3 in both Layouts).
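 *
 * For example (illustrative), a single non-array float3 uniform consumes 12 bytes under std430
 * but 16 bytes under kMetal; its alignment is 16 in both Layouts.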
 *
 * Half vs. Float Uniforms
 * =======================
 *
 * Regardless of the precision used when the shader executes, std140 and std430 layouts consume
 * "half"-based uniforms in full 32-bit precision. Metal consumes "half"-based uniforms expecting
 * them to have already been converted to f16. WebGPU has an extension to support f16 types, which
 * behave like this, but we do not currently utilize it.
 *
 * The rules for std430 can be easily extended to f16 by applying N = 2 instead of N = 4 for the
 * base primitive alignment.
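 *
 * For example (illustrative), a half4 uniform occupies 16 bytes under std140 or std430 (written
 * as four 32-bit floats) but only 8 bytes under kMetal (written as four f16 values).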
 *
 * NOTE: This could also apply to the int vs. short or uint vs. ushort types, but these smaller
 * integer types are not supported on all platforms as uniforms. We disallow short integer uniforms
 * entirely, and if the data savings are required, packing should be implemented manually.
 * Short integer vertex attributes are supported when the vector type packs into 32 bits
 * (e.g. int16x2 or int8x4).
 *
 *
 * Generalized Layout Rules
 * ========================
 *
 * From the Layout descriptions above, the following simpler rules are sufficient:
 *
 * 1. If the base primitive type is "half" and the Layout expects half floats, N = 2; else, N = 4.
 *
 * 2. For arrays of scalars or vectors (with # of components, M = 1,2,3,4):
 *    a. If arrays must be aligned on vec4 boundaries OR M=3, then align and stride = 4*N.
 *    b. Otherwise, the align and stride = M*N.
 *
 *    In both cases, the total size required for the uniform is "array size"*stride.
 *
 * 3. For single scalars or vectors (M = 1,2,3,4), the align is SkNextPow2(M)*N (e.g. N,2N,4N,4N).
 *    a. If M = 3 and the Layout aligns the size with the alignment, the size is 4*N and the N
 *       padding bytes must be zeroed out afterwards.
 *    b. Otherwise, the align and size = M*N.
 *
 * 4. The starting offset to write data is the current offset aligned to the calculated align value.
 *    The current offset is then incremented by the total size of the uniform.
 *
 *    For arrays and padded vec3's, the padding is included in the stride and total size, meeting
 *    the requirements of the original rule 4 in std140. When a single float3 that is not padded
 *    is written, the next offset only advances 12 bytes, allowing a smaller type to pack tightly
 *    next to the Z coordinate.
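 *
 * As a worked example (illustrative), consider a float2 followed by a half3[2] array. Under
 * std140, the float2 starts at offset 0 (align = 2*N = 8) and the array starts at offset 16 with
 * a 16-byte stride (M = 3 and half is treated as full precision, so rule 2a gives 4*N = 16), for
 * a total of 48 bytes. Under kMetal, the float2 starts at offset 0 and the array starts at
 * offset 8 with an 8-byte stride (N = 2), for a total of 24 bytes.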
 *
 * When N = 4, the CPU and GPU primitives are compatible, regardless of being float, int, or uint.
 * Contiguous ranges between any padding (for alignment or for array stride) can be memcpy'ed.
 * When N = 2, the CPU data is float and the GPU data is f16, so values must be converted one
 * primitive at a time using SkFloatToHalf or skvx::to_half.
 *
 * The UniformManager will zero out any padding bytes (either prepended for starting alignment,
 * or appended for stride alignment). This is so that the final byte array can be hashed for uniform
 * value de-duplication before uploading to the GPU.
 *
 * While SkSL supports non-square matrices, the SkSLType enum and Graphite only expose support for
 * square matrices. Graphite assumes all matrix uniforms are in column-major order. This already
 * matches the data layout of SkM44, and UniformManager automatically transposes SkMatrix (which
 * stores its data in row-major order) to column-major. Thus, for layout purposes, a matrix or an
 * array of matrices can be laid out equivalently to an array of the column type with an array
 * count multiplied by the number of columns.
 *
 * Graphite does not embed structs within structs for its UBO or SSBO declarations for paint or
 * RenderSteps. However, when the "uniforms" are defined for use with SSBO random access, the
 * ordered set of uniforms actually defines a struct instead of just a top-level interface. As
 * such, once all uniforms are recorded, the size must be rounded up to the maximum alignment
 * encountered for its members to satisfy alignment rules for all Layouts.
 *
 * If Graphite starts to define sub-structs, UniformOffsetCalculator can be used recursively.
 */
namespace LayoutRules {
    // The three diverging behaviors across the different Layouts:
    static constexpr bool PadVec3Size(Layout layout) { return layout == Layout::kMetal; }
    static constexpr bool AlignArraysAsVec4(Layout layout) { return layout == Layout::kStd140; }
    static constexpr bool UseFullPrecision(Layout layout) { return layout != Layout::kMetal; }
}

class UniformOffsetCalculator {
public:
    UniformOffsetCalculator() = default;

    static UniformOffsetCalculator ForTopLevel(Layout layout, int offset = 0) {
        return UniformOffsetCalculator(layout, offset, /*reqAlignment=*/1);
    }

    static UniformOffsetCalculator ForStruct(Layout layout) {
        const int reqAlignment = LayoutRules::AlignArraysAsVec4(layout) ? 16 : 1;
        return UniformOffsetCalculator(layout, /*offset=*/0, reqAlignment);
    }

    Layout layout() const { return fLayout; }

    // NOTE: The returned size extends to the end of the last consumed byte (if the recorded
    // uniforms are embedded within a struct, this will need to be rounded up to a multiple of
    // requiredAlignment()).
    int size() const { return fOffset; }
    int requiredAlignment() const { return fReqAlignment; }

    // Returns the correctly aligned offset to accommodate `count` instances of `type` and
    // advances the internal offset.
    //
    // After a call to this method, `size()` will return the offset to the end of `count` instances
    // of `type` (while the return value equals the aligned start offset). Subsequent calls will
    // calculate the new start offset starting at `size()`.
    int advanceOffset(SkSLType type, int count = Uniform::kNonArray);

    // Returns the correctly aligned offset to accommodate `count` instances of a custom struct
    // type that has had its own fields passed into the `substruct` offset calculator.
    //
    // After a call to this method, `size()` will return the offset to the end of `count` instances
    // of the struct type (while the return value equals the aligned start offset). This includes
    // any required padding of the struct size per rule #9.
    int advanceStruct(const UniformOffsetCalculator& substruct, int count = Uniform::kNonArray);

private:
    UniformOffsetCalculator(Layout layout, int offset, int reqAlignment)
            : fLayout(layout), fOffset(offset), fReqAlignment(reqAlignment) {}

    Layout fLayout    = Layout::kInvalid;
    int fOffset       = 0;
    int fReqAlignment = 1;
};
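
// An illustrative sketch of intended usage (the variable names are hypothetical; the offsets
// follow from the layout rules documented above):
//
//     UniformOffsetCalculator calc = UniformOffsetCalculator::ForTopLevel(Layout::kStd140);
//     int aOffset = calc.advanceOffset(SkSLType::kFloat2);    // 0  (align 8, size 8)
//     int bOffset = calc.advanceOffset(SkSLType::kFloat);     // 8  (align 4, size 4)
//     int cOffset = calc.advanceOffset(SkSLType::kHalf3, 2);  // 16 (array stride 16 in std140)
//     int totalSize = calc.size();                            // 48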

class UniformManager {
public:
    UniformManager(Layout layout) { this->resetWithNewLayout(layout); }

    SkSpan<const char> finish() {
        this->alignTo(fReqAlignment);
        return SkSpan(fStorage);
    }

    size_t size() const { return fStorage.size(); }

    void resetWithNewLayout(Layout layout);
    void reset() { this->resetWithNewLayout(fLayout); }

    // scalars
    void write(float f)     { this->write<SkSLType::kFloat>(&f); }
    void write(int32_t i)   { this->write<SkSLType::kInt  >(&i); }
    void writeHalf(float f) { this->write<SkSLType::kHalf >(&f); }

    // [i|h]vec4 and arrays thereof (just add overloads as needed)
    void write(const SkPMColor4f& c) { this->write<SkSLType::kFloat4>(c.vec()); }
    void write(const SkRect& r)      { this->write<SkSLType::kFloat4>(r.asScalars()); }
    void write(const SkV4& v)        { this->write<SkSLType::kFloat4>(v.ptr()); }

    void write(const SkIRect& r)     { this->write<SkSLType::kInt4>(&r); }

    void writeHalf(const SkPMColor4f& c) { this->write<SkSLType::kHalf4>(c.vec()); }
    void writeHalf(const SkRect& r)      { this->write<SkSLType::kHalf4>(r.asScalars()); }
    void writeHalf(const SkV4& v)        { this->write<SkSLType::kHalf4>(v.ptr()); }

    void writeArray(SkSpan<const SkV4> v) {
        this->writeArray<SkSLType::kFloat4>(v.data(), v.size());
    }
    void writeArray(SkSpan<const SkPMColor4f> c) {
        this->writeArray<SkSLType::kFloat4>(c.data(), c.size());
    }
    void writeHalfArray(SkSpan<const SkPMColor4f> c) {
        this->writeArray<SkSLType::kHalf4>(c.data(), c.size());
    }

    // [i|h]vec3
    void write(const SkV3& v)     { this->write<SkSLType::kFloat3>(v.ptr()); }
    void write(const SkPoint3& p) { this->write<SkSLType::kFloat3>(&p); }

    void writeHalf(const SkV3& v)     { this->write<SkSLType::kHalf3>(v.ptr()); }
    void writeHalf(const SkPoint3& p) { this->write<SkSLType::kHalf3>(&p); }

    // NOTE: 3-element vectors never pack efficiently in arrays, so avoid using them

    // [i|h]vec2
    void write(const SkV2& v)    { this->write<SkSLType::kFloat2>(v.ptr()); }
    void write(const SkSize& s)  { this->write<SkSLType::kFloat2>(&s); }
    void write(const SkPoint& p) { this->write<SkSLType::kFloat2>(&p); }

    void write(const SkISize& s) { this->write<SkSLType::kInt2>(&s); }

    void writeHalf(const SkV2& v)    { this->write<SkSLType::kHalf2>(v.ptr()); }
    void writeHalf(const SkSize& s)  { this->write<SkSLType::kHalf2>(&s); }
    void writeHalf(const SkPoint& p) { this->write<SkSLType::kHalf2>(&p); }

    // NOTE: 2-element vectors don't pack efficiently in std140, so avoid using them

    // matrices
    void write(const SkM44& m) {
        // All Layouts treat a 4x4 column-major matrix as an array of vec4's, which is exactly how
        // SkM44 already stores its data.
        this->writeArray<SkSLType::kFloat4>(SkMatrixPriv::M44ColMajor(m), 4);
    }

    void writeHalf(const SkM44& m) {
        this->writeArray<SkSLType::kHalf4>(SkMatrixPriv::M44ColMajor(m), 4);
    }

    void write(const SkMatrix& m) {
        // SkMatrix is row-major, so rewrite to column-major. All Layouts treat a 3x3 column-major
        // matrix as an array of vec3's.
        float colMajor[9] = {m[0], m[3], m[6],
                             m[1], m[4], m[7],
                             m[2], m[5], m[8]};
        this->writeArray<SkSLType::kFloat3>(colMajor, 3);
    }
    void writeHalf(const SkMatrix& m) {
        float colMajor[9] = {m[0], m[3], m[6],
                             m[1], m[4], m[7],
                             m[2], m[5], m[8]};
        this->writeArray<SkSLType::kHalf3>(colMajor, 3);
    }

    // NOTE: 2x2 matrices can be manually packed as well as or better than a vec4, so prefer that

    // This is a specialized uniform writing entry point intended to deduplicate the paint
    // color. If a more general system is required, the deduping logic can be added to the
    // other write methods (and this specialized method would be removed).
    void writePaintColor(const SkPMColor4f& color) {
        if (fWrotePaintColor) {
            // Validate expected uniforms, but don't write a second copy since the paint color
            // uniform can only ever be declared once in the final SkSL program.
            SkASSERT(this->checkExpected(/*dst=*/nullptr, SkSLType::kFloat4, Uniform::kNonArray));
        } else {
            this->write<SkSLType::kFloat4>(&color);
            fWrotePaintColor = true;
        }
    }

    // Copy from `src` using Uniform array-count semantics.
    void write(const Uniform&, const void* src);

    // UniformManager has basic support for writing substructs with the caveats:
    // 1. The base alignment of the substruct must be known a priori so the first member can be
    //    written immediately.
    // 2. Nested substructs are not supported (but could be if the padded-struct size was also
    //    provided to endStruct()).
    //
    // Call beginStruct(baseAlignment) before writing the first field. Then call the regular
    // write functions for each of the substruct's fields in order. Lastly, call endStruct() to
    // go back to writing fields in the top-level interface block.
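    //
    // An illustrative sketch (the layout, struct fields, and values here are hypothetical):
    //
    //     UniformOffsetCalculator structCalc = UniformOffsetCalculator::ForStruct(layout);
    //     structCalc.advanceOffset(SkSLType::kFloat4);
    //     structCalc.advanceOffset(SkSLType::kFloat);
    //
    //     UniformManager mgr(layout);
    //     mgr.beginStruct(structCalc.requiredAlignment());
    //     mgr.write(SkV4{0.f, 0.f, 1.f, 1.f});  // the struct's float4 field
    //     mgr.write(0.5f);                      // the struct's float field
    //     mgr.endStruct();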
    void beginStruct(int baseAlignment) {
        SkASSERT(this->checkBeginStruct(baseAlignment)); // verifies baseAlignment matches layout

        this->alignTo(baseAlignment);
        fStructBaseAlignment = baseAlignment;
        fReqAlignment = std::max(fReqAlignment, baseAlignment);
    }
    void endStruct() {
        SkASSERT(fStructBaseAlignment >= 1); // Must have started a struct
        this->alignTo(fStructBaseAlignment);
        SkASSERT(this->checkEndStruct()); // validate after padding out to struct's alignment
        fStructBaseAlignment = 0;
    }

    // Debug-only functions to control uniform expectations.
#ifdef SK_DEBUG
    bool isReset() const;
    void setExpectedUniforms(SkSpan<const Uniform> expected, bool isSubstruct);
    void doneWithExpectedUniforms();
#endif // SK_DEBUG

private:
    // All public write() functions in UniformManager already match scalar/vector SkSLTypes or have
    // explicitly converted matrix SkSLTypes to a writeArray<column type>, so this does not need to
    // check anything beyond half[2,3,4].
    static constexpr bool IsHalfVector(SkSLType type) {
        return type >= SkSLType::kHalf && type <= SkSLType::kHalf4;
    }

    // Other than validation, actual layout doesn't care about 'type' and the logic can be
    // based on vector length and whether or not it's half or full precision.
    template <int N, bool Half> void write(const void* src, SkSLType type);
    template <int N, bool Half> void writeArray(const void* src, int count, SkSLType type);

    // Helpers to select dimensionality and convert to full precision if required by the Layout.
    template <SkSLType Type> void write(const void* src) {
        static constexpr int N = SkSLTypeVecLength(Type);
        if (IsHalfVector(Type) && !LayoutRules::UseFullPrecision(fLayout)) {
            this->write<N, /*Half=*/true>(src, Type);
        } else {
            this->write<N, /*Half=*/false>(src, Type);
        }
    }
    template <SkSLType Type> void writeArray(const void* src, int count) {
        static constexpr int N = SkSLTypeVecLength(Type);
        if (IsHalfVector(Type) && !LayoutRules::UseFullPrecision(fLayout)) {
            this->writeArray<N, /*Half=*/true>(src, count, Type);
        } else {
            this->writeArray<N, /*Half=*/false>(src, count, Type);
        }
    }

    // These are marked 'inline' so that they can be defined below with write() and writeArray()
    // and still link correctly.
    inline char* append(int alignment, int size);
    inline void alignTo(int alignment);

    SkTDArray<char> fStorage;

    Layout fLayout;
    int fReqAlignment = 0;
    int fStructBaseAlignment = 0;
    // The paint color is treated specially and we only add its uniform once.
    bool fWrotePaintColor = false;

    // Debug-only verification that UniformOffsetCalculator is consistent and that write() calls
    // match the expected uniform declaration order.
#ifdef SK_DEBUG
    UniformOffsetCalculator fOffsetCalculator; // should match implicit offsets from append()
    UniformOffsetCalculator fSubstructCalculator; // 0-based, used when inside a substruct
    int fSubstructStartingOffset = -1; // offset within fOffsetCalculator of first field

    SkSpan<const Uniform> fExpectedUniforms;
    int fExpectedUniformIndex = 0;

    bool checkExpected(const void* dst, SkSLType, int count);
    bool checkBeginStruct(int baseAlignment);
    bool checkEndStruct();
#endif // SK_DEBUG
};
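
// An illustrative sketch of typical top-level usage (the layout and values are arbitrary). In
// debug builds, setExpectedUniforms() can additionally be used to validate the writes against
// the declared Uniforms.
//
//     UniformManager mgr(Layout::kStd430);
//     mgr.write(SkPMColor4f{1.f, 0.f, 0.f, 1.f});  // float4
//     mgr.writeHalf(SkPoint{0.5f, 0.5f});          // half2
//     mgr.write(2.f);                              // float
//     SkSpan<const char> uniformData = mgr.finish();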

///////////////////////////////////////////////////////////////////////////////////////////////////
// Definitions

// Shared helper for both write() and writeArray()
template <int N, bool Half>
struct LayoutTraits {
    static_assert(1 <= N && N <= 4);

    static constexpr int kElemSize = Half ? sizeof(SkHalf) : sizeof(float);
    static constexpr int kSize     = N * kElemSize;
    static constexpr int kAlign    = SkNextPow2_portable(N) * kElemSize;

    // Reads kSize bytes from 'src' and copies or converts (float->half) the N values
    // into 'dst'. Does not add any other padding that may depend on usage and Layout.
    static void Copy(const void* src, void* dst) {
        if constexpr (Half) {
            using VecF = skvx::Vec<SkNextPow2_portable(N), float>;
            VecF srcData;
            if constexpr (N == 3) {
                // Load the 3 values into a float4 to take advantage of vectorized conversion.
                // The 4th value will not be copied to dst.
                const float* srcF = static_cast<const float*>(src);
                srcData = VecF{srcF[0], srcF[1], srcF[2], 0.f};
            } else {
                srcData = VecF::Load(src);
            }

            auto dstData = to_half(srcData);
            // NOTE: this is identical to Vec::store() for N=1,2,4 and correctly drops the 4th
            // lane when N=3.
            memcpy(dst, &dstData, kSize);
        } else {
            memcpy(dst, src, kSize);
        }
    }

#ifdef SK_DEBUG
    static void Validate(const void* src, SkSLType type, Layout layout) {
        // Src validation
        SkASSERT(src);
        // All primitives on the CPU side should be 4-byte aligned
        SkASSERT(SkIsAlign4(reinterpret_cast<intptr_t>(src)));

        // Type and layout validation
        SkASSERT(SkSLTypeCanBeUniformValue(type));
        SkASSERT(SkSLTypeVecLength(type) == N); // Matrix types should have been flattened already
        if constexpr (Half) {
            SkASSERT(SkSLTypeIsFloatType(type));
            SkASSERT(!SkSLTypeIsFullPrecisionNumericType(type));
            SkASSERT(!LayoutRules::UseFullPrecision(layout));
        } else {
            SkASSERT(SkSLTypeIsFullPrecisionNumericType(type) ||
                     LayoutRules::UseFullPrecision(layout));
        }
    }
#endif
};

template<int N, bool Half>
void UniformManager::write(const void* src, SkSLType type) {
    using L = LayoutTraits<N, Half>;
    SkDEBUGCODE(L::Validate(src, type, fLayout);)

    // Layouts diverge in how vec3 size is determined for non-array usage
    char* dst = (N == 3 && LayoutRules::PadVec3Size(fLayout))
            ? this->append(L::kAlign, L::kSize + L::kElemSize)
            : this->append(L::kAlign, L::kSize);
    SkASSERT(this->checkExpected(dst, type, Uniform::kNonArray));

    L::Copy(src, dst);
    if (N == 3 && LayoutRules::PadVec3Size(fLayout)) {
        memset(dst + L::kSize, 0, L::kElemSize);
    }
}

template<int N, bool Half>
void UniformManager::writeArray(const void* src, int count, SkSLType type) {
    using L = LayoutTraits<N, Half>;
    static constexpr int kSrcStride = N * 4; // Source data is always in multiples of 4 bytes.

    SkDEBUGCODE(L::Validate(src, type, fLayout);)
    SkASSERT(count > 0);

    if (Half || N == 3 || (N != 4 && LayoutRules::AlignArraysAsVec4(fLayout))) {
        // Either a non-dense array (N == 3 is always padded to vec4, or the Layout requires it),
        // or we have to perform half conversion, so iterate over each element.
        static constexpr int kStride  = Half ? L::kAlign : 4*L::kElemSize;
        SkASSERT(!(Half && LayoutRules::AlignArraysAsVec4(fLayout))); // should be exclusive

        const char* srcBytes = reinterpret_cast<const char*>(src);
        char* dst = this->append(kStride, kStride*count);
        SkASSERT(this->checkExpected(dst, type, count));

        for (int i = 0; i < count; ++i) {
            L::Copy(srcBytes, dst);
            if constexpr (kStride - L::kSize > 0) {
                memset(dst + L::kSize, 0, kStride - L::kSize);
            }

            dst += kStride;
            srcBytes += kSrcStride;
        }
    } else {
        // A dense array with no type conversion, so copy in one go.
        SkASSERT(L::kAlign == L::kSize && kSrcStride == L::kSize);
        char* dst = this->append(L::kAlign, L::kSize*count);
        SkASSERT(this->checkExpected(dst, type, count));

        memcpy(dst, src, L::kSize*count);
    }
}

void UniformManager::alignTo(int alignment) {
    SkASSERT(alignment >= 1 && SkIsPow2(alignment));
    if ((fStorage.size() & (alignment - 1)) != 0) {
        this->append(alignment, /*size=*/0);
    }
}

char* UniformManager::append(int alignment, int size) {
    // The base alignment for a struct should have been calculated for the current layout using
    // UniformOffsetCalculator, so every field appended within the struct should have an alignment
    // less than or equal to that base alignment.
    SkASSERT(fStructBaseAlignment <= 0 || alignment <= fStructBaseAlignment);

    const int offset = fStorage.size();
    const int padding = SkAlignTo(offset, alignment) - offset;

    // These are just asserts, not aborts, because SkSL compilation imposes limits on the size of
    // runtime effect arrays, and internal shaders should not be using excessive lengths.
    SkASSERT(std::numeric_limits<int>::max() - alignment >= offset);
    SkASSERT(std::numeric_limits<int>::max() - size >= padding);

    char* dst = fStorage.append(size + padding);
    if (padding > 0) {
        memset(dst, 0, padding);
        dst += padding;
    }

    fReqAlignment = std::max(fReqAlignment, alignment);
    return dst;
}

}  // namespace skgpu::graphite

#endif // skgpu_UniformManager_DEFINED