xref: /aosp_15_r20/external/skia/src/utils/SkJSON.cpp (revision c8dee2aa9b3f27cf6c858bd81872bdeb2c07ed17)
1 /*
2  * Copyright 2018 Google Inc.
3  *
4  * Use of this source code is governed by a BSD-style license that can be
5  * found in the LICENSE file.
6  */
7 
8 #include "src/utils/SkJSON.h"
9 
10 #include "include/core/SkData.h"
11 #include "include/core/SkRefCnt.h"
12 #include "include/core/SkStream.h"
13 #include "include/core/SkString.h"
14 #include "include/private/base/SkDebug.h"
15 #include "include/private/base/SkMalloc.h"
16 #include "include/private/base/SkTo.h"
17 #include "include/utils/SkParse.h"
18 #include "src/base/SkArenaAlloc.h"
19 #include "src/base/SkUTF.h"
20 
21 #include <cmath>
22 #include <cstdint>
23 #include <cstdlib>
24 #include <limits>
25 #include <new>
26 #include <tuple>
27 #include <vector>
28 
29 namespace skjson {
30 
31 // #define SK_JSON_REPORT_ERRORS
32 
33 static_assert( sizeof(Value) == 8, "");
34 static_assert(alignof(Value) == 8, "");
35 
36 static constexpr size_t kRecAlign = alignof(Value);
37 
init_tagged(Tag t)38 void Value::init_tagged(Tag t) {
39     memset(fData8, 0, sizeof(fData8));
40     fData8[0] = SkTo<uint8_t>(t);
41     SkASSERT(this->getTag() == t);
42 }
43 
44 // Pointer values store a type (in the lower kTagBits bits) and a pointer.
init_tagged_pointer(Tag t,void * p)45 void Value::init_tagged_pointer(Tag t, void* p) {
46     if (sizeof(Value) == sizeof(uintptr_t)) {
47         *this->cast<uintptr_t>() = reinterpret_cast<uintptr_t>(p);
48         // For 64-bit, we rely on the pointer lower bits being zero.
49         SkASSERT(!(fData8[0] & kTagMask));
50         fData8[0] |= SkTo<uint8_t>(t);
51     } else {
52         // For 32-bit, we store the pointer in the upper word
53         SkASSERT(sizeof(Value) == sizeof(uintptr_t) * 2);
54         this->init_tagged(t);
55         *this->cast<uintptr_t>() = reinterpret_cast<uintptr_t>(p);
56     }
57 
58     SkASSERT(this->getTag()    == t);
59     SkASSERT(this->ptr<void>() == p);
60 }
61 
NullValue()62 NullValue::NullValue() {
63     this->init_tagged(Tag::kNull);
64     SkASSERT(this->getTag() == Tag::kNull);
65 }
66 
BoolValue(bool b)67 BoolValue::BoolValue(bool b) {
68     this->init_tagged(Tag::kBool);
69     *this->cast<bool>() = b;
70     SkASSERT(this->getTag() == Tag::kBool);
71 }
72 
NumberValue(int32_t i)73 NumberValue::NumberValue(int32_t i) {
74     this->init_tagged(Tag::kInt);
75     *this->cast<int32_t>() = i;
76     SkASSERT(this->getTag() == Tag::kInt);
77 }
78 
NumberValue(float f)79 NumberValue::NumberValue(float f) {
80     this->init_tagged(Tag::kFloat);
81     *this->cast<float>() = f;
82     SkASSERT(this->getTag() == Tag::kFloat);
83 }
84 
85 // Vector recs point to externally allocated slabs with the following layout:
86 //
87 //   [size_t n] [REC_0] ... [REC_n-1] [optional extra trailing storage]
88 //
89 // Long strings use extra_alloc_size == 1 to store the \0 terminator.
90 //
91 template <typename T, size_t extra_alloc_size = 0>
MakeVector(size_t vec_size,const void * src,size_t src_size,SkArenaAlloc & alloc)92 static void* MakeVector(size_t vec_size, const void* src, size_t src_size, SkArenaAlloc& alloc) {
93     // The Ts are already in memory, so their size should be safe.
94     const auto total_size = sizeof(size_t) + vec_size * sizeof(T) + extra_alloc_size;
95     auto* size_ptr = reinterpret_cast<size_t*>(alloc.makeBytesAlignedTo(total_size, kRecAlign));
96 
97     *size_ptr = vec_size;
98     sk_careful_memcpy(size_ptr + 1, src, src_size * sizeof(T));
99 
100     return size_ptr;
101 }
102 
103 template <typename T, size_t extra_alloc_size = 0>
MakeVector(size_t vec_size,const void * src,SkArenaAlloc & alloc)104 static void* MakeVector(size_t vec_size, const void* src, SkArenaAlloc& alloc) {
105     return MakeVector<T, extra_alloc_size>(vec_size, src, vec_size, alloc);
106 }
107 
ArrayValue(const Value * src,size_t size,SkArenaAlloc & alloc)108 ArrayValue::ArrayValue(const Value* src, size_t size, SkArenaAlloc& alloc) {
109     this->init_tagged_pointer(Tag::kArray, MakeVector<Value>(size, src, alloc));
110     SkASSERT(this->getTag() == Tag::kArray);
111 }
112 
113 // Strings have two flavors:
114 //
115 // -- short strings (len <= 6) -> these are stored inline, in the record
116 //    (one byte reserved for the type tag, one for the null terminator):
117 //
118 //        [tag] [str] [\0 padding]
119 //
120 //    The record is zero-filled before the chars are stored, so the bytes following
121 //    the string act as its null terminator (kShortString == 0 keeps the tag byte zero too).
122 //
123 // -- long strings (len > 6) -> these are externally allocated vectors (VectorRec<char>).
124 //
125 // The string data plus a null-char terminator are copied over.
126 //
127 namespace {
128 
129 // An internal string builder with a fast 8 byte short string load path
130 // (for the common case where the string is not at the end of the stream).
131 class FastString final : public Value {
132 public:
FastString(const char * src,size_t size,const char * eos,SkArenaAlloc & alloc)133     FastString(const char* src, size_t size, const char* eos, SkArenaAlloc& alloc) {
134         SkASSERT(src <= eos);
135 
136         if (size > kMaxInlineStringSize) {
137             this->initLongString(src, size, alloc);
138             SkASSERT(this->getTag() == Tag::kString);
139             return;
140         }
141 
142         // initFastShortString is faster (doh), but requires access to 6 chars past src.
143         if (src && src + 6 <= eos) {
144             this->initFastShortString(src, size);
145         } else {
146             this->initShortString(src, size);
147         }
148 
149         SkASSERT(this->getTag() == Tag::kShortString);
150     }
151 
152 private:
153     // first byte reserved for tagging, \0 terminator => 6 usable chars
154     inline static constexpr size_t kMaxInlineStringSize = sizeof(Value) - 2;
155 
initLongString(const char * src,size_t size,SkArenaAlloc & alloc)156     void initLongString(const char* src, size_t size, SkArenaAlloc& alloc) {
157         SkASSERT(size > kMaxInlineStringSize);
158 
159         this->init_tagged_pointer(Tag::kString, MakeVector<char, 1>(size, src, alloc));
160 
161         auto* data = this->cast<VectorValue<char, Value::Type::kString>>()->begin();
162         const_cast<char*>(data)[size] = '\0';
163     }
164 
initShortString(const char * src,size_t size)165     void initShortString(const char* src, size_t size) {
166         SkASSERT(size <= kMaxInlineStringSize);
167 
168         this->init_tagged(Tag::kShortString);
169         sk_careful_memcpy(this->cast<char>(), src, size);
170         // Null terminator provided by init_tagged() above (fData8 is zero-initialized).
171     }
172 
initFastShortString(const char * src,size_t size)173     void initFastShortString(const char* src, size_t size) {
174         SkASSERT(size <= kMaxInlineStringSize);
175 
176         uint64_t* s64 = this->cast<uint64_t>();
177 
178         // Load 8 chars and mask out the tag and \0 terminator.
179         // Note: we picked kShortString == 0 to avoid setting explicitly below.
180         static_assert(SkToU8(Tag::kShortString) == 0, "please don't break this");
181 
182         // Since the first byte is occupied by the tag, we want the string chars [0..5] to land
183         // on bytes [1..6] => the fastest way is to read8 @(src - 1) (always safe, because the
184         // string requires a " prefix at the very least).
185         memcpy(s64, src - 1, 8);
186 
187 #if defined(SK_CPU_LENDIAN)
188         // The mask for a max-length string (6), with a leading tag and trailing \0 is
189         // 0x00ffffffffffff00.  Accounting for the final left-shift, this becomes
190         // 0x0000ffffffffffff.
191         *s64 &= (0x0000ffffffffffffULL >> ((kMaxInlineStringSize - size) * 8)) // trailing \0s
192                     << 8;                                                      // tag byte
193 #else
194         static_assert(false, "Big-endian builds are not supported at this time.");
195 #endif
196     }
197 };
198 
199 } // namespace
200 
StringValue(const char * src,SkArenaAlloc & alloc)201 StringValue::StringValue(const char* src, SkArenaAlloc& alloc)
202     : StringValue(src, strlen(src), alloc) {}
203 
StringValue(const char * src,size_t size,SkArenaAlloc & alloc)204 StringValue::StringValue(const char* src, size_t size, SkArenaAlloc& alloc) {
205     new (this) FastString(src, size, src, alloc);
206 }
207 
ObjectValue(const Member * src,size_t size,SkArenaAlloc & alloc)208 ObjectValue::ObjectValue(const Member* src, size_t size, SkArenaAlloc& alloc) {
209     this->init_tagged_pointer(Tag::kObject, MakeVector<Member>(size, src, alloc));
210     SkASSERT(this->getTag() == Tag::kObject);
211 }
212 
213 
214 // Boring public Value glue.
215 
// Equality-only string compare: returns 0 when the two null-terminated strings are
// identical and 1 otherwise (no ordering information, unlike strcmp).
static int inline_strcmp(const char a[], const char b[]) {
    size_t i = 0;

    // Walk a; the first mismatch (including b ending early) settles it.
    while (a[i] != '\0') {
        if (a[i] != b[i]) {
            return 1;
        }
        ++i;
    }

    // a is exhausted: equal only if b ends here too.
    return b[i] == '\0' ? 0 : 1;
}
228 
find(const char * key) const229 const Member* ObjectValue::find(const char* key) const {
230     // Reverse search for duplicates resolution (policy: return last).
231     const auto* begin  = this->begin();
232     const auto* member = this->end();
233 
234     while (member > begin) {
235         --member;
236         if (0 == inline_strcmp(key, member->fKey.as<StringValue>().begin())) {
237             return member;
238         }
239     }
240 
241     return nullptr;
242 }
243 
writable(const char * key,SkArenaAlloc & alloc) const244 Value& ObjectValue::writable(const char* key, SkArenaAlloc& alloc) const {
245     Member* writable_member = const_cast<Member*>(this->find(key));
246 
247     if (!writable_member) {
248         ObjectValue* writable_obj = const_cast<ObjectValue*>(this);
249         writable_obj->init_tagged_pointer(Tag::kObject, MakeVector<Member>(this->size() + 1,
250                                                                            this->begin(),
251                                                                            this->size(),
252                                                                            alloc));
253         writable_member         = const_cast<Member*>(writable_obj->end() - 1);
254         writable_member->fKey   = StringValue(key, strlen(key), alloc);
255         writable_member->fValue = NullValue();
256     }
257 
258 
259     return writable_member->fValue;
260 }
261 
262 namespace {
263 
264 // Lexer/parser inspired by rapidjson [1], sajson [2] and pjson [3].
265 //
266 // [1] https://github.com/Tencent/rapidjson/
267 // [2] https://github.com/chadaustin/sajson
268 // [3] https://pastebin.com/hnhSTL3h
269 
270 
// Per-character classification flags, indexed by unsigned char value:
//
//   0x01 - plain ASCII string character
//   0x02 - whitespace
//   0x04 - string terminator (" \\ \0 [control chars] **AND } ]** <- see matchString notes)
//   0x08 - 0-9
//   0x10 - 0-9 e E .
//   0x20 - scope terminator (} ])
static constexpr uint8_t g_token_flags[256] = {
 //    0     1     2     3     4     5     6     7        8     9     A     B     C     D     E     F
    0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,    0x04, 0x06, 0x06, 0x04, 0x04, 0x06, 0x04, 0x04, // 0
    0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,    0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, // 1
    0x03, 0x01, 0x04, 0x01, 0x01, 0x01, 0x01, 0x01,    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x11, 0x01, // 2
    0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19,    0x19, 0x19, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 3
    0x01, 0x01, 0x01, 0x01, 0x01, 0x11, 0x01, 0x01,    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 4
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,    0x01, 0x01, 0x01, 0x01, 0x04, 0x25, 0x01, 0x01, // 5
    0x01, 0x01, 0x01, 0x01, 0x01, 0x11, 0x01, 0x01,    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 6
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,    0x01, 0x01, 0x01, 0x01, 0x01, 0x25, 0x01, 0x01, // 7

 // 128-255: no flags.  These entries are left to aggregate initialization, which
 // zero-fills every element without an explicit initializer.
};

// Tests a single classification flag for c (indexing by the unsigned char value).
static inline bool has_token_flag(char c, uint8_t flag) {
    return (g_token_flags[static_cast<uint8_t>(c)] & flag) != 0;
}

static inline bool is_ws(char c)       { return has_token_flag(c, 0x02); }
static inline bool is_eostring(char c) { return has_token_flag(c, 0x04); }
static inline bool is_digit(char c)    { return has_token_flag(c, 0x08); }
static inline bool is_numeric(char c)  { return has_token_flag(c, 0x10); }
static inline bool is_eoscope(char c)  { return has_token_flag(c, 0x20); }
300 
skip_ws(const char * p)301 static inline const char* skip_ws(const char* p) {
302     while (is_ws(*p)) ++p;
303     return p;
304 }
305 
pow10(int32_t exp)306 static inline float pow10(int32_t exp) {
307     static constexpr float g_pow10_table[63] =
308     {
309        1.e-031f, 1.e-030f, 1.e-029f, 1.e-028f, 1.e-027f, 1.e-026f, 1.e-025f, 1.e-024f,
310        1.e-023f, 1.e-022f, 1.e-021f, 1.e-020f, 1.e-019f, 1.e-018f, 1.e-017f, 1.e-016f,
311        1.e-015f, 1.e-014f, 1.e-013f, 1.e-012f, 1.e-011f, 1.e-010f, 1.e-009f, 1.e-008f,
312        1.e-007f, 1.e-006f, 1.e-005f, 1.e-004f, 1.e-003f, 1.e-002f, 1.e-001f, 1.e+000f,
313        1.e+001f, 1.e+002f, 1.e+003f, 1.e+004f, 1.e+005f, 1.e+006f, 1.e+007f, 1.e+008f,
314        1.e+009f, 1.e+010f, 1.e+011f, 1.e+012f, 1.e+013f, 1.e+014f, 1.e+015f, 1.e+016f,
315        1.e+017f, 1.e+018f, 1.e+019f, 1.e+020f, 1.e+021f, 1.e+022f, 1.e+023f, 1.e+024f,
316        1.e+025f, 1.e+026f, 1.e+027f, 1.e+028f, 1.e+029f, 1.e+030f, 1.e+031f
317     };
318 
319     static constexpr int32_t k_exp_offset = std::size(g_pow10_table) / 2;
320 
321     // We only support negative exponents for now.
322     SkASSERT(exp <= 0);
323 
324     return (exp >= -k_exp_offset) ? g_pow10_table[exp + k_exp_offset]
325                                   : std::pow(10.0f, static_cast<float>(exp));
326 }
327 
328 class DOMParser {
329 public:
DOMParser(SkArenaAlloc & alloc)330     explicit DOMParser(SkArenaAlloc& alloc)
331         : fAlloc(alloc) {
332         fValueStack.reserve(kValueStackReserve);
333         fUnescapeBuffer.reserve(kUnescapeBufferReserve);
334     }
335 
// Parses the JSON text in p[0..size) and returns the root value.
//
// The root must be an object or an array: after trimming trailing whitespace the input
// has to end in '}' or ']', and the first non-whitespace char has to be '{' or '['.
//
// Returns the root value on success and a NullValue on any error (error details are
// only recorded by error() when SK_JSON_REPORT_ERRORS is defined).
//
// The body is a goto-driven state machine; pending values accumulate on fValueStack and
// are collapsed into an object/array whenever a scope closes.
//
// NOTE(review): the scanning helpers dereference p without bounds checks and end-of-input
// is only tested when a scope closes (see the p_stop comment below) -- confirm what
// callers guarantee about the buffer.
parse(const char * p,size_t size)336     Value parse(const char* p, size_t size) {
337         if (!size) {
338             return this->error(NullValue(), p, "invalid empty input");
339         }
340 
        // p_stop is the last input char (inclusive), not one-past-the-end.
341         const char* p_stop = p + size - 1;
342 
343         // We're only checking for end-of-stream on object/array close('}',']'),
344         // so we must trim any whitespace from the buffer tail.
345         while (p_stop > p && is_ws(*p_stop)) --p_stop;
346 
347         SkASSERT(p_stop >= p && p_stop < p + size);
348         if (!is_eoscope(*p_stop)) {
349             return this->error(NullValue(), p_stop, "invalid top-level value");
350         }
351 
352         p = skip_ws(p);
353 
354         switch (*p) {
355         case '{':
356             goto match_object;
357         case '[':
358             goto match_array;
359         default:
360             return this->error(NullValue(), p, "invalid top-level value");
361         }
362 
        // State: at '{' -- open a new object scope; an immediate '}' closes it empty.
363     match_object:
364         SkASSERT(*p == '{');
365         p = skip_ws(p + 1);
366 
367         this->pushObjectScope();
368 
369         if (*p == '}') goto pop_object;
370 
371         // goto match_object_key;
        // State: expecting a quoted member key followed by ':'.
372     match_object_key:
373         p = skip_ws(p);
374         if (*p != '"') return this->error(NullValue(), p, "expected object key");
375 
376         p = this->matchString(p, p_stop, [this](const char* key, size_t size, const char* eos) {
377             this->pushObjectKey(key, size, eos);
378         });
        // matchString() returns nullptr on failure.
379         if (!p) return NullValue();
380 
381         p = skip_ws(p);
382         if (*p != ':') return this->error(NullValue(), p, "expected ':' separator");
383 
384         ++p;
385 
386         // goto match_value;
        // State: expecting a value; dispatch on its first char.
387     match_value:
388         p = skip_ws(p);
389 
390         switch (*p) {
391         case '\0':
392             return this->error(NullValue(), p, "unexpected input end");
393         case '"':
394             p = this->matchString(p, p_stop, [this](const char* str, size_t size, const char* eos) {
395                 this->pushString(str, size, eos);
396             });
397             break;
398         case '[':
399             goto match_array;
400         case 'f':
401             p = this->matchFalse(p);
402             break;
403         case 'n':
404             p = this->matchNull(p);
405             break;
406         case 't':
407             p = this->matchTrue(p);
408             break;
409         case '{':
410             goto match_object;
411         default:
412             p = this->matchNumber(p);
413             break;
414         }
415 
        // The matchers signal failure by returning nullptr.
416         if (!p) return NullValue();
417 
418         // goto match_post_value;
        // State: a value was just completed; expect ',' or the scope terminator.
419     match_post_value:
420         SkASSERT(!this->inTopLevelScope());
421 
422         p = skip_ws(p);
423         switch (*p) {
424         case ',':
425             ++p;
426             if (this->inObjectScope()) {
427                 goto match_object_key;
428             } else {
429                 SkASSERT(this->inArrayScope());
430                 goto match_value;
431             }
432         case ']':
433             goto pop_array;
434         case '}':
435             goto pop_object;
436         default:
437             return this->error(NullValue(), p - 1, "unexpected value-trailing token");
438         }
439 
440         // unreachable
441         SkASSERT(false);
442 
        // State: at '}' -- only valid while the current scope is an object (or the
        // top level is never reached here with a pending array scope).
443     pop_object:
444         SkASSERT(*p == '}');
445 
446         if (this->inArrayScope()) {
447             return this->error(NullValue(), p, "unexpected object terminator");
448         }
449 
450         this->popObjectScope();
451 
452         // goto pop_common
        // State: a scope was just closed (shared by objects and arrays).
453     pop_common:
454         SkASSERT(is_eoscope(*p));
455 
456         if (this->inTopLevelScope()) {
457             SkASSERT(fValueStack.size() == 1);
458 
459             // Success condition: parsed the top level element and reached the stop token.
460             return p == p_stop
461                 ? fValueStack.front()
462                 : this->error(NullValue(), p + 1, "trailing root garbage");
463         }
464 
        // Still inside an enclosing scope, so the input must not end here.
465         if (p == p_stop) {
466             return this->error(NullValue(), p, "unexpected end-of-input");
467         }
468 
469         ++p;
470 
471         goto match_post_value;
472 
        // State: at '[' -- open a new array scope; an immediate ']' closes it empty.
473     match_array:
474         SkASSERT(*p == '[');
475         p = skip_ws(p + 1);
476 
477         this->pushArrayScope();
478 
479         if (*p != ']') goto match_value;
480 
481         // goto pop_array;
        // State: at ']' -- rejected while the current scope is an object.
482     pop_array:
483         SkASSERT(*p == ']');
484 
485         if (this->inObjectScope()) {
486             return this->error(NullValue(), p, "unexpected array terminator");
487         }
488 
489         this->popArrayScope();
490 
491         goto pop_common;
492 
493         SkASSERT(false);
494         return NullValue();
495     }
496 
getError() const497     std::tuple<const char*, const SkString> getError() const {
498         return std::make_tuple(fErrorToken, fErrorMessage);
499     }
500 
501 private:
502     SkArenaAlloc&         fAlloc;
503 
504     // Pending values stack.
505     inline static constexpr size_t kValueStackReserve = 256;
506     std::vector<Value>    fValueStack;
507 
508     // String unescape buffer.
509     inline static constexpr size_t kUnescapeBufferReserve = 512;
510     std::vector<char>     fUnescapeBuffer;
511 
512     // Tracks the current object/array scope, as an index into fValueStack:
513     //
514     //   - for objects: fScopeIndex =  (index of first value in scope)
515     //   - for arrays : fScopeIndex = -(index of first value in scope)
516     //
517     // fScopeIndex == 0 IFF we are at the top level (no current/active scope).
518     intptr_t              fScopeIndex = 0;
519 
520     // Error reporting.
521     const char*           fErrorToken = nullptr;
522     SkString              fErrorMessage;
523 
inTopLevelScope() const524     bool inTopLevelScope() const { return fScopeIndex == 0; }
inObjectScope() const525     bool inObjectScope()   const { return fScopeIndex >  0; }
inArrayScope() const526     bool inArrayScope()    const { return fScopeIndex <  0; }
527 
528     // Helper for masquerading raw primitive types as Values (bypassing tagging, etc).
529     template <typename T>
530     class RawValue final : public Value {
531     public:
RawValue(T v)532         explicit RawValue(T v) {
533             static_assert(sizeof(T) <= sizeof(Value), "");
534             *this->cast<T>() = v;
535         }
536 
operator *() const537         T operator *() const { return *this->cast<T>(); }
538     };
539 
540     template <typename VectorT>
popScopeAsVec(size_t scope_start)541     void popScopeAsVec(size_t scope_start) {
542         SkASSERT(scope_start > 0);
543         SkASSERT(scope_start <= fValueStack.size());
544 
545         using T = typename VectorT::ValueT;
546         static_assert( sizeof(T) >=  sizeof(Value), "");
547         static_assert( sizeof(T)  %  sizeof(Value) == 0, "");
548         static_assert(alignof(T) == alignof(Value), "");
549 
550         const auto scope_count = fValueStack.size() - scope_start,
551                          count = scope_count / (sizeof(T) / sizeof(Value));
552         SkASSERT(scope_count % (sizeof(T) / sizeof(Value)) == 0);
553 
554         const auto* begin = reinterpret_cast<const T*>(fValueStack.data() + scope_start);
555 
556         // Restore the previous scope index from saved placeholder value,
557         // and instantiate as a vector of values in scope.
558         auto& placeholder = fValueStack[scope_start - 1];
559         fScopeIndex = *static_cast<RawValue<intptr_t>&>(placeholder);
560         placeholder = VectorT(begin, count, fAlloc);
561 
562         // Drop the (consumed) values in scope.
563         fValueStack.resize(scope_start);
564     }
565 
pushObjectScope()566     void pushObjectScope() {
567         // Save a scope index now, and then later we'll overwrite this value as the Object itself.
568         fValueStack.push_back(RawValue<intptr_t>(fScopeIndex));
569 
570         // New object scope.
571         fScopeIndex = SkTo<intptr_t>(fValueStack.size());
572     }
573 
popObjectScope()574     void popObjectScope() {
575         SkASSERT(this->inObjectScope());
576         this->popScopeAsVec<ObjectValue>(SkTo<size_t>(fScopeIndex));
577 
578         SkDEBUGCODE(
579             const auto& obj = fValueStack.back().as<ObjectValue>();
580             SkASSERT(obj.is<ObjectValue>());
581             for (const auto& member : obj) {
582                 SkASSERT(member.fKey.is<StringValue>());
583             }
584         )
585     }
586 
pushArrayScope()587     void pushArrayScope() {
588         // Save a scope index now, and then later we'll overwrite this value as the Array itself.
589         fValueStack.push_back(RawValue<intptr_t>(fScopeIndex));
590 
591         // New array scope.
592         fScopeIndex = -SkTo<intptr_t>(fValueStack.size());
593     }
594 
popArrayScope()595     void popArrayScope() {
596         SkASSERT(this->inArrayScope());
597         this->popScopeAsVec<ArrayValue>(SkTo<size_t>(-fScopeIndex));
598 
599         SkDEBUGCODE(
600             const auto& arr = fValueStack.back().as<ArrayValue>();
601             SkASSERT(arr.is<ArrayValue>());
602         )
603     }
604 
pushObjectKey(const char * key,size_t size,const char * eos)605     void pushObjectKey(const char* key, size_t size, const char* eos) {
606         SkASSERT(this->inObjectScope());
607         SkASSERT(fValueStack.size() >= SkTo<size_t>(fScopeIndex));
608         SkASSERT(!((fValueStack.size() - SkTo<size_t>(fScopeIndex)) & 1));
609         this->pushString(key, size, eos);
610     }
611 
pushTrue()612     void pushTrue() {
613         fValueStack.push_back(BoolValue(true));
614     }
615 
pushFalse()616     void pushFalse() {
617         fValueStack.push_back(BoolValue(false));
618     }
619 
pushNull()620     void pushNull() {
621         fValueStack.push_back(NullValue());
622     }
623 
pushString(const char * s,size_t size,const char * eos)624     void pushString(const char* s, size_t size, const char* eos) {
625         fValueStack.push_back(FastString(s, size, eos, fAlloc));
626     }
627 
pushInt32(int32_t i)628     void pushInt32(int32_t i) {
629         fValueStack.push_back(NumberValue(i));
630     }
631 
pushFloat(float f)632     void pushFloat(float f) {
633         fValueStack.push_back(NumberValue(f));
634     }
635 
// Error funnel: hands ret_val back to the caller so failures can be written as
// 'return this->error(...)'.  The offending position (p) and message are only
// recorded when SK_JSON_REPORT_ERRORS is defined.
template <typename T>
T error(T&& ret_val, const char* p, const char* msg) {
#if defined(SK_JSON_REPORT_ERRORS)
    fErrorMessage.set(msg);
    fErrorToken = p;
#endif
    return ret_val;
}
644 
matchTrue(const char * p)645     const char* matchTrue(const char* p) {
646         SkASSERT(p[0] == 't');
647 
648         if (p[1] == 'r' && p[2] == 'u' && p[3] == 'e') {
649             this->pushTrue();
650             return p + 4;
651         }
652 
653         return this->error(nullptr, p, "invalid token");
654     }
655 
matchFalse(const char * p)656     const char* matchFalse(const char* p) {
657         SkASSERT(p[0] == 'f');
658 
659         if (p[1] == 'a' && p[2] == 'l' && p[3] == 's' && p[4] == 'e') {
660             this->pushFalse();
661             return p + 5;
662         }
663 
664         return this->error(nullptr, p, "invalid token");
665     }
666 
matchNull(const char * p)667     const char* matchNull(const char* p) {
668         SkASSERT(p[0] == 'n');
669 
670         if (p[1] == 'u' && p[2] == 'l' && p[3] == 'l') {
671             this->pushNull();
672             return p + 4;
673         }
674 
675         return this->error(nullptr, p, "invalid token");
676     }
677 
unescapeString(const char * begin,const char * end)678     const std::vector<char>* unescapeString(const char* begin, const char* end) {
679         fUnescapeBuffer.clear();
680 
681         for (const auto* p = begin; p != end; ++p) {
682             if (*p != '\\') {
683                 fUnescapeBuffer.push_back(*p);
684                 continue;
685             }
686 
687             if (++p == end) {
688                 return nullptr;
689             }
690 
691             switch (*p) {
692             case  '"': fUnescapeBuffer.push_back( '"'); break;
693             case '\\': fUnescapeBuffer.push_back('\\'); break;
694             case  '/': fUnescapeBuffer.push_back( '/'); break;
695             case  'b': fUnescapeBuffer.push_back('\b'); break;
696             case  'f': fUnescapeBuffer.push_back('\f'); break;
697             case  'n': fUnescapeBuffer.push_back('\n'); break;
698             case  'r': fUnescapeBuffer.push_back('\r'); break;
699             case  't': fUnescapeBuffer.push_back('\t'); break;
700             case  'u': {
701                 if (p + 4 >= end) {
702                     return nullptr;
703                 }
704 
705                 uint32_t hexed;
706                 const char hex_str[] = {p[1], p[2], p[3], p[4], '\0'};
707                 const auto* eos = SkParse::FindHex(hex_str, &hexed);
708                 if (!eos || *eos) {
709                     return nullptr;
710                 }
711 
712                 char utf8[SkUTF::kMaxBytesInUTF8Sequence];
713                 const auto utf8_len = SkUTF::ToUTF8(SkTo<SkUnichar>(hexed), utf8);
714                 fUnescapeBuffer.insert(fUnescapeBuffer.end(), utf8, utf8 + utf8_len);
715                 p += 4;
716             } break;
717             default: return nullptr;
718             }
719         }
720 
721         return &fUnescapeBuffer;
722     }
723 
724     template <typename MatchFunc>
matchString(const char * p,const char * p_stop,MatchFunc && func)725     const char* matchString(const char* p, const char* p_stop, MatchFunc&& func) {
726         SkASSERT(*p == '"');
727         const auto* s_begin = p + 1;
728         bool requires_unescape = false;
729 
730         do {
731             // Consume string chars.
732             // This is the fast path, and hopefully we only hit it once then quick-exit below.
733             for (p = p + 1; !is_eostring(*p); ++p);
734 
735             if (*p == '"') {
736                 // Valid string found.
737                 if (!requires_unescape) {
738                     func(s_begin, p - s_begin, p_stop);
739                 } else {
740                     // Slow unescape.  We could avoid this extra copy with some effort,
741                     // but in practice escaped strings should be rare.
742                     const auto* buf = this->unescapeString(s_begin, p);
743                     if (!buf) {
744                         break;
745                     }
746 
747                     SkASSERT(!buf->empty());
748                     func(buf->data(), buf->size(), buf->data() + buf->size() - 1);
749                 }
750                 return p + 1;
751             }
752 
753             if (*p == '\\') {
754                 requires_unescape = true;
755                 ++p;
756                 continue;
757             }
758 
759             // End-of-scope chars are special: we use them to tag the end of the input.
760             // Thus they cannot be consumed indiscriminately -- we need to check if we hit the
761             // end of the input.  To that effect, we treat them as string terminators above,
762             // then we catch them here.
763             if (is_eoscope(*p)) {
764                 continue;
765             }
766 
767             // Invalid/unexpected char.
768             break;
769         } while (p != p_stop);
770 
771         // Premature end-of-input, or illegal string char.
772         return this->error(nullptr, s_begin - 1, "invalid string");
773     }
774 
matchFastFloatDecimalPart(const char * p,int sign,float f,int exp)775     const char* matchFastFloatDecimalPart(const char* p, int sign, float f, int exp) {
776         SkASSERT(exp <= 0);
777 
778         for (;;) {
779             if (!is_digit(*p)) break;
780             f = f * 10.f + (*p++ - '0'); --exp;
781             if (!is_digit(*p)) break;
782             f = f * 10.f + (*p++ - '0'); --exp;
783         }
784 
785         const auto decimal_scale = pow10(exp);
786         if (is_numeric(*p) || !decimal_scale) {
787             SkASSERT((*p == '.' || *p == 'e' || *p == 'E') || !decimal_scale);
788             // Malformed input, or an (unsupported) exponent, or a collapsed decimal factor.
789             return nullptr;
790         }
791 
792         this->pushFloat(sign * f * decimal_scale);
793 
794         return p;
795     }
796 
matchFastFloatPart(const char * p,int sign,float f)797     const char* matchFastFloatPart(const char* p, int sign, float f) {
798         for (;;) {
799             if (!is_digit(*p)) break;
800             f = f * 10.f + (*p++ - '0');
801             if (!is_digit(*p)) break;
802             f = f * 10.f + (*p++ - '0');
803         }
804 
805         if (!is_numeric(*p)) {
806             // Matched (integral) float.
807             this->pushFloat(sign * f);
808             return p;
809         }
810 
811         return (*p == '.') ? this->matchFastFloatDecimalPart(p + 1, sign, f, 0)
812                            : nullptr;
813     }
814 
matchFast32OrFloat(const char * p)815     const char* matchFast32OrFloat(const char* p) {
816         int sign = 1;
817         if (*p == '-') {
818             sign = -1;
819             ++p;
820         }
821 
822         const auto* digits_start = p;
823 
824         int32_t n32 = 0;
825 
826         // This is the largest absolute int32 value we can handle before
827         // risking overflow *on the next digit* (214748363).
828         static constexpr int32_t kMaxInt32 = (std::numeric_limits<int32_t>::max() - 9) / 10;
829 
830         if (is_digit(*p)) {
831             n32 = (*p++ - '0');
832             for (;;) {
833                 if (!is_digit(*p) || n32 > kMaxInt32) break;
834                 n32 = n32 * 10 + (*p++ - '0');
835             }
836         }
837 
838         if (!is_numeric(*p)) {
839             // Did we actually match any digits?
840             if (p > digits_start) {
841                 this->pushInt32(sign * n32);
842                 return p;
843             }
844             return nullptr;
845         }
846 
847         if (*p == '.') {
848             const auto* decimals_start = ++p;
849 
850             int exp = 0;
851 
852             for (;;) {
853                 if (!is_digit(*p) || n32 > kMaxInt32) break;
854                 n32 = n32 * 10 + (*p++ - '0'); --exp;
855                 if (!is_digit(*p) || n32 > kMaxInt32) break;
856                 n32 = n32 * 10 + (*p++ - '0'); --exp;
857             }
858 
859             if (!is_numeric(*p)) {
860                 // Did we actually match any digits?
861                 if (p > decimals_start) {
862                     this->pushFloat(sign * n32 * pow10(exp));
863                     return p;
864                 }
865                 return nullptr;
866             }
867 
868             if (n32 > kMaxInt32) {
869                 // we ran out on n32 bits
870                 return this->matchFastFloatDecimalPart(p, sign, n32, exp);
871             }
872         }
873 
874         return this->matchFastFloatPart(p, sign, n32);
875     }
876 
matchNumber(const char * p)877     const char* matchNumber(const char* p) {
878         if (const auto* fast = this->matchFast32OrFloat(p)) return fast;
879 
880         // slow fallback
881         char* matched;
882         float f = strtof(p, &matched);
883         if (matched > p) {
884             this->pushFloat(f);
885             return matched;
886         }
887         return this->error(nullptr, p, "invalid numeric token");
888     }
889 };
890 
Write(const Value & v,SkWStream * stream)891 void Write(const Value& v, SkWStream* stream) {
892     // We use the address of these as special tags in the pending list.
893     static const NullValue kArrayCloseTag,    // ]
894                            kObjectCloseTag,   // }
895                            kListSeparatorTag, // ,
896                            kKeySeparatorTag;  // :
897 
898     std::vector<const Value*> pending{&v};
899 
900     do {
901         const Value* val = pending.back();
902         pending.pop_back();
903 
904         if (val == &kArrayCloseTag) {
905             stream->writeText("]");
906             continue;
907         }
908 
909         if (val == &kObjectCloseTag) {
910             stream->writeText("}");
911             continue;
912         }
913 
914         if (val == &kListSeparatorTag) {
915             stream->writeText(",");
916             continue;
917         }
918 
919         if (val == &kKeySeparatorTag) {
920             stream->writeText(":");
921             continue;
922         }
923 
924         switch (val->getType()) {
925         case Value::Type::kNull:
926             stream->writeText("null");
927             break;
928         case Value::Type::kBool:
929             stream->writeText(*val->as<BoolValue>() ? "true" : "false");
930             break;
931         case Value::Type::kNumber:
932             stream->writeScalarAsText(*val->as<NumberValue>());
933             break;
934         case Value::Type::kString:
935             stream->writeText("\"");
936             stream->writeText(val->as<StringValue>().begin());
937             stream->writeText("\"");
938             break;
939         case Value::Type::kArray: {
940             const auto& array = val->as<ArrayValue>();
941             stream->writeText("[");
942             // "val, val, .. ]" in reverse order
943             pending.push_back(&kArrayCloseTag);
944             if (array.size() > 0) {
945                 bool last_value = true;
946                 for (const Value* it = array.end() - 1; it >= array.begin(); --it) {
947                     if (!last_value) pending.push_back(&kListSeparatorTag);
948                     pending.push_back(it);
949                     last_value = false;
950                 }
951             }
952         } break;
953         case Value::Type::kObject: {
954             const auto& object = val->as<ObjectValue>();
955             stream->writeText("{");
956             // "key: val, key: val, .. }" in reverse order
957             pending.push_back(&kObjectCloseTag);
958             if (object.size() > 0) {
959                 bool last_member = true;
960                 for (const Member* it = object.end() - 1; it >= object.begin(); --it) {
961                     if (!last_member) pending.push_back(&kListSeparatorTag);
962                     pending.push_back(&it->fValue);
963                     pending.push_back(&kKeySeparatorTag);
964                     pending.push_back(&it->fKey);
965                     last_member = false;
966                 }
967             }
968         } break;
969         }
970     } while (!pending.empty());
971 }
972 
973 } // namespace
974 
toString() const975 SkString Value::toString() const {
976     SkDynamicMemoryWStream wstream;
977     Write(*this, &wstream);
978     const auto data = wstream.detachAsData();
979     // TODO: is there a better way to pass data around without copying?
980     return SkString(static_cast<const char*>(data->data()), data->size());
981 }
982 
983 static constexpr size_t kMinChunkSize = 4096;
984 
DOM(const char * data,size_t size)985 DOM::DOM(const char* data, size_t size)
986     : fAlloc(kMinChunkSize) {
987     DOMParser parser(fAlloc);
988 
989     fRoot = parser.parse(data, size);
990 }
991 
write(SkWStream * stream) const992 void DOM::write(SkWStream* stream) const {
993     Write(fRoot, stream);
994 }
995 
996 } // namespace skjson
997