1 /*
2 * Copyright 2018 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8 #include "src/utils/SkJSON.h"
9
10 #include "include/core/SkData.h"
11 #include "include/core/SkRefCnt.h"
12 #include "include/core/SkStream.h"
13 #include "include/core/SkString.h"
14 #include "include/private/base/SkDebug.h"
15 #include "include/private/base/SkMalloc.h"
16 #include "include/private/base/SkTo.h"
17 #include "include/utils/SkParse.h"
18 #include "src/base/SkArenaAlloc.h"
19 #include "src/base/SkUTF.h"
20
21 #include <cmath>
22 #include <cstdint>
23 #include <cstdlib>
24 #include <limits>
25 #include <new>
26 #include <tuple>
27 #include <vector>
28
29 namespace skjson {
30
31 // #define SK_JSON_REPORT_ERRORS
32
33 static_assert( sizeof(Value) == 8, "");
34 static_assert(alignof(Value) == 8, "");
35
36 static constexpr size_t kRecAlign = alignof(Value);
37
init_tagged(Tag t)38 void Value::init_tagged(Tag t) {
39 memset(fData8, 0, sizeof(fData8));
40 fData8[0] = SkTo<uint8_t>(t);
41 SkASSERT(this->getTag() == t);
42 }
43
44 // Pointer values store a type (in the lower kTagBits bits) and a pointer.
init_tagged_pointer(Tag t,void * p)45 void Value::init_tagged_pointer(Tag t, void* p) {
46 if (sizeof(Value) == sizeof(uintptr_t)) {
47 *this->cast<uintptr_t>() = reinterpret_cast<uintptr_t>(p);
48 // For 64-bit, we rely on the pointer lower bits being zero.
49 SkASSERT(!(fData8[0] & kTagMask));
50 fData8[0] |= SkTo<uint8_t>(t);
51 } else {
52 // For 32-bit, we store the pointer in the upper word
53 SkASSERT(sizeof(Value) == sizeof(uintptr_t) * 2);
54 this->init_tagged(t);
55 *this->cast<uintptr_t>() = reinterpret_cast<uintptr_t>(p);
56 }
57
58 SkASSERT(this->getTag() == t);
59 SkASSERT(this->ptr<void>() == p);
60 }
61
NullValue()62 NullValue::NullValue() {
63 this->init_tagged(Tag::kNull);
64 SkASSERT(this->getTag() == Tag::kNull);
65 }
66
BoolValue(bool b)67 BoolValue::BoolValue(bool b) {
68 this->init_tagged(Tag::kBool);
69 *this->cast<bool>() = b;
70 SkASSERT(this->getTag() == Tag::kBool);
71 }
72
NumberValue(int32_t i)73 NumberValue::NumberValue(int32_t i) {
74 this->init_tagged(Tag::kInt);
75 *this->cast<int32_t>() = i;
76 SkASSERT(this->getTag() == Tag::kInt);
77 }
78
NumberValue(float f)79 NumberValue::NumberValue(float f) {
80 this->init_tagged(Tag::kFloat);
81 *this->cast<float>() = f;
82 SkASSERT(this->getTag() == Tag::kFloat);
83 }
84
85 // Vector recs point to externally allocated slabs with the following layout:
86 //
87 // [size_t n] [REC_0] ... [REC_n-1] [optional extra trailing storage]
88 //
89 // Long strings use extra_alloc_size == 1 to store the \0 terminator.
90 //
91 template <typename T, size_t extra_alloc_size = 0>
MakeVector(size_t vec_size,const void * src,size_t src_size,SkArenaAlloc & alloc)92 static void* MakeVector(size_t vec_size, const void* src, size_t src_size, SkArenaAlloc& alloc) {
93 // The Ts are already in memory, so their size should be safe.
94 const auto total_size = sizeof(size_t) + vec_size * sizeof(T) + extra_alloc_size;
95 auto* size_ptr = reinterpret_cast<size_t*>(alloc.makeBytesAlignedTo(total_size, kRecAlign));
96
97 *size_ptr = vec_size;
98 sk_careful_memcpy(size_ptr + 1, src, src_size * sizeof(T));
99
100 return size_ptr;
101 }
102
103 template <typename T, size_t extra_alloc_size = 0>
MakeVector(size_t vec_size,const void * src,SkArenaAlloc & alloc)104 static void* MakeVector(size_t vec_size, const void* src, SkArenaAlloc& alloc) {
105 return MakeVector<T, extra_alloc_size>(vec_size, src, vec_size, alloc);
106 }
107
ArrayValue(const Value * src,size_t size,SkArenaAlloc & alloc)108 ArrayValue::ArrayValue(const Value* src, size_t size, SkArenaAlloc& alloc) {
109 this->init_tagged_pointer(Tag::kArray, MakeVector<Value>(size, src, alloc));
110 SkASSERT(this->getTag() == Tag::kArray);
111 }
112
// Strings have two flavors:
//
// -- short strings (len <= 6) -> these are stored inline, in the record
//    (one byte reserved for the tag, one for the null terminator):
//
//        [tag] [str] [\0 padding]
//
//    The unused trailing bytes are always zero, so they double up as the null
//    terminator (the tag byte is zero too, 'cause kShortString == 0).
//
// -- long strings (len > 6) -> these are externally allocated vectors
//    (VectorValue<char, Value::Type::kString>).
//
// The string data plus a null-char terminator are copied over.
//
127 namespace {
128
129 // An internal string builder with a fast 8 byte short string load path
130 // (for the common case where the string is not at the end of the stream).
131 class FastString final : public Value {
132 public:
FastString(const char * src,size_t size,const char * eos,SkArenaAlloc & alloc)133 FastString(const char* src, size_t size, const char* eos, SkArenaAlloc& alloc) {
134 SkASSERT(src <= eos);
135
136 if (size > kMaxInlineStringSize) {
137 this->initLongString(src, size, alloc);
138 SkASSERT(this->getTag() == Tag::kString);
139 return;
140 }
141
142 // initFastShortString is faster (doh), but requires access to 6 chars past src.
143 if (src && src + 6 <= eos) {
144 this->initFastShortString(src, size);
145 } else {
146 this->initShortString(src, size);
147 }
148
149 SkASSERT(this->getTag() == Tag::kShortString);
150 }
151
152 private:
153 // first byte reserved for tagging, \0 terminator => 6 usable chars
154 inline static constexpr size_t kMaxInlineStringSize = sizeof(Value) - 2;
155
initLongString(const char * src,size_t size,SkArenaAlloc & alloc)156 void initLongString(const char* src, size_t size, SkArenaAlloc& alloc) {
157 SkASSERT(size > kMaxInlineStringSize);
158
159 this->init_tagged_pointer(Tag::kString, MakeVector<char, 1>(size, src, alloc));
160
161 auto* data = this->cast<VectorValue<char, Value::Type::kString>>()->begin();
162 const_cast<char*>(data)[size] = '\0';
163 }
164
initShortString(const char * src,size_t size)165 void initShortString(const char* src, size_t size) {
166 SkASSERT(size <= kMaxInlineStringSize);
167
168 this->init_tagged(Tag::kShortString);
169 sk_careful_memcpy(this->cast<char>(), src, size);
170 // Null terminator provided by init_tagged() above (fData8 is zero-initialized).
171 }
172
initFastShortString(const char * src,size_t size)173 void initFastShortString(const char* src, size_t size) {
174 SkASSERT(size <= kMaxInlineStringSize);
175
176 uint64_t* s64 = this->cast<uint64_t>();
177
178 // Load 8 chars and mask out the tag and \0 terminator.
179 // Note: we picked kShortString == 0 to avoid setting explicitly below.
180 static_assert(SkToU8(Tag::kShortString) == 0, "please don't break this");
181
182 // Since the first byte is occupied by the tag, we want the string chars [0..5] to land
183 // on bytes [1..6] => the fastest way is to read8 @(src - 1) (always safe, because the
184 // string requires a " prefix at the very least).
185 memcpy(s64, src - 1, 8);
186
187 #if defined(SK_CPU_LENDIAN)
188 // The mask for a max-length string (6), with a leading tag and trailing \0 is
189 // 0x00ffffffffffff00. Accounting for the final left-shift, this becomes
190 // 0x0000ffffffffffff.
191 *s64 &= (0x0000ffffffffffffULL >> ((kMaxInlineStringSize - size) * 8)) // trailing \0s
192 << 8; // tag byte
193 #else
194 static_assert(false, "Big-endian builds are not supported at this time.");
195 #endif
196 }
197 };
198
199 } // namespace
200
StringValue(const char * src,SkArenaAlloc & alloc)201 StringValue::StringValue(const char* src, SkArenaAlloc& alloc)
202 : StringValue(src, strlen(src), alloc) {}
203
StringValue(const char * src,size_t size,SkArenaAlloc & alloc)204 StringValue::StringValue(const char* src, size_t size, SkArenaAlloc& alloc) {
205 new (this) FastString(src, size, src, alloc);
206 }
207
ObjectValue(const Member * src,size_t size,SkArenaAlloc & alloc)208 ObjectValue::ObjectValue(const Member* src, size_t size, SkArenaAlloc& alloc) {
209 this->init_tagged_pointer(Tag::kObject, MakeVector<Member>(size, src, alloc));
210 SkASSERT(this->getTag() == Tag::kObject);
211 }
212
213
214 // Boring public Value glue.
215
// Equality-only string compare: returns 0 when the two null-terminated strings are
// identical and 1 otherwise (no ordering information, unlike strcmp).
static int inline_strcmp(const char a[], const char b[]) {
    // Walk |a| up to its terminator, bailing out on the first mismatch.
    while (const char ch = *a++) {
        if (ch != *b++) {
            return 1;
        }
    }

    // |a| is exhausted: equal only if |b| ends here as well.
    return (*b == 0) ? 0 : 1;
}
228
find(const char * key) const229 const Member* ObjectValue::find(const char* key) const {
230 // Reverse search for duplicates resolution (policy: return last).
231 const auto* begin = this->begin();
232 const auto* member = this->end();
233
234 while (member > begin) {
235 --member;
236 if (0 == inline_strcmp(key, member->fKey.as<StringValue>().begin())) {
237 return member;
238 }
239 }
240
241 return nullptr;
242 }
243
writable(const char * key,SkArenaAlloc & alloc) const244 Value& ObjectValue::writable(const char* key, SkArenaAlloc& alloc) const {
245 Member* writable_member = const_cast<Member*>(this->find(key));
246
247 if (!writable_member) {
248 ObjectValue* writable_obj = const_cast<ObjectValue*>(this);
249 writable_obj->init_tagged_pointer(Tag::kObject, MakeVector<Member>(this->size() + 1,
250 this->begin(),
251 this->size(),
252 alloc));
253 writable_member = const_cast<Member*>(writable_obj->end() - 1);
254 writable_member->fKey = StringValue(key, strlen(key), alloc);
255 writable_member->fValue = NullValue();
256 }
257
258
259 return writable_member->fValue;
260 }
261
262 namespace {
263
264 // Lexer/parser inspired by rapidjson [1], sajson [2] and pjson [3].
265 //
266 // [1] https://github.com/Tencent/rapidjson/
267 // [2] https://github.com/chadaustin/sajson
268 // [3] https://pastebin.com/hnhSTL3h
269
270
// Per-character classification flags, indexed by the unsigned char value:
//
//   0x01 - plain ASCII string character
//   0x02 - whitespace
//   0x04 - string terminator (" \\ \0 [control chars] **AND } ]** <- see matchString notes)
//   0x08 - 0-9
//   0x10 - 0-9 e E .
//   0x20 - scope terminator (} ])
static constexpr uint8_t g_token_flags[256] = {
 // x0   x1   x2   x3   x4   x5   x6   x7     x8   x9   xA   xB   xC   xD   xE   xF
    0x04,0x04,0x04,0x04,0x04,0x04,0x04,0x04,  0x04,0x06,0x06,0x04,0x04,0x06,0x04,0x04, // 0x
    0x04,0x04,0x04,0x04,0x04,0x04,0x04,0x04,  0x04,0x04,0x04,0x04,0x04,0x04,0x04,0x04, // 1x
    0x03,0x01,0x04,0x01,0x01,0x01,0x01,0x01,  0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x01, // 2x
    0x19,0x19,0x19,0x19,0x19,0x19,0x19,0x19,  0x19,0x19,0x01,0x01,0x01,0x01,0x01,0x01, // 3x
    0x01,0x01,0x01,0x01,0x01,0x11,0x01,0x01,  0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01, // 4x
    0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,  0x01,0x01,0x01,0x01,0x04,0x25,0x01,0x01, // 5x
    0x01,0x01,0x01,0x01,0x01,0x11,0x01,0x01,  0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01, // 6x
    0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,  0x01,0x01,0x01,0x01,0x01,0x25,0x01,0x01, // 7x

    // 128-255: no flags
    0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0
};
294
is_ws(char c)295 static inline bool is_ws(char c) { return g_token_flags[static_cast<uint8_t>(c)] & 0x02; }
is_eostring(char c)296 static inline bool is_eostring(char c) { return g_token_flags[static_cast<uint8_t>(c)] & 0x04; }
is_digit(char c)297 static inline bool is_digit(char c) { return g_token_flags[static_cast<uint8_t>(c)] & 0x08; }
is_numeric(char c)298 static inline bool is_numeric(char c) { return g_token_flags[static_cast<uint8_t>(c)] & 0x10; }
is_eoscope(char c)299 static inline bool is_eoscope(char c) { return g_token_flags[static_cast<uint8_t>(c)] & 0x20; }
300
skip_ws(const char * p)301 static inline const char* skip_ws(const char* p) {
302 while (is_ws(*p)) ++p;
303 return p;
304 }
305
pow10(int32_t exp)306 static inline float pow10(int32_t exp) {
307 static constexpr float g_pow10_table[63] =
308 {
309 1.e-031f, 1.e-030f, 1.e-029f, 1.e-028f, 1.e-027f, 1.e-026f, 1.e-025f, 1.e-024f,
310 1.e-023f, 1.e-022f, 1.e-021f, 1.e-020f, 1.e-019f, 1.e-018f, 1.e-017f, 1.e-016f,
311 1.e-015f, 1.e-014f, 1.e-013f, 1.e-012f, 1.e-011f, 1.e-010f, 1.e-009f, 1.e-008f,
312 1.e-007f, 1.e-006f, 1.e-005f, 1.e-004f, 1.e-003f, 1.e-002f, 1.e-001f, 1.e+000f,
313 1.e+001f, 1.e+002f, 1.e+003f, 1.e+004f, 1.e+005f, 1.e+006f, 1.e+007f, 1.e+008f,
314 1.e+009f, 1.e+010f, 1.e+011f, 1.e+012f, 1.e+013f, 1.e+014f, 1.e+015f, 1.e+016f,
315 1.e+017f, 1.e+018f, 1.e+019f, 1.e+020f, 1.e+021f, 1.e+022f, 1.e+023f, 1.e+024f,
316 1.e+025f, 1.e+026f, 1.e+027f, 1.e+028f, 1.e+029f, 1.e+030f, 1.e+031f
317 };
318
319 static constexpr int32_t k_exp_offset = std::size(g_pow10_table) / 2;
320
321 // We only support negative exponents for now.
322 SkASSERT(exp <= 0);
323
324 return (exp >= -k_exp_offset) ? g_pow10_table[exp + k_exp_offset]
325 : std::pow(10.0f, static_cast<float>(exp));
326 }
327
328 class DOMParser {
329 public:
DOMParser(SkArenaAlloc & alloc)330 explicit DOMParser(SkArenaAlloc& alloc)
331 : fAlloc(alloc) {
332 fValueStack.reserve(kValueStackReserve);
333 fUnescapeBuffer.reserve(kUnescapeBufferReserve);
334 }
335
parse(const char * p,size_t size)336 Value parse(const char* p, size_t size) {
337 if (!size) {
338 return this->error(NullValue(), p, "invalid empty input");
339 }
340
341 const char* p_stop = p + size - 1;
342
343 // We're only checking for end-of-stream on object/array close('}',']'),
344 // so we must trim any whitespace from the buffer tail.
345 while (p_stop > p && is_ws(*p_stop)) --p_stop;
346
347 SkASSERT(p_stop >= p && p_stop < p + size);
348 if (!is_eoscope(*p_stop)) {
349 return this->error(NullValue(), p_stop, "invalid top-level value");
350 }
351
352 p = skip_ws(p);
353
354 switch (*p) {
355 case '{':
356 goto match_object;
357 case '[':
358 goto match_array;
359 default:
360 return this->error(NullValue(), p, "invalid top-level value");
361 }
362
363 match_object:
364 SkASSERT(*p == '{');
365 p = skip_ws(p + 1);
366
367 this->pushObjectScope();
368
369 if (*p == '}') goto pop_object;
370
371 // goto match_object_key;
372 match_object_key:
373 p = skip_ws(p);
374 if (*p != '"') return this->error(NullValue(), p, "expected object key");
375
376 p = this->matchString(p, p_stop, [this](const char* key, size_t size, const char* eos) {
377 this->pushObjectKey(key, size, eos);
378 });
379 if (!p) return NullValue();
380
381 p = skip_ws(p);
382 if (*p != ':') return this->error(NullValue(), p, "expected ':' separator");
383
384 ++p;
385
386 // goto match_value;
387 match_value:
388 p = skip_ws(p);
389
390 switch (*p) {
391 case '\0':
392 return this->error(NullValue(), p, "unexpected input end");
393 case '"':
394 p = this->matchString(p, p_stop, [this](const char* str, size_t size, const char* eos) {
395 this->pushString(str, size, eos);
396 });
397 break;
398 case '[':
399 goto match_array;
400 case 'f':
401 p = this->matchFalse(p);
402 break;
403 case 'n':
404 p = this->matchNull(p);
405 break;
406 case 't':
407 p = this->matchTrue(p);
408 break;
409 case '{':
410 goto match_object;
411 default:
412 p = this->matchNumber(p);
413 break;
414 }
415
416 if (!p) return NullValue();
417
418 // goto match_post_value;
419 match_post_value:
420 SkASSERT(!this->inTopLevelScope());
421
422 p = skip_ws(p);
423 switch (*p) {
424 case ',':
425 ++p;
426 if (this->inObjectScope()) {
427 goto match_object_key;
428 } else {
429 SkASSERT(this->inArrayScope());
430 goto match_value;
431 }
432 case ']':
433 goto pop_array;
434 case '}':
435 goto pop_object;
436 default:
437 return this->error(NullValue(), p - 1, "unexpected value-trailing token");
438 }
439
440 // unreachable
441 SkASSERT(false);
442
443 pop_object:
444 SkASSERT(*p == '}');
445
446 if (this->inArrayScope()) {
447 return this->error(NullValue(), p, "unexpected object terminator");
448 }
449
450 this->popObjectScope();
451
452 // goto pop_common
453 pop_common:
454 SkASSERT(is_eoscope(*p));
455
456 if (this->inTopLevelScope()) {
457 SkASSERT(fValueStack.size() == 1);
458
459 // Success condition: parsed the top level element and reached the stop token.
460 return p == p_stop
461 ? fValueStack.front()
462 : this->error(NullValue(), p + 1, "trailing root garbage");
463 }
464
465 if (p == p_stop) {
466 return this->error(NullValue(), p, "unexpected end-of-input");
467 }
468
469 ++p;
470
471 goto match_post_value;
472
473 match_array:
474 SkASSERT(*p == '[');
475 p = skip_ws(p + 1);
476
477 this->pushArrayScope();
478
479 if (*p != ']') goto match_value;
480
481 // goto pop_array;
482 pop_array:
483 SkASSERT(*p == ']');
484
485 if (this->inObjectScope()) {
486 return this->error(NullValue(), p, "unexpected array terminator");
487 }
488
489 this->popArrayScope();
490
491 goto pop_common;
492
493 SkASSERT(false);
494 return NullValue();
495 }
496
getError() const497 std::tuple<const char*, const SkString> getError() const {
498 return std::make_tuple(fErrorToken, fErrorMessage);
499 }
500
501 private:
502 SkArenaAlloc& fAlloc;
503
504 // Pending values stack.
505 inline static constexpr size_t kValueStackReserve = 256;
506 std::vector<Value> fValueStack;
507
508 // String unescape buffer.
509 inline static constexpr size_t kUnescapeBufferReserve = 512;
510 std::vector<char> fUnescapeBuffer;
511
512 // Tracks the current object/array scope, as an index into fStack:
513 //
514 // - for objects: fScopeIndex = (index of first value in scope)
515 // - for arrays : fScopeIndex = -(index of first value in scope)
516 //
517 // fScopeIndex == 0 IFF we are at the top level (no current/active scope).
518 intptr_t fScopeIndex = 0;
519
520 // Error reporting.
521 const char* fErrorToken = nullptr;
522 SkString fErrorMessage;
523
inTopLevelScope() const524 bool inTopLevelScope() const { return fScopeIndex == 0; }
inObjectScope() const525 bool inObjectScope() const { return fScopeIndex > 0; }
inArrayScope() const526 bool inArrayScope() const { return fScopeIndex < 0; }
527
528 // Helper for masquerading raw primitive types as Values (bypassing tagging, etc).
529 template <typename T>
530 class RawValue final : public Value {
531 public:
RawValue(T v)532 explicit RawValue(T v) {
533 static_assert(sizeof(T) <= sizeof(Value), "");
534 *this->cast<T>() = v;
535 }
536
operator *() const537 T operator *() const { return *this->cast<T>(); }
538 };
539
540 template <typename VectorT>
popScopeAsVec(size_t scope_start)541 void popScopeAsVec(size_t scope_start) {
542 SkASSERT(scope_start > 0);
543 SkASSERT(scope_start <= fValueStack.size());
544
545 using T = typename VectorT::ValueT;
546 static_assert( sizeof(T) >= sizeof(Value), "");
547 static_assert( sizeof(T) % sizeof(Value) == 0, "");
548 static_assert(alignof(T) == alignof(Value), "");
549
550 const auto scope_count = fValueStack.size() - scope_start,
551 count = scope_count / (sizeof(T) / sizeof(Value));
552 SkASSERT(scope_count % (sizeof(T) / sizeof(Value)) == 0);
553
554 const auto* begin = reinterpret_cast<const T*>(fValueStack.data() + scope_start);
555
556 // Restore the previous scope index from saved placeholder value,
557 // and instantiate as a vector of values in scope.
558 auto& placeholder = fValueStack[scope_start - 1];
559 fScopeIndex = *static_cast<RawValue<intptr_t>&>(placeholder);
560 placeholder = VectorT(begin, count, fAlloc);
561
562 // Drop the (consumed) values in scope.
563 fValueStack.resize(scope_start);
564 }
565
pushObjectScope()566 void pushObjectScope() {
567 // Save a scope index now, and then later we'll overwrite this value as the Object itself.
568 fValueStack.push_back(RawValue<intptr_t>(fScopeIndex));
569
570 // New object scope.
571 fScopeIndex = SkTo<intptr_t>(fValueStack.size());
572 }
573
popObjectScope()574 void popObjectScope() {
575 SkASSERT(this->inObjectScope());
576 this->popScopeAsVec<ObjectValue>(SkTo<size_t>(fScopeIndex));
577
578 SkDEBUGCODE(
579 const auto& obj = fValueStack.back().as<ObjectValue>();
580 SkASSERT(obj.is<ObjectValue>());
581 for (const auto& member : obj) {
582 SkASSERT(member.fKey.is<StringValue>());
583 }
584 )
585 }
586
pushArrayScope()587 void pushArrayScope() {
588 // Save a scope index now, and then later we'll overwrite this value as the Array itself.
589 fValueStack.push_back(RawValue<intptr_t>(fScopeIndex));
590
591 // New array scope.
592 fScopeIndex = -SkTo<intptr_t>(fValueStack.size());
593 }
594
popArrayScope()595 void popArrayScope() {
596 SkASSERT(this->inArrayScope());
597 this->popScopeAsVec<ArrayValue>(SkTo<size_t>(-fScopeIndex));
598
599 SkDEBUGCODE(
600 const auto& arr = fValueStack.back().as<ArrayValue>();
601 SkASSERT(arr.is<ArrayValue>());
602 )
603 }
604
pushObjectKey(const char * key,size_t size,const char * eos)605 void pushObjectKey(const char* key, size_t size, const char* eos) {
606 SkASSERT(this->inObjectScope());
607 SkASSERT(fValueStack.size() >= SkTo<size_t>(fScopeIndex));
608 SkASSERT(!((fValueStack.size() - SkTo<size_t>(fScopeIndex)) & 1));
609 this->pushString(key, size, eos);
610 }
611
pushTrue()612 void pushTrue() {
613 fValueStack.push_back(BoolValue(true));
614 }
615
pushFalse()616 void pushFalse() {
617 fValueStack.push_back(BoolValue(false));
618 }
619
pushNull()620 void pushNull() {
621 fValueStack.push_back(NullValue());
622 }
623
pushString(const char * s,size_t size,const char * eos)624 void pushString(const char* s, size_t size, const char* eos) {
625 fValueStack.push_back(FastString(s, size, eos, fAlloc));
626 }
627
pushInt32(int32_t i)628 void pushInt32(int32_t i) {
629 fValueStack.push_back(NumberValue(i));
630 }
631
pushFloat(float f)632 void pushFloat(float f) {
633 fValueStack.push_back(NumberValue(f));
634 }
635
// Error funnel: hands |ret_val| back to the caller, so failures can be written as
// `return this->error(...)`.  The offending position |p| and the message are recorded
// only when SK_JSON_REPORT_ERRORS is defined; otherwise they are ignored.
template <typename T>
T error(T&& ret_val, const char* p, const char* msg) {
#if defined(SK_JSON_REPORT_ERRORS)
    fErrorMessage.set(msg);
    fErrorToken = p;
#endif
    return ret_val;
}
644
matchTrue(const char * p)645 const char* matchTrue(const char* p) {
646 SkASSERT(p[0] == 't');
647
648 if (p[1] == 'r' && p[2] == 'u' && p[3] == 'e') {
649 this->pushTrue();
650 return p + 4;
651 }
652
653 return this->error(nullptr, p, "invalid token");
654 }
655
matchFalse(const char * p)656 const char* matchFalse(const char* p) {
657 SkASSERT(p[0] == 'f');
658
659 if (p[1] == 'a' && p[2] == 'l' && p[3] == 's' && p[4] == 'e') {
660 this->pushFalse();
661 return p + 5;
662 }
663
664 return this->error(nullptr, p, "invalid token");
665 }
666
matchNull(const char * p)667 const char* matchNull(const char* p) {
668 SkASSERT(p[0] == 'n');
669
670 if (p[1] == 'u' && p[2] == 'l' && p[3] == 'l') {
671 this->pushNull();
672 return p + 4;
673 }
674
675 return this->error(nullptr, p, "invalid token");
676 }
677
unescapeString(const char * begin,const char * end)678 const std::vector<char>* unescapeString(const char* begin, const char* end) {
679 fUnescapeBuffer.clear();
680
681 for (const auto* p = begin; p != end; ++p) {
682 if (*p != '\\') {
683 fUnescapeBuffer.push_back(*p);
684 continue;
685 }
686
687 if (++p == end) {
688 return nullptr;
689 }
690
691 switch (*p) {
692 case '"': fUnescapeBuffer.push_back( '"'); break;
693 case '\\': fUnescapeBuffer.push_back('\\'); break;
694 case '/': fUnescapeBuffer.push_back( '/'); break;
695 case 'b': fUnescapeBuffer.push_back('\b'); break;
696 case 'f': fUnescapeBuffer.push_back('\f'); break;
697 case 'n': fUnescapeBuffer.push_back('\n'); break;
698 case 'r': fUnescapeBuffer.push_back('\r'); break;
699 case 't': fUnescapeBuffer.push_back('\t'); break;
700 case 'u': {
701 if (p + 4 >= end) {
702 return nullptr;
703 }
704
705 uint32_t hexed;
706 const char hex_str[] = {p[1], p[2], p[3], p[4], '\0'};
707 const auto* eos = SkParse::FindHex(hex_str, &hexed);
708 if (!eos || *eos) {
709 return nullptr;
710 }
711
712 char utf8[SkUTF::kMaxBytesInUTF8Sequence];
713 const auto utf8_len = SkUTF::ToUTF8(SkTo<SkUnichar>(hexed), utf8);
714 fUnescapeBuffer.insert(fUnescapeBuffer.end(), utf8, utf8 + utf8_len);
715 p += 4;
716 } break;
717 default: return nullptr;
718 }
719 }
720
721 return &fUnescapeBuffer;
722 }
723
724 template <typename MatchFunc>
matchString(const char * p,const char * p_stop,MatchFunc && func)725 const char* matchString(const char* p, const char* p_stop, MatchFunc&& func) {
726 SkASSERT(*p == '"');
727 const auto* s_begin = p + 1;
728 bool requires_unescape = false;
729
730 do {
731 // Consume string chars.
732 // This is the fast path, and hopefully we only hit it once then quick-exit below.
733 for (p = p + 1; !is_eostring(*p); ++p);
734
735 if (*p == '"') {
736 // Valid string found.
737 if (!requires_unescape) {
738 func(s_begin, p - s_begin, p_stop);
739 } else {
740 // Slow unescape. We could avoid this extra copy with some effort,
741 // but in practice escaped strings should be rare.
742 const auto* buf = this->unescapeString(s_begin, p);
743 if (!buf) {
744 break;
745 }
746
747 SkASSERT(!buf->empty());
748 func(buf->data(), buf->size(), buf->data() + buf->size() - 1);
749 }
750 return p + 1;
751 }
752
753 if (*p == '\\') {
754 requires_unescape = true;
755 ++p;
756 continue;
757 }
758
759 // End-of-scope chars are special: we use them to tag the end of the input.
760 // Thus they cannot be consumed indiscriminately -- we need to check if we hit the
761 // end of the input. To that effect, we treat them as string terminators above,
762 // then we catch them here.
763 if (is_eoscope(*p)) {
764 continue;
765 }
766
767 // Invalid/unexpected char.
768 break;
769 } while (p != p_stop);
770
771 // Premature end-of-input, or illegal string char.
772 return this->error(nullptr, s_begin - 1, "invalid string");
773 }
774
matchFastFloatDecimalPart(const char * p,int sign,float f,int exp)775 const char* matchFastFloatDecimalPart(const char* p, int sign, float f, int exp) {
776 SkASSERT(exp <= 0);
777
778 for (;;) {
779 if (!is_digit(*p)) break;
780 f = f * 10.f + (*p++ - '0'); --exp;
781 if (!is_digit(*p)) break;
782 f = f * 10.f + (*p++ - '0'); --exp;
783 }
784
785 const auto decimal_scale = pow10(exp);
786 if (is_numeric(*p) || !decimal_scale) {
787 SkASSERT((*p == '.' || *p == 'e' || *p == 'E') || !decimal_scale);
788 // Malformed input, or an (unsupported) exponent, or a collapsed decimal factor.
789 return nullptr;
790 }
791
792 this->pushFloat(sign * f * decimal_scale);
793
794 return p;
795 }
796
matchFastFloatPart(const char * p,int sign,float f)797 const char* matchFastFloatPart(const char* p, int sign, float f) {
798 for (;;) {
799 if (!is_digit(*p)) break;
800 f = f * 10.f + (*p++ - '0');
801 if (!is_digit(*p)) break;
802 f = f * 10.f + (*p++ - '0');
803 }
804
805 if (!is_numeric(*p)) {
806 // Matched (integral) float.
807 this->pushFloat(sign * f);
808 return p;
809 }
810
811 return (*p == '.') ? this->matchFastFloatDecimalPart(p + 1, sign, f, 0)
812 : nullptr;
813 }
814
matchFast32OrFloat(const char * p)815 const char* matchFast32OrFloat(const char* p) {
816 int sign = 1;
817 if (*p == '-') {
818 sign = -1;
819 ++p;
820 }
821
822 const auto* digits_start = p;
823
824 int32_t n32 = 0;
825
826 // This is the largest absolute int32 value we can handle before
827 // risking overflow *on the next digit* (214748363).
828 static constexpr int32_t kMaxInt32 = (std::numeric_limits<int32_t>::max() - 9) / 10;
829
830 if (is_digit(*p)) {
831 n32 = (*p++ - '0');
832 for (;;) {
833 if (!is_digit(*p) || n32 > kMaxInt32) break;
834 n32 = n32 * 10 + (*p++ - '0');
835 }
836 }
837
838 if (!is_numeric(*p)) {
839 // Did we actually match any digits?
840 if (p > digits_start) {
841 this->pushInt32(sign * n32);
842 return p;
843 }
844 return nullptr;
845 }
846
847 if (*p == '.') {
848 const auto* decimals_start = ++p;
849
850 int exp = 0;
851
852 for (;;) {
853 if (!is_digit(*p) || n32 > kMaxInt32) break;
854 n32 = n32 * 10 + (*p++ - '0'); --exp;
855 if (!is_digit(*p) || n32 > kMaxInt32) break;
856 n32 = n32 * 10 + (*p++ - '0'); --exp;
857 }
858
859 if (!is_numeric(*p)) {
860 // Did we actually match any digits?
861 if (p > decimals_start) {
862 this->pushFloat(sign * n32 * pow10(exp));
863 return p;
864 }
865 return nullptr;
866 }
867
868 if (n32 > kMaxInt32) {
869 // we ran out on n32 bits
870 return this->matchFastFloatDecimalPart(p, sign, n32, exp);
871 }
872 }
873
874 return this->matchFastFloatPart(p, sign, n32);
875 }
876
matchNumber(const char * p)877 const char* matchNumber(const char* p) {
878 if (const auto* fast = this->matchFast32OrFloat(p)) return fast;
879
880 // slow fallback
881 char* matched;
882 float f = strtof(p, &matched);
883 if (matched > p) {
884 this->pushFloat(f);
885 return matched;
886 }
887 return this->error(nullptr, p, "invalid numeric token");
888 }
889 };
890
Write(const Value & v,SkWStream * stream)891 void Write(const Value& v, SkWStream* stream) {
892 // We use the address of these as special tags in the pending list.
893 static const NullValue kArrayCloseTag, // ]
894 kObjectCloseTag, // }
895 kListSeparatorTag, // ,
896 kKeySeparatorTag; // :
897
898 std::vector<const Value*> pending{&v};
899
900 do {
901 const Value* val = pending.back();
902 pending.pop_back();
903
904 if (val == &kArrayCloseTag) {
905 stream->writeText("]");
906 continue;
907 }
908
909 if (val == &kObjectCloseTag) {
910 stream->writeText("}");
911 continue;
912 }
913
914 if (val == &kListSeparatorTag) {
915 stream->writeText(",");
916 continue;
917 }
918
919 if (val == &kKeySeparatorTag) {
920 stream->writeText(":");
921 continue;
922 }
923
924 switch (val->getType()) {
925 case Value::Type::kNull:
926 stream->writeText("null");
927 break;
928 case Value::Type::kBool:
929 stream->writeText(*val->as<BoolValue>() ? "true" : "false");
930 break;
931 case Value::Type::kNumber:
932 stream->writeScalarAsText(*val->as<NumberValue>());
933 break;
934 case Value::Type::kString:
935 stream->writeText("\"");
936 stream->writeText(val->as<StringValue>().begin());
937 stream->writeText("\"");
938 break;
939 case Value::Type::kArray: {
940 const auto& array = val->as<ArrayValue>();
941 stream->writeText("[");
942 // "val, val, .. ]" in reverse order
943 pending.push_back(&kArrayCloseTag);
944 if (array.size() > 0) {
945 bool last_value = true;
946 for (const Value* it = array.end() - 1; it >= array.begin(); --it) {
947 if (!last_value) pending.push_back(&kListSeparatorTag);
948 pending.push_back(it);
949 last_value = false;
950 }
951 }
952 } break;
953 case Value::Type::kObject: {
954 const auto& object = val->as<ObjectValue>();
955 stream->writeText("{");
956 // "key: val, key: val, .. }" in reverse order
957 pending.push_back(&kObjectCloseTag);
958 if (object.size() > 0) {
959 bool last_member = true;
960 for (const Member* it = object.end() - 1; it >= object.begin(); --it) {
961 if (!last_member) pending.push_back(&kListSeparatorTag);
962 pending.push_back(&it->fValue);
963 pending.push_back(&kKeySeparatorTag);
964 pending.push_back(&it->fKey);
965 last_member = false;
966 }
967 }
968 } break;
969 }
970 } while (!pending.empty());
971 }
972
973 } // namespace
974
toString() const975 SkString Value::toString() const {
976 SkDynamicMemoryWStream wstream;
977 Write(*this, &wstream);
978 const auto data = wstream.detachAsData();
979 // TODO: is there a better way to pass data around without copying?
980 return SkString(static_cast<const char*>(data->data()), data->size());
981 }
982
983 static constexpr size_t kMinChunkSize = 4096;
984
DOM(const char * data,size_t size)985 DOM::DOM(const char* data, size_t size)
986 : fAlloc(kMinChunkSize) {
987 DOMParser parser(fAlloc);
988
989 fRoot = parser.parse(data, size);
990 }
991
write(SkWStream * stream) const992 void DOM::write(SkWStream* stream) const {
993 Write(fRoot, stream);
994 }
995
996 } // namespace skjson
997