xref: /aosp_15_r20/external/golang-protobuf/internal/encoding/json/decode_string.go (revision 1c12ee1efe575feb122dbf939ff15148a3b3e8f2)
1*1c12ee1eSDan Willemsen// Copyright 2018 The Go Authors. All rights reserved.
2*1c12ee1eSDan Willemsen// Use of this source code is governed by a BSD-style
3*1c12ee1eSDan Willemsen// license that can be found in the LICENSE file.
4*1c12ee1eSDan Willemsen
5*1c12ee1eSDan Willemsenpackage json
6*1c12ee1eSDan Willemsen
7*1c12ee1eSDan Willemsenimport (
8*1c12ee1eSDan Willemsen	"strconv"
9*1c12ee1eSDan Willemsen	"unicode"
10*1c12ee1eSDan Willemsen	"unicode/utf16"
11*1c12ee1eSDan Willemsen	"unicode/utf8"
12*1c12ee1eSDan Willemsen
13*1c12ee1eSDan Willemsen	"google.golang.org/protobuf/internal/strs"
14*1c12ee1eSDan Willemsen)
15*1c12ee1eSDan Willemsen
16*1c12ee1eSDan Willemsenfunc (d *Decoder) parseString(in []byte) (string, int, error) {
17*1c12ee1eSDan Willemsen	in0 := in
18*1c12ee1eSDan Willemsen	if len(in) == 0 {
19*1c12ee1eSDan Willemsen		return "", 0, ErrUnexpectedEOF
20*1c12ee1eSDan Willemsen	}
21*1c12ee1eSDan Willemsen	if in[0] != '"' {
22*1c12ee1eSDan Willemsen		return "", 0, d.newSyntaxError(d.currPos(), "invalid character %q at start of string", in[0])
23*1c12ee1eSDan Willemsen	}
24*1c12ee1eSDan Willemsen	in = in[1:]
25*1c12ee1eSDan Willemsen	i := indexNeedEscapeInBytes(in)
26*1c12ee1eSDan Willemsen	in, out := in[i:], in[:i:i] // set cap to prevent mutations
27*1c12ee1eSDan Willemsen	for len(in) > 0 {
28*1c12ee1eSDan Willemsen		switch r, n := utf8.DecodeRune(in); {
29*1c12ee1eSDan Willemsen		case r == utf8.RuneError && n == 1:
30*1c12ee1eSDan Willemsen			return "", 0, d.newSyntaxError(d.currPos(), "invalid UTF-8 in string")
31*1c12ee1eSDan Willemsen		case r < ' ':
32*1c12ee1eSDan Willemsen			return "", 0, d.newSyntaxError(d.currPos(), "invalid character %q in string", r)
33*1c12ee1eSDan Willemsen		case r == '"':
34*1c12ee1eSDan Willemsen			in = in[1:]
35*1c12ee1eSDan Willemsen			n := len(in0) - len(in)
36*1c12ee1eSDan Willemsen			return string(out), n, nil
37*1c12ee1eSDan Willemsen		case r == '\\':
38*1c12ee1eSDan Willemsen			if len(in) < 2 {
39*1c12ee1eSDan Willemsen				return "", 0, ErrUnexpectedEOF
40*1c12ee1eSDan Willemsen			}
41*1c12ee1eSDan Willemsen			switch r := in[1]; r {
42*1c12ee1eSDan Willemsen			case '"', '\\', '/':
43*1c12ee1eSDan Willemsen				in, out = in[2:], append(out, r)
44*1c12ee1eSDan Willemsen			case 'b':
45*1c12ee1eSDan Willemsen				in, out = in[2:], append(out, '\b')
46*1c12ee1eSDan Willemsen			case 'f':
47*1c12ee1eSDan Willemsen				in, out = in[2:], append(out, '\f')
48*1c12ee1eSDan Willemsen			case 'n':
49*1c12ee1eSDan Willemsen				in, out = in[2:], append(out, '\n')
50*1c12ee1eSDan Willemsen			case 'r':
51*1c12ee1eSDan Willemsen				in, out = in[2:], append(out, '\r')
52*1c12ee1eSDan Willemsen			case 't':
53*1c12ee1eSDan Willemsen				in, out = in[2:], append(out, '\t')
54*1c12ee1eSDan Willemsen			case 'u':
55*1c12ee1eSDan Willemsen				if len(in) < 6 {
56*1c12ee1eSDan Willemsen					return "", 0, ErrUnexpectedEOF
57*1c12ee1eSDan Willemsen				}
58*1c12ee1eSDan Willemsen				v, err := strconv.ParseUint(string(in[2:6]), 16, 16)
59*1c12ee1eSDan Willemsen				if err != nil {
60*1c12ee1eSDan Willemsen					return "", 0, d.newSyntaxError(d.currPos(), "invalid escape code %q in string", in[:6])
61*1c12ee1eSDan Willemsen				}
62*1c12ee1eSDan Willemsen				in = in[6:]
63*1c12ee1eSDan Willemsen
64*1c12ee1eSDan Willemsen				r := rune(v)
65*1c12ee1eSDan Willemsen				if utf16.IsSurrogate(r) {
66*1c12ee1eSDan Willemsen					if len(in) < 6 {
67*1c12ee1eSDan Willemsen						return "", 0, ErrUnexpectedEOF
68*1c12ee1eSDan Willemsen					}
69*1c12ee1eSDan Willemsen					v, err := strconv.ParseUint(string(in[2:6]), 16, 16)
70*1c12ee1eSDan Willemsen					r = utf16.DecodeRune(r, rune(v))
71*1c12ee1eSDan Willemsen					if in[0] != '\\' || in[1] != 'u' ||
72*1c12ee1eSDan Willemsen						r == unicode.ReplacementChar || err != nil {
73*1c12ee1eSDan Willemsen						return "", 0, d.newSyntaxError(d.currPos(), "invalid escape code %q in string", in[:6])
74*1c12ee1eSDan Willemsen					}
75*1c12ee1eSDan Willemsen					in = in[6:]
76*1c12ee1eSDan Willemsen				}
77*1c12ee1eSDan Willemsen				out = append(out, string(r)...)
78*1c12ee1eSDan Willemsen			default:
79*1c12ee1eSDan Willemsen				return "", 0, d.newSyntaxError(d.currPos(), "invalid escape code %q in string", in[:2])
80*1c12ee1eSDan Willemsen			}
81*1c12ee1eSDan Willemsen		default:
82*1c12ee1eSDan Willemsen			i := indexNeedEscapeInBytes(in[n:])
83*1c12ee1eSDan Willemsen			in, out = in[n+i:], append(out, in[:n+i]...)
84*1c12ee1eSDan Willemsen		}
85*1c12ee1eSDan Willemsen	}
86*1c12ee1eSDan Willemsen	return "", 0, ErrUnexpectedEOF
87*1c12ee1eSDan Willemsen}
88*1c12ee1eSDan Willemsen
89*1c12ee1eSDan Willemsen// indexNeedEscapeInBytes returns the index of the character that needs
90*1c12ee1eSDan Willemsen// escaping. If no characters need escaping, this returns the input length.
91*1c12ee1eSDan Willemsenfunc indexNeedEscapeInBytes(b []byte) int { return indexNeedEscapeInString(strs.UnsafeString(b)) }
92