1*1c12ee1eSDan Willemsen// Copyright 2018 The Go Authors. All rights reserved. 2*1c12ee1eSDan Willemsen// Use of this source code is governed by a BSD-style 3*1c12ee1eSDan Willemsen// license that can be found in the LICENSE file. 4*1c12ee1eSDan Willemsen 5*1c12ee1eSDan Willemsenpackage json 6*1c12ee1eSDan Willemsen 7*1c12ee1eSDan Willemsenimport ( 8*1c12ee1eSDan Willemsen "strconv" 9*1c12ee1eSDan Willemsen "unicode" 10*1c12ee1eSDan Willemsen "unicode/utf16" 11*1c12ee1eSDan Willemsen "unicode/utf8" 12*1c12ee1eSDan Willemsen 13*1c12ee1eSDan Willemsen "google.golang.org/protobuf/internal/strs" 14*1c12ee1eSDan Willemsen) 15*1c12ee1eSDan Willemsen 16*1c12ee1eSDan Willemsenfunc (d *Decoder) parseString(in []byte) (string, int, error) { 17*1c12ee1eSDan Willemsen in0 := in 18*1c12ee1eSDan Willemsen if len(in) == 0 { 19*1c12ee1eSDan Willemsen return "", 0, ErrUnexpectedEOF 20*1c12ee1eSDan Willemsen } 21*1c12ee1eSDan Willemsen if in[0] != '"' { 22*1c12ee1eSDan Willemsen return "", 0, d.newSyntaxError(d.currPos(), "invalid character %q at start of string", in[0]) 23*1c12ee1eSDan Willemsen } 24*1c12ee1eSDan Willemsen in = in[1:] 25*1c12ee1eSDan Willemsen i := indexNeedEscapeInBytes(in) 26*1c12ee1eSDan Willemsen in, out := in[i:], in[:i:i] // set cap to prevent mutations 27*1c12ee1eSDan Willemsen for len(in) > 0 { 28*1c12ee1eSDan Willemsen switch r, n := utf8.DecodeRune(in); { 29*1c12ee1eSDan Willemsen case r == utf8.RuneError && n == 1: 30*1c12ee1eSDan Willemsen return "", 0, d.newSyntaxError(d.currPos(), "invalid UTF-8 in string") 31*1c12ee1eSDan Willemsen case r < ' ': 32*1c12ee1eSDan Willemsen return "", 0, d.newSyntaxError(d.currPos(), "invalid character %q in string", r) 33*1c12ee1eSDan Willemsen case r == '"': 34*1c12ee1eSDan Willemsen in = in[1:] 35*1c12ee1eSDan Willemsen n := len(in0) - len(in) 36*1c12ee1eSDan Willemsen return string(out), n, nil 37*1c12ee1eSDan Willemsen case r == '\\': 38*1c12ee1eSDan Willemsen if len(in) < 2 { 39*1c12ee1eSDan Willemsen return "", 0, ErrUnexpectedEOF 40*1c12ee1eSDan Willemsen } 41*1c12ee1eSDan Willemsen switch r := in[1]; r { 42*1c12ee1eSDan Willemsen case '"', '\\', '/': 43*1c12ee1eSDan Willemsen in, out = in[2:], append(out, r) 44*1c12ee1eSDan Willemsen case 'b': 45*1c12ee1eSDan Willemsen in, out = in[2:], append(out, '\b') 46*1c12ee1eSDan Willemsen case 'f': 47*1c12ee1eSDan Willemsen in, out = in[2:], append(out, '\f') 48*1c12ee1eSDan Willemsen case 'n': 49*1c12ee1eSDan Willemsen in, out = in[2:], append(out, '\n') 50*1c12ee1eSDan Willemsen case 'r': 51*1c12ee1eSDan Willemsen in, out = in[2:], append(out, '\r') 52*1c12ee1eSDan Willemsen case 't': 53*1c12ee1eSDan Willemsen in, out = in[2:], append(out, '\t') 54*1c12ee1eSDan Willemsen case 'u': 55*1c12ee1eSDan Willemsen if len(in) < 6 { 56*1c12ee1eSDan Willemsen return "", 0, ErrUnexpectedEOF 57*1c12ee1eSDan Willemsen } 58*1c12ee1eSDan Willemsen v, err := strconv.ParseUint(string(in[2:6]), 16, 16) 59*1c12ee1eSDan Willemsen if err != nil { 60*1c12ee1eSDan Willemsen return "", 0, d.newSyntaxError(d.currPos(), "invalid escape code %q in string", in[:6]) 61*1c12ee1eSDan Willemsen } 62*1c12ee1eSDan Willemsen in = in[6:] 63*1c12ee1eSDan Willemsen 64*1c12ee1eSDan Willemsen r := rune(v) 65*1c12ee1eSDan Willemsen if utf16.IsSurrogate(r) { 66*1c12ee1eSDan Willemsen if len(in) < 6 { 67*1c12ee1eSDan Willemsen return "", 0, ErrUnexpectedEOF 68*1c12ee1eSDan Willemsen } 69*1c12ee1eSDan Willemsen v, err := strconv.ParseUint(string(in[2:6]), 16, 16) 70*1c12ee1eSDan Willemsen r = utf16.DecodeRune(r, rune(v)) 71*1c12ee1eSDan Willemsen if in[0] != '\\' || in[1] != 'u' || 72*1c12ee1eSDan Willemsen r == unicode.ReplacementChar || err != nil { 73*1c12ee1eSDan Willemsen return "", 0, d.newSyntaxError(d.currPos(), "invalid escape code %q in string", in[:6]) 74*1c12ee1eSDan Willemsen } 75*1c12ee1eSDan Willemsen in = in[6:] 76*1c12ee1eSDan Willemsen } 77*1c12ee1eSDan Willemsen out = append(out, string(r)...) 78*1c12ee1eSDan Willemsen default: 79*1c12ee1eSDan Willemsen return "", 0, d.newSyntaxError(d.currPos(), "invalid escape code %q in string", in[:2]) 80*1c12ee1eSDan Willemsen } 81*1c12ee1eSDan Willemsen default: 82*1c12ee1eSDan Willemsen i := indexNeedEscapeInBytes(in[n:]) 83*1c12ee1eSDan Willemsen in, out = in[n+i:], append(out, in[:n+i]...) 84*1c12ee1eSDan Willemsen } 85*1c12ee1eSDan Willemsen } 86*1c12ee1eSDan Willemsen return "", 0, ErrUnexpectedEOF 87*1c12ee1eSDan Willemsen} 88*1c12ee1eSDan Willemsen 89*1c12ee1eSDan Willemsen// indexNeedEscapeInBytes returns the index of the character that needs 90*1c12ee1eSDan Willemsen// escaping. If no characters need escaping, this returns the input length. 91*1c12ee1eSDan Willemsenfunc indexNeedEscapeInBytes(b []byte) int { return indexNeedEscapeInString(strs.UnsafeString(b)) } 92