// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package text

import (
	"math"
	"math/bits"
	"strconv"
	"strings"
	"unicode/utf8"

	"google.golang.org/protobuf/internal/detrand"
	"google.golang.org/protobuf/internal/errors"
)

// encType represents an encoding type, i.e. the kind of construct that was
// most recently written to the output. The values are distinct bits so that
// several types can be tested at once with a mask.
type encType uint8

const (
	// The expression (1 << iota) / 2 evaluates to 0 for the blank entry and
	// then to 1, 2, 4 and 8 for the named entries below, giving each type its
	// own bit (prepareNext relies on this for scalar|messageClose).
	_ encType = (1 << iota) / 2
	name
	scalar
	messageOpen
	messageClose
)

// Encoder provides methods to write out textproto constructs and values. The user is
// responsible for producing valid sequences of constructs and values.
type Encoder struct {
	// encoderState holds the mutable part of the encoder; it is what
	// Snapshot returns and what Reset restores.
	encoderState

	// indent is the indentation string added per nesting level. An empty
	// indent selects single-line output in prepareNext.
	indent string
	// delims holds the message delimiters: delims[0] is written by
	// StartMessage and delims[1] by EndMessage.
	delims [2]byte
	// outputASCII is forwarded to appendString by WriteString.
	outputASCII bool
}

// encoderState is the portion of the Encoder that changes while writing.
type encoderState struct {
	// lastType is the type of the construct written most recently; it is
	// updated by prepareNext.
	lastType encType
	// indents is the indentation for the current nesting depth. In multi-line
	// mode prepareNext grows it by indent after a message opens and shrinks
	// it again before a message closes.
	indents []byte
	// out is the output buffer returned by Bytes.
	out []byte
}

// NewEncoder returns an Encoder.
//
// If indent is a non-empty string, it causes every entry in a List or Message
// to be preceded by the indent and trailed by a newline. An error is returned
// if indent contains anything other than space and tab characters.
//
// If delims is not the zero value, it controls the delimiter characters used
// for messages (e.g., "{}" vs "<>"). The zero value selects "{}"; any other
// pair results in an error.
//
// If outputASCII is true, strings will be serialized in such a way that
// multi-byte UTF-8 sequences are escaped. This property ensures that the
// overall output is ASCII (as opposed to UTF-8).
56func NewEncoder(indent string, delims [2]byte, outputASCII bool) (*Encoder, error) { 57 e := &Encoder{} 58 if len(indent) > 0 { 59 if strings.Trim(indent, " \t") != "" { 60 return nil, errors.New("indent may only be composed of space and tab characters") 61 } 62 e.indent = indent 63 } 64 switch delims { 65 case [2]byte{0, 0}: 66 e.delims = [2]byte{'{', '}'} 67 case [2]byte{'{', '}'}, [2]byte{'<', '>'}: 68 e.delims = delims 69 default: 70 return nil, errors.New("delimiters may only be \"{}\" or \"<>\"") 71 } 72 e.outputASCII = outputASCII 73 74 return e, nil 75} 76 77// Bytes returns the content of the written bytes. 78func (e *Encoder) Bytes() []byte { 79 return e.out 80} 81 82// StartMessage writes out the '{' or '<' symbol. 83func (e *Encoder) StartMessage() { 84 e.prepareNext(messageOpen) 85 e.out = append(e.out, e.delims[0]) 86} 87 88// EndMessage writes out the '}' or '>' symbol. 89func (e *Encoder) EndMessage() { 90 e.prepareNext(messageClose) 91 e.out = append(e.out, e.delims[1]) 92} 93 94// WriteName writes out the field name and the separator ':'. 95func (e *Encoder) WriteName(s string) { 96 e.prepareNext(name) 97 e.out = append(e.out, s...) 98 e.out = append(e.out, ':') 99} 100 101// WriteBool writes out the given boolean value. 102func (e *Encoder) WriteBool(b bool) { 103 if b { 104 e.WriteLiteral("true") 105 } else { 106 e.WriteLiteral("false") 107 } 108} 109 110// WriteString writes out the given string value. 111func (e *Encoder) WriteString(s string) { 112 e.prepareNext(scalar) 113 e.out = appendString(e.out, s, e.outputASCII) 114} 115 116func appendString(out []byte, in string, outputASCII bool) []byte { 117 out = append(out, '"') 118 i := indexNeedEscapeInString(in) 119 in, out = in[i:], append(out, in[:i]...) 
120 for len(in) > 0 { 121 switch r, n := utf8.DecodeRuneInString(in); { 122 case r == utf8.RuneError && n == 1: 123 // We do not report invalid UTF-8 because strings in the text format 124 // are used to represent both the proto string and bytes type. 125 r = rune(in[0]) 126 fallthrough 127 case r < ' ' || r == '"' || r == '\\' || r == 0x7f: 128 out = append(out, '\\') 129 switch r { 130 case '"', '\\': 131 out = append(out, byte(r)) 132 case '\n': 133 out = append(out, 'n') 134 case '\r': 135 out = append(out, 'r') 136 case '\t': 137 out = append(out, 't') 138 default: 139 out = append(out, 'x') 140 out = append(out, "00"[1+(bits.Len32(uint32(r))-1)/4:]...) 141 out = strconv.AppendUint(out, uint64(r), 16) 142 } 143 in = in[n:] 144 case r >= utf8.RuneSelf && (outputASCII || r <= 0x009f): 145 out = append(out, '\\') 146 if r <= math.MaxUint16 { 147 out = append(out, 'u') 148 out = append(out, "0000"[1+(bits.Len32(uint32(r))-1)/4:]...) 149 out = strconv.AppendUint(out, uint64(r), 16) 150 } else { 151 out = append(out, 'U') 152 out = append(out, "00000000"[1+(bits.Len32(uint32(r))-1)/4:]...) 153 out = strconv.AppendUint(out, uint64(r), 16) 154 } 155 in = in[n:] 156 default: 157 i := indexNeedEscapeInString(in[n:]) 158 in, out = in[n+i:], append(out, in[:n+i]...) 159 } 160 } 161 out = append(out, '"') 162 return out 163} 164 165// indexNeedEscapeInString returns the index of the character that needs 166// escaping. If no characters need escaping, this returns the input length. 167func indexNeedEscapeInString(s string) int { 168 for i := 0; i < len(s); i++ { 169 if c := s[i]; c < ' ' || c == '"' || c == '\'' || c == '\\' || c >= 0x7f { 170 return i 171 } 172 } 173 return len(s) 174} 175 176// WriteFloat writes out the given float value for given bitSize. 
177func (e *Encoder) WriteFloat(n float64, bitSize int) { 178 e.prepareNext(scalar) 179 e.out = appendFloat(e.out, n, bitSize) 180} 181 182func appendFloat(out []byte, n float64, bitSize int) []byte { 183 switch { 184 case math.IsNaN(n): 185 return append(out, "nan"...) 186 case math.IsInf(n, +1): 187 return append(out, "inf"...) 188 case math.IsInf(n, -1): 189 return append(out, "-inf"...) 190 default: 191 return strconv.AppendFloat(out, n, 'g', -1, bitSize) 192 } 193} 194 195// WriteInt writes out the given signed integer value. 196func (e *Encoder) WriteInt(n int64) { 197 e.prepareNext(scalar) 198 e.out = append(e.out, strconv.FormatInt(n, 10)...) 199} 200 201// WriteUint writes out the given unsigned integer value. 202func (e *Encoder) WriteUint(n uint64) { 203 e.prepareNext(scalar) 204 e.out = append(e.out, strconv.FormatUint(n, 10)...) 205} 206 207// WriteLiteral writes out the given string as a literal value without quotes. 208// This is used for writing enum literal strings. 209func (e *Encoder) WriteLiteral(s string) { 210 e.prepareNext(scalar) 211 e.out = append(e.out, s...) 212} 213 214// prepareNext adds possible space and indentation for the next value based 215// on last encType and indent option. It also updates e.lastType to next. 216func (e *Encoder) prepareNext(next encType) { 217 defer func() { 218 e.lastType = next 219 }() 220 221 // Single line. 222 if len(e.indent) == 0 { 223 // Add space after each field before the next one. 224 if e.lastType&(scalar|messageClose) != 0 && next == name { 225 e.out = append(e.out, ' ') 226 // Add a random extra space to make output unstable. 227 if detrand.Bool() { 228 e.out = append(e.out, ' ') 229 } 230 } 231 return 232 } 233 234 // Multi-line. 235 switch { 236 case e.lastType == name: 237 e.out = append(e.out, ' ') 238 // Add a random extra space after name: to make output unstable. 
239 if detrand.Bool() { 240 e.out = append(e.out, ' ') 241 } 242 243 case e.lastType == messageOpen && next != messageClose: 244 e.indents = append(e.indents, e.indent...) 245 e.out = append(e.out, '\n') 246 e.out = append(e.out, e.indents...) 247 248 case e.lastType&(scalar|messageClose) != 0: 249 if next == messageClose { 250 e.indents = e.indents[:len(e.indents)-len(e.indent)] 251 } 252 e.out = append(e.out, '\n') 253 e.out = append(e.out, e.indents...) 254 } 255} 256 257// Snapshot returns the current snapshot for use in Reset. 258func (e *Encoder) Snapshot() encoderState { 259 return e.encoderState 260} 261 262// Reset resets the Encoder to the given encoderState from a Snapshot. 263func (e *Encoder) Reset(es encoderState) { 264 e.encoderState = es 265} 266 267// AppendString appends the escaped form of the input string to b. 268func AppendString(b []byte, s string) []byte { 269 return appendString(b, s, false) 270} 271