// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package json

import (
	"bytes"
	"errors"
	"io"
)

// A Decoder reads and decodes JSON values from an input stream.
type Decoder struct {
	r       io.Reader   // input source; read only by refill
	buf     []byte      // buffered input; buf[scanp:] has not been consumed yet
	d       decodeState // decode state re-initialized by Decode for every value
	scanp   int         // start of unread data in buf
	scanned int64       // amount of data already scanned
	scan    scanner     // scanner readValue uses to find the end of a value
	err     error       // sticky read/scan error; returned by every later Decode

	tokenState int   // current position in the token grammar (a token* constant)
	tokenStack []int // tokenState values saved by Token when entering '[' or '{'
}

// NewDecoder returns a new decoder that reads from r.
//
// The decoder introduces its own buffering and may
// read data from r beyond the JSON values requested.
func NewDecoder(r io.Reader) *Decoder {
	return &Decoder{r: r}
}

// UseNumber causes the Decoder to unmarshal a number into an interface{} as a
// [Number] instead of as a float64.
func (dec *Decoder) UseNumber() { dec.d.useNumber = true }

// DisallowUnknownFields causes the Decoder to return an error when the destination
// is a struct and the input contains object keys which do not match any
// non-ignored, exported fields in the destination.
func (dec *Decoder) DisallowUnknownFields() { dec.d.disallowUnknownFields = true }

// Decode reads the next JSON-encoded value from its
// input and stores it in the value pointed to by v.
//
// See the documentation for [Unmarshal] for details about
// the conversion of JSON into a Go value.
//
// An error recorded by readValue (a read or syntax error) is kept in
// dec.err and returned again by every subsequent call. An error from
// unmarshaling a complete value is returned but not kept.
func (dec *Decoder) Decode(v any) error {
	// A previously recorded read or scan error is permanent.
	if dec.err != nil {
		return dec.err
	}

	// When Decode is mixed with Token, a ',' or ':' separator may still
	// be pending in front of the value; consume it first.
	if err := dec.tokenPrepareForDecode(); err != nil {
		return err
	}

	// A value may only start in certain token states.
	if !dec.tokenValueAllowed() {
		return &SyntaxError{msg: "not at beginning of value", Offset: dec.InputOffset()}
	}

	// Read whole value into buffer.
	n, err := dec.readValue()
	if err != nil {
		return err
	}
	// Hand exactly the n bytes of this value to the decode state,
	// then mark them as consumed.
	dec.d.init(dec.buf[dec.scanp : dec.scanp+n])
	dec.scanp += n

	// Don't save err from unmarshal into dec.err:
	// the connection is still usable since we read a complete JSON
	// object from it before the error happened.
	err = dec.d.unmarshal(v)

	// fixup token streaming state
	dec.tokenValueEnd()

	return err
}

// Buffered returns a reader of the data remaining in the Decoder's
// buffer. The reader is valid until the next call to [Decoder.Decode].
func (dec *Decoder) Buffered() io.Reader {
	return bytes.NewReader(dec.buf[dec.scanp:])
}

// readValue reads a JSON value into dec.buf.
// It returns the length of the encoding.
//
// On success the value occupies dec.buf[dec.scanp : dec.scanp+n]; the
// caller is responsible for advancing dec.scanp past it. On failure the
// error is also stored in dec.err and the returned length is 0.
func (dec *Decoder) readValue() (int, error) {
	dec.scan.reset()

	// scanp is a local cursor; dec.scanp keeps pointing at the start of
	// the value. err holds the result of the most recent refill.
	scanp := dec.scanp
	var err error
Input:
	// help the compiler see that scanp is never negative, so it can remove
	// some bounds checks below.
	for scanp >= 0 {

		// Look in the buffer for a new value.
		for ; scanp < len(dec.buf); scanp++ {
			c := dec.buf[scanp]
			dec.scan.bytes++
			switch dec.scan.step(&dec.scan, c) {
			case scanEnd:
				// scanEnd is delayed one byte so we decrement
				// the scanner bytes count by 1 to ensure that
				// this value is correct in the next call of Decode.
				dec.scan.bytes--
				break Input
			case scanEndObject, scanEndArray:
				// scanEnd is delayed one byte.
				// We might block trying to get that byte from src,
				// so instead invent a space byte.
				if stateEndValue(&dec.scan, ' ') == scanEnd {
					// Include the closing delimiter in the value.
					scanp++
					break Input
				}
			case scanError:
				dec.err = dec.scan.err
				return 0, dec.scan.err
			}
		}

		// Did the last read have an error?
		// Delayed until now to allow buffer scan.
		if err != nil {
			if err == io.EOF {
				// scanEnd is delayed one byte, so offer the scanner a
				// space: a value that ends exactly at EOF is complete.
				if dec.scan.step(&dec.scan, ' ') == scanEnd {
					break Input
				}
				// Anything other than whitespace in the buffer means the
				// input stopped in the middle of a value; whitespace-only
				// input is reported as plain io.EOF.
				if nonSpace(dec.buf) {
					err = io.ErrUnexpectedEOF
				}
			}
			dec.err = err
			return 0, err
		}

		// refill may slide the buffer and reset dec.scanp, so keep the
		// cursor as an offset from dec.scanp and re-derive it afterwards.
		n := scanp - dec.scanp
		err = dec.refill()
		scanp = dec.scanp + n
	}
	return scanp - dec.scanp, nil
}

// refill reads more input from dec.r and appends it to dec.buf.
// Bytes before dec.scanp are discarded first (and added to dec.scanned),
// and the buffer is reallocated when fewer than minRead bytes of spare
// capacity remain. It returns the error from the Read call; bytes that
// were read together with an error are still appended to dec.buf.
func (dec *Decoder) refill() error {
	// Make room to read more into the buffer.
	// First slide down data already consumed.
	if dec.scanp > 0 {
		dec.scanned += int64(dec.scanp)
		n := copy(dec.buf, dec.buf[dec.scanp:])
		dec.buf = dec.buf[:n]
		dec.scanp = 0
	}

	// Grow buffer if not large enough.
	const minRead = 512
	if cap(dec.buf)-len(dec.buf) < minRead {
		newBuf := make([]byte, len(dec.buf), 2*cap(dec.buf)+minRead)
		copy(newBuf, dec.buf)
		dec.buf = newBuf
	}

	// Read. Delay error for next iteration (after scan).
	n, err := dec.r.Read(dec.buf[len(dec.buf):cap(dec.buf)])
	dec.buf = dec.buf[0 : len(dec.buf)+n]

	return err
}

// nonSpace reports whether b contains at least one byte
// for which isSpace returns false.
func nonSpace(b []byte) bool {
	for _, c := range b {
		if !isSpace(c) {
			return true
		}
	}
	return false
}

// An Encoder writes JSON values to an output stream.
type Encoder struct {
	w          io.Writer // destination for encoded output
	err        error     // sticky error from a failed Write; returned by later Encode calls
	escapeHTML bool      // forwarded to marshal as encOpts.escapeHTML

	indentBuf    []byte // scratch buffer reused by Encode for indented output
	indentPrefix string // prefix argument of the last SetIndent call
	indentValue  string // indent argument of the last SetIndent call
}

// NewEncoder returns a new encoder that writes to w.
// The returned encoder has HTML escaping enabled.
func NewEncoder(w io.Writer) *Encoder {
	return &Encoder{w: w, escapeHTML: true}
}

// Encode writes the JSON encoding of v to the stream,
// with insignificant space characters elided,
// followed by a newline character.
//
// See the documentation for [Marshal] for details about the
// conversion of Go values to JSON.
202func (enc *Encoder) Encode(v any) error { 203 if enc.err != nil { 204 return enc.err 205 } 206 207 e := newEncodeState() 208 defer encodeStatePool.Put(e) 209 210 err := e.marshal(v, encOpts{escapeHTML: enc.escapeHTML}) 211 if err != nil { 212 return err 213 } 214 215 // Terminate each value with a newline. 216 // This makes the output look a little nicer 217 // when debugging, and some kind of space 218 // is required if the encoded value was a number, 219 // so that the reader knows there aren't more 220 // digits coming. 221 e.WriteByte('\n') 222 223 b := e.Bytes() 224 if enc.indentPrefix != "" || enc.indentValue != "" { 225 enc.indentBuf, err = appendIndent(enc.indentBuf[:0], b, enc.indentPrefix, enc.indentValue) 226 if err != nil { 227 return err 228 } 229 b = enc.indentBuf 230 } 231 if _, err = enc.w.Write(b); err != nil { 232 enc.err = err 233 } 234 return err 235} 236 237// SetIndent instructs the encoder to format each subsequent encoded 238// value as if indented by the package-level function Indent(dst, src, prefix, indent). 239// Calling SetIndent("", "") disables indentation. 240func (enc *Encoder) SetIndent(prefix, indent string) { 241 enc.indentPrefix = prefix 242 enc.indentValue = indent 243} 244 245// SetEscapeHTML specifies whether problematic HTML characters 246// should be escaped inside JSON quoted strings. 247// The default behavior is to escape &, <, and > to \u0026, \u003c, and \u003e 248// to avoid certain safety problems that can arise when embedding JSON in HTML. 249// 250// In non-HTML settings where the escaping interferes with the readability 251// of the output, SetEscapeHTML(false) disables this behavior. 252func (enc *Encoder) SetEscapeHTML(on bool) { 253 enc.escapeHTML = on 254} 255 256// RawMessage is a raw encoded JSON value. 257// It implements [Marshaler] and [Unmarshaler] and can 258// be used to delay JSON decoding or precompute a JSON encoding. 
259type RawMessage []byte 260 261// MarshalJSON returns m as the JSON encoding of m. 262func (m RawMessage) MarshalJSON() ([]byte, error) { 263 if m == nil { 264 return []byte("null"), nil 265 } 266 return m, nil 267} 268 269// UnmarshalJSON sets *m to a copy of data. 270func (m *RawMessage) UnmarshalJSON(data []byte) error { 271 if m == nil { 272 return errors.New("json.RawMessage: UnmarshalJSON on nil pointer") 273 } 274 *m = append((*m)[0:0], data...) 275 return nil 276} 277 278var _ Marshaler = (*RawMessage)(nil) 279var _ Unmarshaler = (*RawMessage)(nil) 280 281// A Token holds a value of one of these types: 282// 283// - [Delim], for the four JSON delimiters [ ] { } 284// - bool, for JSON booleans 285// - float64, for JSON numbers 286// - [Number], for JSON numbers 287// - string, for JSON string literals 288// - nil, for JSON null 289type Token any 290 291const ( 292 tokenTopValue = iota 293 tokenArrayStart 294 tokenArrayValue 295 tokenArrayComma 296 tokenObjectStart 297 tokenObjectKey 298 tokenObjectColon 299 tokenObjectValue 300 tokenObjectComma 301) 302 303// advance tokenstate from a separator state to a value state 304func (dec *Decoder) tokenPrepareForDecode() error { 305 // Note: Not calling peek before switch, to avoid 306 // putting peek into the standard Decode path. 307 // peek is only called when using the Token API. 
308 switch dec.tokenState { 309 case tokenArrayComma: 310 c, err := dec.peek() 311 if err != nil { 312 return err 313 } 314 if c != ',' { 315 return &SyntaxError{"expected comma after array element", dec.InputOffset()} 316 } 317 dec.scanp++ 318 dec.tokenState = tokenArrayValue 319 case tokenObjectColon: 320 c, err := dec.peek() 321 if err != nil { 322 return err 323 } 324 if c != ':' { 325 return &SyntaxError{"expected colon after object key", dec.InputOffset()} 326 } 327 dec.scanp++ 328 dec.tokenState = tokenObjectValue 329 } 330 return nil 331} 332 333func (dec *Decoder) tokenValueAllowed() bool { 334 switch dec.tokenState { 335 case tokenTopValue, tokenArrayStart, tokenArrayValue, tokenObjectValue: 336 return true 337 } 338 return false 339} 340 341func (dec *Decoder) tokenValueEnd() { 342 switch dec.tokenState { 343 case tokenArrayStart, tokenArrayValue: 344 dec.tokenState = tokenArrayComma 345 case tokenObjectValue: 346 dec.tokenState = tokenObjectComma 347 } 348} 349 350// A Delim is a JSON array or object delimiter, one of [ ] { or }. 351type Delim rune 352 353func (d Delim) String() string { 354 return string(d) 355} 356 357// Token returns the next JSON token in the input stream. 358// At the end of the input stream, Token returns nil, [io.EOF]. 359// 360// Token guarantees that the delimiters [ ] { } it returns are 361// properly nested and matched: if Token encounters an unexpected 362// delimiter in the input, it will return an error. 363// 364// The input stream consists of basic JSON values—bool, string, 365// number, and null—along with delimiters [ ] { } of type [Delim] 366// to mark the start and end of arrays and objects. 367// Commas and colons are elided. 
368func (dec *Decoder) Token() (Token, error) { 369 for { 370 c, err := dec.peek() 371 if err != nil { 372 return nil, err 373 } 374 switch c { 375 case '[': 376 if !dec.tokenValueAllowed() { 377 return dec.tokenError(c) 378 } 379 dec.scanp++ 380 dec.tokenStack = append(dec.tokenStack, dec.tokenState) 381 dec.tokenState = tokenArrayStart 382 return Delim('['), nil 383 384 case ']': 385 if dec.tokenState != tokenArrayStart && dec.tokenState != tokenArrayComma { 386 return dec.tokenError(c) 387 } 388 dec.scanp++ 389 dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1] 390 dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1] 391 dec.tokenValueEnd() 392 return Delim(']'), nil 393 394 case '{': 395 if !dec.tokenValueAllowed() { 396 return dec.tokenError(c) 397 } 398 dec.scanp++ 399 dec.tokenStack = append(dec.tokenStack, dec.tokenState) 400 dec.tokenState = tokenObjectStart 401 return Delim('{'), nil 402 403 case '}': 404 if dec.tokenState != tokenObjectStart && dec.tokenState != tokenObjectComma { 405 return dec.tokenError(c) 406 } 407 dec.scanp++ 408 dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1] 409 dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1] 410 dec.tokenValueEnd() 411 return Delim('}'), nil 412 413 case ':': 414 if dec.tokenState != tokenObjectColon { 415 return dec.tokenError(c) 416 } 417 dec.scanp++ 418 dec.tokenState = tokenObjectValue 419 continue 420 421 case ',': 422 if dec.tokenState == tokenArrayComma { 423 dec.scanp++ 424 dec.tokenState = tokenArrayValue 425 continue 426 } 427 if dec.tokenState == tokenObjectComma { 428 dec.scanp++ 429 dec.tokenState = tokenObjectKey 430 continue 431 } 432 return dec.tokenError(c) 433 434 case '"': 435 if dec.tokenState == tokenObjectStart || dec.tokenState == tokenObjectKey { 436 var x string 437 old := dec.tokenState 438 dec.tokenState = tokenTopValue 439 err := dec.Decode(&x) 440 dec.tokenState = old 441 if err != nil { 442 return nil, err 443 } 444 dec.tokenState = tokenObjectColon 445 return 
x, nil 446 } 447 fallthrough 448 449 default: 450 if !dec.tokenValueAllowed() { 451 return dec.tokenError(c) 452 } 453 var x any 454 if err := dec.Decode(&x); err != nil { 455 return nil, err 456 } 457 return x, nil 458 } 459 } 460} 461 462func (dec *Decoder) tokenError(c byte) (Token, error) { 463 var context string 464 switch dec.tokenState { 465 case tokenTopValue: 466 context = " looking for beginning of value" 467 case tokenArrayStart, tokenArrayValue, tokenObjectValue: 468 context = " looking for beginning of value" 469 case tokenArrayComma: 470 context = " after array element" 471 case tokenObjectKey: 472 context = " looking for beginning of object key string" 473 case tokenObjectColon: 474 context = " after object key" 475 case tokenObjectComma: 476 context = " after object key:value pair" 477 } 478 return nil, &SyntaxError{"invalid character " + quoteChar(c) + context, dec.InputOffset()} 479} 480 481// More reports whether there is another element in the 482// current array or object being parsed. 483func (dec *Decoder) More() bool { 484 c, err := dec.peek() 485 return err == nil && c != ']' && c != '}' 486} 487 488func (dec *Decoder) peek() (byte, error) { 489 var err error 490 for { 491 for i := dec.scanp; i < len(dec.buf); i++ { 492 c := dec.buf[i] 493 if isSpace(c) { 494 continue 495 } 496 dec.scanp = i 497 return c, nil 498 } 499 // buffer has been scanned, now report any error 500 if err != nil { 501 return 0, err 502 } 503 err = dec.refill() 504 } 505} 506 507// InputOffset returns the input stream byte offset of the current decoder position. 508// The offset gives the location of the end of the most recently returned token 509// and the beginning of the next token. 510func (dec *Decoder) InputOffset() int64 { 511 return dec.scanned + int64(dec.scanp) 512} 513