David K. Bainbridge | 528b318 | 2017-01-23 08:51:59 -0800 | [diff] [blame] | 1 | // Copyright 2010 The Go Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style |
| 3 | // license that can be found in the LICENSE file. |
| 4 | |
| 5 | package json |
| 6 | |
| 7 | import ( |
| 8 | "bytes" |
| 9 | "errors" |
| 10 | "io" |
| 11 | ) |
| 12 | |
| 13 | // A Decoder reads and decodes JSON values from an input stream. |
| 14 | type Decoder struct { |
| 15 | r io.Reader |
| 16 | buf []byte |
| 17 | d decodeState |
| 18 | scanp int // start of unread data in buf |
| 19 | scan scanner |
| 20 | err error |
| 21 | |
| 22 | tokenState int |
| 23 | tokenStack []int |
| 24 | } |
| 25 | |
| 26 | // NewDecoder returns a new decoder that reads from r. |
| 27 | // |
| 28 | // The decoder introduces its own buffering and may |
| 29 | // read data from r beyond the JSON values requested. |
| 30 | func NewDecoder(r io.Reader) *Decoder { |
| 31 | return &Decoder{r: r} |
| 32 | } |
| 33 | |
| 34 | // UseNumber causes the Decoder to unmarshal a number into an interface{} as a |
| 35 | // Number instead of as a float64. |
| 36 | func (dec *Decoder) UseNumber() { dec.d.useNumber = true } |
| 37 | |
| 38 | // Decode reads the next JSON-encoded value from its |
| 39 | // input and stores it in the value pointed to by v. |
| 40 | // |
| 41 | // See the documentation for Unmarshal for details about |
| 42 | // the conversion of JSON into a Go value. |
| 43 | func (dec *Decoder) Decode(v interface{}) error { |
| 44 | if dec.err != nil { |
| 45 | return dec.err |
| 46 | } |
| 47 | |
| 48 | if err := dec.tokenPrepareForDecode(); err != nil { |
| 49 | return err |
| 50 | } |
| 51 | |
| 52 | if !dec.tokenValueAllowed() { |
| 53 | return &SyntaxError{msg: "not at beginning of value"} |
| 54 | } |
| 55 | |
| 56 | // Read whole value into buffer. |
| 57 | n, err := dec.readValue() |
| 58 | if err != nil { |
| 59 | return err |
| 60 | } |
| 61 | dec.d.init(dec.buf[dec.scanp : dec.scanp+n]) |
| 62 | dec.scanp += n |
| 63 | |
| 64 | // Don't save err from unmarshal into dec.err: |
| 65 | // the connection is still usable since we read a complete JSON |
| 66 | // object from it before the error happened. |
| 67 | err = dec.d.unmarshal(v) |
| 68 | |
| 69 | // fixup token streaming state |
| 70 | dec.tokenValueEnd() |
| 71 | |
| 72 | return err |
| 73 | } |
| 74 | |
| 75 | // Buffered returns a reader of the data remaining in the Decoder's |
| 76 | // buffer. The reader is valid until the next call to Decode. |
| 77 | func (dec *Decoder) Buffered() io.Reader { |
| 78 | return bytes.NewReader(dec.buf[dec.scanp:]) |
| 79 | } |
| 80 | |
| 81 | // readValue reads a JSON value into dec.buf. |
| 82 | // It returns the length of the encoding. |
| 83 | func (dec *Decoder) readValue() (int, error) { |
| 84 | dec.scan.reset() |
| 85 | |
| 86 | scanp := dec.scanp |
| 87 | var err error |
| 88 | Input: |
| 89 | for { |
| 90 | // Look in the buffer for a new value. |
| 91 | for i, c := range dec.buf[scanp:] { |
| 92 | dec.scan.bytes++ |
| 93 | v := dec.scan.step(&dec.scan, c) |
| 94 | if v == scanEnd { |
| 95 | scanp += i |
| 96 | break Input |
| 97 | } |
| 98 | // scanEnd is delayed one byte. |
| 99 | // We might block trying to get that byte from src, |
| 100 | // so instead invent a space byte. |
| 101 | if (v == scanEndObject || v == scanEndArray) && dec.scan.step(&dec.scan, ' ') == scanEnd { |
| 102 | scanp += i + 1 |
| 103 | break Input |
| 104 | } |
| 105 | if v == scanError { |
| 106 | dec.err = dec.scan.err |
| 107 | return 0, dec.scan.err |
| 108 | } |
| 109 | } |
| 110 | scanp = len(dec.buf) |
| 111 | |
| 112 | // Did the last read have an error? |
| 113 | // Delayed until now to allow buffer scan. |
| 114 | if err != nil { |
| 115 | if err == io.EOF { |
| 116 | if dec.scan.step(&dec.scan, ' ') == scanEnd { |
| 117 | break Input |
| 118 | } |
| 119 | if nonSpace(dec.buf) { |
| 120 | err = io.ErrUnexpectedEOF |
| 121 | } |
| 122 | } |
| 123 | dec.err = err |
| 124 | return 0, err |
| 125 | } |
| 126 | |
| 127 | n := scanp - dec.scanp |
| 128 | err = dec.refill() |
| 129 | scanp = dec.scanp + n |
| 130 | } |
| 131 | return scanp - dec.scanp, nil |
| 132 | } |
| 133 | |
| 134 | func (dec *Decoder) refill() error { |
| 135 | // Make room to read more into the buffer. |
| 136 | // First slide down data already consumed. |
| 137 | if dec.scanp > 0 { |
| 138 | n := copy(dec.buf, dec.buf[dec.scanp:]) |
| 139 | dec.buf = dec.buf[:n] |
| 140 | dec.scanp = 0 |
| 141 | } |
| 142 | |
| 143 | // Grow buffer if not large enough. |
| 144 | const minRead = 512 |
| 145 | if cap(dec.buf)-len(dec.buf) < minRead { |
| 146 | newBuf := make([]byte, len(dec.buf), 2*cap(dec.buf)+minRead) |
| 147 | copy(newBuf, dec.buf) |
| 148 | dec.buf = newBuf |
| 149 | } |
| 150 | |
| 151 | // Read. Delay error for next iteration (after scan). |
| 152 | n, err := dec.r.Read(dec.buf[len(dec.buf):cap(dec.buf)]) |
| 153 | dec.buf = dec.buf[0 : len(dec.buf)+n] |
| 154 | |
| 155 | return err |
| 156 | } |
| 157 | |
| 158 | func nonSpace(b []byte) bool { |
| 159 | for _, c := range b { |
| 160 | if !isSpace(c) { |
| 161 | return true |
| 162 | } |
| 163 | } |
| 164 | return false |
| 165 | } |
| 166 | |
| 167 | // An Encoder writes JSON values to an output stream. |
| 168 | type Encoder struct { |
| 169 | w io.Writer |
| 170 | err error |
| 171 | escapeHTML bool |
| 172 | |
| 173 | indentBuf *bytes.Buffer |
| 174 | indentPrefix string |
| 175 | indentValue string |
| 176 | |
| 177 | ext Extension |
| 178 | } |
| 179 | |
| 180 | // NewEncoder returns a new encoder that writes to w. |
| 181 | func NewEncoder(w io.Writer) *Encoder { |
| 182 | return &Encoder{w: w, escapeHTML: true} |
| 183 | } |
| 184 | |
| 185 | // Encode writes the JSON encoding of v to the stream, |
| 186 | // followed by a newline character. |
| 187 | // |
| 188 | // See the documentation for Marshal for details about the |
| 189 | // conversion of Go values to JSON. |
| 190 | func (enc *Encoder) Encode(v interface{}) error { |
| 191 | if enc.err != nil { |
| 192 | return enc.err |
| 193 | } |
| 194 | e := newEncodeState() |
| 195 | e.ext = enc.ext |
| 196 | err := e.marshal(v, encOpts{escapeHTML: enc.escapeHTML}) |
| 197 | if err != nil { |
| 198 | return err |
| 199 | } |
| 200 | |
| 201 | // Terminate each value with a newline. |
| 202 | // This makes the output look a little nicer |
| 203 | // when debugging, and some kind of space |
| 204 | // is required if the encoded value was a number, |
| 205 | // so that the reader knows there aren't more |
| 206 | // digits coming. |
| 207 | e.WriteByte('\n') |
| 208 | |
| 209 | b := e.Bytes() |
| 210 | if enc.indentBuf != nil { |
| 211 | enc.indentBuf.Reset() |
| 212 | err = Indent(enc.indentBuf, b, enc.indentPrefix, enc.indentValue) |
| 213 | if err != nil { |
| 214 | return err |
| 215 | } |
| 216 | b = enc.indentBuf.Bytes() |
| 217 | } |
| 218 | if _, err = enc.w.Write(b); err != nil { |
| 219 | enc.err = err |
| 220 | } |
| 221 | encodeStatePool.Put(e) |
| 222 | return err |
| 223 | } |
| 224 | |
| 225 | // Indent sets the encoder to format each encoded value with Indent. |
| 226 | func (enc *Encoder) Indent(prefix, indent string) { |
| 227 | enc.indentBuf = new(bytes.Buffer) |
| 228 | enc.indentPrefix = prefix |
| 229 | enc.indentValue = indent |
| 230 | } |
| 231 | |
| 232 | // DisableHTMLEscaping causes the encoder not to escape angle brackets |
| 233 | // ("<" and ">") or ampersands ("&") in JSON strings. |
| 234 | func (enc *Encoder) DisableHTMLEscaping() { |
| 235 | enc.escapeHTML = false |
| 236 | } |
| 237 | |
// RawMessage is a raw encoded JSON value.
// It implements Marshaler and Unmarshaler and can
// be used to delay JSON decoding or precompute a JSON encoding.
type RawMessage []byte

// MarshalJSON returns m as the JSON encoding of m.
//
// The receiver is a value so that the method is in the method set of
// both RawMessage and *RawMessage. A nil RawMessage encodes as the
// JSON literal null rather than as an empty, invalid document.
func (m RawMessage) MarshalJSON() ([]byte, error) {
	if m == nil {
		return []byte("null"), nil
	}
	return m, nil
}

// UnmarshalJSON sets *m to a copy of data.
// It returns an error if m is a nil pointer.
func (m *RawMessage) UnmarshalJSON(data []byte) error {
	if m == nil {
		return errors.New("json.RawMessage: UnmarshalJSON on nil pointer")
	}
	// Reuse the existing backing array where it is large enough.
	*m = append((*m)[0:0], data...)
	return nil
}
| 256 | |
| 257 | var _ Marshaler = (*RawMessage)(nil) |
| 258 | var _ Unmarshaler = (*RawMessage)(nil) |
| 259 | |
// A Token holds a value of one of these types:
//
//	Delim, for the four JSON delimiters [ ] { }
//	bool, for JSON booleans
//	float64, for JSON numbers
//	Number, for JSON numbers
//	string, for JSON string literals
//	nil, for JSON null
type Token interface{}
| 270 | |
// Values of Decoder.tokenState. Each one names the position the Token
// API has reached within the enclosing array, object or top level.
const (
	tokenTopValue    = iota // a top-level value may start here
	tokenArrayStart         // just after '['
	tokenArrayValue         // after ',' in an array
	tokenArrayComma         // after an array element
	tokenObjectStart        // just after '{'
	tokenObjectKey          // after ',' in an object
	tokenObjectColon        // after an object key
	tokenObjectValue        // after ':' in an object
	tokenObjectComma        // after an object value
)
| 282 | |
| 283 | // advance tokenstate from a separator state to a value state |
| 284 | func (dec *Decoder) tokenPrepareForDecode() error { |
| 285 | // Note: Not calling peek before switch, to avoid |
| 286 | // putting peek into the standard Decode path. |
| 287 | // peek is only called when using the Token API. |
| 288 | switch dec.tokenState { |
| 289 | case tokenArrayComma: |
| 290 | c, err := dec.peek() |
| 291 | if err != nil { |
| 292 | return err |
| 293 | } |
| 294 | if c != ',' { |
| 295 | return &SyntaxError{"expected comma after array element", 0} |
| 296 | } |
| 297 | dec.scanp++ |
| 298 | dec.tokenState = tokenArrayValue |
| 299 | case tokenObjectColon: |
| 300 | c, err := dec.peek() |
| 301 | if err != nil { |
| 302 | return err |
| 303 | } |
| 304 | if c != ':' { |
| 305 | return &SyntaxError{"expected colon after object key", 0} |
| 306 | } |
| 307 | dec.scanp++ |
| 308 | dec.tokenState = tokenObjectValue |
| 309 | } |
| 310 | return nil |
| 311 | } |
| 312 | |
| 313 | func (dec *Decoder) tokenValueAllowed() bool { |
| 314 | switch dec.tokenState { |
| 315 | case tokenTopValue, tokenArrayStart, tokenArrayValue, tokenObjectValue: |
| 316 | return true |
| 317 | } |
| 318 | return false |
| 319 | } |
| 320 | |
| 321 | func (dec *Decoder) tokenValueEnd() { |
| 322 | switch dec.tokenState { |
| 323 | case tokenArrayStart, tokenArrayValue: |
| 324 | dec.tokenState = tokenArrayComma |
| 325 | case tokenObjectValue: |
| 326 | dec.tokenState = tokenObjectComma |
| 327 | } |
| 328 | } |
| 329 | |
// A Delim is a JSON array or object delimiter, one of [ ] { or }.
type Delim rune

// String returns the delimiter as a one-character string.
func (d Delim) String() string {
	return string(rune(d))
}
| 336 | |
| 337 | // Token returns the next JSON token in the input stream. |
| 338 | // At the end of the input stream, Token returns nil, io.EOF. |
| 339 | // |
| 340 | // Token guarantees that the delimiters [ ] { } it returns are |
| 341 | // properly nested and matched: if Token encounters an unexpected |
| 342 | // delimiter in the input, it will return an error. |
| 343 | // |
| 344 | // The input stream consists of basic JSON values—bool, string, |
| 345 | // number, and null—along with delimiters [ ] { } of type Delim |
| 346 | // to mark the start and end of arrays and objects. |
| 347 | // Commas and colons are elided. |
| 348 | func (dec *Decoder) Token() (Token, error) { |
| 349 | for { |
| 350 | c, err := dec.peek() |
| 351 | if err != nil { |
| 352 | return nil, err |
| 353 | } |
| 354 | switch c { |
| 355 | case '[': |
| 356 | if !dec.tokenValueAllowed() { |
| 357 | return dec.tokenError(c) |
| 358 | } |
| 359 | dec.scanp++ |
| 360 | dec.tokenStack = append(dec.tokenStack, dec.tokenState) |
| 361 | dec.tokenState = tokenArrayStart |
| 362 | return Delim('['), nil |
| 363 | |
| 364 | case ']': |
| 365 | if dec.tokenState != tokenArrayStart && dec.tokenState != tokenArrayComma { |
| 366 | return dec.tokenError(c) |
| 367 | } |
| 368 | dec.scanp++ |
| 369 | dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1] |
| 370 | dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1] |
| 371 | dec.tokenValueEnd() |
| 372 | return Delim(']'), nil |
| 373 | |
| 374 | case '{': |
| 375 | if !dec.tokenValueAllowed() { |
| 376 | return dec.tokenError(c) |
| 377 | } |
| 378 | dec.scanp++ |
| 379 | dec.tokenStack = append(dec.tokenStack, dec.tokenState) |
| 380 | dec.tokenState = tokenObjectStart |
| 381 | return Delim('{'), nil |
| 382 | |
| 383 | case '}': |
| 384 | if dec.tokenState != tokenObjectStart && dec.tokenState != tokenObjectComma { |
| 385 | return dec.tokenError(c) |
| 386 | } |
| 387 | dec.scanp++ |
| 388 | dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1] |
| 389 | dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1] |
| 390 | dec.tokenValueEnd() |
| 391 | return Delim('}'), nil |
| 392 | |
| 393 | case ':': |
| 394 | if dec.tokenState != tokenObjectColon { |
| 395 | return dec.tokenError(c) |
| 396 | } |
| 397 | dec.scanp++ |
| 398 | dec.tokenState = tokenObjectValue |
| 399 | continue |
| 400 | |
| 401 | case ',': |
| 402 | if dec.tokenState == tokenArrayComma { |
| 403 | dec.scanp++ |
| 404 | dec.tokenState = tokenArrayValue |
| 405 | continue |
| 406 | } |
| 407 | if dec.tokenState == tokenObjectComma { |
| 408 | dec.scanp++ |
| 409 | dec.tokenState = tokenObjectKey |
| 410 | continue |
| 411 | } |
| 412 | return dec.tokenError(c) |
| 413 | |
| 414 | case '"': |
| 415 | if dec.tokenState == tokenObjectStart || dec.tokenState == tokenObjectKey { |
| 416 | var x string |
| 417 | old := dec.tokenState |
| 418 | dec.tokenState = tokenTopValue |
| 419 | err := dec.Decode(&x) |
| 420 | dec.tokenState = old |
| 421 | if err != nil { |
| 422 | clearOffset(err) |
| 423 | return nil, err |
| 424 | } |
| 425 | dec.tokenState = tokenObjectColon |
| 426 | return x, nil |
| 427 | } |
| 428 | fallthrough |
| 429 | |
| 430 | default: |
| 431 | if !dec.tokenValueAllowed() { |
| 432 | return dec.tokenError(c) |
| 433 | } |
| 434 | var x interface{} |
| 435 | if err := dec.Decode(&x); err != nil { |
| 436 | clearOffset(err) |
| 437 | return nil, err |
| 438 | } |
| 439 | return x, nil |
| 440 | } |
| 441 | } |
| 442 | } |
| 443 | |
| 444 | func clearOffset(err error) { |
| 445 | if s, ok := err.(*SyntaxError); ok { |
| 446 | s.Offset = 0 |
| 447 | } |
| 448 | } |
| 449 | |
| 450 | func (dec *Decoder) tokenError(c byte) (Token, error) { |
| 451 | var context string |
| 452 | switch dec.tokenState { |
| 453 | case tokenTopValue: |
| 454 | context = " looking for beginning of value" |
| 455 | case tokenArrayStart, tokenArrayValue, tokenObjectValue: |
| 456 | context = " looking for beginning of value" |
| 457 | case tokenArrayComma: |
| 458 | context = " after array element" |
| 459 | case tokenObjectKey: |
| 460 | context = " looking for beginning of object key string" |
| 461 | case tokenObjectColon: |
| 462 | context = " after object key" |
| 463 | case tokenObjectComma: |
| 464 | context = " after object key:value pair" |
| 465 | } |
| 466 | return nil, &SyntaxError{"invalid character " + quoteChar(c) + " " + context, 0} |
| 467 | } |
| 468 | |
| 469 | // More reports whether there is another element in the |
| 470 | // current array or object being parsed. |
| 471 | func (dec *Decoder) More() bool { |
| 472 | c, err := dec.peek() |
| 473 | return err == nil && c != ']' && c != '}' |
| 474 | } |
| 475 | |
| 476 | func (dec *Decoder) peek() (byte, error) { |
| 477 | var err error |
| 478 | for { |
| 479 | for i := dec.scanp; i < len(dec.buf); i++ { |
| 480 | c := dec.buf[i] |
| 481 | if isSpace(c) { |
| 482 | continue |
| 483 | } |
| 484 | dec.scanp = i |
| 485 | return c, nil |
| 486 | } |
| 487 | // buffer has been scanned, now report any error |
| 488 | if err != nil { |
| 489 | return 0, err |
| 490 | } |
| 491 | err = dec.refill() |
| 492 | } |
| 493 | } |
| 494 | |
| 495 | /* |
| 496 | TODO |
| 497 | |
| 498 | // EncodeToken writes the given JSON token to the stream. |
| 499 | // It returns an error if the delimiters [ ] { } are not properly used. |
| 500 | // |
| 501 | // EncodeToken does not call Flush, because usually it is part of |
| 502 | // a larger operation such as Encode, and those will call Flush when finished. |
| 503 | // Callers that create an Encoder and then invoke EncodeToken directly, |
| 504 | // without using Encode, need to call Flush when finished to ensure that |
| 505 | // the JSON is written to the underlying writer. |
| 506 | func (e *Encoder) EncodeToken(t Token) error { |
| 507 | ... |
| 508 | } |
| 509 | |
| 510 | */ |