blob: e023702b571274d2548cc81533d1567904a9bf18 [file] [log] [blame]
David K. Bainbridge528b3182017-01-23 08:51:59 -08001// Copyright 2010 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package json
6
7import (
8 "bytes"
9 "errors"
10 "io"
11)
12
// A Decoder reads and decodes JSON values from an input stream.
type Decoder struct {
	r     io.Reader   // underlying input source
	buf   []byte      // data read from r; refill slides consumed bytes out and appends new ones
	d     decodeState // decode state, re-initialized by Decode with each value's bytes
	scanp int         // start of unread data in buf
	scan  scanner     // scanner used by readValue to find the end of the next value
	err   error       // sticky error set by readValue; once set, Decode returns it immediately

	tokenState int   // current position in the token stream (one of the token* constants)
	tokenStack []int // states pushed by Token on '[' and '{', popped on ']' and '}'
}
25
26// NewDecoder returns a new decoder that reads from r.
27//
28// The decoder introduces its own buffering and may
29// read data from r beyond the JSON values requested.
30func NewDecoder(r io.Reader) *Decoder {
31 return &Decoder{r: r}
32}
33
34// UseNumber causes the Decoder to unmarshal a number into an interface{} as a
35// Number instead of as a float64.
36func (dec *Decoder) UseNumber() { dec.d.useNumber = true }
37
38// Decode reads the next JSON-encoded value from its
39// input and stores it in the value pointed to by v.
40//
41// See the documentation for Unmarshal for details about
42// the conversion of JSON into a Go value.
43func (dec *Decoder) Decode(v interface{}) error {
44 if dec.err != nil {
45 return dec.err
46 }
47
48 if err := dec.tokenPrepareForDecode(); err != nil {
49 return err
50 }
51
52 if !dec.tokenValueAllowed() {
53 return &SyntaxError{msg: "not at beginning of value"}
54 }
55
56 // Read whole value into buffer.
57 n, err := dec.readValue()
58 if err != nil {
59 return err
60 }
61 dec.d.init(dec.buf[dec.scanp : dec.scanp+n])
62 dec.scanp += n
63
64 // Don't save err from unmarshal into dec.err:
65 // the connection is still usable since we read a complete JSON
66 // object from it before the error happened.
67 err = dec.d.unmarshal(v)
68
69 // fixup token streaming state
70 dec.tokenValueEnd()
71
72 return err
73}
74
75// Buffered returns a reader of the data remaining in the Decoder's
76// buffer. The reader is valid until the next call to Decode.
77func (dec *Decoder) Buffered() io.Reader {
78 return bytes.NewReader(dec.buf[dec.scanp:])
79}
80
// readValue reads a JSON value into dec.buf.
// It returns the length of the encoding, measured from dec.scanp.
//
// A scan error or a read error is also stored in dec.err before being
// returned. An io.EOF seen while dec.buf still holds non-space bytes is
// converted to io.ErrUnexpectedEOF.
func (dec *Decoder) readValue() (int, error) {
	dec.scan.reset()

	// scanp is the local scan position; dec.scanp itself is not advanced here.
	scanp := dec.scanp
	var err error
Input:
	for {
		// Look in the buffer for a new value.
		for i, c := range dec.buf[scanp:] {
			dec.scan.bytes++
			v := dec.scan.step(&dec.scan, c)
			if v == scanEnd {
				scanp += i
				break Input
			}
			// scanEnd is delayed one byte.
			// We might block trying to get that byte from src,
			// so instead invent a space byte.
			if (v == scanEndObject || v == scanEndArray) && dec.scan.step(&dec.scan, ' ') == scanEnd {
				scanp += i + 1
				break Input
			}
			if v == scanError {
				dec.err = dec.scan.err
				return 0, dec.scan.err
			}
		}
		// Everything currently buffered has been fed to the scanner.
		scanp = len(dec.buf)

		// Did the last read have an error?
		// Delayed until now to allow buffer scan.
		if err != nil {
			if err == io.EOF {
				// Feed the scanner a space: if that ends the value, the
				// input simply stopped right after a complete value.
				if dec.scan.step(&dec.scan, ' ') == scanEnd {
					break Input
				}
				if nonSpace(dec.buf) {
					err = io.ErrUnexpectedEOF
				}
			}
			dec.err = err
			return 0, err
		}

		// refill may slide the buffer down and reset dec.scanp, so carry the
		// scan position across the call as an offset from dec.scanp.
		n := scanp - dec.scanp
		err = dec.refill()
		scanp = dec.scanp + n
	}
	return scanp - dec.scanp, nil
}
133
134func (dec *Decoder) refill() error {
135 // Make room to read more into the buffer.
136 // First slide down data already consumed.
137 if dec.scanp > 0 {
138 n := copy(dec.buf, dec.buf[dec.scanp:])
139 dec.buf = dec.buf[:n]
140 dec.scanp = 0
141 }
142
143 // Grow buffer if not large enough.
144 const minRead = 512
145 if cap(dec.buf)-len(dec.buf) < minRead {
146 newBuf := make([]byte, len(dec.buf), 2*cap(dec.buf)+minRead)
147 copy(newBuf, dec.buf)
148 dec.buf = newBuf
149 }
150
151 // Read. Delay error for next iteration (after scan).
152 n, err := dec.r.Read(dec.buf[len(dec.buf):cap(dec.buf)])
153 dec.buf = dec.buf[0 : len(dec.buf)+n]
154
155 return err
156}
157
158func nonSpace(b []byte) bool {
159 for _, c := range b {
160 if !isSpace(c) {
161 return true
162 }
163 }
164 return false
165}
166
// An Encoder writes JSON values to an output stream.
type Encoder struct {
	w          io.Writer // destination for encoded output
	err        error     // sticky write error; once set, Encode returns it immediately
	escapeHTML bool      // passed to marshal as encOpts.escapeHTML; set by NewEncoder, cleared by DisableHTMLEscaping

	indentBuf    *bytes.Buffer // scratch buffer for indented output; nil until Indent is called
	indentPrefix string        // prefix argument given to Indent
	indentValue  string        // indent argument given to Indent

	ext Extension // copied into each encodeState by Encode before marshaling
}
179
180// NewEncoder returns a new encoder that writes to w.
181func NewEncoder(w io.Writer) *Encoder {
182 return &Encoder{w: w, escapeHTML: true}
183}
184
185// Encode writes the JSON encoding of v to the stream,
186// followed by a newline character.
187//
188// See the documentation for Marshal for details about the
189// conversion of Go values to JSON.
190func (enc *Encoder) Encode(v interface{}) error {
191 if enc.err != nil {
192 return enc.err
193 }
194 e := newEncodeState()
195 e.ext = enc.ext
196 err := e.marshal(v, encOpts{escapeHTML: enc.escapeHTML})
197 if err != nil {
198 return err
199 }
200
201 // Terminate each value with a newline.
202 // This makes the output look a little nicer
203 // when debugging, and some kind of space
204 // is required if the encoded value was a number,
205 // so that the reader knows there aren't more
206 // digits coming.
207 e.WriteByte('\n')
208
209 b := e.Bytes()
210 if enc.indentBuf != nil {
211 enc.indentBuf.Reset()
212 err = Indent(enc.indentBuf, b, enc.indentPrefix, enc.indentValue)
213 if err != nil {
214 return err
215 }
216 b = enc.indentBuf.Bytes()
217 }
218 if _, err = enc.w.Write(b); err != nil {
219 enc.err = err
220 }
221 encodeStatePool.Put(e)
222 return err
223}
224
225// Indent sets the encoder to format each encoded value with Indent.
226func (enc *Encoder) Indent(prefix, indent string) {
227 enc.indentBuf = new(bytes.Buffer)
228 enc.indentPrefix = prefix
229 enc.indentValue = indent
230}
231
232// DisableHTMLEscaping causes the encoder not to escape angle brackets
233// ("<" and ">") or ampersands ("&") in JSON strings.
234func (enc *Encoder) DisableHTMLEscaping() {
235 enc.escapeHTML = false
236}
237
// RawMessage is a raw encoded JSON value.
// It implements Marshaler and Unmarshaler and can
// be used to delay JSON decoding or precompute a JSON encoding.
type RawMessage []byte

// MarshalJSON returns *m as the JSON encoding of m.
//
// A nil receiver or a nil message is encoded as the JSON value null, so the
// method never dereferences a nil pointer and never returns an empty,
// syntactically invalid encoding for an unset message.
func (m *RawMessage) MarshalJSON() ([]byte, error) {
	if m == nil || *m == nil {
		return []byte("null"), nil
	}
	return *m, nil
}

// UnmarshalJSON sets *m to a copy of data.
// It returns an error if m is a nil pointer.
func (m *RawMessage) UnmarshalJSON(data []byte) error {
	if m == nil {
		return errors.New("json.RawMessage: UnmarshalJSON on nil pointer")
	}
	// Reuse m's existing backing array where possible; the bytes are copied,
	// so later changes to data do not affect *m.
	*m = append((*m)[0:0], data...)
	return nil
}
256
// Compile-time assertions that *RawMessage satisfies Marshaler and Unmarshaler.
var _ Marshaler = (*RawMessage)(nil)
var _ Unmarshaler = (*RawMessage)(nil)
259
// A Token holds a value of one of these types:
//
//	Delim, for the four JSON delimiters [ ] { }
//	bool, for JSON booleans
//	float64, for JSON numbers
//	Number, for JSON numbers (when UseNumber has been called on the Decoder)
//	string, for JSON string literals
//	nil, for JSON null
type Token interface{}
270
// Values of Decoder.tokenState. Each one records where the token stream
// stands relative to the enclosing array or object.
const (
	tokenTopValue    = iota // at top level; also the zero value of tokenState
	tokenArrayStart         // just after '['
	tokenArrayValue         // after ',' in an array; a value is expected
	tokenArrayComma         // after an array element; ',' or ']' is expected
	tokenObjectStart        // just after '{'
	tokenObjectKey          // after ',' in an object; a key is expected
	tokenObjectColon        // after an object key; ':' is expected
	tokenObjectValue        // after ':'; a value is expected
	tokenObjectComma        // after an object value; ',' or '}' is expected
)
282
283// advance tokenstate from a separator state to a value state
284func (dec *Decoder) tokenPrepareForDecode() error {
285 // Note: Not calling peek before switch, to avoid
286 // putting peek into the standard Decode path.
287 // peek is only called when using the Token API.
288 switch dec.tokenState {
289 case tokenArrayComma:
290 c, err := dec.peek()
291 if err != nil {
292 return err
293 }
294 if c != ',' {
295 return &SyntaxError{"expected comma after array element", 0}
296 }
297 dec.scanp++
298 dec.tokenState = tokenArrayValue
299 case tokenObjectColon:
300 c, err := dec.peek()
301 if err != nil {
302 return err
303 }
304 if c != ':' {
305 return &SyntaxError{"expected colon after object key", 0}
306 }
307 dec.scanp++
308 dec.tokenState = tokenObjectValue
309 }
310 return nil
311}
312
313func (dec *Decoder) tokenValueAllowed() bool {
314 switch dec.tokenState {
315 case tokenTopValue, tokenArrayStart, tokenArrayValue, tokenObjectValue:
316 return true
317 }
318 return false
319}
320
321func (dec *Decoder) tokenValueEnd() {
322 switch dec.tokenState {
323 case tokenArrayStart, tokenArrayValue:
324 dec.tokenState = tokenArrayComma
325 case tokenObjectValue:
326 dec.tokenState = tokenObjectComma
327 }
328}
329
// A Delim is one of the JSON array or object delimiters: [ ] { or }.
type Delim rune

// String returns the delimiter as a one-character string.
func (d Delim) String() string {
	r := rune(d)
	return string(r)
}
336
// Token returns the next JSON token in the input stream.
// At the end of the input stream, Token returns nil, io.EOF.
//
// Token guarantees that the delimiters [ ] { } it returns are
// properly nested and matched: if Token encounters an unexpected
// delimiter in the input, it will return an error.
//
// The input stream consists of basic JSON values—bool, string,
// number, and null—along with delimiters [ ] { } of type Delim
// to mark the start and end of arrays and objects.
// Commas and colons are elided.
func (dec *Decoder) Token() (Token, error) {
	// Loop so that ':' and ',' can be consumed without being returned.
	for {
		// peek skips leading space and leaves dec.scanp on the byte it returns.
		c, err := dec.peek()
		if err != nil {
			return nil, err
		}
		switch c {
		case '[':
			if !dec.tokenValueAllowed() {
				return dec.tokenError(c)
			}
			dec.scanp++
			// Save the enclosing state so the matching ']' can restore it.
			dec.tokenStack = append(dec.tokenStack, dec.tokenState)
			dec.tokenState = tokenArrayStart
			return Delim('['), nil

		case ']':
			if dec.tokenState != tokenArrayStart && dec.tokenState != tokenArrayComma {
				return dec.tokenError(c)
			}
			dec.scanp++
			// Restore the enclosing state, then record there that a value
			// (this array) has just ended.
			dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1]
			dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1]
			dec.tokenValueEnd()
			return Delim(']'), nil

		case '{':
			if !dec.tokenValueAllowed() {
				return dec.tokenError(c)
			}
			dec.scanp++
			// Save the enclosing state so the matching '}' can restore it.
			dec.tokenStack = append(dec.tokenStack, dec.tokenState)
			dec.tokenState = tokenObjectStart
			return Delim('{'), nil

		case '}':
			if dec.tokenState != tokenObjectStart && dec.tokenState != tokenObjectComma {
				return dec.tokenError(c)
			}
			dec.scanp++
			// Restore the enclosing state, then record there that a value
			// (this object) has just ended.
			dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1]
			dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1]
			dec.tokenValueEnd()
			return Delim('}'), nil

		case ':':
			if dec.tokenState != tokenObjectColon {
				return dec.tokenError(c)
			}
			dec.scanp++
			dec.tokenState = tokenObjectValue
			continue

		case ',':
			if dec.tokenState == tokenArrayComma {
				dec.scanp++
				dec.tokenState = tokenArrayValue
				continue
			}
			if dec.tokenState == tokenObjectComma {
				dec.scanp++
				dec.tokenState = tokenObjectKey
				continue
			}
			return dec.tokenError(c)

		case '"':
			if dec.tokenState == tokenObjectStart || dec.tokenState == tokenObjectKey {
				// An object key. Decode rejects these two states, so switch
				// to tokenTopValue for the call and put the state back after.
				var x string
				old := dec.tokenState
				dec.tokenState = tokenTopValue
				err := dec.Decode(&x)
				dec.tokenState = old
				if err != nil {
					clearOffset(err)
					return nil, err
				}
				dec.tokenState = tokenObjectColon
				return x, nil
			}
			// Not a key position: handle the string like any other value.
			fallthrough

		default:
			if !dec.tokenValueAllowed() {
				return dec.tokenError(c)
			}
			// Decode the value; Decode also advances the token state.
			var x interface{}
			if err := dec.Decode(&x); err != nil {
				clearOffset(err)
				return nil, err
			}
			return x, nil
		}
	}
}
443
444func clearOffset(err error) {
445 if s, ok := err.(*SyntaxError); ok {
446 s.Offset = 0
447 }
448}
449
450func (dec *Decoder) tokenError(c byte) (Token, error) {
451 var context string
452 switch dec.tokenState {
453 case tokenTopValue:
454 context = " looking for beginning of value"
455 case tokenArrayStart, tokenArrayValue, tokenObjectValue:
456 context = " looking for beginning of value"
457 case tokenArrayComma:
458 context = " after array element"
459 case tokenObjectKey:
460 context = " looking for beginning of object key string"
461 case tokenObjectColon:
462 context = " after object key"
463 case tokenObjectComma:
464 context = " after object key:value pair"
465 }
466 return nil, &SyntaxError{"invalid character " + quoteChar(c) + " " + context, 0}
467}
468
469// More reports whether there is another element in the
470// current array or object being parsed.
471func (dec *Decoder) More() bool {
472 c, err := dec.peek()
473 return err == nil && c != ']' && c != '}'
474}
475
476func (dec *Decoder) peek() (byte, error) {
477 var err error
478 for {
479 for i := dec.scanp; i < len(dec.buf); i++ {
480 c := dec.buf[i]
481 if isSpace(c) {
482 continue
483 }
484 dec.scanp = i
485 return c, nil
486 }
487 // buffer has been scanned, now report any error
488 if err != nil {
489 return 0, err
490 }
491 err = dec.refill()
492 }
493}
494
495/*
496TODO
497
498// EncodeToken writes the given JSON token to the stream.
499// It returns an error if the delimiters [ ] { } are not properly used.
500//
501// EncodeToken does not call Flush, because usually it is part of
502// a larger operation such as Encode, and those will call Flush when finished.
503// Callers that create an Encoder and then invoke EncodeToken directly,
504// without using Encode, need to call Flush when finished to ensure that
505// the JSON is written to the underlying writer.
506func (e *Encoder) EncodeToken(t Token) error {
507 ...
508}
509
510*/