blob: 689982c52cf79ca2055bd05a30250d1208ff325a [file] [log] [blame]
Don Newton379ae252019-04-01 12:17:06 -04001// Copyright (C) MongoDB, Inc. 2017-present.
2//
3// Licensed under the Apache License, Version 2.0 (the "License"); you may
4// not use this file except in compliance with the License. You may obtain
5// a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
6
7package bsonrw
8
9import (
10 "errors"
11 "fmt"
12 "io"
13
14 "github.com/mongodb/mongo-go-driver/bson/bsontype"
15)
16
// maxNestingDepth is the default limit on how many objects may be open at
// once; newExtJSONParser copies it into the parser's maxDepth field.
const maxNestingDepth = 200
18
// ErrInvalidJSON is returned when the JSON input cannot be parsed and no more
// specific error is available.
var ErrInvalidJSON = errors.New("invalid JSON input")
21
// jsonParseState records where the parser is in the JSON grammar. Most states
// are named after the token that was consumed most recently.
type jsonParseState byte

const (
	// jpsStartState is the state of a freshly constructed parser.
	jpsStartState jsonParseState = iota
	// jpsSawBeginObject follows a begin-object token.
	jpsSawBeginObject
	// jpsSawEndObject follows an end-object token.
	jpsSawEndObject
	// jpsSawBeginArray follows a begin-array token.
	jpsSawBeginArray
	// jpsSawEndArray follows an end-array token.
	jpsSawEndArray
	// jpsSawColon follows a colon token.
	jpsSawColon
	// jpsSawComma follows a comma token.
	jpsSawComma
	// jpsSawKey follows a string token that was interpreted as an object key.
	jpsSawKey
	// jpsSawValue follows a token that was interpreted as a literal value.
	jpsSawValue
	// jpsDoneState follows the end of input when no container is still open.
	jpsDoneState
	// jpsInvalidState is entered on any scanner or grammar error; the cause
	// is kept in the parser's err field.
	jpsInvalidState
)
37
// jsonParseMode identifies the kind of container the parser is currently
// inside. Modes are kept on a stack, one entry per open container.
type jsonParseMode byte

const (
	// jpmInvalidMode is reported when the mode stack is empty.
	jpmInvalidMode jsonParseMode = iota
	// jpmObjectMode marks an open JSON object.
	jpmObjectMode
	// jpmArrayMode marks an open JSON array.
	jpmArrayMode
)
45
46type extJSONValue struct {
47 t bsontype.Type
48 v interface{}
49}
50
51type extJSONObject struct {
52 keys []string
53 values []*extJSONValue
54}
55
56type extJSONParser struct {
57 js *jsonScanner
58 s jsonParseState
59 m []jsonParseMode
60 k string
61 v *extJSONValue
62
63 err error
64 canonical bool
65 depth int
66 maxDepth int
67
68 emptyObject bool
69}
70
71// newExtJSONParser returns a new extended JSON parser, ready to to begin
72// parsing from the first character of the argued json input. It will not
73// perform any read-ahead and will therefore not report any errors about
74// malformed JSON at this point.
75func newExtJSONParser(r io.Reader, canonical bool) *extJSONParser {
76 return &extJSONParser{
77 js: &jsonScanner{r: r},
78 s: jpsStartState,
79 m: []jsonParseMode{},
80 canonical: canonical,
81 maxDepth: maxNestingDepth,
82 }
83}
84
85// peekType examines the next value and returns its BSON Type
86func (ejp *extJSONParser) peekType() (bsontype.Type, error) {
87 var t bsontype.Type
88 var err error
89
90 ejp.advanceState()
91 switch ejp.s {
92 case jpsSawValue:
93 t = ejp.v.t
94 case jpsSawBeginArray:
95 t = bsontype.Array
96 case jpsInvalidState:
97 err = ejp.err
98 case jpsSawComma:
99 // in array mode, seeing a comma means we need to progress again to actually observe a type
100 if ejp.peekMode() == jpmArrayMode {
101 return ejp.peekType()
102 }
103 case jpsSawEndArray:
104 // this would only be a valid state if we were in array mode, so return end-of-array error
105 err = ErrEOA
106 case jpsSawBeginObject:
107 // peek key to determine type
108 ejp.advanceState()
109 switch ejp.s {
110 case jpsSawEndObject: // empty embedded document
111 t = bsontype.EmbeddedDocument
112 ejp.emptyObject = true
113 case jpsInvalidState:
114 err = ejp.err
115 case jpsSawKey:
116 t = wrapperKeyBSONType(ejp.k)
117
118 if t == bsontype.JavaScript {
119 // just saw $code, need to check for $scope at same level
120 _, err := ejp.readValue(bsontype.JavaScript)
121
122 if err != nil {
123 break
124 }
125
126 switch ejp.s {
127 case jpsSawEndObject: // type is TypeJavaScript
128 case jpsSawComma:
129 ejp.advanceState()
130 if ejp.s == jpsSawKey && ejp.k == "$scope" {
131 t = bsontype.CodeWithScope
132 } else {
133 err = fmt.Errorf("invalid extended JSON: unexpected key %s in CodeWithScope object", ejp.k)
134 }
135 case jpsInvalidState:
136 err = ejp.err
137 default:
138 err = ErrInvalidJSON
139 }
140 }
141 }
142 }
143
144 return t, err
145}
146
147// readKey parses the next key and its type and returns them
148func (ejp *extJSONParser) readKey() (string, bsontype.Type, error) {
149 if ejp.emptyObject {
150 ejp.emptyObject = false
151 return "", 0, ErrEOD
152 }
153
154 // advance to key (or return with error)
155 switch ejp.s {
156 case jpsStartState:
157 ejp.advanceState()
158 if ejp.s == jpsSawBeginObject {
159 ejp.advanceState()
160 }
161 case jpsSawBeginObject:
162 ejp.advanceState()
163 case jpsSawValue, jpsSawEndObject, jpsSawEndArray:
164 ejp.advanceState()
165 switch ejp.s {
166 case jpsSawBeginObject, jpsSawComma:
167 ejp.advanceState()
168 case jpsSawEndObject:
169 return "", 0, ErrEOD
170 case jpsDoneState:
171 return "", 0, io.EOF
172 case jpsInvalidState:
173 return "", 0, ejp.err
174 default:
175 return "", 0, ErrInvalidJSON
176 }
177 case jpsSawKey: // do nothing (key was peeked before)
178 default:
179 return "", 0, invalidRequestError("key")
180 }
181
182 // read key
183 var key string
184
185 switch ejp.s {
186 case jpsSawKey:
187 key = ejp.k
188 case jpsSawEndObject:
189 return "", 0, ErrEOD
190 case jpsInvalidState:
191 return "", 0, ejp.err
192 default:
193 return "", 0, invalidRequestError("key")
194 }
195
196 // check for colon
197 ejp.advanceState()
198 if err := ensureColon(ejp.s, key); err != nil {
199 return "", 0, err
200 }
201
202 // peek at the value to determine type
203 t, err := ejp.peekType()
204 if err != nil {
205 return "", 0, err
206 }
207
208 return key, t, nil
209}
210
211// readValue returns the value corresponding to the Type returned by peekType
212func (ejp *extJSONParser) readValue(t bsontype.Type) (*extJSONValue, error) {
213 if ejp.s == jpsInvalidState {
214 return nil, ejp.err
215 }
216
217 var v *extJSONValue
218
219 switch t {
220 case bsontype.Null, bsontype.Boolean, bsontype.String:
221 if ejp.s != jpsSawValue {
222 return nil, invalidRequestError(t.String())
223 }
224 v = ejp.v
225 case bsontype.Int32, bsontype.Int64, bsontype.Double:
226 // relaxed version allows these to be literal number values
227 if ejp.s == jpsSawValue {
228 v = ejp.v
229 break
230 }
231 fallthrough
232 case bsontype.Decimal128, bsontype.Symbol, bsontype.ObjectID, bsontype.MinKey, bsontype.MaxKey, bsontype.Undefined:
233 switch ejp.s {
234 case jpsSawKey:
235 // read colon
236 ejp.advanceState()
237 if err := ensureColon(ejp.s, ejp.k); err != nil {
238 return nil, err
239 }
240
241 // read value
242 ejp.advanceState()
243 if ejp.s != jpsSawValue || !ejp.ensureExtValueType(t) {
244 return nil, invalidJSONErrorForType("value", t)
245 }
246
247 v = ejp.v
248
249 // read end object
250 ejp.advanceState()
251 if ejp.s != jpsSawEndObject {
252 return nil, invalidJSONErrorForType("} after value", t)
253 }
254 default:
255 return nil, invalidRequestError(t.String())
256 }
257 case bsontype.Binary, bsontype.Regex, bsontype.Timestamp, bsontype.DBPointer:
258 if ejp.s != jpsSawKey {
259 return nil, invalidRequestError(t.String())
260 }
261 // read colon
262 ejp.advanceState()
263 if err := ensureColon(ejp.s, ejp.k); err != nil {
264 return nil, err
265 }
266
267 // read KV pairs
268 keys, vals, err := ejp.readObject(2, false)
269 if err != nil {
270 return nil, err
271 }
272
273 ejp.advanceState()
274 if ejp.s != jpsSawEndObject {
275 return nil, invalidJSONErrorForType("2 key-value pairs and then }", t)
276 }
277
278 v = &extJSONValue{t: bsontype.EmbeddedDocument, v: &extJSONObject{keys: keys, values: vals}}
279 case bsontype.DateTime:
280 switch ejp.s {
281 case jpsSawValue:
282 v = ejp.v
283 case jpsSawKey:
284 // read colon
285 ejp.advanceState()
286 if err := ensureColon(ejp.s, ejp.k); err != nil {
287 return nil, err
288 }
289
290 ejp.advanceState()
291 switch ejp.s {
292 case jpsSawBeginObject:
293 keys, vals, err := ejp.readObject(1, true)
294 if err != nil {
295 return nil, err
296 }
297 v = &extJSONValue{t: bsontype.EmbeddedDocument, v: &extJSONObject{keys: keys, values: vals}}
298 case jpsSawValue:
299 if ejp.canonical {
300 return nil, invalidJSONError("{")
301 }
302 v = ejp.v
303 default:
304 if ejp.canonical {
305 return nil, invalidJSONErrorForType("object", t)
306 }
307 return nil, invalidJSONErrorForType("ISO-8601 Internet Date/Time Format as decribed in RFC-3339", t)
308 }
309
310 ejp.advanceState()
311 if ejp.s != jpsSawEndObject {
312 return nil, invalidJSONErrorForType("value and then }", t)
313 }
314 default:
315 return nil, invalidRequestError(t.String())
316 }
317 case bsontype.JavaScript:
318 switch ejp.s {
319 case jpsSawKey:
320 // read colon
321 ejp.advanceState()
322 if err := ensureColon(ejp.s, ejp.k); err != nil {
323 return nil, err
324 }
325
326 // read value
327 ejp.advanceState()
328 if ejp.s != jpsSawValue {
329 return nil, invalidJSONErrorForType("value", t)
330 }
331 v = ejp.v
332
333 // read end object or comma and just return
334 ejp.advanceState()
335 case jpsSawEndObject:
336 v = ejp.v
337 default:
338 return nil, invalidRequestError(t.String())
339 }
340 case bsontype.CodeWithScope:
341 if ejp.s == jpsSawKey && ejp.k == "$scope" {
342 v = ejp.v // this is the $code string from earlier
343
344 // read colon
345 ejp.advanceState()
346 if err := ensureColon(ejp.s, ejp.k); err != nil {
347 return nil, err
348 }
349
350 // read {
351 ejp.advanceState()
352 if ejp.s != jpsSawBeginObject {
353 return nil, invalidJSONError("$scope to be embedded document")
354 }
355 } else {
356 return nil, invalidRequestError(t.String())
357 }
358 case bsontype.EmbeddedDocument, bsontype.Array:
359 return nil, invalidRequestError(t.String())
360 }
361
362 return v, nil
363}
364
365// readObject is a utility method for reading full objects of known (or expected) size
366// it is useful for extended JSON types such as binary, datetime, regex, and timestamp
367func (ejp *extJSONParser) readObject(numKeys int, started bool) ([]string, []*extJSONValue, error) {
368 keys := make([]string, numKeys)
369 vals := make([]*extJSONValue, numKeys)
370
371 if !started {
372 ejp.advanceState()
373 if ejp.s != jpsSawBeginObject {
374 return nil, nil, invalidJSONError("{")
375 }
376 }
377
378 for i := 0; i < numKeys; i++ {
379 key, t, err := ejp.readKey()
380 if err != nil {
381 return nil, nil, err
382 }
383
384 switch ejp.s {
385 case jpsSawKey:
386 v, err := ejp.readValue(t)
387 if err != nil {
388 return nil, nil, err
389 }
390
391 keys[i] = key
392 vals[i] = v
393 case jpsSawValue:
394 keys[i] = key
395 vals[i] = ejp.v
396 default:
397 return nil, nil, invalidJSONError("value")
398 }
399 }
400
401 ejp.advanceState()
402 if ejp.s != jpsSawEndObject {
403 return nil, nil, invalidJSONError("}")
404 }
405
406 return keys, vals, nil
407}
408
409// advanceState reads the next JSON token from the scanner and transitions
410// from the current state based on that token's type
411func (ejp *extJSONParser) advanceState() {
412 if ejp.s == jpsDoneState || ejp.s == jpsInvalidState {
413 return
414 }
415
416 jt, err := ejp.js.nextToken()
417
418 if err != nil {
419 ejp.err = err
420 ejp.s = jpsInvalidState
421 return
422 }
423
424 valid := ejp.validateToken(jt.t)
425 if !valid {
426 ejp.err = unexpectedTokenError(jt)
427 ejp.s = jpsInvalidState
428 return
429 }
430
431 switch jt.t {
432 case jttBeginObject:
433 ejp.s = jpsSawBeginObject
434 ejp.pushMode(jpmObjectMode)
435 ejp.depth++
436
437 if ejp.depth > ejp.maxDepth {
438 ejp.err = nestingDepthError(jt.p, ejp.depth)
439 ejp.s = jpsInvalidState
440 }
441 case jttEndObject:
442 ejp.s = jpsSawEndObject
443 ejp.depth--
444
445 if ejp.popMode() != jpmObjectMode {
446 ejp.err = unexpectedTokenError(jt)
447 ejp.s = jpsInvalidState
448 }
449 case jttBeginArray:
450 ejp.s = jpsSawBeginArray
451 ejp.pushMode(jpmArrayMode)
452 case jttEndArray:
453 ejp.s = jpsSawEndArray
454
455 if ejp.popMode() != jpmArrayMode {
456 ejp.err = unexpectedTokenError(jt)
457 ejp.s = jpsInvalidState
458 }
459 case jttColon:
460 ejp.s = jpsSawColon
461 case jttComma:
462 ejp.s = jpsSawComma
463 case jttEOF:
464 ejp.s = jpsDoneState
465 if len(ejp.m) != 0 {
466 ejp.err = unexpectedTokenError(jt)
467 ejp.s = jpsInvalidState
468 }
469 case jttString:
470 switch ejp.s {
471 case jpsSawComma:
472 if ejp.peekMode() == jpmArrayMode {
473 ejp.s = jpsSawValue
474 ejp.v = extendJSONToken(jt)
475 return
476 }
477 fallthrough
478 case jpsSawBeginObject:
479 ejp.s = jpsSawKey
480 ejp.k = jt.v.(string)
481 return
482 }
483 fallthrough
484 default:
485 ejp.s = jpsSawValue
486 ejp.v = extendJSONToken(jt)
487 }
488}
489
490var jpsValidTransitionTokens = map[jsonParseState]map[jsonTokenType]bool{
491 jpsStartState: {
492 jttBeginObject: true,
493 jttBeginArray: true,
494 jttInt32: true,
495 jttInt64: true,
496 jttDouble: true,
497 jttString: true,
498 jttBool: true,
499 jttNull: true,
500 jttEOF: true,
501 },
502 jpsSawBeginObject: {
503 jttEndObject: true,
504 jttString: true,
505 },
506 jpsSawEndObject: {
507 jttEndObject: true,
508 jttEndArray: true,
509 jttComma: true,
510 jttEOF: true,
511 },
512 jpsSawBeginArray: {
513 jttBeginObject: true,
514 jttBeginArray: true,
515 jttEndArray: true,
516 jttInt32: true,
517 jttInt64: true,
518 jttDouble: true,
519 jttString: true,
520 jttBool: true,
521 jttNull: true,
522 },
523 jpsSawEndArray: {
524 jttEndObject: true,
525 jttEndArray: true,
526 jttComma: true,
527 jttEOF: true,
528 },
529 jpsSawColon: {
530 jttBeginObject: true,
531 jttBeginArray: true,
532 jttInt32: true,
533 jttInt64: true,
534 jttDouble: true,
535 jttString: true,
536 jttBool: true,
537 jttNull: true,
538 },
539 jpsSawComma: {
540 jttBeginObject: true,
541 jttBeginArray: true,
542 jttInt32: true,
543 jttInt64: true,
544 jttDouble: true,
545 jttString: true,
546 jttBool: true,
547 jttNull: true,
548 },
549 jpsSawKey: {
550 jttColon: true,
551 },
552 jpsSawValue: {
553 jttEndObject: true,
554 jttEndArray: true,
555 jttComma: true,
556 jttEOF: true,
557 },
558 jpsDoneState: {},
559 jpsInvalidState: {},
560}
561
562func (ejp *extJSONParser) validateToken(jtt jsonTokenType) bool {
563 switch ejp.s {
564 case jpsSawEndObject:
565 // if we are at depth zero and the next token is a '{',
566 // we can consider it valid only if we are not in array mode.
567 if jtt == jttBeginObject && ejp.depth == 0 {
568 return ejp.peekMode() != jpmArrayMode
569 }
570 case jpsSawComma:
571 switch ejp.peekMode() {
572 // the only valid next token after a comma inside a document is a string (a key)
573 case jpmObjectMode:
574 return jtt == jttString
575 case jpmInvalidMode:
576 return false
577 }
578 }
579
580 _, ok := jpsValidTransitionTokens[ejp.s][jtt]
581 return ok
582}
583
584// ensureExtValueType returns true if the current value has the expected
585// value type for single-key extended JSON types. For example,
586// {"$numberInt": v} v must be TypeString
587func (ejp *extJSONParser) ensureExtValueType(t bsontype.Type) bool {
588 switch t {
589 case bsontype.MinKey, bsontype.MaxKey:
590 return ejp.v.t == bsontype.Int32
591 case bsontype.Undefined:
592 return ejp.v.t == bsontype.Boolean
593 case bsontype.Int32, bsontype.Int64, bsontype.Double, bsontype.Decimal128, bsontype.Symbol, bsontype.ObjectID:
594 return ejp.v.t == bsontype.String
595 default:
596 return false
597 }
598}
599
600func (ejp *extJSONParser) pushMode(m jsonParseMode) {
601 ejp.m = append(ejp.m, m)
602}
603
604func (ejp *extJSONParser) popMode() jsonParseMode {
605 l := len(ejp.m)
606 if l == 0 {
607 return jpmInvalidMode
608 }
609
610 m := ejp.m[l-1]
611 ejp.m = ejp.m[:l-1]
612
613 return m
614}
615
616func (ejp *extJSONParser) peekMode() jsonParseMode {
617 l := len(ejp.m)
618 if l == 0 {
619 return jpmInvalidMode
620 }
621
622 return ejp.m[l-1]
623}
624
625func extendJSONToken(jt *jsonToken) *extJSONValue {
626 var t bsontype.Type
627
628 switch jt.t {
629 case jttInt32:
630 t = bsontype.Int32
631 case jttInt64:
632 t = bsontype.Int64
633 case jttDouble:
634 t = bsontype.Double
635 case jttString:
636 t = bsontype.String
637 case jttBool:
638 t = bsontype.Boolean
639 case jttNull:
640 t = bsontype.Null
641 default:
642 return nil
643 }
644
645 return &extJSONValue{t: t, v: jt.v}
646}
647
648func ensureColon(s jsonParseState, key string) error {
649 if s != jpsSawColon {
650 return fmt.Errorf("invalid JSON input: missing colon after key \"%s\"", key)
651 }
652
653 return nil
654}
655
// invalidRequestError builds the error returned when a caller asks the parser
// to read s (a key, or a value of some type) while the parser is in a state
// from which that cannot be read.
func invalidRequestError(s string) error {
	const format = "invalid request to read %s"

	return fmt.Errorf(format, s)
}
659
// invalidJSONError builds the error returned when the input does not contain
// the construct named by expected.
func invalidJSONError(expected string) error {
	const format = "invalid JSON input; expected %s"

	return fmt.Errorf(format, expected)
}
663
664func invalidJSONErrorForType(expected string, t bsontype.Type) error {
665 return fmt.Errorf("invalid JSON input; expected %s for %s", expected, t)
666}
667
668func unexpectedTokenError(jt *jsonToken) error {
669 switch jt.t {
670 case jttInt32, jttInt64, jttDouble:
671 return fmt.Errorf("invalid JSON input; unexpected number (%v) at position %d", jt.v, jt.p)
672 case jttString:
673 return fmt.Errorf("invalid JSON input; unexpected string (\"%v\") at position %d", jt.v, jt.p)
674 case jttBool:
675 return fmt.Errorf("invalid JSON input; unexpected boolean literal (%v) at position %d", jt.v, jt.p)
676 case jttNull:
677 return fmt.Errorf("invalid JSON input; unexpected null literal at position %d", jt.p)
678 case jttEOF:
679 return fmt.Errorf("invalid JSON input; unexpected end of input at position %d", jt.p)
680 default:
681 return fmt.Errorf("invalid JSON input; unexpected %c at position %d", jt.v.(byte), jt.p)
682 }
683}
684
// nestingDepthError builds the error returned when the input nests deeper
// than allowed. p is the position of the offending token and depth is the
// nesting level that was reached.
func nestingDepthError(p, depth int) error {
	const format = "invalid JSON input; nesting too deep (%d levels) at position %d"

	return fmt.Errorf(format, depth, p)
}