blob: 03aabf5527d1c3ef22fa14bf08081eb835b2b086 [file] [log] [blame]
Don Newton379ae252019-04-01 12:17:06 -04001// Copyright (C) MongoDB, Inc. 2017-present.
2//
3// Licensed under the Apache License, Version 2.0 (the "License"); you may
4// not use this file except in compliance with the License. You may obtain
5// a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
6
7package bsonrw
8
9import (
10 "bytes"
11 "errors"
12 "fmt"
13 "io"
14 "math"
15 "strconv"
16 "strings"
17 "unicode"
18)
19
20type jsonTokenType byte
21
22const (
23 jttBeginObject jsonTokenType = iota
24 jttEndObject
25 jttBeginArray
26 jttEndArray
27 jttColon
28 jttComma
29 jttInt32
30 jttInt64
31 jttDouble
32 jttString
33 jttBool
34 jttNull
35 jttEOF
36)
37
38type jsonToken struct {
39 t jsonTokenType
40 v interface{}
41 p int
42}
43
44type jsonScanner struct {
45 r io.Reader
46 buf []byte
47 pos int
48 lastReadErr error
49}
50
51// nextToken returns the next JSON token if one exists. A token is a character
52// of the JSON grammar, a number, a string, or a literal.
53func (js *jsonScanner) nextToken() (*jsonToken, error) {
54 c, err := js.readNextByte()
55
56 // keep reading until a non-space is encountered (break on read error or EOF)
57 for isWhiteSpace(c) && err == nil {
58 c, err = js.readNextByte()
59 }
60
61 if err == io.EOF {
62 return &jsonToken{t: jttEOF}, nil
63 } else if err != nil {
64 return nil, err
65 }
66
67 // switch on the character
68 switch c {
69 case '{':
70 return &jsonToken{t: jttBeginObject, v: byte('{'), p: js.pos - 1}, nil
71 case '}':
72 return &jsonToken{t: jttEndObject, v: byte('}'), p: js.pos - 1}, nil
73 case '[':
74 return &jsonToken{t: jttBeginArray, v: byte('['), p: js.pos - 1}, nil
75 case ']':
76 return &jsonToken{t: jttEndArray, v: byte(']'), p: js.pos - 1}, nil
77 case ':':
78 return &jsonToken{t: jttColon, v: byte(':'), p: js.pos - 1}, nil
79 case ',':
80 return &jsonToken{t: jttComma, v: byte(','), p: js.pos - 1}, nil
81 case '"': // RFC-8259 only allows for double quotes (") not single (')
82 return js.scanString()
83 default:
84 // check if it's a number
85 if c == '-' || isDigit(c) {
86 return js.scanNumber(c)
87 } else if c == 't' || c == 'f' || c == 'n' {
88 // maybe a literal
89 return js.scanLiteral(c)
90 } else {
91 return nil, fmt.Errorf("invalid JSON input. Position: %d. Character: %c", js.pos-1, c)
92 }
93 }
94}
95
96// readNextByte attempts to read the next byte from the buffer. If the buffer
97// has been exhausted, this function calls readIntoBuf, thus refilling the
98// buffer and resetting the read position to 0
99func (js *jsonScanner) readNextByte() (byte, error) {
100 if js.pos >= len(js.buf) {
101 err := js.readIntoBuf()
102
103 if err != nil {
104 return 0, err
105 }
106 }
107
108 b := js.buf[js.pos]
109 js.pos++
110
111 return b, nil
112}
113
114// readNNextBytes reads n bytes into dst, starting at offset
115func (js *jsonScanner) readNNextBytes(dst []byte, n, offset int) error {
116 var err error
117
118 for i := 0; i < n; i++ {
119 dst[i+offset], err = js.readNextByte()
120 if err != nil {
121 return err
122 }
123 }
124
125 return nil
126}
127
128// readIntoBuf reads up to 512 bytes from the scanner's io.Reader into the buffer
129func (js *jsonScanner) readIntoBuf() error {
130 if js.lastReadErr != nil {
131 js.buf = js.buf[:0]
132 js.pos = 0
133 return js.lastReadErr
134 }
135
136 if cap(js.buf) == 0 {
137 js.buf = make([]byte, 0, 512)
138 }
139
140 n, err := js.r.Read(js.buf[:cap(js.buf)])
141 if err != nil {
142 js.lastReadErr = err
143 if n > 0 {
144 err = nil
145 }
146 }
147 js.buf = js.buf[:n]
148 js.pos = 0
149
150 return err
151}
152
153func isWhiteSpace(c byte) bool {
154 return c == ' ' || c == '\t' || c == '\r' || c == '\n'
155}
156
157func isDigit(c byte) bool {
158 return unicode.IsDigit(rune(c))
159}
160
161func isValueTerminator(c byte) bool {
162 return c == ',' || c == '}' || c == ']' || isWhiteSpace(c)
163}
164
165// scanString reads from an opening '"' to a closing '"' and handles escaped characters
166func (js *jsonScanner) scanString() (*jsonToken, error) {
167 var b bytes.Buffer
168 var c byte
169 var err error
170
171 p := js.pos - 1
172
173 for {
174 c, err = js.readNextByte()
175 if err != nil {
176 if err == io.EOF {
177 return nil, errors.New("end of input in JSON string")
178 }
179 return nil, err
180 }
181
182 switch c {
183 case '\\':
184 c, err = js.readNextByte()
185 switch c {
186 case '"', '\\', '/', '\'':
187 b.WriteByte(c)
188 case 'b':
189 b.WriteByte('\b')
190 case 'f':
191 b.WriteByte('\f')
192 case 'n':
193 b.WriteByte('\n')
194 case 'r':
195 b.WriteByte('\r')
196 case 't':
197 b.WriteByte('\t')
198 case 'u':
199 us := make([]byte, 4)
200 err = js.readNNextBytes(us, 4, 0)
201 if err != nil {
202 return nil, fmt.Errorf("invalid unicode sequence in JSON string: %s", us)
203 }
204
205 s := fmt.Sprintf(`\u%s`, us)
206 s, err = strconv.Unquote(strings.Replace(strconv.Quote(s), `\\u`, `\u`, 1))
207 if err != nil {
208 return nil, err
209 }
210
211 b.WriteString(s)
212 default:
213 return nil, fmt.Errorf("invalid escape sequence in JSON string '\\%c'", c)
214 }
215 case '"':
216 return &jsonToken{t: jttString, v: b.String(), p: p}, nil
217 default:
218 b.WriteByte(c)
219 }
220 }
221}
222
223// scanLiteral reads an unquoted sequence of characters and determines if it is one of
224// three valid JSON literals (true, false, null); if so, it returns the appropriate
225// jsonToken; otherwise, it returns an error
226func (js *jsonScanner) scanLiteral(first byte) (*jsonToken, error) {
227 p := js.pos - 1
228
229 lit := make([]byte, 4)
230 lit[0] = first
231
232 err := js.readNNextBytes(lit, 3, 1)
233 if err != nil {
234 return nil, err
235 }
236
237 c5, err := js.readNextByte()
238
239 if bytes.Equal([]byte("true"), lit) && (isValueTerminator(c5) || err == io.EOF) {
240 js.pos = int(math.Max(0, float64(js.pos-1)))
241 return &jsonToken{t: jttBool, v: true, p: p}, nil
242 } else if bytes.Equal([]byte("null"), lit) && (isValueTerminator(c5) || err == io.EOF) {
243 js.pos = int(math.Max(0, float64(js.pos-1)))
244 return &jsonToken{t: jttNull, v: nil, p: p}, nil
245 } else if bytes.Equal([]byte("fals"), lit) {
246 if c5 == 'e' {
247 c5, err = js.readNextByte()
248
249 if isValueTerminator(c5) || err == io.EOF {
250 js.pos = int(math.Max(0, float64(js.pos-1)))
251 return &jsonToken{t: jttBool, v: false, p: p}, nil
252 }
253 }
254 }
255
256 return nil, fmt.Errorf("invalid JSON literal. Position: %d, literal: %s", p, lit)
257}
258
259type numberScanState byte
260
261const (
262 nssSawLeadingMinus numberScanState = iota
263 nssSawLeadingZero
264 nssSawIntegerDigits
265 nssSawDecimalPoint
266 nssSawFractionDigits
267 nssSawExponentLetter
268 nssSawExponentSign
269 nssSawExponentDigits
270 nssDone
271 nssInvalid
272)
273
274// scanNumber reads a JSON number (according to RFC-8259)
275func (js *jsonScanner) scanNumber(first byte) (*jsonToken, error) {
276 var b bytes.Buffer
277 var s numberScanState
278 var c byte
279 var err error
280
281 t := jttInt64 // assume it's an int64 until the type can be determined
282 start := js.pos - 1
283
284 b.WriteByte(first)
285
286 switch first {
287 case '-':
288 s = nssSawLeadingMinus
289 case '0':
290 s = nssSawLeadingZero
291 default:
292 s = nssSawIntegerDigits
293 }
294
295 for {
296 c, err = js.readNextByte()
297
298 if err != nil && err != io.EOF {
299 return nil, err
300 }
301
302 switch s {
303 case nssSawLeadingMinus:
304 switch c {
305 case '0':
306 s = nssSawLeadingZero
307 b.WriteByte(c)
308 default:
309 if isDigit(c) {
310 s = nssSawIntegerDigits
311 b.WriteByte(c)
312 } else {
313 s = nssInvalid
314 }
315 }
316 case nssSawLeadingZero:
317 switch c {
318 case '.':
319 s = nssSawDecimalPoint
320 b.WriteByte(c)
321 case 'e', 'E':
322 s = nssSawExponentLetter
323 b.WriteByte(c)
324 case '}', ']', ',':
325 s = nssDone
326 default:
327 if isWhiteSpace(c) || err == io.EOF {
328 s = nssDone
329 } else {
330 s = nssInvalid
331 }
332 }
333 case nssSawIntegerDigits:
334 switch c {
335 case '.':
336 s = nssSawDecimalPoint
337 b.WriteByte(c)
338 case 'e', 'E':
339 s = nssSawExponentLetter
340 b.WriteByte(c)
341 case '}', ']', ',':
342 s = nssDone
343 default:
344 if isWhiteSpace(c) || err == io.EOF {
345 s = nssDone
346 } else if isDigit(c) {
347 s = nssSawIntegerDigits
348 b.WriteByte(c)
349 } else {
350 s = nssInvalid
351 }
352 }
353 case nssSawDecimalPoint:
354 t = jttDouble
355 if isDigit(c) {
356 s = nssSawFractionDigits
357 b.WriteByte(c)
358 } else {
359 s = nssInvalid
360 }
361 case nssSawFractionDigits:
362 switch c {
363 case 'e', 'E':
364 s = nssSawExponentLetter
365 b.WriteByte(c)
366 case '}', ']', ',':
367 s = nssDone
368 default:
369 if isWhiteSpace(c) || err == io.EOF {
370 s = nssDone
371 } else if isDigit(c) {
372 s = nssSawFractionDigits
373 b.WriteByte(c)
374 } else {
375 s = nssInvalid
376 }
377 }
378 case nssSawExponentLetter:
379 t = jttDouble
380 switch c {
381 case '+', '-':
382 s = nssSawExponentSign
383 b.WriteByte(c)
384 default:
385 if isDigit(c) {
386 s = nssSawExponentDigits
387 b.WriteByte(c)
388 } else {
389 s = nssInvalid
390 }
391 }
392 case nssSawExponentSign:
393 if isDigit(c) {
394 s = nssSawExponentDigits
395 b.WriteByte(c)
396 } else {
397 s = nssInvalid
398 }
399 case nssSawExponentDigits:
400 switch c {
401 case '}', ']', ',':
402 s = nssDone
403 default:
404 if isWhiteSpace(c) || err == io.EOF {
405 s = nssDone
406 } else if isDigit(c) {
407 s = nssSawExponentDigits
408 b.WriteByte(c)
409 } else {
410 s = nssInvalid
411 }
412 }
413 }
414
415 switch s {
416 case nssInvalid:
417 return nil, fmt.Errorf("invalid JSON number. Position: %d", start)
418 case nssDone:
419 js.pos = int(math.Max(0, float64(js.pos-1)))
420 if t != jttDouble {
421 v, err := strconv.ParseInt(b.String(), 10, 64)
422 if err == nil {
423 if v < math.MinInt32 || v > math.MaxInt32 {
424 return &jsonToken{t: jttInt64, v: v, p: start}, nil
425 }
426
427 return &jsonToken{t: jttInt32, v: int32(v), p: start}, nil
428 }
429 }
430
431 v, err := strconv.ParseFloat(b.String(), 64)
432 if err != nil {
433 return nil, err
434 }
435
436 return &jsonToken{t: jttDouble, v: v, p: start}, nil
437 }
438 }
439}