khenaidoo | d948f77 | 2021-08-11 17:49:24 -0400 | [diff] [blame] | 1 | // Copyright 2018 The Go Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style |
| 3 | // license that can be found in the LICENSE file. |
| 4 | |
| 5 | package json |
| 6 | |
| 7 | import ( |
| 8 | "bytes" |
| 9 | "fmt" |
| 10 | "io" |
| 11 | "regexp" |
| 12 | "unicode/utf8" |
| 13 | |
| 14 | "google.golang.org/protobuf/internal/errors" |
| 15 | ) |
| 16 | |
| 17 | // call specifies which Decoder method was invoked. |
| 18 | type call uint8 |
| 19 | |
| 20 | const ( |
| 21 | readCall call = iota |
| 22 | peekCall |
| 23 | ) |
| 24 | |
| 25 | const unexpectedFmt = "unexpected token %s" |
| 26 | |
| 27 | // ErrUnexpectedEOF means that EOF was encountered in the middle of the input. |
| 28 | var ErrUnexpectedEOF = errors.New("%v", io.ErrUnexpectedEOF) |
| 29 | |
| 30 | // Decoder is a token-based JSON decoder. |
| 31 | type Decoder struct { |
| 32 | // lastCall is last method called, either readCall or peekCall. |
| 33 | // Initial value is readCall. |
| 34 | lastCall call |
| 35 | |
| 36 | // lastToken contains the last read token. |
| 37 | lastToken Token |
| 38 | |
| 39 | // lastErr contains the last read error. |
| 40 | lastErr error |
| 41 | |
| 42 | // openStack is a stack containing ObjectOpen and ArrayOpen values. The |
| 43 | // top of stack represents the object or the array the current value is |
| 44 | // directly located in. |
| 45 | openStack []Kind |
| 46 | |
| 47 | // orig is used in reporting line and column. |
| 48 | orig []byte |
| 49 | // in contains the unconsumed input. |
| 50 | in []byte |
| 51 | } |
| 52 | |
| 53 | // NewDecoder returns a Decoder to read the given []byte. |
| 54 | func NewDecoder(b []byte) *Decoder { |
| 55 | return &Decoder{orig: b, in: b} |
| 56 | } |
| 57 | |
| 58 | // Peek looks ahead and returns the next token kind without advancing a read. |
| 59 | func (d *Decoder) Peek() (Token, error) { |
| 60 | defer func() { d.lastCall = peekCall }() |
| 61 | if d.lastCall == readCall { |
| 62 | d.lastToken, d.lastErr = d.Read() |
| 63 | } |
| 64 | return d.lastToken, d.lastErr |
| 65 | } |
| 66 | |
| 67 | // Read returns the next JSON token. |
| 68 | // It will return an error if there is no valid token. |
| 69 | func (d *Decoder) Read() (Token, error) { |
| 70 | const scalar = Null | Bool | Number | String |
| 71 | |
| 72 | defer func() { d.lastCall = readCall }() |
| 73 | if d.lastCall == peekCall { |
| 74 | return d.lastToken, d.lastErr |
| 75 | } |
| 76 | |
| 77 | tok, err := d.parseNext() |
| 78 | if err != nil { |
| 79 | return Token{}, err |
| 80 | } |
| 81 | |
| 82 | switch tok.kind { |
| 83 | case EOF: |
| 84 | if len(d.openStack) != 0 || |
| 85 | d.lastToken.kind&scalar|ObjectClose|ArrayClose == 0 { |
| 86 | return Token{}, ErrUnexpectedEOF |
| 87 | } |
| 88 | |
| 89 | case Null: |
| 90 | if !d.isValueNext() { |
| 91 | return Token{}, d.newSyntaxError(tok.pos, unexpectedFmt, tok.RawString()) |
| 92 | } |
| 93 | |
| 94 | case Bool, Number: |
| 95 | if !d.isValueNext() { |
| 96 | return Token{}, d.newSyntaxError(tok.pos, unexpectedFmt, tok.RawString()) |
| 97 | } |
| 98 | |
| 99 | case String: |
| 100 | if d.isValueNext() { |
| 101 | break |
| 102 | } |
| 103 | // This string token should only be for a field name. |
| 104 | if d.lastToken.kind&(ObjectOpen|comma) == 0 { |
| 105 | return Token{}, d.newSyntaxError(tok.pos, unexpectedFmt, tok.RawString()) |
| 106 | } |
| 107 | if len(d.in) == 0 { |
| 108 | return Token{}, ErrUnexpectedEOF |
| 109 | } |
| 110 | if c := d.in[0]; c != ':' { |
| 111 | return Token{}, d.newSyntaxError(d.currPos(), `unexpected character %s, missing ":" after field name`, string(c)) |
| 112 | } |
| 113 | tok.kind = Name |
| 114 | d.consume(1) |
| 115 | |
| 116 | case ObjectOpen, ArrayOpen: |
| 117 | if !d.isValueNext() { |
| 118 | return Token{}, d.newSyntaxError(tok.pos, unexpectedFmt, tok.RawString()) |
| 119 | } |
| 120 | d.openStack = append(d.openStack, tok.kind) |
| 121 | |
| 122 | case ObjectClose: |
| 123 | if len(d.openStack) == 0 || |
| 124 | d.lastToken.kind == comma || |
| 125 | d.openStack[len(d.openStack)-1] != ObjectOpen { |
| 126 | return Token{}, d.newSyntaxError(tok.pos, unexpectedFmt, tok.RawString()) |
| 127 | } |
| 128 | d.openStack = d.openStack[:len(d.openStack)-1] |
| 129 | |
| 130 | case ArrayClose: |
| 131 | if len(d.openStack) == 0 || |
| 132 | d.lastToken.kind == comma || |
| 133 | d.openStack[len(d.openStack)-1] != ArrayOpen { |
| 134 | return Token{}, d.newSyntaxError(tok.pos, unexpectedFmt, tok.RawString()) |
| 135 | } |
| 136 | d.openStack = d.openStack[:len(d.openStack)-1] |
| 137 | |
| 138 | case comma: |
| 139 | if len(d.openStack) == 0 || |
| 140 | d.lastToken.kind&(scalar|ObjectClose|ArrayClose) == 0 { |
| 141 | return Token{}, d.newSyntaxError(tok.pos, unexpectedFmt, tok.RawString()) |
| 142 | } |
| 143 | } |
| 144 | |
| 145 | // Update d.lastToken only after validating token to be in the right sequence. |
| 146 | d.lastToken = tok |
| 147 | |
| 148 | if d.lastToken.kind == comma { |
| 149 | return d.Read() |
| 150 | } |
| 151 | return tok, nil |
| 152 | } |
| 153 | |
| 154 | // Any sequence that looks like a non-delimiter (for error reporting). |
| 155 | var errRegexp = regexp.MustCompile(`^([-+._a-zA-Z0-9]{1,32}|.)`) |
| 156 | |
| 157 | // parseNext parses for the next JSON token. It returns a Token object for |
| 158 | // different types, except for Name. It does not handle whether the next token |
| 159 | // is in a valid sequence or not. |
| 160 | func (d *Decoder) parseNext() (Token, error) { |
| 161 | // Trim leading spaces. |
| 162 | d.consume(0) |
| 163 | |
| 164 | in := d.in |
| 165 | if len(in) == 0 { |
| 166 | return d.consumeToken(EOF, 0), nil |
| 167 | } |
| 168 | |
| 169 | switch in[0] { |
| 170 | case 'n': |
| 171 | if n := matchWithDelim("null", in); n != 0 { |
| 172 | return d.consumeToken(Null, n), nil |
| 173 | } |
| 174 | |
| 175 | case 't': |
| 176 | if n := matchWithDelim("true", in); n != 0 { |
| 177 | return d.consumeBoolToken(true, n), nil |
| 178 | } |
| 179 | |
| 180 | case 'f': |
| 181 | if n := matchWithDelim("false", in); n != 0 { |
| 182 | return d.consumeBoolToken(false, n), nil |
| 183 | } |
| 184 | |
| 185 | case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': |
| 186 | if n, ok := parseNumber(in); ok { |
| 187 | return d.consumeToken(Number, n), nil |
| 188 | } |
| 189 | |
| 190 | case '"': |
| 191 | s, n, err := d.parseString(in) |
| 192 | if err != nil { |
| 193 | return Token{}, err |
| 194 | } |
| 195 | return d.consumeStringToken(s, n), nil |
| 196 | |
| 197 | case '{': |
| 198 | return d.consumeToken(ObjectOpen, 1), nil |
| 199 | |
| 200 | case '}': |
| 201 | return d.consumeToken(ObjectClose, 1), nil |
| 202 | |
| 203 | case '[': |
| 204 | return d.consumeToken(ArrayOpen, 1), nil |
| 205 | |
| 206 | case ']': |
| 207 | return d.consumeToken(ArrayClose, 1), nil |
| 208 | |
| 209 | case ',': |
| 210 | return d.consumeToken(comma, 1), nil |
| 211 | } |
| 212 | return Token{}, d.newSyntaxError(d.currPos(), "invalid value %s", errRegexp.Find(in)) |
| 213 | } |
| 214 | |
| 215 | // newSyntaxError returns an error with line and column information useful for |
| 216 | // syntax errors. |
| 217 | func (d *Decoder) newSyntaxError(pos int, f string, x ...interface{}) error { |
| 218 | e := errors.New(f, x...) |
| 219 | line, column := d.Position(pos) |
| 220 | return errors.New("syntax error (line %d:%d): %v", line, column, e) |
| 221 | } |
| 222 | |
| 223 | // Position returns line and column number of given index of the original input. |
| 224 | // It will panic if index is out of range. |
| 225 | func (d *Decoder) Position(idx int) (line int, column int) { |
| 226 | b := d.orig[:idx] |
| 227 | line = bytes.Count(b, []byte("\n")) + 1 |
| 228 | if i := bytes.LastIndexByte(b, '\n'); i >= 0 { |
| 229 | b = b[i+1:] |
| 230 | } |
| 231 | column = utf8.RuneCount(b) + 1 // ignore multi-rune characters |
| 232 | return line, column |
| 233 | } |
| 234 | |
| 235 | // currPos returns the current index position of d.in from d.orig. |
| 236 | func (d *Decoder) currPos() int { |
| 237 | return len(d.orig) - len(d.in) |
| 238 | } |
| 239 | |
| 240 | // matchWithDelim matches s with the input b and verifies that the match |
| 241 | // terminates with a delimiter of some form (e.g., r"[^-+_.a-zA-Z0-9]"). |
| 242 | // As a special case, EOF is considered a delimiter. It returns the length of s |
| 243 | // if there is a match, else 0. |
| 244 | func matchWithDelim(s string, b []byte) int { |
| 245 | if !bytes.HasPrefix(b, []byte(s)) { |
| 246 | return 0 |
| 247 | } |
| 248 | |
| 249 | n := len(s) |
| 250 | if n < len(b) && isNotDelim(b[n]) { |
| 251 | return 0 |
| 252 | } |
| 253 | return n |
| 254 | } |
| 255 | |
| 256 | // isNotDelim returns true if given byte is a not delimiter character. |
| 257 | func isNotDelim(c byte) bool { |
| 258 | return (c == '-' || c == '+' || c == '.' || c == '_' || |
| 259 | ('a' <= c && c <= 'z') || |
| 260 | ('A' <= c && c <= 'Z') || |
| 261 | ('0' <= c && c <= '9')) |
| 262 | } |
| 263 | |
| 264 | // consume consumes n bytes of input and any subsequent whitespace. |
| 265 | func (d *Decoder) consume(n int) { |
| 266 | d.in = d.in[n:] |
| 267 | for len(d.in) > 0 { |
| 268 | switch d.in[0] { |
| 269 | case ' ', '\n', '\r', '\t': |
| 270 | d.in = d.in[1:] |
| 271 | default: |
| 272 | return |
| 273 | } |
| 274 | } |
| 275 | } |
| 276 | |
| 277 | // isValueNext returns true if next type should be a JSON value: Null, |
| 278 | // Number, String or Bool. |
| 279 | func (d *Decoder) isValueNext() bool { |
| 280 | if len(d.openStack) == 0 { |
| 281 | return d.lastToken.kind == 0 |
| 282 | } |
| 283 | |
| 284 | start := d.openStack[len(d.openStack)-1] |
| 285 | switch start { |
| 286 | case ObjectOpen: |
| 287 | return d.lastToken.kind&Name != 0 |
| 288 | case ArrayOpen: |
| 289 | return d.lastToken.kind&(ArrayOpen|comma) != 0 |
| 290 | } |
| 291 | panic(fmt.Sprintf( |
| 292 | "unreachable logic in Decoder.isValueNext, lastToken.kind: %v, openStack: %v", |
| 293 | d.lastToken.kind, start)) |
| 294 | } |
| 295 | |
| 296 | // consumeToken constructs a Token for given Kind with raw value derived from |
| 297 | // current d.in and given size, and consumes the given size-lenght of it. |
| 298 | func (d *Decoder) consumeToken(kind Kind, size int) Token { |
| 299 | tok := Token{ |
| 300 | kind: kind, |
| 301 | raw: d.in[:size], |
| 302 | pos: len(d.orig) - len(d.in), |
| 303 | } |
| 304 | d.consume(size) |
| 305 | return tok |
| 306 | } |
| 307 | |
| 308 | // consumeBoolToken constructs a Token for a Bool kind with raw value derived from |
| 309 | // current d.in and given size. |
| 310 | func (d *Decoder) consumeBoolToken(b bool, size int) Token { |
| 311 | tok := Token{ |
| 312 | kind: Bool, |
| 313 | raw: d.in[:size], |
| 314 | pos: len(d.orig) - len(d.in), |
| 315 | boo: b, |
| 316 | } |
| 317 | d.consume(size) |
| 318 | return tok |
| 319 | } |
| 320 | |
| 321 | // consumeStringToken constructs a Token for a String kind with raw value derived |
| 322 | // from current d.in and given size. |
| 323 | func (d *Decoder) consumeStringToken(s string, size int) Token { |
| 324 | tok := Token{ |
| 325 | kind: String, |
| 326 | raw: d.in[:size], |
| 327 | pos: len(d.orig) - len(d.in), |
| 328 | str: s, |
| 329 | } |
| 330 | d.consume(size) |
| 331 | return tok |
| 332 | } |
| 333 | |
| 334 | // Clone returns a copy of the Decoder for use in reading ahead the next JSON |
| 335 | // object, array or other values without affecting current Decoder. |
| 336 | func (d *Decoder) Clone() *Decoder { |
| 337 | ret := *d |
| 338 | ret.openStack = append([]Kind(nil), ret.openStack...) |
| 339 | return &ret |
| 340 | } |