blob: f7fea7d8dd4b507c2d610cd43439f0acf00366cb [file] [log] [blame]
khenaidood948f772021-08-11 17:49:24 -04001// Copyright 2018 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package json
6
7import (
8 "strconv"
9 "unicode"
10 "unicode/utf16"
11 "unicode/utf8"
12
13 "google.golang.org/protobuf/internal/strs"
14)
15
16func (d *Decoder) parseString(in []byte) (string, int, error) {
17 in0 := in
18 if len(in) == 0 {
19 return "", 0, ErrUnexpectedEOF
20 }
21 if in[0] != '"' {
22 return "", 0, d.newSyntaxError(d.currPos(), "invalid character %q at start of string", in[0])
23 }
24 in = in[1:]
25 i := indexNeedEscapeInBytes(in)
26 in, out := in[i:], in[:i:i] // set cap to prevent mutations
27 for len(in) > 0 {
28 switch r, n := utf8.DecodeRune(in); {
29 case r == utf8.RuneError && n == 1:
30 return "", 0, d.newSyntaxError(d.currPos(), "invalid UTF-8 in string")
31 case r < ' ':
32 return "", 0, d.newSyntaxError(d.currPos(), "invalid character %q in string", r)
33 case r == '"':
34 in = in[1:]
35 n := len(in0) - len(in)
36 return string(out), n, nil
37 case r == '\\':
38 if len(in) < 2 {
39 return "", 0, ErrUnexpectedEOF
40 }
41 switch r := in[1]; r {
42 case '"', '\\', '/':
43 in, out = in[2:], append(out, r)
44 case 'b':
45 in, out = in[2:], append(out, '\b')
46 case 'f':
47 in, out = in[2:], append(out, '\f')
48 case 'n':
49 in, out = in[2:], append(out, '\n')
50 case 'r':
51 in, out = in[2:], append(out, '\r')
52 case 't':
53 in, out = in[2:], append(out, '\t')
54 case 'u':
55 if len(in) < 6 {
56 return "", 0, ErrUnexpectedEOF
57 }
58 v, err := strconv.ParseUint(string(in[2:6]), 16, 16)
59 if err != nil {
60 return "", 0, d.newSyntaxError(d.currPos(), "invalid escape code %q in string", in[:6])
61 }
62 in = in[6:]
63
64 r := rune(v)
65 if utf16.IsSurrogate(r) {
66 if len(in) < 6 {
67 return "", 0, ErrUnexpectedEOF
68 }
69 v, err := strconv.ParseUint(string(in[2:6]), 16, 16)
70 r = utf16.DecodeRune(r, rune(v))
71 if in[0] != '\\' || in[1] != 'u' ||
72 r == unicode.ReplacementChar || err != nil {
73 return "", 0, d.newSyntaxError(d.currPos(), "invalid escape code %q in string", in[:6])
74 }
75 in = in[6:]
76 }
77 out = append(out, string(r)...)
78 default:
79 return "", 0, d.newSyntaxError(d.currPos(), "invalid escape code %q in string", in[:2])
80 }
81 default:
82 i := indexNeedEscapeInBytes(in[n:])
83 in, out = in[n+i:], append(out, in[:n+i]...)
84 }
85 }
86 return "", 0, ErrUnexpectedEOF
87}
88
89// indexNeedEscapeInBytes returns the index of the character that needs
90// escaping. If no characters need escaping, this returns the input length.
91func indexNeedEscapeInBytes(b []byte) int { return indexNeedEscapeInString(strs.UnsafeString(b)) }