blob: 83d2b0d5aec3ca22046e4c52e49f5f34c4a9ba46 [file] [log] [blame]
David K. Bainbridgee05cf0c2021-08-19 03:16:50 +00001// Copyright 2018 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package text
6
7import (
8 "bytes"
9 "fmt"
10 "math"
11 "strconv"
12 "strings"
13
14 "google.golang.org/protobuf/internal/flags"
15)
16
// Kind identifies the category of a token in the textproto format.
type Kind uint8

// Kind values.
const (
	Invalid Kind = iota
	EOF
	Name   // Name is a field name.
	Scalar // Scalar is a scalar value, e.g. "string", 47, ENUM_LITERAL, true.
	MessageOpen
	MessageClose
	ListOpen
	ListClose

	// comma and semicolon are used only while parsing in between values
	// and should not be exposed.
	comma
	semicolon

	// bof stands for beginning of file. It is the default token kind
	// before anything has been parsed and shares its value with Invalid.
	bof = Invalid
)

// String returns a printable form of the kind: a short word for Invalid,
// EOF, Name and Scalar, the punctuation character itself for the structural
// kinds, and "<invalid:N>" for any value outside the declared range.
func (k Kind) String() string {
	names := [...]string{
		Invalid:      "<invalid>",
		EOF:          "eof",
		Name:         "name",
		Scalar:       "scalar",
		MessageOpen:  "{",
		MessageClose: "}",
		ListOpen:     "[",
		ListClose:    "]",
		comma:        ",",
		semicolon:    ";",
	}
	if int(k) < len(names) {
		return names[k]
	}
	return fmt.Sprintf("<invalid:%v>", uint8(k))
}
66
// NameKind distinguishes the different forms a field name can take.
type NameKind uint8

// NameKind values. They start at 1, so the zero value is not a valid
// NameKind.
const (
	IdentName NameKind = iota + 1
	TypeName
	FieldNumber
)

// String returns the Go identifier of the name kind, or "<invalid:N>" for
// any value that is not one of the declared constants.
func (k NameKind) String() string {
	names := [...]string{
		IdentName:   "IdentName",
		TypeName:    "TypeName",
		FieldNumber: "FieldNumber",
	}
	// Index 0 is an unset slot in the table, so it must be rejected too.
	if int(k) < len(names) && names[k] != "" {
		return names[k]
	}
	return fmt.Sprintf("<invalid:%v>", uint8(k))
}
89
// Bit masks stored in the Token metadata bytes.
const (
	// hasSeparator is the bit in Token.attrs that records whether a Name
	// token is followed by the separator char ':'. The separator is
	// optional for message and repeated message fields but required for
	// all other types. The decoder only reports whether it was present;
	// it is up to the prototext package to validate.
	hasSeparator = 1 << 7

	// isNegative is the bit in Token.numAttrs that records that the
	// number is negative.
	isNegative = 1 << 7
)

// Scalar value types.
const (
	numberValue = iota + 1
	stringValue
	literalValue
)
106
107// Token provides a parsed token kind and value. Values are provided by the
108// different accessor methods.
109type Token struct {
110 // Kind of the Token object.
111 kind Kind
112 // attrs contains metadata for the following Kinds:
113 // Name: hasSeparator bit and one of NameKind.
114 // Scalar: one of numberValue, stringValue, literalValue.
115 attrs uint8
116 // numAttrs contains metadata for numberValue:
117 // - highest bit is whether negative or positive.
118 // - lower bits indicate one of numDec, numHex, numOct, numFloat.
119 numAttrs uint8
120 // pos provides the position of the token in the original input.
121 pos int
122 // raw bytes of the serialized token.
123 // This is a subslice into the original input.
124 raw []byte
125 // str contains parsed string for the following:
126 // - stringValue of Scalar kind
127 // - numberValue of Scalar kind
128 // - TypeName of Name kind
129 str string
130}
131
132// Kind returns the token kind.
133func (t Token) Kind() Kind {
134 return t.kind
135}
136
137// RawString returns the read value in string.
138func (t Token) RawString() string {
139 return string(t.raw)
140}
141
142// Pos returns the token position from the input.
143func (t Token) Pos() int {
144 return t.pos
145}
146
147// NameKind returns IdentName, TypeName or FieldNumber.
148// It panics if type is not Name.
149func (t Token) NameKind() NameKind {
150 if t.kind == Name {
151 return NameKind(t.attrs &^ hasSeparator)
152 }
153 panic(fmt.Sprintf("Token is not a Name type: %s", t.kind))
154}
155
156// HasSeparator returns true if the field name is followed by the separator char
157// ':', else false. It panics if type is not Name.
158func (t Token) HasSeparator() bool {
159 if t.kind == Name {
160 return t.attrs&hasSeparator != 0
161 }
162 panic(fmt.Sprintf("Token is not a Name type: %s", t.kind))
163}
164
165// IdentName returns the value for IdentName type.
166func (t Token) IdentName() string {
167 if t.kind == Name && t.attrs&uint8(IdentName) != 0 {
168 return string(t.raw)
169 }
170 panic(fmt.Sprintf("Token is not an IdentName: %s:%s", t.kind, NameKind(t.attrs&^hasSeparator)))
171}
172
173// TypeName returns the value for TypeName type.
174func (t Token) TypeName() string {
175 if t.kind == Name && t.attrs&uint8(TypeName) != 0 {
176 return t.str
177 }
178 panic(fmt.Sprintf("Token is not a TypeName: %s:%s", t.kind, NameKind(t.attrs&^hasSeparator)))
179}
180
181// FieldNumber returns the value for FieldNumber type. It returns a
182// non-negative int32 value. Caller will still need to validate for the correct
183// field number range.
184func (t Token) FieldNumber() int32 {
185 if t.kind != Name || t.attrs&uint8(FieldNumber) == 0 {
186 panic(fmt.Sprintf("Token is not a FieldNumber: %s:%s", t.kind, NameKind(t.attrs&^hasSeparator)))
187 }
188 // Following should not return an error as it had already been called right
189 // before this Token was constructed.
190 num, _ := strconv.ParseInt(string(t.raw), 10, 32)
191 return int32(num)
192}
193
194// String returns the string value for a Scalar type.
195func (t Token) String() (string, bool) {
196 if t.kind != Scalar || t.attrs != stringValue {
197 return "", false
198 }
199 return t.str, true
200}
201
202// Enum returns the literal value for a Scalar type for use as enum literals.
203func (t Token) Enum() (string, bool) {
204 if t.kind != Scalar || t.attrs != literalValue || (len(t.raw) > 0 && t.raw[0] == '-') {
205 return "", false
206 }
207 return string(t.raw), true
208}
209
210// Bool returns the bool value for a Scalar type.
211func (t Token) Bool() (bool, bool) {
212 if t.kind != Scalar {
213 return false, false
214 }
215 switch t.attrs {
216 case literalValue:
217 if b, ok := boolLits[string(t.raw)]; ok {
218 return b, true
219 }
220 case numberValue:
221 // Unsigned integer representation of 0 or 1 is permitted: 00, 0x0, 01,
222 // 0x1, etc.
223 n, err := strconv.ParseUint(t.str, 0, 64)
224 if err == nil {
225 switch n {
226 case 0:
227 return false, true
228 case 1:
229 return true, true
230 }
231 }
232 }
233 return false, false
234}
235
// boolLits maps each accepted boolean literal to its value. Matching is
// exact; these are the spellings supported in C++.
var boolLits = map[string]bool{
	"t": true, "true": true, "True": true,
	"f": false, "false": false, "False": false,
}
245
246// Uint64 returns the uint64 value for a Scalar type.
247func (t Token) Uint64() (uint64, bool) {
248 if t.kind != Scalar || t.attrs != numberValue ||
249 t.numAttrs&isNegative > 0 || t.numAttrs&numFloat > 0 {
250 return 0, false
251 }
252 n, err := strconv.ParseUint(t.str, 0, 64)
253 if err != nil {
254 return 0, false
255 }
256 return n, true
257}
258
259// Uint32 returns the uint32 value for a Scalar type.
260func (t Token) Uint32() (uint32, bool) {
261 if t.kind != Scalar || t.attrs != numberValue ||
262 t.numAttrs&isNegative > 0 || t.numAttrs&numFloat > 0 {
263 return 0, false
264 }
265 n, err := strconv.ParseUint(t.str, 0, 32)
266 if err != nil {
267 return 0, false
268 }
269 return uint32(n), true
270}
271
272// Int64 returns the int64 value for a Scalar type.
273func (t Token) Int64() (int64, bool) {
274 if t.kind != Scalar || t.attrs != numberValue || t.numAttrs&numFloat > 0 {
275 return 0, false
276 }
277 if n, err := strconv.ParseInt(t.str, 0, 64); err == nil {
278 return n, true
279 }
280 // C++ accepts large positive hex numbers as negative values.
281 // This feature is here for proto1 backwards compatibility purposes.
282 if flags.ProtoLegacy && (t.numAttrs == numHex) {
283 if n, err := strconv.ParseUint(t.str, 0, 64); err == nil {
284 return int64(n), true
285 }
286 }
287 return 0, false
288}
289
290// Int32 returns the int32 value for a Scalar type.
291func (t Token) Int32() (int32, bool) {
292 if t.kind != Scalar || t.attrs != numberValue || t.numAttrs&numFloat > 0 {
293 return 0, false
294 }
295 if n, err := strconv.ParseInt(t.str, 0, 32); err == nil {
296 return int32(n), true
297 }
298 // C++ accepts large positive hex numbers as negative values.
299 // This feature is here for proto1 backwards compatibility purposes.
300 if flags.ProtoLegacy && (t.numAttrs == numHex) {
301 if n, err := strconv.ParseUint(t.str, 0, 32); err == nil {
302 return int32(n), true
303 }
304 }
305 return 0, false
306}
307
308// Float64 returns the float64 value for a Scalar type.
309func (t Token) Float64() (float64, bool) {
310 if t.kind != Scalar {
311 return 0, false
312 }
313 switch t.attrs {
314 case literalValue:
315 if f, ok := floatLits[strings.ToLower(string(t.raw))]; ok {
316 return f, true
317 }
318 case numberValue:
319 n, err := strconv.ParseFloat(t.str, 64)
320 if err == nil {
321 return n, true
322 }
323 nerr := err.(*strconv.NumError)
324 if nerr.Err == strconv.ErrRange {
325 return n, true
326 }
327 }
328 return 0, false
329}
330
331// Float32 returns the float32 value for a Scalar type.
332func (t Token) Float32() (float32, bool) {
333 if t.kind != Scalar {
334 return 0, false
335 }
336 switch t.attrs {
337 case literalValue:
338 if f, ok := floatLits[strings.ToLower(string(t.raw))]; ok {
339 return float32(f), true
340 }
341 case numberValue:
342 n, err := strconv.ParseFloat(t.str, 64)
343 if err == nil {
344 // Overflows are treated as (-)infinity.
345 return float32(n), true
346 }
347 nerr := err.(*strconv.NumError)
348 if nerr.Err == strconv.ErrRange {
349 return float32(n), true
350 }
351 }
352 return 0, false
353}
354
// floatLits maps each supported float literal, in lower case, to its value.
// C++ permits case-insensitive variants of these literals.
var floatLits = map[string]float64{
	"inf":       math.Inf(+1),
	"-inf":      math.Inf(-1),
	"infinity":  math.Inf(+1),
	"-infinity": math.Inf(-1),
	"nan":       math.NaN(),
}
364
365// TokenEquals returns true if given Tokens are equal, else false.
366func TokenEquals(x, y Token) bool {
367 return x.kind == y.kind &&
368 x.attrs == y.attrs &&
369 x.numAttrs == y.numAttrs &&
370 x.pos == y.pos &&
371 bytes.Equal(x.raw, y.raw) &&
372 x.str == y.str
373}