amit.ghosh | 258d14c | 2020-10-02 15:13:38 +0200 | [diff] [blame] | 1 | // Copyright 2010 The Go Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style |
| 3 | // license that can be found in the LICENSE file. |
| 4 | |
| 5 | package proto |
| 6 | |
| 7 | import ( |
| 8 | "encoding" |
| 9 | "errors" |
| 10 | "fmt" |
| 11 | "reflect" |
| 12 | "strconv" |
| 13 | "strings" |
| 14 | "unicode/utf8" |
| 15 | |
| 16 | "google.golang.org/protobuf/encoding/prototext" |
| 17 | protoV2 "google.golang.org/protobuf/proto" |
| 18 | "google.golang.org/protobuf/reflect/protoreflect" |
| 19 | "google.golang.org/protobuf/reflect/protoregistry" |
| 20 | ) |
| 21 | |
// wrapTextUnmarshalV2 is a compile-time switch: when true, UnmarshalText
// delegates to prototext and ParseError reports only its message.
const wrapTextUnmarshalV2 = false

// ParseError is returned by UnmarshalText.
type ParseError struct {
	// Message is the human-readable description of the failure.
	Message string

	// Deprecated: Do not use.
	Line, Offset int
}

// Error renders the failure as "line N: message". Errors on the first line
// are rendered as "line 1.OFFSET: message" so the byte offset is visible too.
// When wrapTextUnmarshalV2 is enabled only the bare message is returned.
func (e *ParseError) Error() string {
	if wrapTextUnmarshalV2 {
		return e.Message
	}
	where := fmt.Sprintf("line %d", e.Line)
	if e.Line == 1 {
		where = fmt.Sprintf("line 1.%d", e.Offset)
	}
	return where + ": " + e.Message
}
| 41 | |
| 42 | // UnmarshalText parses a proto text formatted string into m. |
| 43 | func UnmarshalText(s string, m Message) error { |
| 44 | if u, ok := m.(encoding.TextUnmarshaler); ok { |
| 45 | return u.UnmarshalText([]byte(s)) |
| 46 | } |
| 47 | |
| 48 | m.Reset() |
| 49 | mi := MessageV2(m) |
| 50 | |
| 51 | if wrapTextUnmarshalV2 { |
| 52 | err := prototext.UnmarshalOptions{ |
| 53 | AllowPartial: true, |
| 54 | }.Unmarshal([]byte(s), mi) |
| 55 | if err != nil { |
| 56 | return &ParseError{Message: err.Error()} |
| 57 | } |
| 58 | return checkRequiredNotSet(mi) |
| 59 | } else { |
| 60 | if err := newTextParser(s).unmarshalMessage(mi.ProtoReflect(), ""); err != nil { |
| 61 | return err |
| 62 | } |
| 63 | return checkRequiredNotSet(mi) |
| 64 | } |
| 65 | } |
| 66 | |
| 67 | type textParser struct { |
| 68 | s string // remaining input |
| 69 | done bool // whether the parsing is finished (success or error) |
| 70 | backed bool // whether back() was called |
| 71 | offset, line int |
| 72 | cur token |
| 73 | } |
| 74 | |
| 75 | type token struct { |
| 76 | value string |
| 77 | err *ParseError |
| 78 | line int // line number |
| 79 | offset int // byte number from start of input, not start of line |
| 80 | unquoted string // the unquoted version of value, if it was a quoted string |
| 81 | } |
| 82 | |
| 83 | func newTextParser(s string) *textParser { |
| 84 | p := new(textParser) |
| 85 | p.s = s |
| 86 | p.line = 1 |
| 87 | p.cur.line = 1 |
| 88 | return p |
| 89 | } |
| 90 | |
| 91 | func (p *textParser) unmarshalMessage(m protoreflect.Message, terminator string) (err error) { |
| 92 | md := m.Descriptor() |
| 93 | fds := md.Fields() |
| 94 | |
| 95 | // A struct is a sequence of "name: value", terminated by one of |
| 96 | // '>' or '}', or the end of the input. A name may also be |
| 97 | // "[extension]" or "[type/url]". |
| 98 | // |
| 99 | // The whole struct can also be an expanded Any message, like: |
| 100 | // [type/url] < ... struct contents ... > |
| 101 | seen := make(map[protoreflect.FieldNumber]bool) |
| 102 | for { |
| 103 | tok := p.next() |
| 104 | if tok.err != nil { |
| 105 | return tok.err |
| 106 | } |
| 107 | if tok.value == terminator { |
| 108 | break |
| 109 | } |
| 110 | if tok.value == "[" { |
| 111 | if err := p.unmarshalExtensionOrAny(m, seen); err != nil { |
| 112 | return err |
| 113 | } |
| 114 | continue |
| 115 | } |
| 116 | |
| 117 | // This is a normal, non-extension field. |
| 118 | name := protoreflect.Name(tok.value) |
| 119 | fd := fds.ByName(name) |
| 120 | switch { |
| 121 | case fd == nil: |
| 122 | gd := fds.ByName(protoreflect.Name(strings.ToLower(string(name)))) |
| 123 | if gd != nil && gd.Kind() == protoreflect.GroupKind && gd.Message().Name() == name { |
| 124 | fd = gd |
| 125 | } |
| 126 | case fd.Kind() == protoreflect.GroupKind && fd.Message().Name() != name: |
| 127 | fd = nil |
| 128 | case fd.IsWeak() && fd.Message().IsPlaceholder(): |
| 129 | fd = nil |
| 130 | } |
| 131 | if fd == nil { |
| 132 | typeName := string(md.FullName()) |
| 133 | if m, ok := m.Interface().(Message); ok { |
| 134 | t := reflect.TypeOf(m) |
| 135 | if t.Kind() == reflect.Ptr { |
| 136 | typeName = t.Elem().String() |
| 137 | } |
| 138 | } |
| 139 | return p.errorf("unknown field name %q in %v", name, typeName) |
| 140 | } |
| 141 | if od := fd.ContainingOneof(); od != nil && m.WhichOneof(od) != nil { |
| 142 | return p.errorf("field '%s' would overwrite already parsed oneof '%s'", name, od.Name()) |
| 143 | } |
| 144 | if fd.Cardinality() != protoreflect.Repeated && seen[fd.Number()] { |
| 145 | return p.errorf("non-repeated field %q was repeated", fd.Name()) |
| 146 | } |
| 147 | seen[fd.Number()] = true |
| 148 | |
| 149 | // Consume any colon. |
| 150 | if err := p.checkForColon(fd); err != nil { |
| 151 | return err |
| 152 | } |
| 153 | |
| 154 | // Parse into the field. |
| 155 | v := m.Get(fd) |
| 156 | if !m.Has(fd) && (fd.IsList() || fd.IsMap() || fd.Message() != nil) { |
| 157 | v = m.Mutable(fd) |
| 158 | } |
| 159 | if v, err = p.unmarshalValue(v, fd); err != nil { |
| 160 | return err |
| 161 | } |
| 162 | m.Set(fd, v) |
| 163 | |
| 164 | if err := p.consumeOptionalSeparator(); err != nil { |
| 165 | return err |
| 166 | } |
| 167 | } |
| 168 | return nil |
| 169 | } |
| 170 | |
| 171 | func (p *textParser) unmarshalExtensionOrAny(m protoreflect.Message, seen map[protoreflect.FieldNumber]bool) error { |
| 172 | name, err := p.consumeExtensionOrAnyName() |
| 173 | if err != nil { |
| 174 | return err |
| 175 | } |
| 176 | |
| 177 | // If it contains a slash, it's an Any type URL. |
| 178 | if slashIdx := strings.LastIndex(name, "/"); slashIdx >= 0 { |
| 179 | tok := p.next() |
| 180 | if tok.err != nil { |
| 181 | return tok.err |
| 182 | } |
| 183 | // consume an optional colon |
| 184 | if tok.value == ":" { |
| 185 | tok = p.next() |
| 186 | if tok.err != nil { |
| 187 | return tok.err |
| 188 | } |
| 189 | } |
| 190 | |
| 191 | var terminator string |
| 192 | switch tok.value { |
| 193 | case "<": |
| 194 | terminator = ">" |
| 195 | case "{": |
| 196 | terminator = "}" |
| 197 | default: |
| 198 | return p.errorf("expected '{' or '<', found %q", tok.value) |
| 199 | } |
| 200 | |
| 201 | mt, err := protoregistry.GlobalTypes.FindMessageByURL(name) |
| 202 | if err != nil { |
| 203 | return p.errorf("unrecognized message %q in google.protobuf.Any", name[slashIdx+len("/"):]) |
| 204 | } |
| 205 | m2 := mt.New() |
| 206 | if err := p.unmarshalMessage(m2, terminator); err != nil { |
| 207 | return err |
| 208 | } |
| 209 | b, err := protoV2.Marshal(m2.Interface()) |
| 210 | if err != nil { |
| 211 | return p.errorf("failed to marshal message of type %q: %v", name[slashIdx+len("/"):], err) |
| 212 | } |
| 213 | |
| 214 | urlFD := m.Descriptor().Fields().ByName("type_url") |
| 215 | valFD := m.Descriptor().Fields().ByName("value") |
| 216 | if seen[urlFD.Number()] { |
| 217 | return p.errorf("Any message unpacked multiple times, or %q already set", urlFD.Name()) |
| 218 | } |
| 219 | if seen[valFD.Number()] { |
| 220 | return p.errorf("Any message unpacked multiple times, or %q already set", valFD.Name()) |
| 221 | } |
| 222 | m.Set(urlFD, protoreflect.ValueOfString(name)) |
| 223 | m.Set(valFD, protoreflect.ValueOfBytes(b)) |
| 224 | seen[urlFD.Number()] = true |
| 225 | seen[valFD.Number()] = true |
| 226 | return nil |
| 227 | } |
| 228 | |
| 229 | xname := protoreflect.FullName(name) |
| 230 | xt, _ := protoregistry.GlobalTypes.FindExtensionByName(xname) |
| 231 | if xt == nil && isMessageSet(m.Descriptor()) { |
| 232 | xt, _ = protoregistry.GlobalTypes.FindExtensionByName(xname.Append("message_set_extension")) |
| 233 | } |
| 234 | if xt == nil { |
| 235 | return p.errorf("unrecognized extension %q", name) |
| 236 | } |
| 237 | fd := xt.TypeDescriptor() |
| 238 | if fd.ContainingMessage().FullName() != m.Descriptor().FullName() { |
| 239 | return p.errorf("extension field %q does not extend message %q", name, m.Descriptor().FullName()) |
| 240 | } |
| 241 | |
| 242 | if err := p.checkForColon(fd); err != nil { |
| 243 | return err |
| 244 | } |
| 245 | |
| 246 | v := m.Get(fd) |
| 247 | if !m.Has(fd) && (fd.IsList() || fd.IsMap() || fd.Message() != nil) { |
| 248 | v = m.Mutable(fd) |
| 249 | } |
| 250 | v, err = p.unmarshalValue(v, fd) |
| 251 | if err != nil { |
| 252 | return err |
| 253 | } |
| 254 | m.Set(fd, v) |
| 255 | return p.consumeOptionalSeparator() |
| 256 | } |
| 257 | |
| 258 | func (p *textParser) unmarshalValue(v protoreflect.Value, fd protoreflect.FieldDescriptor) (protoreflect.Value, error) { |
| 259 | tok := p.next() |
| 260 | if tok.err != nil { |
| 261 | return v, tok.err |
| 262 | } |
| 263 | if tok.value == "" { |
| 264 | return v, p.errorf("unexpected EOF") |
| 265 | } |
| 266 | |
| 267 | switch { |
| 268 | case fd.IsList(): |
| 269 | lv := v.List() |
| 270 | var err error |
| 271 | if tok.value == "[" { |
| 272 | // Repeated field with list notation, like [1,2,3]. |
| 273 | for { |
| 274 | vv := lv.NewElement() |
| 275 | vv, err = p.unmarshalSingularValue(vv, fd) |
| 276 | if err != nil { |
| 277 | return v, err |
| 278 | } |
| 279 | lv.Append(vv) |
| 280 | |
| 281 | tok := p.next() |
| 282 | if tok.err != nil { |
| 283 | return v, tok.err |
| 284 | } |
| 285 | if tok.value == "]" { |
| 286 | break |
| 287 | } |
| 288 | if tok.value != "," { |
| 289 | return v, p.errorf("Expected ']' or ',' found %q", tok.value) |
| 290 | } |
| 291 | } |
| 292 | return v, nil |
| 293 | } |
| 294 | |
| 295 | // One value of the repeated field. |
| 296 | p.back() |
| 297 | vv := lv.NewElement() |
| 298 | vv, err = p.unmarshalSingularValue(vv, fd) |
| 299 | if err != nil { |
| 300 | return v, err |
| 301 | } |
| 302 | lv.Append(vv) |
| 303 | return v, nil |
| 304 | case fd.IsMap(): |
| 305 | // The map entry should be this sequence of tokens: |
| 306 | // < key : KEY value : VALUE > |
| 307 | // However, implementations may omit key or value, and technically |
| 308 | // we should support them in any order. |
| 309 | var terminator string |
| 310 | switch tok.value { |
| 311 | case "<": |
| 312 | terminator = ">" |
| 313 | case "{": |
| 314 | terminator = "}" |
| 315 | default: |
| 316 | return v, p.errorf("expected '{' or '<', found %q", tok.value) |
| 317 | } |
| 318 | |
| 319 | keyFD := fd.MapKey() |
| 320 | valFD := fd.MapValue() |
| 321 | |
| 322 | mv := v.Map() |
| 323 | kv := keyFD.Default() |
| 324 | vv := mv.NewValue() |
| 325 | for { |
| 326 | tok := p.next() |
| 327 | if tok.err != nil { |
| 328 | return v, tok.err |
| 329 | } |
| 330 | if tok.value == terminator { |
| 331 | break |
| 332 | } |
| 333 | var err error |
| 334 | switch tok.value { |
| 335 | case "key": |
| 336 | if err := p.consumeToken(":"); err != nil { |
| 337 | return v, err |
| 338 | } |
| 339 | if kv, err = p.unmarshalSingularValue(kv, keyFD); err != nil { |
| 340 | return v, err |
| 341 | } |
| 342 | if err := p.consumeOptionalSeparator(); err != nil { |
| 343 | return v, err |
| 344 | } |
| 345 | case "value": |
| 346 | if err := p.checkForColon(valFD); err != nil { |
| 347 | return v, err |
| 348 | } |
| 349 | if vv, err = p.unmarshalSingularValue(vv, valFD); err != nil { |
| 350 | return v, err |
| 351 | } |
| 352 | if err := p.consumeOptionalSeparator(); err != nil { |
| 353 | return v, err |
| 354 | } |
| 355 | default: |
| 356 | p.back() |
| 357 | return v, p.errorf(`expected "key", "value", or %q, found %q`, terminator, tok.value) |
| 358 | } |
| 359 | } |
| 360 | mv.Set(kv.MapKey(), vv) |
| 361 | return v, nil |
| 362 | default: |
| 363 | p.back() |
| 364 | return p.unmarshalSingularValue(v, fd) |
| 365 | } |
| 366 | } |
| 367 | |
| 368 | func (p *textParser) unmarshalSingularValue(v protoreflect.Value, fd protoreflect.FieldDescriptor) (protoreflect.Value, error) { |
| 369 | tok := p.next() |
| 370 | if tok.err != nil { |
| 371 | return v, tok.err |
| 372 | } |
| 373 | if tok.value == "" { |
| 374 | return v, p.errorf("unexpected EOF") |
| 375 | } |
| 376 | |
| 377 | switch fd.Kind() { |
| 378 | case protoreflect.BoolKind: |
| 379 | switch tok.value { |
| 380 | case "true", "1", "t", "True": |
| 381 | return protoreflect.ValueOfBool(true), nil |
| 382 | case "false", "0", "f", "False": |
| 383 | return protoreflect.ValueOfBool(false), nil |
| 384 | } |
| 385 | case protoreflect.Int32Kind, protoreflect.Sint32Kind, protoreflect.Sfixed32Kind: |
| 386 | if x, err := strconv.ParseInt(tok.value, 0, 32); err == nil { |
| 387 | return protoreflect.ValueOfInt32(int32(x)), nil |
| 388 | } |
| 389 | |
| 390 | // The C++ parser accepts large positive hex numbers that uses |
| 391 | // two's complement arithmetic to represent negative numbers. |
| 392 | // This feature is here for backwards compatibility with C++. |
| 393 | if strings.HasPrefix(tok.value, "0x") { |
| 394 | if x, err := strconv.ParseUint(tok.value, 0, 32); err == nil { |
| 395 | return protoreflect.ValueOfInt32(int32(-(int64(^x) + 1))), nil |
| 396 | } |
| 397 | } |
| 398 | case protoreflect.Int64Kind, protoreflect.Sint64Kind, protoreflect.Sfixed64Kind: |
| 399 | if x, err := strconv.ParseInt(tok.value, 0, 64); err == nil { |
| 400 | return protoreflect.ValueOfInt64(int64(x)), nil |
| 401 | } |
| 402 | |
| 403 | // The C++ parser accepts large positive hex numbers that uses |
| 404 | // two's complement arithmetic to represent negative numbers. |
| 405 | // This feature is here for backwards compatibility with C++. |
| 406 | if strings.HasPrefix(tok.value, "0x") { |
| 407 | if x, err := strconv.ParseUint(tok.value, 0, 64); err == nil { |
| 408 | return protoreflect.ValueOfInt64(int64(-(int64(^x) + 1))), nil |
| 409 | } |
| 410 | } |
| 411 | case protoreflect.Uint32Kind, protoreflect.Fixed32Kind: |
| 412 | if x, err := strconv.ParseUint(tok.value, 0, 32); err == nil { |
| 413 | return protoreflect.ValueOfUint32(uint32(x)), nil |
| 414 | } |
| 415 | case protoreflect.Uint64Kind, protoreflect.Fixed64Kind: |
| 416 | if x, err := strconv.ParseUint(tok.value, 0, 64); err == nil { |
| 417 | return protoreflect.ValueOfUint64(uint64(x)), nil |
| 418 | } |
| 419 | case protoreflect.FloatKind: |
| 420 | // Ignore 'f' for compatibility with output generated by C++, |
| 421 | // but don't remove 'f' when the value is "-inf" or "inf". |
| 422 | v := tok.value |
| 423 | if strings.HasSuffix(v, "f") && v != "-inf" && v != "inf" { |
| 424 | v = v[:len(v)-len("f")] |
| 425 | } |
| 426 | if x, err := strconv.ParseFloat(v, 32); err == nil { |
| 427 | return protoreflect.ValueOfFloat32(float32(x)), nil |
| 428 | } |
| 429 | case protoreflect.DoubleKind: |
| 430 | // Ignore 'f' for compatibility with output generated by C++, |
| 431 | // but don't remove 'f' when the value is "-inf" or "inf". |
| 432 | v := tok.value |
| 433 | if strings.HasSuffix(v, "f") && v != "-inf" && v != "inf" { |
| 434 | v = v[:len(v)-len("f")] |
| 435 | } |
| 436 | if x, err := strconv.ParseFloat(v, 64); err == nil { |
| 437 | return protoreflect.ValueOfFloat64(float64(x)), nil |
| 438 | } |
| 439 | case protoreflect.StringKind: |
| 440 | if isQuote(tok.value[0]) { |
| 441 | return protoreflect.ValueOfString(tok.unquoted), nil |
| 442 | } |
| 443 | case protoreflect.BytesKind: |
| 444 | if isQuote(tok.value[0]) { |
| 445 | return protoreflect.ValueOfBytes([]byte(tok.unquoted)), nil |
| 446 | } |
| 447 | case protoreflect.EnumKind: |
| 448 | if x, err := strconv.ParseInt(tok.value, 0, 32); err == nil { |
| 449 | return protoreflect.ValueOfEnum(protoreflect.EnumNumber(x)), nil |
| 450 | } |
| 451 | vd := fd.Enum().Values().ByName(protoreflect.Name(tok.value)) |
| 452 | if vd != nil { |
| 453 | return protoreflect.ValueOfEnum(vd.Number()), nil |
| 454 | } |
| 455 | case protoreflect.MessageKind, protoreflect.GroupKind: |
| 456 | var terminator string |
| 457 | switch tok.value { |
| 458 | case "{": |
| 459 | terminator = "}" |
| 460 | case "<": |
| 461 | terminator = ">" |
| 462 | default: |
| 463 | return v, p.errorf("expected '{' or '<', found %q", tok.value) |
| 464 | } |
| 465 | err := p.unmarshalMessage(v.Message(), terminator) |
| 466 | return v, err |
| 467 | default: |
| 468 | panic(fmt.Sprintf("invalid kind %v", fd.Kind())) |
| 469 | } |
| 470 | return v, p.errorf("invalid %v: %v", fd.Kind(), tok.value) |
| 471 | } |
| 472 | |
| 473 | // Consume a ':' from the input stream (if the next token is a colon), |
| 474 | // returning an error if a colon is needed but not present. |
| 475 | func (p *textParser) checkForColon(fd protoreflect.FieldDescriptor) *ParseError { |
| 476 | tok := p.next() |
| 477 | if tok.err != nil { |
| 478 | return tok.err |
| 479 | } |
| 480 | if tok.value != ":" { |
| 481 | if fd.Message() == nil { |
| 482 | return p.errorf("expected ':', found %q", tok.value) |
| 483 | } |
| 484 | p.back() |
| 485 | } |
| 486 | return nil |
| 487 | } |
| 488 | |
| 489 | // consumeExtensionOrAnyName consumes an extension name or an Any type URL and |
| 490 | // the following ']'. It returns the name or URL consumed. |
| 491 | func (p *textParser) consumeExtensionOrAnyName() (string, error) { |
| 492 | tok := p.next() |
| 493 | if tok.err != nil { |
| 494 | return "", tok.err |
| 495 | } |
| 496 | |
| 497 | // If extension name or type url is quoted, it's a single token. |
| 498 | if len(tok.value) > 2 && isQuote(tok.value[0]) && tok.value[len(tok.value)-1] == tok.value[0] { |
| 499 | name, err := unquoteC(tok.value[1:len(tok.value)-1], rune(tok.value[0])) |
| 500 | if err != nil { |
| 501 | return "", err |
| 502 | } |
| 503 | return name, p.consumeToken("]") |
| 504 | } |
| 505 | |
| 506 | // Consume everything up to "]" |
| 507 | var parts []string |
| 508 | for tok.value != "]" { |
| 509 | parts = append(parts, tok.value) |
| 510 | tok = p.next() |
| 511 | if tok.err != nil { |
| 512 | return "", p.errorf("unrecognized type_url or extension name: %s", tok.err) |
| 513 | } |
| 514 | if p.done && tok.value != "]" { |
| 515 | return "", p.errorf("unclosed type_url or extension name") |
| 516 | } |
| 517 | } |
| 518 | return strings.Join(parts, ""), nil |
| 519 | } |
| 520 | |
| 521 | // consumeOptionalSeparator consumes an optional semicolon or comma. |
| 522 | // It is used in unmarshalMessage to provide backward compatibility. |
| 523 | func (p *textParser) consumeOptionalSeparator() error { |
| 524 | tok := p.next() |
| 525 | if tok.err != nil { |
| 526 | return tok.err |
| 527 | } |
| 528 | if tok.value != ";" && tok.value != "," { |
| 529 | p.back() |
| 530 | } |
| 531 | return nil |
| 532 | } |
| 533 | |
| 534 | func (p *textParser) errorf(format string, a ...interface{}) *ParseError { |
| 535 | pe := &ParseError{fmt.Sprintf(format, a...), p.cur.line, p.cur.offset} |
| 536 | p.cur.err = pe |
| 537 | p.done = true |
| 538 | return pe |
| 539 | } |
| 540 | |
| 541 | func (p *textParser) skipWhitespace() { |
| 542 | i := 0 |
| 543 | for i < len(p.s) && (isWhitespace(p.s[i]) || p.s[i] == '#') { |
| 544 | if p.s[i] == '#' { |
| 545 | // comment; skip to end of line or input |
| 546 | for i < len(p.s) && p.s[i] != '\n' { |
| 547 | i++ |
| 548 | } |
| 549 | if i == len(p.s) { |
| 550 | break |
| 551 | } |
| 552 | } |
| 553 | if p.s[i] == '\n' { |
| 554 | p.line++ |
| 555 | } |
| 556 | i++ |
| 557 | } |
| 558 | p.offset += i |
| 559 | p.s = p.s[i:len(p.s)] |
| 560 | if len(p.s) == 0 { |
| 561 | p.done = true |
| 562 | } |
| 563 | } |
| 564 | |
| 565 | func (p *textParser) advance() { |
| 566 | // Skip whitespace |
| 567 | p.skipWhitespace() |
| 568 | if p.done { |
| 569 | return |
| 570 | } |
| 571 | |
| 572 | // Start of non-whitespace |
| 573 | p.cur.err = nil |
| 574 | p.cur.offset, p.cur.line = p.offset, p.line |
| 575 | p.cur.unquoted = "" |
| 576 | switch p.s[0] { |
| 577 | case '<', '>', '{', '}', ':', '[', ']', ';', ',', '/': |
| 578 | // Single symbol |
| 579 | p.cur.value, p.s = p.s[0:1], p.s[1:len(p.s)] |
| 580 | case '"', '\'': |
| 581 | // Quoted string |
| 582 | i := 1 |
| 583 | for i < len(p.s) && p.s[i] != p.s[0] && p.s[i] != '\n' { |
| 584 | if p.s[i] == '\\' && i+1 < len(p.s) { |
| 585 | // skip escaped char |
| 586 | i++ |
| 587 | } |
| 588 | i++ |
| 589 | } |
| 590 | if i >= len(p.s) || p.s[i] != p.s[0] { |
| 591 | p.errorf("unmatched quote") |
| 592 | return |
| 593 | } |
| 594 | unq, err := unquoteC(p.s[1:i], rune(p.s[0])) |
| 595 | if err != nil { |
| 596 | p.errorf("invalid quoted string %s: %v", p.s[0:i+1], err) |
| 597 | return |
| 598 | } |
| 599 | p.cur.value, p.s = p.s[0:i+1], p.s[i+1:len(p.s)] |
| 600 | p.cur.unquoted = unq |
| 601 | default: |
| 602 | i := 0 |
| 603 | for i < len(p.s) && isIdentOrNumberChar(p.s[i]) { |
| 604 | i++ |
| 605 | } |
| 606 | if i == 0 { |
| 607 | p.errorf("unexpected byte %#x", p.s[0]) |
| 608 | return |
| 609 | } |
| 610 | p.cur.value, p.s = p.s[0:i], p.s[i:len(p.s)] |
| 611 | } |
| 612 | p.offset += len(p.cur.value) |
| 613 | } |
| 614 | |
| 615 | // Back off the parser by one token. Can only be done between calls to next(). |
| 616 | // It makes the next advance() a no-op. |
| 617 | func (p *textParser) back() { p.backed = true } |
| 618 | |
| 619 | // Advances the parser and returns the new current token. |
| 620 | func (p *textParser) next() *token { |
| 621 | if p.backed || p.done { |
| 622 | p.backed = false |
| 623 | return &p.cur |
| 624 | } |
| 625 | p.advance() |
| 626 | if p.done { |
| 627 | p.cur.value = "" |
| 628 | } else if len(p.cur.value) > 0 && isQuote(p.cur.value[0]) { |
| 629 | // Look for multiple quoted strings separated by whitespace, |
| 630 | // and concatenate them. |
| 631 | cat := p.cur |
| 632 | for { |
| 633 | p.skipWhitespace() |
| 634 | if p.done || !isQuote(p.s[0]) { |
| 635 | break |
| 636 | } |
| 637 | p.advance() |
| 638 | if p.cur.err != nil { |
| 639 | return &p.cur |
| 640 | } |
| 641 | cat.value += " " + p.cur.value |
| 642 | cat.unquoted += p.cur.unquoted |
| 643 | } |
| 644 | p.done = false // parser may have seen EOF, but we want to return cat |
| 645 | p.cur = cat |
| 646 | } |
| 647 | return &p.cur |
| 648 | } |
| 649 | |
| 650 | func (p *textParser) consumeToken(s string) error { |
| 651 | tok := p.next() |
| 652 | if tok.err != nil { |
| 653 | return tok.err |
| 654 | } |
| 655 | if tok.value != s { |
| 656 | p.back() |
| 657 | return p.errorf("expected %q, found %q", s, tok.value) |
| 658 | } |
| 659 | return nil |
| 660 | } |
| 661 | |
// errBadUTF8 is returned by unquoteC and unescape when they decode a byte
// sequence that is not valid UTF-8.
var errBadUTF8 = errors.New("proto: bad UTF-8")

// unquoteC interprets the backslash escapes in s, the contents of a quoted
// string with the surrounding quote characters already removed, and returns
// the resulting string. quote is the quote character that delimited s.
//
// Strings containing neither a backslash nor the quote character are returned
// unchanged without allocating (and without UTF-8 validation). Otherwise s is
// decoded rune by rune; invalid UTF-8 yields errBadUTF8 and a malformed escape
// yields the error from unescape.
func unquoteC(s string, quote rune) (string, error) {
	// This is based on C++'s tokenizer.cc.
	// Despite its name, this is *not* parsing C syntax.
	// For instance, "\0" is an invalid quoted string.

	// Avoid allocation in trivial cases.
	simple := true
	for _, r := range s {
		if r == '\\' || r == quote {
			simple = false
			break
		}
	}
	if simple {
		return s, nil
	}

	buf := make([]byte, 0, 3*len(s)/2)
	for len(s) > 0 {
		r, n := utf8.DecodeRuneInString(s)
		if r == utf8.RuneError && n == 1 {
			return "", errBadUTF8
		}
		s = s[n:]
		if r != '\\' {
			if r < utf8.RuneSelf {
				buf = append(buf, byte(r))
			} else {
				buf = append(buf, string(r)...)
			}
			continue
		}

		// s now starts just after the backslash.
		ch, tail, err := unescape(s)
		if err != nil {
			return "", err
		}
		buf = append(buf, ch...)
		s = tail
	}
	return string(buf), nil
}

// unescape decodes a single escape sequence. s must start immediately after
// the backslash. It returns the decoded text in ch and the unconsumed rest of
// the input in tail.
//
// Supported escapes:
//
//	\a \b \f \n \r \t \v \? \' \" \\   single characters
//	\ooo                               exactly three octal digits, one byte
//	\xHH \XHH                          exactly two hex digits, one byte
//	\uHHHH \UHHHHHHHH                  a Unicode code point, UTF-8 encoded
//
// Anything else is reported as an unknown escape.
func unescape(s string) (ch string, tail string, err error) {
	r, n := utf8.DecodeRuneInString(s)
	if r == utf8.RuneError && n == 1 {
		return "", "", errBadUTF8
	}
	s = s[n:]
	switch r {
	case 'a':
		return "\a", s, nil
	case 'b':
		return "\b", s, nil
	case 'f':
		return "\f", s, nil
	case 'n':
		return "\n", s, nil
	case 'r':
		return "\r", s, nil
	case 't':
		return "\t", s, nil
	case 'v':
		return "\v", s, nil
	case '?':
		return "?", s, nil // trigraph workaround
	case '\'', '"', '\\':
		return string(r), s, nil
	case '0', '1', '2', '3', '4', '5', '6', '7':
		if len(s) < 2 {
			return "", "", fmt.Errorf(`\%c requires 2 following digits`, r)
		}
		ss := string(r) + s[:2]
		s = s[2:]
		i, err := strconv.ParseUint(ss, 8, 8)
		if err != nil {
			// Three valid octal digits can still exceed a byte (\400 and
			// up); report that as a range problem rather than claiming the
			// digits are not octal.
			if numErr, ok := err.(*strconv.NumError); ok && numErr.Err == strconv.ErrRange {
				return "", "", fmt.Errorf(`\%s exceeds the maximum byte value \377`, ss)
			}
			return "", "", fmt.Errorf(`\%s contains non-octal digits`, ss)
		}
		return string([]byte{byte(i)}), s, nil
	case 'x', 'X', 'u', 'U':
		// Number of hex digits that must follow the escape letter.
		var n int
		switch r {
		case 'x', 'X':
			n = 2
		case 'u':
			n = 4
		case 'U':
			n = 8
		}
		if len(s) < n {
			return "", "", fmt.Errorf(`\%c requires %d following digits`, r, n)
		}
		ss := s[:n]
		s = s[n:]
		i, err := strconv.ParseUint(ss, 16, 64)
		if err != nil {
			return "", "", fmt.Errorf(`\%c%s contains non-hexadecimal digits`, r, ss)
		}
		if r == 'x' || r == 'X' {
			// \x yields a raw byte, not a code point.
			return string([]byte{byte(i)}), s, nil
		}
		if i > utf8.MaxRune {
			return "", "", fmt.Errorf(`\%c%s is not a valid Unicode code point`, r, ss)
		}
		return string(rune(i)), s, nil
	}
	return "", "", fmt.Errorf(`unknown escape \%c`, r)
}
| 772 | |
// isIdentOrNumberChar reports whether c may appear in an identifier or number
// token: an ASCII letter or digit, or one of '-', '+', '.', '_'.
func isIdentOrNumberChar(c byte) bool {
	if ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || ('0' <= c && c <= '9') {
		return true
	}
	return c == '-' || c == '+' || c == '.' || c == '_'
}
| 786 | |
// isWhitespace reports whether c is a space, tab, newline or carriage return.
func isWhitespace(c byte) bool {
	return c == ' ' || c == '\t' || c == '\n' || c == '\r'
}
| 794 | |
// isQuote reports whether c is a double or single quote character.
func isQuote(c byte) bool {
	return c == '"' || c == '\''
}