blob: 5784d3efd9e8a3335b6a5526ac828f245bc7d369 [file] [log] [blame]
Zack Williamse940c7a2019-08-21 14:25:39 -07001package dynamic
2
3// Marshalling and unmarshalling of dynamic messages to/from proto's standard text format
4
5import (
6 "bytes"
7 "fmt"
8 "io"
9 "math"
10 "reflect"
11 "sort"
12 "strconv"
13 "strings"
14 "text/scanner"
15 "unicode"
16
17 "github.com/golang/protobuf/proto"
18 "github.com/golang/protobuf/protoc-gen-go/descriptor"
19
Scott Baker4a35a702019-11-26 08:17:33 -080020 "github.com/jhump/protoreflect/codec"
Zack Williamse940c7a2019-08-21 14:25:39 -070021 "github.com/jhump/protoreflect/desc"
22)
23
24// MarshalText serializes this message to bytes in the standard text format,
25// returning an error if the operation fails. The resulting bytes will be a
26// valid UTF8 string.
27//
28// This method uses a compact form: no newlines, and spaces between field
29// identifiers and values are elided.
30func (m *Message) MarshalText() ([]byte, error) {
31 var b indentBuffer
32 b.indentCount = -1 // no indentation
33 if err := m.marshalText(&b); err != nil {
34 return nil, err
35 }
36 return b.Bytes(), nil
37}
38
39// MarshalTextIndent serializes this message to bytes in the standard text
40// format, returning an error if the operation fails. The resulting bytes will
41// be a valid UTF8 string.
42//
43// This method uses a "pretty-printed" form, with each field on its own line and
44// spaces between field identifiers and values.
45func (m *Message) MarshalTextIndent() ([]byte, error) {
46 var b indentBuffer
47 b.indent = " " // TODO: option for indent?
48 if err := m.marshalText(&b); err != nil {
49 return nil, err
50 }
51 return b.Bytes(), nil
52}
53
54func (m *Message) marshalText(b *indentBuffer) error {
55 // TODO: option for emitting extended Any format?
56 first := true
57 // first the known fields
58 for _, tag := range m.knownFieldTags() {
59 itag := int32(tag)
60 v := m.values[itag]
61 fd := m.FindFieldDescriptor(itag)
62 if fd.IsMap() {
63 md := fd.GetMessageType()
64 kfd := md.FindFieldByNumber(1)
65 vfd := md.FindFieldByNumber(2)
66 mp := v.(map[interface{}]interface{})
67 keys := make([]interface{}, 0, len(mp))
68 for k := range mp {
69 keys = append(keys, k)
70 }
71 sort.Sort(sortable(keys))
72 for _, mk := range keys {
73 mv := mp[mk]
74 err := b.maybeNext(&first)
75 if err != nil {
76 return err
77 }
78 err = marshalKnownFieldMapEntryText(b, fd, kfd, mk, vfd, mv)
79 if err != nil {
80 return err
81 }
82 }
83 } else if fd.IsRepeated() {
84 sl := v.([]interface{})
85 for _, slv := range sl {
86 err := b.maybeNext(&first)
87 if err != nil {
88 return err
89 }
90 err = marshalKnownFieldText(b, fd, slv)
91 if err != nil {
92 return err
93 }
94 }
95 } else {
96 err := b.maybeNext(&first)
97 if err != nil {
98 return err
99 }
100 err = marshalKnownFieldText(b, fd, v)
101 if err != nil {
102 return err
103 }
104 }
105 }
106 // then the unknown fields
107 for _, tag := range m.unknownFieldTags() {
108 itag := int32(tag)
109 ufs := m.unknownFields[itag]
110 for _, uf := range ufs {
111 err := b.maybeNext(&first)
112 if err != nil {
113 return err
114 }
115 _, err = fmt.Fprintf(b, "%d", tag)
116 if err != nil {
117 return err
118 }
119 if uf.Encoding == proto.WireStartGroup {
120 err = b.WriteByte('{')
121 if err != nil {
122 return err
123 }
124 err = b.start()
125 if err != nil {
126 return err
127 }
Scott Baker4a35a702019-11-26 08:17:33 -0800128 in := codec.NewBuffer(uf.Contents)
Zack Williamse940c7a2019-08-21 14:25:39 -0700129 err = marshalUnknownGroupText(b, in, true)
130 if err != nil {
131 return err
132 }
133 err = b.end()
134 if err != nil {
135 return err
136 }
137 err = b.WriteByte('}')
138 if err != nil {
139 return err
140 }
141 } else {
142 err = b.sep()
143 if err != nil {
144 return err
145 }
146 if uf.Encoding == proto.WireBytes {
147 err = writeString(b, string(uf.Contents))
148 if err != nil {
149 return err
150 }
151 } else {
152 _, err = b.WriteString(strconv.FormatUint(uf.Value, 10))
153 if err != nil {
154 return err
155 }
156 }
157 }
158 }
159 }
160 return nil
161}
162
163func marshalKnownFieldMapEntryText(b *indentBuffer, fd *desc.FieldDescriptor, kfd *desc.FieldDescriptor, mk interface{}, vfd *desc.FieldDescriptor, mv interface{}) error {
164 var name string
165 if fd.IsExtension() {
166 name = fmt.Sprintf("[%s]", fd.GetFullyQualifiedName())
167 } else {
168 name = fd.GetName()
169 }
170 _, err := b.WriteString(name)
171 if err != nil {
172 return err
173 }
174 err = b.sep()
175 if err != nil {
176 return err
177 }
178
179 err = b.WriteByte('<')
180 if err != nil {
181 return err
182 }
183 err = b.start()
184 if err != nil {
185 return err
186 }
187
188 err = marshalKnownFieldText(b, kfd, mk)
189 if err != nil {
190 return err
191 }
192 err = b.next()
193 if err != nil {
194 return err
195 }
Joey Armstrong903c69d2024-02-01 19:46:39 -0500196 if !isNil(mv) {
197 err = marshalKnownFieldText(b, vfd, mv)
198 if err != nil {
199 return err
200 }
Zack Williamse940c7a2019-08-21 14:25:39 -0700201 }
202
203 err = b.end()
204 if err != nil {
205 return err
206 }
207 return b.WriteByte('>')
208}
209
210func marshalKnownFieldText(b *indentBuffer, fd *desc.FieldDescriptor, v interface{}) error {
211 group := fd.GetType() == descriptor.FieldDescriptorProto_TYPE_GROUP
212 if group {
213 var name string
214 if fd.IsExtension() {
215 name = fmt.Sprintf("[%s]", fd.GetMessageType().GetFullyQualifiedName())
216 } else {
217 name = fd.GetMessageType().GetName()
218 }
219 _, err := b.WriteString(name)
220 if err != nil {
221 return err
222 }
223 } else {
224 var name string
225 if fd.IsExtension() {
226 name = fmt.Sprintf("[%s]", fd.GetFullyQualifiedName())
227 } else {
228 name = fd.GetName()
229 }
230 _, err := b.WriteString(name)
231 if err != nil {
232 return err
233 }
234 err = b.sep()
235 if err != nil {
236 return err
237 }
238 }
239 rv := reflect.ValueOf(v)
240 switch rv.Kind() {
241 case reflect.Int32, reflect.Int64:
242 ed := fd.GetEnumType()
243 if ed != nil {
244 n := int32(rv.Int())
245 vd := ed.FindValueByNumber(n)
246 if vd == nil {
247 _, err := b.WriteString(strconv.FormatInt(rv.Int(), 10))
248 return err
249 } else {
250 _, err := b.WriteString(vd.GetName())
251 return err
252 }
253 } else {
254 _, err := b.WriteString(strconv.FormatInt(rv.Int(), 10))
255 return err
256 }
257 case reflect.Uint32, reflect.Uint64:
258 _, err := b.WriteString(strconv.FormatUint(rv.Uint(), 10))
259 return err
260 case reflect.Float32, reflect.Float64:
261 f := rv.Float()
262 var str string
263 if math.IsNaN(f) {
264 str = "nan"
265 } else if math.IsInf(f, 1) {
266 str = "inf"
267 } else if math.IsInf(f, -1) {
268 str = "-inf"
269 } else {
270 var bits int
271 if rv.Kind() == reflect.Float32 {
272 bits = 32
273 } else {
274 bits = 64
275 }
276 str = strconv.FormatFloat(rv.Float(), 'g', -1, bits)
277 }
278 _, err := b.WriteString(str)
279 return err
280 case reflect.Bool:
281 _, err := b.WriteString(strconv.FormatBool(rv.Bool()))
282 return err
283 case reflect.Slice:
284 return writeString(b, string(rv.Bytes()))
285 case reflect.String:
286 return writeString(b, rv.String())
287 default:
288 var err error
289 if group {
290 err = b.WriteByte('{')
291 } else {
292 err = b.WriteByte('<')
293 }
294 if err != nil {
295 return err
296 }
297 err = b.start()
298 if err != nil {
299 return err
300 }
301 // must be a message
302 if dm, ok := v.(*Message); ok {
303 err = dm.marshalText(b)
304 if err != nil {
305 return err
306 }
307 } else {
308 err = proto.CompactText(b, v.(proto.Message))
309 if err != nil {
310 return err
311 }
312 }
313 err = b.end()
314 if err != nil {
315 return err
316 }
317 if group {
318 return b.WriteByte('}')
319 } else {
320 return b.WriteByte('>')
321 }
322 }
323}
324
325// writeString writes a string in the protocol buffer text format.
326// It is similar to strconv.Quote except we don't use Go escape sequences,
327// we treat the string as a byte sequence, and we use octal escapes.
328// These differences are to maintain interoperability with the other
329// languages' implementations of the text format.
330func writeString(b *indentBuffer, s string) error {
331 // use WriteByte here to get any needed indent
332 if err := b.WriteByte('"'); err != nil {
333 return err
334 }
335 // Loop over the bytes, not the runes.
336 for i := 0; i < len(s); i++ {
337 var err error
338 // Divergence from C++: we don't escape apostrophes.
339 // There's no need to escape them, and the C++ parser
340 // copes with a naked apostrophe.
341 switch c := s[i]; c {
342 case '\n':
343 _, err = b.WriteString("\\n")
344 case '\r':
345 _, err = b.WriteString("\\r")
346 case '\t':
347 _, err = b.WriteString("\\t")
348 case '"':
Joey Armstrong903c69d2024-02-01 19:46:39 -0500349 _, err = b.WriteString("\\\"")
Zack Williamse940c7a2019-08-21 14:25:39 -0700350 case '\\':
351 _, err = b.WriteString("\\\\")
352 default:
353 if c >= 0x20 && c < 0x7f {
354 err = b.WriteByte(c)
355 } else {
356 _, err = fmt.Fprintf(b, "\\%03o", c)
357 }
358 }
359 if err != nil {
360 return err
361 }
362 }
363 return b.WriteByte('"')
364}
365
Scott Baker4a35a702019-11-26 08:17:33 -0800366func marshalUnknownGroupText(b *indentBuffer, in *codec.Buffer, topLevel bool) error {
Zack Williamse940c7a2019-08-21 14:25:39 -0700367 first := true
368 for {
Scott Baker4a35a702019-11-26 08:17:33 -0800369 if in.EOF() {
Zack Williamse940c7a2019-08-21 14:25:39 -0700370 if topLevel {
371 return nil
372 }
373 // this is a nested message: we are expecting an end-group tag, not EOF!
374 return io.ErrUnexpectedEOF
375 }
Scott Baker4a35a702019-11-26 08:17:33 -0800376 tag, wireType, err := in.DecodeTagAndWireType()
Zack Williamse940c7a2019-08-21 14:25:39 -0700377 if err != nil {
378 return err
379 }
380 if wireType == proto.WireEndGroup {
381 return nil
382 }
383 err = b.maybeNext(&first)
384 if err != nil {
385 return err
386 }
387 _, err = fmt.Fprintf(b, "%d", tag)
388 if err != nil {
389 return err
390 }
391 if wireType == proto.WireStartGroup {
392 err = b.WriteByte('{')
393 if err != nil {
394 return err
395 }
396 err = b.start()
397 if err != nil {
398 return err
399 }
400 err = marshalUnknownGroupText(b, in, false)
401 if err != nil {
402 return err
403 }
404 err = b.end()
405 if err != nil {
406 return err
407 }
408 err = b.WriteByte('}')
409 if err != nil {
410 return err
411 }
412 continue
413 } else {
414 err = b.sep()
415 if err != nil {
416 return err
417 }
418 if wireType == proto.WireBytes {
Scott Baker4a35a702019-11-26 08:17:33 -0800419 contents, err := in.DecodeRawBytes(false)
Zack Williamse940c7a2019-08-21 14:25:39 -0700420 if err != nil {
421 return err
422 }
423 err = writeString(b, string(contents))
424 if err != nil {
425 return err
426 }
427 } else {
428 var v uint64
429 switch wireType {
430 case proto.WireVarint:
Scott Baker4a35a702019-11-26 08:17:33 -0800431 v, err = in.DecodeVarint()
Zack Williamse940c7a2019-08-21 14:25:39 -0700432 case proto.WireFixed32:
Scott Baker4a35a702019-11-26 08:17:33 -0800433 v, err = in.DecodeFixed32()
Zack Williamse940c7a2019-08-21 14:25:39 -0700434 case proto.WireFixed64:
Scott Baker4a35a702019-11-26 08:17:33 -0800435 v, err = in.DecodeFixed64()
Zack Williamse940c7a2019-08-21 14:25:39 -0700436 default:
437 return proto.ErrInternalBadWireType
438 }
439 if err != nil {
440 return err
441 }
442 _, err = b.WriteString(strconv.FormatUint(v, 10))
443 if err != nil {
444 return err
445 }
446 }
447 }
448 }
449}
450
451// UnmarshalText de-serializes the message that is present, in text format, in
452// the given bytes into this message. It first resets the current message. It
453// returns an error if the given bytes do not contain a valid encoding of this
454// message type in the standard text format
455func (m *Message) UnmarshalText(text []byte) error {
456 m.Reset()
457 if err := m.UnmarshalMergeText(text); err != nil {
458 return err
459 }
460 return m.Validate()
461}
462
463// UnmarshalMergeText de-serializes the message that is present, in text format,
464// in the given bytes into this message. Unlike UnmarshalText, it does not first
465// reset the message, instead merging the data in the given bytes into the
466// existing data in this message.
467func (m *Message) UnmarshalMergeText(text []byte) error {
468 return m.unmarshalText(newReader(text), tokenEOF)
469}
470
471func (m *Message) unmarshalText(tr *txtReader, end tokenType) error {
472 for {
473 tok := tr.next()
474 if tok.tokTyp == end {
475 return nil
476 }
477 if tok.tokTyp == tokenEOF {
478 return io.ErrUnexpectedEOF
479 }
480 var fd *desc.FieldDescriptor
481 var extendedAnyType *desc.MessageDescriptor
482 if tok.tokTyp == tokenInt {
483 // tag number (indicates unknown field)
484 tag, err := strconv.ParseInt(tok.val.(string), 10, 32)
485 if err != nil {
486 return err
487 }
488 itag := int32(tag)
489 fd = m.FindFieldDescriptor(itag)
490 if fd == nil {
491 // can't parse the value w/out field descriptor, so skip it
492 tok = tr.next()
493 if tok.tokTyp == tokenEOF {
494 return io.ErrUnexpectedEOF
495 } else if tok.tokTyp == tokenOpenBrace {
496 if err := skipMessageText(tr, true); err != nil {
497 return err
498 }
499 } else if tok.tokTyp == tokenColon {
500 if err := skipFieldValueText(tr); err != nil {
501 return err
502 }
503 } else {
504 return textError(tok, "Expecting a colon ':' or brace '{'; instead got %q", tok.txt)
505 }
506 tok = tr.peek()
507 if tok.tokTyp.IsSep() {
508 tr.next() // consume separator
509 }
510 continue
511 }
512 } else {
513 fieldName, err := unmarshalFieldNameText(tr, tok)
514 if err != nil {
515 return err
516 }
517 fd = m.FindFieldDescriptorByName(fieldName)
518 if fd == nil {
519 // See if it's a group name
520 for _, field := range m.md.GetFields() {
521 if field.GetType() == descriptor.FieldDescriptorProto_TYPE_GROUP && field.GetMessageType().GetName() == fieldName {
522 fd = field
523 break
524 }
525 }
526 if fd == nil {
527 // maybe this is an extended Any
528 if m.md.GetFullyQualifiedName() == "google.protobuf.Any" && fieldName[0] == '[' && strings.Contains(fieldName, "/") {
529 // strip surrounding "[" and "]" and extract type name from URL
530 typeUrl := fieldName[1 : len(fieldName)-1]
531 mname := typeUrl
532 if slash := strings.LastIndex(mname, "/"); slash >= 0 {
533 mname = mname[slash+1:]
534 }
535 // TODO: add a way to weave an AnyResolver to this point
536 extendedAnyType = findMessageDescriptor(mname, m.md.GetFile())
537 if extendedAnyType == nil {
538 return textError(tok, "could not parse Any with unknown type URL %q", fieldName)
539 }
540 // field 1 is "type_url"
541 typeUrlField := m.md.FindFieldByNumber(1)
542 if err := m.TrySetField(typeUrlField, typeUrl); err != nil {
543 return err
544 }
545 } else {
546 // TODO: add a flag to just ignore unrecognized field names
547 return textError(tok, "%q is not a recognized field name of %q", fieldName, m.md.GetFullyQualifiedName())
548 }
549 }
550 }
551 }
552 tok = tr.next()
553 if tok.tokTyp == tokenEOF {
554 return io.ErrUnexpectedEOF
555 }
556 if extendedAnyType != nil {
557 // consume optional colon; make sure this is a "start message" token
558 if tok.tokTyp == tokenColon {
559 tok = tr.next()
560 if tok.tokTyp == tokenEOF {
561 return io.ErrUnexpectedEOF
562 }
563 }
564 if tok.tokTyp.EndToken() == tokenError {
565 return textError(tok, "Expecting a '<' or '{'; instead got %q", tok.txt)
566 }
567
568 // TODO: use mf.NewMessage and, if not a dynamic message, use proto.UnmarshalText to unmarshal it
569 g := m.mf.NewDynamicMessage(extendedAnyType)
570 if err := g.unmarshalText(tr, tok.tokTyp.EndToken()); err != nil {
571 return err
572 }
573 // now we marshal the message to bytes and store in the Any
574 b, err := g.Marshal()
575 if err != nil {
576 return err
577 }
578 // field 2 is "value"
579 anyValueField := m.md.FindFieldByNumber(2)
580 if err := m.TrySetField(anyValueField, b); err != nil {
581 return err
582 }
583
584 } else if (fd.GetType() == descriptor.FieldDescriptorProto_TYPE_GROUP ||
585 fd.GetType() == descriptor.FieldDescriptorProto_TYPE_MESSAGE) &&
586 tok.tokTyp.EndToken() != tokenError {
587
588 // TODO: use mf.NewMessage and, if not a dynamic message, use proto.UnmarshalText to unmarshal it
589 g := m.mf.NewDynamicMessage(fd.GetMessageType())
590 if err := g.unmarshalText(tr, tok.tokTyp.EndToken()); err != nil {
591 return err
592 }
593 if fd.IsRepeated() {
594 if err := m.TryAddRepeatedField(fd, g); err != nil {
595 return err
596 }
597 } else {
598 if err := m.TrySetField(fd, g); err != nil {
599 return err
600 }
601 }
602 } else {
603 if tok.tokTyp != tokenColon {
604 return textError(tok, "Expecting a colon ':'; instead got %q", tok.txt)
605 }
606 if err := m.unmarshalFieldValueText(fd, tr); err != nil {
607 return err
608 }
609 }
610 tok = tr.peek()
611 if tok.tokTyp.IsSep() {
612 tr.next() // consume separator
613 }
614 }
615}
616func findMessageDescriptor(name string, fd *desc.FileDescriptor) *desc.MessageDescriptor {
617 md := findMessageInTransitiveDeps(name, fd, map[*desc.FileDescriptor]struct{}{})
618 if md == nil {
619 // couldn't find it; see if we have this message linked in
620 md, _ = desc.LoadMessageDescriptor(name)
621 }
622 return md
623}
624
625func findMessageInTransitiveDeps(name string, fd *desc.FileDescriptor, seen map[*desc.FileDescriptor]struct{}) *desc.MessageDescriptor {
626 if _, ok := seen[fd]; ok {
627 // already checked this file
628 return nil
629 }
630 seen[fd] = struct{}{}
631 md := fd.FindMessage(name)
632 if md != nil {
633 return md
634 }
635 // not in this file so recursively search its deps
636 for _, dep := range fd.GetDependencies() {
637 md = findMessageInTransitiveDeps(name, dep, seen)
638 if md != nil {
639 return md
640 }
641 }
642 // couldn't find it
643 return nil
644}
645
646func textError(tok *token, format string, args ...interface{}) error {
647 var msg string
648 if tok.tokTyp == tokenError {
649 msg = tok.val.(error).Error()
650 } else {
651 msg = fmt.Sprintf(format, args...)
652 }
653 return fmt.Errorf("line %d, col %d: %s", tok.pos.Line, tok.pos.Column, msg)
654}
655
656type setFunction func(*Message, *desc.FieldDescriptor, interface{}) error
657
658func (m *Message) unmarshalFieldValueText(fd *desc.FieldDescriptor, tr *txtReader) error {
659 var set setFunction
660 if fd.IsRepeated() {
661 set = (*Message).addRepeatedField
662 } else {
663 set = mergeField
664 }
665 tok := tr.peek()
666 if tok.tokTyp == tokenOpenBracket {
667 tr.next() // consume tok
668 for {
669 if err := m.unmarshalFieldElementText(fd, tr, set); err != nil {
670 return err
671 }
672 tok = tr.peek()
673 if tok.tokTyp == tokenCloseBracket {
674 tr.next() // consume tok
675 return nil
676 } else if tok.tokTyp.IsSep() {
677 tr.next() // consume separator
678 }
679 }
680 }
681 return m.unmarshalFieldElementText(fd, tr, set)
682}
683
684func (m *Message) unmarshalFieldElementText(fd *desc.FieldDescriptor, tr *txtReader, set setFunction) error {
685 tok := tr.next()
686 if tok.tokTyp == tokenEOF {
687 return io.ErrUnexpectedEOF
688 }
689
690 var expected string
691 switch fd.GetType() {
692 case descriptor.FieldDescriptorProto_TYPE_BOOL:
693 if tok.tokTyp == tokenIdent {
694 if tok.val.(string) == "true" {
695 return set(m, fd, true)
696 } else if tok.val.(string) == "false" {
697 return set(m, fd, false)
698 }
699 }
700 expected = "boolean value"
701 case descriptor.FieldDescriptorProto_TYPE_BYTES:
702 if tok.tokTyp == tokenString {
703 return set(m, fd, []byte(tok.val.(string)))
704 }
705 expected = "bytes string value"
706 case descriptor.FieldDescriptorProto_TYPE_STRING:
707 if tok.tokTyp == tokenString {
708 return set(m, fd, tok.val)
709 }
710 expected = "string value"
711 case descriptor.FieldDescriptorProto_TYPE_FLOAT:
712 switch tok.tokTyp {
713 case tokenFloat:
714 return set(m, fd, float32(tok.val.(float64)))
715 case tokenInt:
716 if f, err := strconv.ParseFloat(tok.val.(string), 32); err != nil {
717 return err
718 } else {
719 return set(m, fd, float32(f))
720 }
721 case tokenIdent:
722 ident := strings.ToLower(tok.val.(string))
723 if ident == "inf" {
724 return set(m, fd, float32(math.Inf(1)))
725 } else if ident == "nan" {
726 return set(m, fd, float32(math.NaN()))
727 }
728 case tokenMinus:
729 peeked := tr.peek()
730 if peeked.tokTyp == tokenIdent {
731 ident := strings.ToLower(peeked.val.(string))
732 if ident == "inf" {
733 tr.next() // consume peeked token
734 return set(m, fd, float32(math.Inf(-1)))
735 }
736 }
737 }
738 expected = "float value"
739 case descriptor.FieldDescriptorProto_TYPE_DOUBLE:
740 switch tok.tokTyp {
741 case tokenFloat:
742 return set(m, fd, tok.val)
743 case tokenInt:
744 if f, err := strconv.ParseFloat(tok.val.(string), 64); err != nil {
745 return err
746 } else {
747 return set(m, fd, f)
748 }
749 case tokenIdent:
750 ident := strings.ToLower(tok.val.(string))
751 if ident == "inf" {
752 return set(m, fd, math.Inf(1))
753 } else if ident == "nan" {
754 return set(m, fd, math.NaN())
755 }
756 case tokenMinus:
757 peeked := tr.peek()
758 if peeked.tokTyp == tokenIdent {
759 ident := strings.ToLower(peeked.val.(string))
760 if ident == "inf" {
761 tr.next() // consume peeked token
762 return set(m, fd, math.Inf(-1))
763 }
764 }
765 }
766 expected = "float value"
767 case descriptor.FieldDescriptorProto_TYPE_INT32,
768 descriptor.FieldDescriptorProto_TYPE_SINT32,
769 descriptor.FieldDescriptorProto_TYPE_SFIXED32:
770 if tok.tokTyp == tokenInt {
771 if i, err := strconv.ParseInt(tok.val.(string), 10, 32); err != nil {
772 return err
773 } else {
774 return set(m, fd, int32(i))
775 }
776 }
777 expected = "int value"
778 case descriptor.FieldDescriptorProto_TYPE_INT64,
779 descriptor.FieldDescriptorProto_TYPE_SINT64,
780 descriptor.FieldDescriptorProto_TYPE_SFIXED64:
781 if tok.tokTyp == tokenInt {
782 if i, err := strconv.ParseInt(tok.val.(string), 10, 64); err != nil {
783 return err
784 } else {
785 return set(m, fd, i)
786 }
787 }
788 expected = "int value"
789 case descriptor.FieldDescriptorProto_TYPE_UINT32,
790 descriptor.FieldDescriptorProto_TYPE_FIXED32:
791 if tok.tokTyp == tokenInt {
792 if i, err := strconv.ParseUint(tok.val.(string), 10, 32); err != nil {
793 return err
794 } else {
795 return set(m, fd, uint32(i))
796 }
797 }
798 expected = "unsigned int value"
799 case descriptor.FieldDescriptorProto_TYPE_UINT64,
800 descriptor.FieldDescriptorProto_TYPE_FIXED64:
801 if tok.tokTyp == tokenInt {
802 if i, err := strconv.ParseUint(tok.val.(string), 10, 64); err != nil {
803 return err
804 } else {
805 return set(m, fd, i)
806 }
807 }
808 expected = "unsigned int value"
809 case descriptor.FieldDescriptorProto_TYPE_ENUM:
810 if tok.tokTyp == tokenIdent {
811 // TODO: add a flag to just ignore unrecognized enum value names?
812 vd := fd.GetEnumType().FindValueByName(tok.val.(string))
813 if vd != nil {
814 return set(m, fd, vd.GetNumber())
815 }
816 } else if tok.tokTyp == tokenInt {
817 if i, err := strconv.ParseInt(tok.val.(string), 10, 32); err != nil {
818 return err
819 } else {
820 return set(m, fd, int32(i))
821 }
822 }
823 expected = fmt.Sprintf("enum %s value", fd.GetEnumType().GetFullyQualifiedName())
824 case descriptor.FieldDescriptorProto_TYPE_MESSAGE,
825 descriptor.FieldDescriptorProto_TYPE_GROUP:
826
827 endTok := tok.tokTyp.EndToken()
828 if endTok != tokenError {
829 dm := m.mf.NewDynamicMessage(fd.GetMessageType())
830 if err := dm.unmarshalText(tr, endTok); err != nil {
831 return err
832 }
833 // TODO: ideally we would use mf.NewMessage and, if not a dynamic message, use
834 // proto package to unmarshal it. But the text parser isn't particularly amenable
835 // to that, so we instead convert a dynamic message to a generated one if the
836 // known-type registry knows about the generated type...
837 var ktr *KnownTypeRegistry
838 if m.mf != nil {
839 ktr = m.mf.ktr
840 }
841 pm := ktr.CreateIfKnown(fd.GetMessageType().GetFullyQualifiedName())
842 if pm != nil {
843 if err := dm.ConvertTo(pm); err != nil {
844 return set(m, fd, pm)
845 }
846 }
847 return set(m, fd, dm)
848 }
849 expected = fmt.Sprintf("message %s value", fd.GetMessageType().GetFullyQualifiedName())
850 default:
851 return fmt.Errorf("field %q of message %q has unrecognized type: %v", fd.GetFullyQualifiedName(), m.md.GetFullyQualifiedName(), fd.GetType())
852 }
853
854 // if we get here, token was wrong type; create error message
855 var article string
856 if strings.Contains("aieou", expected[0:1]) {
857 article = "an"
858 } else {
859 article = "a"
860 }
861 return textError(tok, "Expecting %s %s; got %q", article, expected, tok.txt)
862}
863
864func unmarshalFieldNameText(tr *txtReader, tok *token) (string, error) {
865 if tok.tokTyp == tokenOpenBracket || tok.tokTyp == tokenOpenParen {
866 // extension name
867 var closeType tokenType
868 var closeChar string
869 if tok.tokTyp == tokenOpenBracket {
870 closeType = tokenCloseBracket
871 closeChar = "close bracket ']'"
872 } else {
873 closeType = tokenCloseParen
874 closeChar = "close paren ')'"
875 }
876 // must be followed by an identifier
877 idents := make([]string, 0, 1)
878 for {
879 tok = tr.next()
880 if tok.tokTyp == tokenEOF {
881 return "", io.ErrUnexpectedEOF
882 } else if tok.tokTyp != tokenIdent {
883 return "", textError(tok, "Expecting an identifier; instead got %q", tok.txt)
884 }
885 idents = append(idents, tok.val.(string))
886 // and then close bracket/paren, or "/" to keep adding URL elements to name
887 tok = tr.next()
888 if tok.tokTyp == tokenEOF {
889 return "", io.ErrUnexpectedEOF
890 } else if tok.tokTyp == closeType {
891 break
892 } else if tok.tokTyp != tokenSlash {
893 return "", textError(tok, "Expecting a %s; instead got %q", closeChar, tok.txt)
894 }
895 }
896 return "[" + strings.Join(idents, "/") + "]", nil
897 } else if tok.tokTyp == tokenIdent {
898 // normal field name
899 return tok.val.(string), nil
900 } else {
901 return "", textError(tok, "Expecting an identifier or tag number; instead got %q", tok.txt)
902 }
903}
904
905func skipFieldNameText(tr *txtReader) error {
906 tok := tr.next()
907 if tok.tokTyp == tokenEOF {
908 return io.ErrUnexpectedEOF
909 } else if tok.tokTyp == tokenInt || tok.tokTyp == tokenIdent {
910 return nil
911 } else {
912 _, err := unmarshalFieldNameText(tr, tok)
913 return err
914 }
915}
916
917func skipFieldValueText(tr *txtReader) error {
918 tok := tr.peek()
919 if tok.tokTyp == tokenOpenBracket {
920 tr.next() // consume tok
921 for {
922 if err := skipFieldElementText(tr); err != nil {
923 return err
924 }
925 tok = tr.peek()
926 if tok.tokTyp == tokenCloseBracket {
927 tr.next() // consume tok
928 return nil
929 } else if tok.tokTyp.IsSep() {
930 tr.next() // consume separator
931 }
932
933 }
934 }
935 return skipFieldElementText(tr)
936}
937
938func skipFieldElementText(tr *txtReader) error {
939 tok := tr.next()
940 switch tok.tokTyp {
941 case tokenEOF:
942 return io.ErrUnexpectedEOF
943 case tokenInt, tokenFloat, tokenString, tokenIdent:
944 return nil
945 case tokenOpenAngle:
946 return skipMessageText(tr, false)
947 default:
948 return textError(tok, "Expecting an angle bracket '<' or a value; instead got %q", tok.txt)
949 }
950}
951
952func skipMessageText(tr *txtReader, isGroup bool) error {
953 for {
954 tok := tr.peek()
955 if tok.tokTyp == tokenEOF {
956 return io.ErrUnexpectedEOF
957 } else if isGroup && tok.tokTyp == tokenCloseBrace {
958 return nil
959 } else if !isGroup && tok.tokTyp == tokenCloseAngle {
960 return nil
961 }
962
963 // field name or tag
964 if err := skipFieldNameText(tr); err != nil {
965 return err
966 }
967
968 // field value
969 tok = tr.next()
970 if tok.tokTyp == tokenEOF {
971 return io.ErrUnexpectedEOF
972 } else if tok.tokTyp == tokenOpenBrace {
973 if err := skipMessageText(tr, true); err != nil {
974 return err
975 }
976 } else if tok.tokTyp == tokenColon {
977 if err := skipFieldValueText(tr); err != nil {
978 return err
979 }
980 } else {
981 return textError(tok, "Expecting a colon ':' or brace '{'; instead got %q", tok.txt)
982 }
983
984 tok = tr.peek()
985 if tok.tokTyp.IsSep() {
986 tr.next() // consume separator
987 }
988 }
989}
990
// tokenType identifies the kind of a token produced by the text format reader.
type tokenType int

// The kinds of token. tokenError is the zero value.
const (
	tokenError        tokenType = iota // malformed input; the token carries an error
	tokenEOF                           // end of input
	tokenIdent                         // identifier
	tokenString                        // quoted string
	tokenInt                           // integer literal, including any leading minus
	tokenFloat                         // floating point literal, including any leading minus
	tokenColon                         // ':'
	tokenComma                         // ','
	tokenSemiColon                     // ';'
	tokenOpenBrace                     // '{'
	tokenCloseBrace                    // '}'
	tokenOpenBracket                   // '['
	tokenCloseBracket                  // ']'
	tokenOpenAngle                     // '<'
	tokenCloseAngle                    // '>'
	tokenOpenParen                     // '('
	tokenCloseParen                    // ')'
	tokenSlash                         // '/'
	tokenMinus                         // '-' that is not followed by a digit
)
1014
1015func (t tokenType) IsSep() bool {
1016 return t == tokenComma || t == tokenSemiColon
1017}
1018
1019func (t tokenType) EndToken() tokenType {
1020 switch t {
1021 case tokenOpenAngle:
1022 return tokenCloseAngle
1023 case tokenOpenBrace:
1024 return tokenCloseBrace
1025 default:
1026 return tokenError
1027 }
1028}
1029
1030type token struct {
1031 tokTyp tokenType
1032 val interface{}
1033 txt string
1034 pos scanner.Position
1035}
1036
1037type txtReader struct {
1038 scanner scanner.Scanner
1039 peeked token
1040 havePeeked bool
1041}
1042
1043func newReader(text []byte) *txtReader {
1044 sc := scanner.Scanner{}
1045 sc.Init(bytes.NewReader(text))
1046 sc.Mode = scanner.ScanIdents | scanner.ScanInts | scanner.ScanFloats | scanner.ScanChars |
1047 scanner.ScanStrings | scanner.ScanComments | scanner.SkipComments
1048 // identifiers are same restrictions as Go identifiers, except we also allow dots since
1049 // we accept fully-qualified names
1050 sc.IsIdentRune = func(ch rune, i int) bool {
1051 return ch == '_' || unicode.IsLetter(ch) ||
1052 (i > 0 && unicode.IsDigit(ch)) ||
1053 (i > 0 && ch == '.')
1054 }
1055 // ignore errors; we handle them if/when we see malformed tokens
1056 sc.Error = func(s *scanner.Scanner, msg string) {}
1057 return &txtReader{scanner: sc}
1058}
1059
1060func (p *txtReader) peek() *token {
1061 if p.havePeeked {
1062 return &p.peeked
1063 }
1064 t := p.scanner.Scan()
1065 if t == scanner.EOF {
1066 p.peeked.tokTyp = tokenEOF
1067 p.peeked.val = nil
1068 p.peeked.txt = ""
1069 p.peeked.pos = p.scanner.Position
1070 } else if err := p.processToken(t, p.scanner.TokenText(), p.scanner.Position); err != nil {
1071 p.peeked.tokTyp = tokenError
1072 p.peeked.val = err
1073 }
1074 p.havePeeked = true
1075 return &p.peeked
1076}
1077
1078func (p *txtReader) processToken(t rune, text string, pos scanner.Position) error {
1079 p.peeked.pos = pos
1080 p.peeked.txt = text
1081 switch t {
1082 case scanner.Ident:
1083 p.peeked.tokTyp = tokenIdent
1084 p.peeked.val = text
1085 case scanner.Int:
1086 p.peeked.tokTyp = tokenInt
1087 p.peeked.val = text // can't parse the number because we don't know if it's signed or unsigned
1088 case scanner.Float:
1089 p.peeked.tokTyp = tokenFloat
1090 var err error
1091 if p.peeked.val, err = strconv.ParseFloat(text, 64); err != nil {
1092 return err
1093 }
1094 case scanner.Char, scanner.String:
1095 p.peeked.tokTyp = tokenString
1096 var err error
1097 if p.peeked.val, err = strconv.Unquote(text); err != nil {
1098 return err
1099 }
1100 case '-': // unary minus, for negative ints and floats
1101 ch := p.scanner.Peek()
1102 if ch < '0' || ch > '9' {
1103 p.peeked.tokTyp = tokenMinus
1104 p.peeked.val = '-'
1105 } else {
1106 t := p.scanner.Scan()
1107 if t == scanner.EOF {
1108 return io.ErrUnexpectedEOF
1109 } else if t == scanner.Float {
1110 p.peeked.tokTyp = tokenFloat
1111 text += p.scanner.TokenText()
1112 p.peeked.txt = text
1113 var err error
1114 if p.peeked.val, err = strconv.ParseFloat(text, 64); err != nil {
1115 p.peeked.pos = p.scanner.Position
1116 return err
1117 }
1118 } else if t == scanner.Int {
1119 p.peeked.tokTyp = tokenInt
1120 text += p.scanner.TokenText()
1121 p.peeked.txt = text
1122 p.peeked.val = text // can't parse the number because we don't know if it's signed or unsigned
1123 } else {
1124 p.peeked.pos = p.scanner.Position
1125 return fmt.Errorf("expecting an int or float but got %q", p.scanner.TokenText())
1126 }
1127 }
1128 case ':':
1129 p.peeked.tokTyp = tokenColon
1130 p.peeked.val = ':'
1131 case ',':
1132 p.peeked.tokTyp = tokenComma
1133 p.peeked.val = ','
1134 case ';':
1135 p.peeked.tokTyp = tokenSemiColon
1136 p.peeked.val = ';'
1137 case '{':
1138 p.peeked.tokTyp = tokenOpenBrace
1139 p.peeked.val = '{'
1140 case '}':
1141 p.peeked.tokTyp = tokenCloseBrace
1142 p.peeked.val = '}'
1143 case '<':
1144 p.peeked.tokTyp = tokenOpenAngle
1145 p.peeked.val = '<'
1146 case '>':
1147 p.peeked.tokTyp = tokenCloseAngle
1148 p.peeked.val = '>'
1149 case '[':
1150 p.peeked.tokTyp = tokenOpenBracket
1151 p.peeked.val = '['
1152 case ']':
1153 p.peeked.tokTyp = tokenCloseBracket
1154 p.peeked.val = ']'
1155 case '(':
1156 p.peeked.tokTyp = tokenOpenParen
1157 p.peeked.val = '('
1158 case ')':
1159 p.peeked.tokTyp = tokenCloseParen
1160 p.peeked.val = ')'
1161 case '/':
1162 // only allowed to separate URL components in expanded Any format
1163 p.peeked.tokTyp = tokenSlash
1164 p.peeked.val = '/'
1165 default:
1166 return fmt.Errorf("invalid character: %c", t)
1167 }
1168 return nil
1169}
1170
1171func (p *txtReader) next() *token {
1172 t := p.peek()
1173 if t.tokTyp != tokenEOF && t.tokTyp != tokenError {
1174 p.havePeeked = false
1175 }
1176 return t
1177}