blob: 2d0fa04305fb104b137e5b4a4f16bfd3bac4dcfc [file] [log] [blame]
Zack Williamse940c7a2019-08-21 14:25:39 -07001package dynamic
2
3// Marshalling and unmarshalling of dynamic messages to/from proto's standard text format
4
5import (
6 "bytes"
7 "fmt"
8 "io"
9 "math"
10 "reflect"
11 "sort"
12 "strconv"
13 "strings"
14 "text/scanner"
15 "unicode"
16
17 "github.com/golang/protobuf/proto"
18 "github.com/golang/protobuf/protoc-gen-go/descriptor"
19
20 "github.com/jhump/protoreflect/desc"
21)
22
23// MarshalText serializes this message to bytes in the standard text format,
24// returning an error if the operation fails. The resulting bytes will be a
25// valid UTF8 string.
26//
27// This method uses a compact form: no newlines, and spaces between field
28// identifiers and values are elided.
29func (m *Message) MarshalText() ([]byte, error) {
30 var b indentBuffer
31 b.indentCount = -1 // no indentation
32 if err := m.marshalText(&b); err != nil {
33 return nil, err
34 }
35 return b.Bytes(), nil
36}
37
38// MarshalTextIndent serializes this message to bytes in the standard text
39// format, returning an error if the operation fails. The resulting bytes will
40// be a valid UTF8 string.
41//
42// This method uses a "pretty-printed" form, with each field on its own line and
43// spaces between field identifiers and values.
44func (m *Message) MarshalTextIndent() ([]byte, error) {
45 var b indentBuffer
46 b.indent = " " // TODO: option for indent?
47 if err := m.marshalText(&b); err != nil {
48 return nil, err
49 }
50 return b.Bytes(), nil
51}
52
53func (m *Message) marshalText(b *indentBuffer) error {
54 // TODO: option for emitting extended Any format?
55 first := true
56 // first the known fields
57 for _, tag := range m.knownFieldTags() {
58 itag := int32(tag)
59 v := m.values[itag]
60 fd := m.FindFieldDescriptor(itag)
61 if fd.IsMap() {
62 md := fd.GetMessageType()
63 kfd := md.FindFieldByNumber(1)
64 vfd := md.FindFieldByNumber(2)
65 mp := v.(map[interface{}]interface{})
66 keys := make([]interface{}, 0, len(mp))
67 for k := range mp {
68 keys = append(keys, k)
69 }
70 sort.Sort(sortable(keys))
71 for _, mk := range keys {
72 mv := mp[mk]
73 err := b.maybeNext(&first)
74 if err != nil {
75 return err
76 }
77 err = marshalKnownFieldMapEntryText(b, fd, kfd, mk, vfd, mv)
78 if err != nil {
79 return err
80 }
81 }
82 } else if fd.IsRepeated() {
83 sl := v.([]interface{})
84 for _, slv := range sl {
85 err := b.maybeNext(&first)
86 if err != nil {
87 return err
88 }
89 err = marshalKnownFieldText(b, fd, slv)
90 if err != nil {
91 return err
92 }
93 }
94 } else {
95 err := b.maybeNext(&first)
96 if err != nil {
97 return err
98 }
99 err = marshalKnownFieldText(b, fd, v)
100 if err != nil {
101 return err
102 }
103 }
104 }
105 // then the unknown fields
106 for _, tag := range m.unknownFieldTags() {
107 itag := int32(tag)
108 ufs := m.unknownFields[itag]
109 for _, uf := range ufs {
110 err := b.maybeNext(&first)
111 if err != nil {
112 return err
113 }
114 _, err = fmt.Fprintf(b, "%d", tag)
115 if err != nil {
116 return err
117 }
118 if uf.Encoding == proto.WireStartGroup {
119 err = b.WriteByte('{')
120 if err != nil {
121 return err
122 }
123 err = b.start()
124 if err != nil {
125 return err
126 }
127 in := newCodedBuffer(uf.Contents)
128 err = marshalUnknownGroupText(b, in, true)
129 if err != nil {
130 return err
131 }
132 err = b.end()
133 if err != nil {
134 return err
135 }
136 err = b.WriteByte('}')
137 if err != nil {
138 return err
139 }
140 } else {
141 err = b.sep()
142 if err != nil {
143 return err
144 }
145 if uf.Encoding == proto.WireBytes {
146 err = writeString(b, string(uf.Contents))
147 if err != nil {
148 return err
149 }
150 } else {
151 _, err = b.WriteString(strconv.FormatUint(uf.Value, 10))
152 if err != nil {
153 return err
154 }
155 }
156 }
157 }
158 }
159 return nil
160}
161
162func marshalKnownFieldMapEntryText(b *indentBuffer, fd *desc.FieldDescriptor, kfd *desc.FieldDescriptor, mk interface{}, vfd *desc.FieldDescriptor, mv interface{}) error {
163 var name string
164 if fd.IsExtension() {
165 name = fmt.Sprintf("[%s]", fd.GetFullyQualifiedName())
166 } else {
167 name = fd.GetName()
168 }
169 _, err := b.WriteString(name)
170 if err != nil {
171 return err
172 }
173 err = b.sep()
174 if err != nil {
175 return err
176 }
177
178 err = b.WriteByte('<')
179 if err != nil {
180 return err
181 }
182 err = b.start()
183 if err != nil {
184 return err
185 }
186
187 err = marshalKnownFieldText(b, kfd, mk)
188 if err != nil {
189 return err
190 }
191 err = b.next()
192 if err != nil {
193 return err
194 }
195 err = marshalKnownFieldText(b, vfd, mv)
196 if err != nil {
197 return err
198 }
199
200 err = b.end()
201 if err != nil {
202 return err
203 }
204 return b.WriteByte('>')
205}
206
207func marshalKnownFieldText(b *indentBuffer, fd *desc.FieldDescriptor, v interface{}) error {
208 group := fd.GetType() == descriptor.FieldDescriptorProto_TYPE_GROUP
209 if group {
210 var name string
211 if fd.IsExtension() {
212 name = fmt.Sprintf("[%s]", fd.GetMessageType().GetFullyQualifiedName())
213 } else {
214 name = fd.GetMessageType().GetName()
215 }
216 _, err := b.WriteString(name)
217 if err != nil {
218 return err
219 }
220 } else {
221 var name string
222 if fd.IsExtension() {
223 name = fmt.Sprintf("[%s]", fd.GetFullyQualifiedName())
224 } else {
225 name = fd.GetName()
226 }
227 _, err := b.WriteString(name)
228 if err != nil {
229 return err
230 }
231 err = b.sep()
232 if err != nil {
233 return err
234 }
235 }
236 rv := reflect.ValueOf(v)
237 switch rv.Kind() {
238 case reflect.Int32, reflect.Int64:
239 ed := fd.GetEnumType()
240 if ed != nil {
241 n := int32(rv.Int())
242 vd := ed.FindValueByNumber(n)
243 if vd == nil {
244 _, err := b.WriteString(strconv.FormatInt(rv.Int(), 10))
245 return err
246 } else {
247 _, err := b.WriteString(vd.GetName())
248 return err
249 }
250 } else {
251 _, err := b.WriteString(strconv.FormatInt(rv.Int(), 10))
252 return err
253 }
254 case reflect.Uint32, reflect.Uint64:
255 _, err := b.WriteString(strconv.FormatUint(rv.Uint(), 10))
256 return err
257 case reflect.Float32, reflect.Float64:
258 f := rv.Float()
259 var str string
260 if math.IsNaN(f) {
261 str = "nan"
262 } else if math.IsInf(f, 1) {
263 str = "inf"
264 } else if math.IsInf(f, -1) {
265 str = "-inf"
266 } else {
267 var bits int
268 if rv.Kind() == reflect.Float32 {
269 bits = 32
270 } else {
271 bits = 64
272 }
273 str = strconv.FormatFloat(rv.Float(), 'g', -1, bits)
274 }
275 _, err := b.WriteString(str)
276 return err
277 case reflect.Bool:
278 _, err := b.WriteString(strconv.FormatBool(rv.Bool()))
279 return err
280 case reflect.Slice:
281 return writeString(b, string(rv.Bytes()))
282 case reflect.String:
283 return writeString(b, rv.String())
284 default:
285 var err error
286 if group {
287 err = b.WriteByte('{')
288 } else {
289 err = b.WriteByte('<')
290 }
291 if err != nil {
292 return err
293 }
294 err = b.start()
295 if err != nil {
296 return err
297 }
298 // must be a message
299 if dm, ok := v.(*Message); ok {
300 err = dm.marshalText(b)
301 if err != nil {
302 return err
303 }
304 } else {
305 err = proto.CompactText(b, v.(proto.Message))
306 if err != nil {
307 return err
308 }
309 }
310 err = b.end()
311 if err != nil {
312 return err
313 }
314 if group {
315 return b.WriteByte('}')
316 } else {
317 return b.WriteByte('>')
318 }
319 }
320}
321
322// writeString writes a string in the protocol buffer text format.
323// It is similar to strconv.Quote except we don't use Go escape sequences,
324// we treat the string as a byte sequence, and we use octal escapes.
325// These differences are to maintain interoperability with the other
326// languages' implementations of the text format.
327func writeString(b *indentBuffer, s string) error {
328 // use WriteByte here to get any needed indent
329 if err := b.WriteByte('"'); err != nil {
330 return err
331 }
332 // Loop over the bytes, not the runes.
333 for i := 0; i < len(s); i++ {
334 var err error
335 // Divergence from C++: we don't escape apostrophes.
336 // There's no need to escape them, and the C++ parser
337 // copes with a naked apostrophe.
338 switch c := s[i]; c {
339 case '\n':
340 _, err = b.WriteString("\\n")
341 case '\r':
342 _, err = b.WriteString("\\r")
343 case '\t':
344 _, err = b.WriteString("\\t")
345 case '"':
346 _, err = b.WriteString("\\")
347 case '\\':
348 _, err = b.WriteString("\\\\")
349 default:
350 if c >= 0x20 && c < 0x7f {
351 err = b.WriteByte(c)
352 } else {
353 _, err = fmt.Fprintf(b, "\\%03o", c)
354 }
355 }
356 if err != nil {
357 return err
358 }
359 }
360 return b.WriteByte('"')
361}
362
363func marshalUnknownGroupText(b *indentBuffer, in *codedBuffer, topLevel bool) error {
364 first := true
365 for {
366 if in.eof() {
367 if topLevel {
368 return nil
369 }
370 // this is a nested message: we are expecting an end-group tag, not EOF!
371 return io.ErrUnexpectedEOF
372 }
373 tag, wireType, err := in.decodeTagAndWireType()
374 if err != nil {
375 return err
376 }
377 if wireType == proto.WireEndGroup {
378 return nil
379 }
380 err = b.maybeNext(&first)
381 if err != nil {
382 return err
383 }
384 _, err = fmt.Fprintf(b, "%d", tag)
385 if err != nil {
386 return err
387 }
388 if wireType == proto.WireStartGroup {
389 err = b.WriteByte('{')
390 if err != nil {
391 return err
392 }
393 err = b.start()
394 if err != nil {
395 return err
396 }
397 err = marshalUnknownGroupText(b, in, false)
398 if err != nil {
399 return err
400 }
401 err = b.end()
402 if err != nil {
403 return err
404 }
405 err = b.WriteByte('}')
406 if err != nil {
407 return err
408 }
409 continue
410 } else {
411 err = b.sep()
412 if err != nil {
413 return err
414 }
415 if wireType == proto.WireBytes {
416 contents, err := in.decodeRawBytes(false)
417 if err != nil {
418 return err
419 }
420 err = writeString(b, string(contents))
421 if err != nil {
422 return err
423 }
424 } else {
425 var v uint64
426 switch wireType {
427 case proto.WireVarint:
428 v, err = in.decodeVarint()
429 case proto.WireFixed32:
430 v, err = in.decodeFixed32()
431 case proto.WireFixed64:
432 v, err = in.decodeFixed64()
433 default:
434 return proto.ErrInternalBadWireType
435 }
436 if err != nil {
437 return err
438 }
439 _, err = b.WriteString(strconv.FormatUint(v, 10))
440 if err != nil {
441 return err
442 }
443 }
444 }
445 }
446}
447
448// UnmarshalText de-serializes the message that is present, in text format, in
449// the given bytes into this message. It first resets the current message. It
450// returns an error if the given bytes do not contain a valid encoding of this
451// message type in the standard text format
452func (m *Message) UnmarshalText(text []byte) error {
453 m.Reset()
454 if err := m.UnmarshalMergeText(text); err != nil {
455 return err
456 }
457 return m.Validate()
458}
459
460// UnmarshalMergeText de-serializes the message that is present, in text format,
461// in the given bytes into this message. Unlike UnmarshalText, it does not first
462// reset the message, instead merging the data in the given bytes into the
463// existing data in this message.
464func (m *Message) UnmarshalMergeText(text []byte) error {
465 return m.unmarshalText(newReader(text), tokenEOF)
466}
467
468func (m *Message) unmarshalText(tr *txtReader, end tokenType) error {
469 for {
470 tok := tr.next()
471 if tok.tokTyp == end {
472 return nil
473 }
474 if tok.tokTyp == tokenEOF {
475 return io.ErrUnexpectedEOF
476 }
477 var fd *desc.FieldDescriptor
478 var extendedAnyType *desc.MessageDescriptor
479 if tok.tokTyp == tokenInt {
480 // tag number (indicates unknown field)
481 tag, err := strconv.ParseInt(tok.val.(string), 10, 32)
482 if err != nil {
483 return err
484 }
485 itag := int32(tag)
486 fd = m.FindFieldDescriptor(itag)
487 if fd == nil {
488 // can't parse the value w/out field descriptor, so skip it
489 tok = tr.next()
490 if tok.tokTyp == tokenEOF {
491 return io.ErrUnexpectedEOF
492 } else if tok.tokTyp == tokenOpenBrace {
493 if err := skipMessageText(tr, true); err != nil {
494 return err
495 }
496 } else if tok.tokTyp == tokenColon {
497 if err := skipFieldValueText(tr); err != nil {
498 return err
499 }
500 } else {
501 return textError(tok, "Expecting a colon ':' or brace '{'; instead got %q", tok.txt)
502 }
503 tok = tr.peek()
504 if tok.tokTyp.IsSep() {
505 tr.next() // consume separator
506 }
507 continue
508 }
509 } else {
510 fieldName, err := unmarshalFieldNameText(tr, tok)
511 if err != nil {
512 return err
513 }
514 fd = m.FindFieldDescriptorByName(fieldName)
515 if fd == nil {
516 // See if it's a group name
517 for _, field := range m.md.GetFields() {
518 if field.GetType() == descriptor.FieldDescriptorProto_TYPE_GROUP && field.GetMessageType().GetName() == fieldName {
519 fd = field
520 break
521 }
522 }
523 if fd == nil {
524 // maybe this is an extended Any
525 if m.md.GetFullyQualifiedName() == "google.protobuf.Any" && fieldName[0] == '[' && strings.Contains(fieldName, "/") {
526 // strip surrounding "[" and "]" and extract type name from URL
527 typeUrl := fieldName[1 : len(fieldName)-1]
528 mname := typeUrl
529 if slash := strings.LastIndex(mname, "/"); slash >= 0 {
530 mname = mname[slash+1:]
531 }
532 // TODO: add a way to weave an AnyResolver to this point
533 extendedAnyType = findMessageDescriptor(mname, m.md.GetFile())
534 if extendedAnyType == nil {
535 return textError(tok, "could not parse Any with unknown type URL %q", fieldName)
536 }
537 // field 1 is "type_url"
538 typeUrlField := m.md.FindFieldByNumber(1)
539 if err := m.TrySetField(typeUrlField, typeUrl); err != nil {
540 return err
541 }
542 } else {
543 // TODO: add a flag to just ignore unrecognized field names
544 return textError(tok, "%q is not a recognized field name of %q", fieldName, m.md.GetFullyQualifiedName())
545 }
546 }
547 }
548 }
549 tok = tr.next()
550 if tok.tokTyp == tokenEOF {
551 return io.ErrUnexpectedEOF
552 }
553 if extendedAnyType != nil {
554 // consume optional colon; make sure this is a "start message" token
555 if tok.tokTyp == tokenColon {
556 tok = tr.next()
557 if tok.tokTyp == tokenEOF {
558 return io.ErrUnexpectedEOF
559 }
560 }
561 if tok.tokTyp.EndToken() == tokenError {
562 return textError(tok, "Expecting a '<' or '{'; instead got %q", tok.txt)
563 }
564
565 // TODO: use mf.NewMessage and, if not a dynamic message, use proto.UnmarshalText to unmarshal it
566 g := m.mf.NewDynamicMessage(extendedAnyType)
567 if err := g.unmarshalText(tr, tok.tokTyp.EndToken()); err != nil {
568 return err
569 }
570 // now we marshal the message to bytes and store in the Any
571 b, err := g.Marshal()
572 if err != nil {
573 return err
574 }
575 // field 2 is "value"
576 anyValueField := m.md.FindFieldByNumber(2)
577 if err := m.TrySetField(anyValueField, b); err != nil {
578 return err
579 }
580
581 } else if (fd.GetType() == descriptor.FieldDescriptorProto_TYPE_GROUP ||
582 fd.GetType() == descriptor.FieldDescriptorProto_TYPE_MESSAGE) &&
583 tok.tokTyp.EndToken() != tokenError {
584
585 // TODO: use mf.NewMessage and, if not a dynamic message, use proto.UnmarshalText to unmarshal it
586 g := m.mf.NewDynamicMessage(fd.GetMessageType())
587 if err := g.unmarshalText(tr, tok.tokTyp.EndToken()); err != nil {
588 return err
589 }
590 if fd.IsRepeated() {
591 if err := m.TryAddRepeatedField(fd, g); err != nil {
592 return err
593 }
594 } else {
595 if err := m.TrySetField(fd, g); err != nil {
596 return err
597 }
598 }
599 } else {
600 if tok.tokTyp != tokenColon {
601 return textError(tok, "Expecting a colon ':'; instead got %q", tok.txt)
602 }
603 if err := m.unmarshalFieldValueText(fd, tr); err != nil {
604 return err
605 }
606 }
607 tok = tr.peek()
608 if tok.tokTyp.IsSep() {
609 tr.next() // consume separator
610 }
611 }
612}
613func findMessageDescriptor(name string, fd *desc.FileDescriptor) *desc.MessageDescriptor {
614 md := findMessageInTransitiveDeps(name, fd, map[*desc.FileDescriptor]struct{}{})
615 if md == nil {
616 // couldn't find it; see if we have this message linked in
617 md, _ = desc.LoadMessageDescriptor(name)
618 }
619 return md
620}
621
622func findMessageInTransitiveDeps(name string, fd *desc.FileDescriptor, seen map[*desc.FileDescriptor]struct{}) *desc.MessageDescriptor {
623 if _, ok := seen[fd]; ok {
624 // already checked this file
625 return nil
626 }
627 seen[fd] = struct{}{}
628 md := fd.FindMessage(name)
629 if md != nil {
630 return md
631 }
632 // not in this file so recursively search its deps
633 for _, dep := range fd.GetDependencies() {
634 md = findMessageInTransitiveDeps(name, dep, seen)
635 if md != nil {
636 return md
637 }
638 }
639 // couldn't find it
640 return nil
641}
642
643func textError(tok *token, format string, args ...interface{}) error {
644 var msg string
645 if tok.tokTyp == tokenError {
646 msg = tok.val.(error).Error()
647 } else {
648 msg = fmt.Sprintf(format, args...)
649 }
650 return fmt.Errorf("line %d, col %d: %s", tok.pos.Line, tok.pos.Column, msg)
651}
652
653type setFunction func(*Message, *desc.FieldDescriptor, interface{}) error
654
655func (m *Message) unmarshalFieldValueText(fd *desc.FieldDescriptor, tr *txtReader) error {
656 var set setFunction
657 if fd.IsRepeated() {
658 set = (*Message).addRepeatedField
659 } else {
660 set = mergeField
661 }
662 tok := tr.peek()
663 if tok.tokTyp == tokenOpenBracket {
664 tr.next() // consume tok
665 for {
666 if err := m.unmarshalFieldElementText(fd, tr, set); err != nil {
667 return err
668 }
669 tok = tr.peek()
670 if tok.tokTyp == tokenCloseBracket {
671 tr.next() // consume tok
672 return nil
673 } else if tok.tokTyp.IsSep() {
674 tr.next() // consume separator
675 }
676 }
677 }
678 return m.unmarshalFieldElementText(fd, tr, set)
679}
680
681func (m *Message) unmarshalFieldElementText(fd *desc.FieldDescriptor, tr *txtReader, set setFunction) error {
682 tok := tr.next()
683 if tok.tokTyp == tokenEOF {
684 return io.ErrUnexpectedEOF
685 }
686
687 var expected string
688 switch fd.GetType() {
689 case descriptor.FieldDescriptorProto_TYPE_BOOL:
690 if tok.tokTyp == tokenIdent {
691 if tok.val.(string) == "true" {
692 return set(m, fd, true)
693 } else if tok.val.(string) == "false" {
694 return set(m, fd, false)
695 }
696 }
697 expected = "boolean value"
698 case descriptor.FieldDescriptorProto_TYPE_BYTES:
699 if tok.tokTyp == tokenString {
700 return set(m, fd, []byte(tok.val.(string)))
701 }
702 expected = "bytes string value"
703 case descriptor.FieldDescriptorProto_TYPE_STRING:
704 if tok.tokTyp == tokenString {
705 return set(m, fd, tok.val)
706 }
707 expected = "string value"
708 case descriptor.FieldDescriptorProto_TYPE_FLOAT:
709 switch tok.tokTyp {
710 case tokenFloat:
711 return set(m, fd, float32(tok.val.(float64)))
712 case tokenInt:
713 if f, err := strconv.ParseFloat(tok.val.(string), 32); err != nil {
714 return err
715 } else {
716 return set(m, fd, float32(f))
717 }
718 case tokenIdent:
719 ident := strings.ToLower(tok.val.(string))
720 if ident == "inf" {
721 return set(m, fd, float32(math.Inf(1)))
722 } else if ident == "nan" {
723 return set(m, fd, float32(math.NaN()))
724 }
725 case tokenMinus:
726 peeked := tr.peek()
727 if peeked.tokTyp == tokenIdent {
728 ident := strings.ToLower(peeked.val.(string))
729 if ident == "inf" {
730 tr.next() // consume peeked token
731 return set(m, fd, float32(math.Inf(-1)))
732 }
733 }
734 }
735 expected = "float value"
736 case descriptor.FieldDescriptorProto_TYPE_DOUBLE:
737 switch tok.tokTyp {
738 case tokenFloat:
739 return set(m, fd, tok.val)
740 case tokenInt:
741 if f, err := strconv.ParseFloat(tok.val.(string), 64); err != nil {
742 return err
743 } else {
744 return set(m, fd, f)
745 }
746 case tokenIdent:
747 ident := strings.ToLower(tok.val.(string))
748 if ident == "inf" {
749 return set(m, fd, math.Inf(1))
750 } else if ident == "nan" {
751 return set(m, fd, math.NaN())
752 }
753 case tokenMinus:
754 peeked := tr.peek()
755 if peeked.tokTyp == tokenIdent {
756 ident := strings.ToLower(peeked.val.(string))
757 if ident == "inf" {
758 tr.next() // consume peeked token
759 return set(m, fd, math.Inf(-1))
760 }
761 }
762 }
763 expected = "float value"
764 case descriptor.FieldDescriptorProto_TYPE_INT32,
765 descriptor.FieldDescriptorProto_TYPE_SINT32,
766 descriptor.FieldDescriptorProto_TYPE_SFIXED32:
767 if tok.tokTyp == tokenInt {
768 if i, err := strconv.ParseInt(tok.val.(string), 10, 32); err != nil {
769 return err
770 } else {
771 return set(m, fd, int32(i))
772 }
773 }
774 expected = "int value"
775 case descriptor.FieldDescriptorProto_TYPE_INT64,
776 descriptor.FieldDescriptorProto_TYPE_SINT64,
777 descriptor.FieldDescriptorProto_TYPE_SFIXED64:
778 if tok.tokTyp == tokenInt {
779 if i, err := strconv.ParseInt(tok.val.(string), 10, 64); err != nil {
780 return err
781 } else {
782 return set(m, fd, i)
783 }
784 }
785 expected = "int value"
786 case descriptor.FieldDescriptorProto_TYPE_UINT32,
787 descriptor.FieldDescriptorProto_TYPE_FIXED32:
788 if tok.tokTyp == tokenInt {
789 if i, err := strconv.ParseUint(tok.val.(string), 10, 32); err != nil {
790 return err
791 } else {
792 return set(m, fd, uint32(i))
793 }
794 }
795 expected = "unsigned int value"
796 case descriptor.FieldDescriptorProto_TYPE_UINT64,
797 descriptor.FieldDescriptorProto_TYPE_FIXED64:
798 if tok.tokTyp == tokenInt {
799 if i, err := strconv.ParseUint(tok.val.(string), 10, 64); err != nil {
800 return err
801 } else {
802 return set(m, fd, i)
803 }
804 }
805 expected = "unsigned int value"
806 case descriptor.FieldDescriptorProto_TYPE_ENUM:
807 if tok.tokTyp == tokenIdent {
808 // TODO: add a flag to just ignore unrecognized enum value names?
809 vd := fd.GetEnumType().FindValueByName(tok.val.(string))
810 if vd != nil {
811 return set(m, fd, vd.GetNumber())
812 }
813 } else if tok.tokTyp == tokenInt {
814 if i, err := strconv.ParseInt(tok.val.(string), 10, 32); err != nil {
815 return err
816 } else {
817 return set(m, fd, int32(i))
818 }
819 }
820 expected = fmt.Sprintf("enum %s value", fd.GetEnumType().GetFullyQualifiedName())
821 case descriptor.FieldDescriptorProto_TYPE_MESSAGE,
822 descriptor.FieldDescriptorProto_TYPE_GROUP:
823
824 endTok := tok.tokTyp.EndToken()
825 if endTok != tokenError {
826 dm := m.mf.NewDynamicMessage(fd.GetMessageType())
827 if err := dm.unmarshalText(tr, endTok); err != nil {
828 return err
829 }
830 // TODO: ideally we would use mf.NewMessage and, if not a dynamic message, use
831 // proto package to unmarshal it. But the text parser isn't particularly amenable
832 // to that, so we instead convert a dynamic message to a generated one if the
833 // known-type registry knows about the generated type...
834 var ktr *KnownTypeRegistry
835 if m.mf != nil {
836 ktr = m.mf.ktr
837 }
838 pm := ktr.CreateIfKnown(fd.GetMessageType().GetFullyQualifiedName())
839 if pm != nil {
840 if err := dm.ConvertTo(pm); err != nil {
841 return set(m, fd, pm)
842 }
843 }
844 return set(m, fd, dm)
845 }
846 expected = fmt.Sprintf("message %s value", fd.GetMessageType().GetFullyQualifiedName())
847 default:
848 return fmt.Errorf("field %q of message %q has unrecognized type: %v", fd.GetFullyQualifiedName(), m.md.GetFullyQualifiedName(), fd.GetType())
849 }
850
851 // if we get here, token was wrong type; create error message
852 var article string
853 if strings.Contains("aieou", expected[0:1]) {
854 article = "an"
855 } else {
856 article = "a"
857 }
858 return textError(tok, "Expecting %s %s; got %q", article, expected, tok.txt)
859}
860
861func unmarshalFieldNameText(tr *txtReader, tok *token) (string, error) {
862 if tok.tokTyp == tokenOpenBracket || tok.tokTyp == tokenOpenParen {
863 // extension name
864 var closeType tokenType
865 var closeChar string
866 if tok.tokTyp == tokenOpenBracket {
867 closeType = tokenCloseBracket
868 closeChar = "close bracket ']'"
869 } else {
870 closeType = tokenCloseParen
871 closeChar = "close paren ')'"
872 }
873 // must be followed by an identifier
874 idents := make([]string, 0, 1)
875 for {
876 tok = tr.next()
877 if tok.tokTyp == tokenEOF {
878 return "", io.ErrUnexpectedEOF
879 } else if tok.tokTyp != tokenIdent {
880 return "", textError(tok, "Expecting an identifier; instead got %q", tok.txt)
881 }
882 idents = append(idents, tok.val.(string))
883 // and then close bracket/paren, or "/" to keep adding URL elements to name
884 tok = tr.next()
885 if tok.tokTyp == tokenEOF {
886 return "", io.ErrUnexpectedEOF
887 } else if tok.tokTyp == closeType {
888 break
889 } else if tok.tokTyp != tokenSlash {
890 return "", textError(tok, "Expecting a %s; instead got %q", closeChar, tok.txt)
891 }
892 }
893 return "[" + strings.Join(idents, "/") + "]", nil
894 } else if tok.tokTyp == tokenIdent {
895 // normal field name
896 return tok.val.(string), nil
897 } else {
898 return "", textError(tok, "Expecting an identifier or tag number; instead got %q", tok.txt)
899 }
900}
901
902func skipFieldNameText(tr *txtReader) error {
903 tok := tr.next()
904 if tok.tokTyp == tokenEOF {
905 return io.ErrUnexpectedEOF
906 } else if tok.tokTyp == tokenInt || tok.tokTyp == tokenIdent {
907 return nil
908 } else {
909 _, err := unmarshalFieldNameText(tr, tok)
910 return err
911 }
912}
913
914func skipFieldValueText(tr *txtReader) error {
915 tok := tr.peek()
916 if tok.tokTyp == tokenOpenBracket {
917 tr.next() // consume tok
918 for {
919 if err := skipFieldElementText(tr); err != nil {
920 return err
921 }
922 tok = tr.peek()
923 if tok.tokTyp == tokenCloseBracket {
924 tr.next() // consume tok
925 return nil
926 } else if tok.tokTyp.IsSep() {
927 tr.next() // consume separator
928 }
929
930 }
931 }
932 return skipFieldElementText(tr)
933}
934
935func skipFieldElementText(tr *txtReader) error {
936 tok := tr.next()
937 switch tok.tokTyp {
938 case tokenEOF:
939 return io.ErrUnexpectedEOF
940 case tokenInt, tokenFloat, tokenString, tokenIdent:
941 return nil
942 case tokenOpenAngle:
943 return skipMessageText(tr, false)
944 default:
945 return textError(tok, "Expecting an angle bracket '<' or a value; instead got %q", tok.txt)
946 }
947}
948
949func skipMessageText(tr *txtReader, isGroup bool) error {
950 for {
951 tok := tr.peek()
952 if tok.tokTyp == tokenEOF {
953 return io.ErrUnexpectedEOF
954 } else if isGroup && tok.tokTyp == tokenCloseBrace {
955 return nil
956 } else if !isGroup && tok.tokTyp == tokenCloseAngle {
957 return nil
958 }
959
960 // field name or tag
961 if err := skipFieldNameText(tr); err != nil {
962 return err
963 }
964
965 // field value
966 tok = tr.next()
967 if tok.tokTyp == tokenEOF {
968 return io.ErrUnexpectedEOF
969 } else if tok.tokTyp == tokenOpenBrace {
970 if err := skipMessageText(tr, true); err != nil {
971 return err
972 }
973 } else if tok.tokTyp == tokenColon {
974 if err := skipFieldValueText(tr); err != nil {
975 return err
976 }
977 } else {
978 return textError(tok, "Expecting a colon ':' or brace '{'; instead got %q", tok.txt)
979 }
980
981 tok = tr.peek()
982 if tok.tokTyp.IsSep() {
983 tr.next() // consume separator
984 }
985 }
986}
987
// tokenType classifies the tokens produced by the text-format tokenizer.
type tokenType int

// The token kinds. tokenError is deliberately the zero value; EndToken also
// returns it to signal "no matching close token".
const (
	tokenError tokenType = iota
	tokenEOF
	tokenIdent
	tokenString
	tokenInt
	tokenFloat
	tokenColon
	tokenComma
	tokenSemiColon
	tokenOpenBrace
	tokenCloseBrace
	tokenOpenBracket
	tokenCloseBracket
	tokenOpenAngle
	tokenCloseAngle
	tokenOpenParen
	tokenCloseParen
	tokenSlash
	tokenMinus
)

// IsSep reports whether t is a field separator, i.e. a comma or a semicolon.
func (t tokenType) IsSep() bool {
	switch t {
	case tokenComma, tokenSemiColon:
		return true
	}
	return false
}

// EndToken returns the token type that closes a message opened by t:
// tokenCloseAngle for tokenOpenAngle and tokenCloseBrace for tokenOpenBrace.
// For every other token type it returns tokenError.
func (t tokenType) EndToken() tokenType {
	if t == tokenOpenAngle {
		return tokenCloseAngle
	}
	if t == tokenOpenBrace {
		return tokenCloseBrace
	}
	return tokenError
}
1026
1027type token struct {
1028 tokTyp tokenType
1029 val interface{}
1030 txt string
1031 pos scanner.Position
1032}
1033
1034type txtReader struct {
1035 scanner scanner.Scanner
1036 peeked token
1037 havePeeked bool
1038}
1039
1040func newReader(text []byte) *txtReader {
1041 sc := scanner.Scanner{}
1042 sc.Init(bytes.NewReader(text))
1043 sc.Mode = scanner.ScanIdents | scanner.ScanInts | scanner.ScanFloats | scanner.ScanChars |
1044 scanner.ScanStrings | scanner.ScanComments | scanner.SkipComments
1045 // identifiers are same restrictions as Go identifiers, except we also allow dots since
1046 // we accept fully-qualified names
1047 sc.IsIdentRune = func(ch rune, i int) bool {
1048 return ch == '_' || unicode.IsLetter(ch) ||
1049 (i > 0 && unicode.IsDigit(ch)) ||
1050 (i > 0 && ch == '.')
1051 }
1052 // ignore errors; we handle them if/when we see malformed tokens
1053 sc.Error = func(s *scanner.Scanner, msg string) {}
1054 return &txtReader{scanner: sc}
1055}
1056
1057func (p *txtReader) peek() *token {
1058 if p.havePeeked {
1059 return &p.peeked
1060 }
1061 t := p.scanner.Scan()
1062 if t == scanner.EOF {
1063 p.peeked.tokTyp = tokenEOF
1064 p.peeked.val = nil
1065 p.peeked.txt = ""
1066 p.peeked.pos = p.scanner.Position
1067 } else if err := p.processToken(t, p.scanner.TokenText(), p.scanner.Position); err != nil {
1068 p.peeked.tokTyp = tokenError
1069 p.peeked.val = err
1070 }
1071 p.havePeeked = true
1072 return &p.peeked
1073}
1074
1075func (p *txtReader) processToken(t rune, text string, pos scanner.Position) error {
1076 p.peeked.pos = pos
1077 p.peeked.txt = text
1078 switch t {
1079 case scanner.Ident:
1080 p.peeked.tokTyp = tokenIdent
1081 p.peeked.val = text
1082 case scanner.Int:
1083 p.peeked.tokTyp = tokenInt
1084 p.peeked.val = text // can't parse the number because we don't know if it's signed or unsigned
1085 case scanner.Float:
1086 p.peeked.tokTyp = tokenFloat
1087 var err error
1088 if p.peeked.val, err = strconv.ParseFloat(text, 64); err != nil {
1089 return err
1090 }
1091 case scanner.Char, scanner.String:
1092 p.peeked.tokTyp = tokenString
1093 var err error
1094 if p.peeked.val, err = strconv.Unquote(text); err != nil {
1095 return err
1096 }
1097 case '-': // unary minus, for negative ints and floats
1098 ch := p.scanner.Peek()
1099 if ch < '0' || ch > '9' {
1100 p.peeked.tokTyp = tokenMinus
1101 p.peeked.val = '-'
1102 } else {
1103 t := p.scanner.Scan()
1104 if t == scanner.EOF {
1105 return io.ErrUnexpectedEOF
1106 } else if t == scanner.Float {
1107 p.peeked.tokTyp = tokenFloat
1108 text += p.scanner.TokenText()
1109 p.peeked.txt = text
1110 var err error
1111 if p.peeked.val, err = strconv.ParseFloat(text, 64); err != nil {
1112 p.peeked.pos = p.scanner.Position
1113 return err
1114 }
1115 } else if t == scanner.Int {
1116 p.peeked.tokTyp = tokenInt
1117 text += p.scanner.TokenText()
1118 p.peeked.txt = text
1119 p.peeked.val = text // can't parse the number because we don't know if it's signed or unsigned
1120 } else {
1121 p.peeked.pos = p.scanner.Position
1122 return fmt.Errorf("expecting an int or float but got %q", p.scanner.TokenText())
1123 }
1124 }
1125 case ':':
1126 p.peeked.tokTyp = tokenColon
1127 p.peeked.val = ':'
1128 case ',':
1129 p.peeked.tokTyp = tokenComma
1130 p.peeked.val = ','
1131 case ';':
1132 p.peeked.tokTyp = tokenSemiColon
1133 p.peeked.val = ';'
1134 case '{':
1135 p.peeked.tokTyp = tokenOpenBrace
1136 p.peeked.val = '{'
1137 case '}':
1138 p.peeked.tokTyp = tokenCloseBrace
1139 p.peeked.val = '}'
1140 case '<':
1141 p.peeked.tokTyp = tokenOpenAngle
1142 p.peeked.val = '<'
1143 case '>':
1144 p.peeked.tokTyp = tokenCloseAngle
1145 p.peeked.val = '>'
1146 case '[':
1147 p.peeked.tokTyp = tokenOpenBracket
1148 p.peeked.val = '['
1149 case ']':
1150 p.peeked.tokTyp = tokenCloseBracket
1151 p.peeked.val = ']'
1152 case '(':
1153 p.peeked.tokTyp = tokenOpenParen
1154 p.peeked.val = '('
1155 case ')':
1156 p.peeked.tokTyp = tokenCloseParen
1157 p.peeked.val = ')'
1158 case '/':
1159 // only allowed to separate URL components in expanded Any format
1160 p.peeked.tokTyp = tokenSlash
1161 p.peeked.val = '/'
1162 default:
1163 return fmt.Errorf("invalid character: %c", t)
1164 }
1165 return nil
1166}
1167
1168func (p *txtReader) next() *token {
1169 t := p.peek()
1170 if t.tokTyp != tokenEOF && t.tokTyp != tokenError {
1171 p.havePeeked = false
1172 }
1173 return t
1174}