blob: 72636f2f146c5a1b3aec405a38a96d4fd75547cf [file] [log] [blame]
Zack Williamse940c7a2019-08-21 14:25:39 -07001package dynamic
2
3// Marshalling and unmarshalling of dynamic messages to/from proto's standard text format
4
5import (
6 "bytes"
7 "fmt"
8 "io"
9 "math"
10 "reflect"
11 "sort"
12 "strconv"
13 "strings"
14 "text/scanner"
15 "unicode"
16
17 "github.com/golang/protobuf/proto"
18 "github.com/golang/protobuf/protoc-gen-go/descriptor"
19
Scott Baker4a35a702019-11-26 08:17:33 -080020 "github.com/jhump/protoreflect/codec"
Zack Williamse940c7a2019-08-21 14:25:39 -070021 "github.com/jhump/protoreflect/desc"
22)
23
24// MarshalText serializes this message to bytes in the standard text format,
25// returning an error if the operation fails. The resulting bytes will be a
26// valid UTF8 string.
27//
28// This method uses a compact form: no newlines, and spaces between field
29// identifiers and values are elided.
30func (m *Message) MarshalText() ([]byte, error) {
31 var b indentBuffer
32 b.indentCount = -1 // no indentation
33 if err := m.marshalText(&b); err != nil {
34 return nil, err
35 }
36 return b.Bytes(), nil
37}
38
39// MarshalTextIndent serializes this message to bytes in the standard text
40// format, returning an error if the operation fails. The resulting bytes will
41// be a valid UTF8 string.
42//
43// This method uses a "pretty-printed" form, with each field on its own line and
44// spaces between field identifiers and values.
45func (m *Message) MarshalTextIndent() ([]byte, error) {
46 var b indentBuffer
47 b.indent = " " // TODO: option for indent?
48 if err := m.marshalText(&b); err != nil {
49 return nil, err
50 }
51 return b.Bytes(), nil
52}
53
54func (m *Message) marshalText(b *indentBuffer) error {
55 // TODO: option for emitting extended Any format?
56 first := true
57 // first the known fields
58 for _, tag := range m.knownFieldTags() {
59 itag := int32(tag)
60 v := m.values[itag]
61 fd := m.FindFieldDescriptor(itag)
62 if fd.IsMap() {
63 md := fd.GetMessageType()
64 kfd := md.FindFieldByNumber(1)
65 vfd := md.FindFieldByNumber(2)
66 mp := v.(map[interface{}]interface{})
67 keys := make([]interface{}, 0, len(mp))
68 for k := range mp {
69 keys = append(keys, k)
70 }
71 sort.Sort(sortable(keys))
72 for _, mk := range keys {
73 mv := mp[mk]
74 err := b.maybeNext(&first)
75 if err != nil {
76 return err
77 }
78 err = marshalKnownFieldMapEntryText(b, fd, kfd, mk, vfd, mv)
79 if err != nil {
80 return err
81 }
82 }
83 } else if fd.IsRepeated() {
84 sl := v.([]interface{})
85 for _, slv := range sl {
86 err := b.maybeNext(&first)
87 if err != nil {
88 return err
89 }
90 err = marshalKnownFieldText(b, fd, slv)
91 if err != nil {
92 return err
93 }
94 }
95 } else {
96 err := b.maybeNext(&first)
97 if err != nil {
98 return err
99 }
100 err = marshalKnownFieldText(b, fd, v)
101 if err != nil {
102 return err
103 }
104 }
105 }
106 // then the unknown fields
107 for _, tag := range m.unknownFieldTags() {
108 itag := int32(tag)
109 ufs := m.unknownFields[itag]
110 for _, uf := range ufs {
111 err := b.maybeNext(&first)
112 if err != nil {
113 return err
114 }
115 _, err = fmt.Fprintf(b, "%d", tag)
116 if err != nil {
117 return err
118 }
119 if uf.Encoding == proto.WireStartGroup {
120 err = b.WriteByte('{')
121 if err != nil {
122 return err
123 }
124 err = b.start()
125 if err != nil {
126 return err
127 }
Scott Baker4a35a702019-11-26 08:17:33 -0800128 in := codec.NewBuffer(uf.Contents)
Zack Williamse940c7a2019-08-21 14:25:39 -0700129 err = marshalUnknownGroupText(b, in, true)
130 if err != nil {
131 return err
132 }
133 err = b.end()
134 if err != nil {
135 return err
136 }
137 err = b.WriteByte('}')
138 if err != nil {
139 return err
140 }
141 } else {
142 err = b.sep()
143 if err != nil {
144 return err
145 }
146 if uf.Encoding == proto.WireBytes {
147 err = writeString(b, string(uf.Contents))
148 if err != nil {
149 return err
150 }
151 } else {
152 _, err = b.WriteString(strconv.FormatUint(uf.Value, 10))
153 if err != nil {
154 return err
155 }
156 }
157 }
158 }
159 }
160 return nil
161}
162
163func marshalKnownFieldMapEntryText(b *indentBuffer, fd *desc.FieldDescriptor, kfd *desc.FieldDescriptor, mk interface{}, vfd *desc.FieldDescriptor, mv interface{}) error {
164 var name string
165 if fd.IsExtension() {
166 name = fmt.Sprintf("[%s]", fd.GetFullyQualifiedName())
167 } else {
168 name = fd.GetName()
169 }
170 _, err := b.WriteString(name)
171 if err != nil {
172 return err
173 }
174 err = b.sep()
175 if err != nil {
176 return err
177 }
178
179 err = b.WriteByte('<')
180 if err != nil {
181 return err
182 }
183 err = b.start()
184 if err != nil {
185 return err
186 }
187
188 err = marshalKnownFieldText(b, kfd, mk)
189 if err != nil {
190 return err
191 }
192 err = b.next()
193 if err != nil {
194 return err
195 }
196 err = marshalKnownFieldText(b, vfd, mv)
197 if err != nil {
198 return err
199 }
200
201 err = b.end()
202 if err != nil {
203 return err
204 }
205 return b.WriteByte('>')
206}
207
208func marshalKnownFieldText(b *indentBuffer, fd *desc.FieldDescriptor, v interface{}) error {
209 group := fd.GetType() == descriptor.FieldDescriptorProto_TYPE_GROUP
210 if group {
211 var name string
212 if fd.IsExtension() {
213 name = fmt.Sprintf("[%s]", fd.GetMessageType().GetFullyQualifiedName())
214 } else {
215 name = fd.GetMessageType().GetName()
216 }
217 _, err := b.WriteString(name)
218 if err != nil {
219 return err
220 }
221 } else {
222 var name string
223 if fd.IsExtension() {
224 name = fmt.Sprintf("[%s]", fd.GetFullyQualifiedName())
225 } else {
226 name = fd.GetName()
227 }
228 _, err := b.WriteString(name)
229 if err != nil {
230 return err
231 }
232 err = b.sep()
233 if err != nil {
234 return err
235 }
236 }
237 rv := reflect.ValueOf(v)
238 switch rv.Kind() {
239 case reflect.Int32, reflect.Int64:
240 ed := fd.GetEnumType()
241 if ed != nil {
242 n := int32(rv.Int())
243 vd := ed.FindValueByNumber(n)
244 if vd == nil {
245 _, err := b.WriteString(strconv.FormatInt(rv.Int(), 10))
246 return err
247 } else {
248 _, err := b.WriteString(vd.GetName())
249 return err
250 }
251 } else {
252 _, err := b.WriteString(strconv.FormatInt(rv.Int(), 10))
253 return err
254 }
255 case reflect.Uint32, reflect.Uint64:
256 _, err := b.WriteString(strconv.FormatUint(rv.Uint(), 10))
257 return err
258 case reflect.Float32, reflect.Float64:
259 f := rv.Float()
260 var str string
261 if math.IsNaN(f) {
262 str = "nan"
263 } else if math.IsInf(f, 1) {
264 str = "inf"
265 } else if math.IsInf(f, -1) {
266 str = "-inf"
267 } else {
268 var bits int
269 if rv.Kind() == reflect.Float32 {
270 bits = 32
271 } else {
272 bits = 64
273 }
274 str = strconv.FormatFloat(rv.Float(), 'g', -1, bits)
275 }
276 _, err := b.WriteString(str)
277 return err
278 case reflect.Bool:
279 _, err := b.WriteString(strconv.FormatBool(rv.Bool()))
280 return err
281 case reflect.Slice:
282 return writeString(b, string(rv.Bytes()))
283 case reflect.String:
284 return writeString(b, rv.String())
285 default:
286 var err error
287 if group {
288 err = b.WriteByte('{')
289 } else {
290 err = b.WriteByte('<')
291 }
292 if err != nil {
293 return err
294 }
295 err = b.start()
296 if err != nil {
297 return err
298 }
299 // must be a message
300 if dm, ok := v.(*Message); ok {
301 err = dm.marshalText(b)
302 if err != nil {
303 return err
304 }
305 } else {
306 err = proto.CompactText(b, v.(proto.Message))
307 if err != nil {
308 return err
309 }
310 }
311 err = b.end()
312 if err != nil {
313 return err
314 }
315 if group {
316 return b.WriteByte('}')
317 } else {
318 return b.WriteByte('>')
319 }
320 }
321}
322
323// writeString writes a string in the protocol buffer text format.
324// It is similar to strconv.Quote except we don't use Go escape sequences,
325// we treat the string as a byte sequence, and we use octal escapes.
326// These differences are to maintain interoperability with the other
327// languages' implementations of the text format.
328func writeString(b *indentBuffer, s string) error {
329 // use WriteByte here to get any needed indent
330 if err := b.WriteByte('"'); err != nil {
331 return err
332 }
333 // Loop over the bytes, not the runes.
334 for i := 0; i < len(s); i++ {
335 var err error
336 // Divergence from C++: we don't escape apostrophes.
337 // There's no need to escape them, and the C++ parser
338 // copes with a naked apostrophe.
339 switch c := s[i]; c {
340 case '\n':
341 _, err = b.WriteString("\\n")
342 case '\r':
343 _, err = b.WriteString("\\r")
344 case '\t':
345 _, err = b.WriteString("\\t")
346 case '"':
347 _, err = b.WriteString("\\")
348 case '\\':
349 _, err = b.WriteString("\\\\")
350 default:
351 if c >= 0x20 && c < 0x7f {
352 err = b.WriteByte(c)
353 } else {
354 _, err = fmt.Fprintf(b, "\\%03o", c)
355 }
356 }
357 if err != nil {
358 return err
359 }
360 }
361 return b.WriteByte('"')
362}
363
Scott Baker4a35a702019-11-26 08:17:33 -0800364func marshalUnknownGroupText(b *indentBuffer, in *codec.Buffer, topLevel bool) error {
Zack Williamse940c7a2019-08-21 14:25:39 -0700365 first := true
366 for {
Scott Baker4a35a702019-11-26 08:17:33 -0800367 if in.EOF() {
Zack Williamse940c7a2019-08-21 14:25:39 -0700368 if topLevel {
369 return nil
370 }
371 // this is a nested message: we are expecting an end-group tag, not EOF!
372 return io.ErrUnexpectedEOF
373 }
Scott Baker4a35a702019-11-26 08:17:33 -0800374 tag, wireType, err := in.DecodeTagAndWireType()
Zack Williamse940c7a2019-08-21 14:25:39 -0700375 if err != nil {
376 return err
377 }
378 if wireType == proto.WireEndGroup {
379 return nil
380 }
381 err = b.maybeNext(&first)
382 if err != nil {
383 return err
384 }
385 _, err = fmt.Fprintf(b, "%d", tag)
386 if err != nil {
387 return err
388 }
389 if wireType == proto.WireStartGroup {
390 err = b.WriteByte('{')
391 if err != nil {
392 return err
393 }
394 err = b.start()
395 if err != nil {
396 return err
397 }
398 err = marshalUnknownGroupText(b, in, false)
399 if err != nil {
400 return err
401 }
402 err = b.end()
403 if err != nil {
404 return err
405 }
406 err = b.WriteByte('}')
407 if err != nil {
408 return err
409 }
410 continue
411 } else {
412 err = b.sep()
413 if err != nil {
414 return err
415 }
416 if wireType == proto.WireBytes {
Scott Baker4a35a702019-11-26 08:17:33 -0800417 contents, err := in.DecodeRawBytes(false)
Zack Williamse940c7a2019-08-21 14:25:39 -0700418 if err != nil {
419 return err
420 }
421 err = writeString(b, string(contents))
422 if err != nil {
423 return err
424 }
425 } else {
426 var v uint64
427 switch wireType {
428 case proto.WireVarint:
Scott Baker4a35a702019-11-26 08:17:33 -0800429 v, err = in.DecodeVarint()
Zack Williamse940c7a2019-08-21 14:25:39 -0700430 case proto.WireFixed32:
Scott Baker4a35a702019-11-26 08:17:33 -0800431 v, err = in.DecodeFixed32()
Zack Williamse940c7a2019-08-21 14:25:39 -0700432 case proto.WireFixed64:
Scott Baker4a35a702019-11-26 08:17:33 -0800433 v, err = in.DecodeFixed64()
Zack Williamse940c7a2019-08-21 14:25:39 -0700434 default:
435 return proto.ErrInternalBadWireType
436 }
437 if err != nil {
438 return err
439 }
440 _, err = b.WriteString(strconv.FormatUint(v, 10))
441 if err != nil {
442 return err
443 }
444 }
445 }
446 }
447}
448
449// UnmarshalText de-serializes the message that is present, in text format, in
450// the given bytes into this message. It first resets the current message. It
451// returns an error if the given bytes do not contain a valid encoding of this
452// message type in the standard text format
453func (m *Message) UnmarshalText(text []byte) error {
454 m.Reset()
455 if err := m.UnmarshalMergeText(text); err != nil {
456 return err
457 }
458 return m.Validate()
459}
460
461// UnmarshalMergeText de-serializes the message that is present, in text format,
462// in the given bytes into this message. Unlike UnmarshalText, it does not first
463// reset the message, instead merging the data in the given bytes into the
464// existing data in this message.
465func (m *Message) UnmarshalMergeText(text []byte) error {
466 return m.unmarshalText(newReader(text), tokenEOF)
467}
468
469func (m *Message) unmarshalText(tr *txtReader, end tokenType) error {
470 for {
471 tok := tr.next()
472 if tok.tokTyp == end {
473 return nil
474 }
475 if tok.tokTyp == tokenEOF {
476 return io.ErrUnexpectedEOF
477 }
478 var fd *desc.FieldDescriptor
479 var extendedAnyType *desc.MessageDescriptor
480 if tok.tokTyp == tokenInt {
481 // tag number (indicates unknown field)
482 tag, err := strconv.ParseInt(tok.val.(string), 10, 32)
483 if err != nil {
484 return err
485 }
486 itag := int32(tag)
487 fd = m.FindFieldDescriptor(itag)
488 if fd == nil {
489 // can't parse the value w/out field descriptor, so skip it
490 tok = tr.next()
491 if tok.tokTyp == tokenEOF {
492 return io.ErrUnexpectedEOF
493 } else if tok.tokTyp == tokenOpenBrace {
494 if err := skipMessageText(tr, true); err != nil {
495 return err
496 }
497 } else if tok.tokTyp == tokenColon {
498 if err := skipFieldValueText(tr); err != nil {
499 return err
500 }
501 } else {
502 return textError(tok, "Expecting a colon ':' or brace '{'; instead got %q", tok.txt)
503 }
504 tok = tr.peek()
505 if tok.tokTyp.IsSep() {
506 tr.next() // consume separator
507 }
508 continue
509 }
510 } else {
511 fieldName, err := unmarshalFieldNameText(tr, tok)
512 if err != nil {
513 return err
514 }
515 fd = m.FindFieldDescriptorByName(fieldName)
516 if fd == nil {
517 // See if it's a group name
518 for _, field := range m.md.GetFields() {
519 if field.GetType() == descriptor.FieldDescriptorProto_TYPE_GROUP && field.GetMessageType().GetName() == fieldName {
520 fd = field
521 break
522 }
523 }
524 if fd == nil {
525 // maybe this is an extended Any
526 if m.md.GetFullyQualifiedName() == "google.protobuf.Any" && fieldName[0] == '[' && strings.Contains(fieldName, "/") {
527 // strip surrounding "[" and "]" and extract type name from URL
528 typeUrl := fieldName[1 : len(fieldName)-1]
529 mname := typeUrl
530 if slash := strings.LastIndex(mname, "/"); slash >= 0 {
531 mname = mname[slash+1:]
532 }
533 // TODO: add a way to weave an AnyResolver to this point
534 extendedAnyType = findMessageDescriptor(mname, m.md.GetFile())
535 if extendedAnyType == nil {
536 return textError(tok, "could not parse Any with unknown type URL %q", fieldName)
537 }
538 // field 1 is "type_url"
539 typeUrlField := m.md.FindFieldByNumber(1)
540 if err := m.TrySetField(typeUrlField, typeUrl); err != nil {
541 return err
542 }
543 } else {
544 // TODO: add a flag to just ignore unrecognized field names
545 return textError(tok, "%q is not a recognized field name of %q", fieldName, m.md.GetFullyQualifiedName())
546 }
547 }
548 }
549 }
550 tok = tr.next()
551 if tok.tokTyp == tokenEOF {
552 return io.ErrUnexpectedEOF
553 }
554 if extendedAnyType != nil {
555 // consume optional colon; make sure this is a "start message" token
556 if tok.tokTyp == tokenColon {
557 tok = tr.next()
558 if tok.tokTyp == tokenEOF {
559 return io.ErrUnexpectedEOF
560 }
561 }
562 if tok.tokTyp.EndToken() == tokenError {
563 return textError(tok, "Expecting a '<' or '{'; instead got %q", tok.txt)
564 }
565
566 // TODO: use mf.NewMessage and, if not a dynamic message, use proto.UnmarshalText to unmarshal it
567 g := m.mf.NewDynamicMessage(extendedAnyType)
568 if err := g.unmarshalText(tr, tok.tokTyp.EndToken()); err != nil {
569 return err
570 }
571 // now we marshal the message to bytes and store in the Any
572 b, err := g.Marshal()
573 if err != nil {
574 return err
575 }
576 // field 2 is "value"
577 anyValueField := m.md.FindFieldByNumber(2)
578 if err := m.TrySetField(anyValueField, b); err != nil {
579 return err
580 }
581
582 } else if (fd.GetType() == descriptor.FieldDescriptorProto_TYPE_GROUP ||
583 fd.GetType() == descriptor.FieldDescriptorProto_TYPE_MESSAGE) &&
584 tok.tokTyp.EndToken() != tokenError {
585
586 // TODO: use mf.NewMessage and, if not a dynamic message, use proto.UnmarshalText to unmarshal it
587 g := m.mf.NewDynamicMessage(fd.GetMessageType())
588 if err := g.unmarshalText(tr, tok.tokTyp.EndToken()); err != nil {
589 return err
590 }
591 if fd.IsRepeated() {
592 if err := m.TryAddRepeatedField(fd, g); err != nil {
593 return err
594 }
595 } else {
596 if err := m.TrySetField(fd, g); err != nil {
597 return err
598 }
599 }
600 } else {
601 if tok.tokTyp != tokenColon {
602 return textError(tok, "Expecting a colon ':'; instead got %q", tok.txt)
603 }
604 if err := m.unmarshalFieldValueText(fd, tr); err != nil {
605 return err
606 }
607 }
608 tok = tr.peek()
609 if tok.tokTyp.IsSep() {
610 tr.next() // consume separator
611 }
612 }
613}
614func findMessageDescriptor(name string, fd *desc.FileDescriptor) *desc.MessageDescriptor {
615 md := findMessageInTransitiveDeps(name, fd, map[*desc.FileDescriptor]struct{}{})
616 if md == nil {
617 // couldn't find it; see if we have this message linked in
618 md, _ = desc.LoadMessageDescriptor(name)
619 }
620 return md
621}
622
623func findMessageInTransitiveDeps(name string, fd *desc.FileDescriptor, seen map[*desc.FileDescriptor]struct{}) *desc.MessageDescriptor {
624 if _, ok := seen[fd]; ok {
625 // already checked this file
626 return nil
627 }
628 seen[fd] = struct{}{}
629 md := fd.FindMessage(name)
630 if md != nil {
631 return md
632 }
633 // not in this file so recursively search its deps
634 for _, dep := range fd.GetDependencies() {
635 md = findMessageInTransitiveDeps(name, dep, seen)
636 if md != nil {
637 return md
638 }
639 }
640 // couldn't find it
641 return nil
642}
643
644func textError(tok *token, format string, args ...interface{}) error {
645 var msg string
646 if tok.tokTyp == tokenError {
647 msg = tok.val.(error).Error()
648 } else {
649 msg = fmt.Sprintf(format, args...)
650 }
651 return fmt.Errorf("line %d, col %d: %s", tok.pos.Line, tok.pos.Column, msg)
652}
653
654type setFunction func(*Message, *desc.FieldDescriptor, interface{}) error
655
656func (m *Message) unmarshalFieldValueText(fd *desc.FieldDescriptor, tr *txtReader) error {
657 var set setFunction
658 if fd.IsRepeated() {
659 set = (*Message).addRepeatedField
660 } else {
661 set = mergeField
662 }
663 tok := tr.peek()
664 if tok.tokTyp == tokenOpenBracket {
665 tr.next() // consume tok
666 for {
667 if err := m.unmarshalFieldElementText(fd, tr, set); err != nil {
668 return err
669 }
670 tok = tr.peek()
671 if tok.tokTyp == tokenCloseBracket {
672 tr.next() // consume tok
673 return nil
674 } else if tok.tokTyp.IsSep() {
675 tr.next() // consume separator
676 }
677 }
678 }
679 return m.unmarshalFieldElementText(fd, tr, set)
680}
681
682func (m *Message) unmarshalFieldElementText(fd *desc.FieldDescriptor, tr *txtReader, set setFunction) error {
683 tok := tr.next()
684 if tok.tokTyp == tokenEOF {
685 return io.ErrUnexpectedEOF
686 }
687
688 var expected string
689 switch fd.GetType() {
690 case descriptor.FieldDescriptorProto_TYPE_BOOL:
691 if tok.tokTyp == tokenIdent {
692 if tok.val.(string) == "true" {
693 return set(m, fd, true)
694 } else if tok.val.(string) == "false" {
695 return set(m, fd, false)
696 }
697 }
698 expected = "boolean value"
699 case descriptor.FieldDescriptorProto_TYPE_BYTES:
700 if tok.tokTyp == tokenString {
701 return set(m, fd, []byte(tok.val.(string)))
702 }
703 expected = "bytes string value"
704 case descriptor.FieldDescriptorProto_TYPE_STRING:
705 if tok.tokTyp == tokenString {
706 return set(m, fd, tok.val)
707 }
708 expected = "string value"
709 case descriptor.FieldDescriptorProto_TYPE_FLOAT:
710 switch tok.tokTyp {
711 case tokenFloat:
712 return set(m, fd, float32(tok.val.(float64)))
713 case tokenInt:
714 if f, err := strconv.ParseFloat(tok.val.(string), 32); err != nil {
715 return err
716 } else {
717 return set(m, fd, float32(f))
718 }
719 case tokenIdent:
720 ident := strings.ToLower(tok.val.(string))
721 if ident == "inf" {
722 return set(m, fd, float32(math.Inf(1)))
723 } else if ident == "nan" {
724 return set(m, fd, float32(math.NaN()))
725 }
726 case tokenMinus:
727 peeked := tr.peek()
728 if peeked.tokTyp == tokenIdent {
729 ident := strings.ToLower(peeked.val.(string))
730 if ident == "inf" {
731 tr.next() // consume peeked token
732 return set(m, fd, float32(math.Inf(-1)))
733 }
734 }
735 }
736 expected = "float value"
737 case descriptor.FieldDescriptorProto_TYPE_DOUBLE:
738 switch tok.tokTyp {
739 case tokenFloat:
740 return set(m, fd, tok.val)
741 case tokenInt:
742 if f, err := strconv.ParseFloat(tok.val.(string), 64); err != nil {
743 return err
744 } else {
745 return set(m, fd, f)
746 }
747 case tokenIdent:
748 ident := strings.ToLower(tok.val.(string))
749 if ident == "inf" {
750 return set(m, fd, math.Inf(1))
751 } else if ident == "nan" {
752 return set(m, fd, math.NaN())
753 }
754 case tokenMinus:
755 peeked := tr.peek()
756 if peeked.tokTyp == tokenIdent {
757 ident := strings.ToLower(peeked.val.(string))
758 if ident == "inf" {
759 tr.next() // consume peeked token
760 return set(m, fd, math.Inf(-1))
761 }
762 }
763 }
764 expected = "float value"
765 case descriptor.FieldDescriptorProto_TYPE_INT32,
766 descriptor.FieldDescriptorProto_TYPE_SINT32,
767 descriptor.FieldDescriptorProto_TYPE_SFIXED32:
768 if tok.tokTyp == tokenInt {
769 if i, err := strconv.ParseInt(tok.val.(string), 10, 32); err != nil {
770 return err
771 } else {
772 return set(m, fd, int32(i))
773 }
774 }
775 expected = "int value"
776 case descriptor.FieldDescriptorProto_TYPE_INT64,
777 descriptor.FieldDescriptorProto_TYPE_SINT64,
778 descriptor.FieldDescriptorProto_TYPE_SFIXED64:
779 if tok.tokTyp == tokenInt {
780 if i, err := strconv.ParseInt(tok.val.(string), 10, 64); err != nil {
781 return err
782 } else {
783 return set(m, fd, i)
784 }
785 }
786 expected = "int value"
787 case descriptor.FieldDescriptorProto_TYPE_UINT32,
788 descriptor.FieldDescriptorProto_TYPE_FIXED32:
789 if tok.tokTyp == tokenInt {
790 if i, err := strconv.ParseUint(tok.val.(string), 10, 32); err != nil {
791 return err
792 } else {
793 return set(m, fd, uint32(i))
794 }
795 }
796 expected = "unsigned int value"
797 case descriptor.FieldDescriptorProto_TYPE_UINT64,
798 descriptor.FieldDescriptorProto_TYPE_FIXED64:
799 if tok.tokTyp == tokenInt {
800 if i, err := strconv.ParseUint(tok.val.(string), 10, 64); err != nil {
801 return err
802 } else {
803 return set(m, fd, i)
804 }
805 }
806 expected = "unsigned int value"
807 case descriptor.FieldDescriptorProto_TYPE_ENUM:
808 if tok.tokTyp == tokenIdent {
809 // TODO: add a flag to just ignore unrecognized enum value names?
810 vd := fd.GetEnumType().FindValueByName(tok.val.(string))
811 if vd != nil {
812 return set(m, fd, vd.GetNumber())
813 }
814 } else if tok.tokTyp == tokenInt {
815 if i, err := strconv.ParseInt(tok.val.(string), 10, 32); err != nil {
816 return err
817 } else {
818 return set(m, fd, int32(i))
819 }
820 }
821 expected = fmt.Sprintf("enum %s value", fd.GetEnumType().GetFullyQualifiedName())
822 case descriptor.FieldDescriptorProto_TYPE_MESSAGE,
823 descriptor.FieldDescriptorProto_TYPE_GROUP:
824
825 endTok := tok.tokTyp.EndToken()
826 if endTok != tokenError {
827 dm := m.mf.NewDynamicMessage(fd.GetMessageType())
828 if err := dm.unmarshalText(tr, endTok); err != nil {
829 return err
830 }
831 // TODO: ideally we would use mf.NewMessage and, if not a dynamic message, use
832 // proto package to unmarshal it. But the text parser isn't particularly amenable
833 // to that, so we instead convert a dynamic message to a generated one if the
834 // known-type registry knows about the generated type...
835 var ktr *KnownTypeRegistry
836 if m.mf != nil {
837 ktr = m.mf.ktr
838 }
839 pm := ktr.CreateIfKnown(fd.GetMessageType().GetFullyQualifiedName())
840 if pm != nil {
841 if err := dm.ConvertTo(pm); err != nil {
842 return set(m, fd, pm)
843 }
844 }
845 return set(m, fd, dm)
846 }
847 expected = fmt.Sprintf("message %s value", fd.GetMessageType().GetFullyQualifiedName())
848 default:
849 return fmt.Errorf("field %q of message %q has unrecognized type: %v", fd.GetFullyQualifiedName(), m.md.GetFullyQualifiedName(), fd.GetType())
850 }
851
852 // if we get here, token was wrong type; create error message
853 var article string
854 if strings.Contains("aieou", expected[0:1]) {
855 article = "an"
856 } else {
857 article = "a"
858 }
859 return textError(tok, "Expecting %s %s; got %q", article, expected, tok.txt)
860}
861
862func unmarshalFieldNameText(tr *txtReader, tok *token) (string, error) {
863 if tok.tokTyp == tokenOpenBracket || tok.tokTyp == tokenOpenParen {
864 // extension name
865 var closeType tokenType
866 var closeChar string
867 if tok.tokTyp == tokenOpenBracket {
868 closeType = tokenCloseBracket
869 closeChar = "close bracket ']'"
870 } else {
871 closeType = tokenCloseParen
872 closeChar = "close paren ')'"
873 }
874 // must be followed by an identifier
875 idents := make([]string, 0, 1)
876 for {
877 tok = tr.next()
878 if tok.tokTyp == tokenEOF {
879 return "", io.ErrUnexpectedEOF
880 } else if tok.tokTyp != tokenIdent {
881 return "", textError(tok, "Expecting an identifier; instead got %q", tok.txt)
882 }
883 idents = append(idents, tok.val.(string))
884 // and then close bracket/paren, or "/" to keep adding URL elements to name
885 tok = tr.next()
886 if tok.tokTyp == tokenEOF {
887 return "", io.ErrUnexpectedEOF
888 } else if tok.tokTyp == closeType {
889 break
890 } else if tok.tokTyp != tokenSlash {
891 return "", textError(tok, "Expecting a %s; instead got %q", closeChar, tok.txt)
892 }
893 }
894 return "[" + strings.Join(idents, "/") + "]", nil
895 } else if tok.tokTyp == tokenIdent {
896 // normal field name
897 return tok.val.(string), nil
898 } else {
899 return "", textError(tok, "Expecting an identifier or tag number; instead got %q", tok.txt)
900 }
901}
902
903func skipFieldNameText(tr *txtReader) error {
904 tok := tr.next()
905 if tok.tokTyp == tokenEOF {
906 return io.ErrUnexpectedEOF
907 } else if tok.tokTyp == tokenInt || tok.tokTyp == tokenIdent {
908 return nil
909 } else {
910 _, err := unmarshalFieldNameText(tr, tok)
911 return err
912 }
913}
914
915func skipFieldValueText(tr *txtReader) error {
916 tok := tr.peek()
917 if tok.tokTyp == tokenOpenBracket {
918 tr.next() // consume tok
919 for {
920 if err := skipFieldElementText(tr); err != nil {
921 return err
922 }
923 tok = tr.peek()
924 if tok.tokTyp == tokenCloseBracket {
925 tr.next() // consume tok
926 return nil
927 } else if tok.tokTyp.IsSep() {
928 tr.next() // consume separator
929 }
930
931 }
932 }
933 return skipFieldElementText(tr)
934}
935
936func skipFieldElementText(tr *txtReader) error {
937 tok := tr.next()
938 switch tok.tokTyp {
939 case tokenEOF:
940 return io.ErrUnexpectedEOF
941 case tokenInt, tokenFloat, tokenString, tokenIdent:
942 return nil
943 case tokenOpenAngle:
944 return skipMessageText(tr, false)
945 default:
946 return textError(tok, "Expecting an angle bracket '<' or a value; instead got %q", tok.txt)
947 }
948}
949
950func skipMessageText(tr *txtReader, isGroup bool) error {
951 for {
952 tok := tr.peek()
953 if tok.tokTyp == tokenEOF {
954 return io.ErrUnexpectedEOF
955 } else if isGroup && tok.tokTyp == tokenCloseBrace {
956 return nil
957 } else if !isGroup && tok.tokTyp == tokenCloseAngle {
958 return nil
959 }
960
961 // field name or tag
962 if err := skipFieldNameText(tr); err != nil {
963 return err
964 }
965
966 // field value
967 tok = tr.next()
968 if tok.tokTyp == tokenEOF {
969 return io.ErrUnexpectedEOF
970 } else if tok.tokTyp == tokenOpenBrace {
971 if err := skipMessageText(tr, true); err != nil {
972 return err
973 }
974 } else if tok.tokTyp == tokenColon {
975 if err := skipFieldValueText(tr); err != nil {
976 return err
977 }
978 } else {
979 return textError(tok, "Expecting a colon ':' or brace '{'; instead got %q", tok.txt)
980 }
981
982 tok = tr.peek()
983 if tok.tokTyp.IsSep() {
984 tr.next() // consume separator
985 }
986 }
987}
988
// tokenType identifies the kind of a token produced by the text reader.
type tokenType int

// The token kinds. tokenError is deliberately the zero value; EndToken also
// uses it to signal "no end token".
const (
	tokenError        = tokenType(iota) // malformed input
	tokenEOF                            // end of input
	tokenIdent                          // identifier
	tokenString                         // quoted string
	tokenInt                            // integer literal
	tokenFloat                          // floating point literal
	tokenColon                          // ':'
	tokenComma                          // ','
	tokenSemiColon                      // ';'
	tokenOpenBrace                      // '{'
	tokenCloseBrace                     // '}'
	tokenOpenBracket                    // '['
	tokenCloseBracket                   // ']'
	tokenOpenAngle                      // '<'
	tokenCloseAngle                     // '>'
	tokenOpenParen                      // '('
	tokenCloseParen                     // ')'
	tokenSlash                          // '/'
	tokenMinus                          // '-' not followed by a digit
)

// IsSep reports whether t is a field separator, i.e. a comma or a semicolon.
func (t tokenType) IsSep() bool {
	switch t {
	case tokenComma, tokenSemiColon:
		return true
	}
	return false
}

// EndToken returns the token type that closes a message opened by t:
// tokenCloseAngle for '<' and tokenCloseBrace for '{'. For every other token
// type it returns tokenError, meaning t does not open a message.
func (t tokenType) EndToken() tokenType {
	if t == tokenOpenAngle {
		return tokenCloseAngle
	}
	if t == tokenOpenBrace {
		return tokenCloseBrace
	}
	return tokenError
}
1027
1028type token struct {
1029 tokTyp tokenType
1030 val interface{}
1031 txt string
1032 pos scanner.Position
1033}
1034
1035type txtReader struct {
1036 scanner scanner.Scanner
1037 peeked token
1038 havePeeked bool
1039}
1040
1041func newReader(text []byte) *txtReader {
1042 sc := scanner.Scanner{}
1043 sc.Init(bytes.NewReader(text))
1044 sc.Mode = scanner.ScanIdents | scanner.ScanInts | scanner.ScanFloats | scanner.ScanChars |
1045 scanner.ScanStrings | scanner.ScanComments | scanner.SkipComments
1046 // identifiers are same restrictions as Go identifiers, except we also allow dots since
1047 // we accept fully-qualified names
1048 sc.IsIdentRune = func(ch rune, i int) bool {
1049 return ch == '_' || unicode.IsLetter(ch) ||
1050 (i > 0 && unicode.IsDigit(ch)) ||
1051 (i > 0 && ch == '.')
1052 }
1053 // ignore errors; we handle them if/when we see malformed tokens
1054 sc.Error = func(s *scanner.Scanner, msg string) {}
1055 return &txtReader{scanner: sc}
1056}
1057
1058func (p *txtReader) peek() *token {
1059 if p.havePeeked {
1060 return &p.peeked
1061 }
1062 t := p.scanner.Scan()
1063 if t == scanner.EOF {
1064 p.peeked.tokTyp = tokenEOF
1065 p.peeked.val = nil
1066 p.peeked.txt = ""
1067 p.peeked.pos = p.scanner.Position
1068 } else if err := p.processToken(t, p.scanner.TokenText(), p.scanner.Position); err != nil {
1069 p.peeked.tokTyp = tokenError
1070 p.peeked.val = err
1071 }
1072 p.havePeeked = true
1073 return &p.peeked
1074}
1075
1076func (p *txtReader) processToken(t rune, text string, pos scanner.Position) error {
1077 p.peeked.pos = pos
1078 p.peeked.txt = text
1079 switch t {
1080 case scanner.Ident:
1081 p.peeked.tokTyp = tokenIdent
1082 p.peeked.val = text
1083 case scanner.Int:
1084 p.peeked.tokTyp = tokenInt
1085 p.peeked.val = text // can't parse the number because we don't know if it's signed or unsigned
1086 case scanner.Float:
1087 p.peeked.tokTyp = tokenFloat
1088 var err error
1089 if p.peeked.val, err = strconv.ParseFloat(text, 64); err != nil {
1090 return err
1091 }
1092 case scanner.Char, scanner.String:
1093 p.peeked.tokTyp = tokenString
1094 var err error
1095 if p.peeked.val, err = strconv.Unquote(text); err != nil {
1096 return err
1097 }
1098 case '-': // unary minus, for negative ints and floats
1099 ch := p.scanner.Peek()
1100 if ch < '0' || ch > '9' {
1101 p.peeked.tokTyp = tokenMinus
1102 p.peeked.val = '-'
1103 } else {
1104 t := p.scanner.Scan()
1105 if t == scanner.EOF {
1106 return io.ErrUnexpectedEOF
1107 } else if t == scanner.Float {
1108 p.peeked.tokTyp = tokenFloat
1109 text += p.scanner.TokenText()
1110 p.peeked.txt = text
1111 var err error
1112 if p.peeked.val, err = strconv.ParseFloat(text, 64); err != nil {
1113 p.peeked.pos = p.scanner.Position
1114 return err
1115 }
1116 } else if t == scanner.Int {
1117 p.peeked.tokTyp = tokenInt
1118 text += p.scanner.TokenText()
1119 p.peeked.txt = text
1120 p.peeked.val = text // can't parse the number because we don't know if it's signed or unsigned
1121 } else {
1122 p.peeked.pos = p.scanner.Position
1123 return fmt.Errorf("expecting an int or float but got %q", p.scanner.TokenText())
1124 }
1125 }
1126 case ':':
1127 p.peeked.tokTyp = tokenColon
1128 p.peeked.val = ':'
1129 case ',':
1130 p.peeked.tokTyp = tokenComma
1131 p.peeked.val = ','
1132 case ';':
1133 p.peeked.tokTyp = tokenSemiColon
1134 p.peeked.val = ';'
1135 case '{':
1136 p.peeked.tokTyp = tokenOpenBrace
1137 p.peeked.val = '{'
1138 case '}':
1139 p.peeked.tokTyp = tokenCloseBrace
1140 p.peeked.val = '}'
1141 case '<':
1142 p.peeked.tokTyp = tokenOpenAngle
1143 p.peeked.val = '<'
1144 case '>':
1145 p.peeked.tokTyp = tokenCloseAngle
1146 p.peeked.val = '>'
1147 case '[':
1148 p.peeked.tokTyp = tokenOpenBracket
1149 p.peeked.val = '['
1150 case ']':
1151 p.peeked.tokTyp = tokenCloseBracket
1152 p.peeked.val = ']'
1153 case '(':
1154 p.peeked.tokTyp = tokenOpenParen
1155 p.peeked.val = '('
1156 case ')':
1157 p.peeked.tokTyp = tokenCloseParen
1158 p.peeked.val = ')'
1159 case '/':
1160 // only allowed to separate URL components in expanded Any format
1161 p.peeked.tokTyp = tokenSlash
1162 p.peeked.val = '/'
1163 default:
1164 return fmt.Errorf("invalid character: %c", t)
1165 }
1166 return nil
1167}
1168
1169func (p *txtReader) next() *token {
1170 t := p.peek()
1171 if t.tokTyp != tokenEOF && t.tokTyp != tokenError {
1172 p.havePeeked = false
1173 }
1174 return t
1175}