blob: 91679b5b496db76f837f909c3cca69e2a24408d5 [file] [log] [blame]
Scott Bakerbdb962b2020-04-03 10:53:36 -07001package yaml
2
3import (
4 "encoding"
5 "encoding/base64"
6 "fmt"
7 "io"
8 "math"
9 "reflect"
10 "strconv"
11 "time"
12)
13
// Node kinds. Each kind occupies its own bit.
const (
	documentNode = 1 << iota // 1
	mappingNode              // 2
	sequenceNode             // 4
	scalarNode               // 8
	aliasNode                // 16
)
21
// node is one element of the tree the parser builds from the event stream.
type node struct {
	// kind is one of the *Node kind constants.
	kind int
	// line and column are taken from the start mark of the event that
	// produced the node.
	line   int
	column int
	// tag is the tag carried by the originating event, if any.
	tag string
	// For an alias node, alias holds the resolved alias.
	alias *node
	// value is the scalar text, or the anchor name for an alias node.
	value string
	// implicit mirrors the implicit flag of the originating scalar event.
	implicit bool
	// children holds nested nodes; mappings store key/value pairs
	// back to back.
	children []*node
	// anchors maps anchor names to nodes; it is allocated on document nodes.
	anchors map[string]*node
}
33
34// ----------------------------------------------------------------------------
35// Parser, produces a node tree out of a libyaml event stream.
36
37type parser struct {
38 parser yaml_parser_t
39 event yaml_event_t
40 doc *node
41 doneInit bool
42}
43
44func newParser(b []byte) *parser {
45 p := parser{}
46 if !yaml_parser_initialize(&p.parser) {
47 panic("failed to initialize YAML emitter")
48 }
49 if len(b) == 0 {
50 b = []byte{'\n'}
51 }
52 yaml_parser_set_input_string(&p.parser, b)
53 return &p
54}
55
56func newParserFromReader(r io.Reader) *parser {
57 p := parser{}
58 if !yaml_parser_initialize(&p.parser) {
59 panic("failed to initialize YAML emitter")
60 }
61 yaml_parser_set_input_reader(&p.parser, r)
62 return &p
63}
64
65func (p *parser) init() {
66 if p.doneInit {
67 return
68 }
69 p.expect(yaml_STREAM_START_EVENT)
70 p.doneInit = true
71}
72
73func (p *parser) destroy() {
74 if p.event.typ != yaml_NO_EVENT {
75 yaml_event_delete(&p.event)
76 }
77 yaml_parser_delete(&p.parser)
78}
79
80// expect consumes an event from the event stream and
81// checks that it's of the expected type.
82func (p *parser) expect(e yaml_event_type_t) {
83 if p.event.typ == yaml_NO_EVENT {
84 if !yaml_parser_parse(&p.parser, &p.event) {
85 p.fail()
86 }
87 }
88 if p.event.typ == yaml_STREAM_END_EVENT {
89 failf("attempted to go past the end of stream; corrupted value?")
90 }
91 if p.event.typ != e {
92 p.parser.problem = fmt.Sprintf("expected %s event but got %s", e, p.event.typ)
93 p.fail()
94 }
95 yaml_event_delete(&p.event)
96 p.event.typ = yaml_NO_EVENT
97}
98
99// peek peeks at the next event in the event stream,
100// puts the results into p.event and returns the event type.
101func (p *parser) peek() yaml_event_type_t {
102 if p.event.typ != yaml_NO_EVENT {
103 return p.event.typ
104 }
105 if !yaml_parser_parse(&p.parser, &p.event) {
106 p.fail()
107 }
108 return p.event.typ
109}
110
111func (p *parser) fail() {
112 var where string
113 var line int
114 if p.parser.problem_mark.line != 0 {
115 line = p.parser.problem_mark.line
116 // Scanner errors don't iterate line before returning error
117 if p.parser.error == yaml_SCANNER_ERROR {
118 line++
119 }
120 } else if p.parser.context_mark.line != 0 {
121 line = p.parser.context_mark.line
122 }
123 if line != 0 {
124 where = "line " + strconv.Itoa(line) + ": "
125 }
126 var msg string
127 if len(p.parser.problem) > 0 {
128 msg = p.parser.problem
129 } else {
130 msg = "unknown problem parsing YAML content"
131 }
132 failf("%s%s", where, msg)
133}
134
135func (p *parser) anchor(n *node, anchor []byte) {
136 if anchor != nil {
137 p.doc.anchors[string(anchor)] = n
138 }
139}
140
141func (p *parser) parse() *node {
142 p.init()
143 switch p.peek() {
144 case yaml_SCALAR_EVENT:
145 return p.scalar()
146 case yaml_ALIAS_EVENT:
147 return p.alias()
148 case yaml_MAPPING_START_EVENT:
149 return p.mapping()
150 case yaml_SEQUENCE_START_EVENT:
151 return p.sequence()
152 case yaml_DOCUMENT_START_EVENT:
153 return p.document()
154 case yaml_STREAM_END_EVENT:
155 // Happens when attempting to decode an empty buffer.
156 return nil
157 default:
158 panic("attempted to parse unknown event: " + p.event.typ.String())
159 }
160}
161
162func (p *parser) node(kind int) *node {
163 return &node{
164 kind: kind,
165 line: p.event.start_mark.line,
166 column: p.event.start_mark.column,
167 }
168}
169
170func (p *parser) document() *node {
171 n := p.node(documentNode)
172 n.anchors = make(map[string]*node)
173 p.doc = n
174 p.expect(yaml_DOCUMENT_START_EVENT)
175 n.children = append(n.children, p.parse())
176 p.expect(yaml_DOCUMENT_END_EVENT)
177 return n
178}
179
180func (p *parser) alias() *node {
181 n := p.node(aliasNode)
182 n.value = string(p.event.anchor)
183 n.alias = p.doc.anchors[n.value]
184 if n.alias == nil {
185 failf("unknown anchor '%s' referenced", n.value)
186 }
187 p.expect(yaml_ALIAS_EVENT)
188 return n
189}
190
191func (p *parser) scalar() *node {
192 n := p.node(scalarNode)
193 n.value = string(p.event.value)
194 n.tag = string(p.event.tag)
195 n.implicit = p.event.implicit
196 p.anchor(n, p.event.anchor)
197 p.expect(yaml_SCALAR_EVENT)
198 return n
199}
200
201func (p *parser) sequence() *node {
202 n := p.node(sequenceNode)
203 p.anchor(n, p.event.anchor)
204 p.expect(yaml_SEQUENCE_START_EVENT)
205 for p.peek() != yaml_SEQUENCE_END_EVENT {
206 n.children = append(n.children, p.parse())
207 }
208 p.expect(yaml_SEQUENCE_END_EVENT)
209 return n
210}
211
212func (p *parser) mapping() *node {
213 n := p.node(mappingNode)
214 p.anchor(n, p.event.anchor)
215 p.expect(yaml_MAPPING_START_EVENT)
216 for p.peek() != yaml_MAPPING_END_EVENT {
217 n.children = append(n.children, p.parse(), p.parse())
218 }
219 p.expect(yaml_MAPPING_END_EVENT)
220 return n
221}
222
223// ----------------------------------------------------------------------------
224// Decoder, unmarshals a node into a provided value.
225
226type decoder struct {
227 doc *node
228 aliases map[*node]bool
229 mapType reflect.Type
230 terrors []string
231 strict bool
232
233 decodeCount int
234 aliasCount int
235 aliasDepth int
236}
237
238var (
239 mapItemType = reflect.TypeOf(MapItem{})
240 durationType = reflect.TypeOf(time.Duration(0))
241 defaultMapType = reflect.TypeOf(map[interface{}]interface{}{})
242 ifaceType = defaultMapType.Elem()
243 timeType = reflect.TypeOf(time.Time{})
244 ptrTimeType = reflect.TypeOf(&time.Time{})
245)
246
247func newDecoder(strict bool) *decoder {
248 d := &decoder{mapType: defaultMapType, strict: strict}
249 d.aliases = make(map[*node]bool)
250 return d
251}
252
253func (d *decoder) terror(n *node, tag string, out reflect.Value) {
254 if n.tag != "" {
255 tag = n.tag
256 }
257 value := n.value
258 if tag != yaml_SEQ_TAG && tag != yaml_MAP_TAG {
259 if len(value) > 10 {
260 value = " `" + value[:7] + "...`"
261 } else {
262 value = " `" + value + "`"
263 }
264 }
265 d.terrors = append(d.terrors, fmt.Sprintf("line %d: cannot unmarshal %s%s into %s", n.line+1, shortTag(tag), value, out.Type()))
266}
267
268func (d *decoder) callUnmarshaler(n *node, u Unmarshaler) (good bool) {
269 terrlen := len(d.terrors)
270 err := u.UnmarshalYAML(func(v interface{}) (err error) {
271 defer handleErr(&err)
272 d.unmarshal(n, reflect.ValueOf(v))
273 if len(d.terrors) > terrlen {
274 issues := d.terrors[terrlen:]
275 d.terrors = d.terrors[:terrlen]
276 return &TypeError{issues}
277 }
278 return nil
279 })
280 if e, ok := err.(*TypeError); ok {
281 d.terrors = append(d.terrors, e.Errors...)
282 return false
283 }
284 if err != nil {
285 fail(err)
286 }
287 return true
288}
289
290// d.prepare initializes and dereferences pointers and calls UnmarshalYAML
291// if a value is found to implement it.
292// It returns the initialized and dereferenced out value, whether
293// unmarshalling was already done by UnmarshalYAML, and if so whether
294// its types unmarshalled appropriately.
295//
296// If n holds a null value, prepare returns before doing anything.
297func (d *decoder) prepare(n *node, out reflect.Value) (newout reflect.Value, unmarshaled, good bool) {
298 if n.tag == yaml_NULL_TAG || n.kind == scalarNode && n.tag == "" && (n.value == "null" || n.value == "~" || n.value == "" && n.implicit) {
299 return out, false, false
300 }
301 again := true
302 for again {
303 again = false
304 if out.Kind() == reflect.Ptr {
305 if out.IsNil() {
306 out.Set(reflect.New(out.Type().Elem()))
307 }
308 out = out.Elem()
309 again = true
310 }
311 if out.CanAddr() {
312 if u, ok := out.Addr().Interface().(Unmarshaler); ok {
313 good = d.callUnmarshaler(n, u)
314 return out, true, good
315 }
316 }
317 }
318 return out, false, false
319}
320
321func (d *decoder) unmarshal(n *node, out reflect.Value) (good bool) {
322 d.decodeCount++
323 if d.aliasDepth > 0 {
324 d.aliasCount++
325 }
326 if d.aliasCount > 100 && d.decodeCount > 1000 && float64(d.aliasCount)/float64(d.decodeCount) > 0.99 {
327 failf("document contains excessive aliasing")
328 }
329 switch n.kind {
330 case documentNode:
331 return d.document(n, out)
332 case aliasNode:
333 return d.alias(n, out)
334 }
335 out, unmarshaled, good := d.prepare(n, out)
336 if unmarshaled {
337 return good
338 }
339 switch n.kind {
340 case scalarNode:
341 good = d.scalar(n, out)
342 case mappingNode:
343 good = d.mapping(n, out)
344 case sequenceNode:
345 good = d.sequence(n, out)
346 default:
347 panic("internal error: unknown node kind: " + strconv.Itoa(n.kind))
348 }
349 return good
350}
351
352func (d *decoder) document(n *node, out reflect.Value) (good bool) {
353 if len(n.children) == 1 {
354 d.doc = n
355 d.unmarshal(n.children[0], out)
356 return true
357 }
358 return false
359}
360
361func (d *decoder) alias(n *node, out reflect.Value) (good bool) {
362 if d.aliases[n] {
363 // TODO this could actually be allowed in some circumstances.
364 failf("anchor '%s' value contains itself", n.value)
365 }
366 d.aliases[n] = true
367 d.aliasDepth++
368 good = d.unmarshal(n.alias, out)
369 d.aliasDepth--
370 delete(d.aliases, n)
371 return good
372}
373
// zeroValue is the invalid reflect.Value; passing it to SetMapIndex
// deletes the key.
var zeroValue reflect.Value

// resetMap removes every entry from the map held by out.
func resetMap(out reflect.Value) {
	keys := out.MapKeys()
	for i := range keys {
		out.SetMapIndex(keys[i], zeroValue)
	}
}
381
382func (d *decoder) scalar(n *node, out reflect.Value) bool {
383 var tag string
384 var resolved interface{}
385 if n.tag == "" && !n.implicit {
386 tag = yaml_STR_TAG
387 resolved = n.value
388 } else {
389 tag, resolved = resolve(n.tag, n.value)
390 if tag == yaml_BINARY_TAG {
391 data, err := base64.StdEncoding.DecodeString(resolved.(string))
392 if err != nil {
393 failf("!!binary value contains invalid base64 data")
394 }
395 resolved = string(data)
396 }
397 }
398 if resolved == nil {
399 if out.Kind() == reflect.Map && !out.CanAddr() {
400 resetMap(out)
401 } else {
402 out.Set(reflect.Zero(out.Type()))
403 }
404 return true
405 }
406 if resolvedv := reflect.ValueOf(resolved); out.Type() == resolvedv.Type() {
407 // We've resolved to exactly the type we want, so use that.
408 out.Set(resolvedv)
409 return true
410 }
411 // Perhaps we can use the value as a TextUnmarshaler to
412 // set its value.
413 if out.CanAddr() {
414 u, ok := out.Addr().Interface().(encoding.TextUnmarshaler)
415 if ok {
416 var text []byte
417 if tag == yaml_BINARY_TAG {
418 text = []byte(resolved.(string))
419 } else {
420 // We let any value be unmarshaled into TextUnmarshaler.
421 // That might be more lax than we'd like, but the
422 // TextUnmarshaler itself should bowl out any dubious values.
423 text = []byte(n.value)
424 }
425 err := u.UnmarshalText(text)
426 if err != nil {
427 fail(err)
428 }
429 return true
430 }
431 }
432 switch out.Kind() {
433 case reflect.String:
434 if tag == yaml_BINARY_TAG {
435 out.SetString(resolved.(string))
436 return true
437 }
438 if resolved != nil {
439 out.SetString(n.value)
440 return true
441 }
442 case reflect.Interface:
443 if resolved == nil {
444 out.Set(reflect.Zero(out.Type()))
445 } else if tag == yaml_TIMESTAMP_TAG {
446 // It looks like a timestamp but for backward compatibility
447 // reasons we set it as a string, so that code that unmarshals
448 // timestamp-like values into interface{} will continue to
449 // see a string and not a time.Time.
450 // TODO(v3) Drop this.
451 out.Set(reflect.ValueOf(n.value))
452 } else {
453 out.Set(reflect.ValueOf(resolved))
454 }
455 return true
456 case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
457 switch resolved := resolved.(type) {
458 case int:
459 if !out.OverflowInt(int64(resolved)) {
460 out.SetInt(int64(resolved))
461 return true
462 }
463 case int64:
464 if !out.OverflowInt(resolved) {
465 out.SetInt(resolved)
466 return true
467 }
468 case uint64:
469 if resolved <= math.MaxInt64 && !out.OverflowInt(int64(resolved)) {
470 out.SetInt(int64(resolved))
471 return true
472 }
473 case float64:
474 if resolved <= math.MaxInt64 && !out.OverflowInt(int64(resolved)) {
475 out.SetInt(int64(resolved))
476 return true
477 }
478 case string:
479 if out.Type() == durationType {
480 d, err := time.ParseDuration(resolved)
481 if err == nil {
482 out.SetInt(int64(d))
483 return true
484 }
485 }
486 }
487 case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
488 switch resolved := resolved.(type) {
489 case int:
490 if resolved >= 0 && !out.OverflowUint(uint64(resolved)) {
491 out.SetUint(uint64(resolved))
492 return true
493 }
494 case int64:
495 if resolved >= 0 && !out.OverflowUint(uint64(resolved)) {
496 out.SetUint(uint64(resolved))
497 return true
498 }
499 case uint64:
500 if !out.OverflowUint(uint64(resolved)) {
501 out.SetUint(uint64(resolved))
502 return true
503 }
504 case float64:
505 if resolved <= math.MaxUint64 && !out.OverflowUint(uint64(resolved)) {
506 out.SetUint(uint64(resolved))
507 return true
508 }
509 }
510 case reflect.Bool:
511 switch resolved := resolved.(type) {
512 case bool:
513 out.SetBool(resolved)
514 return true
515 }
516 case reflect.Float32, reflect.Float64:
517 switch resolved := resolved.(type) {
518 case int:
519 out.SetFloat(float64(resolved))
520 return true
521 case int64:
522 out.SetFloat(float64(resolved))
523 return true
524 case uint64:
525 out.SetFloat(float64(resolved))
526 return true
527 case float64:
528 out.SetFloat(resolved)
529 return true
530 }
531 case reflect.Struct:
532 if resolvedv := reflect.ValueOf(resolved); out.Type() == resolvedv.Type() {
533 out.Set(resolvedv)
534 return true
535 }
536 case reflect.Ptr:
537 if out.Type().Elem() == reflect.TypeOf(resolved) {
538 // TODO DOes this make sense? When is out a Ptr except when decoding a nil value?
539 elem := reflect.New(out.Type().Elem())
540 elem.Elem().Set(reflect.ValueOf(resolved))
541 out.Set(elem)
542 return true
543 }
544 }
545 d.terror(n, tag, out)
546 return false
547}
548
// settableValueOf returns an addressable, settable reflect.Value of i's
// dynamic type, initialized with i.
func settableValueOf(i interface{}) reflect.Value {
	src := reflect.ValueOf(i)
	holder := reflect.New(src.Type())
	holder.Elem().Set(src)
	return holder.Elem()
}
555
556func (d *decoder) sequence(n *node, out reflect.Value) (good bool) {
557 l := len(n.children)
558
559 var iface reflect.Value
560 switch out.Kind() {
561 case reflect.Slice:
562 out.Set(reflect.MakeSlice(out.Type(), l, l))
563 case reflect.Array:
564 if l != out.Len() {
565 failf("invalid array: want %d elements but got %d", out.Len(), l)
566 }
567 case reflect.Interface:
568 // No type hints. Will have to use a generic sequence.
569 iface = out
570 out = settableValueOf(make([]interface{}, l))
571 default:
572 d.terror(n, yaml_SEQ_TAG, out)
573 return false
574 }
575 et := out.Type().Elem()
576
577 j := 0
578 for i := 0; i < l; i++ {
579 e := reflect.New(et).Elem()
580 if ok := d.unmarshal(n.children[i], e); ok {
581 out.Index(j).Set(e)
582 j++
583 }
584 }
585 if out.Kind() != reflect.Array {
586 out.Set(out.Slice(0, j))
587 }
588 if iface.IsValid() {
589 iface.Set(out)
590 }
591 return true
592}
593
594func (d *decoder) mapping(n *node, out reflect.Value) (good bool) {
595 switch out.Kind() {
596 case reflect.Struct:
597 return d.mappingStruct(n, out)
598 case reflect.Slice:
599 return d.mappingSlice(n, out)
600 case reflect.Map:
601 // okay
602 case reflect.Interface:
603 if d.mapType.Kind() == reflect.Map {
604 iface := out
605 out = reflect.MakeMap(d.mapType)
606 iface.Set(out)
607 } else {
608 slicev := reflect.New(d.mapType).Elem()
609 if !d.mappingSlice(n, slicev) {
610 return false
611 }
612 out.Set(slicev)
613 return true
614 }
615 default:
616 d.terror(n, yaml_MAP_TAG, out)
617 return false
618 }
619 outt := out.Type()
620 kt := outt.Key()
621 et := outt.Elem()
622
623 mapType := d.mapType
624 if outt.Key() == ifaceType && outt.Elem() == ifaceType {
625 d.mapType = outt
626 }
627
628 if out.IsNil() {
629 out.Set(reflect.MakeMap(outt))
630 }
631 l := len(n.children)
632 for i := 0; i < l; i += 2 {
633 if isMerge(n.children[i]) {
634 d.merge(n.children[i+1], out)
635 continue
636 }
637 k := reflect.New(kt).Elem()
638 if d.unmarshal(n.children[i], k) {
639 kkind := k.Kind()
640 if kkind == reflect.Interface {
641 kkind = k.Elem().Kind()
642 }
643 if kkind == reflect.Map || kkind == reflect.Slice {
644 failf("invalid map key: %#v", k.Interface())
645 }
646 e := reflect.New(et).Elem()
647 if d.unmarshal(n.children[i+1], e) {
648 d.setMapIndex(n.children[i+1], out, k, e)
649 }
650 }
651 }
652 d.mapType = mapType
653 return true
654}
655
656func (d *decoder) setMapIndex(n *node, out, k, v reflect.Value) {
657 if d.strict && out.MapIndex(k) != zeroValue {
658 d.terrors = append(d.terrors, fmt.Sprintf("line %d: key %#v already set in map", n.line+1, k.Interface()))
659 return
660 }
661 out.SetMapIndex(k, v)
662}
663
664func (d *decoder) mappingSlice(n *node, out reflect.Value) (good bool) {
665 outt := out.Type()
666 if outt.Elem() != mapItemType {
667 d.terror(n, yaml_MAP_TAG, out)
668 return false
669 }
670
671 mapType := d.mapType
672 d.mapType = outt
673
674 var slice []MapItem
675 var l = len(n.children)
676 for i := 0; i < l; i += 2 {
677 if isMerge(n.children[i]) {
678 d.merge(n.children[i+1], out)
679 continue
680 }
681 item := MapItem{}
682 k := reflect.ValueOf(&item.Key).Elem()
683 if d.unmarshal(n.children[i], k) {
684 v := reflect.ValueOf(&item.Value).Elem()
685 if d.unmarshal(n.children[i+1], v) {
686 slice = append(slice, item)
687 }
688 }
689 }
690 out.Set(reflect.ValueOf(slice))
691 d.mapType = mapType
692 return true
693}
694
695func (d *decoder) mappingStruct(n *node, out reflect.Value) (good bool) {
696 sinfo, err := getStructInfo(out.Type())
697 if err != nil {
698 panic(err)
699 }
700 name := settableValueOf("")
701 l := len(n.children)
702
703 var inlineMap reflect.Value
704 var elemType reflect.Type
705 if sinfo.InlineMap != -1 {
706 inlineMap = out.Field(sinfo.InlineMap)
707 inlineMap.Set(reflect.New(inlineMap.Type()).Elem())
708 elemType = inlineMap.Type().Elem()
709 }
710
711 var doneFields []bool
712 if d.strict {
713 doneFields = make([]bool, len(sinfo.FieldsList))
714 }
715 for i := 0; i < l; i += 2 {
716 ni := n.children[i]
717 if isMerge(ni) {
718 d.merge(n.children[i+1], out)
719 continue
720 }
721 if !d.unmarshal(ni, name) {
722 continue
723 }
724 if info, ok := sinfo.FieldsMap[name.String()]; ok {
725 if d.strict {
726 if doneFields[info.Id] {
727 d.terrors = append(d.terrors, fmt.Sprintf("line %d: field %s already set in type %s", ni.line+1, name.String(), out.Type()))
728 continue
729 }
730 doneFields[info.Id] = true
731 }
732 var field reflect.Value
733 if info.Inline == nil {
734 field = out.Field(info.Num)
735 } else {
736 field = out.FieldByIndex(info.Inline)
737 }
738 d.unmarshal(n.children[i+1], field)
739 } else if sinfo.InlineMap != -1 {
740 if inlineMap.IsNil() {
741 inlineMap.Set(reflect.MakeMap(inlineMap.Type()))
742 }
743 value := reflect.New(elemType).Elem()
744 d.unmarshal(n.children[i+1], value)
745 d.setMapIndex(n.children[i+1], inlineMap, name, value)
746 } else if d.strict {
747 d.terrors = append(d.terrors, fmt.Sprintf("line %d: field %s not found in type %s", ni.line+1, name.String(), out.Type()))
748 }
749 }
750 return true
751}
752
753func failWantMap() {
754 failf("map merge requires map or sequence of maps as the value")
755}
756
757func (d *decoder) merge(n *node, out reflect.Value) {
758 switch n.kind {
759 case mappingNode:
760 d.unmarshal(n, out)
761 case aliasNode:
762 an, ok := d.doc.anchors[n.value]
763 if ok && an.kind != mappingNode {
764 failWantMap()
765 }
766 d.unmarshal(n, out)
767 case sequenceNode:
768 // Step backwards as earlier nodes take precedence.
769 for i := len(n.children) - 1; i >= 0; i-- {
770 ni := n.children[i]
771 if ni.kind == aliasNode {
772 an, ok := d.doc.anchors[ni.value]
773 if ok && an.kind != mappingNode {
774 failWantMap()
775 }
776 } else if ni.kind != mappingNode {
777 failWantMap()
778 }
779 d.unmarshal(ni, out)
780 }
781 default:
782 failWantMap()
783 }
784}
785
786func isMerge(n *node) bool {
787 return n.kind == scalarNode && n.value == "<<" && (n.implicit == true || n.tag == yaml_MERGE_TAG)
788}