blob: 0b9bb6030a0fae4384db726f9b51c9ebbd7d5271 [file] [log] [blame]
Zack Williamse940c7a2019-08-21 14:25:39 -07001package yaml
2
3import (
4 "bytes"
5 "fmt"
6)
7
8// Introduction
9// ************
10//
11// The following notes assume that you are familiar with the YAML specification
12// (http://yaml.org/spec/1.2/spec.html). We mostly follow it, although in
// some cases we are less restrictive than it requires.
14//
15// The process of transforming a YAML stream into a sequence of events is
// divided into two steps: Scanning and Parsing.
17//
18// The Scanner transforms the input stream into a sequence of tokens, while the
// parser transforms the sequence of tokens produced by the Scanner into a
20// sequence of parsing events.
21//
22// The Scanner is rather clever and complicated. The Parser, on the contrary,
// is a straightforward implementation of a recursive-descent parser (or,
24// LL(1) parser, as it is usually called).
25//
26// Actually there are two issues of Scanning that might be called "clever", the
27// rest is quite straightforward. The issues are "block collection start" and
28// "simple keys". Both issues are explained below in details.
29//
30// Here the Scanning step is explained and implemented. We start with the list
31// of all the tokens produced by the Scanner together with short descriptions.
32//
33// Now, tokens:
34//
35// STREAM-START(encoding) # The stream start.
36// STREAM-END # The stream end.
37// VERSION-DIRECTIVE(major,minor) # The '%YAML' directive.
38// TAG-DIRECTIVE(handle,prefix) # The '%TAG' directive.
39// DOCUMENT-START # '---'
40// DOCUMENT-END # '...'
41// BLOCK-SEQUENCE-START # Indentation increase denoting a block
42// BLOCK-MAPPING-START # sequence or a block mapping.
43// BLOCK-END # Indentation decrease.
44// FLOW-SEQUENCE-START # '['
45// FLOW-SEQUENCE-END # ']'
// FLOW-MAPPING-START # '{'
// FLOW-MAPPING-END # '}'
48// BLOCK-ENTRY # '-'
49// FLOW-ENTRY # ','
50// KEY # '?' or nothing (simple keys).
51// VALUE # ':'
52// ALIAS(anchor) # '*anchor'
53// ANCHOR(anchor) # '&anchor'
54// TAG(handle,suffix) # '!handle!suffix'
55// SCALAR(value,style) # A scalar.
56//
57// The following two tokens are "virtual" tokens denoting the beginning and the
58// end of the stream:
59//
60// STREAM-START(encoding)
61// STREAM-END
62//
63// We pass the information about the input stream encoding with the
64// STREAM-START token.
65//
// The next two tokens are responsible for directives:
67//
68// VERSION-DIRECTIVE(major,minor)
69// TAG-DIRECTIVE(handle,prefix)
70//
71// Example:
72//
73// %YAML 1.1
74// %TAG ! !foo
75// %TAG !yaml! tag:yaml.org,2002:
76// ---
77//
// The corresponding sequence of tokens:
79//
80// STREAM-START(utf-8)
81// VERSION-DIRECTIVE(1,1)
82// TAG-DIRECTIVE("!","!foo")
83// TAG-DIRECTIVE("!yaml","tag:yaml.org,2002:")
84// DOCUMENT-START
85// STREAM-END
86//
87// Note that the VERSION-DIRECTIVE and TAG-DIRECTIVE tokens occupy a whole
88// line.
89//
90// The document start and end indicators are represented by:
91//
92// DOCUMENT-START
93// DOCUMENT-END
94//
95// Note that if a YAML stream contains an implicit document (without '---'
96// and '...' indicators), no DOCUMENT-START and DOCUMENT-END tokens will be
97// produced.
98//
99// In the following examples, we present whole documents together with the
100// produced tokens.
101//
102// 1. An implicit document:
103//
104// 'a scalar'
105//
106// Tokens:
107//
108// STREAM-START(utf-8)
109// SCALAR("a scalar",single-quoted)
110// STREAM-END
111//
112// 2. An explicit document:
113//
114// ---
115// 'a scalar'
116// ...
117//
118// Tokens:
119//
120// STREAM-START(utf-8)
121// DOCUMENT-START
122// SCALAR("a scalar",single-quoted)
123// DOCUMENT-END
124// STREAM-END
125//
126// 3. Several documents in a stream:
127//
128// 'a scalar'
129// ---
130// 'another scalar'
131// ---
132// 'yet another scalar'
133//
134// Tokens:
135//
136// STREAM-START(utf-8)
137// SCALAR("a scalar",single-quoted)
138// DOCUMENT-START
139// SCALAR("another scalar",single-quoted)
140// DOCUMENT-START
141// SCALAR("yet another scalar",single-quoted)
142// STREAM-END
143//
144// We have already introduced the SCALAR token above. The following tokens are
145// used to describe aliases, anchors, tag, and scalars:
146//
147// ALIAS(anchor)
148// ANCHOR(anchor)
149// TAG(handle,suffix)
150// SCALAR(value,style)
151//
152// The following series of examples illustrate the usage of these tokens:
153//
154// 1. A recursive sequence:
155//
156// &A [ *A ]
157//
158// Tokens:
159//
160// STREAM-START(utf-8)
161// ANCHOR("A")
162// FLOW-SEQUENCE-START
163// ALIAS("A")
164// FLOW-SEQUENCE-END
165// STREAM-END
166//
167// 2. A tagged scalar:
168//
169// !!float "3.14" # A good approximation.
170//
171// Tokens:
172//
173// STREAM-START(utf-8)
174// TAG("!!","float")
175// SCALAR("3.14",double-quoted)
176// STREAM-END
177//
178// 3. Various scalar styles:
179//
180// --- # Implicit empty plain scalars do not produce tokens.
181// --- a plain scalar
182// --- 'a single-quoted scalar'
183// --- "a double-quoted scalar"
184// --- |-
185// a literal scalar
186// --- >-
187// a folded
188// scalar
189//
190// Tokens:
191//
192// STREAM-START(utf-8)
193// DOCUMENT-START
194// DOCUMENT-START
195// SCALAR("a plain scalar",plain)
196// DOCUMENT-START
197// SCALAR("a single-quoted scalar",single-quoted)
198// DOCUMENT-START
199// SCALAR("a double-quoted scalar",double-quoted)
200// DOCUMENT-START
201// SCALAR("a literal scalar",literal)
202// DOCUMENT-START
203// SCALAR("a folded scalar",folded)
204// STREAM-END
205//
206// Now it's time to review collection-related tokens. We will start with
207// flow collections:
208//
209// FLOW-SEQUENCE-START
210// FLOW-SEQUENCE-END
211// FLOW-MAPPING-START
212// FLOW-MAPPING-END
213// FLOW-ENTRY
214// KEY
215// VALUE
216//
217// The tokens FLOW-SEQUENCE-START, FLOW-SEQUENCE-END, FLOW-MAPPING-START, and
218// FLOW-MAPPING-END represent the indicators '[', ']', '{', and '}'
219// correspondingly. FLOW-ENTRY represent the ',' indicator. Finally the
220// indicators '?' and ':', which are used for denoting mapping keys and values,
221// are represented by the KEY and VALUE tokens.
222//
223// The following examples show flow collections:
224//
225// 1. A flow sequence:
226//
227// [item 1, item 2, item 3]
228//
229// Tokens:
230//
231// STREAM-START(utf-8)
232// FLOW-SEQUENCE-START
233// SCALAR("item 1",plain)
234// FLOW-ENTRY
235// SCALAR("item 2",plain)
236// FLOW-ENTRY
237// SCALAR("item 3",plain)
238// FLOW-SEQUENCE-END
239// STREAM-END
240//
241// 2. A flow mapping:
242//
243// {
244// a simple key: a value, # Note that the KEY token is produced.
245// ? a complex key: another value,
246// }
247//
248// Tokens:
249//
250// STREAM-START(utf-8)
251// FLOW-MAPPING-START
252// KEY
253// SCALAR("a simple key",plain)
254// VALUE
255// SCALAR("a value",plain)
256// FLOW-ENTRY
257// KEY
258// SCALAR("a complex key",plain)
259// VALUE
260// SCALAR("another value",plain)
261// FLOW-ENTRY
262// FLOW-MAPPING-END
263// STREAM-END
264//
265// A simple key is a key which is not denoted by the '?' indicator. Note that
// the Scanner still produces the KEY token whenever it encounters a simple key.
267//
268// For scanning block collections, the following tokens are used (note that we
269// repeat KEY and VALUE here):
270//
271// BLOCK-SEQUENCE-START
272// BLOCK-MAPPING-START
273// BLOCK-END
274// BLOCK-ENTRY
275// KEY
276// VALUE
277//
278// The tokens BLOCK-SEQUENCE-START and BLOCK-MAPPING-START denote indentation
279// increase that precedes a block collection (cf. the INDENT token in Python).
// The token BLOCK-END denotes indentation decrease that ends a block collection
// (cf. the DEDENT token in Python). However YAML has some syntax peculiarities
// that make detection of these tokens more complex.
283//
284// The tokens BLOCK-ENTRY, KEY, and VALUE are used to represent the indicators
285// '-', '?', and ':' correspondingly.
286//
287// The following examples show how the tokens BLOCK-SEQUENCE-START,
288// BLOCK-MAPPING-START, and BLOCK-END are emitted by the Scanner:
289//
290// 1. Block sequences:
291//
292// - item 1
293// - item 2
294// -
295// - item 3.1
296// - item 3.2
297// -
298// key 1: value 1
299// key 2: value 2
300//
301// Tokens:
302//
303// STREAM-START(utf-8)
304// BLOCK-SEQUENCE-START
305// BLOCK-ENTRY
306// SCALAR("item 1",plain)
307// BLOCK-ENTRY
308// SCALAR("item 2",plain)
309// BLOCK-ENTRY
310// BLOCK-SEQUENCE-START
311// BLOCK-ENTRY
312// SCALAR("item 3.1",plain)
313// BLOCK-ENTRY
314// SCALAR("item 3.2",plain)
315// BLOCK-END
316// BLOCK-ENTRY
317// BLOCK-MAPPING-START
318// KEY
319// SCALAR("key 1",plain)
320// VALUE
321// SCALAR("value 1",plain)
322// KEY
323// SCALAR("key 2",plain)
324// VALUE
325// SCALAR("value 2",plain)
326// BLOCK-END
327// BLOCK-END
328// STREAM-END
329//
330// 2. Block mappings:
331//
332// a simple key: a value # The KEY token is produced here.
333// ? a complex key
334// : another value
335// a mapping:
336// key 1: value 1
337// key 2: value 2
338// a sequence:
339// - item 1
340// - item 2
341//
342// Tokens:
343//
344// STREAM-START(utf-8)
345// BLOCK-MAPPING-START
346// KEY
347// SCALAR("a simple key",plain)
348// VALUE
349// SCALAR("a value",plain)
350// KEY
351// SCALAR("a complex key",plain)
352// VALUE
353// SCALAR("another value",plain)
354// KEY
// SCALAR("a mapping",plain)
// VALUE
356// BLOCK-MAPPING-START
357// KEY
358// SCALAR("key 1",plain)
359// VALUE
360// SCALAR("value 1",plain)
361// KEY
362// SCALAR("key 2",plain)
363// VALUE
364// SCALAR("value 2",plain)
365// BLOCK-END
366// KEY
367// SCALAR("a sequence",plain)
368// VALUE
369// BLOCK-SEQUENCE-START
370// BLOCK-ENTRY
371// SCALAR("item 1",plain)
372// BLOCK-ENTRY
373// SCALAR("item 2",plain)
374// BLOCK-END
375// BLOCK-END
376// STREAM-END
377//
378// YAML does not always require to start a new block collection from a new
379// line. If the current line contains only '-', '?', and ':' indicators, a new
380// block collection may start at the current line. The following examples
381// illustrate this case:
382//
383// 1. Collections in a sequence:
384//
385// - - item 1
386// - item 2
387// - key 1: value 1
388// key 2: value 2
389// - ? complex key
390// : complex value
391//
392// Tokens:
393//
394// STREAM-START(utf-8)
395// BLOCK-SEQUENCE-START
396// BLOCK-ENTRY
397// BLOCK-SEQUENCE-START
398// BLOCK-ENTRY
399// SCALAR("item 1",plain)
400// BLOCK-ENTRY
401// SCALAR("item 2",plain)
402// BLOCK-END
403// BLOCK-ENTRY
404// BLOCK-MAPPING-START
405// KEY
406// SCALAR("key 1",plain)
407// VALUE
408// SCALAR("value 1",plain)
409// KEY
410// SCALAR("key 2",plain)
411// VALUE
412// SCALAR("value 2",plain)
413// BLOCK-END
414// BLOCK-ENTRY
415// BLOCK-MAPPING-START
416// KEY
417// SCALAR("complex key")
418// VALUE
419// SCALAR("complex value")
420// BLOCK-END
421// BLOCK-END
422// STREAM-END
423//
424// 2. Collections in a mapping:
425//
426// ? a sequence
427// : - item 1
428// - item 2
429// ? a mapping
430// : key 1: value 1
431// key 2: value 2
432//
433// Tokens:
434//
435// STREAM-START(utf-8)
436// BLOCK-MAPPING-START
437// KEY
438// SCALAR("a sequence",plain)
439// VALUE
440// BLOCK-SEQUENCE-START
441// BLOCK-ENTRY
442// SCALAR("item 1",plain)
443// BLOCK-ENTRY
444// SCALAR("item 2",plain)
445// BLOCK-END
446// KEY
447// SCALAR("a mapping",plain)
448// VALUE
449// BLOCK-MAPPING-START
450// KEY
451// SCALAR("key 1",plain)
452// VALUE
453// SCALAR("value 1",plain)
454// KEY
455// SCALAR("key 2",plain)
456// VALUE
457// SCALAR("value 2",plain)
458// BLOCK-END
459// BLOCK-END
460// STREAM-END
461//
462// YAML also permits non-indented sequences if they are included into a block
463// mapping. In this case, the token BLOCK-SEQUENCE-START is not produced:
464//
465// key:
466// - item 1 # BLOCK-SEQUENCE-START is NOT produced here.
467// - item 2
468//
469// Tokens:
470//
471// STREAM-START(utf-8)
472// BLOCK-MAPPING-START
473// KEY
474// SCALAR("key",plain)
475// VALUE
476// BLOCK-ENTRY
477// SCALAR("item 1",plain)
478// BLOCK-ENTRY
479// SCALAR("item 2",plain)
480// BLOCK-END
481//
482
483// Ensure that the buffer contains the required number of characters.
484// Return true on success, false on failure (reader error or memory error).
485func cache(parser *yaml_parser_t, length int) bool {
486 // [Go] This was inlined: !cache(A, B) -> unread < B && !update(A, B)
487 return parser.unread >= length || yaml_parser_update_buffer(parser, length)
488}
489
490// Advance the buffer pointer.
491func skip(parser *yaml_parser_t) {
492 parser.mark.index++
493 parser.mark.column++
494 parser.unread--
495 parser.buffer_pos += width(parser.buffer[parser.buffer_pos])
496}
497
498func skip_line(parser *yaml_parser_t) {
499 if is_crlf(parser.buffer, parser.buffer_pos) {
500 parser.mark.index += 2
501 parser.mark.column = 0
502 parser.mark.line++
503 parser.unread -= 2
504 parser.buffer_pos += 2
505 } else if is_break(parser.buffer, parser.buffer_pos) {
506 parser.mark.index++
507 parser.mark.column = 0
508 parser.mark.line++
509 parser.unread--
510 parser.buffer_pos += width(parser.buffer[parser.buffer_pos])
511 }
512}
513
514// Copy a character to a string buffer and advance pointers.
515func read(parser *yaml_parser_t, s []byte) []byte {
516 w := width(parser.buffer[parser.buffer_pos])
517 if w == 0 {
518 panic("invalid character sequence")
519 }
520 if len(s) == 0 {
521 s = make([]byte, 0, 32)
522 }
523 if w == 1 && len(s)+w <= cap(s) {
524 s = s[:len(s)+1]
525 s[len(s)-1] = parser.buffer[parser.buffer_pos]
526 parser.buffer_pos++
527 } else {
528 s = append(s, parser.buffer[parser.buffer_pos:parser.buffer_pos+w]...)
529 parser.buffer_pos += w
530 }
531 parser.mark.index++
532 parser.mark.column++
533 parser.unread--
534 return s
535}
536
537// Copy a line break character to a string buffer and advance pointers.
538func read_line(parser *yaml_parser_t, s []byte) []byte {
539 buf := parser.buffer
540 pos := parser.buffer_pos
541 switch {
542 case buf[pos] == '\r' && buf[pos+1] == '\n':
543 // CR LF . LF
544 s = append(s, '\n')
545 parser.buffer_pos += 2
546 parser.mark.index++
547 parser.unread--
548 case buf[pos] == '\r' || buf[pos] == '\n':
549 // CR|LF . LF
550 s = append(s, '\n')
551 parser.buffer_pos += 1
552 case buf[pos] == '\xC2' && buf[pos+1] == '\x85':
553 // NEL . LF
554 s = append(s, '\n')
555 parser.buffer_pos += 2
556 case buf[pos] == '\xE2' && buf[pos+1] == '\x80' && (buf[pos+2] == '\xA8' || buf[pos+2] == '\xA9'):
557 // LS|PS . LS|PS
558 s = append(s, buf[parser.buffer_pos:pos+3]...)
559 parser.buffer_pos += 3
560 default:
561 return s
562 }
563 parser.mark.index++
564 parser.mark.column = 0
565 parser.mark.line++
566 parser.unread--
567 return s
568}
569
570// Get the next token.
571func yaml_parser_scan(parser *yaml_parser_t, token *yaml_token_t) bool {
572 // Erase the token object.
573 *token = yaml_token_t{} // [Go] Is this necessary?
574
575 // No tokens after STREAM-END or error.
576 if parser.stream_end_produced || parser.error != yaml_NO_ERROR {
577 return true
578 }
579
580 // Ensure that the tokens queue contains enough tokens.
581 if !parser.token_available {
582 if !yaml_parser_fetch_more_tokens(parser) {
583 return false
584 }
585 }
586
587 // Fetch the next token from the queue.
588 *token = parser.tokens[parser.tokens_head]
589 parser.tokens_head++
590 parser.tokens_parsed++
591 parser.token_available = false
592
593 if token.typ == yaml_STREAM_END_TOKEN {
594 parser.stream_end_produced = true
595 }
596 return true
597}
598
599// Set the scanner error and return false.
600func yaml_parser_set_scanner_error(parser *yaml_parser_t, context string, context_mark yaml_mark_t, problem string) bool {
601 parser.error = yaml_SCANNER_ERROR
602 parser.context = context
603 parser.context_mark = context_mark
604 parser.problem = problem
605 parser.problem_mark = parser.mark
606 return false
607}
608
609func yaml_parser_set_scanner_tag_error(parser *yaml_parser_t, directive bool, context_mark yaml_mark_t, problem string) bool {
610 context := "while parsing a tag"
611 if directive {
612 context = "while parsing a %TAG directive"
613 }
614 return yaml_parser_set_scanner_error(parser, context, context_mark, problem)
615}
616
// trace prints args prefixed with "+++" immediately and returns a function
// that prints the same args prefixed with "---" when called. Both argument
// lists are built at the time trace is called.
func trace(args ...interface{}) func() {
	prefixed := func(prefix string) []interface{} {
		out := make([]interface{}, 0, len(args)+1)
		out = append(out, prefix)
		return append(out, args...)
	}

	fmt.Println(prefixed("+++")...)
	leave := prefixed("---")
	return func() {
		fmt.Println(leave...)
	}
}
623
624// Ensure that the tokens queue contains at least one token which can be
625// returned to the Parser.
626func yaml_parser_fetch_more_tokens(parser *yaml_parser_t) bool {
627 // While we need more tokens to fetch, do it.
628 for {
David K. Bainbridgebd6b2882021-08-26 13:31:02 +0000629 if parser.tokens_head != len(parser.tokens) {
630 // If queue is non-empty, check if any potential simple key may
631 // occupy the head position.
632 head_tok_idx, ok := parser.simple_keys_by_tok[parser.tokens_parsed]
633 if !ok {
634 break
635 } else if valid, ok := yaml_simple_key_is_valid(parser, &parser.simple_keys[head_tok_idx]); !ok {
Zack Williamse940c7a2019-08-21 14:25:39 -0700636 return false
David K. Bainbridgebd6b2882021-08-26 13:31:02 +0000637 } else if !valid {
638 break
Zack Williamse940c7a2019-08-21 14:25:39 -0700639 }
Zack Williamse940c7a2019-08-21 14:25:39 -0700640 }
641 // Fetch the next token.
642 if !yaml_parser_fetch_next_token(parser) {
643 return false
644 }
645 }
646
647 parser.token_available = true
648 return true
649}
650
651// The dispatcher for token fetchers.
652func yaml_parser_fetch_next_token(parser *yaml_parser_t) bool {
653 // Ensure that the buffer is initialized.
654 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
655 return false
656 }
657
658 // Check if we just started scanning. Fetch STREAM-START then.
659 if !parser.stream_start_produced {
660 return yaml_parser_fetch_stream_start(parser)
661 }
662
663 // Eat whitespaces and comments until we reach the next token.
664 if !yaml_parser_scan_to_next_token(parser) {
665 return false
666 }
667
Zack Williamse940c7a2019-08-21 14:25:39 -0700668 // Check the indentation level against the current column.
669 if !yaml_parser_unroll_indent(parser, parser.mark.column) {
670 return false
671 }
672
673 // Ensure that the buffer contains at least 4 characters. 4 is the length
674 // of the longest indicators ('--- ' and '... ').
675 if parser.unread < 4 && !yaml_parser_update_buffer(parser, 4) {
676 return false
677 }
678
679 // Is it the end of the stream?
680 if is_z(parser.buffer, parser.buffer_pos) {
681 return yaml_parser_fetch_stream_end(parser)
682 }
683
684 // Is it a directive?
685 if parser.mark.column == 0 && parser.buffer[parser.buffer_pos] == '%' {
686 return yaml_parser_fetch_directive(parser)
687 }
688
689 buf := parser.buffer
690 pos := parser.buffer_pos
691
692 // Is it the document start indicator?
693 if parser.mark.column == 0 && buf[pos] == '-' && buf[pos+1] == '-' && buf[pos+2] == '-' && is_blankz(buf, pos+3) {
694 return yaml_parser_fetch_document_indicator(parser, yaml_DOCUMENT_START_TOKEN)
695 }
696
697 // Is it the document end indicator?
698 if parser.mark.column == 0 && buf[pos] == '.' && buf[pos+1] == '.' && buf[pos+2] == '.' && is_blankz(buf, pos+3) {
699 return yaml_parser_fetch_document_indicator(parser, yaml_DOCUMENT_END_TOKEN)
700 }
701
702 // Is it the flow sequence start indicator?
703 if buf[pos] == '[' {
704 return yaml_parser_fetch_flow_collection_start(parser, yaml_FLOW_SEQUENCE_START_TOKEN)
705 }
706
707 // Is it the flow mapping start indicator?
708 if parser.buffer[parser.buffer_pos] == '{' {
709 return yaml_parser_fetch_flow_collection_start(parser, yaml_FLOW_MAPPING_START_TOKEN)
710 }
711
712 // Is it the flow sequence end indicator?
713 if parser.buffer[parser.buffer_pos] == ']' {
714 return yaml_parser_fetch_flow_collection_end(parser,
715 yaml_FLOW_SEQUENCE_END_TOKEN)
716 }
717
718 // Is it the flow mapping end indicator?
719 if parser.buffer[parser.buffer_pos] == '}' {
720 return yaml_parser_fetch_flow_collection_end(parser,
721 yaml_FLOW_MAPPING_END_TOKEN)
722 }
723
724 // Is it the flow entry indicator?
725 if parser.buffer[parser.buffer_pos] == ',' {
726 return yaml_parser_fetch_flow_entry(parser)
727 }
728
729 // Is it the block entry indicator?
730 if parser.buffer[parser.buffer_pos] == '-' && is_blankz(parser.buffer, parser.buffer_pos+1) {
731 return yaml_parser_fetch_block_entry(parser)
732 }
733
734 // Is it the key indicator?
735 if parser.buffer[parser.buffer_pos] == '?' && (parser.flow_level > 0 || is_blankz(parser.buffer, parser.buffer_pos+1)) {
736 return yaml_parser_fetch_key(parser)
737 }
738
739 // Is it the value indicator?
740 if parser.buffer[parser.buffer_pos] == ':' && (parser.flow_level > 0 || is_blankz(parser.buffer, parser.buffer_pos+1)) {
741 return yaml_parser_fetch_value(parser)
742 }
743
744 // Is it an alias?
745 if parser.buffer[parser.buffer_pos] == '*' {
746 return yaml_parser_fetch_anchor(parser, yaml_ALIAS_TOKEN)
747 }
748
749 // Is it an anchor?
750 if parser.buffer[parser.buffer_pos] == '&' {
751 return yaml_parser_fetch_anchor(parser, yaml_ANCHOR_TOKEN)
752 }
753
754 // Is it a tag?
755 if parser.buffer[parser.buffer_pos] == '!' {
756 return yaml_parser_fetch_tag(parser)
757 }
758
759 // Is it a literal scalar?
760 if parser.buffer[parser.buffer_pos] == '|' && parser.flow_level == 0 {
761 return yaml_parser_fetch_block_scalar(parser, true)
762 }
763
764 // Is it a folded scalar?
765 if parser.buffer[parser.buffer_pos] == '>' && parser.flow_level == 0 {
766 return yaml_parser_fetch_block_scalar(parser, false)
767 }
768
769 // Is it a single-quoted scalar?
770 if parser.buffer[parser.buffer_pos] == '\'' {
771 return yaml_parser_fetch_flow_scalar(parser, true)
772 }
773
774 // Is it a double-quoted scalar?
775 if parser.buffer[parser.buffer_pos] == '"' {
776 return yaml_parser_fetch_flow_scalar(parser, false)
777 }
778
779 // Is it a plain scalar?
780 //
781 // A plain scalar may start with any non-blank characters except
782 //
783 // '-', '?', ':', ',', '[', ']', '{', '}',
784 // '#', '&', '*', '!', '|', '>', '\'', '\"',
785 // '%', '@', '`'.
786 //
787 // In the block context (and, for the '-' indicator, in the flow context
788 // too), it may also start with the characters
789 //
790 // '-', '?', ':'
791 //
792 // if it is followed by a non-space character.
793 //
794 // The last rule is more restrictive than the specification requires.
795 // [Go] Make this logic more reasonable.
796 //switch parser.buffer[parser.buffer_pos] {
797 //case '-', '?', ':', ',', '?', '-', ',', ':', ']', '[', '}', '{', '&', '#', '!', '*', '>', '|', '"', '\'', '@', '%', '-', '`':
798 //}
799 if !(is_blankz(parser.buffer, parser.buffer_pos) || parser.buffer[parser.buffer_pos] == '-' ||
800 parser.buffer[parser.buffer_pos] == '?' || parser.buffer[parser.buffer_pos] == ':' ||
801 parser.buffer[parser.buffer_pos] == ',' || parser.buffer[parser.buffer_pos] == '[' ||
802 parser.buffer[parser.buffer_pos] == ']' || parser.buffer[parser.buffer_pos] == '{' ||
803 parser.buffer[parser.buffer_pos] == '}' || parser.buffer[parser.buffer_pos] == '#' ||
804 parser.buffer[parser.buffer_pos] == '&' || parser.buffer[parser.buffer_pos] == '*' ||
805 parser.buffer[parser.buffer_pos] == '!' || parser.buffer[parser.buffer_pos] == '|' ||
806 parser.buffer[parser.buffer_pos] == '>' || parser.buffer[parser.buffer_pos] == '\'' ||
807 parser.buffer[parser.buffer_pos] == '"' || parser.buffer[parser.buffer_pos] == '%' ||
808 parser.buffer[parser.buffer_pos] == '@' || parser.buffer[parser.buffer_pos] == '`') ||
809 (parser.buffer[parser.buffer_pos] == '-' && !is_blank(parser.buffer, parser.buffer_pos+1)) ||
810 (parser.flow_level == 0 &&
811 (parser.buffer[parser.buffer_pos] == '?' || parser.buffer[parser.buffer_pos] == ':') &&
812 !is_blankz(parser.buffer, parser.buffer_pos+1)) {
813 return yaml_parser_fetch_plain_scalar(parser)
814 }
815
816 // If we don't determine the token type so far, it is an error.
817 return yaml_parser_set_scanner_error(parser,
818 "while scanning for the next token", parser.mark,
819 "found character that cannot start any token")
820}
821
David K. Bainbridgebd6b2882021-08-26 13:31:02 +0000822func yaml_simple_key_is_valid(parser *yaml_parser_t, simple_key *yaml_simple_key_t) (valid, ok bool) {
823 if !simple_key.possible {
824 return false, true
Zack Williamse940c7a2019-08-21 14:25:39 -0700825 }
David K. Bainbridgebd6b2882021-08-26 13:31:02 +0000826
827 // The 1.2 specification says:
828 //
829 // "If the ? indicator is omitted, parsing needs to see past the
830 // implicit key to recognize it as such. To limit the amount of
831 // lookahead required, the “:” indicator must appear at most 1024
832 // Unicode characters beyond the start of the key. In addition, the key
833 // is restricted to a single line."
834 //
835 if simple_key.mark.line < parser.mark.line || simple_key.mark.index+1024 < parser.mark.index {
836 // Check if the potential simple key to be removed is required.
837 if simple_key.required {
838 return false, yaml_parser_set_scanner_error(parser,
839 "while scanning a simple key", simple_key.mark,
840 "could not find expected ':'")
841 }
842 simple_key.possible = false
843 return false, true
844 }
845 return true, true
Zack Williamse940c7a2019-08-21 14:25:39 -0700846}
847
848// Check if a simple key may start at the current position and add it if
849// needed.
850func yaml_parser_save_simple_key(parser *yaml_parser_t) bool {
851 // A simple key is required at the current position if the scanner is in
852 // the block context and the current column coincides with the indentation
853 // level.
854
855 required := parser.flow_level == 0 && parser.indent == parser.mark.column
856
857 //
858 // If the current position may start a simple key, save it.
859 //
860 if parser.simple_key_allowed {
861 simple_key := yaml_simple_key_t{
862 possible: true,
863 required: required,
864 token_number: parser.tokens_parsed + (len(parser.tokens) - parser.tokens_head),
David K. Bainbridgebd6b2882021-08-26 13:31:02 +0000865 mark: parser.mark,
Zack Williamse940c7a2019-08-21 14:25:39 -0700866 }
Zack Williamse940c7a2019-08-21 14:25:39 -0700867
868 if !yaml_parser_remove_simple_key(parser) {
869 return false
870 }
871 parser.simple_keys[len(parser.simple_keys)-1] = simple_key
David K. Bainbridgebd6b2882021-08-26 13:31:02 +0000872 parser.simple_keys_by_tok[simple_key.token_number] = len(parser.simple_keys) - 1
Zack Williamse940c7a2019-08-21 14:25:39 -0700873 }
874 return true
875}
876
877// Remove a potential simple key at the current flow level.
878func yaml_parser_remove_simple_key(parser *yaml_parser_t) bool {
879 i := len(parser.simple_keys) - 1
880 if parser.simple_keys[i].possible {
881 // If the key is required, it is an error.
882 if parser.simple_keys[i].required {
883 return yaml_parser_set_scanner_error(parser,
884 "while scanning a simple key", parser.simple_keys[i].mark,
885 "could not find expected ':'")
886 }
David K. Bainbridgebd6b2882021-08-26 13:31:02 +0000887 // Remove the key from the stack.
888 parser.simple_keys[i].possible = false
889 delete(parser.simple_keys_by_tok, parser.simple_keys[i].token_number)
Zack Williamse940c7a2019-08-21 14:25:39 -0700890 }
Zack Williamse940c7a2019-08-21 14:25:39 -0700891 return true
892}
893
David K. Bainbridgebd6b2882021-08-26 13:31:02 +0000894// max_flow_level limits the flow_level
895const max_flow_level = 10000
896
Zack Williamse940c7a2019-08-21 14:25:39 -0700897// Increase the flow level and resize the simple key list if needed.
898func yaml_parser_increase_flow_level(parser *yaml_parser_t) bool {
899 // Reset the simple key on the next level.
David K. Bainbridgebd6b2882021-08-26 13:31:02 +0000900 parser.simple_keys = append(parser.simple_keys, yaml_simple_key_t{
901 possible: false,
902 required: false,
903 token_number: parser.tokens_parsed + (len(parser.tokens) - parser.tokens_head),
904 mark: parser.mark,
905 })
Zack Williamse940c7a2019-08-21 14:25:39 -0700906
907 // Increase the flow level.
908 parser.flow_level++
David K. Bainbridgebd6b2882021-08-26 13:31:02 +0000909 if parser.flow_level > max_flow_level {
910 return yaml_parser_set_scanner_error(parser,
911 "while increasing flow level", parser.simple_keys[len(parser.simple_keys)-1].mark,
912 fmt.Sprintf("exceeded max depth of %d", max_flow_level))
913 }
Zack Williamse940c7a2019-08-21 14:25:39 -0700914 return true
915}
916
917// Decrease the flow level.
918func yaml_parser_decrease_flow_level(parser *yaml_parser_t) bool {
919 if parser.flow_level > 0 {
920 parser.flow_level--
David K. Bainbridgebd6b2882021-08-26 13:31:02 +0000921 last := len(parser.simple_keys) - 1
922 delete(parser.simple_keys_by_tok, parser.simple_keys[last].token_number)
923 parser.simple_keys = parser.simple_keys[:last]
Zack Williamse940c7a2019-08-21 14:25:39 -0700924 }
925 return true
926}
927
David K. Bainbridgebd6b2882021-08-26 13:31:02 +0000928// max_indents limits the indents stack size
929const max_indents = 10000
930
Zack Williamse940c7a2019-08-21 14:25:39 -0700931// Push the current indentation level to the stack and set the new level
932// the current column is greater than the indentation level. In this case,
933// append or insert the specified token into the token queue.
934func yaml_parser_roll_indent(parser *yaml_parser_t, column, number int, typ yaml_token_type_t, mark yaml_mark_t) bool {
935 // In the flow context, do nothing.
936 if parser.flow_level > 0 {
937 return true
938 }
939
940 if parser.indent < column {
941 // Push the current indentation level to the stack and set the new
942 // indentation level.
943 parser.indents = append(parser.indents, parser.indent)
944 parser.indent = column
David K. Bainbridgebd6b2882021-08-26 13:31:02 +0000945 if len(parser.indents) > max_indents {
946 return yaml_parser_set_scanner_error(parser,
947 "while increasing indent level", parser.simple_keys[len(parser.simple_keys)-1].mark,
948 fmt.Sprintf("exceeded max depth of %d", max_indents))
949 }
Zack Williamse940c7a2019-08-21 14:25:39 -0700950
951 // Create a token and insert it into the queue.
952 token := yaml_token_t{
953 typ: typ,
954 start_mark: mark,
955 end_mark: mark,
956 }
957 if number > -1 {
958 number -= parser.tokens_parsed
959 }
960 yaml_insert_token(parser, number, &token)
961 }
962 return true
963}
964
965// Pop indentation levels from the indents stack until the current level
966// becomes less or equal to the column. For each indentation level, append
967// the BLOCK-END token.
968func yaml_parser_unroll_indent(parser *yaml_parser_t, column int) bool {
969 // In the flow context, do nothing.
970 if parser.flow_level > 0 {
971 return true
972 }
973
974 // Loop through the indentation levels in the stack.
975 for parser.indent > column {
976 // Create a token and append it to the queue.
977 token := yaml_token_t{
978 typ: yaml_BLOCK_END_TOKEN,
979 start_mark: parser.mark,
980 end_mark: parser.mark,
981 }
982 yaml_insert_token(parser, -1, &token)
983
984 // Pop the indentation level.
985 parser.indent = parser.indents[len(parser.indents)-1]
986 parser.indents = parser.indents[:len(parser.indents)-1]
987 }
988 return true
989}
990
991// Initialize the scanner and produce the STREAM-START token.
992func yaml_parser_fetch_stream_start(parser *yaml_parser_t) bool {
993
994 // Set the initial indentation.
995 parser.indent = -1
996
997 // Initialize the simple key stack.
998 parser.simple_keys = append(parser.simple_keys, yaml_simple_key_t{})
999
David K. Bainbridgebd6b2882021-08-26 13:31:02 +00001000 parser.simple_keys_by_tok = make(map[int]int)
1001
Zack Williamse940c7a2019-08-21 14:25:39 -07001002 // A simple key is allowed at the beginning of the stream.
1003 parser.simple_key_allowed = true
1004
1005 // We have started.
1006 parser.stream_start_produced = true
1007
1008 // Create the STREAM-START token and append it to the queue.
1009 token := yaml_token_t{
1010 typ: yaml_STREAM_START_TOKEN,
1011 start_mark: parser.mark,
1012 end_mark: parser.mark,
1013 encoding: parser.encoding,
1014 }
1015 yaml_insert_token(parser, -1, &token)
1016 return true
1017}
1018
1019// Produce the STREAM-END token and shut down the scanner.
1020func yaml_parser_fetch_stream_end(parser *yaml_parser_t) bool {
1021
1022 // Force new line.
1023 if parser.mark.column != 0 {
1024 parser.mark.column = 0
1025 parser.mark.line++
1026 }
1027
1028 // Reset the indentation level.
1029 if !yaml_parser_unroll_indent(parser, -1) {
1030 return false
1031 }
1032
1033 // Reset simple keys.
1034 if !yaml_parser_remove_simple_key(parser) {
1035 return false
1036 }
1037
1038 parser.simple_key_allowed = false
1039
1040 // Create the STREAM-END token and append it to the queue.
1041 token := yaml_token_t{
1042 typ: yaml_STREAM_END_TOKEN,
1043 start_mark: parser.mark,
1044 end_mark: parser.mark,
1045 }
1046 yaml_insert_token(parser, -1, &token)
1047 return true
1048}
1049
1050// Produce a VERSION-DIRECTIVE or TAG-DIRECTIVE token.
1051func yaml_parser_fetch_directive(parser *yaml_parser_t) bool {
1052 // Reset the indentation level.
1053 if !yaml_parser_unroll_indent(parser, -1) {
1054 return false
1055 }
1056
1057 // Reset simple keys.
1058 if !yaml_parser_remove_simple_key(parser) {
1059 return false
1060 }
1061
1062 parser.simple_key_allowed = false
1063
1064 // Create the YAML-DIRECTIVE or TAG-DIRECTIVE token.
1065 token := yaml_token_t{}
1066 if !yaml_parser_scan_directive(parser, &token) {
1067 return false
1068 }
1069 // Append the token to the queue.
1070 yaml_insert_token(parser, -1, &token)
1071 return true
1072}
1073
1074// Produce the DOCUMENT-START or DOCUMENT-END token.
1075func yaml_parser_fetch_document_indicator(parser *yaml_parser_t, typ yaml_token_type_t) bool {
1076 // Reset the indentation level.
1077 if !yaml_parser_unroll_indent(parser, -1) {
1078 return false
1079 }
1080
1081 // Reset simple keys.
1082 if !yaml_parser_remove_simple_key(parser) {
1083 return false
1084 }
1085
1086 parser.simple_key_allowed = false
1087
1088 // Consume the token.
1089 start_mark := parser.mark
1090
1091 skip(parser)
1092 skip(parser)
1093 skip(parser)
1094
1095 end_mark := parser.mark
1096
1097 // Create the DOCUMENT-START or DOCUMENT-END token.
1098 token := yaml_token_t{
1099 typ: typ,
1100 start_mark: start_mark,
1101 end_mark: end_mark,
1102 }
1103 // Append the token to the queue.
1104 yaml_insert_token(parser, -1, &token)
1105 return true
1106}
1107
1108// Produce the FLOW-SEQUENCE-START or FLOW-MAPPING-START token.
1109func yaml_parser_fetch_flow_collection_start(parser *yaml_parser_t, typ yaml_token_type_t) bool {
1110 // The indicators '[' and '{' may start a simple key.
1111 if !yaml_parser_save_simple_key(parser) {
1112 return false
1113 }
1114
1115 // Increase the flow level.
1116 if !yaml_parser_increase_flow_level(parser) {
1117 return false
1118 }
1119
1120 // A simple key may follow the indicators '[' and '{'.
1121 parser.simple_key_allowed = true
1122
1123 // Consume the token.
1124 start_mark := parser.mark
1125 skip(parser)
1126 end_mark := parser.mark
1127
1128 // Create the FLOW-SEQUENCE-START of FLOW-MAPPING-START token.
1129 token := yaml_token_t{
1130 typ: typ,
1131 start_mark: start_mark,
1132 end_mark: end_mark,
1133 }
1134 // Append the token to the queue.
1135 yaml_insert_token(parser, -1, &token)
1136 return true
1137}
1138
1139// Produce the FLOW-SEQUENCE-END or FLOW-MAPPING-END token.
1140func yaml_parser_fetch_flow_collection_end(parser *yaml_parser_t, typ yaml_token_type_t) bool {
1141 // Reset any potential simple key on the current flow level.
1142 if !yaml_parser_remove_simple_key(parser) {
1143 return false
1144 }
1145
1146 // Decrease the flow level.
1147 if !yaml_parser_decrease_flow_level(parser) {
1148 return false
1149 }
1150
1151 // No simple keys after the indicators ']' and '}'.
1152 parser.simple_key_allowed = false
1153
1154 // Consume the token.
1155
1156 start_mark := parser.mark
1157 skip(parser)
1158 end_mark := parser.mark
1159
1160 // Create the FLOW-SEQUENCE-END of FLOW-MAPPING-END token.
1161 token := yaml_token_t{
1162 typ: typ,
1163 start_mark: start_mark,
1164 end_mark: end_mark,
1165 }
1166 // Append the token to the queue.
1167 yaml_insert_token(parser, -1, &token)
1168 return true
1169}
1170
1171// Produce the FLOW-ENTRY token.
1172func yaml_parser_fetch_flow_entry(parser *yaml_parser_t) bool {
1173 // Reset any potential simple keys on the current flow level.
1174 if !yaml_parser_remove_simple_key(parser) {
1175 return false
1176 }
1177
1178 // Simple keys are allowed after ','.
1179 parser.simple_key_allowed = true
1180
1181 // Consume the token.
1182 start_mark := parser.mark
1183 skip(parser)
1184 end_mark := parser.mark
1185
1186 // Create the FLOW-ENTRY token and append it to the queue.
1187 token := yaml_token_t{
1188 typ: yaml_FLOW_ENTRY_TOKEN,
1189 start_mark: start_mark,
1190 end_mark: end_mark,
1191 }
1192 yaml_insert_token(parser, -1, &token)
1193 return true
1194}
1195
1196// Produce the BLOCK-ENTRY token.
1197func yaml_parser_fetch_block_entry(parser *yaml_parser_t) bool {
1198 // Check if the scanner is in the block context.
1199 if parser.flow_level == 0 {
1200 // Check if we are allowed to start a new entry.
1201 if !parser.simple_key_allowed {
1202 return yaml_parser_set_scanner_error(parser, "", parser.mark,
1203 "block sequence entries are not allowed in this context")
1204 }
1205 // Add the BLOCK-SEQUENCE-START token if needed.
1206 if !yaml_parser_roll_indent(parser, parser.mark.column, -1, yaml_BLOCK_SEQUENCE_START_TOKEN, parser.mark) {
1207 return false
1208 }
1209 } else {
1210 // It is an error for the '-' indicator to occur in the flow context,
1211 // but we let the Parser detect and report about it because the Parser
1212 // is able to point to the context.
1213 }
1214
1215 // Reset any potential simple keys on the current flow level.
1216 if !yaml_parser_remove_simple_key(parser) {
1217 return false
1218 }
1219
1220 // Simple keys are allowed after '-'.
1221 parser.simple_key_allowed = true
1222
1223 // Consume the token.
1224 start_mark := parser.mark
1225 skip(parser)
1226 end_mark := parser.mark
1227
1228 // Create the BLOCK-ENTRY token and append it to the queue.
1229 token := yaml_token_t{
1230 typ: yaml_BLOCK_ENTRY_TOKEN,
1231 start_mark: start_mark,
1232 end_mark: end_mark,
1233 }
1234 yaml_insert_token(parser, -1, &token)
1235 return true
1236}
1237
1238// Produce the KEY token.
1239func yaml_parser_fetch_key(parser *yaml_parser_t) bool {
1240
1241 // In the block context, additional checks are required.
1242 if parser.flow_level == 0 {
1243 // Check if we are allowed to start a new key (not nessesary simple).
1244 if !parser.simple_key_allowed {
1245 return yaml_parser_set_scanner_error(parser, "", parser.mark,
1246 "mapping keys are not allowed in this context")
1247 }
1248 // Add the BLOCK-MAPPING-START token if needed.
1249 if !yaml_parser_roll_indent(parser, parser.mark.column, -1, yaml_BLOCK_MAPPING_START_TOKEN, parser.mark) {
1250 return false
1251 }
1252 }
1253
1254 // Reset any potential simple keys on the current flow level.
1255 if !yaml_parser_remove_simple_key(parser) {
1256 return false
1257 }
1258
1259 // Simple keys are allowed after '?' in the block context.
1260 parser.simple_key_allowed = parser.flow_level == 0
1261
1262 // Consume the token.
1263 start_mark := parser.mark
1264 skip(parser)
1265 end_mark := parser.mark
1266
1267 // Create the KEY token and append it to the queue.
1268 token := yaml_token_t{
1269 typ: yaml_KEY_TOKEN,
1270 start_mark: start_mark,
1271 end_mark: end_mark,
1272 }
1273 yaml_insert_token(parser, -1, &token)
1274 return true
1275}
1276
1277// Produce the VALUE token.
1278func yaml_parser_fetch_value(parser *yaml_parser_t) bool {
1279
1280 simple_key := &parser.simple_keys[len(parser.simple_keys)-1]
1281
1282 // Have we found a simple key?
David K. Bainbridgebd6b2882021-08-26 13:31:02 +00001283 if valid, ok := yaml_simple_key_is_valid(parser, simple_key); !ok {
1284 return false
1285
1286 } else if valid {
1287
Zack Williamse940c7a2019-08-21 14:25:39 -07001288 // Create the KEY token and insert it into the queue.
1289 token := yaml_token_t{
1290 typ: yaml_KEY_TOKEN,
1291 start_mark: simple_key.mark,
1292 end_mark: simple_key.mark,
1293 }
1294 yaml_insert_token(parser, simple_key.token_number-parser.tokens_parsed, &token)
1295
1296 // In the block context, we may need to add the BLOCK-MAPPING-START token.
1297 if !yaml_parser_roll_indent(parser, simple_key.mark.column,
1298 simple_key.token_number,
1299 yaml_BLOCK_MAPPING_START_TOKEN, simple_key.mark) {
1300 return false
1301 }
1302
1303 // Remove the simple key.
1304 simple_key.possible = false
David K. Bainbridgebd6b2882021-08-26 13:31:02 +00001305 delete(parser.simple_keys_by_tok, simple_key.token_number)
Zack Williamse940c7a2019-08-21 14:25:39 -07001306
1307 // A simple key cannot follow another simple key.
1308 parser.simple_key_allowed = false
1309
1310 } else {
1311 // The ':' indicator follows a complex key.
1312
1313 // In the block context, extra checks are required.
1314 if parser.flow_level == 0 {
1315
1316 // Check if we are allowed to start a complex value.
1317 if !parser.simple_key_allowed {
1318 return yaml_parser_set_scanner_error(parser, "", parser.mark,
1319 "mapping values are not allowed in this context")
1320 }
1321
1322 // Add the BLOCK-MAPPING-START token if needed.
1323 if !yaml_parser_roll_indent(parser, parser.mark.column, -1, yaml_BLOCK_MAPPING_START_TOKEN, parser.mark) {
1324 return false
1325 }
1326 }
1327
1328 // Simple keys after ':' are allowed in the block context.
1329 parser.simple_key_allowed = parser.flow_level == 0
1330 }
1331
1332 // Consume the token.
1333 start_mark := parser.mark
1334 skip(parser)
1335 end_mark := parser.mark
1336
1337 // Create the VALUE token and append it to the queue.
1338 token := yaml_token_t{
1339 typ: yaml_VALUE_TOKEN,
1340 start_mark: start_mark,
1341 end_mark: end_mark,
1342 }
1343 yaml_insert_token(parser, -1, &token)
1344 return true
1345}
1346
1347// Produce the ALIAS or ANCHOR token.
1348func yaml_parser_fetch_anchor(parser *yaml_parser_t, typ yaml_token_type_t) bool {
1349 // An anchor or an alias could be a simple key.
1350 if !yaml_parser_save_simple_key(parser) {
1351 return false
1352 }
1353
1354 // A simple key cannot follow an anchor or an alias.
1355 parser.simple_key_allowed = false
1356
1357 // Create the ALIAS or ANCHOR token and append it to the queue.
1358 var token yaml_token_t
1359 if !yaml_parser_scan_anchor(parser, &token, typ) {
1360 return false
1361 }
1362 yaml_insert_token(parser, -1, &token)
1363 return true
1364}
1365
1366// Produce the TAG token.
1367func yaml_parser_fetch_tag(parser *yaml_parser_t) bool {
1368 // A tag could be a simple key.
1369 if !yaml_parser_save_simple_key(parser) {
1370 return false
1371 }
1372
1373 // A simple key cannot follow a tag.
1374 parser.simple_key_allowed = false
1375
1376 // Create the TAG token and append it to the queue.
1377 var token yaml_token_t
1378 if !yaml_parser_scan_tag(parser, &token) {
1379 return false
1380 }
1381 yaml_insert_token(parser, -1, &token)
1382 return true
1383}
1384
1385// Produce the SCALAR(...,literal) or SCALAR(...,folded) tokens.
1386func yaml_parser_fetch_block_scalar(parser *yaml_parser_t, literal bool) bool {
1387 // Remove any potential simple keys.
1388 if !yaml_parser_remove_simple_key(parser) {
1389 return false
1390 }
1391
1392 // A simple key may follow a block scalar.
1393 parser.simple_key_allowed = true
1394
1395 // Create the SCALAR token and append it to the queue.
1396 var token yaml_token_t
1397 if !yaml_parser_scan_block_scalar(parser, &token, literal) {
1398 return false
1399 }
1400 yaml_insert_token(parser, -1, &token)
1401 return true
1402}
1403
1404// Produce the SCALAR(...,single-quoted) or SCALAR(...,double-quoted) tokens.
1405func yaml_parser_fetch_flow_scalar(parser *yaml_parser_t, single bool) bool {
1406 // A plain scalar could be a simple key.
1407 if !yaml_parser_save_simple_key(parser) {
1408 return false
1409 }
1410
1411 // A simple key cannot follow a flow scalar.
1412 parser.simple_key_allowed = false
1413
1414 // Create the SCALAR token and append it to the queue.
1415 var token yaml_token_t
1416 if !yaml_parser_scan_flow_scalar(parser, &token, single) {
1417 return false
1418 }
1419 yaml_insert_token(parser, -1, &token)
1420 return true
1421}
1422
1423// Produce the SCALAR(...,plain) token.
1424func yaml_parser_fetch_plain_scalar(parser *yaml_parser_t) bool {
1425 // A plain scalar could be a simple key.
1426 if !yaml_parser_save_simple_key(parser) {
1427 return false
1428 }
1429
1430 // A simple key cannot follow a flow scalar.
1431 parser.simple_key_allowed = false
1432
1433 // Create the SCALAR token and append it to the queue.
1434 var token yaml_token_t
1435 if !yaml_parser_scan_plain_scalar(parser, &token) {
1436 return false
1437 }
1438 yaml_insert_token(parser, -1, &token)
1439 return true
1440}
1441
1442// Eat whitespaces and comments until the next token is found.
1443func yaml_parser_scan_to_next_token(parser *yaml_parser_t) bool {
1444
1445 // Until the next token is not found.
1446 for {
1447 // Allow the BOM mark to start a line.
1448 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
1449 return false
1450 }
1451 if parser.mark.column == 0 && is_bom(parser.buffer, parser.buffer_pos) {
1452 skip(parser)
1453 }
1454
1455 // Eat whitespaces.
1456 // Tabs are allowed:
1457 // - in the flow context
1458 // - in the block context, but not at the beginning of the line or
1459 // after '-', '?', or ':' (complex value).
1460 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
1461 return false
1462 }
1463
1464 for parser.buffer[parser.buffer_pos] == ' ' || ((parser.flow_level > 0 || !parser.simple_key_allowed) && parser.buffer[parser.buffer_pos] == '\t') {
1465 skip(parser)
1466 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
1467 return false
1468 }
1469 }
1470
1471 // Eat a comment until a line break.
1472 if parser.buffer[parser.buffer_pos] == '#' {
1473 for !is_breakz(parser.buffer, parser.buffer_pos) {
1474 skip(parser)
1475 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
1476 return false
1477 }
1478 }
1479 }
1480
1481 // If it is a line break, eat it.
1482 if is_break(parser.buffer, parser.buffer_pos) {
1483 if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
1484 return false
1485 }
1486 skip_line(parser)
1487
1488 // In the block context, a new line may start a simple key.
1489 if parser.flow_level == 0 {
1490 parser.simple_key_allowed = true
1491 }
1492 } else {
1493 break // We have found a token.
1494 }
1495 }
1496
1497 return true
1498}
1499
1500// Scan a YAML-DIRECTIVE or TAG-DIRECTIVE token.
1501//
1502// Scope:
1503// %YAML 1.1 # a comment \n
1504// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1505// %TAG !yaml! tag:yaml.org,2002: \n
1506// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1507//
1508func yaml_parser_scan_directive(parser *yaml_parser_t, token *yaml_token_t) bool {
1509 // Eat '%'.
1510 start_mark := parser.mark
1511 skip(parser)
1512
1513 // Scan the directive name.
1514 var name []byte
1515 if !yaml_parser_scan_directive_name(parser, start_mark, &name) {
1516 return false
1517 }
1518
1519 // Is it a YAML directive?
1520 if bytes.Equal(name, []byte("YAML")) {
1521 // Scan the VERSION directive value.
1522 var major, minor int8
1523 if !yaml_parser_scan_version_directive_value(parser, start_mark, &major, &minor) {
1524 return false
1525 }
1526 end_mark := parser.mark
1527
1528 // Create a VERSION-DIRECTIVE token.
1529 *token = yaml_token_t{
1530 typ: yaml_VERSION_DIRECTIVE_TOKEN,
1531 start_mark: start_mark,
1532 end_mark: end_mark,
1533 major: major,
1534 minor: minor,
1535 }
1536
1537 // Is it a TAG directive?
1538 } else if bytes.Equal(name, []byte("TAG")) {
1539 // Scan the TAG directive value.
1540 var handle, prefix []byte
1541 if !yaml_parser_scan_tag_directive_value(parser, start_mark, &handle, &prefix) {
1542 return false
1543 }
1544 end_mark := parser.mark
1545
1546 // Create a TAG-DIRECTIVE token.
1547 *token = yaml_token_t{
1548 typ: yaml_TAG_DIRECTIVE_TOKEN,
1549 start_mark: start_mark,
1550 end_mark: end_mark,
1551 value: handle,
1552 prefix: prefix,
1553 }
1554
1555 // Unknown directive.
1556 } else {
1557 yaml_parser_set_scanner_error(parser, "while scanning a directive",
1558 start_mark, "found unknown directive name")
1559 return false
1560 }
1561
1562 // Eat the rest of the line including any comments.
1563 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
1564 return false
1565 }
1566
1567 for is_blank(parser.buffer, parser.buffer_pos) {
1568 skip(parser)
1569 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
1570 return false
1571 }
1572 }
1573
1574 if parser.buffer[parser.buffer_pos] == '#' {
1575 for !is_breakz(parser.buffer, parser.buffer_pos) {
1576 skip(parser)
1577 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
1578 return false
1579 }
1580 }
1581 }
1582
1583 // Check if we are at the end of the line.
1584 if !is_breakz(parser.buffer, parser.buffer_pos) {
1585 yaml_parser_set_scanner_error(parser, "while scanning a directive",
1586 start_mark, "did not find expected comment or line break")
1587 return false
1588 }
1589
1590 // Eat a line break.
1591 if is_break(parser.buffer, parser.buffer_pos) {
1592 if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
1593 return false
1594 }
1595 skip_line(parser)
1596 }
1597
1598 return true
1599}
1600
1601// Scan the directive name.
1602//
1603// Scope:
1604// %YAML 1.1 # a comment \n
1605// ^^^^
1606// %TAG !yaml! tag:yaml.org,2002: \n
1607// ^^^
1608//
1609func yaml_parser_scan_directive_name(parser *yaml_parser_t, start_mark yaml_mark_t, name *[]byte) bool {
1610 // Consume the directive name.
1611 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
1612 return false
1613 }
1614
1615 var s []byte
1616 for is_alpha(parser.buffer, parser.buffer_pos) {
1617 s = read(parser, s)
1618 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
1619 return false
1620 }
1621 }
1622
1623 // Check if the name is empty.
1624 if len(s) == 0 {
1625 yaml_parser_set_scanner_error(parser, "while scanning a directive",
1626 start_mark, "could not find expected directive name")
1627 return false
1628 }
1629
1630 // Check for an blank character after the name.
1631 if !is_blankz(parser.buffer, parser.buffer_pos) {
1632 yaml_parser_set_scanner_error(parser, "while scanning a directive",
1633 start_mark, "found unexpected non-alphabetical character")
1634 return false
1635 }
1636 *name = s
1637 return true
1638}
1639
1640// Scan the value of VERSION-DIRECTIVE.
1641//
1642// Scope:
1643// %YAML 1.1 # a comment \n
1644// ^^^^^^
1645func yaml_parser_scan_version_directive_value(parser *yaml_parser_t, start_mark yaml_mark_t, major, minor *int8) bool {
1646 // Eat whitespaces.
1647 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
1648 return false
1649 }
1650 for is_blank(parser.buffer, parser.buffer_pos) {
1651 skip(parser)
1652 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
1653 return false
1654 }
1655 }
1656
1657 // Consume the major version number.
1658 if !yaml_parser_scan_version_directive_number(parser, start_mark, major) {
1659 return false
1660 }
1661
1662 // Eat '.'.
1663 if parser.buffer[parser.buffer_pos] != '.' {
1664 return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive",
1665 start_mark, "did not find expected digit or '.' character")
1666 }
1667
1668 skip(parser)
1669
1670 // Consume the minor version number.
1671 if !yaml_parser_scan_version_directive_number(parser, start_mark, minor) {
1672 return false
1673 }
1674 return true
1675}
1676
1677const max_number_length = 2
1678
1679// Scan the version number of VERSION-DIRECTIVE.
1680//
1681// Scope:
1682// %YAML 1.1 # a comment \n
1683// ^
1684// %YAML 1.1 # a comment \n
1685// ^
1686func yaml_parser_scan_version_directive_number(parser *yaml_parser_t, start_mark yaml_mark_t, number *int8) bool {
1687
1688 // Repeat while the next character is digit.
1689 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
1690 return false
1691 }
1692 var value, length int8
1693 for is_digit(parser.buffer, parser.buffer_pos) {
1694 // Check if the number is too long.
1695 length++
1696 if length > max_number_length {
1697 return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive",
1698 start_mark, "found extremely long version number")
1699 }
1700 value = value*10 + int8(as_digit(parser.buffer, parser.buffer_pos))
1701 skip(parser)
1702 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
1703 return false
1704 }
1705 }
1706
1707 // Check if the number was present.
1708 if length == 0 {
1709 return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive",
1710 start_mark, "did not find expected version number")
1711 }
1712 *number = value
1713 return true
1714}
1715
1716// Scan the value of a TAG-DIRECTIVE token.
1717//
1718// Scope:
1719// %TAG !yaml! tag:yaml.org,2002: \n
1720// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1721//
1722func yaml_parser_scan_tag_directive_value(parser *yaml_parser_t, start_mark yaml_mark_t, handle, prefix *[]byte) bool {
1723 var handle_value, prefix_value []byte
1724
1725 // Eat whitespaces.
1726 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
1727 return false
1728 }
1729
1730 for is_blank(parser.buffer, parser.buffer_pos) {
1731 skip(parser)
1732 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
1733 return false
1734 }
1735 }
1736
1737 // Scan a handle.
1738 if !yaml_parser_scan_tag_handle(parser, true, start_mark, &handle_value) {
1739 return false
1740 }
1741
1742 // Expect a whitespace.
1743 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
1744 return false
1745 }
1746 if !is_blank(parser.buffer, parser.buffer_pos) {
1747 yaml_parser_set_scanner_error(parser, "while scanning a %TAG directive",
1748 start_mark, "did not find expected whitespace")
1749 return false
1750 }
1751
1752 // Eat whitespaces.
1753 for is_blank(parser.buffer, parser.buffer_pos) {
1754 skip(parser)
1755 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
1756 return false
1757 }
1758 }
1759
1760 // Scan a prefix.
1761 if !yaml_parser_scan_tag_uri(parser, true, nil, start_mark, &prefix_value) {
1762 return false
1763 }
1764
1765 // Expect a whitespace or line break.
1766 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
1767 return false
1768 }
1769 if !is_blankz(parser.buffer, parser.buffer_pos) {
1770 yaml_parser_set_scanner_error(parser, "while scanning a %TAG directive",
1771 start_mark, "did not find expected whitespace or line break")
1772 return false
1773 }
1774
1775 *handle = handle_value
1776 *prefix = prefix_value
1777 return true
1778}
1779
1780func yaml_parser_scan_anchor(parser *yaml_parser_t, token *yaml_token_t, typ yaml_token_type_t) bool {
1781 var s []byte
1782
1783 // Eat the indicator character.
1784 start_mark := parser.mark
1785 skip(parser)
1786
1787 // Consume the value.
1788 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
1789 return false
1790 }
1791
1792 for is_alpha(parser.buffer, parser.buffer_pos) {
1793 s = read(parser, s)
1794 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
1795 return false
1796 }
1797 }
1798
1799 end_mark := parser.mark
1800
1801 /*
1802 * Check if length of the anchor is greater than 0 and it is followed by
1803 * a whitespace character or one of the indicators:
1804 *
1805 * '?', ':', ',', ']', '}', '%', '@', '`'.
1806 */
1807
1808 if len(s) == 0 ||
1809 !(is_blankz(parser.buffer, parser.buffer_pos) || parser.buffer[parser.buffer_pos] == '?' ||
1810 parser.buffer[parser.buffer_pos] == ':' || parser.buffer[parser.buffer_pos] == ',' ||
1811 parser.buffer[parser.buffer_pos] == ']' || parser.buffer[parser.buffer_pos] == '}' ||
1812 parser.buffer[parser.buffer_pos] == '%' || parser.buffer[parser.buffer_pos] == '@' ||
1813 parser.buffer[parser.buffer_pos] == '`') {
1814 context := "while scanning an alias"
1815 if typ == yaml_ANCHOR_TOKEN {
1816 context = "while scanning an anchor"
1817 }
1818 yaml_parser_set_scanner_error(parser, context, start_mark,
1819 "did not find expected alphabetic or numeric character")
1820 return false
1821 }
1822
1823 // Create a token.
1824 *token = yaml_token_t{
1825 typ: typ,
1826 start_mark: start_mark,
1827 end_mark: end_mark,
1828 value: s,
1829 }
1830
1831 return true
1832}
1833
1834/*
1835 * Scan a TAG token.
1836 */
1837
1838func yaml_parser_scan_tag(parser *yaml_parser_t, token *yaml_token_t) bool {
1839 var handle, suffix []byte
1840
1841 start_mark := parser.mark
1842
1843 // Check if the tag is in the canonical form.
1844 if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
1845 return false
1846 }
1847
1848 if parser.buffer[parser.buffer_pos+1] == '<' {
1849 // Keep the handle as ''
1850
1851 // Eat '!<'
1852 skip(parser)
1853 skip(parser)
1854
1855 // Consume the tag value.
1856 if !yaml_parser_scan_tag_uri(parser, false, nil, start_mark, &suffix) {
1857 return false
1858 }
1859
1860 // Check for '>' and eat it.
1861 if parser.buffer[parser.buffer_pos] != '>' {
1862 yaml_parser_set_scanner_error(parser, "while scanning a tag",
1863 start_mark, "did not find the expected '>'")
1864 return false
1865 }
1866
1867 skip(parser)
1868 } else {
1869 // The tag has either the '!suffix' or the '!handle!suffix' form.
1870
1871 // First, try to scan a handle.
1872 if !yaml_parser_scan_tag_handle(parser, false, start_mark, &handle) {
1873 return false
1874 }
1875
1876 // Check if it is, indeed, handle.
1877 if handle[0] == '!' && len(handle) > 1 && handle[len(handle)-1] == '!' {
1878 // Scan the suffix now.
1879 if !yaml_parser_scan_tag_uri(parser, false, nil, start_mark, &suffix) {
1880 return false
1881 }
1882 } else {
1883 // It wasn't a handle after all. Scan the rest of the tag.
1884 if !yaml_parser_scan_tag_uri(parser, false, handle, start_mark, &suffix) {
1885 return false
1886 }
1887
1888 // Set the handle to '!'.
1889 handle = []byte{'!'}
1890
1891 // A special case: the '!' tag. Set the handle to '' and the
1892 // suffix to '!'.
1893 if len(suffix) == 0 {
1894 handle, suffix = suffix, handle
1895 }
1896 }
1897 }
1898
1899 // Check the character which ends the tag.
1900 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
1901 return false
1902 }
1903 if !is_blankz(parser.buffer, parser.buffer_pos) {
1904 yaml_parser_set_scanner_error(parser, "while scanning a tag",
1905 start_mark, "did not find expected whitespace or line break")
1906 return false
1907 }
1908
1909 end_mark := parser.mark
1910
1911 // Create a token.
1912 *token = yaml_token_t{
1913 typ: yaml_TAG_TOKEN,
1914 start_mark: start_mark,
1915 end_mark: end_mark,
1916 value: handle,
1917 suffix: suffix,
1918 }
1919 return true
1920}
1921
1922// Scan a tag handle.
1923func yaml_parser_scan_tag_handle(parser *yaml_parser_t, directive bool, start_mark yaml_mark_t, handle *[]byte) bool {
1924 // Check the initial '!' character.
1925 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
1926 return false
1927 }
1928 if parser.buffer[parser.buffer_pos] != '!' {
1929 yaml_parser_set_scanner_tag_error(parser, directive,
1930 start_mark, "did not find expected '!'")
1931 return false
1932 }
1933
1934 var s []byte
1935
1936 // Copy the '!' character.
1937 s = read(parser, s)
1938
1939 // Copy all subsequent alphabetical and numerical characters.
1940 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
1941 return false
1942 }
1943 for is_alpha(parser.buffer, parser.buffer_pos) {
1944 s = read(parser, s)
1945 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
1946 return false
1947 }
1948 }
1949
1950 // Check if the trailing character is '!' and copy it.
1951 if parser.buffer[parser.buffer_pos] == '!' {
1952 s = read(parser, s)
1953 } else {
1954 // It's either the '!' tag or not really a tag handle. If it's a %TAG
1955 // directive, it's an error. If it's a tag token, it must be a part of URI.
1956 if directive && string(s) != "!" {
1957 yaml_parser_set_scanner_tag_error(parser, directive,
1958 start_mark, "did not find expected '!'")
1959 return false
1960 }
1961 }
1962
1963 *handle = s
1964 return true
1965}
1966
1967// Scan a tag.
1968func yaml_parser_scan_tag_uri(parser *yaml_parser_t, directive bool, head []byte, start_mark yaml_mark_t, uri *[]byte) bool {
1969 //size_t length = head ? strlen((char *)head) : 0
1970 var s []byte
1971 hasTag := len(head) > 0
1972
1973 // Copy the head if needed.
1974 //
1975 // Note that we don't copy the leading '!' character.
1976 if len(head) > 1 {
1977 s = append(s, head[1:]...)
1978 }
1979
1980 // Scan the tag.
1981 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
1982 return false
1983 }
1984
1985 // The set of characters that may appear in URI is as follows:
1986 //
1987 // '0'-'9', 'A'-'Z', 'a'-'z', '_', '-', ';', '/', '?', ':', '@', '&',
1988 // '=', '+', '$', ',', '.', '!', '~', '*', '\'', '(', ')', '[', ']',
1989 // '%'.
1990 // [Go] Convert this into more reasonable logic.
1991 for is_alpha(parser.buffer, parser.buffer_pos) || parser.buffer[parser.buffer_pos] == ';' ||
1992 parser.buffer[parser.buffer_pos] == '/' || parser.buffer[parser.buffer_pos] == '?' ||
1993 parser.buffer[parser.buffer_pos] == ':' || parser.buffer[parser.buffer_pos] == '@' ||
1994 parser.buffer[parser.buffer_pos] == '&' || parser.buffer[parser.buffer_pos] == '=' ||
1995 parser.buffer[parser.buffer_pos] == '+' || parser.buffer[parser.buffer_pos] == '$' ||
1996 parser.buffer[parser.buffer_pos] == ',' || parser.buffer[parser.buffer_pos] == '.' ||
1997 parser.buffer[parser.buffer_pos] == '!' || parser.buffer[parser.buffer_pos] == '~' ||
1998 parser.buffer[parser.buffer_pos] == '*' || parser.buffer[parser.buffer_pos] == '\'' ||
1999 parser.buffer[parser.buffer_pos] == '(' || parser.buffer[parser.buffer_pos] == ')' ||
2000 parser.buffer[parser.buffer_pos] == '[' || parser.buffer[parser.buffer_pos] == ']' ||
2001 parser.buffer[parser.buffer_pos] == '%' {
2002 // Check if it is a URI-escape sequence.
2003 if parser.buffer[parser.buffer_pos] == '%' {
2004 if !yaml_parser_scan_uri_escapes(parser, directive, start_mark, &s) {
2005 return false
2006 }
2007 } else {
2008 s = read(parser, s)
2009 }
2010 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
2011 return false
2012 }
2013 hasTag = true
2014 }
2015
2016 if !hasTag {
2017 yaml_parser_set_scanner_tag_error(parser, directive,
2018 start_mark, "did not find expected tag URI")
2019 return false
2020 }
2021 *uri = s
2022 return true
2023}
2024
2025// Decode an URI-escape sequence corresponding to a single UTF-8 character.
2026func yaml_parser_scan_uri_escapes(parser *yaml_parser_t, directive bool, start_mark yaml_mark_t, s *[]byte) bool {
2027
2028 // Decode the required number of characters.
2029 w := 1024
2030 for w > 0 {
2031 // Check for a URI-escaped octet.
2032 if parser.unread < 3 && !yaml_parser_update_buffer(parser, 3) {
2033 return false
2034 }
2035
2036 if !(parser.buffer[parser.buffer_pos] == '%' &&
2037 is_hex(parser.buffer, parser.buffer_pos+1) &&
2038 is_hex(parser.buffer, parser.buffer_pos+2)) {
2039 return yaml_parser_set_scanner_tag_error(parser, directive,
2040 start_mark, "did not find URI escaped octet")
2041 }
2042
2043 // Get the octet.
2044 octet := byte((as_hex(parser.buffer, parser.buffer_pos+1) << 4) + as_hex(parser.buffer, parser.buffer_pos+2))
2045
2046 // If it is the leading octet, determine the length of the UTF-8 sequence.
2047 if w == 1024 {
2048 w = width(octet)
2049 if w == 0 {
2050 return yaml_parser_set_scanner_tag_error(parser, directive,
2051 start_mark, "found an incorrect leading UTF-8 octet")
2052 }
2053 } else {
2054 // Check if the trailing octet is correct.
2055 if octet&0xC0 != 0x80 {
2056 return yaml_parser_set_scanner_tag_error(parser, directive,
2057 start_mark, "found an incorrect trailing UTF-8 octet")
2058 }
2059 }
2060
2061 // Copy the octet and move the pointers.
2062 *s = append(*s, octet)
2063 skip(parser)
2064 skip(parser)
2065 skip(parser)
2066 w--
2067 }
2068 return true
2069}
2070
2071// Scan a block scalar.
2072func yaml_parser_scan_block_scalar(parser *yaml_parser_t, token *yaml_token_t, literal bool) bool {
2073 // Eat the indicator '|' or '>'.
2074 start_mark := parser.mark
2075 skip(parser)
2076
2077 // Scan the additional block scalar indicators.
2078 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
2079 return false
2080 }
2081
2082 // Check for a chomping indicator.
2083 var chomping, increment int
2084 if parser.buffer[parser.buffer_pos] == '+' || parser.buffer[parser.buffer_pos] == '-' {
2085 // Set the chomping method and eat the indicator.
2086 if parser.buffer[parser.buffer_pos] == '+' {
2087 chomping = +1
2088 } else {
2089 chomping = -1
2090 }
2091 skip(parser)
2092
2093 // Check for an indentation indicator.
2094 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
2095 return false
2096 }
2097 if is_digit(parser.buffer, parser.buffer_pos) {
2098 // Check that the indentation is greater than 0.
2099 if parser.buffer[parser.buffer_pos] == '0' {
2100 yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
2101 start_mark, "found an indentation indicator equal to 0")
2102 return false
2103 }
2104
2105 // Get the indentation level and eat the indicator.
2106 increment = as_digit(parser.buffer, parser.buffer_pos)
2107 skip(parser)
2108 }
2109
2110 } else if is_digit(parser.buffer, parser.buffer_pos) {
2111 // Do the same as above, but in the opposite order.
2112
2113 if parser.buffer[parser.buffer_pos] == '0' {
2114 yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
2115 start_mark, "found an indentation indicator equal to 0")
2116 return false
2117 }
2118 increment = as_digit(parser.buffer, parser.buffer_pos)
2119 skip(parser)
2120
2121 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
2122 return false
2123 }
2124 if parser.buffer[parser.buffer_pos] == '+' || parser.buffer[parser.buffer_pos] == '-' {
2125 if parser.buffer[parser.buffer_pos] == '+' {
2126 chomping = +1
2127 } else {
2128 chomping = -1
2129 }
2130 skip(parser)
2131 }
2132 }
2133
2134 // Eat whitespaces and comments to the end of the line.
2135 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
2136 return false
2137 }
2138 for is_blank(parser.buffer, parser.buffer_pos) {
2139 skip(parser)
2140 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
2141 return false
2142 }
2143 }
2144 if parser.buffer[parser.buffer_pos] == '#' {
2145 for !is_breakz(parser.buffer, parser.buffer_pos) {
2146 skip(parser)
2147 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
2148 return false
2149 }
2150 }
2151 }
2152
2153 // Check if we are at the end of the line.
2154 if !is_breakz(parser.buffer, parser.buffer_pos) {
2155 yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
2156 start_mark, "did not find expected comment or line break")
2157 return false
2158 }
2159
2160 // Eat a line break.
2161 if is_break(parser.buffer, parser.buffer_pos) {
2162 if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
2163 return false
2164 }
2165 skip_line(parser)
2166 }
2167
2168 end_mark := parser.mark
2169
2170 // Set the indentation level if it was specified.
2171 var indent int
2172 if increment > 0 {
2173 if parser.indent >= 0 {
2174 indent = parser.indent + increment
2175 } else {
2176 indent = increment
2177 }
2178 }
2179
2180 // Scan the leading line breaks and determine the indentation level if needed.
2181 var s, leading_break, trailing_breaks []byte
2182 if !yaml_parser_scan_block_scalar_breaks(parser, &indent, &trailing_breaks, start_mark, &end_mark) {
2183 return false
2184 }
2185
2186 // Scan the block scalar content.
2187 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
2188 return false
2189 }
2190 var leading_blank, trailing_blank bool
2191 for parser.mark.column == indent && !is_z(parser.buffer, parser.buffer_pos) {
2192 // We are at the beginning of a non-empty line.
2193
2194 // Is it a trailing whitespace?
2195 trailing_blank = is_blank(parser.buffer, parser.buffer_pos)
2196
2197 // Check if we need to fold the leading line break.
2198 if !literal && !leading_blank && !trailing_blank && len(leading_break) > 0 && leading_break[0] == '\n' {
2199 // Do we need to join the lines by space?
2200 if len(trailing_breaks) == 0 {
2201 s = append(s, ' ')
2202 }
2203 } else {
2204 s = append(s, leading_break...)
2205 }
2206 leading_break = leading_break[:0]
2207
2208 // Append the remaining line breaks.
2209 s = append(s, trailing_breaks...)
2210 trailing_breaks = trailing_breaks[:0]
2211
2212 // Is it a leading whitespace?
2213 leading_blank = is_blank(parser.buffer, parser.buffer_pos)
2214
2215 // Consume the current line.
2216 for !is_breakz(parser.buffer, parser.buffer_pos) {
2217 s = read(parser, s)
2218 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
2219 return false
2220 }
2221 }
2222
2223 // Consume the line break.
2224 if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
2225 return false
2226 }
2227
2228 leading_break = read_line(parser, leading_break)
2229
2230 // Eat the following indentation spaces and line breaks.
2231 if !yaml_parser_scan_block_scalar_breaks(parser, &indent, &trailing_breaks, start_mark, &end_mark) {
2232 return false
2233 }
2234 }
2235
2236 // Chomp the tail.
2237 if chomping != -1 {
2238 s = append(s, leading_break...)
2239 }
2240 if chomping == 1 {
2241 s = append(s, trailing_breaks...)
2242 }
2243
2244 // Create a token.
2245 *token = yaml_token_t{
2246 typ: yaml_SCALAR_TOKEN,
2247 start_mark: start_mark,
2248 end_mark: end_mark,
2249 value: s,
2250 style: yaml_LITERAL_SCALAR_STYLE,
2251 }
2252 if !literal {
2253 token.style = yaml_FOLDED_SCALAR_STYLE
2254 }
2255 return true
2256}
2257
2258// Scan indentation spaces and line breaks for a block scalar. Determine the
2259// indentation level if needed.
2260func yaml_parser_scan_block_scalar_breaks(parser *yaml_parser_t, indent *int, breaks *[]byte, start_mark yaml_mark_t, end_mark *yaml_mark_t) bool {
2261 *end_mark = parser.mark
2262
2263 // Eat the indentation spaces and line breaks.
2264 max_indent := 0
2265 for {
2266 // Eat the indentation spaces.
2267 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
2268 return false
2269 }
2270 for (*indent == 0 || parser.mark.column < *indent) && is_space(parser.buffer, parser.buffer_pos) {
2271 skip(parser)
2272 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
2273 return false
2274 }
2275 }
2276 if parser.mark.column > max_indent {
2277 max_indent = parser.mark.column
2278 }
2279
2280 // Check for a tab character messing the indentation.
2281 if (*indent == 0 || parser.mark.column < *indent) && is_tab(parser.buffer, parser.buffer_pos) {
2282 return yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
2283 start_mark, "found a tab character where an indentation space is expected")
2284 }
2285
2286 // Have we found a non-empty line?
2287 if !is_break(parser.buffer, parser.buffer_pos) {
2288 break
2289 }
2290
2291 // Consume the line break.
2292 if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
2293 return false
2294 }
2295 // [Go] Should really be returning breaks instead.
2296 *breaks = read_line(parser, *breaks)
2297 *end_mark = parser.mark
2298 }
2299
2300 // Determine the indentation level if needed.
2301 if *indent == 0 {
2302 *indent = max_indent
2303 if *indent < parser.indent+1 {
2304 *indent = parser.indent + 1
2305 }
2306 if *indent < 1 {
2307 *indent = 1
2308 }
2309 }
2310 return true
2311}
2312
2313// Scan a quoted scalar.
2314func yaml_parser_scan_flow_scalar(parser *yaml_parser_t, token *yaml_token_t, single bool) bool {
2315 // Eat the left quote.
2316 start_mark := parser.mark
2317 skip(parser)
2318
2319 // Consume the content of the quoted scalar.
2320 var s, leading_break, trailing_breaks, whitespaces []byte
2321 for {
2322 // Check that there are no document indicators at the beginning of the line.
2323 if parser.unread < 4 && !yaml_parser_update_buffer(parser, 4) {
2324 return false
2325 }
2326
2327 if parser.mark.column == 0 &&
2328 ((parser.buffer[parser.buffer_pos+0] == '-' &&
2329 parser.buffer[parser.buffer_pos+1] == '-' &&
2330 parser.buffer[parser.buffer_pos+2] == '-') ||
2331 (parser.buffer[parser.buffer_pos+0] == '.' &&
2332 parser.buffer[parser.buffer_pos+1] == '.' &&
2333 parser.buffer[parser.buffer_pos+2] == '.')) &&
2334 is_blankz(parser.buffer, parser.buffer_pos+3) {
2335 yaml_parser_set_scanner_error(parser, "while scanning a quoted scalar",
2336 start_mark, "found unexpected document indicator")
2337 return false
2338 }
2339
2340 // Check for EOF.
2341 if is_z(parser.buffer, parser.buffer_pos) {
2342 yaml_parser_set_scanner_error(parser, "while scanning a quoted scalar",
2343 start_mark, "found unexpected end of stream")
2344 return false
2345 }
2346
2347 // Consume non-blank characters.
2348 leading_blanks := false
2349 for !is_blankz(parser.buffer, parser.buffer_pos) {
2350 if single && parser.buffer[parser.buffer_pos] == '\'' && parser.buffer[parser.buffer_pos+1] == '\'' {
2351 // Is is an escaped single quote.
2352 s = append(s, '\'')
2353 skip(parser)
2354 skip(parser)
2355
2356 } else if single && parser.buffer[parser.buffer_pos] == '\'' {
2357 // It is a right single quote.
2358 break
2359 } else if !single && parser.buffer[parser.buffer_pos] == '"' {
2360 // It is a right double quote.
2361 break
2362
2363 } else if !single && parser.buffer[parser.buffer_pos] == '\\' && is_break(parser.buffer, parser.buffer_pos+1) {
2364 // It is an escaped line break.
2365 if parser.unread < 3 && !yaml_parser_update_buffer(parser, 3) {
2366 return false
2367 }
2368 skip(parser)
2369 skip_line(parser)
2370 leading_blanks = true
2371 break
2372
2373 } else if !single && parser.buffer[parser.buffer_pos] == '\\' {
2374 // It is an escape sequence.
2375 code_length := 0
2376
2377 // Check the escape character.
2378 switch parser.buffer[parser.buffer_pos+1] {
2379 case '0':
2380 s = append(s, 0)
2381 case 'a':
2382 s = append(s, '\x07')
2383 case 'b':
2384 s = append(s, '\x08')
2385 case 't', '\t':
2386 s = append(s, '\x09')
2387 case 'n':
2388 s = append(s, '\x0A')
2389 case 'v':
2390 s = append(s, '\x0B')
2391 case 'f':
2392 s = append(s, '\x0C')
2393 case 'r':
2394 s = append(s, '\x0D')
2395 case 'e':
2396 s = append(s, '\x1B')
2397 case ' ':
2398 s = append(s, '\x20')
2399 case '"':
2400 s = append(s, '"')
2401 case '\'':
2402 s = append(s, '\'')
2403 case '\\':
2404 s = append(s, '\\')
2405 case 'N': // NEL (#x85)
2406 s = append(s, '\xC2')
2407 s = append(s, '\x85')
2408 case '_': // #xA0
2409 s = append(s, '\xC2')
2410 s = append(s, '\xA0')
2411 case 'L': // LS (#x2028)
2412 s = append(s, '\xE2')
2413 s = append(s, '\x80')
2414 s = append(s, '\xA8')
2415 case 'P': // PS (#x2029)
2416 s = append(s, '\xE2')
2417 s = append(s, '\x80')
2418 s = append(s, '\xA9')
2419 case 'x':
2420 code_length = 2
2421 case 'u':
2422 code_length = 4
2423 case 'U':
2424 code_length = 8
2425 default:
2426 yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar",
2427 start_mark, "found unknown escape character")
2428 return false
2429 }
2430
2431 skip(parser)
2432 skip(parser)
2433
2434 // Consume an arbitrary escape code.
2435 if code_length > 0 {
2436 var value int
2437
2438 // Scan the character value.
2439 if parser.unread < code_length && !yaml_parser_update_buffer(parser, code_length) {
2440 return false
2441 }
2442 for k := 0; k < code_length; k++ {
2443 if !is_hex(parser.buffer, parser.buffer_pos+k) {
2444 yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar",
2445 start_mark, "did not find expected hexdecimal number")
2446 return false
2447 }
2448 value = (value << 4) + as_hex(parser.buffer, parser.buffer_pos+k)
2449 }
2450
2451 // Check the value and write the character.
2452 if (value >= 0xD800 && value <= 0xDFFF) || value > 0x10FFFF {
2453 yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar",
2454 start_mark, "found invalid Unicode character escape code")
2455 return false
2456 }
2457 if value <= 0x7F {
2458 s = append(s, byte(value))
2459 } else if value <= 0x7FF {
2460 s = append(s, byte(0xC0+(value>>6)))
2461 s = append(s, byte(0x80+(value&0x3F)))
2462 } else if value <= 0xFFFF {
2463 s = append(s, byte(0xE0+(value>>12)))
2464 s = append(s, byte(0x80+((value>>6)&0x3F)))
2465 s = append(s, byte(0x80+(value&0x3F)))
2466 } else {
2467 s = append(s, byte(0xF0+(value>>18)))
2468 s = append(s, byte(0x80+((value>>12)&0x3F)))
2469 s = append(s, byte(0x80+((value>>6)&0x3F)))
2470 s = append(s, byte(0x80+(value&0x3F)))
2471 }
2472
2473 // Advance the pointer.
2474 for k := 0; k < code_length; k++ {
2475 skip(parser)
2476 }
2477 }
2478 } else {
2479 // It is a non-escaped non-blank character.
2480 s = read(parser, s)
2481 }
2482 if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
2483 return false
2484 }
2485 }
2486
2487 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
2488 return false
2489 }
2490
2491 // Check if we are at the end of the scalar.
2492 if single {
2493 if parser.buffer[parser.buffer_pos] == '\'' {
2494 break
2495 }
2496 } else {
2497 if parser.buffer[parser.buffer_pos] == '"' {
2498 break
2499 }
2500 }
2501
2502 // Consume blank characters.
2503 for is_blank(parser.buffer, parser.buffer_pos) || is_break(parser.buffer, parser.buffer_pos) {
2504 if is_blank(parser.buffer, parser.buffer_pos) {
2505 // Consume a space or a tab character.
2506 if !leading_blanks {
2507 whitespaces = read(parser, whitespaces)
2508 } else {
2509 skip(parser)
2510 }
2511 } else {
2512 if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
2513 return false
2514 }
2515
2516 // Check if it is a first line break.
2517 if !leading_blanks {
2518 whitespaces = whitespaces[:0]
2519 leading_break = read_line(parser, leading_break)
2520 leading_blanks = true
2521 } else {
2522 trailing_breaks = read_line(parser, trailing_breaks)
2523 }
2524 }
2525 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
2526 return false
2527 }
2528 }
2529
2530 // Join the whitespaces or fold line breaks.
2531 if leading_blanks {
2532 // Do we need to fold line breaks?
2533 if len(leading_break) > 0 && leading_break[0] == '\n' {
2534 if len(trailing_breaks) == 0 {
2535 s = append(s, ' ')
2536 } else {
2537 s = append(s, trailing_breaks...)
2538 }
2539 } else {
2540 s = append(s, leading_break...)
2541 s = append(s, trailing_breaks...)
2542 }
2543 trailing_breaks = trailing_breaks[:0]
2544 leading_break = leading_break[:0]
2545 } else {
2546 s = append(s, whitespaces...)
2547 whitespaces = whitespaces[:0]
2548 }
2549 }
2550
2551 // Eat the right quote.
2552 skip(parser)
2553 end_mark := parser.mark
2554
2555 // Create a token.
2556 *token = yaml_token_t{
2557 typ: yaml_SCALAR_TOKEN,
2558 start_mark: start_mark,
2559 end_mark: end_mark,
2560 value: s,
2561 style: yaml_SINGLE_QUOTED_SCALAR_STYLE,
2562 }
2563 if !single {
2564 token.style = yaml_DOUBLE_QUOTED_SCALAR_STYLE
2565 }
2566 return true
2567}
2568
2569// Scan a plain scalar.
2570func yaml_parser_scan_plain_scalar(parser *yaml_parser_t, token *yaml_token_t) bool {
2571
2572 var s, leading_break, trailing_breaks, whitespaces []byte
2573 var leading_blanks bool
2574 var indent = parser.indent + 1
2575
2576 start_mark := parser.mark
2577 end_mark := parser.mark
2578
2579 // Consume the content of the plain scalar.
2580 for {
2581 // Check for a document indicator.
2582 if parser.unread < 4 && !yaml_parser_update_buffer(parser, 4) {
2583 return false
2584 }
2585 if parser.mark.column == 0 &&
2586 ((parser.buffer[parser.buffer_pos+0] == '-' &&
2587 parser.buffer[parser.buffer_pos+1] == '-' &&
2588 parser.buffer[parser.buffer_pos+2] == '-') ||
2589 (parser.buffer[parser.buffer_pos+0] == '.' &&
2590 parser.buffer[parser.buffer_pos+1] == '.' &&
2591 parser.buffer[parser.buffer_pos+2] == '.')) &&
2592 is_blankz(parser.buffer, parser.buffer_pos+3) {
2593 break
2594 }
2595
2596 // Check for a comment.
2597 if parser.buffer[parser.buffer_pos] == '#' {
2598 break
2599 }
2600
2601 // Consume non-blank characters.
2602 for !is_blankz(parser.buffer, parser.buffer_pos) {
2603
2604 // Check for indicators that may end a plain scalar.
2605 if (parser.buffer[parser.buffer_pos] == ':' && is_blankz(parser.buffer, parser.buffer_pos+1)) ||
2606 (parser.flow_level > 0 &&
2607 (parser.buffer[parser.buffer_pos] == ',' ||
2608 parser.buffer[parser.buffer_pos] == '?' || parser.buffer[parser.buffer_pos] == '[' ||
2609 parser.buffer[parser.buffer_pos] == ']' || parser.buffer[parser.buffer_pos] == '{' ||
2610 parser.buffer[parser.buffer_pos] == '}')) {
2611 break
2612 }
2613
2614 // Check if we need to join whitespaces and breaks.
2615 if leading_blanks || len(whitespaces) > 0 {
2616 if leading_blanks {
2617 // Do we need to fold line breaks?
2618 if leading_break[0] == '\n' {
2619 if len(trailing_breaks) == 0 {
2620 s = append(s, ' ')
2621 } else {
2622 s = append(s, trailing_breaks...)
2623 }
2624 } else {
2625 s = append(s, leading_break...)
2626 s = append(s, trailing_breaks...)
2627 }
2628 trailing_breaks = trailing_breaks[:0]
2629 leading_break = leading_break[:0]
2630 leading_blanks = false
2631 } else {
2632 s = append(s, whitespaces...)
2633 whitespaces = whitespaces[:0]
2634 }
2635 }
2636
2637 // Copy the character.
2638 s = read(parser, s)
2639
2640 end_mark = parser.mark
2641 if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
2642 return false
2643 }
2644 }
2645
2646 // Is it the end?
2647 if !(is_blank(parser.buffer, parser.buffer_pos) || is_break(parser.buffer, parser.buffer_pos)) {
2648 break
2649 }
2650
2651 // Consume blank characters.
2652 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
2653 return false
2654 }
2655
2656 for is_blank(parser.buffer, parser.buffer_pos) || is_break(parser.buffer, parser.buffer_pos) {
2657 if is_blank(parser.buffer, parser.buffer_pos) {
2658
2659 // Check for tab characters that abuse indentation.
2660 if leading_blanks && parser.mark.column < indent && is_tab(parser.buffer, parser.buffer_pos) {
2661 yaml_parser_set_scanner_error(parser, "while scanning a plain scalar",
2662 start_mark, "found a tab character that violates indentation")
2663 return false
2664 }
2665
2666 // Consume a space or a tab character.
2667 if !leading_blanks {
2668 whitespaces = read(parser, whitespaces)
2669 } else {
2670 skip(parser)
2671 }
2672 } else {
2673 if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
2674 return false
2675 }
2676
2677 // Check if it is a first line break.
2678 if !leading_blanks {
2679 whitespaces = whitespaces[:0]
2680 leading_break = read_line(parser, leading_break)
2681 leading_blanks = true
2682 } else {
2683 trailing_breaks = read_line(parser, trailing_breaks)
2684 }
2685 }
2686 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
2687 return false
2688 }
2689 }
2690
2691 // Check indentation level.
2692 if parser.flow_level == 0 && parser.mark.column < indent {
2693 break
2694 }
2695 }
2696
2697 // Create a token.
2698 *token = yaml_token_t{
2699 typ: yaml_SCALAR_TOKEN,
2700 start_mark: start_mark,
2701 end_mark: end_mark,
2702 value: s,
2703 style: yaml_PLAIN_SCALAR_STYLE,
2704 }
2705
2706 // Note that we change the 'simple_key_allowed' flag.
2707 if leading_blanks {
2708 parser.simple_key_allowed = true
2709 }
2710 return true
2711}