Mercurial > yakumo_izuru > aya
comparison vendor/gopkg.in/yaml.v2/scannerc.go @ 66:787b5ee0289d draft
Use vendored modules
Signed-off-by: Izuru Yakumo <yakumo.izuru@chaotic.ninja>
author | yakumo.izuru |
---|---|
date | Sun, 23 Jul 2023 13:18:53 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
65:6d985efa0f7a | 66:787b5ee0289d |
---|---|
1 package yaml | |
2 | |
3 import ( | |
4 "bytes" | |
5 "fmt" | |
6 ) | |
7 | |
8 // Introduction | |
9 // ************ | |
10 // | |
11 // The following notes assume that you are familiar with the YAML specification | |
12 // (http://yaml.org/spec/1.2/spec.html). We mostly follow it, although in | |
13 // some cases we are less restrictive than it requires. | |
14 // | |
15 // The process of transforming a YAML stream into a sequence of events is | |
16 // divided into two steps: Scanning and Parsing. | |
17 // | |
18 // The Scanner transforms the input stream into a sequence of tokens, while the | |
19 // Parser transforms the sequence of tokens produced by the Scanner into a | |
20 // sequence of parsing events. | |
21 // | |
22 // The Scanner is rather clever and complicated. The Parser, on the contrary, | |
23 // is a straightforward implementation of a recursive-descent parser (or, | |
24 // LL(1) parser, as it is usually called). | |
25 // | |
26 // Actually there are two issues of Scanning that might be called "clever", the | |
27 // rest is quite straightforward. The issues are "block collection start" and | |
28 // "simple keys". Both issues are explained below in detail. | |
29 // | |
30 // Here the Scanning step is explained and implemented. We start with the list | |
31 // of all the tokens produced by the Scanner together with short descriptions. | |
32 // | |
33 // Now, tokens: | |
34 // | |
35 // STREAM-START(encoding) # The stream start. | |
36 // STREAM-END # The stream end. | |
37 // VERSION-DIRECTIVE(major,minor) # The '%YAML' directive. | |
38 // TAG-DIRECTIVE(handle,prefix) # The '%TAG' directive. | |
39 // DOCUMENT-START # '---' | |
40 // DOCUMENT-END # '...' | |
41 // BLOCK-SEQUENCE-START # Indentation increase denoting a block | |
42 // BLOCK-MAPPING-START # sequence or a block mapping. | |
43 // BLOCK-END # Indentation decrease. | |
44 // FLOW-SEQUENCE-START # '[' | |
45 // FLOW-SEQUENCE-END # ']' | |
46 // FLOW-MAPPING-START # '{' | |
47 // FLOW-MAPPING-END # '}' | |
48 // BLOCK-ENTRY # '-' | |
49 // FLOW-ENTRY # ',' | |
50 // KEY # '?' or nothing (simple keys). | |
51 // VALUE # ':' | |
52 // ALIAS(anchor) # '*anchor' | |
53 // ANCHOR(anchor) # '&anchor' | |
54 // TAG(handle,suffix) # '!handle!suffix' | |
55 // SCALAR(value,style) # A scalar. | |
56 // | |
57 // The following two tokens are "virtual" tokens denoting the beginning and the | |
58 // end of the stream: | |
59 // | |
60 // STREAM-START(encoding) | |
61 // STREAM-END | |
62 // | |
63 // We pass the information about the input stream encoding with the | |
64 // STREAM-START token. | |
65 // | |
66 // The next two tokens are responsible for directives: | |
67 // | |
68 // VERSION-DIRECTIVE(major,minor) | |
69 // TAG-DIRECTIVE(handle,prefix) | |
70 // | |
71 // Example: | |
72 // | |
73 // %YAML 1.1 | |
74 // %TAG ! !foo | |
75 // %TAG !yaml! tag:yaml.org,2002: | |
76 // --- | |
77 // | |
78 // The corresponding sequence of tokens: | |
79 // | |
80 // STREAM-START(utf-8) | |
81 // VERSION-DIRECTIVE(1,1) | |
82 // TAG-DIRECTIVE("!","!foo") | |
83 // TAG-DIRECTIVE("!yaml!","tag:yaml.org,2002:") | |
84 // DOCUMENT-START | |
85 // STREAM-END | |
86 // | |
87 // Note that the VERSION-DIRECTIVE and TAG-DIRECTIVE tokens occupy a whole | |
88 // line. | |
89 // | |
90 // The document start and end indicators are represented by: | |
91 // | |
92 // DOCUMENT-START | |
93 // DOCUMENT-END | |
94 // | |
95 // Note that if a YAML stream contains an implicit document (without '---' | |
96 // and '...' indicators), no DOCUMENT-START and DOCUMENT-END tokens will be | |
97 // produced. | |
98 // | |
99 // In the following examples, we present whole documents together with the | |
100 // produced tokens. | |
101 // | |
102 // 1. An implicit document: | |
103 // | |
104 // 'a scalar' | |
105 // | |
106 // Tokens: | |
107 // | |
108 // STREAM-START(utf-8) | |
109 // SCALAR("a scalar",single-quoted) | |
110 // STREAM-END | |
111 // | |
112 // 2. An explicit document: | |
113 // | |
114 // --- | |
115 // 'a scalar' | |
116 // ... | |
117 // | |
118 // Tokens: | |
119 // | |
120 // STREAM-START(utf-8) | |
121 // DOCUMENT-START | |
122 // SCALAR("a scalar",single-quoted) | |
123 // DOCUMENT-END | |
124 // STREAM-END | |
125 // | |
126 // 3. Several documents in a stream: | |
127 // | |
128 // 'a scalar' | |
129 // --- | |
130 // 'another scalar' | |
131 // --- | |
132 // 'yet another scalar' | |
133 // | |
134 // Tokens: | |
135 // | |
136 // STREAM-START(utf-8) | |
137 // SCALAR("a scalar",single-quoted) | |
138 // DOCUMENT-START | |
139 // SCALAR("another scalar",single-quoted) | |
140 // DOCUMENT-START | |
141 // SCALAR("yet another scalar",single-quoted) | |
142 // STREAM-END | |
143 // | |
144 // We have already introduced the SCALAR token above. The following tokens are | |
145 // used to describe aliases, anchors, tag, and scalars: | |
146 // | |
147 // ALIAS(anchor) | |
148 // ANCHOR(anchor) | |
149 // TAG(handle,suffix) | |
150 // SCALAR(value,style) | |
151 // | |
152 // The following series of examples illustrate the usage of these tokens: | |
153 // | |
154 // 1. A recursive sequence: | |
155 // | |
156 // &A [ *A ] | |
157 // | |
158 // Tokens: | |
159 // | |
160 // STREAM-START(utf-8) | |
161 // ANCHOR("A") | |
162 // FLOW-SEQUENCE-START | |
163 // ALIAS("A") | |
164 // FLOW-SEQUENCE-END | |
165 // STREAM-END | |
166 // | |
167 // 2. A tagged scalar: | |
168 // | |
169 // !!float "3.14" # A good approximation. | |
170 // | |
171 // Tokens: | |
172 // | |
173 // STREAM-START(utf-8) | |
174 // TAG("!!","float") | |
175 // SCALAR("3.14",double-quoted) | |
176 // STREAM-END | |
177 // | |
178 // 3. Various scalar styles: | |
179 // | |
180 // --- # Implicit empty plain scalars do not produce tokens. | |
181 // --- a plain scalar | |
182 // --- 'a single-quoted scalar' | |
183 // --- "a double-quoted scalar" | |
184 // --- |- | |
185 // a literal scalar | |
186 // --- >- | |
187 // a folded | |
188 // scalar | |
189 // | |
190 // Tokens: | |
191 // | |
192 // STREAM-START(utf-8) | |
193 // DOCUMENT-START | |
194 // DOCUMENT-START | |
195 // SCALAR("a plain scalar",plain) | |
196 // DOCUMENT-START | |
197 // SCALAR("a single-quoted scalar",single-quoted) | |
198 // DOCUMENT-START | |
199 // SCALAR("a double-quoted scalar",double-quoted) | |
200 // DOCUMENT-START | |
201 // SCALAR("a literal scalar",literal) | |
202 // DOCUMENT-START | |
203 // SCALAR("a folded scalar",folded) | |
204 // STREAM-END | |
205 // | |
206 // Now it's time to review collection-related tokens. We will start with | |
207 // flow collections: | |
208 // | |
209 // FLOW-SEQUENCE-START | |
210 // FLOW-SEQUENCE-END | |
211 // FLOW-MAPPING-START | |
212 // FLOW-MAPPING-END | |
213 // FLOW-ENTRY | |
214 // KEY | |
215 // VALUE | |
216 // | |
217 // The tokens FLOW-SEQUENCE-START, FLOW-SEQUENCE-END, FLOW-MAPPING-START, and | |
218 // FLOW-MAPPING-END represent the indicators '[', ']', '{', and '}' | |
219 // correspondingly. FLOW-ENTRY represents the ',' indicator. Finally the | |
220 // indicators '?' and ':', which are used for denoting mapping keys and values, | |
221 // are represented by the KEY and VALUE tokens. | |
222 // | |
223 // The following examples show flow collections: | |
224 // | |
225 // 1. A flow sequence: | |
226 // | |
227 // [item 1, item 2, item 3] | |
228 // | |
229 // Tokens: | |
230 // | |
231 // STREAM-START(utf-8) | |
232 // FLOW-SEQUENCE-START | |
233 // SCALAR("item 1",plain) | |
234 // FLOW-ENTRY | |
235 // SCALAR("item 2",plain) | |
236 // FLOW-ENTRY | |
237 // SCALAR("item 3",plain) | |
238 // FLOW-SEQUENCE-END | |
239 // STREAM-END | |
240 // | |
241 // 2. A flow mapping: | |
242 // | |
243 // { | |
244 // a simple key: a value, # Note that the KEY token is produced. | |
245 // ? a complex key: another value, | |
246 // } | |
247 // | |
248 // Tokens: | |
249 // | |
250 // STREAM-START(utf-8) | |
251 // FLOW-MAPPING-START | |
252 // KEY | |
253 // SCALAR("a simple key",plain) | |
254 // VALUE | |
255 // SCALAR("a value",plain) | |
256 // FLOW-ENTRY | |
257 // KEY | |
258 // SCALAR("a complex key",plain) | |
259 // VALUE | |
260 // SCALAR("another value",plain) | |
261 // FLOW-ENTRY | |
262 // FLOW-MAPPING-END | |
263 // STREAM-END | |
264 // | |
265 // A simple key is a key which is not denoted by the '?' indicator. Note that | |
266 // the Scanner still produces the KEY token whenever it encounters a simple key. | |
267 // | |
268 // For scanning block collections, the following tokens are used (note that we | |
269 // repeat KEY and VALUE here): | |
270 // | |
271 // BLOCK-SEQUENCE-START | |
272 // BLOCK-MAPPING-START | |
273 // BLOCK-END | |
274 // BLOCK-ENTRY | |
275 // KEY | |
276 // VALUE | |
277 // | |
278 // The tokens BLOCK-SEQUENCE-START and BLOCK-MAPPING-START denote indentation | |
279 // increase that precedes a block collection (cf. the INDENT token in Python). | |
280 // The token BLOCK-END denotes indentation decrease that ends a block collection | |
281 // (cf. the DEDENT token in Python). However YAML has some syntax peculiarities | |
282 // that make detection of these tokens more complex. | |
283 // | |
284 // The tokens BLOCK-ENTRY, KEY, and VALUE are used to represent the indicators | |
285 // '-', '?', and ':' correspondingly. | |
286 // | |
287 // The following examples show how the tokens BLOCK-SEQUENCE-START, | |
288 // BLOCK-MAPPING-START, and BLOCK-END are emitted by the Scanner: | |
289 // | |
290 // 1. Block sequences: | |
291 // | |
292 // - item 1 | |
293 // - item 2 | |
294 // - | |
295 // - item 3.1 | |
296 // - item 3.2 | |
297 // - | |
298 // key 1: value 1 | |
299 // key 2: value 2 | |
300 // | |
301 // Tokens: | |
302 // | |
303 // STREAM-START(utf-8) | |
304 // BLOCK-SEQUENCE-START | |
305 // BLOCK-ENTRY | |
306 // SCALAR("item 1",plain) | |
307 // BLOCK-ENTRY | |
308 // SCALAR("item 2",plain) | |
309 // BLOCK-ENTRY | |
310 // BLOCK-SEQUENCE-START | |
311 // BLOCK-ENTRY | |
312 // SCALAR("item 3.1",plain) | |
313 // BLOCK-ENTRY | |
314 // SCALAR("item 3.2",plain) | |
315 // BLOCK-END | |
316 // BLOCK-ENTRY | |
317 // BLOCK-MAPPING-START | |
318 // KEY | |
319 // SCALAR("key 1",plain) | |
320 // VALUE | |
321 // SCALAR("value 1",plain) | |
322 // KEY | |
323 // SCALAR("key 2",plain) | |
324 // VALUE | |
325 // SCALAR("value 2",plain) | |
326 // BLOCK-END | |
327 // BLOCK-END | |
328 // STREAM-END | |
329 // | |
330 // 2. Block mappings: | |
331 // | |
332 // a simple key: a value # The KEY token is produced here. | |
333 // ? a complex key | |
334 // : another value | |
335 // a mapping: | |
336 // key 1: value 1 | |
337 // key 2: value 2 | |
338 // a sequence: | |
339 // - item 1 | |
340 // - item 2 | |
341 // | |
342 // Tokens: | |
343 // | |
344 // STREAM-START(utf-8) | |
345 // BLOCK-MAPPING-START | |
346 // KEY | |
347 // SCALAR("a simple key",plain) | |
348 // VALUE | |
349 // SCALAR("a value",plain) | |
350 // KEY | |
351 // SCALAR("a complex key",plain) | |
352 // VALUE | |
353 // SCALAR("another value",plain) | |
354 // KEY | |
355 // SCALAR("a mapping",plain) | |
356 // BLOCK-MAPPING-START | |
357 // KEY | |
358 // SCALAR("key 1",plain) | |
359 // VALUE | |
360 // SCALAR("value 1",plain) | |
361 // KEY | |
362 // SCALAR("key 2",plain) | |
363 // VALUE | |
364 // SCALAR("value 2",plain) | |
365 // BLOCK-END | |
366 // KEY | |
367 // SCALAR("a sequence",plain) | |
368 // VALUE | |
369 // BLOCK-SEQUENCE-START | |
370 // BLOCK-ENTRY | |
371 // SCALAR("item 1",plain) | |
372 // BLOCK-ENTRY | |
373 // SCALAR("item 2",plain) | |
374 // BLOCK-END | |
375 // BLOCK-END | |
376 // STREAM-END | |
377 // | |
378 // YAML does not always require to start a new block collection from a new | |
379 // line. If the current line contains only '-', '?', and ':' indicators, a new | |
380 // block collection may start at the current line. The following examples | |
381 // illustrate this case: | |
382 // | |
383 // 1. Collections in a sequence: | |
384 // | |
385 // - - item 1 | |
386 // - item 2 | |
387 // - key 1: value 1 | |
388 // key 2: value 2 | |
389 // - ? complex key | |
390 // : complex value | |
391 // | |
392 // Tokens: | |
393 // | |
394 // STREAM-START(utf-8) | |
395 // BLOCK-SEQUENCE-START | |
396 // BLOCK-ENTRY | |
397 // BLOCK-SEQUENCE-START | |
398 // BLOCK-ENTRY | |
399 // SCALAR("item 1",plain) | |
400 // BLOCK-ENTRY | |
401 // SCALAR("item 2",plain) | |
402 // BLOCK-END | |
403 // BLOCK-ENTRY | |
404 // BLOCK-MAPPING-START | |
405 // KEY | |
406 // SCALAR("key 1",plain) | |
407 // VALUE | |
408 // SCALAR("value 1",plain) | |
409 // KEY | |
410 // SCALAR("key 2",plain) | |
411 // VALUE | |
412 // SCALAR("value 2",plain) | |
413 // BLOCK-END | |
414 // BLOCK-ENTRY | |
415 // BLOCK-MAPPING-START | |
416 // KEY | |
417 // SCALAR("complex key",plain) | |
418 // VALUE | |
419 // SCALAR("complex value",plain) | |
420 // BLOCK-END | |
421 // BLOCK-END | |
422 // STREAM-END | |
423 // | |
424 // 2. Collections in a mapping: | |
425 // | |
426 // ? a sequence | |
427 // : - item 1 | |
428 // - item 2 | |
429 // ? a mapping | |
430 // : key 1: value 1 | |
431 // key 2: value 2 | |
432 // | |
433 // Tokens: | |
434 // | |
435 // STREAM-START(utf-8) | |
436 // BLOCK-MAPPING-START | |
437 // KEY | |
438 // SCALAR("a sequence",plain) | |
439 // VALUE | |
440 // BLOCK-SEQUENCE-START | |
441 // BLOCK-ENTRY | |
442 // SCALAR("item 1",plain) | |
443 // BLOCK-ENTRY | |
444 // SCALAR("item 2",plain) | |
445 // BLOCK-END | |
446 // KEY | |
447 // SCALAR("a mapping",plain) | |
448 // VALUE | |
449 // BLOCK-MAPPING-START | |
450 // KEY | |
451 // SCALAR("key 1",plain) | |
452 // VALUE | |
453 // SCALAR("value 1",plain) | |
454 // KEY | |
455 // SCALAR("key 2",plain) | |
456 // VALUE | |
457 // SCALAR("value 2",plain) | |
458 // BLOCK-END | |
459 // BLOCK-END | |
460 // STREAM-END | |
461 // | |
462 // YAML also permits non-indented sequences if they are included into a block | |
463 // mapping. In this case, the token BLOCK-SEQUENCE-START is not produced: | |
464 // | |
465 // key: | |
466 // - item 1 # BLOCK-SEQUENCE-START is NOT produced here. | |
467 // - item 2 | |
468 // | |
469 // Tokens: | |
470 // | |
471 // STREAM-START(utf-8) | |
472 // BLOCK-MAPPING-START | |
473 // KEY | |
474 // SCALAR("key",plain) | |
475 // VALUE | |
476 // BLOCK-ENTRY | |
477 // SCALAR("item 1",plain) | |
478 // BLOCK-ENTRY | |
479 // SCALAR("item 2",plain) | |
480 // BLOCK-END | |
481 // | |
482 | |
483 // Ensure that the buffer contains the required number of characters. | |
484 // Return true on success, false on failure (reader error or memory error). | |
485 func cache(parser *yaml_parser_t, length int) bool { | |
486 // [Go] This was inlined: !cache(A, B) -> unread < B && !update(A, B) | |
487 return parser.unread >= length || yaml_parser_update_buffer(parser, length) | |
488 } | |
489 | |
490 // Advance the buffer pointer. | |
491 func skip(parser *yaml_parser_t) { | |
492 parser.mark.index++ | |
493 parser.mark.column++ | |
494 parser.unread-- | |
495 parser.buffer_pos += width(parser.buffer[parser.buffer_pos]) | |
496 } | |
497 | |
498 func skip_line(parser *yaml_parser_t) { | |
499 if is_crlf(parser.buffer, parser.buffer_pos) { | |
500 parser.mark.index += 2 | |
501 parser.mark.column = 0 | |
502 parser.mark.line++ | |
503 parser.unread -= 2 | |
504 parser.buffer_pos += 2 | |
505 } else if is_break(parser.buffer, parser.buffer_pos) { | |
506 parser.mark.index++ | |
507 parser.mark.column = 0 | |
508 parser.mark.line++ | |
509 parser.unread-- | |
510 parser.buffer_pos += width(parser.buffer[parser.buffer_pos]) | |
511 } | |
512 } | |
513 | |
514 // Copy a character to a string buffer and advance pointers. | |
515 func read(parser *yaml_parser_t, s []byte) []byte { | |
516 w := width(parser.buffer[parser.buffer_pos]) | |
517 if w == 0 { | |
518 panic("invalid character sequence") | |
519 } | |
520 if len(s) == 0 { | |
521 s = make([]byte, 0, 32) | |
522 } | |
523 if w == 1 && len(s)+w <= cap(s) { | |
524 s = s[:len(s)+1] | |
525 s[len(s)-1] = parser.buffer[parser.buffer_pos] | |
526 parser.buffer_pos++ | |
527 } else { | |
528 s = append(s, parser.buffer[parser.buffer_pos:parser.buffer_pos+w]...) | |
529 parser.buffer_pos += w | |
530 } | |
531 parser.mark.index++ | |
532 parser.mark.column++ | |
533 parser.unread-- | |
534 return s | |
535 } | |
536 | |
537 // Copy a line break character to a string buffer and advance pointers. | |
538 func read_line(parser *yaml_parser_t, s []byte) []byte { | |
539 buf := parser.buffer | |
540 pos := parser.buffer_pos | |
541 switch { | |
542 case buf[pos] == '\r' && buf[pos+1] == '\n': | |
543 // CR LF . LF | |
544 s = append(s, '\n') | |
545 parser.buffer_pos += 2 | |
546 parser.mark.index++ | |
547 parser.unread-- | |
548 case buf[pos] == '\r' || buf[pos] == '\n': | |
549 // CR|LF . LF | |
550 s = append(s, '\n') | |
551 parser.buffer_pos += 1 | |
552 case buf[pos] == '\xC2' && buf[pos+1] == '\x85': | |
553 // NEL . LF | |
554 s = append(s, '\n') | |
555 parser.buffer_pos += 2 | |
556 case buf[pos] == '\xE2' && buf[pos+1] == '\x80' && (buf[pos+2] == '\xA8' || buf[pos+2] == '\xA9'): | |
557 // LS|PS . LS|PS | |
558 s = append(s, buf[parser.buffer_pos:pos+3]...) | |
559 parser.buffer_pos += 3 | |
560 default: | |
561 return s | |
562 } | |
563 parser.mark.index++ | |
564 parser.mark.column = 0 | |
565 parser.mark.line++ | |
566 parser.unread-- | |
567 return s | |
568 } | |
569 | |
570 // Get the next token. | |
571 func yaml_parser_scan(parser *yaml_parser_t, token *yaml_token_t) bool { | |
572 // Erase the token object. | |
573 *token = yaml_token_t{} // [Go] Is this necessary? | |
574 | |
575 // No tokens after STREAM-END or error. | |
576 if parser.stream_end_produced || parser.error != yaml_NO_ERROR { | |
577 return true | |
578 } | |
579 | |
580 // Ensure that the tokens queue contains enough tokens. | |
581 if !parser.token_available { | |
582 if !yaml_parser_fetch_more_tokens(parser) { | |
583 return false | |
584 } | |
585 } | |
586 | |
587 // Fetch the next token from the queue. | |
588 *token = parser.tokens[parser.tokens_head] | |
589 parser.tokens_head++ | |
590 parser.tokens_parsed++ | |
591 parser.token_available = false | |
592 | |
593 if token.typ == yaml_STREAM_END_TOKEN { | |
594 parser.stream_end_produced = true | |
595 } | |
596 return true | |
597 } | |
598 | |
599 // Set the scanner error and return false. | |
600 func yaml_parser_set_scanner_error(parser *yaml_parser_t, context string, context_mark yaml_mark_t, problem string) bool { | |
601 parser.error = yaml_SCANNER_ERROR | |
602 parser.context = context | |
603 parser.context_mark = context_mark | |
604 parser.problem = problem | |
605 parser.problem_mark = parser.mark | |
606 return false | |
607 } | |
608 | |
609 func yaml_parser_set_scanner_tag_error(parser *yaml_parser_t, directive bool, context_mark yaml_mark_t, problem string) bool { | |
610 context := "while parsing a tag" | |
611 if directive { | |
612 context = "while parsing a %TAG directive" | |
613 } | |
614 return yaml_parser_set_scanner_error(parser, context, context_mark, problem) | |
615 } | |
616 | |
// trace is a debugging aid. It prints args to standard output prefixed with
// "+++" straight away, and returns a function that prints the same args
// prefixed with "---" when called. The arguments are copied up front, so later
// changes to the caller's slice do not affect what the returned function
// prints.
func trace(args ...interface{}) func() {
	enter := make([]interface{}, 0, len(args)+1)
	enter = append(enter, "+++")
	enter = append(enter, args...)
	fmt.Println(enter...)

	// Same operands, different marker in the first slot.
	leave := make([]interface{}, len(enter))
	copy(leave, enter)
	leave[0] = "---"
	return func() {
		fmt.Println(leave...)
	}
}
623 | |
624 // Ensure that the tokens queue contains at least one token which can be | |
625 // returned to the Parser. | |
626 func yaml_parser_fetch_more_tokens(parser *yaml_parser_t) bool { | |
627 // While we need more tokens to fetch, do it. | |
628 for { | |
629 if parser.tokens_head != len(parser.tokens) { | |
630 // If queue is non-empty, check if any potential simple key may | |
631 // occupy the head position. | |
632 head_tok_idx, ok := parser.simple_keys_by_tok[parser.tokens_parsed] | |
633 if !ok { | |
634 break | |
635 } else if valid, ok := yaml_simple_key_is_valid(parser, &parser.simple_keys[head_tok_idx]); !ok { | |
636 return false | |
637 } else if !valid { | |
638 break | |
639 } | |
640 } | |
641 // Fetch the next token. | |
642 if !yaml_parser_fetch_next_token(parser) { | |
643 return false | |
644 } | |
645 } | |
646 | |
647 parser.token_available = true | |
648 return true | |
649 } | |
650 | |
651 // The dispatcher for token fetchers. | |
652 func yaml_parser_fetch_next_token(parser *yaml_parser_t) bool { | |
653 // Ensure that the buffer is initialized. | |
654 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { | |
655 return false | |
656 } | |
657 | |
658 // Check if we just started scanning. Fetch STREAM-START then. | |
659 if !parser.stream_start_produced { | |
660 return yaml_parser_fetch_stream_start(parser) | |
661 } | |
662 | |
663 // Eat whitespaces and comments until we reach the next token. | |
664 if !yaml_parser_scan_to_next_token(parser) { | |
665 return false | |
666 } | |
667 | |
668 // Check the indentation level against the current column. | |
669 if !yaml_parser_unroll_indent(parser, parser.mark.column) { | |
670 return false | |
671 } | |
672 | |
673 // Ensure that the buffer contains at least 4 characters. 4 is the length | |
674 // of the longest indicators ('--- ' and '... '). | |
675 if parser.unread < 4 && !yaml_parser_update_buffer(parser, 4) { | |
676 return false | |
677 } | |
678 | |
679 // Is it the end of the stream? | |
680 if is_z(parser.buffer, parser.buffer_pos) { | |
681 return yaml_parser_fetch_stream_end(parser) | |
682 } | |
683 | |
684 // Is it a directive? | |
685 if parser.mark.column == 0 && parser.buffer[parser.buffer_pos] == '%' { | |
686 return yaml_parser_fetch_directive(parser) | |
687 } | |
688 | |
689 buf := parser.buffer | |
690 pos := parser.buffer_pos | |
691 | |
692 // Is it the document start indicator? | |
693 if parser.mark.column == 0 && buf[pos] == '-' && buf[pos+1] == '-' && buf[pos+2] == '-' && is_blankz(buf, pos+3) { | |
694 return yaml_parser_fetch_document_indicator(parser, yaml_DOCUMENT_START_TOKEN) | |
695 } | |
696 | |
697 // Is it the document end indicator? | |
698 if parser.mark.column == 0 && buf[pos] == '.' && buf[pos+1] == '.' && buf[pos+2] == '.' && is_blankz(buf, pos+3) { | |
699 return yaml_parser_fetch_document_indicator(parser, yaml_DOCUMENT_END_TOKEN) | |
700 } | |
701 | |
702 // Is it the flow sequence start indicator? | |
703 if buf[pos] == '[' { | |
704 return yaml_parser_fetch_flow_collection_start(parser, yaml_FLOW_SEQUENCE_START_TOKEN) | |
705 } | |
706 | |
707 // Is it the flow mapping start indicator? | |
708 if parser.buffer[parser.buffer_pos] == '{' { | |
709 return yaml_parser_fetch_flow_collection_start(parser, yaml_FLOW_MAPPING_START_TOKEN) | |
710 } | |
711 | |
712 // Is it the flow sequence end indicator? | |
713 if parser.buffer[parser.buffer_pos] == ']' { | |
714 return yaml_parser_fetch_flow_collection_end(parser, | |
715 yaml_FLOW_SEQUENCE_END_TOKEN) | |
716 } | |
717 | |
718 // Is it the flow mapping end indicator? | |
719 if parser.buffer[parser.buffer_pos] == '}' { | |
720 return yaml_parser_fetch_flow_collection_end(parser, | |
721 yaml_FLOW_MAPPING_END_TOKEN) | |
722 } | |
723 | |
724 // Is it the flow entry indicator? | |
725 if parser.buffer[parser.buffer_pos] == ',' { | |
726 return yaml_parser_fetch_flow_entry(parser) | |
727 } | |
728 | |
729 // Is it the block entry indicator? | |
730 if parser.buffer[parser.buffer_pos] == '-' && is_blankz(parser.buffer, parser.buffer_pos+1) { | |
731 return yaml_parser_fetch_block_entry(parser) | |
732 } | |
733 | |
734 // Is it the key indicator? | |
735 if parser.buffer[parser.buffer_pos] == '?' && (parser.flow_level > 0 || is_blankz(parser.buffer, parser.buffer_pos+1)) { | |
736 return yaml_parser_fetch_key(parser) | |
737 } | |
738 | |
739 // Is it the value indicator? | |
740 if parser.buffer[parser.buffer_pos] == ':' && (parser.flow_level > 0 || is_blankz(parser.buffer, parser.buffer_pos+1)) { | |
741 return yaml_parser_fetch_value(parser) | |
742 } | |
743 | |
744 // Is it an alias? | |
745 if parser.buffer[parser.buffer_pos] == '*' { | |
746 return yaml_parser_fetch_anchor(parser, yaml_ALIAS_TOKEN) | |
747 } | |
748 | |
749 // Is it an anchor? | |
750 if parser.buffer[parser.buffer_pos] == '&' { | |
751 return yaml_parser_fetch_anchor(parser, yaml_ANCHOR_TOKEN) | |
752 } | |
753 | |
754 // Is it a tag? | |
755 if parser.buffer[parser.buffer_pos] == '!' { | |
756 return yaml_parser_fetch_tag(parser) | |
757 } | |
758 | |
759 // Is it a literal scalar? | |
760 if parser.buffer[parser.buffer_pos] == '|' && parser.flow_level == 0 { | |
761 return yaml_parser_fetch_block_scalar(parser, true) | |
762 } | |
763 | |
764 // Is it a folded scalar? | |
765 if parser.buffer[parser.buffer_pos] == '>' && parser.flow_level == 0 { | |
766 return yaml_parser_fetch_block_scalar(parser, false) | |
767 } | |
768 | |
769 // Is it a single-quoted scalar? | |
770 if parser.buffer[parser.buffer_pos] == '\'' { | |
771 return yaml_parser_fetch_flow_scalar(parser, true) | |
772 } | |
773 | |
774 // Is it a double-quoted scalar? | |
775 if parser.buffer[parser.buffer_pos] == '"' { | |
776 return yaml_parser_fetch_flow_scalar(parser, false) | |
777 } | |
778 | |
779 // Is it a plain scalar? | |
780 // | |
781 // A plain scalar may start with any non-blank characters except | |
782 // | |
783 // '-', '?', ':', ',', '[', ']', '{', '}', | |
784 // '#', '&', '*', '!', '|', '>', '\'', '\"', | |
785 // '%', '@', '`'. | |
786 // | |
787 // In the block context (and, for the '-' indicator, in the flow context | |
788 // too), it may also start with the characters | |
789 // | |
790 // '-', '?', ':' | |
791 // | |
792 // if it is followed by a non-space character. | |
793 // | |
794 // The last rule is more restrictive than the specification requires. | |
795 // [Go] Make this logic more reasonable. | |
796 //switch parser.buffer[parser.buffer_pos] { | |
797 //case '-', '?', ':', ',', '?', '-', ',', ':', ']', '[', '}', '{', '&', '#', '!', '*', '>', '|', '"', '\'', '@', '%', '-', '`': | |
798 //} | |
799 if !(is_blankz(parser.buffer, parser.buffer_pos) || parser.buffer[parser.buffer_pos] == '-' || | |
800 parser.buffer[parser.buffer_pos] == '?' || parser.buffer[parser.buffer_pos] == ':' || | |
801 parser.buffer[parser.buffer_pos] == ',' || parser.buffer[parser.buffer_pos] == '[' || | |
802 parser.buffer[parser.buffer_pos] == ']' || parser.buffer[parser.buffer_pos] == '{' || | |
803 parser.buffer[parser.buffer_pos] == '}' || parser.buffer[parser.buffer_pos] == '#' || | |
804 parser.buffer[parser.buffer_pos] == '&' || parser.buffer[parser.buffer_pos] == '*' || | |
805 parser.buffer[parser.buffer_pos] == '!' || parser.buffer[parser.buffer_pos] == '|' || | |
806 parser.buffer[parser.buffer_pos] == '>' || parser.buffer[parser.buffer_pos] == '\'' || | |
807 parser.buffer[parser.buffer_pos] == '"' || parser.buffer[parser.buffer_pos] == '%' || | |
808 parser.buffer[parser.buffer_pos] == '@' || parser.buffer[parser.buffer_pos] == '`') || | |
809 (parser.buffer[parser.buffer_pos] == '-' && !is_blank(parser.buffer, parser.buffer_pos+1)) || | |
810 (parser.flow_level == 0 && | |
811 (parser.buffer[parser.buffer_pos] == '?' || parser.buffer[parser.buffer_pos] == ':') && | |
812 !is_blankz(parser.buffer, parser.buffer_pos+1)) { | |
813 return yaml_parser_fetch_plain_scalar(parser) | |
814 } | |
815 | |
816 // If we don't determine the token type so far, it is an error. | |
817 return yaml_parser_set_scanner_error(parser, | |
818 "while scanning for the next token", parser.mark, | |
819 "found character that cannot start any token") | |
820 } | |
821 | |
822 func yaml_simple_key_is_valid(parser *yaml_parser_t, simple_key *yaml_simple_key_t) (valid, ok bool) { | |
823 if !simple_key.possible { | |
824 return false, true | |
825 } | |
826 | |
827 // The 1.2 specification says: | |
828 // | |
829 // "If the ? indicator is omitted, parsing needs to see past the | |
830 // implicit key to recognize it as such. To limit the amount of | |
831 // lookahead required, the “:” indicator must appear at most 1024 | |
832 // Unicode characters beyond the start of the key. In addition, the key | |
833 // is restricted to a single line." | |
834 // | |
835 if simple_key.mark.line < parser.mark.line || simple_key.mark.index+1024 < parser.mark.index { | |
836 // Check if the potential simple key to be removed is required. | |
837 if simple_key.required { | |
838 return false, yaml_parser_set_scanner_error(parser, | |
839 "while scanning a simple key", simple_key.mark, | |
840 "could not find expected ':'") | |
841 } | |
842 simple_key.possible = false | |
843 return false, true | |
844 } | |
845 return true, true | |
846 } | |
847 | |
848 // Check if a simple key may start at the current position and add it if | |
849 // needed. | |
850 func yaml_parser_save_simple_key(parser *yaml_parser_t) bool { | |
851 // A simple key is required at the current position if the scanner is in | |
852 // the block context and the current column coincides with the indentation | |
853 // level. | |
854 | |
855 required := parser.flow_level == 0 && parser.indent == parser.mark.column | |
856 | |
857 // | |
858 // If the current position may start a simple key, save it. | |
859 // | |
860 if parser.simple_key_allowed { | |
861 simple_key := yaml_simple_key_t{ | |
862 possible: true, | |
863 required: required, | |
864 token_number: parser.tokens_parsed + (len(parser.tokens) - parser.tokens_head), | |
865 mark: parser.mark, | |
866 } | |
867 | |
868 if !yaml_parser_remove_simple_key(parser) { | |
869 return false | |
870 } | |
871 parser.simple_keys[len(parser.simple_keys)-1] = simple_key | |
872 parser.simple_keys_by_tok[simple_key.token_number] = len(parser.simple_keys) - 1 | |
873 } | |
874 return true | |
875 } | |
876 | |
877 // Remove a potential simple key at the current flow level. | |
878 func yaml_parser_remove_simple_key(parser *yaml_parser_t) bool { | |
879 i := len(parser.simple_keys) - 1 | |
880 if parser.simple_keys[i].possible { | |
881 // If the key is required, it is an error. | |
882 if parser.simple_keys[i].required { | |
883 return yaml_parser_set_scanner_error(parser, | |
884 "while scanning a simple key", parser.simple_keys[i].mark, | |
885 "could not find expected ':'") | |
886 } | |
887 // Remove the key from the stack. | |
888 parser.simple_keys[i].possible = false | |
889 delete(parser.simple_keys_by_tok, parser.simple_keys[i].token_number) | |
890 } | |
891 return true | |
892 } | |
893 | |
894 // max_flow_level limits the flow_level | |
895 const max_flow_level = 10000 | |
896 | |
897 // Increase the flow level and resize the simple key list if needed. | |
898 func yaml_parser_increase_flow_level(parser *yaml_parser_t) bool { | |
899 // Reset the simple key on the next level. | |
900 parser.simple_keys = append(parser.simple_keys, yaml_simple_key_t{ | |
901 possible: false, | |
902 required: false, | |
903 token_number: parser.tokens_parsed + (len(parser.tokens) - parser.tokens_head), | |
904 mark: parser.mark, | |
905 }) | |
906 | |
907 // Increase the flow level. | |
908 parser.flow_level++ | |
909 if parser.flow_level > max_flow_level { | |
910 return yaml_parser_set_scanner_error(parser, | |
911 "while increasing flow level", parser.simple_keys[len(parser.simple_keys)-1].mark, | |
912 fmt.Sprintf("exceeded max depth of %d", max_flow_level)) | |
913 } | |
914 return true | |
915 } | |
916 | |
917 // Decrease the flow level. | |
918 func yaml_parser_decrease_flow_level(parser *yaml_parser_t) bool { | |
919 if parser.flow_level > 0 { | |
920 parser.flow_level-- | |
921 last := len(parser.simple_keys) - 1 | |
922 delete(parser.simple_keys_by_tok, parser.simple_keys[last].token_number) | |
923 parser.simple_keys = parser.simple_keys[:last] | |
924 } | |
925 return true | |
926 } | |
927 | |
928 // max_indents limits the indents stack size | |
929 const max_indents = 10000 | |
930 | |
931 // Push the current indentation level to the stack and set the new level | |
932 // the current column is greater than the indentation level. In this case, | |
933 // append or insert the specified token into the token queue. | |
934 func yaml_parser_roll_indent(parser *yaml_parser_t, column, number int, typ yaml_token_type_t, mark yaml_mark_t) bool { | |
935 // In the flow context, do nothing. | |
936 if parser.flow_level > 0 { | |
937 return true | |
938 } | |
939 | |
940 if parser.indent < column { | |
941 // Push the current indentation level to the stack and set the new | |
942 // indentation level. | |
943 parser.indents = append(parser.indents, parser.indent) | |
944 parser.indent = column | |
945 if len(parser.indents) > max_indents { | |
946 return yaml_parser_set_scanner_error(parser, | |
947 "while increasing indent level", parser.simple_keys[len(parser.simple_keys)-1].mark, | |
948 fmt.Sprintf("exceeded max depth of %d", max_indents)) | |
949 } | |
950 | |
951 // Create a token and insert it into the queue. | |
952 token := yaml_token_t{ | |
953 typ: typ, | |
954 start_mark: mark, | |
955 end_mark: mark, | |
956 } | |
957 if number > -1 { | |
958 number -= parser.tokens_parsed | |
959 } | |
960 yaml_insert_token(parser, number, &token) | |
961 } | |
962 return true | |
963 } | |
964 | |
965 // Pop indentation levels from the indents stack until the current level | |
966 // becomes less or equal to the column. For each indentation level, append | |
967 // the BLOCK-END token. | |
968 func yaml_parser_unroll_indent(parser *yaml_parser_t, column int) bool { | |
969 // In the flow context, do nothing. | |
970 if parser.flow_level > 0 { | |
971 return true | |
972 } | |
973 | |
974 // Loop through the indentation levels in the stack. | |
975 for parser.indent > column { | |
976 // Create a token and append it to the queue. | |
977 token := yaml_token_t{ | |
978 typ: yaml_BLOCK_END_TOKEN, | |
979 start_mark: parser.mark, | |
980 end_mark: parser.mark, | |
981 } | |
982 yaml_insert_token(parser, -1, &token) | |
983 | |
984 // Pop the indentation level. | |
985 parser.indent = parser.indents[len(parser.indents)-1] | |
986 parser.indents = parser.indents[:len(parser.indents)-1] | |
987 } | |
988 return true | |
989 } | |
990 | |
991 // Initialize the scanner and produce the STREAM-START token. | |
992 func yaml_parser_fetch_stream_start(parser *yaml_parser_t) bool { | |
993 | |
994 // Set the initial indentation. | |
995 parser.indent = -1 | |
996 | |
997 // Initialize the simple key stack. | |
998 parser.simple_keys = append(parser.simple_keys, yaml_simple_key_t{}) | |
999 | |
1000 parser.simple_keys_by_tok = make(map[int]int) | |
1001 | |
1002 // A simple key is allowed at the beginning of the stream. | |
1003 parser.simple_key_allowed = true | |
1004 | |
1005 // We have started. | |
1006 parser.stream_start_produced = true | |
1007 | |
1008 // Create the STREAM-START token and append it to the queue. | |
1009 token := yaml_token_t{ | |
1010 typ: yaml_STREAM_START_TOKEN, | |
1011 start_mark: parser.mark, | |
1012 end_mark: parser.mark, | |
1013 encoding: parser.encoding, | |
1014 } | |
1015 yaml_insert_token(parser, -1, &token) | |
1016 return true | |
1017 } | |
1018 | |
1019 // Produce the STREAM-END token and shut down the scanner. | |
1020 func yaml_parser_fetch_stream_end(parser *yaml_parser_t) bool { | |
1021 | |
1022 // Force new line. | |
1023 if parser.mark.column != 0 { | |
1024 parser.mark.column = 0 | |
1025 parser.mark.line++ | |
1026 } | |
1027 | |
1028 // Reset the indentation level. | |
1029 if !yaml_parser_unroll_indent(parser, -1) { | |
1030 return false | |
1031 } | |
1032 | |
1033 // Reset simple keys. | |
1034 if !yaml_parser_remove_simple_key(parser) { | |
1035 return false | |
1036 } | |
1037 | |
1038 parser.simple_key_allowed = false | |
1039 | |
1040 // Create the STREAM-END token and append it to the queue. | |
1041 token := yaml_token_t{ | |
1042 typ: yaml_STREAM_END_TOKEN, | |
1043 start_mark: parser.mark, | |
1044 end_mark: parser.mark, | |
1045 } | |
1046 yaml_insert_token(parser, -1, &token) | |
1047 return true | |
1048 } | |
1049 | |
1050 // Produce a VERSION-DIRECTIVE or TAG-DIRECTIVE token. | |
1051 func yaml_parser_fetch_directive(parser *yaml_parser_t) bool { | |
1052 // Reset the indentation level. | |
1053 if !yaml_parser_unroll_indent(parser, -1) { | |
1054 return false | |
1055 } | |
1056 | |
1057 // Reset simple keys. | |
1058 if !yaml_parser_remove_simple_key(parser) { | |
1059 return false | |
1060 } | |
1061 | |
1062 parser.simple_key_allowed = false | |
1063 | |
1064 // Create the YAML-DIRECTIVE or TAG-DIRECTIVE token. | |
1065 token := yaml_token_t{} | |
1066 if !yaml_parser_scan_directive(parser, &token) { | |
1067 return false | |
1068 } | |
1069 // Append the token to the queue. | |
1070 yaml_insert_token(parser, -1, &token) | |
1071 return true | |
1072 } | |
1073 | |
1074 // Produce the DOCUMENT-START or DOCUMENT-END token. | |
1075 func yaml_parser_fetch_document_indicator(parser *yaml_parser_t, typ yaml_token_type_t) bool { | |
1076 // Reset the indentation level. | |
1077 if !yaml_parser_unroll_indent(parser, -1) { | |
1078 return false | |
1079 } | |
1080 | |
1081 // Reset simple keys. | |
1082 if !yaml_parser_remove_simple_key(parser) { | |
1083 return false | |
1084 } | |
1085 | |
1086 parser.simple_key_allowed = false | |
1087 | |
1088 // Consume the token. | |
1089 start_mark := parser.mark | |
1090 | |
1091 skip(parser) | |
1092 skip(parser) | |
1093 skip(parser) | |
1094 | |
1095 end_mark := parser.mark | |
1096 | |
1097 // Create the DOCUMENT-START or DOCUMENT-END token. | |
1098 token := yaml_token_t{ | |
1099 typ: typ, | |
1100 start_mark: start_mark, | |
1101 end_mark: end_mark, | |
1102 } | |
1103 // Append the token to the queue. | |
1104 yaml_insert_token(parser, -1, &token) | |
1105 return true | |
1106 } | |
1107 | |
1108 // Produce the FLOW-SEQUENCE-START or FLOW-MAPPING-START token. | |
1109 func yaml_parser_fetch_flow_collection_start(parser *yaml_parser_t, typ yaml_token_type_t) bool { | |
1110 // The indicators '[' and '{' may start a simple key. | |
1111 if !yaml_parser_save_simple_key(parser) { | |
1112 return false | |
1113 } | |
1114 | |
1115 // Increase the flow level. | |
1116 if !yaml_parser_increase_flow_level(parser) { | |
1117 return false | |
1118 } | |
1119 | |
1120 // A simple key may follow the indicators '[' and '{'. | |
1121 parser.simple_key_allowed = true | |
1122 | |
1123 // Consume the token. | |
1124 start_mark := parser.mark | |
1125 skip(parser) | |
1126 end_mark := parser.mark | |
1127 | |
1128 // Create the FLOW-SEQUENCE-START of FLOW-MAPPING-START token. | |
1129 token := yaml_token_t{ | |
1130 typ: typ, | |
1131 start_mark: start_mark, | |
1132 end_mark: end_mark, | |
1133 } | |
1134 // Append the token to the queue. | |
1135 yaml_insert_token(parser, -1, &token) | |
1136 return true | |
1137 } | |
1138 | |
1139 // Produce the FLOW-SEQUENCE-END or FLOW-MAPPING-END token. | |
1140 func yaml_parser_fetch_flow_collection_end(parser *yaml_parser_t, typ yaml_token_type_t) bool { | |
1141 // Reset any potential simple key on the current flow level. | |
1142 if !yaml_parser_remove_simple_key(parser) { | |
1143 return false | |
1144 } | |
1145 | |
1146 // Decrease the flow level. | |
1147 if !yaml_parser_decrease_flow_level(parser) { | |
1148 return false | |
1149 } | |
1150 | |
1151 // No simple keys after the indicators ']' and '}'. | |
1152 parser.simple_key_allowed = false | |
1153 | |
1154 // Consume the token. | |
1155 | |
1156 start_mark := parser.mark | |
1157 skip(parser) | |
1158 end_mark := parser.mark | |
1159 | |
1160 // Create the FLOW-SEQUENCE-END of FLOW-MAPPING-END token. | |
1161 token := yaml_token_t{ | |
1162 typ: typ, | |
1163 start_mark: start_mark, | |
1164 end_mark: end_mark, | |
1165 } | |
1166 // Append the token to the queue. | |
1167 yaml_insert_token(parser, -1, &token) | |
1168 return true | |
1169 } | |
1170 | |
1171 // Produce the FLOW-ENTRY token. | |
1172 func yaml_parser_fetch_flow_entry(parser *yaml_parser_t) bool { | |
1173 // Reset any potential simple keys on the current flow level. | |
1174 if !yaml_parser_remove_simple_key(parser) { | |
1175 return false | |
1176 } | |
1177 | |
1178 // Simple keys are allowed after ','. | |
1179 parser.simple_key_allowed = true | |
1180 | |
1181 // Consume the token. | |
1182 start_mark := parser.mark | |
1183 skip(parser) | |
1184 end_mark := parser.mark | |
1185 | |
1186 // Create the FLOW-ENTRY token and append it to the queue. | |
1187 token := yaml_token_t{ | |
1188 typ: yaml_FLOW_ENTRY_TOKEN, | |
1189 start_mark: start_mark, | |
1190 end_mark: end_mark, | |
1191 } | |
1192 yaml_insert_token(parser, -1, &token) | |
1193 return true | |
1194 } | |
1195 | |
1196 // Produce the BLOCK-ENTRY token. | |
1197 func yaml_parser_fetch_block_entry(parser *yaml_parser_t) bool { | |
1198 // Check if the scanner is in the block context. | |
1199 if parser.flow_level == 0 { | |
1200 // Check if we are allowed to start a new entry. | |
1201 if !parser.simple_key_allowed { | |
1202 return yaml_parser_set_scanner_error(parser, "", parser.mark, | |
1203 "block sequence entries are not allowed in this context") | |
1204 } | |
1205 // Add the BLOCK-SEQUENCE-START token if needed. | |
1206 if !yaml_parser_roll_indent(parser, parser.mark.column, -1, yaml_BLOCK_SEQUENCE_START_TOKEN, parser.mark) { | |
1207 return false | |
1208 } | |
1209 } else { | |
1210 // It is an error for the '-' indicator to occur in the flow context, | |
1211 // but we let the Parser detect and report about it because the Parser | |
1212 // is able to point to the context. | |
1213 } | |
1214 | |
1215 // Reset any potential simple keys on the current flow level. | |
1216 if !yaml_parser_remove_simple_key(parser) { | |
1217 return false | |
1218 } | |
1219 | |
1220 // Simple keys are allowed after '-'. | |
1221 parser.simple_key_allowed = true | |
1222 | |
1223 // Consume the token. | |
1224 start_mark := parser.mark | |
1225 skip(parser) | |
1226 end_mark := parser.mark | |
1227 | |
1228 // Create the BLOCK-ENTRY token and append it to the queue. | |
1229 token := yaml_token_t{ | |
1230 typ: yaml_BLOCK_ENTRY_TOKEN, | |
1231 start_mark: start_mark, | |
1232 end_mark: end_mark, | |
1233 } | |
1234 yaml_insert_token(parser, -1, &token) | |
1235 return true | |
1236 } | |
1237 | |
1238 // Produce the KEY token. | |
1239 func yaml_parser_fetch_key(parser *yaml_parser_t) bool { | |
1240 | |
1241 // In the block context, additional checks are required. | |
1242 if parser.flow_level == 0 { | |
1243 // Check if we are allowed to start a new key (not nessesary simple). | |
1244 if !parser.simple_key_allowed { | |
1245 return yaml_parser_set_scanner_error(parser, "", parser.mark, | |
1246 "mapping keys are not allowed in this context") | |
1247 } | |
1248 // Add the BLOCK-MAPPING-START token if needed. | |
1249 if !yaml_parser_roll_indent(parser, parser.mark.column, -1, yaml_BLOCK_MAPPING_START_TOKEN, parser.mark) { | |
1250 return false | |
1251 } | |
1252 } | |
1253 | |
1254 // Reset any potential simple keys on the current flow level. | |
1255 if !yaml_parser_remove_simple_key(parser) { | |
1256 return false | |
1257 } | |
1258 | |
1259 // Simple keys are allowed after '?' in the block context. | |
1260 parser.simple_key_allowed = parser.flow_level == 0 | |
1261 | |
1262 // Consume the token. | |
1263 start_mark := parser.mark | |
1264 skip(parser) | |
1265 end_mark := parser.mark | |
1266 | |
1267 // Create the KEY token and append it to the queue. | |
1268 token := yaml_token_t{ | |
1269 typ: yaml_KEY_TOKEN, | |
1270 start_mark: start_mark, | |
1271 end_mark: end_mark, | |
1272 } | |
1273 yaml_insert_token(parser, -1, &token) | |
1274 return true | |
1275 } | |
1276 | |
1277 // Produce the VALUE token. | |
1278 func yaml_parser_fetch_value(parser *yaml_parser_t) bool { | |
1279 | |
1280 simple_key := &parser.simple_keys[len(parser.simple_keys)-1] | |
1281 | |
1282 // Have we found a simple key? | |
1283 if valid, ok := yaml_simple_key_is_valid(parser, simple_key); !ok { | |
1284 return false | |
1285 | |
1286 } else if valid { | |
1287 | |
1288 // Create the KEY token and insert it into the queue. | |
1289 token := yaml_token_t{ | |
1290 typ: yaml_KEY_TOKEN, | |
1291 start_mark: simple_key.mark, | |
1292 end_mark: simple_key.mark, | |
1293 } | |
1294 yaml_insert_token(parser, simple_key.token_number-parser.tokens_parsed, &token) | |
1295 | |
1296 // In the block context, we may need to add the BLOCK-MAPPING-START token. | |
1297 if !yaml_parser_roll_indent(parser, simple_key.mark.column, | |
1298 simple_key.token_number, | |
1299 yaml_BLOCK_MAPPING_START_TOKEN, simple_key.mark) { | |
1300 return false | |
1301 } | |
1302 | |
1303 // Remove the simple key. | |
1304 simple_key.possible = false | |
1305 delete(parser.simple_keys_by_tok, simple_key.token_number) | |
1306 | |
1307 // A simple key cannot follow another simple key. | |
1308 parser.simple_key_allowed = false | |
1309 | |
1310 } else { | |
1311 // The ':' indicator follows a complex key. | |
1312 | |
1313 // In the block context, extra checks are required. | |
1314 if parser.flow_level == 0 { | |
1315 | |
1316 // Check if we are allowed to start a complex value. | |
1317 if !parser.simple_key_allowed { | |
1318 return yaml_parser_set_scanner_error(parser, "", parser.mark, | |
1319 "mapping values are not allowed in this context") | |
1320 } | |
1321 | |
1322 // Add the BLOCK-MAPPING-START token if needed. | |
1323 if !yaml_parser_roll_indent(parser, parser.mark.column, -1, yaml_BLOCK_MAPPING_START_TOKEN, parser.mark) { | |
1324 return false | |
1325 } | |
1326 } | |
1327 | |
1328 // Simple keys after ':' are allowed in the block context. | |
1329 parser.simple_key_allowed = parser.flow_level == 0 | |
1330 } | |
1331 | |
1332 // Consume the token. | |
1333 start_mark := parser.mark | |
1334 skip(parser) | |
1335 end_mark := parser.mark | |
1336 | |
1337 // Create the VALUE token and append it to the queue. | |
1338 token := yaml_token_t{ | |
1339 typ: yaml_VALUE_TOKEN, | |
1340 start_mark: start_mark, | |
1341 end_mark: end_mark, | |
1342 } | |
1343 yaml_insert_token(parser, -1, &token) | |
1344 return true | |
1345 } | |
1346 | |
1347 // Produce the ALIAS or ANCHOR token. | |
1348 func yaml_parser_fetch_anchor(parser *yaml_parser_t, typ yaml_token_type_t) bool { | |
1349 // An anchor or an alias could be a simple key. | |
1350 if !yaml_parser_save_simple_key(parser) { | |
1351 return false | |
1352 } | |
1353 | |
1354 // A simple key cannot follow an anchor or an alias. | |
1355 parser.simple_key_allowed = false | |
1356 | |
1357 // Create the ALIAS or ANCHOR token and append it to the queue. | |
1358 var token yaml_token_t | |
1359 if !yaml_parser_scan_anchor(parser, &token, typ) { | |
1360 return false | |
1361 } | |
1362 yaml_insert_token(parser, -1, &token) | |
1363 return true | |
1364 } | |
1365 | |
1366 // Produce the TAG token. | |
1367 func yaml_parser_fetch_tag(parser *yaml_parser_t) bool { | |
1368 // A tag could be a simple key. | |
1369 if !yaml_parser_save_simple_key(parser) { | |
1370 return false | |
1371 } | |
1372 | |
1373 // A simple key cannot follow a tag. | |
1374 parser.simple_key_allowed = false | |
1375 | |
1376 // Create the TAG token and append it to the queue. | |
1377 var token yaml_token_t | |
1378 if !yaml_parser_scan_tag(parser, &token) { | |
1379 return false | |
1380 } | |
1381 yaml_insert_token(parser, -1, &token) | |
1382 return true | |
1383 } | |
1384 | |
1385 // Produce the SCALAR(...,literal) or SCALAR(...,folded) tokens. | |
1386 func yaml_parser_fetch_block_scalar(parser *yaml_parser_t, literal bool) bool { | |
1387 // Remove any potential simple keys. | |
1388 if !yaml_parser_remove_simple_key(parser) { | |
1389 return false | |
1390 } | |
1391 | |
1392 // A simple key may follow a block scalar. | |
1393 parser.simple_key_allowed = true | |
1394 | |
1395 // Create the SCALAR token and append it to the queue. | |
1396 var token yaml_token_t | |
1397 if !yaml_parser_scan_block_scalar(parser, &token, literal) { | |
1398 return false | |
1399 } | |
1400 yaml_insert_token(parser, -1, &token) | |
1401 return true | |
1402 } | |
1403 | |
1404 // Produce the SCALAR(...,single-quoted) or SCALAR(...,double-quoted) tokens. | |
1405 func yaml_parser_fetch_flow_scalar(parser *yaml_parser_t, single bool) bool { | |
1406 // A plain scalar could be a simple key. | |
1407 if !yaml_parser_save_simple_key(parser) { | |
1408 return false | |
1409 } | |
1410 | |
1411 // A simple key cannot follow a flow scalar. | |
1412 parser.simple_key_allowed = false | |
1413 | |
1414 // Create the SCALAR token and append it to the queue. | |
1415 var token yaml_token_t | |
1416 if !yaml_parser_scan_flow_scalar(parser, &token, single) { | |
1417 return false | |
1418 } | |
1419 yaml_insert_token(parser, -1, &token) | |
1420 return true | |
1421 } | |
1422 | |
1423 // Produce the SCALAR(...,plain) token. | |
1424 func yaml_parser_fetch_plain_scalar(parser *yaml_parser_t) bool { | |
1425 // A plain scalar could be a simple key. | |
1426 if !yaml_parser_save_simple_key(parser) { | |
1427 return false | |
1428 } | |
1429 | |
1430 // A simple key cannot follow a flow scalar. | |
1431 parser.simple_key_allowed = false | |
1432 | |
1433 // Create the SCALAR token and append it to the queue. | |
1434 var token yaml_token_t | |
1435 if !yaml_parser_scan_plain_scalar(parser, &token) { | |
1436 return false | |
1437 } | |
1438 yaml_insert_token(parser, -1, &token) | |
1439 return true | |
1440 } | |
1441 | |
1442 // Eat whitespaces and comments until the next token is found. | |
1443 func yaml_parser_scan_to_next_token(parser *yaml_parser_t) bool { | |
1444 | |
1445 // Until the next token is not found. | |
1446 for { | |
1447 // Allow the BOM mark to start a line. | |
1448 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { | |
1449 return false | |
1450 } | |
1451 if parser.mark.column == 0 && is_bom(parser.buffer, parser.buffer_pos) { | |
1452 skip(parser) | |
1453 } | |
1454 | |
1455 // Eat whitespaces. | |
1456 // Tabs are allowed: | |
1457 // - in the flow context | |
1458 // - in the block context, but not at the beginning of the line or | |
1459 // after '-', '?', or ':' (complex value). | |
1460 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { | |
1461 return false | |
1462 } | |
1463 | |
1464 for parser.buffer[parser.buffer_pos] == ' ' || ((parser.flow_level > 0 || !parser.simple_key_allowed) && parser.buffer[parser.buffer_pos] == '\t') { | |
1465 skip(parser) | |
1466 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { | |
1467 return false | |
1468 } | |
1469 } | |
1470 | |
1471 // Eat a comment until a line break. | |
1472 if parser.buffer[parser.buffer_pos] == '#' { | |
1473 for !is_breakz(parser.buffer, parser.buffer_pos) { | |
1474 skip(parser) | |
1475 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { | |
1476 return false | |
1477 } | |
1478 } | |
1479 } | |
1480 | |
1481 // If it is a line break, eat it. | |
1482 if is_break(parser.buffer, parser.buffer_pos) { | |
1483 if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) { | |
1484 return false | |
1485 } | |
1486 skip_line(parser) | |
1487 | |
1488 // In the block context, a new line may start a simple key. | |
1489 if parser.flow_level == 0 { | |
1490 parser.simple_key_allowed = true | |
1491 } | |
1492 } else { | |
1493 break // We have found a token. | |
1494 } | |
1495 } | |
1496 | |
1497 return true | |
1498 } | |
1499 | |
1500 // Scan a YAML-DIRECTIVE or TAG-DIRECTIVE token. | |
1501 // | |
1502 // Scope: | |
1503 // %YAML 1.1 # a comment \n | |
1504 // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
1505 // %TAG !yaml! tag:yaml.org,2002: \n | |
1506 // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
1507 // | |
1508 func yaml_parser_scan_directive(parser *yaml_parser_t, token *yaml_token_t) bool { | |
1509 // Eat '%'. | |
1510 start_mark := parser.mark | |
1511 skip(parser) | |
1512 | |
1513 // Scan the directive name. | |
1514 var name []byte | |
1515 if !yaml_parser_scan_directive_name(parser, start_mark, &name) { | |
1516 return false | |
1517 } | |
1518 | |
1519 // Is it a YAML directive? | |
1520 if bytes.Equal(name, []byte("YAML")) { | |
1521 // Scan the VERSION directive value. | |
1522 var major, minor int8 | |
1523 if !yaml_parser_scan_version_directive_value(parser, start_mark, &major, &minor) { | |
1524 return false | |
1525 } | |
1526 end_mark := parser.mark | |
1527 | |
1528 // Create a VERSION-DIRECTIVE token. | |
1529 *token = yaml_token_t{ | |
1530 typ: yaml_VERSION_DIRECTIVE_TOKEN, | |
1531 start_mark: start_mark, | |
1532 end_mark: end_mark, | |
1533 major: major, | |
1534 minor: minor, | |
1535 } | |
1536 | |
1537 // Is it a TAG directive? | |
1538 } else if bytes.Equal(name, []byte("TAG")) { | |
1539 // Scan the TAG directive value. | |
1540 var handle, prefix []byte | |
1541 if !yaml_parser_scan_tag_directive_value(parser, start_mark, &handle, &prefix) { | |
1542 return false | |
1543 } | |
1544 end_mark := parser.mark | |
1545 | |
1546 // Create a TAG-DIRECTIVE token. | |
1547 *token = yaml_token_t{ | |
1548 typ: yaml_TAG_DIRECTIVE_TOKEN, | |
1549 start_mark: start_mark, | |
1550 end_mark: end_mark, | |
1551 value: handle, | |
1552 prefix: prefix, | |
1553 } | |
1554 | |
1555 // Unknown directive. | |
1556 } else { | |
1557 yaml_parser_set_scanner_error(parser, "while scanning a directive", | |
1558 start_mark, "found unknown directive name") | |
1559 return false | |
1560 } | |
1561 | |
1562 // Eat the rest of the line including any comments. | |
1563 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { | |
1564 return false | |
1565 } | |
1566 | |
1567 for is_blank(parser.buffer, parser.buffer_pos) { | |
1568 skip(parser) | |
1569 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { | |
1570 return false | |
1571 } | |
1572 } | |
1573 | |
1574 if parser.buffer[parser.buffer_pos] == '#' { | |
1575 for !is_breakz(parser.buffer, parser.buffer_pos) { | |
1576 skip(parser) | |
1577 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { | |
1578 return false | |
1579 } | |
1580 } | |
1581 } | |
1582 | |
1583 // Check if we are at the end of the line. | |
1584 if !is_breakz(parser.buffer, parser.buffer_pos) { | |
1585 yaml_parser_set_scanner_error(parser, "while scanning a directive", | |
1586 start_mark, "did not find expected comment or line break") | |
1587 return false | |
1588 } | |
1589 | |
1590 // Eat a line break. | |
1591 if is_break(parser.buffer, parser.buffer_pos) { | |
1592 if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) { | |
1593 return false | |
1594 } | |
1595 skip_line(parser) | |
1596 } | |
1597 | |
1598 return true | |
1599 } | |
1600 | |
1601 // Scan the directive name. | |
1602 // | |
1603 // Scope: | |
1604 // %YAML 1.1 # a comment \n | |
1605 // ^^^^ | |
1606 // %TAG !yaml! tag:yaml.org,2002: \n | |
1607 // ^^^ | |
1608 // | |
1609 func yaml_parser_scan_directive_name(parser *yaml_parser_t, start_mark yaml_mark_t, name *[]byte) bool { | |
1610 // Consume the directive name. | |
1611 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { | |
1612 return false | |
1613 } | |
1614 | |
1615 var s []byte | |
1616 for is_alpha(parser.buffer, parser.buffer_pos) { | |
1617 s = read(parser, s) | |
1618 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { | |
1619 return false | |
1620 } | |
1621 } | |
1622 | |
1623 // Check if the name is empty. | |
1624 if len(s) == 0 { | |
1625 yaml_parser_set_scanner_error(parser, "while scanning a directive", | |
1626 start_mark, "could not find expected directive name") | |
1627 return false | |
1628 } | |
1629 | |
1630 // Check for an blank character after the name. | |
1631 if !is_blankz(parser.buffer, parser.buffer_pos) { | |
1632 yaml_parser_set_scanner_error(parser, "while scanning a directive", | |
1633 start_mark, "found unexpected non-alphabetical character") | |
1634 return false | |
1635 } | |
1636 *name = s | |
1637 return true | |
1638 } | |
1639 | |
1640 // Scan the value of VERSION-DIRECTIVE. | |
1641 // | |
1642 // Scope: | |
1643 // %YAML 1.1 # a comment \n | |
1644 // ^^^^^^ | |
1645 func yaml_parser_scan_version_directive_value(parser *yaml_parser_t, start_mark yaml_mark_t, major, minor *int8) bool { | |
1646 // Eat whitespaces. | |
1647 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { | |
1648 return false | |
1649 } | |
1650 for is_blank(parser.buffer, parser.buffer_pos) { | |
1651 skip(parser) | |
1652 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { | |
1653 return false | |
1654 } | |
1655 } | |
1656 | |
1657 // Consume the major version number. | |
1658 if !yaml_parser_scan_version_directive_number(parser, start_mark, major) { | |
1659 return false | |
1660 } | |
1661 | |
1662 // Eat '.'. | |
1663 if parser.buffer[parser.buffer_pos] != '.' { | |
1664 return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive", | |
1665 start_mark, "did not find expected digit or '.' character") | |
1666 } | |
1667 | |
1668 skip(parser) | |
1669 | |
1670 // Consume the minor version number. | |
1671 if !yaml_parser_scan_version_directive_number(parser, start_mark, minor) { | |
1672 return false | |
1673 } | |
1674 return true | |
1675 } | |
1676 | |
1677 const max_number_length = 2 | |
1678 | |
1679 // Scan the version number of VERSION-DIRECTIVE. | |
1680 // | |
1681 // Scope: | |
1682 // %YAML 1.1 # a comment \n | |
1683 // ^ | |
1684 // %YAML 1.1 # a comment \n | |
1685 // ^ | |
1686 func yaml_parser_scan_version_directive_number(parser *yaml_parser_t, start_mark yaml_mark_t, number *int8) bool { | |
1687 | |
1688 // Repeat while the next character is digit. | |
1689 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { | |
1690 return false | |
1691 } | |
1692 var value, length int8 | |
1693 for is_digit(parser.buffer, parser.buffer_pos) { | |
1694 // Check if the number is too long. | |
1695 length++ | |
1696 if length > max_number_length { | |
1697 return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive", | |
1698 start_mark, "found extremely long version number") | |
1699 } | |
1700 value = value*10 + int8(as_digit(parser.buffer, parser.buffer_pos)) | |
1701 skip(parser) | |
1702 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { | |
1703 return false | |
1704 } | |
1705 } | |
1706 | |
1707 // Check if the number was present. | |
1708 if length == 0 { | |
1709 return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive", | |
1710 start_mark, "did not find expected version number") | |
1711 } | |
1712 *number = value | |
1713 return true | |
1714 } | |
1715 | |
1716 // Scan the value of a TAG-DIRECTIVE token. | |
1717 // | |
1718 // Scope: | |
1719 // %TAG !yaml! tag:yaml.org,2002: \n | |
1720 // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
1721 // | |
1722 func yaml_parser_scan_tag_directive_value(parser *yaml_parser_t, start_mark yaml_mark_t, handle, prefix *[]byte) bool { | |
1723 var handle_value, prefix_value []byte | |
1724 | |
1725 // Eat whitespaces. | |
1726 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { | |
1727 return false | |
1728 } | |
1729 | |
1730 for is_blank(parser.buffer, parser.buffer_pos) { | |
1731 skip(parser) | |
1732 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { | |
1733 return false | |
1734 } | |
1735 } | |
1736 | |
1737 // Scan a handle. | |
1738 if !yaml_parser_scan_tag_handle(parser, true, start_mark, &handle_value) { | |
1739 return false | |
1740 } | |
1741 | |
1742 // Expect a whitespace. | |
1743 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { | |
1744 return false | |
1745 } | |
1746 if !is_blank(parser.buffer, parser.buffer_pos) { | |
1747 yaml_parser_set_scanner_error(parser, "while scanning a %TAG directive", | |
1748 start_mark, "did not find expected whitespace") | |
1749 return false | |
1750 } | |
1751 | |
1752 // Eat whitespaces. | |
1753 for is_blank(parser.buffer, parser.buffer_pos) { | |
1754 skip(parser) | |
1755 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { | |
1756 return false | |
1757 } | |
1758 } | |
1759 | |
1760 // Scan a prefix. | |
1761 if !yaml_parser_scan_tag_uri(parser, true, nil, start_mark, &prefix_value) { | |
1762 return false | |
1763 } | |
1764 | |
1765 // Expect a whitespace or line break. | |
1766 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { | |
1767 return false | |
1768 } | |
1769 if !is_blankz(parser.buffer, parser.buffer_pos) { | |
1770 yaml_parser_set_scanner_error(parser, "while scanning a %TAG directive", | |
1771 start_mark, "did not find expected whitespace or line break") | |
1772 return false | |
1773 } | |
1774 | |
1775 *handle = handle_value | |
1776 *prefix = prefix_value | |
1777 return true | |
1778 } | |
1779 | |
1780 func yaml_parser_scan_anchor(parser *yaml_parser_t, token *yaml_token_t, typ yaml_token_type_t) bool { | |
1781 var s []byte | |
1782 | |
1783 // Eat the indicator character. | |
1784 start_mark := parser.mark | |
1785 skip(parser) | |
1786 | |
1787 // Consume the value. | |
1788 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { | |
1789 return false | |
1790 } | |
1791 | |
1792 for is_alpha(parser.buffer, parser.buffer_pos) { | |
1793 s = read(parser, s) | |
1794 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { | |
1795 return false | |
1796 } | |
1797 } | |
1798 | |
1799 end_mark := parser.mark | |
1800 | |
1801 /* | |
1802 * Check if length of the anchor is greater than 0 and it is followed by | |
1803 * a whitespace character or one of the indicators: | |
1804 * | |
1805 * '?', ':', ',', ']', '}', '%', '@', '`'. | |
1806 */ | |
1807 | |
1808 if len(s) == 0 || | |
1809 !(is_blankz(parser.buffer, parser.buffer_pos) || parser.buffer[parser.buffer_pos] == '?' || | |
1810 parser.buffer[parser.buffer_pos] == ':' || parser.buffer[parser.buffer_pos] == ',' || | |
1811 parser.buffer[parser.buffer_pos] == ']' || parser.buffer[parser.buffer_pos] == '}' || | |
1812 parser.buffer[parser.buffer_pos] == '%' || parser.buffer[parser.buffer_pos] == '@' || | |
1813 parser.buffer[parser.buffer_pos] == '`') { | |
1814 context := "while scanning an alias" | |
1815 if typ == yaml_ANCHOR_TOKEN { | |
1816 context = "while scanning an anchor" | |
1817 } | |
1818 yaml_parser_set_scanner_error(parser, context, start_mark, | |
1819 "did not find expected alphabetic or numeric character") | |
1820 return false | |
1821 } | |
1822 | |
1823 // Create a token. | |
1824 *token = yaml_token_t{ | |
1825 typ: typ, | |
1826 start_mark: start_mark, | |
1827 end_mark: end_mark, | |
1828 value: s, | |
1829 } | |
1830 | |
1831 return true | |
1832 } | |
1833 | |
1834 /* | |
1835 * Scan a TAG token. | |
1836 */ | |
1837 | |
1838 func yaml_parser_scan_tag(parser *yaml_parser_t, token *yaml_token_t) bool { | |
1839 var handle, suffix []byte | |
1840 | |
1841 start_mark := parser.mark | |
1842 | |
1843 // Check if the tag is in the canonical form. | |
1844 if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) { | |
1845 return false | |
1846 } | |
1847 | |
1848 if parser.buffer[parser.buffer_pos+1] == '<' { | |
1849 // Keep the handle as '' | |
1850 | |
1851 // Eat '!<' | |
1852 skip(parser) | |
1853 skip(parser) | |
1854 | |
1855 // Consume the tag value. | |
1856 if !yaml_parser_scan_tag_uri(parser, false, nil, start_mark, &suffix) { | |
1857 return false | |
1858 } | |
1859 | |
1860 // Check for '>' and eat it. | |
1861 if parser.buffer[parser.buffer_pos] != '>' { | |
1862 yaml_parser_set_scanner_error(parser, "while scanning a tag", | |
1863 start_mark, "did not find the expected '>'") | |
1864 return false | |
1865 } | |
1866 | |
1867 skip(parser) | |
1868 } else { | |
1869 // The tag has either the '!suffix' or the '!handle!suffix' form. | |
1870 | |
1871 // First, try to scan a handle. | |
1872 if !yaml_parser_scan_tag_handle(parser, false, start_mark, &handle) { | |
1873 return false | |
1874 } | |
1875 | |
1876 // Check if it is, indeed, handle. | |
1877 if handle[0] == '!' && len(handle) > 1 && handle[len(handle)-1] == '!' { | |
1878 // Scan the suffix now. | |
1879 if !yaml_parser_scan_tag_uri(parser, false, nil, start_mark, &suffix) { | |
1880 return false | |
1881 } | |
1882 } else { | |
1883 // It wasn't a handle after all. Scan the rest of the tag. | |
1884 if !yaml_parser_scan_tag_uri(parser, false, handle, start_mark, &suffix) { | |
1885 return false | |
1886 } | |
1887 | |
1888 // Set the handle to '!'. | |
1889 handle = []byte{'!'} | |
1890 | |
1891 // A special case: the '!' tag. Set the handle to '' and the | |
1892 // suffix to '!'. | |
1893 if len(suffix) == 0 { | |
1894 handle, suffix = suffix, handle | |
1895 } | |
1896 } | |
1897 } | |
1898 | |
1899 // Check the character which ends the tag. | |
1900 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { | |
1901 return false | |
1902 } | |
1903 if !is_blankz(parser.buffer, parser.buffer_pos) { | |
1904 yaml_parser_set_scanner_error(parser, "while scanning a tag", | |
1905 start_mark, "did not find expected whitespace or line break") | |
1906 return false | |
1907 } | |
1908 | |
1909 end_mark := parser.mark | |
1910 | |
1911 // Create a token. | |
1912 *token = yaml_token_t{ | |
1913 typ: yaml_TAG_TOKEN, | |
1914 start_mark: start_mark, | |
1915 end_mark: end_mark, | |
1916 value: handle, | |
1917 suffix: suffix, | |
1918 } | |
1919 return true | |
1920 } | |
1921 | |
1922 // Scan a tag handle. | |
1923 func yaml_parser_scan_tag_handle(parser *yaml_parser_t, directive bool, start_mark yaml_mark_t, handle *[]byte) bool { | |
1924 // Check the initial '!' character. | |
1925 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { | |
1926 return false | |
1927 } | |
1928 if parser.buffer[parser.buffer_pos] != '!' { | |
1929 yaml_parser_set_scanner_tag_error(parser, directive, | |
1930 start_mark, "did not find expected '!'") | |
1931 return false | |
1932 } | |
1933 | |
1934 var s []byte | |
1935 | |
1936 // Copy the '!' character. | |
1937 s = read(parser, s) | |
1938 | |
1939 // Copy all subsequent alphabetical and numerical characters. | |
1940 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { | |
1941 return false | |
1942 } | |
1943 for is_alpha(parser.buffer, parser.buffer_pos) { | |
1944 s = read(parser, s) | |
1945 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { | |
1946 return false | |
1947 } | |
1948 } | |
1949 | |
1950 // Check if the trailing character is '!' and copy it. | |
1951 if parser.buffer[parser.buffer_pos] == '!' { | |
1952 s = read(parser, s) | |
1953 } else { | |
1954 // It's either the '!' tag or not really a tag handle. If it's a %TAG | |
1955 // directive, it's an error. If it's a tag token, it must be a part of URI. | |
1956 if directive && string(s) != "!" { | |
1957 yaml_parser_set_scanner_tag_error(parser, directive, | |
1958 start_mark, "did not find expected '!'") | |
1959 return false | |
1960 } | |
1961 } | |
1962 | |
1963 *handle = s | |
1964 return true | |
1965 } | |
1966 | |
1967 // Scan a tag. | |
1968 func yaml_parser_scan_tag_uri(parser *yaml_parser_t, directive bool, head []byte, start_mark yaml_mark_t, uri *[]byte) bool { | |
1969 //size_t length = head ? strlen((char *)head) : 0 | |
1970 var s []byte | |
1971 hasTag := len(head) > 0 | |
1972 | |
1973 // Copy the head if needed. | |
1974 // | |
1975 // Note that we don't copy the leading '!' character. | |
1976 if len(head) > 1 { | |
1977 s = append(s, head[1:]...) | |
1978 } | |
1979 | |
1980 // Scan the tag. | |
1981 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { | |
1982 return false | |
1983 } | |
1984 | |
1985 // The set of characters that may appear in URI is as follows: | |
1986 // | |
1987 // '0'-'9', 'A'-'Z', 'a'-'z', '_', '-', ';', '/', '?', ':', '@', '&', | |
1988 // '=', '+', '$', ',', '.', '!', '~', '*', '\'', '(', ')', '[', ']', | |
1989 // '%'. | |
1990 // [Go] Convert this into more reasonable logic. | |
1991 for is_alpha(parser.buffer, parser.buffer_pos) || parser.buffer[parser.buffer_pos] == ';' || | |
1992 parser.buffer[parser.buffer_pos] == '/' || parser.buffer[parser.buffer_pos] == '?' || | |
1993 parser.buffer[parser.buffer_pos] == ':' || parser.buffer[parser.buffer_pos] == '@' || | |
1994 parser.buffer[parser.buffer_pos] == '&' || parser.buffer[parser.buffer_pos] == '=' || | |
1995 parser.buffer[parser.buffer_pos] == '+' || parser.buffer[parser.buffer_pos] == '$' || | |
1996 parser.buffer[parser.buffer_pos] == ',' || parser.buffer[parser.buffer_pos] == '.' || | |
1997 parser.buffer[parser.buffer_pos] == '!' || parser.buffer[parser.buffer_pos] == '~' || | |
1998 parser.buffer[parser.buffer_pos] == '*' || parser.buffer[parser.buffer_pos] == '\'' || | |
1999 parser.buffer[parser.buffer_pos] == '(' || parser.buffer[parser.buffer_pos] == ')' || | |
2000 parser.buffer[parser.buffer_pos] == '[' || parser.buffer[parser.buffer_pos] == ']' || | |
2001 parser.buffer[parser.buffer_pos] == '%' { | |
2002 // Check if it is a URI-escape sequence. | |
2003 if parser.buffer[parser.buffer_pos] == '%' { | |
2004 if !yaml_parser_scan_uri_escapes(parser, directive, start_mark, &s) { | |
2005 return false | |
2006 } | |
2007 } else { | |
2008 s = read(parser, s) | |
2009 } | |
2010 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { | |
2011 return false | |
2012 } | |
2013 hasTag = true | |
2014 } | |
2015 | |
2016 if !hasTag { | |
2017 yaml_parser_set_scanner_tag_error(parser, directive, | |
2018 start_mark, "did not find expected tag URI") | |
2019 return false | |
2020 } | |
2021 *uri = s | |
2022 return true | |
2023 } | |
2024 | |
2025 // Decode an URI-escape sequence corresponding to a single UTF-8 character. | |
2026 func yaml_parser_scan_uri_escapes(parser *yaml_parser_t, directive bool, start_mark yaml_mark_t, s *[]byte) bool { | |
2027 | |
2028 // Decode the required number of characters. | |
2029 w := 1024 | |
2030 for w > 0 { | |
2031 // Check for a URI-escaped octet. | |
2032 if parser.unread < 3 && !yaml_parser_update_buffer(parser, 3) { | |
2033 return false | |
2034 } | |
2035 | |
2036 if !(parser.buffer[parser.buffer_pos] == '%' && | |
2037 is_hex(parser.buffer, parser.buffer_pos+1) && | |
2038 is_hex(parser.buffer, parser.buffer_pos+2)) { | |
2039 return yaml_parser_set_scanner_tag_error(parser, directive, | |
2040 start_mark, "did not find URI escaped octet") | |
2041 } | |
2042 | |
2043 // Get the octet. | |
2044 octet := byte((as_hex(parser.buffer, parser.buffer_pos+1) << 4) + as_hex(parser.buffer, parser.buffer_pos+2)) | |
2045 | |
2046 // If it is the leading octet, determine the length of the UTF-8 sequence. | |
2047 if w == 1024 { | |
2048 w = width(octet) | |
2049 if w == 0 { | |
2050 return yaml_parser_set_scanner_tag_error(parser, directive, | |
2051 start_mark, "found an incorrect leading UTF-8 octet") | |
2052 } | |
2053 } else { | |
2054 // Check if the trailing octet is correct. | |
2055 if octet&0xC0 != 0x80 { | |
2056 return yaml_parser_set_scanner_tag_error(parser, directive, | |
2057 start_mark, "found an incorrect trailing UTF-8 octet") | |
2058 } | |
2059 } | |
2060 | |
2061 // Copy the octet and move the pointers. | |
2062 *s = append(*s, octet) | |
2063 skip(parser) | |
2064 skip(parser) | |
2065 skip(parser) | |
2066 w-- | |
2067 } | |
2068 return true | |
2069 } | |
2070 | |
2071 // Scan a block scalar. | |
2072 func yaml_parser_scan_block_scalar(parser *yaml_parser_t, token *yaml_token_t, literal bool) bool { | |
2073 // Eat the indicator '|' or '>'. | |
2074 start_mark := parser.mark | |
2075 skip(parser) | |
2076 | |
2077 // Scan the additional block scalar indicators. | |
2078 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { | |
2079 return false | |
2080 } | |
2081 | |
2082 // Check for a chomping indicator. | |
2083 var chomping, increment int | |
2084 if parser.buffer[parser.buffer_pos] == '+' || parser.buffer[parser.buffer_pos] == '-' { | |
2085 // Set the chomping method and eat the indicator. | |
2086 if parser.buffer[parser.buffer_pos] == '+' { | |
2087 chomping = +1 | |
2088 } else { | |
2089 chomping = -1 | |
2090 } | |
2091 skip(parser) | |
2092 | |
2093 // Check for an indentation indicator. | |
2094 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { | |
2095 return false | |
2096 } | |
2097 if is_digit(parser.buffer, parser.buffer_pos) { | |
2098 // Check that the indentation is greater than 0. | |
2099 if parser.buffer[parser.buffer_pos] == '0' { | |
2100 yaml_parser_set_scanner_error(parser, "while scanning a block scalar", | |
2101 start_mark, "found an indentation indicator equal to 0") | |
2102 return false | |
2103 } | |
2104 | |
2105 // Get the indentation level and eat the indicator. | |
2106 increment = as_digit(parser.buffer, parser.buffer_pos) | |
2107 skip(parser) | |
2108 } | |
2109 | |
2110 } else if is_digit(parser.buffer, parser.buffer_pos) { | |
2111 // Do the same as above, but in the opposite order. | |
2112 | |
2113 if parser.buffer[parser.buffer_pos] == '0' { | |
2114 yaml_parser_set_scanner_error(parser, "while scanning a block scalar", | |
2115 start_mark, "found an indentation indicator equal to 0") | |
2116 return false | |
2117 } | |
2118 increment = as_digit(parser.buffer, parser.buffer_pos) | |
2119 skip(parser) | |
2120 | |
2121 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { | |
2122 return false | |
2123 } | |
2124 if parser.buffer[parser.buffer_pos] == '+' || parser.buffer[parser.buffer_pos] == '-' { | |
2125 if parser.buffer[parser.buffer_pos] == '+' { | |
2126 chomping = +1 | |
2127 } else { | |
2128 chomping = -1 | |
2129 } | |
2130 skip(parser) | |
2131 } | |
2132 } | |
2133 | |
2134 // Eat whitespaces and comments to the end of the line. | |
2135 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { | |
2136 return false | |
2137 } | |
2138 for is_blank(parser.buffer, parser.buffer_pos) { | |
2139 skip(parser) | |
2140 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { | |
2141 return false | |
2142 } | |
2143 } | |
2144 if parser.buffer[parser.buffer_pos] == '#' { | |
2145 for !is_breakz(parser.buffer, parser.buffer_pos) { | |
2146 skip(parser) | |
2147 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { | |
2148 return false | |
2149 } | |
2150 } | |
2151 } | |
2152 | |
2153 // Check if we are at the end of the line. | |
2154 if !is_breakz(parser.buffer, parser.buffer_pos) { | |
2155 yaml_parser_set_scanner_error(parser, "while scanning a block scalar", | |
2156 start_mark, "did not find expected comment or line break") | |
2157 return false | |
2158 } | |
2159 | |
2160 // Eat a line break. | |
2161 if is_break(parser.buffer, parser.buffer_pos) { | |
2162 if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) { | |
2163 return false | |
2164 } | |
2165 skip_line(parser) | |
2166 } | |
2167 | |
2168 end_mark := parser.mark | |
2169 | |
2170 // Set the indentation level if it was specified. | |
2171 var indent int | |
2172 if increment > 0 { | |
2173 if parser.indent >= 0 { | |
2174 indent = parser.indent + increment | |
2175 } else { | |
2176 indent = increment | |
2177 } | |
2178 } | |
2179 | |
2180 // Scan the leading line breaks and determine the indentation level if needed. | |
2181 var s, leading_break, trailing_breaks []byte | |
2182 if !yaml_parser_scan_block_scalar_breaks(parser, &indent, &trailing_breaks, start_mark, &end_mark) { | |
2183 return false | |
2184 } | |
2185 | |
2186 // Scan the block scalar content. | |
2187 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { | |
2188 return false | |
2189 } | |
2190 var leading_blank, trailing_blank bool | |
2191 for parser.mark.column == indent && !is_z(parser.buffer, parser.buffer_pos) { | |
2192 // We are at the beginning of a non-empty line. | |
2193 | |
2194 // Is it a trailing whitespace? | |
2195 trailing_blank = is_blank(parser.buffer, parser.buffer_pos) | |
2196 | |
2197 // Check if we need to fold the leading line break. | |
2198 if !literal && !leading_blank && !trailing_blank && len(leading_break) > 0 && leading_break[0] == '\n' { | |
2199 // Do we need to join the lines by space? | |
2200 if len(trailing_breaks) == 0 { | |
2201 s = append(s, ' ') | |
2202 } | |
2203 } else { | |
2204 s = append(s, leading_break...) | |
2205 } | |
2206 leading_break = leading_break[:0] | |
2207 | |
2208 // Append the remaining line breaks. | |
2209 s = append(s, trailing_breaks...) | |
2210 trailing_breaks = trailing_breaks[:0] | |
2211 | |
2212 // Is it a leading whitespace? | |
2213 leading_blank = is_blank(parser.buffer, parser.buffer_pos) | |
2214 | |
2215 // Consume the current line. | |
2216 for !is_breakz(parser.buffer, parser.buffer_pos) { | |
2217 s = read(parser, s) | |
2218 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { | |
2219 return false | |
2220 } | |
2221 } | |
2222 | |
2223 // Consume the line break. | |
2224 if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) { | |
2225 return false | |
2226 } | |
2227 | |
2228 leading_break = read_line(parser, leading_break) | |
2229 | |
2230 // Eat the following indentation spaces and line breaks. | |
2231 if !yaml_parser_scan_block_scalar_breaks(parser, &indent, &trailing_breaks, start_mark, &end_mark) { | |
2232 return false | |
2233 } | |
2234 } | |
2235 | |
2236 // Chomp the tail. | |
2237 if chomping != -1 { | |
2238 s = append(s, leading_break...) | |
2239 } | |
2240 if chomping == 1 { | |
2241 s = append(s, trailing_breaks...) | |
2242 } | |
2243 | |
2244 // Create a token. | |
2245 *token = yaml_token_t{ | |
2246 typ: yaml_SCALAR_TOKEN, | |
2247 start_mark: start_mark, | |
2248 end_mark: end_mark, | |
2249 value: s, | |
2250 style: yaml_LITERAL_SCALAR_STYLE, | |
2251 } | |
2252 if !literal { | |
2253 token.style = yaml_FOLDED_SCALAR_STYLE | |
2254 } | |
2255 return true | |
2256 } | |
2257 | |
2258 // Scan indentation spaces and line breaks for a block scalar. Determine the | |
2259 // indentation level if needed. | |
2260 func yaml_parser_scan_block_scalar_breaks(parser *yaml_parser_t, indent *int, breaks *[]byte, start_mark yaml_mark_t, end_mark *yaml_mark_t) bool { | |
2261 *end_mark = parser.mark | |
2262 | |
2263 // Eat the indentation spaces and line breaks. | |
2264 max_indent := 0 | |
2265 for { | |
2266 // Eat the indentation spaces. | |
2267 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { | |
2268 return false | |
2269 } | |
2270 for (*indent == 0 || parser.mark.column < *indent) && is_space(parser.buffer, parser.buffer_pos) { | |
2271 skip(parser) | |
2272 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { | |
2273 return false | |
2274 } | |
2275 } | |
2276 if parser.mark.column > max_indent { | |
2277 max_indent = parser.mark.column | |
2278 } | |
2279 | |
2280 // Check for a tab character messing the indentation. | |
2281 if (*indent == 0 || parser.mark.column < *indent) && is_tab(parser.buffer, parser.buffer_pos) { | |
2282 return yaml_parser_set_scanner_error(parser, "while scanning a block scalar", | |
2283 start_mark, "found a tab character where an indentation space is expected") | |
2284 } | |
2285 | |
2286 // Have we found a non-empty line? | |
2287 if !is_break(parser.buffer, parser.buffer_pos) { | |
2288 break | |
2289 } | |
2290 | |
2291 // Consume the line break. | |
2292 if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) { | |
2293 return false | |
2294 } | |
2295 // [Go] Should really be returning breaks instead. | |
2296 *breaks = read_line(parser, *breaks) | |
2297 *end_mark = parser.mark | |
2298 } | |
2299 | |
2300 // Determine the indentation level if needed. | |
2301 if *indent == 0 { | |
2302 *indent = max_indent | |
2303 if *indent < parser.indent+1 { | |
2304 *indent = parser.indent + 1 | |
2305 } | |
2306 if *indent < 1 { | |
2307 *indent = 1 | |
2308 } | |
2309 } | |
2310 return true | |
2311 } | |
2312 | |
2313 // Scan a quoted scalar. | |
2314 func yaml_parser_scan_flow_scalar(parser *yaml_parser_t, token *yaml_token_t, single bool) bool { | |
2315 // Eat the left quote. | |
2316 start_mark := parser.mark | |
2317 skip(parser) | |
2318 | |
2319 // Consume the content of the quoted scalar. | |
2320 var s, leading_break, trailing_breaks, whitespaces []byte | |
2321 for { | |
2322 // Check that there are no document indicators at the beginning of the line. | |
2323 if parser.unread < 4 && !yaml_parser_update_buffer(parser, 4) { | |
2324 return false | |
2325 } | |
2326 | |
2327 if parser.mark.column == 0 && | |
2328 ((parser.buffer[parser.buffer_pos+0] == '-' && | |
2329 parser.buffer[parser.buffer_pos+1] == '-' && | |
2330 parser.buffer[parser.buffer_pos+2] == '-') || | |
2331 (parser.buffer[parser.buffer_pos+0] == '.' && | |
2332 parser.buffer[parser.buffer_pos+1] == '.' && | |
2333 parser.buffer[parser.buffer_pos+2] == '.')) && | |
2334 is_blankz(parser.buffer, parser.buffer_pos+3) { | |
2335 yaml_parser_set_scanner_error(parser, "while scanning a quoted scalar", | |
2336 start_mark, "found unexpected document indicator") | |
2337 return false | |
2338 } | |
2339 | |
2340 // Check for EOF. | |
2341 if is_z(parser.buffer, parser.buffer_pos) { | |
2342 yaml_parser_set_scanner_error(parser, "while scanning a quoted scalar", | |
2343 start_mark, "found unexpected end of stream") | |
2344 return false | |
2345 } | |
2346 | |
2347 // Consume non-blank characters. | |
2348 leading_blanks := false | |
2349 for !is_blankz(parser.buffer, parser.buffer_pos) { | |
2350 if single && parser.buffer[parser.buffer_pos] == '\'' && parser.buffer[parser.buffer_pos+1] == '\'' { | |
2351 // Is is an escaped single quote. | |
2352 s = append(s, '\'') | |
2353 skip(parser) | |
2354 skip(parser) | |
2355 | |
2356 } else if single && parser.buffer[parser.buffer_pos] == '\'' { | |
2357 // It is a right single quote. | |
2358 break | |
2359 } else if !single && parser.buffer[parser.buffer_pos] == '"' { | |
2360 // It is a right double quote. | |
2361 break | |
2362 | |
2363 } else if !single && parser.buffer[parser.buffer_pos] == '\\' && is_break(parser.buffer, parser.buffer_pos+1) { | |
2364 // It is an escaped line break. | |
2365 if parser.unread < 3 && !yaml_parser_update_buffer(parser, 3) { | |
2366 return false | |
2367 } | |
2368 skip(parser) | |
2369 skip_line(parser) | |
2370 leading_blanks = true | |
2371 break | |
2372 | |
2373 } else if !single && parser.buffer[parser.buffer_pos] == '\\' { | |
2374 // It is an escape sequence. | |
2375 code_length := 0 | |
2376 | |
2377 // Check the escape character. | |
2378 switch parser.buffer[parser.buffer_pos+1] { | |
2379 case '0': | |
2380 s = append(s, 0) | |
2381 case 'a': | |
2382 s = append(s, '\x07') | |
2383 case 'b': | |
2384 s = append(s, '\x08') | |
2385 case 't', '\t': | |
2386 s = append(s, '\x09') | |
2387 case 'n': | |
2388 s = append(s, '\x0A') | |
2389 case 'v': | |
2390 s = append(s, '\x0B') | |
2391 case 'f': | |
2392 s = append(s, '\x0C') | |
2393 case 'r': | |
2394 s = append(s, '\x0D') | |
2395 case 'e': | |
2396 s = append(s, '\x1B') | |
2397 case ' ': | |
2398 s = append(s, '\x20') | |
2399 case '"': | |
2400 s = append(s, '"') | |
2401 case '\'': | |
2402 s = append(s, '\'') | |
2403 case '\\': | |
2404 s = append(s, '\\') | |
2405 case 'N': // NEL (#x85) | |
2406 s = append(s, '\xC2') | |
2407 s = append(s, '\x85') | |
2408 case '_': // #xA0 | |
2409 s = append(s, '\xC2') | |
2410 s = append(s, '\xA0') | |
2411 case 'L': // LS (#x2028) | |
2412 s = append(s, '\xE2') | |
2413 s = append(s, '\x80') | |
2414 s = append(s, '\xA8') | |
2415 case 'P': // PS (#x2029) | |
2416 s = append(s, '\xE2') | |
2417 s = append(s, '\x80') | |
2418 s = append(s, '\xA9') | |
2419 case 'x': | |
2420 code_length = 2 | |
2421 case 'u': | |
2422 code_length = 4 | |
2423 case 'U': | |
2424 code_length = 8 | |
2425 default: | |
2426 yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar", | |
2427 start_mark, "found unknown escape character") | |
2428 return false | |
2429 } | |
2430 | |
2431 skip(parser) | |
2432 skip(parser) | |
2433 | |
2434 // Consume an arbitrary escape code. | |
2435 if code_length > 0 { | |
2436 var value int | |
2437 | |
2438 // Scan the character value. | |
2439 if parser.unread < code_length && !yaml_parser_update_buffer(parser, code_length) { | |
2440 return false | |
2441 } | |
2442 for k := 0; k < code_length; k++ { | |
2443 if !is_hex(parser.buffer, parser.buffer_pos+k) { | |
2444 yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar", | |
2445 start_mark, "did not find expected hexdecimal number") | |
2446 return false | |
2447 } | |
2448 value = (value << 4) + as_hex(parser.buffer, parser.buffer_pos+k) | |
2449 } | |
2450 | |
2451 // Check the value and write the character. | |
2452 if (value >= 0xD800 && value <= 0xDFFF) || value > 0x10FFFF { | |
2453 yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar", | |
2454 start_mark, "found invalid Unicode character escape code") | |
2455 return false | |
2456 } | |
2457 if value <= 0x7F { | |
2458 s = append(s, byte(value)) | |
2459 } else if value <= 0x7FF { | |
2460 s = append(s, byte(0xC0+(value>>6))) | |
2461 s = append(s, byte(0x80+(value&0x3F))) | |
2462 } else if value <= 0xFFFF { | |
2463 s = append(s, byte(0xE0+(value>>12))) | |
2464 s = append(s, byte(0x80+((value>>6)&0x3F))) | |
2465 s = append(s, byte(0x80+(value&0x3F))) | |
2466 } else { | |
2467 s = append(s, byte(0xF0+(value>>18))) | |
2468 s = append(s, byte(0x80+((value>>12)&0x3F))) | |
2469 s = append(s, byte(0x80+((value>>6)&0x3F))) | |
2470 s = append(s, byte(0x80+(value&0x3F))) | |
2471 } | |
2472 | |
2473 // Advance the pointer. | |
2474 for k := 0; k < code_length; k++ { | |
2475 skip(parser) | |
2476 } | |
2477 } | |
2478 } else { | |
2479 // It is a non-escaped non-blank character. | |
2480 s = read(parser, s) | |
2481 } | |
2482 if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) { | |
2483 return false | |
2484 } | |
2485 } | |
2486 | |
2487 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { | |
2488 return false | |
2489 } | |
2490 | |
2491 // Check if we are at the end of the scalar. | |
2492 if single { | |
2493 if parser.buffer[parser.buffer_pos] == '\'' { | |
2494 break | |
2495 } | |
2496 } else { | |
2497 if parser.buffer[parser.buffer_pos] == '"' { | |
2498 break | |
2499 } | |
2500 } | |
2501 | |
2502 // Consume blank characters. | |
2503 for is_blank(parser.buffer, parser.buffer_pos) || is_break(parser.buffer, parser.buffer_pos) { | |
2504 if is_blank(parser.buffer, parser.buffer_pos) { | |
2505 // Consume a space or a tab character. | |
2506 if !leading_blanks { | |
2507 whitespaces = read(parser, whitespaces) | |
2508 } else { | |
2509 skip(parser) | |
2510 } | |
2511 } else { | |
2512 if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) { | |
2513 return false | |
2514 } | |
2515 | |
2516 // Check if it is a first line break. | |
2517 if !leading_blanks { | |
2518 whitespaces = whitespaces[:0] | |
2519 leading_break = read_line(parser, leading_break) | |
2520 leading_blanks = true | |
2521 } else { | |
2522 trailing_breaks = read_line(parser, trailing_breaks) | |
2523 } | |
2524 } | |
2525 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { | |
2526 return false | |
2527 } | |
2528 } | |
2529 | |
2530 // Join the whitespaces or fold line breaks. | |
2531 if leading_blanks { | |
2532 // Do we need to fold line breaks? | |
2533 if len(leading_break) > 0 && leading_break[0] == '\n' { | |
2534 if len(trailing_breaks) == 0 { | |
2535 s = append(s, ' ') | |
2536 } else { | |
2537 s = append(s, trailing_breaks...) | |
2538 } | |
2539 } else { | |
2540 s = append(s, leading_break...) | |
2541 s = append(s, trailing_breaks...) | |
2542 } | |
2543 trailing_breaks = trailing_breaks[:0] | |
2544 leading_break = leading_break[:0] | |
2545 } else { | |
2546 s = append(s, whitespaces...) | |
2547 whitespaces = whitespaces[:0] | |
2548 } | |
2549 } | |
2550 | |
2551 // Eat the right quote. | |
2552 skip(parser) | |
2553 end_mark := parser.mark | |
2554 | |
2555 // Create a token. | |
2556 *token = yaml_token_t{ | |
2557 typ: yaml_SCALAR_TOKEN, | |
2558 start_mark: start_mark, | |
2559 end_mark: end_mark, | |
2560 value: s, | |
2561 style: yaml_SINGLE_QUOTED_SCALAR_STYLE, | |
2562 } | |
2563 if !single { | |
2564 token.style = yaml_DOUBLE_QUOTED_SCALAR_STYLE | |
2565 } | |
2566 return true | |
2567 } | |
2568 | |
2569 // Scan a plain scalar. | |
2570 func yaml_parser_scan_plain_scalar(parser *yaml_parser_t, token *yaml_token_t) bool { | |
2571 | |
2572 var s, leading_break, trailing_breaks, whitespaces []byte | |
2573 var leading_blanks bool | |
2574 var indent = parser.indent + 1 | |
2575 | |
2576 start_mark := parser.mark | |
2577 end_mark := parser.mark | |
2578 | |
2579 // Consume the content of the plain scalar. | |
2580 for { | |
2581 // Check for a document indicator. | |
2582 if parser.unread < 4 && !yaml_parser_update_buffer(parser, 4) { | |
2583 return false | |
2584 } | |
2585 if parser.mark.column == 0 && | |
2586 ((parser.buffer[parser.buffer_pos+0] == '-' && | |
2587 parser.buffer[parser.buffer_pos+1] == '-' && | |
2588 parser.buffer[parser.buffer_pos+2] == '-') || | |
2589 (parser.buffer[parser.buffer_pos+0] == '.' && | |
2590 parser.buffer[parser.buffer_pos+1] == '.' && | |
2591 parser.buffer[parser.buffer_pos+2] == '.')) && | |
2592 is_blankz(parser.buffer, parser.buffer_pos+3) { | |
2593 break | |
2594 } | |
2595 | |
2596 // Check for a comment. | |
2597 if parser.buffer[parser.buffer_pos] == '#' { | |
2598 break | |
2599 } | |
2600 | |
2601 // Consume non-blank characters. | |
2602 for !is_blankz(parser.buffer, parser.buffer_pos) { | |
2603 | |
2604 // Check for indicators that may end a plain scalar. | |
2605 if (parser.buffer[parser.buffer_pos] == ':' && is_blankz(parser.buffer, parser.buffer_pos+1)) || | |
2606 (parser.flow_level > 0 && | |
2607 (parser.buffer[parser.buffer_pos] == ',' || | |
2608 parser.buffer[parser.buffer_pos] == '?' || parser.buffer[parser.buffer_pos] == '[' || | |
2609 parser.buffer[parser.buffer_pos] == ']' || parser.buffer[parser.buffer_pos] == '{' || | |
2610 parser.buffer[parser.buffer_pos] == '}')) { | |
2611 break | |
2612 } | |
2613 | |
2614 // Check if we need to join whitespaces and breaks. | |
2615 if leading_blanks || len(whitespaces) > 0 { | |
2616 if leading_blanks { | |
2617 // Do we need to fold line breaks? | |
2618 if leading_break[0] == '\n' { | |
2619 if len(trailing_breaks) == 0 { | |
2620 s = append(s, ' ') | |
2621 } else { | |
2622 s = append(s, trailing_breaks...) | |
2623 } | |
2624 } else { | |
2625 s = append(s, leading_break...) | |
2626 s = append(s, trailing_breaks...) | |
2627 } | |
2628 trailing_breaks = trailing_breaks[:0] | |
2629 leading_break = leading_break[:0] | |
2630 leading_blanks = false | |
2631 } else { | |
2632 s = append(s, whitespaces...) | |
2633 whitespaces = whitespaces[:0] | |
2634 } | |
2635 } | |
2636 | |
2637 // Copy the character. | |
2638 s = read(parser, s) | |
2639 | |
2640 end_mark = parser.mark | |
2641 if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) { | |
2642 return false | |
2643 } | |
2644 } | |
2645 | |
2646 // Is it the end? | |
2647 if !(is_blank(parser.buffer, parser.buffer_pos) || is_break(parser.buffer, parser.buffer_pos)) { | |
2648 break | |
2649 } | |
2650 | |
2651 // Consume blank characters. | |
2652 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { | |
2653 return false | |
2654 } | |
2655 | |
2656 for is_blank(parser.buffer, parser.buffer_pos) || is_break(parser.buffer, parser.buffer_pos) { | |
2657 if is_blank(parser.buffer, parser.buffer_pos) { | |
2658 | |
2659 // Check for tab characters that abuse indentation. | |
2660 if leading_blanks && parser.mark.column < indent && is_tab(parser.buffer, parser.buffer_pos) { | |
2661 yaml_parser_set_scanner_error(parser, "while scanning a plain scalar", | |
2662 start_mark, "found a tab character that violates indentation") | |
2663 return false | |
2664 } | |
2665 | |
2666 // Consume a space or a tab character. | |
2667 if !leading_blanks { | |
2668 whitespaces = read(parser, whitespaces) | |
2669 } else { | |
2670 skip(parser) | |
2671 } | |
2672 } else { | |
2673 if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) { | |
2674 return false | |
2675 } | |
2676 | |
2677 // Check if it is a first line break. | |
2678 if !leading_blanks { | |
2679 whitespaces = whitespaces[:0] | |
2680 leading_break = read_line(parser, leading_break) | |
2681 leading_blanks = true | |
2682 } else { | |
2683 trailing_breaks = read_line(parser, trailing_breaks) | |
2684 } | |
2685 } | |
2686 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { | |
2687 return false | |
2688 } | |
2689 } | |
2690 | |
2691 // Check indentation level. | |
2692 if parser.flow_level == 0 && parser.mark.column < indent { | |
2693 break | |
2694 } | |
2695 } | |
2696 | |
2697 // Create a token. | |
2698 *token = yaml_token_t{ | |
2699 typ: yaml_SCALAR_TOKEN, | |
2700 start_mark: start_mark, | |
2701 end_mark: end_mark, | |
2702 value: s, | |
2703 style: yaml_PLAIN_SCALAR_STYLE, | |
2704 } | |
2705 | |
2706 // Note that we change the 'simple_key_allowed' flag. | |
2707 if leading_blanks { | |
2708 parser.simple_key_allowed = true | |
2709 } | |
2710 return true | |
2711 } |