comparison vendor/github.com/eknkc/amber/parser/scanner.go @ 66:787b5ee0289d draft
Use vendored modules
Signed-off-by: Izuru Yakumo <yakumo.izuru@chaotic.ninja>
author:   yakumo.izuru
date:     Sun, 23 Jul 2023 13:18:53 +0000
parents:
children:
comparing 65:6d985efa0f7a with 66:787b5ee0289d
package parser

import (
	"bufio"
	"container/list"
	"fmt"
	"io"
	"regexp"
)

const (
	tokEOF = -(iota + 1)
	tokDoctype
	tokComment
	tokIndent
	tokOutdent
	tokBlank
	tokId
	tokClassName
	tokTag
	tokText
	tokAttribute
	tokIf
	tokElse
	tokEach
	tokAssignment
	tokImport
	tokNamedBlock
	tokExtends
	tokMixin
	tokMixinCall
)

const (
	scnNewLine = iota
	scnLine
	scnEOF
)

type scanner struct {
	reader      *bufio.Reader
	indentStack *list.List
	stash       *list.List

	state  int32
	buffer string

	line          int
	col           int
	lastTokenLine int
	lastTokenCol  int
	lastTokenSize int

	readRaw bool
}

type token struct {
	Kind  rune
	Value string
	Data  map[string]string
}

func newScanner(r io.Reader) *scanner {
	s := new(scanner)
	s.reader = bufio.NewReader(r)
	s.indentStack = list.New()
	s.stash = list.New()
	s.state = scnNewLine
	s.line = -1
	s.col = 0

	return s
}

func (s *scanner) Pos() SourcePosition {
	return SourcePosition{s.lastTokenLine + 1, s.lastTokenCol + 1, s.lastTokenSize, ""}
}

// Returns next token found in buffer
func (s *scanner) Next() *token {
	if s.readRaw {
		s.readRaw = false
		return s.NextRaw()
	}

	s.ensureBuffer()

	if stashed := s.stash.Front(); stashed != nil {
		tok := stashed.Value.(*token)
		s.stash.Remove(stashed)
		return tok
	}

	switch s.state {
	case scnEOF:
		if outdent := s.indentStack.Back(); outdent != nil {
			s.indentStack.Remove(outdent)
			return &token{tokOutdent, "", nil}
		}

		return &token{tokEOF, "", nil}
	case scnNewLine:
		s.state = scnLine

		if tok := s.scanIndent(); tok != nil {
			return tok
		}

		return s.Next()
	case scnLine:
		if tok := s.scanMixin(); tok != nil {
			return tok
		}

		if tok := s.scanMixinCall(); tok != nil {
			return tok
		}

		if tok := s.scanDoctype(); tok != nil {
			return tok
		}

		if tok := s.scanCondition(); tok != nil {
			return tok
		}

		if tok := s.scanEach(); tok != nil {
			return tok
		}

		if tok := s.scanImport(); tok != nil {
			return tok
		}

		if tok := s.scanExtends(); tok != nil {
			return tok
		}

		if tok := s.scanBlock(); tok != nil {
			return tok
		}

		if tok := s.scanAssignment(); tok != nil {
			return tok
		}

		if tok := s.scanTag(); tok != nil {
			return tok
		}

		if tok := s.scanId(); tok != nil {
			return tok
		}

		if tok := s.scanClassName(); tok != nil {
			return tok
		}

		if tok := s.scanAttribute(); tok != nil {
			return tok
		}

		if tok := s.scanComment(); tok != nil {
			return tok
		}

		if tok := s.scanText(); tok != nil {
			return tok
		}
	}

	return nil
}

// NextRaw reads the following lines verbatim (re-indenting nested lines with
// tabs) until the indentation drops back below the level where raw mode started.
func (s *scanner) NextRaw() *token {
	result := ""
	level := 0

	for {
		s.ensureBuffer()

		switch s.state {
		case scnEOF:
			return &token{tokText, result, map[string]string{"Mode": "raw"}}
		case scnNewLine:
			s.state = scnLine

			if tok := s.scanIndent(); tok != nil {
				if tok.Kind == tokIndent {
					level++
				} else if tok.Kind == tokOutdent {
					level--
				} else {
					result = result + "\n"
					continue
				}

				if level < 0 {
					s.stash.PushBack(&token{tokOutdent, "", nil})

					if len(result) > 0 && result[len(result)-1] == '\n' {
						result = result[:len(result)-1]
					}

					return &token{tokText, result, map[string]string{"Mode": "raw"}}
				}
			}
		case scnLine:
			if len(result) > 0 {
				result = result + "\n"
			}
			for i := 0; i < level; i++ {
				result += "\t"
			}
			result = result + s.buffer
			s.consume(len(s.buffer))
		}
	}

	return nil
}

var rgxIndent = regexp.MustCompile(`^(\s+)`)

// scanIndent compares the line's leading whitespace with the indent stack and
// emits tokIndent when the line nests deeper, tokOutdent (one per closed level,
// extras stashed) when it closes levels, and tokBlank for empty lines.
func (s *scanner) scanIndent() *token {
	if len(s.buffer) == 0 {
		return &token{tokBlank, "", nil}
	}

	var head *list.Element
	for head = s.indentStack.Front(); head != nil; head = head.Next() {
		value := head.Value.(*regexp.Regexp)

		if match := value.FindString(s.buffer); len(match) != 0 {
			s.consume(len(match))
		} else {
			break
		}
	}

	newIndent := rgxIndent.FindString(s.buffer)

	if len(newIndent) != 0 && head == nil {
		s.indentStack.PushBack(regexp.MustCompile(regexp.QuoteMeta(newIndent)))
		s.consume(len(newIndent))
		return &token{tokIndent, newIndent, nil}
	}

	if len(newIndent) == 0 && head != nil {
		for head != nil {
			next := head.Next()
			s.indentStack.Remove(head)
			if next == nil {
				return &token{tokOutdent, "", nil}
			} else {
				s.stash.PushBack(&token{tokOutdent, "", nil})
			}
			head = next
		}
	}

	if len(newIndent) != 0 && head != nil {
		panic("Mismatching indentation. Please use a coherent indent schema.")
	}

	return nil
}

var rgxDoctype = regexp.MustCompile(`^(!!!|doctype)\s*(.*)`)

func (s *scanner) scanDoctype() *token {
	if sm := rgxDoctype.FindStringSubmatch(s.buffer); len(sm) != 0 {
		if len(sm[2]) == 0 {
			sm[2] = "html"
		}

		s.consume(len(sm[0]))
		return &token{tokDoctype, sm[2], nil}
	}

	return nil
}

var rgxIf = regexp.MustCompile(`^if\s+(.+)$`)
var rgxElse = regexp.MustCompile(`^else\s*`)

func (s *scanner) scanCondition() *token {
	if sm := rgxIf.FindStringSubmatch(s.buffer); len(sm) != 0 {
		s.consume(len(sm[0]))
		return &token{tokIf, sm[1], nil}
	}

	if sm := rgxElse.FindStringSubmatch(s.buffer); len(sm) != 0 {
		s.consume(len(sm[0]))
		return &token{tokElse, "", nil}
	}

	return nil
}

var rgxEach = regexp.MustCompile(`^each\s+(\$[\w0-9\-_]*)(?:\s*,\s*(\$[\w0-9\-_]*))?\s+in\s+(.+)$`)

func (s *scanner) scanEach() *token {
	if sm := rgxEach.FindStringSubmatch(s.buffer); len(sm) != 0 {
		s.consume(len(sm[0]))
		return &token{tokEach, sm[3], map[string]string{"X": sm[1], "Y": sm[2]}}
	}

	return nil
}

var rgxAssignment = regexp.MustCompile(`^(\$[\w0-9\-_]*)?\s*=\s*(.+)$`)

func (s *scanner) scanAssignment() *token {
	if sm := rgxAssignment.FindStringSubmatch(s.buffer); len(sm) != 0 {
		s.consume(len(sm[0]))
		return &token{tokAssignment, sm[2], map[string]string{"X": sm[1]}}
	}

	return nil
}

var rgxComment = regexp.MustCompile(`^\/\/(-)?\s*(.*)$`)

func (s *scanner) scanComment() *token {
	if sm := rgxComment.FindStringSubmatch(s.buffer); len(sm) != 0 {
		mode := "embed"
		if len(sm[1]) != 0 {
			mode = "silent"
		}

		s.consume(len(sm[0]))
		return &token{tokComment, sm[2], map[string]string{"Mode": mode}}
	}

	return nil
}

var rgxId = regexp.MustCompile(`^#([\w-]+)(?:\s*\?\s*(.*)$)?`)

func (s *scanner) scanId() *token {
	if sm := rgxId.FindStringSubmatch(s.buffer); len(sm) != 0 {
		s.consume(len(sm[0]))
		return &token{tokId, sm[1], map[string]string{"Condition": sm[2]}}
	}

	return nil
}

var rgxClassName = regexp.MustCompile(`^\.([\w-]+)(?:\s*\?\s*(.*)$)?`)

func (s *scanner) scanClassName() *token {
	if sm := rgxClassName.FindStringSubmatch(s.buffer); len(sm) != 0 {
		s.consume(len(sm[0]))
		return &token{tokClassName, sm[1], map[string]string{"Condition": sm[2]}}
	}

	return nil
}

var rgxAttribute = regexp.MustCompile(`^\[([\w\-:@\.]+)\s*(?:=\s*(\"([^\"\\]*)\"|([^\]]+)))?\](?:\s*\?\s*(.*)$)?`)

func (s *scanner) scanAttribute() *token {
	if sm := rgxAttribute.FindStringSubmatch(s.buffer); len(sm) != 0 {
		s.consume(len(sm[0]))

		if len(sm[3]) != 0 || sm[2] == "" {
			return &token{tokAttribute, sm[1], map[string]string{"Content": sm[3], "Mode": "raw", "Condition": sm[5]}}
		}

		return &token{tokAttribute, sm[1], map[string]string{"Content": sm[4], "Mode": "expression", "Condition": sm[5]}}
	}

	return nil
}

var rgxImport = regexp.MustCompile(`^import\s+([0-9a-zA-Z_\-\. \/]*)$`)

func (s *scanner) scanImport() *token {
	if sm := rgxImport.FindStringSubmatch(s.buffer); len(sm) != 0 {
		s.consume(len(sm[0]))
		return &token{tokImport, sm[1], nil}
	}

	return nil
}

var rgxExtends = regexp.MustCompile(`^extends\s+([0-9a-zA-Z_\-\. \/]*)$`)

func (s *scanner) scanExtends() *token {
	if sm := rgxExtends.FindStringSubmatch(s.buffer); len(sm) != 0 {
		s.consume(len(sm[0]))
		return &token{tokExtends, sm[1], nil}
	}

	return nil
}

var rgxBlock = regexp.MustCompile(`^block\s+(?:(append|prepend)\s+)?([0-9a-zA-Z_\-\. \/]*)$`)

func (s *scanner) scanBlock() *token {
	if sm := rgxBlock.FindStringSubmatch(s.buffer); len(sm) != 0 {
		s.consume(len(sm[0]))
		return &token{tokNamedBlock, sm[2], map[string]string{"Modifier": sm[1]}}
	}

	return nil
}

var rgxTag = regexp.MustCompile(`^(\w[-:\w]*)`)

func (s *scanner) scanTag() *token {
	if sm := rgxTag.FindStringSubmatch(s.buffer); len(sm) != 0 {
		s.consume(len(sm[0]))
		return &token{tokTag, sm[1], nil}
	}

	return nil
}

var rgxMixin = regexp.MustCompile(`^mixin ([a-zA-Z_-]+\w*)(\(((\$\w*(,\s)?)*)\))?$`)

func (s *scanner) scanMixin() *token {
	if sm := rgxMixin.FindStringSubmatch(s.buffer); len(sm) != 0 {
		s.consume(len(sm[0]))
		return &token{tokMixin, sm[1], map[string]string{"Args": sm[3]}}
	}

	return nil
}

var rgxMixinCall = regexp.MustCompile(`^\+([A-Za-z_-]+\w*)(\((.+(,\s)?)*\))?$`)

func (s *scanner) scanMixinCall() *token {
	if sm := rgxMixinCall.FindStringSubmatch(s.buffer); len(sm) != 0 {
		s.consume(len(sm[0]))
		return &token{tokMixinCall, sm[1], map[string]string{"Args": sm[3]}}
	}

	return nil
}

var rgxText = regexp.MustCompile(`^(\|)? ?(.*)$`)

func (s *scanner) scanText() *token {
	if sm := rgxText.FindStringSubmatch(s.buffer); len(sm) != 0 {
		s.consume(len(sm[0]))

		mode := "inline"
		if sm[1] == "|" {
			mode = "piped"
		}

		return &token{tokText, sm[2], map[string]string{"Mode": mode}}
	}

	return nil
}

// Moves the position forward and removes the first `runes` bytes of s.buffer
func (s *scanner) consume(runes int) {
	if len(s.buffer) < runes {
		panic(fmt.Sprintf("Unable to consume %d runes from buffer.", runes))
	}

	s.lastTokenLine = s.line
	s.lastTokenCol = s.col
	s.lastTokenSize = runes

	s.buffer = s.buffer[runes:]
	s.col += runes
}

// Reads the next line into s.buffer, or flags EOF
func (s *scanner) ensureBuffer() {
	if len(s.buffer) > 0 {
		return
	}

	buf, err := s.reader.ReadString('\n')

	if err != nil && err != io.EOF {
		panic(err)
	} else if err != nil && len(buf) == 0 {
		s.state = scnEOF
	} else {
		// "LF"-only line endings ("\n"), as used by Unix, Linux, modern macOS, FreeBSD, BeOS, RISC OS
		if buf[len(buf)-1] == '\n' {
			buf = buf[:len(buf)-1]
		}
		// "CR+LF" line endings ("\r\n"), as used by internet protocols, DEC RT-11, Windows, CP/M, MS-DOS, OS/2, Symbian OS
		if len(buf) > 0 && buf[len(buf)-1] == '\r' {
			buf = buf[:len(buf)-1]
		}

		s.state = scnNewLine
		s.buffer = buf
		s.line += 1
		s.col = 0
	}
}