Mercurial > yakumo_izuru > aya
comparison vendor/github.com/alecthomas/chroma/v2/lexers/raku.go @ 66:787b5ee0289d draft
Use vendored modules
Signed-off-by: Izuru Yakumo <yakumo.izuru@chaotic.ninja>
author | yakumo.izuru |
---|---|
date | Sun, 23 Jul 2023 13:18:53 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
65:6d985efa0f7a | 66:787b5ee0289d |
---|---|
1 package lexers | |
2 | |
3 import ( | |
4 "regexp" | |
5 "strings" | |
6 "unicode/utf8" | |
7 | |
8 "github.com/dlclark/regexp2" | |
9 | |
10 . "github.com/alecthomas/chroma/v2" // nolint | |
11 ) | |
12 | |
13 // Raku lexer. | |
14 var Raku Lexer = Register(MustNewLexer( | |
15 &Config{ | |
16 Name: "Raku", | |
17 Aliases: []string{"perl6", "pl6", "raku"}, | |
18 Filenames: []string{ | |
19 "*.pl", "*.pm", "*.nqp", "*.p6", "*.6pl", "*.p6l", "*.pl6", "*.6pm", | |
20 "*.p6m", "*.pm6", "*.t", "*.raku", "*.rakumod", "*.rakutest", "*.rakudoc", | |
21 }, | |
22 MimeTypes: []string{ | |
23 "text/x-perl6", "application/x-perl6", | |
24 "text/x-raku", "application/x-raku", | |
25 }, | |
26 DotAll: true, | |
27 }, | |
28 rakuRules, | |
29 )) | |
30 | |
31 func rakuRules() Rules { | |
32 type RakuToken int | |
33 | |
34 const ( | |
35 rakuQuote RakuToken = iota | |
36 rakuNameAttribute | |
37 rakuPod | |
38 rakuPodFormatter | |
39 rakuPodDeclaration | |
40 rakuMultilineComment | |
41 rakuMatchRegex | |
42 rakuSubstitutionRegex | |
43 ) | |
44 | |
45 const ( | |
46 colonPairOpeningBrackets = `(?:<<|<|«|\(|\[|\{)` | |
47 colonPairClosingBrackets = `(?:>>|>|»|\)|\]|\})` | |
48 colonPairPattern = `(?<!:)(?<colon>:)(?<key>\w[\w'-]*)(?<opening_delimiters>` + colonPairOpeningBrackets + `)` | |
49 colonPairLookahead = `(?=(:['\w-]+` + | |
50 colonPairOpeningBrackets + `.+?` + colonPairClosingBrackets + `)?` | |
51 namePattern = `(?:(?!` + colonPairPattern + `)(?:::|[\w':-]))+` | |
52 variablePattern = `[$@%&]+[.^:?=!~]?` + namePattern | |
53 globalVariablePattern = `[$@%&]+\*` + namePattern | |
54 ) | |
55 | |
56 keywords := []string{ | |
57 `BEGIN`, `CATCH`, `CHECK`, `CLOSE`, `CONTROL`, `DOC`, `END`, `ENTER`, `FIRST`, `INIT`, | |
58 `KEEP`, `LAST`, `LEAVE`, `NEXT`, `POST`, `PRE`, `QUIT`, `UNDO`, `anon`, `augment`, `but`, | |
59 `class`, `constant`, `default`, `does`, `else`, `elsif`, `enum`, `for`, `gather`, `given`, | |
60 `grammar`, `has`, `if`, `import`, `is`, `of`, `let`, `loop`, `made`, `make`, `method`, | |
61 `module`, `multi`, `my`, `need`, `orwith`, `our`, `proceed`, `proto`, `repeat`, `require`, | |
62 `where`, `return`, `return-rw`, `returns`, `->`, `-->`, `role`, `state`, `sub`, `no`, | |
63 `submethod`, `subset`, `succeed`, `supersede`, `try`, `unit`, `unless`, `until`, | |
64 `use`, `when`, `while`, `with`, `without`, `export`, `native`, `repr`, `required`, `rw`, | |
65 `symbol`, `default`, `cached`, `DEPRECATED`, `dynamic`, `hidden-from-backtrace`, `nodal`, | |
66 `pure`, `raw`, `start`, `react`, `supply`, `whenever`, `also`, `rule`, `token`, `regex`, | |
67 `dynamic-scope`, `built`, `temp`, | |
68 } | |
69 | |
70 keywordsPattern := Words(`(?<!['\w:-])`, `(?!['\w:-])`, keywords...) | |
71 | |
72 wordOperators := []string{ | |
73 `X`, `Z`, `R`, `after`, `and`, `andthen`, `before`, `cmp`, `div`, `eq`, `eqv`, `extra`, `ge`, | |
74 `gt`, `le`, `leg`, `lt`, `mod`, `ne`, `or`, `orelse`, `x`, `xor`, `xx`, `gcd`, `lcm`, | |
75 `but`, `min`, `max`, `^fff`, `fff^`, `fff`, `^ff`, `ff^`, `ff`, `so`, `not`, `unicmp`, | |
76 `TR`, `o`, `(&)`, `(.)`, `(|)`, `(+)`, `(-)`, `(^)`, `coll`, `(elem)`, `(==)`, | |
77 `(cont)`, `(<)`, `(<=)`, `(>)`, `(>=)`, `minmax`, `notandthen`, `S`, | |
78 } | |
79 | |
80 wordOperatorsPattern := Words(`(?<=^|\b|\s)`, `(?=$|\b|\s)`, wordOperators...) | |
81 | |
82 operators := []string{ | |
83 `++`, `--`, `-`, `**`, `!`, `+`, `~`, `?`, `+^`, `~^`, `?^`, `^`, `*`, `/`, `%`, `%%`, `+&`, | |
84 `+<`, `+>`, `~&`, `~<`, `~>`, `?&`, `+|`, `+^`, `~|`, `~^`, `?`, `?|`, `?^`, `&`, `^`, | |
85 `<=>`, `^…^`, `^…`, `…^`, `…`, `...`, `...^`, `^...`, `^...^`, `..`, `..^`, `^..`, `^..^`, | |
86 `::=`, `:=`, `!=`, `==`, `<=`, `<`, `>=`, `>`, `~~`, `===`, `&&`, `||`, `|`, `^^`, `//`, | |
87 `??`, `!!`, `^fff^`, `^ff^`, `<==`, `==>`, `<<==`, `==>>`, `=>`, `=`, `<<`, `«`, `>>`, `»`, | |
88 `,`, `>>.`, `».`, `.&`, `.=`, `.^`, `.?`, `.+`, `.*`, `.`, `∘`, `∩`, `⊍`, `∪`, `⊎`, `∖`, | |
89 `⊖`, `≠`, `≤`, `≥`, `=:=`, `=~=`, `≅`, `∈`, `∉`, `≡`, `≢`, `∋`, `∌`, `⊂`, `⊄`, `⊆`, `⊈`, | |
90 `⊃`, `⊅`, `⊇`, `⊉`, `:`, `!!!`, `???`, `¯`, `×`, `÷`, `−`, `⁺`, `⁻`, | |
91 } | |
92 | |
93 operatorsPattern := Words(``, ``, operators...) | |
94 | |
95 builtinTypes := []string{ | |
96 `False`, `True`, `Order`, `More`, `Less`, `Same`, `Any`, `Array`, `Associative`, `AST`, | |
97 `atomicint`, `Attribute`, `Backtrace`, `Backtrace::Frame`, `Bag`, `Baggy`, `BagHash`, | |
98 `Blob`, `Block`, `Bool`, `Buf`, `Callable`, `CallFrame`, `Cancellation`, `Capture`, | |
99 `CArray`, `Channel`, `Code`, `compiler`, `Complex`, `ComplexStr`, `CompUnit`, | |
100 `CompUnit::PrecompilationRepository`, `CompUnit::Repository`, `Empty`, | |
101 `CompUnit::Repository::FileSystem`, `CompUnit::Repository::Installation`, `Cool`, | |
102 `CurrentThreadScheduler`, `CX::Warn`, `CX::Take`, `CX::Succeed`, `CX::Return`, `CX::Redo`, | |
103 `CX::Proceed`, `CX::Next`, `CX::Last`, `CX::Emit`, `CX::Done`, `Cursor`, `Date`, `Dateish`, | |
104 `DateTime`, `Distribution`, `Distribution::Hash`, `Distribution::Locally`, | |
105 `Distribution::Path`, `Distribution::Resource`, `Distro`, `Duration`, `Encoding`, | |
106 `Encoding::GlobalLexerRegistry`, `Endian`, `Enumeration`, `Exception`, `Failure`, `FatRat`, `Grammar`, | |
107 `Hash`, `HyperWhatever`, `Instant`, `Int`, `int`, `int16`, `int32`, `int64`, `int8`, `str`, | |
108 `IntStr`, `IO`, `IO::ArgFiles`, `IO::CatHandle`, `IO::Handle`, `IO::Notification`, | |
109 `IO::Notification::Change`, `IO::Path`, `IO::Path::Cygwin`, `IO::Path::Parts`, | |
110 `IO::Path::QNX`, `IO::Path::Unix`, `IO::Path::Win32`, `IO::Pipe`, `IO::Socket`, | |
111 `IO::Socket::Async`, `IO::Socket::Async::ListenSocket`, `IO::Socket::INET`, `IO::Spec`, | |
112 `IO::Spec::Cygwin`, `IO::Spec::QNX`, `IO::Spec::Unix`, `IO::Spec::Win32`, `IO::Special`, | |
113 `Iterable`, `Iterator`, `Junction`, `Kernel`, `Label`, `List`, `Lock`, `Lock::Async`, | |
114 `Lock::ConditionVariable`, `long`, `longlong`, `Macro`, `Map`, `Match`, | |
115 `Metamodel::AttributeContainer`, `Metamodel::C3MRO`, `Metamodel::ClassHOW`, | |
116 `Metamodel::ConcreteRoleHOW`, `Metamodel::CurriedRoleHOW`, `Metamodel::DefiniteHOW`, | |
117 `Metamodel::Documenting`, `Metamodel::EnumHOW`, `Metamodel::Finalization`, | |
118 `Metamodel::MethodContainer`, `Metamodel::Mixins`, `Metamodel::MROBasedMethodDispatch`, | |
119 `Metamodel::MultipleInheritance`, `Metamodel::Naming`, `Metamodel::Primitives`, | |
120 `Metamodel::PrivateMethodContainer`, `Metamodel::RoleContainer`, `Metamodel::RolePunning`, | |
121 `Metamodel::Stashing`, `Metamodel::Trusting`, `Metamodel::Versioning`, `Method`, `Mix`, | |
122 `MixHash`, `Mixy`, `Mu`, `NFC`, `NFD`, `NFKC`, `NFKD`, `Nil`, `Num`, `num32`, `num64`, | |
123 `Numeric`, `NumStr`, `ObjAt`, `Order`, `Pair`, `Parameter`, `Perl`, `Pod::Block`, | |
124 `Pod::Block::Code`, `Pod::Block::Comment`, `Pod::Block::Declarator`, `Pod::Block::Named`, | |
125 `Pod::Block::Para`, `Pod::Block::Table`, `Pod::Heading`, `Pod::Item`, `Pointer`, | |
126 `Positional`, `PositionalBindFailover`, `Proc`, `Proc::Async`, `Promise`, `Proxy`, | |
127 `PseudoStash`, `QuantHash`, `RaceSeq`, `Raku`, `Range`, `Rat`, `Rational`, `RatStr`, | |
128 `Real`, `Regex`, `Routine`, `Routine::WrapHandle`, `Scalar`, `Scheduler`, `Semaphore`, | |
129 `Seq`, `Sequence`, `Set`, `SetHash`, `Setty`, `Signature`, `size_t`, `Slip`, `Stash`, | |
130 `Str`, `StrDistance`, `Stringy`, `Sub`, `Submethod`, `Supplier`, `Supplier::Preserving`, | |
131 `Supply`, `Systemic`, `Tap`, `Telemetry`, `Telemetry::Instrument::Thread`, | |
132 `Telemetry::Instrument::ThreadPool`, `Telemetry::Instrument::Usage`, `Telemetry::Period`, | |
133 `Telemetry::Sampler`, `Thread`, `Test`, `ThreadPoolScheduler`, `UInt`, `uint16`, `uint32`, | |
134 `uint64`, `uint8`, `Uni`, `utf8`, `ValueObjAt`, `Variable`, `Version`, `VM`, `Whatever`, | |
135 `WhateverCode`, `WrapHandle`, `NativeCall`, | |
136 // Pragmas | |
137 `precompilation`, `experimental`, `worries`, `MONKEY-TYPING`, `MONKEY-SEE-NO-EVAL`, | |
138 `MONKEY-GUTS`, `fatal`, `lib`, `isms`, `newline`, `nqp`, `soft`, | |
139 `strict`, `trace`, `variables`, | |
140 } | |
141 | |
142 builtinTypesPattern := Words(`(?<!['\w:-])`, `(?::[_UD])?(?!['\w:-])`, builtinTypes...) | |
143 | |
144 builtinRoutines := []string{ | |
145 `ACCEPTS`, `abs`, `abs2rel`, `absolute`, `accept`, `accepts_type`, `accessed`, `acos`, | |
146 `acosec`, `acosech`, `acosh`, `acotan`, `acotanh`, `acquire`, `act`, `action`, `actions`, | |
147 `add`, `add_attribute`, `add_enum_value`, `add_fallback`, `add_method`, `add_parent`, | |
148 `add_private_method`, `add_role`, `add_stash`, `add_trustee`, `addendum`, `adverb`, `after`, | |
149 `all`, `allocate`, `allof`, `allowed`, `alternative-names`, `annotations`, `antipair`, | |
150 `antipairs`, `any`, `anyof`, `api`, `app_lifetime`, `append`, `arch`, `archetypes`, | |
151 `archname`, `args`, `ARGS-TO-CAPTURE`, `arity`, `Array`, `asec`, `asech`, `asin`, `asinh`, | |
152 `ASSIGN-KEY`, `ASSIGN-POS`, `assuming`, `ast`, `at`, `atan`, `atan2`, `atanh`, `AT-KEY`, | |
153 `atomic-assign`, `atomic-dec-fetch`, `atomic-fetch`, `atomic-fetch-add`, `atomic-fetch-dec`, | |
154 `atomic-fetch-inc`, `atomic-fetch-sub`, `atomic-inc-fetch`, `AT-POS`, `attributes`, `auth`, | |
155 `await`, `backend`, `backtrace`, `Bag`, `bag`, `Baggy`, `BagHash`, `bail-out`, `base`, | |
156 `basename`, `base-repeating`, `base_type`, `batch`, `BIND-KEY`, `BIND-POS`, `bind-stderr`, | |
157 `bind-stdin`, `bind-stdout`, `bind-udp`, `bits`, `bless`, `block`, `Bool`, `bool-only`, | |
158 `bounds`, `break`, `Bridge`, `broken`, `BUILD`, `TWEAK`, `build-date`, `bytes`, `cache`, | |
159 `callframe`, `calling-package`, `CALL-ME`, `callsame`, `callwith`, `can`, `cancel`, | |
160 `candidates`, `cando`, `can-ok`, `canonpath`, `caps`, `caption`, `Capture`, `capture`, | |
161 `cas`, `catdir`, `categorize`, `categorize-list`, `catfile`, `catpath`, `cause`, `ceiling`, | |
162 `cglobal`, `changed`, `Channel`, `channel`, `chars`, `chdir`, `child`, `child-name`, | |
163 `child-typename`, `chmod`, `chomp`, `chop`, `chr`, `chrs`, `chunks`, `cis`, `classify`, | |
164 `classify-list`, `cleanup`, `clone`, `close`, `closed`, `close-stdin`, `cmp-ok`, `code`, | |
165 `codename`, `codes`, `coerce_type`, `coll`, `collate`, `column`, `comb`, `combinations`, | |
166 `command`, `comment`, `compiler`, `Complex`, `compose`, `composalizer`, `compose_type`, | |
167 `compose_values`, `composer`, `compute_mro`, `condition`, `config`, `configure_destroy`, | |
168 `configure_type_checking`, `conj`, `connect`, `constraints`, `construct`, `contains`, | |
169 `content`, `contents`, `copy`, `cos`, `cosec`, `cosech`, `cosh`, `cotan`, `cotanh`, `count`, | |
170 `count-only`, `cpu-cores`, `cpu-usage`, `CREATE`, `create_type`, `cross`, `cue`, `curdir`, | |
171 `curupdir`, `d`, `Date`, `DateTime`, `day`, `daycount`, `day-of-month`, `day-of-week`, | |
172 `day-of-year`, `days-in-month`, `dd-mm-yyyy`, `declaration`, `decode`, `decoder`, `deepmap`, | |
173 `default`, `defined`, `DEFINITE`, `definite`, `delayed`, `delete`, `delete-by-compiler`, | |
174 `DELETE-KEY`, `DELETE-POS`, `denominator`, `desc`, `DESTROY`, `destroyers`, `devnull`, | |
175 `diag`, `did-you-mean`, `die`, `dies-ok`, `dir`, `dirname`, `distribution`, `dir-sep`, | |
176 `DISTROnames`, `do`, `does`, `does-ok`, `done`, `done-testing`, `duckmap`, `dynamic`, `e`, | |
177 `eager`, `earlier`, `elems`, `emit`, `enclosing`, `encode`, `encoder`, `encoding`, `end`, | |
178 `endian`, `ends-with`, `enum_from_value`, `enum_value_list`, `enum_values`, `enums`, `EOF`, | |
179 `eof`, `EVAL`, `eval-dies-ok`, `EVALFILE`, `eval-lives-ok`, `event`, `exception`, | |
180 `excludes-max`, `excludes-min`, `EXISTS-KEY`, `EXISTS-POS`, `exit`, `exitcode`, `exp`, | |
181 `expected`, `explicitly-manage`, `expmod`, `export_callback`, `extension`, `f`, `fail`, | |
182 `FALLBACK`, `fails-like`, `fc`, `feature`, `file`, `filename`, `files`, `find`, | |
183 `find_method`, `find_method_qualified`, `finish`, `first`, `flat`, `first-date-in-month`, | |
184 `flatmap`, `flip`, `floor`, `flunk`, `flush`, `flush_cache`, `fmt`, `format`, `formatter`, | |
185 `free-memory`, `freeze`, `from`, `from-list`, `from-loop`, `from-posix`, `from-slurpy`, | |
186 `full`, `full-barrier`, `GENERATE-USAGE`, `generate_mixin`, `get`, `get_value`, `getc`, | |
187 `gist`, `got`, `grab`, `grabpairs`, `grep`, `handle`, `handled`, `handles`, `hardware`, | |
188 `has_accessor`, `Hash`, `hash`, `head`, `headers`, `hh-mm-ss`, `hidden`, `hides`, `hostname`, | |
189 `hour`, `how`, `hyper`, `id`, `illegal`, `im`, `in`, `in-timezone`, `indent`, `index`, | |
190 `indices`, `indir`, `infinite`, `infix`, `postcirumfix`, `cicumfix`, `install`, | |
191 `install_method_cache`, `Instant`, `instead`, `Int`, `int-bounds`, `interval`, `in-timezone`, | |
192 `invalid-str`, `invert`, `invocant`, `IO`, `IO::Notification.watch-path`, `is_trusted`, | |
193 `is_type`, `isa`, `is-absolute`, `isa-ok`, `is-approx`, `is-deeply`, `is-hidden`, | |
194 `is-initial-thread`, `is-int`, `is-lazy`, `is-leap-year`, `isNaN`, `isnt`, `is-prime`, | |
195 `is-relative`, `is-routine`, `is-setting`, `is-win`, `item`, `iterator`, `join`, `keep`, | |
196 `kept`, `KERNELnames`, `key`, `keyof`, `keys`, `kill`, `kv`, `kxxv`, `l`, `lang`, `last`, | |
197 `lastcall`, `later`, `lazy`, `lc`, `leading`, `level`, `like`, `line`, `lines`, `link`, | |
198 `List`, `list`, `listen`, `live`, `lives-ok`, `load`, `load-repo-id`, `load-unit`, `loaded`, | |
199 `loads`, `local`, `lock`, `log`, `log10`, `lookup`, `lsb`, `made`, `MAIN`, `make`, `Map`, | |
200 `map`, `match`, `max`, `maxpairs`, `merge`, `message`, `method`, `meta`, `method_table`, | |
201 `methods`, `migrate`, `min`, `minmax`, `minpairs`, `minute`, `misplaced`, `Mix`, `mix`, | |
202 `MixHash`, `mixin`, `mixin_attribute`, `Mixy`, `mkdir`, `mode`, `modified`, `month`, `move`, | |
203 `mro`, `msb`, `multi`, `multiness`, `name`, `named`, `named_names`, `narrow`, | |
204 `nativecast`, `native-descriptor`, `nativesizeof`, `need`, `new`, `new_type`, | |
205 `new-from-daycount`, `new-from-pairs`, `next`, `nextcallee`, `next-handle`, `nextsame`, | |
206 `nextwith`, `next-interesting-index`, `NFC`, `NFD`, `NFKC`, `NFKD`, `nice`, `nl-in`, | |
207 `nl-out`, `nodemap`, `nok`, `normalize`, `none`, `norm`, `not`, `note`, `now`, `nude`, | |
208 `Num`, `numerator`, `Numeric`, `of`, `offset`, `offset-in-hours`, `offset-in-minutes`, | |
209 `ok`, `old`, `on-close`, `one`, `on-switch`, `open`, `opened`, `operation`, `optional`, | |
210 `ord`, `ords`, `orig`, `os-error`, `osname`, `out-buffer`, `pack`, `package`, `package-kind`, | |
211 `package-name`, `packages`, `Pair`, `pair`, `pairs`, `pairup`, `parameter`, `params`, | |
212 `parent`, `parent-name`, `parents`, `parse`, `parse-base`, `parsefile`, `parse-names`, | |
213 `parts`, `pass`, `path`, `path-sep`, `payload`, `peer-host`, `peer-port`, `periods`, `perl`, | |
214 `permutations`, `phaser`, `pick`, `pickpairs`, `pid`, `placeholder`, `plan`, `plus`, | |
215 `polar`, `poll`, `polymod`, `pop`, `pos`, `positional`, `posix`, `postfix`, `postmatch`, | |
216 `precomp-ext`, `precomp-target`, `precompiled`, `pred`, `prefix`, `prematch`, `prepend`, | |
217 `primary`, `print`, `printf`, `print-nl`, `print-to`, `private`, `private_method_names`, | |
218 `private_method_table`, `proc`, `produce`, `Promise`, `promise`, `prompt`, `protect`, | |
219 `protect-or-queue-on-recursion`, `publish_method_cache`, `pull-one`, `push`, `push-all`, | |
220 `push-at-least`, `push-exactly`, `push-until-lazy`, `put`, `qualifier-type`, `quaternary`, | |
221 `quit`, `r`, `race`, `radix`, `raku`, `rand`, `Range`, `range`, `Rat`, `raw`, `re`, `read`, | |
222 `read-bits`, `read-int128`, `read-int16`, `read-int32`, `read-int64`, `read-int8`, | |
223 `read-num32`, `read-num64`, `read-ubits`, `read-uint128`, `read-uint16`, `read-uint32`, | |
224 `read-uint64`, `read-uint8`, `readchars`, `readonly`, `ready`, `Real`, `reallocate`, | |
225 `reals`, `reason`, `rebless`, `receive`, `recv`, `redispatcher`, `redo`, `reduce`, | |
226 `rel2abs`, `relative`, `release`, `remove`, `rename`, `repeated`, `replacement`, | |
227 `replace-with`, `repo`, `repo-id`, `report`, `required`, `reserved`, `resolve`, `restore`, | |
228 `result`, `resume`, `rethrow`, `return`, `return-rw`, `returns`, `reverse`, `right`, | |
229 `rindex`, `rmdir`, `role`, `roles_to_compose`, `rolish`, `roll`, `rootdir`, `roots`, | |
230 `rotate`, `rotor`, `round`, `roundrobin`, `routine-type`, `run`, `RUN-MAIN`, `rw`, `rwx`, | |
231 `samecase`, `samemark`, `samewith`, `say`, `schedule-on`, `scheduler`, `scope`, `sec`, | |
232 `sech`, `second`, `secondary`, `seek`, `self`, `send`, `Seq`, `Set`, `set`, `serial`, | |
233 `set_hidden`, `set_name`, `set_package`, `set_rw`, `set_value`, `set_api`, `set_auth`, | |
234 `set_composalizer`, `set_export_callback`, `set_is_mixin`, `set_mixin_attribute`, | |
235 `set_package`, `set_ver`, `set_why`, `SetHash`, `Setty`, `set-instruments`, | |
236 `setup_finalization`, `setup_mixin_cache`, `shape`, `share`, `shell`, `short-id`, | |
237 `short-name`, `shortname`, `shift`, `sibling`, `sigil`, `sign`, `signal`, `signals`, | |
238 `signature`, `sin`, `sinh`, `sink`, `sink-all`, `skip`, `skip-at-least`, | |
239 `skip-at-least-pull-one`, `skip-one`, `skip-rest`, `sleep`, `sleep-timer`, `sleep-until`, | |
240 `Slip`, `slip`, `slurp`, `slurp-rest`, `slurpy`, `snap`, `snapper`, `so`, `socket-host`, | |
241 `socket-port`, `sort`, `source`, `source-package`, `spawn`, `SPEC`, `splice`, `split`, | |
242 `splitdir`, `splitpath`, `sprintf`, `spurt`, `sqrt`, `squish`, `srand`, `stable`, `start`, | |
243 `started`, `starts-with`, `status`, `stderr`, `stdout`, `STORE`, `store-file`, | |
244 `store-repo-id`, `store-unit`, `Str`, `Stringy`, `sub_signature`, `subbuf`, `subbuf-rw`, | |
245 `subname`, `subparse`, `subst`, `subst-mutate`, `substr`, `substr-eq`, `substr-rw`, | |
246 `subtest`, `succ`, `sum`, `suffix`, `summary`, `Supply`, `symlink`, `T`, `t`, `tail`, | |
247 `take`, `take-rw`, `tan`, `tanh`, `tap`, `target`, `target-name`, `tc`, `tclc`, `tell`, | |
248 `term`, `tertiary`, `then`, `throttle`, `throw`, `throws-like`, `time`, `timezone`, | |
249 `tmpdir`, `to`, `today`, `todo`, `toggle`, `to-posix`, `total`, `total-memory`, `trailing`, | |
250 `trans`, `tree`, `trim`, `trim-leading`, `trim-trailing`, `truncate`, `truncated-to`, | |
251 `trusts`, `try_acquire`, `trying`, `twigil`, `type`, `type_captures`, `type_check`, | |
252 `typename`, `uc`, `udp`, `uncaught_handler`, `undefine`, `unimatch`, `unicmp`, `uniname`, | |
253 `uninames`, `uninstall`, `uniparse`, `uniprop`, `uniprops`, `unique`, `unival`, `univals`, | |
254 `unlike`, `unlink`, `unlock`, `unpack`, `unpolar`, `unset`, `unshift`, `unwrap`, `updir`, | |
255 `USAGE`, `usage-name`, `use-ok`, `utc`, `val`, `value`, `values`, `VAR`, `variable`, `ver`, | |
256 `verbose-config`, `Version`, `version`, `VMnames`, `volume`, `vow`, `w`, `wait`, `warn`, | |
257 `watch`, `watch-path`, `week`, `weekday-of-month`, `week-number`, `week-year`, `WHAT`, | |
258 `what`, `when`, `WHERE`, `WHEREFORE`, `WHICH`, `WHO`, `whole-second`, `WHY`, `why`, | |
259 `with-lock-hidden-from-recursion-check`, `wordcase`, `words`, `workaround`, `wrap`, | |
260 `write`, `write-bits`, `write-int128`, `write-int16`, `write-int32`, `write-int64`, | |
261 `write-int8`, `write-num32`, `write-num64`, `write-ubits`, `write-uint128`, `write-uint16`, | |
262 `write-uint32`, `write-uint64`, `write-uint8`, `write-to`, `x`, `yada`, `year`, `yield`, | |
263 `yyyy-mm-dd`, `z`, `zip`, `zip-latest`, `HOW`, `s`, `DEPRECATED`, `trait_mod`, | |
264 } | |
265 | |
266 builtinRoutinesPattern := Words(`(?<!['\w:-])`, `(?!['\w-])`, builtinRoutines...) | |
267 | |
268 // A map of opening and closing brackets | |
269 brackets := map[rune]rune{ | |
270 '\u0028': '\u0029', '\u003c': '\u003e', '\u005b': '\u005d', | |
271 '\u007b': '\u007d', '\u00ab': '\u00bb', '\u0f3a': '\u0f3b', | |
272 '\u0f3c': '\u0f3d', '\u169b': '\u169c', '\u2018': '\u2019', | |
273 '\u201a': '\u2019', '\u201b': '\u2019', '\u201c': '\u201d', | |
274 '\u201e': '\u201d', '\u201f': '\u201d', '\u2039': '\u203a', | |
275 '\u2045': '\u2046', '\u207d': '\u207e', '\u208d': '\u208e', | |
276 '\u2208': '\u220b', '\u2209': '\u220c', '\u220a': '\u220d', | |
277 '\u2215': '\u29f5', '\u223c': '\u223d', '\u2243': '\u22cd', | |
278 '\u2252': '\u2253', '\u2254': '\u2255', '\u2264': '\u2265', | |
279 '\u2266': '\u2267', '\u2268': '\u2269', '\u226a': '\u226b', | |
280 '\u226e': '\u226f', '\u2270': '\u2271', '\u2272': '\u2273', | |
281 '\u2274': '\u2275', '\u2276': '\u2277', '\u2278': '\u2279', | |
282 '\u227a': '\u227b', '\u227c': '\u227d', '\u227e': '\u227f', | |
283 '\u2280': '\u2281', '\u2282': '\u2283', '\u2284': '\u2285', | |
284 '\u2286': '\u2287', '\u2288': '\u2289', '\u228a': '\u228b', | |
285 '\u228f': '\u2290', '\u2291': '\u2292', '\u2298': '\u29b8', | |
286 '\u22a2': '\u22a3', '\u22a6': '\u2ade', '\u22a8': '\u2ae4', | |
287 '\u22a9': '\u2ae3', '\u22ab': '\u2ae5', '\u22b0': '\u22b1', | |
288 '\u22b2': '\u22b3', '\u22b4': '\u22b5', '\u22b6': '\u22b7', | |
289 '\u22c9': '\u22ca', '\u22cb': '\u22cc', '\u22d0': '\u22d1', | |
290 '\u22d6': '\u22d7', '\u22d8': '\u22d9', '\u22da': '\u22db', | |
291 '\u22dc': '\u22dd', '\u22de': '\u22df', '\u22e0': '\u22e1', | |
292 '\u22e2': '\u22e3', '\u22e4': '\u22e5', '\u22e6': '\u22e7', | |
293 '\u22e8': '\u22e9', '\u22ea': '\u22eb', '\u22ec': '\u22ed', | |
294 '\u22f0': '\u22f1', '\u22f2': '\u22fa', '\u22f3': '\u22fb', | |
295 '\u22f4': '\u22fc', '\u22f6': '\u22fd', '\u22f7': '\u22fe', | |
296 '\u2308': '\u2309', '\u230a': '\u230b', '\u2329': '\u232a', | |
297 '\u23b4': '\u23b5', '\u2768': '\u2769', '\u276a': '\u276b', | |
298 '\u276c': '\u276d', '\u276e': '\u276f', '\u2770': '\u2771', | |
299 '\u2772': '\u2773', '\u2774': '\u2775', '\u27c3': '\u27c4', | |
300 '\u27c5': '\u27c6', '\u27d5': '\u27d6', '\u27dd': '\u27de', | |
301 '\u27e2': '\u27e3', '\u27e4': '\u27e5', '\u27e6': '\u27e7', | |
302 '\u27e8': '\u27e9', '\u27ea': '\u27eb', '\u2983': '\u2984', | |
303 '\u2985': '\u2986', '\u2987': '\u2988', '\u2989': '\u298a', | |
304 '\u298b': '\u298c', '\u298d': '\u298e', '\u298f': '\u2990', | |
305 '\u2991': '\u2992', '\u2993': '\u2994', '\u2995': '\u2996', | |
306 '\u2997': '\u2998', '\u29c0': '\u29c1', '\u29c4': '\u29c5', | |
307 '\u29cf': '\u29d0', '\u29d1': '\u29d2', '\u29d4': '\u29d5', | |
308 '\u29d8': '\u29d9', '\u29da': '\u29db', '\u29f8': '\u29f9', | |
309 '\u29fc': '\u29fd', '\u2a2b': '\u2a2c', '\u2a2d': '\u2a2e', | |
310 '\u2a34': '\u2a35', '\u2a3c': '\u2a3d', '\u2a64': '\u2a65', | |
311 '\u2a79': '\u2a7a', '\u2a7d': '\u2a7e', '\u2a7f': '\u2a80', | |
312 '\u2a81': '\u2a82', '\u2a83': '\u2a84', '\u2a8b': '\u2a8c', | |
313 '\u2a91': '\u2a92', '\u2a93': '\u2a94', '\u2a95': '\u2a96', | |
314 '\u2a97': '\u2a98', '\u2a99': '\u2a9a', '\u2a9b': '\u2a9c', | |
315 '\u2aa1': '\u2aa2', '\u2aa6': '\u2aa7', '\u2aa8': '\u2aa9', | |
316 '\u2aaa': '\u2aab', '\u2aac': '\u2aad', '\u2aaf': '\u2ab0', | |
317 '\u2ab3': '\u2ab4', '\u2abb': '\u2abc', '\u2abd': '\u2abe', | |
318 '\u2abf': '\u2ac0', '\u2ac1': '\u2ac2', '\u2ac3': '\u2ac4', | |
319 '\u2ac5': '\u2ac6', '\u2acd': '\u2ace', '\u2acf': '\u2ad0', | |
320 '\u2ad1': '\u2ad2', '\u2ad3': '\u2ad4', '\u2ad5': '\u2ad6', | |
321 '\u2aec': '\u2aed', '\u2af7': '\u2af8', '\u2af9': '\u2afa', | |
322 '\u2e02': '\u2e03', '\u2e04': '\u2e05', '\u2e09': '\u2e0a', | |
323 '\u2e0c': '\u2e0d', '\u2e1c': '\u2e1d', '\u2e20': '\u2e21', | |
324 '\u3008': '\u3009', '\u300a': '\u300b', '\u300c': '\u300d', | |
325 '\u300e': '\u300f', '\u3010': '\u3011', '\u3014': '\u3015', | |
326 '\u3016': '\u3017', '\u3018': '\u3019', '\u301a': '\u301b', | |
327 '\u301d': '\u301e', '\ufd3e': '\ufd3f', '\ufe17': '\ufe18', | |
328 '\ufe35': '\ufe36', '\ufe37': '\ufe38', '\ufe39': '\ufe3a', | |
329 '\ufe3b': '\ufe3c', '\ufe3d': '\ufe3e', '\ufe3f': '\ufe40', | |
330 '\ufe41': '\ufe42', '\ufe43': '\ufe44', '\ufe47': '\ufe48', | |
331 '\ufe59': '\ufe5a', '\ufe5b': '\ufe5c', '\ufe5d': '\ufe5e', | |
332 '\uff08': '\uff09', '\uff1c': '\uff1e', '\uff3b': '\uff3d', | |
333 '\uff5b': '\uff5d', '\uff5f': '\uff60', '\uff62': '\uff63', | |
334 } | |
335 | |
336 bracketsPattern := `[` + regexp.QuoteMeta(joinRuneMap(brackets)) + `]` | |
337 | |
338 // Finds opening brackets and their closing counterparts (including pod and heredoc) | |
339 // and modifies state groups and position accordingly | |
340 findBrackets := func(tokenClass RakuToken) MutatorFunc { | |
341 return func(state *LexerState) error { | |
342 var openingChars []rune | |
343 var adverbs []rune | |
344 switch tokenClass { | |
345 case rakuPod: | |
346 openingChars = []rune(strings.Join(state.Groups[1:5], ``)) | |
347 default: | |
348 adverbs = []rune(state.NamedGroups[`adverbs`]) | |
349 openingChars = []rune(state.NamedGroups[`opening_delimiters`]) | |
350 } | |
351 | |
352 openingChar := openingChars[0] | |
353 | |
354 nChars := len(openingChars) | |
355 | |
356 var closingChar rune | |
357 var closingCharExists bool | |
358 var closingChars []rune | |
359 | |
360 switch tokenClass { | |
361 case rakuPod: | |
362 closingCharExists = true | |
363 default: | |
364 closingChar, closingCharExists = brackets[openingChar] | |
365 } | |
366 | |
367 switch tokenClass { | |
368 case rakuPodFormatter: | |
369 formatter := StringOther | |
370 | |
371 switch state.NamedGroups[`keyword`] { | |
372 case "B": | |
373 formatter = GenericStrong | |
374 case "I": | |
375 formatter = GenericEmph | |
376 case "U": | |
377 formatter = GenericUnderline | |
378 } | |
379 | |
380 formatterRule := ruleReplacingConfig{ | |
381 pattern: `.+?`, | |
382 tokenType: formatter, | |
383 mutator: nil, | |
384 stateName: `pod-formatter`, | |
385 rulePosition: bottomRule, | |
386 } | |
387 | |
388 err := replaceRule(formatterRule)(state) | |
389 if err != nil { | |
390 panic(err) | |
391 } | |
392 | |
393 err = replaceRule(ruleReplacingConfig{ | |
394 delimiter: []rune{closingChar}, | |
395 tokenType: Punctuation, | |
396 stateName: `pod-formatter`, | |
397 pushState: true, | |
398 numberOfDelimiterChars: nChars, | |
399 appendMutator: popRule(formatterRule), | |
400 })(state) | |
401 if err != nil { | |
402 panic(err) | |
403 } | |
404 | |
405 return nil | |
406 case rakuMatchRegex: | |
407 var delimiter []rune | |
408 if closingCharExists { | |
409 delimiter = []rune{closingChar} | |
410 } else { | |
411 delimiter = openingChars | |
412 } | |
413 | |
414 err := replaceRule(ruleReplacingConfig{ | |
415 delimiter: delimiter, | |
416 tokenType: Punctuation, | |
417 stateName: `regex`, | |
418 popState: true, | |
419 pushState: true, | |
420 })(state) | |
421 if err != nil { | |
422 panic(err) | |
423 } | |
424 | |
425 return nil | |
426 case rakuSubstitutionRegex: | |
427 delimiter := regexp2.Escape(string(openingChars)) | |
428 | |
429 err := replaceRule(ruleReplacingConfig{ | |
430 pattern: `(` + delimiter + `)` + `((?:\\\\|\\/|.)*?)` + `(` + delimiter + `)`, | |
431 tokenType: ByGroups(Punctuation, UsingSelf(`qq`), Punctuation), | |
432 rulePosition: topRule, | |
433 stateName: `regex`, | |
434 popState: true, | |
435 pushState: true, | |
436 })(state) | |
437 if err != nil { | |
438 panic(err) | |
439 } | |
440 | |
441 return nil | |
442 } | |
443 | |
444 text := state.Text | |
445 | |
446 var endPos int | |
447 | |
448 var nonMirroredOpeningCharPosition int | |
449 | |
450 if !closingCharExists { | |
451 // it's not a mirrored character, which means we | |
452 // just need to look for the next occurrence | |
453 closingChars = openingChars | |
454 nonMirroredOpeningCharPosition = indexAt(text, closingChars, state.Pos) | |
455 endPos = nonMirroredOpeningCharPosition | |
456 } else { | |
457 var podRegex *regexp2.Regexp | |
458 if tokenClass == rakuPod { | |
459 podRegex = regexp2.MustCompile( | |
460 state.NamedGroups[`ws`]+`=end`+`\s+`+regexp2.Escape(state.NamedGroups[`name`]), | |
461 0, | |
462 ) | |
463 } else { | |
464 closingChars = []rune(strings.Repeat(string(closingChar), nChars)) | |
465 } | |
466 | |
467 // we need to look for the corresponding closing character, | |
468 // keep nesting in mind | |
469 nestingLevel := 1 | |
470 | |
471 searchPos := state.Pos - nChars | |
472 | |
473 var nextClosePos int | |
474 | |
475 for nestingLevel > 0 { | |
476 if tokenClass == rakuPod { | |
477 match, err := podRegex.FindRunesMatchStartingAt(text, searchPos+nChars) | |
478 if err == nil { | |
479 closingChars = match.Runes() | |
480 nextClosePos = match.Index | |
481 } else { | |
482 nextClosePos = -1 | |
483 } | |
484 } else { | |
485 nextClosePos = indexAt(text, closingChars, searchPos+nChars) | |
486 } | |
487 | |
488 nextOpenPos := indexAt(text, openingChars, searchPos+nChars) | |
489 | |
490 switch { | |
491 case nextClosePos == -1: | |
492 nextClosePos = len(text) | |
493 nestingLevel = 0 | |
494 case nextOpenPos != -1 && nextOpenPos < nextClosePos: | |
495 nestingLevel++ | |
496 nChars = len(openingChars) | |
497 searchPos = nextOpenPos | |
498 default: // next_close_pos < next_open_pos | |
499 nestingLevel-- | |
500 nChars = len(closingChars) | |
501 searchPos = nextClosePos | |
502 } | |
503 } | |
504 | |
505 endPos = nextClosePos | |
506 } | |
507 | |
508 if endPos < 0 { | |
509 // if we didn't find a closer, just highlight the | |
510 // rest of the text in this class | |
511 endPos = len(text) | |
512 } | |
513 | |
514 adverbre := regexp.MustCompile(`:to\b|:heredoc\b`) | |
515 var heredocTerminator []rune | |
516 var endHeredocPos int | |
517 if adverbre.MatchString(string(adverbs)) { | |
518 if endPos != len(text) { | |
519 heredocTerminator = text[state.Pos:endPos] | |
520 nChars = len(heredocTerminator) | |
521 } else { | |
522 endPos = state.Pos + 1 | |
523 heredocTerminator = []rune{} | |
524 nChars = 0 | |
525 } | |
526 | |
527 if nChars > 0 { | |
528 endHeredocPos = indexAt(text[endPos:], heredocTerminator, 0) | |
529 if endHeredocPos > -1 { | |
530 endPos += endHeredocPos | |
531 } else { | |
532 endPos = len(text) | |
533 } | |
534 } | |
535 } | |
536 | |
537 textBetweenBrackets := string(text[state.Pos:endPos]) | |
538 switch tokenClass { | |
539 case rakuPod, rakuPodDeclaration, rakuNameAttribute: | |
540 state.NamedGroups[`value`] = textBetweenBrackets | |
541 state.NamedGroups[`closing_delimiters`] = string(closingChars) | |
542 case rakuQuote: | |
543 if len(heredocTerminator) > 0 { | |
544 // Length of heredoc terminator + closing chars + `;` | |
545 heredocFristPunctuationLen := nChars + len(openingChars) + 1 | |
546 | |
547 state.NamedGroups[`opening_delimiters`] = string(openingChars) + | |
548 string(text[state.Pos:state.Pos+heredocFristPunctuationLen]) | |
549 | |
550 state.NamedGroups[`value`] = | |
551 string(text[state.Pos+heredocFristPunctuationLen : endPos]) | |
552 | |
553 if endHeredocPos > -1 { | |
554 state.NamedGroups[`closing_delimiters`] = string(heredocTerminator) | |
555 } | |
556 } else { | |
557 state.NamedGroups[`value`] = textBetweenBrackets | |
558 if nChars > 0 { | |
559 state.NamedGroups[`closing_delimiters`] = string(closingChars) | |
560 } | |
561 } | |
562 default: | |
563 state.Groups = []string{state.Groups[0] + string(text[state.Pos:endPos+nChars])} | |
564 } | |
565 | |
566 state.Pos = endPos + nChars | |
567 | |
568 return nil | |
569 } | |
570 } | |
571 | |
572 // Raku rules | |
573 // Empty capture groups are placeholders and will be replaced by mutators | |
574 // DO NOT REMOVE THEM! | |
575 return Rules{ | |
576 "root": { | |
577 // Placeholder, will be overwritten by mutators, DO NOT REMOVE! | |
578 {`\A\z`, nil, nil}, | |
579 Include("common"), | |
580 {`{`, Punctuation, Push(`root`)}, | |
581 {`\(`, Punctuation, Push(`root`)}, | |
582 {`[)}]`, Punctuation, Pop(1)}, | |
583 {`;`, Punctuation, nil}, | |
584 {`\[|\]`, Operator, nil}, | |
585 {`.+?`, Text, nil}, | |
586 }, | |
587 "common": { | |
588 {`^#![^\n]*$`, CommentHashbang, nil}, | |
589 Include("pod"), | |
590 // Multi-line, Embedded comment | |
591 { | |
592 "#`(?<opening_delimiters>(?<delimiter>" + bracketsPattern + `)\k<delimiter>*)`, | |
593 CommentMultiline, | |
594 findBrackets(rakuMultilineComment), | |
595 }, | |
596 {`#[^\n]*$`, CommentSingle, nil}, | |
597 // /regex/ | |
598 { | |
599 `(?<=(?:^|\(|=|:|~~|\[|{|,|=>)\s*)(/)(?!\]|\))((?:\\\\|\\/|.)*?)((?<!(?<!\\)\\)/(?!'|"))`, | |
600 ByGroups(Punctuation, UsingSelf("regex"), Punctuation), | |
601 nil, | |
602 }, | |
603 Include("variable"), | |
604 // ::?VARIABLE | |
605 {`::\?\w+(?::[_UD])?`, NameVariableGlobal, nil}, | |
606 // Version | |
607 { | |
608 `\b(v)(\d+)((?:\.(?:\*|[\d\w]+))*)(\+)?`, | |
609 ByGroups(Keyword, NumberInteger, NameEntity, Operator), | |
610 nil, | |
611 }, | |
612 Include("number"), | |
613 // Hyperoperator | »*« | |
614 {`(>>)(\S+?)(<<)`, ByGroups(Operator, UsingSelf("root"), Operator), nil}, | |
615 {`(»)(\S+?)(«)`, ByGroups(Operator, UsingSelf("root"), Operator), nil}, | |
616 // Hyperoperator | «*« | |
617 {`(<<)(\S+?)(<<)`, ByGroups(Operator, UsingSelf("root"), Operator), nil}, | |
618 {`(«)(\S+?)(«)`, ByGroups(Operator, UsingSelf("root"), Operator), nil}, | |
619 // Hyperoperator | »*» | |
620 {`(>>)(\S+?)(>>)`, ByGroups(Operator, UsingSelf("root"), Operator), nil}, | |
621 {`(»)(\S+?)(»)`, ByGroups(Operator, UsingSelf("root"), Operator), nil}, | |
622 // <<quoted words>> | |
623 {`(?<!(?:\d+|\.(?:Int|Numeric)|[$@%]\*?[\w':-]+\s+|[\])}]\s+)\s*)(<<)(?!(?:(?!>>)[^\n])+?[},;] *\n)(?!(?:(?!>>).)+?>>\S+?>>)`, Punctuation, Push("<<")}, | |
624 // «quoted words» | |
625 {`(?<!(?:\d+|\.(?:Int|Numeric)|[$@%]\*?[\w':-]+\s+|[\])}]\s+)\s*)(«)(?![^»]+?[},;] *\n)(?![^»]+?»\S+?»)`, Punctuation, Push("«")}, | |
626 // [<] | |
627 {`(?<=\[\\?)<(?=\])`, Operator, nil}, | |
628 // < and > operators | something < onething > something | |
629 { | |
630 `(?<=[$@%&]?\w[\w':-]* +)(<=?)( *[^ ]+? *)(>=?)(?= *[$@%&]?\w[\w':-]*)`, | |
631 ByGroups(Operator, UsingSelf("root"), Operator), | |
632 nil, | |
633 }, | |
634 // <quoted words> | |
635 { | |
636 `(?<!(?:\d+|\.(?:Int|Numeric)|[$@%]\*?[\w':-]+\s+|[\])}]\s+)\s*)(<)((?:(?![,;)}] *(?:#[^\n]+)?\n)[^<>])+?)(>)(?!\s*(?:\d+|\.(?:Int|Numeric)|[$@%]\*?\w[\w':-]*[^(]|\s+\[))`, | |
637 ByGroups(Punctuation, String, Punctuation), | |
638 nil, | |
639 }, | |
640 {`C?X::['\w:-]+`, NameException, nil}, | |
641 Include("metaoperator"), | |
642 // Pair | key => value | |
643 { | |
644 `(\w[\w'-]*)(\s*)(=>)`, | |
645 ByGroups(String, Text, Operator), | |
646 nil, | |
647 }, | |
648 Include("colon-pair"), | |
649 // Token | |
650 { | |
651 `(?<=(?:^|\s)(?:regex|token|rule)(\s+))` + namePattern + colonPairLookahead + `\s*[({])`, | |
652 NameFunction, | |
653 Push("token", "name-adverb"), | |
654 }, | |
655 // Substitution | |
656 {`(?<=^|\b|\s)(?<!\.)(ss|S|s|TR|tr)\b(\s*)`, ByGroups(Keyword, Text), Push("substitution")}, | |
657 {keywordsPattern, Keyword, nil}, | |
658 {builtinTypesPattern, NameBuiltin, nil}, | |
659 {builtinRoutinesPattern, NameBuiltin, nil}, | |
660 // Class name | |
661 { | |
662 `(?<=(?:^|\s)(?:class|grammar|role|does|but|is|subset|of)\s+)` + namePattern, | |
663 NameClass, | |
664 Push("name-adverb"), | |
665 }, | |
666 // Routine | |
667 { | |
668 `(?<=(?:^|\s)(?:sub|method|multi sub|multi)\s+)!?` + namePattern + colonPairLookahead + `\s*[({])`, | |
669 NameFunction, | |
670 Push("name-adverb"), | |
671 }, | |
672 // Constant | |
673 {`(?<=\bconstant\s+)` + namePattern, NameConstant, Push("name-adverb")}, | |
674 // Namespace | |
675 {`(?<=\b(?:use|module|package)\s+)` + namePattern, NameNamespace, Push("name-adverb")}, | |
676 Include("operator"), | |
677 Include("single-quote"), | |
678 {`(?<!(?<!\\)\\)"`, Punctuation, Push("double-quotes")}, | |
679 // m,rx regex | |
680 {`(?<=^|\b|\s)(ms|m|rx)\b(\s*)`, ByGroups(Keyword, Text), Push("rx")}, | |
681 // Quote constructs | |
682 { | |
683 `(?<=^|\b|\s)(?<keyword>(?:qq|q|Q))(?<adverbs>(?::?(?:heredoc|to|qq|ww|q|w|s|a|h|f|c|b|to|v|x))*)(?<ws>\s*)(?<opening_delimiters>(?<delimiter>[^0-9a-zA-Z:\s])\k<delimiter>*)`, | |
684 EmitterFunc(quote), | |
685 findBrackets(rakuQuote), | |
686 }, | |
687 // Function | |
688 { | |
689 `\b` + namePattern + colonPairLookahead + `\()`, | |
690 NameFunction, | |
691 Push("name-adverb"), | |
692 }, | |
693 // Method | |
694 { | |
695 `(?<!\.\.[?^*+]?)(?<=(?:\.[?^*+&]?)|self!)` + namePattern + colonPairLookahead + `\b)`, | |
696 NameFunction, | |
697 Push("name-adverb"), | |
698 }, | |
699 // Indirect invocant | |
700 {namePattern + `(?=\s+\W?['\w:-]+:\W)`, NameFunction, Push("name-adverb")}, | |
701 {`(?<=\W)(?:∅|i|e|𝑒|tau|τ|pi|π|Inf|∞)(?=\W)`, NameConstant, nil}, | |
702 {`(「)([^」]*)(」)`, ByGroups(Punctuation, String, Punctuation), nil}, | |
703 {`(?<=^ *)\b` + namePattern + `(?=:\s*(?:for|while|loop))`, NameLabel, nil}, | |
704 // Sigilless variable | |
705 { | |
706 `(?<=\b(?:my|our|constant|let|temp)\s+)\\` + namePattern, | |
707 NameVariable, | |
708 Push("name-adverb"), | |
709 }, | |
710 {namePattern, Name, Push("name-adverb")}, | |
711 }, | |
712 "rx": { | |
713 Include("colon-pair-attribute"), | |
714 { | |
715 `(?<opening_delimiters>(?<delimiter>[^\w:\s])\k<delimiter>*)`, | |
716 ByGroupNames( | |
717 map[string]Emitter{ | |
718 `opening_delimiters`: Punctuation, | |
719 `delimiter`: nil, | |
720 }, | |
721 ), | |
722 findBrackets(rakuMatchRegex), | |
723 }, | |
724 }, | |
725 "substitution": { | |
726 Include("colon-pair-attribute"), | |
727 // Substitution | s{regex} = value | |
728 { | |
729 `(?<opening_delimiters>(?<delimiter>` + bracketsPattern + `)\k<delimiter>*)`, | |
730 ByGroupNames(map[string]Emitter{ | |
731 `opening_delimiters`: Punctuation, | |
732 `delimiter`: nil, | |
733 }), | |
734 findBrackets(rakuMatchRegex), | |
735 }, | |
736 // Substitution | s/regex/string/ | |
737 { | |
738 `(?<opening_delimiters>[^\w:\s])`, | |
739 Punctuation, | |
740 findBrackets(rakuSubstitutionRegex), | |
741 }, | |
742 }, | |
743 "number": { | |
744 {`0_?[0-7]+(_[0-7]+)*`, LiteralNumberOct, nil}, | |
745 {`0x[0-9A-Fa-f]+(_[0-9A-Fa-f]+)*`, LiteralNumberHex, nil}, | |
746 {`0b[01]+(_[01]+)*`, LiteralNumberBin, nil}, | |
747 { | |
748 `(?i)(\d*(_\d*)*\.\d+(_\d*)*|\d+(_\d*)*\.\d+(_\d*)*)(e[+-]?\d+)?`, | |
749 LiteralNumberFloat, | |
750 nil, | |
751 }, | |
752 {`(?i)\d+(_\d*)*e[+-]?\d+(_\d*)*`, LiteralNumberFloat, nil}, | |
753 {`(?<=\d+)i`, NameConstant, nil}, | |
754 {`\d+(_\d+)*`, LiteralNumberInteger, nil}, | |
755 }, | |
756 "name-adverb": { | |
757 Include("colon-pair-attribute-keyvalue"), | |
758 Default(Pop(1)), | |
759 }, | |
760 "colon-pair": { | |
761 // :key(value) | |
762 {colonPairPattern, colonPair(String), findBrackets(rakuNameAttribute)}, | |
763 // :123abc | |
764 { | |
765 `(:)(\d+)(\w[\w'-]*)`, | |
766 ByGroups(Punctuation, UsingSelf("number"), String), | |
767 nil, | |
768 }, | |
769 // :key | |
770 {`(:)(!?)(\w[\w'-]*)`, ByGroups(Punctuation, Operator, String), nil}, | |
771 {`\s+`, Text, nil}, | |
772 }, | |
773 "colon-pair-attribute": { | |
774 // :key(value) | |
775 {colonPairPattern, colonPair(NameAttribute), findBrackets(rakuNameAttribute)}, | |
776 // :123abc | |
777 { | |
778 `(:)(\d+)(\w[\w'-]*)`, | |
779 ByGroups(Punctuation, UsingSelf("number"), NameAttribute), | |
780 nil, | |
781 }, | |
782 // :key | |
783 {`(:)(!?)(\w[\w'-]*)`, ByGroups(Punctuation, Operator, NameAttribute), nil}, | |
784 {`\s+`, Text, nil}, | |
785 }, | |
786 "colon-pair-attribute-keyvalue": { | |
787 // :key(value) | |
788 {colonPairPattern, colonPair(NameAttribute), findBrackets(rakuNameAttribute)}, | |
789 }, | |
790 "escape-qq": { | |
791 { | |
792 `(?<!(?<!\\)\\)(\\qq)(\[)(.+?)(\])`, | |
793 ByGroups(StringEscape, Punctuation, UsingSelf("qq"), Punctuation), | |
794 nil, | |
795 }, | |
796 }, | |
797 `escape-char`: { | |
798 {`(?<!(?<!\\)\\)(\\[abfrnrt])`, StringEscape, nil}, | |
799 }, | |
800 `escape-single-quote`: { | |
801 {`(?<!(?<!\\)\\)(\\)(['\\])`, ByGroups(StringEscape, StringSingle), nil}, | |
802 }, | |
803 "escape-c-name": { | |
804 { | |
805 `(?<!(?<!\\)\\)(\\[c|C])(\[)(.+?)(\])`, | |
806 ByGroups(StringEscape, Punctuation, String, Punctuation), | |
807 nil, | |
808 }, | |
809 }, | |
810 "escape-hexadecimal": { | |
811 { | |
812 `(?<!(?<!\\)\\)(\\[x|X])(\[)([0-9a-fA-F]+)(\])`, | |
813 ByGroups(StringEscape, Punctuation, NumberHex, Punctuation), | |
814 nil, | |
815 }, | |
816 {`(\\[x|X])([0-9a-fA-F]+)`, ByGroups(StringEscape, NumberHex), nil}, | |
817 }, | |
818 "regex": { | |
819 // Placeholder, will be overwritten by mutators, DO NOT REMOVE! | |
820 {`\A\z`, nil, nil}, | |
821 Include("regex-escape-class"), | |
822 Include(`regex-character-escape`), | |
823 // $(code) | |
824 { | |
825 `([$@])((?<!(?<!\\)\\)\()`, | |
826 ByGroups(Keyword, Punctuation), | |
827 replaceRule(ruleReplacingConfig{ | |
828 delimiter: []rune(`)`), | |
829 tokenType: Punctuation, | |
830 stateName: `root`, | |
831 pushState: true, | |
832 }), | |
833 }, | |
834 // Exclude $/ from variables, because we can't get out of the end of the slash regex: $/; | |
835 {`\$(?=/)`, NameEntity, nil}, | |
836 // Exclude $ from variables | |
837 {`\$(?=\z|\s|[^<(\w*!.])`, NameEntity, nil}, | |
838 Include("variable"), | |
839 Include("escape-c-name"), | |
840 Include("escape-hexadecimal"), | |
841 Include("number"), | |
842 Include("single-quote"), | |
843 // :my variable code ... | |
844 { | |
845 `(?<!(?<!\\)\\)(:)(my|our|state|constant|temp|let)`, | |
846 ByGroups(Operator, KeywordDeclaration), | |
847 replaceRule(ruleReplacingConfig{ | |
848 delimiter: []rune(`;`), | |
849 tokenType: Punctuation, | |
850 stateName: `root`, | |
851 pushState: true, | |
852 }), | |
853 }, | |
854 // <{code}> | |
855 { | |
856 `(?<!(?<!\\)\\)(<)([?!.]*)((?<!(?<!\\)\\){)`, | |
857 ByGroups(Punctuation, Operator, Punctuation), | |
858 replaceRule(ruleReplacingConfig{ | |
859 delimiter: []rune(`}>`), | |
860 tokenType: Punctuation, | |
861 stateName: `root`, | |
862 pushState: true, | |
863 }), | |
864 }, | |
865 // {code} | |
866 Include(`closure`), | |
867 // Properties | |
868 {`(:)(\w+)`, ByGroups(Punctuation, NameAttribute), nil}, | |
869 // Operator | |
870 {`\|\||\||&&|&|\.\.|\*\*|%%|%|:|!|<<|«|>>|»|\+|\*\*|\*|\?|=|~|<~~>`, Operator, nil}, | |
871 // Anchors | |
872 {`\^\^|\^|\$\$|\$`, NameEntity, nil}, | |
873 {`\.`, NameEntity, nil}, | |
874 {`#[^\n]*\n`, CommentSingle, nil}, | |
875 // Lookaround | |
876 { | |
877 `(?<!(?<!\\)\\)(<)(\s*)([?!.]+)(\s*)(after|before)`, | |
878 ByGroups(Punctuation, Text, Operator, Text, OperatorWord), | |
879 replaceRule(ruleReplacingConfig{ | |
880 delimiter: []rune(`>`), | |
881 tokenType: Punctuation, | |
882 stateName: `regex`, | |
883 pushState: true, | |
884 }), | |
885 }, | |
886 { | |
887 `(?<!(?<!\\)\\)(<)([|!?.]*)(wb|ww|ws|w)(>)`, | |
888 ByGroups(Punctuation, Operator, OperatorWord, Punctuation), | |
889 nil, | |
890 }, | |
891 // <$variable> | |
892 { | |
893 `(?<!(?<!\\)\\)(<)([?!.]*)([$@]\w[\w:-]*)(>)`, | |
894 ByGroups(Punctuation, Operator, NameVariable, Punctuation), | |
895 nil, | |
896 }, | |
897 // Capture markers | |
898 {`(?<!(?<!\\)\\)<\(|\)>`, Operator, nil}, | |
899 { | |
900 `(?<!(?<!\\)\\)(<)(\w[\w:-]*)(=\.?)`, | |
901 ByGroups(Punctuation, NameVariable, Operator), | |
902 Push(`regex-variable`), | |
903 }, | |
904 { | |
905 `(?<!(?<!\\)\\)(<)([|!?.&]*)(\w(?:(?!:\s)[\w':-])*)`, | |
906 ByGroups(Punctuation, Operator, NameFunction), | |
907 Push(`regex-function`), | |
908 }, | |
909 {`(?<!(?<!\\)\\)<`, Punctuation, Push("regex-property")}, | |
910 {`(?<!(?<!\\)\\)"`, Punctuation, Push("double-quotes")}, | |
911 {`(?<!(?<!\\)\\)(?:\]|\))`, Punctuation, Pop(1)}, | |
912 {`(?<!(?<!\\)\\)(?:\[|\()`, Punctuation, Push("regex")}, | |
913 {`.+?`, StringRegex, nil}, | |
914 }, | |
915 "regex-class-builtin": { | |
916 { | |
917 `\b(?:alnum|alpha|blank|cntrl|digit|graph|lower|print|punct|space|upper|xdigit|same|ident)\b`, | |
918 NameBuiltin, | |
919 nil, | |
920 }, | |
921 }, | |
922 "regex-function": { | |
923 // <function> | |
924 {`(?<!(?<!\\)\\)>`, Punctuation, Pop(1)}, | |
925 // <function(parameter)> | |
926 { | |
927 `\(`, | |
928 Punctuation, | |
929 replaceRule(ruleReplacingConfig{ | |
930 delimiter: []rune(`)>`), | |
931 tokenType: Punctuation, | |
932 stateName: `root`, | |
933 popState: true, | |
934 pushState: true, | |
935 }), | |
936 }, | |
937 // <function value> | |
938 { | |
939 `\s+`, | |
940 StringRegex, | |
941 replaceRule(ruleReplacingConfig{ | |
942 delimiter: []rune(`>`), | |
943 tokenType: Punctuation, | |
944 stateName: `regex`, | |
945 popState: true, | |
946 pushState: true, | |
947 }), | |
948 }, | |
949 // <function: value> | |
950 { | |
951 `:`, | |
952 Punctuation, | |
953 replaceRule(ruleReplacingConfig{ | |
954 delimiter: []rune(`>`), | |
955 tokenType: Punctuation, | |
956 stateName: `root`, | |
957 popState: true, | |
958 pushState: true, | |
959 }), | |
960 }, | |
961 }, | |
962 "regex-variable": { | |
963 Include(`regex-starting-operators`), | |
964 // <var=function( | |
965 { | |
966 `(&)?(\w(?:(?!:\s)[\w':-])*)(?=\()`, | |
967 ByGroups(Operator, NameFunction), | |
968 Mutators(Pop(1), Push(`regex-function`)), | |
969 }, | |
970 // <var=function> | |
971 {`(&)?(\w[\w':-]*)(>)`, ByGroups(Operator, NameFunction, Punctuation), Pop(1)}, | |
972 // <var= | |
973 Default(Pop(1), Push(`regex-property`)), | |
974 }, | |
975 "regex-property": { | |
976 {`(?<!(?<!\\)\\)>`, Punctuation, Pop(1)}, | |
977 Include("regex-class-builtin"), | |
978 Include("variable"), | |
979 Include(`regex-starting-operators`), | |
980 Include("colon-pair-attribute"), | |
981 {`(?<!(?<!\\)\\)\[`, Punctuation, Push("regex-character-class")}, | |
982 {`\+|\-`, Operator, nil}, | |
983 {`@[\w':-]+`, NameVariable, nil}, | |
984 {`.+?`, StringRegex, nil}, | |
985 }, | |
986 `regex-starting-operators`: { | |
987 {`(?<=<)[|!?.]+`, Operator, nil}, | |
988 }, | |
989 "regex-escape-class": { | |
990 {`(?i)\\n|\\t|\\h|\\v|\\s|\\d|\\w`, StringEscape, nil}, | |
991 }, | |
992 `regex-character-escape`: { | |
993 {`(?<!(?<!\\)\\)(\\)(.)`, ByGroups(StringEscape, StringRegex), nil}, | |
994 }, | |
995 "regex-character-class": { | |
996 {`(?<!(?<!\\)\\)\]`, Punctuation, Pop(1)}, | |
997 Include("regex-escape-class"), | |
998 Include("escape-c-name"), | |
999 Include("escape-hexadecimal"), | |
1000 Include(`regex-character-escape`), | |
1001 Include("number"), | |
1002 {`\.\.`, Operator, nil}, | |
1003 {`.+?`, StringRegex, nil}, | |
1004 }, | |
1005 "metaoperator": { | |
1006 // Z[=>] | |
1007 { | |
1008 `\b([RZX]+)\b(\[)([^\s\]]+?)(\])`, | |
1009 ByGroups(OperatorWord, Punctuation, UsingSelf("root"), Punctuation), | |
1010 nil, | |
1011 }, | |
1012 // Z=> | |
1013 {`\b([RZX]+)\b([^\s\]]+)`, ByGroups(OperatorWord, UsingSelf("operator")), nil}, | |
1014 }, | |
1015 "operator": { | |
1016 // Word Operator | |
1017 {wordOperatorsPattern, OperatorWord, nil}, | |
1018 // Operator | |
1019 {operatorsPattern, Operator, nil}, | |
1020 }, | |
1021 "pod": { | |
1022 // Single-line pod declaration | |
1023 {`(#[|=])\s`, Keyword, Push("pod-single")}, | |
1024 // Multi-line pod declaration | |
1025 { | |
1026 "(?<keyword>#[|=])(?<opening_delimiters>(?<delimiter>" + bracketsPattern + `)\k<delimiter>*)(?<value>)(?<closing_delimiters>)`, | |
1027 ByGroupNames( | |
1028 map[string]Emitter{ | |
1029 `keyword`: Keyword, | |
1030 `opening_delimiters`: Punctuation, | |
1031 `delimiter`: nil, | |
1032 `value`: UsingSelf("pod-declaration"), | |
1033 `closing_delimiters`: Punctuation, | |
1034 }), | |
1035 findBrackets(rakuPodDeclaration), | |
1036 }, | |
1037 Include("pod-blocks"), | |
1038 }, | |
1039 "pod-blocks": { | |
1040 // =begin code | |
1041 { | |
1042 `(?<=^ *)(?<ws> *)(?<keyword>=begin)(?<ws2> +)(?<name>code)(?<config>[^\n]*)(?<value>.*?)(?<ws3>^\k<ws>)(?<end_keyword>=end)(?<ws4> +)\k<name>`, | |
1043 EmitterFunc(podCode), | |
1044 nil, | |
1045 }, | |
1046 // =begin | |
1047 { | |
1048 `(?<=^ *)(?<ws> *)(?<keyword>=begin)(?<ws2> +)(?!code)(?<name>\w[\w'-]*)(?<config>[^\n]*)(?<value>)(?<closing_delimiters>)`, | |
1049 ByGroupNames( | |
1050 map[string]Emitter{ | |
1051 `ws`: Comment, | |
1052 `keyword`: Keyword, | |
1053 `ws2`: StringDoc, | |
1054 `name`: Keyword, | |
1055 `config`: EmitterFunc(podConfig), | |
1056 `value`: UsingSelf("pod-begin"), | |
1057 `closing_delimiters`: Keyword, | |
1058 }), | |
1059 findBrackets(rakuPod), | |
1060 }, | |
1061 // =for ... | |
1062 { | |
1063 `(?<=^ *)(?<ws> *)(?<keyword>=(?:for|defn))(?<ws2> +)(?<name>\w[\w'-]*)(?<config>[^\n]*\n)`, | |
1064 ByGroups(Comment, Keyword, StringDoc, Keyword, EmitterFunc(podConfig)), | |
1065 Push("pod-paragraph"), | |
1066 }, | |
1067 // =config | |
1068 { | |
1069 `(?<=^ *)(?<ws> *)(?<keyword>=config)(?<ws2> +)(?<name>\w[\w'-]*)(?<config>[^\n]*\n)`, | |
1070 ByGroups(Comment, Keyword, StringDoc, Keyword, EmitterFunc(podConfig)), | |
1071 nil, | |
1072 }, | |
1073 // =alias | |
1074 { | |
1075 `(?<=^ *)(?<ws> *)(?<keyword>=alias)(?<ws2> +)(?<name>\w[\w'-]*)(?<value>[^\n]*\n)`, | |
1076 ByGroups(Comment, Keyword, StringDoc, Keyword, StringDoc), | |
1077 nil, | |
1078 }, | |
1079 // =encoding | |
1080 { | |
1081 `(?<=^ *)(?<ws> *)(?<keyword>=encoding)(?<ws2> +)(?<name>[^\n]+)`, | |
1082 ByGroups(Comment, Keyword, StringDoc, Name), | |
1083 nil, | |
1084 }, | |
1085 // =para ... | |
1086 { | |
1087 `(?<=^ *)(?<ws> *)(?<keyword>=(?:para|table|pod))(?<config>(?<!\n\s*)[^\n]*\n)`, | |
1088 ByGroups(Comment, Keyword, EmitterFunc(podConfig)), | |
1089 Push("pod-paragraph"), | |
1090 }, | |
1091 // =head1 ... | |
1092 { | |
1093 `(?<=^ *)(?<ws> *)(?<keyword>=head\d+)(?<ws2> *)(?<config>#?)`, | |
1094 ByGroups(Comment, Keyword, GenericHeading, Keyword), | |
1095 Push("pod-heading"), | |
1096 }, | |
1097 // =item ... | |
1098 { | |
1099 `(?<=^ *)(?<ws> *)(?<keyword>=(?:item\d*|comment|data|[A-Z]+))(?<ws2> *)(?<config>#?)`, | |
1100 ByGroups(Comment, Keyword, StringDoc, Keyword), | |
1101 Push("pod-paragraph"), | |
1102 }, | |
1103 { | |
1104 `(?<=^ *)(?<ws> *)(?<keyword>=finish)(?<config>[^\n]*)`, | |
1105 ByGroups(Comment, Keyword, EmitterFunc(podConfig)), | |
1106 Push("pod-finish"), | |
1107 }, | |
1108 // ={custom} ... | |
1109 { | |
1110 `(?<=^ *)(?<ws> *)(?<name>=\w[\w'-]*)(?<ws2> *)(?<config>#?)`, | |
1111 ByGroups(Comment, Name, StringDoc, Keyword), | |
1112 Push("pod-paragraph"), | |
1113 }, | |
1114 // = podconfig | |
1115 { | |
1116 `(?<=^ *)(?<keyword> *=)(?<ws> *)(?<config>(?::\w[\w'-]*(?:` + colonPairOpeningBrackets + `.+?` + | |
1117 colonPairClosingBrackets + `) *)*\n)`, | |
1118 ByGroups(Keyword, StringDoc, EmitterFunc(podConfig)), | |
1119 nil, | |
1120 }, | |
1121 }, | |
1122 "pod-begin": { | |
1123 Include("pod-blocks"), | |
1124 Include("pre-pod-formatter"), | |
1125 {`.+?`, StringDoc, nil}, | |
1126 }, | |
1127 "pod-declaration": { | |
1128 Include("pre-pod-formatter"), | |
1129 {`.+?`, StringDoc, nil}, | |
1130 }, | |
1131 "pod-paragraph": { | |
1132 {`\n *\n|\n(?=^ *=)`, StringDoc, Pop(1)}, | |
1133 Include("pre-pod-formatter"), | |
1134 {`.+?`, StringDoc, nil}, | |
1135 }, | |
1136 "pod-single": { | |
1137 {`\n`, StringDoc, Pop(1)}, | |
1138 Include("pre-pod-formatter"), | |
1139 {`.+?`, StringDoc, nil}, | |
1140 }, | |
1141 "pod-heading": { | |
1142 {`\n *\n|\n(?=^ *=)`, GenericHeading, Pop(1)}, | |
1143 Include("pre-pod-formatter"), | |
1144 {`.+?`, GenericHeading, nil}, | |
1145 }, | |
1146 "pod-finish": { | |
1147 {`\z`, nil, Pop(1)}, | |
1148 Include("pre-pod-formatter"), | |
1149 {`.+?`, StringDoc, nil}, | |
1150 }, | |
1151 "pre-pod-formatter": { | |
1152 // C<code>, B<bold>, ... | |
1153 { | |
1154 `(?<keyword>[CBIUDTKRPAELZVMSXN])(?<opening_delimiters><+|«)`, | |
1155 ByGroups(Keyword, Punctuation), | |
1156 findBrackets(rakuPodFormatter), | |
1157 }, | |
1158 }, | |
1159 "pod-formatter": { | |
1160 // Placeholder rule, will be replaced by mutators. DO NOT REMOVE! | |
1161 {`>`, Punctuation, Pop(1)}, | |
1162 Include("pre-pod-formatter"), | |
1163 // Placeholder rule, will be replaced by mutators. DO NOT REMOVE! | |
1164 {`.+?`, StringOther, nil}, | |
1165 }, | |
1166 "variable": { | |
1167 {variablePattern, NameVariable, Push("name-adverb")}, | |
1168 {globalVariablePattern, NameVariableGlobal, Push("name-adverb")}, | |
1169 {`[$@]<[^>]+>`, NameVariable, nil}, | |
1170 {`\$[/!¢]`, NameVariable, nil}, | |
1171 {`[$@%]`, NameVariable, nil}, | |
1172 }, | |
1173 "single-quote": { | |
1174 {`(?<!(?<!\\)\\)'`, Punctuation, Push("single-quote-inner")}, | |
1175 }, | |
1176 "single-quote-inner": { | |
1177 {`(?<!(?<!(?<!\\)\\)\\)'`, Punctuation, Pop(1)}, | |
1178 Include("escape-single-quote"), | |
1179 Include("escape-qq"), | |
1180 {`(?:\\\\|\\[^\\]|[^'\\])+?`, StringSingle, nil}, | |
1181 }, | |
1182 "double-quotes": { | |
1183 {`(?<!(?<!\\)\\)"`, Punctuation, Pop(1)}, | |
1184 Include("qq"), | |
1185 }, | |
1186 "<<": { | |
1187 {`>>(?!\s*(?:\d+|\.(?:Int|Numeric)|[$@%]\*?[\w':-]+|\s+\[))`, Punctuation, Pop(1)}, | |
1188 Include("ww"), | |
1189 }, | |
1190 "«": { | |
1191 {`»(?!\s*(?:\d+|\.(?:Int|Numeric)|[$@%]\*?[\w':-]+|\s+\[))`, Punctuation, Pop(1)}, | |
1192 Include("ww"), | |
1193 }, | |
1194 "ww": { | |
1195 Include("single-quote"), | |
1196 Include("qq"), | |
1197 }, | |
1198 "qq": { | |
1199 Include("qq-variable"), | |
1200 Include("closure"), | |
1201 Include(`escape-char`), | |
1202 Include("escape-hexadecimal"), | |
1203 Include("escape-c-name"), | |
1204 Include("escape-qq"), | |
1205 {`.+?`, StringDouble, nil}, | |
1206 }, | |
1207 "qq-variable": { | |
1208 { | |
1209 `(?<!(?<!\\)\\)(?:` + variablePattern + `|` + globalVariablePattern + `)` + colonPairLookahead + `)`, | |
1210 NameVariable, | |
1211 Push("qq-variable-extras", "name-adverb"), | |
1212 }, | |
1213 }, | |
1214 "qq-variable-extras": { | |
1215 // Method | |
1216 { | |
1217 `(?<operator>\.)(?<method_name>` + namePattern + `)` + colonPairLookahead + `\()`, | |
1218 ByGroupNames(map[string]Emitter{ | |
1219 `operator`: Operator, | |
1220 `method_name`: NameFunction, | |
1221 }), | |
1222 Push(`name-adverb`), | |
1223 }, | |
1224 // Function/Signature | |
1225 { | |
1226 `\(`, Punctuation, replaceRule( | |
1227 ruleReplacingConfig{ | |
1228 delimiter: []rune(`)`), | |
1229 tokenType: Punctuation, | |
1230 stateName: `root`, | |
1231 pushState: true, | |
1232 }), | |
1233 }, | |
1234 Default(Pop(1)), | |
1235 }, | |
1236 "Q": { | |
1237 Include("escape-qq"), | |
1238 {`.+?`, String, nil}, | |
1239 }, | |
1240 "Q-closure": { | |
1241 Include("escape-qq"), | |
1242 Include("closure"), | |
1243 {`.+?`, String, nil}, | |
1244 }, | |
1245 "Q-variable": { | |
1246 Include("escape-qq"), | |
1247 Include("qq-variable"), | |
1248 {`.+?`, String, nil}, | |
1249 }, | |
1250 "closure": { | |
1251 {`(?<!(?<!\\)\\){`, Punctuation, replaceRule( | |
1252 ruleReplacingConfig{ | |
1253 delimiter: []rune(`}`), | |
1254 tokenType: Punctuation, | |
1255 stateName: `root`, | |
1256 pushState: true, | |
1257 }), | |
1258 }, | |
1259 }, | |
1260 "token": { | |
1261 // Token signature | |
1262 {`\(`, Punctuation, replaceRule( | |
1263 ruleReplacingConfig{ | |
1264 delimiter: []rune(`)`), | |
1265 tokenType: Punctuation, | |
1266 stateName: `root`, | |
1267 pushState: true, | |
1268 }), | |
1269 }, | |
1270 {`{`, Punctuation, replaceRule( | |
1271 ruleReplacingConfig{ | |
1272 delimiter: []rune(`}`), | |
1273 tokenType: Punctuation, | |
1274 stateName: `regex`, | |
1275 popState: true, | |
1276 pushState: true, | |
1277 }), | |
1278 }, | |
1279 {`\s*`, Text, nil}, | |
1280 Default(Pop(1)), | |
1281 }, | |
1282 } | |
1283 } | |
1284 | |
// joinRuneMap concatenates the keys of m into a single string. The values
// of m are ignored. Because Go map iteration order is unspecified, the order
// of the runes in the result is unspecified as well.
func joinRuneMap(m map[rune]rune) string {
	keys := make([]rune, len(m))

	next := 0
	for key := range m {
		keys[next] = key
		next++
	}

	return string(keys)
}
1294 | |
// indexAt returns the rune index in str of the first occurrence of substr at
// or after rune position pos, skipping occurrences that are escaped with a
// backslash. It returns -1 when there is no such occurrence.
//
// An occurrence counts as escaped when it is directly preceded by a backslash
// that is not itself preceded by another backslash. Only runes at or after
// pos are inspected when deciding this.
func indexAt(str []rune, substr []rune, pos int) int {
	strFromPos := str[pos:]
	text := string(strFromPos)

	idx := strings.Index(text, string(substr))
	if idx < 0 {
		return -1
	}

	// strings.Index reports a byte offset; convert it to a rune offset so it
	// can be used to index strFromPos.
	idx = utf8.RuneCountInString(text[:idx])

	escaped := (idx > 1 && strFromPos[idx-1] == '\\' && strFromPos[idx-2] != '\\') ||
		(idx == 1 && strFromPos[idx-1] == '\\')
	if escaped {
		// Search again past the escaped occurrence. The recursive result is
		// already a rune index relative to strFromPos, so it must not be
		// converted a second time, and -1 must be propagated as is.
		idx = indexAt(strFromPos, substr, idx+1)
		if idx < 0 {
			return -1
		}
	}

	return idx + pos
}
1320 | |
// contains reports whether e is equal to at least one element of s.
func contains(s []string, e string) bool {
	for i := 0; i < len(s); i++ {
		if s[i] == e {
			return true
		}
	}

	return false
}
1330 | |
// rulePosition selects which slot of a state's rule list replaceRule
// overwrites.
type rulePosition int

const (
	// topRule selects the first rule of the state.
	topRule rulePosition = 0
	// bottomRule selects the last rule of the state.
	// NOTE(review): declared without a type, so this is an untyped constant
	// rather than a rulePosition; confirm that is intended.
	bottomRule = -1
)
1337 | |
// ruleMakingConfig is the input of makeRule.
type ruleMakingConfig struct {
	// delimiter, when non-empty, takes precedence over pattern: the rule
	// matches the literal delimiter unless it is escaped with a backslash.
	delimiter []rune
	// pattern is the regular expression used when delimiter is empty.
	pattern string
	// tokenType is the emitter stored in the resulting rule.
	tokenType Emitter
	// mutator is the mutator stored in the resulting rule.
	mutator Mutator
	// numberOfDelimiterChars, when greater than 1, is how many times
	// delimiter is repeated before being turned into a pattern.
	numberOfDelimiterChars int
}
1345 | |
// ruleReplacingConfig describes a rule that replaceRule installs into a
// state, and is also the element type of the per-state replacement stacks.
type ruleReplacingConfig struct {
	// delimiter, pattern, tokenType and numberOfDelimiterChars are handed to
	// makeRule unchanged.
	delimiter []rune
	pattern string
	tokenType Emitter
	numberOfDelimiterChars int
	// mutator, when set, is used in place of the default Pop(1) for a top
	// rule, or as the only mutator for other positions.
	mutator Mutator
	// appendMutator, when set, runs after the other mutators of the rule.
	appendMutator Mutator
	// rulePosition selects the slot of the state that gets overwritten.
	rulePosition rulePosition
	// stateName is the state whose rule is replaced (and pushed, if asked).
	stateName string
	// pop marks a rule that is being restored by popRule; such a rule is not
	// appended to the replacement stack again.
	pop bool
	// popState asks replaceRule to pop one state before any push.
	popState bool
	// pushState asks replaceRule to push stateName onto the state stack.
	pushState bool
}
1359 | |
1360 // Pops rule from state-stack and replaces the rule with the previous rule | |
1361 func popRule(rule ruleReplacingConfig) MutatorFunc { | |
1362 return func(state *LexerState) error { | |
1363 stackName := genStackName(rule.stateName, rule.rulePosition) | |
1364 | |
1365 stack, ok := state.Get(stackName).([]ruleReplacingConfig) | |
1366 | |
1367 if ok && len(stack) > 0 { | |
1368 // Pop from stack | |
1369 stack = stack[:len(stack)-1] | |
1370 lastRule := stack[len(stack)-1] | |
1371 lastRule.pushState = false | |
1372 lastRule.popState = false | |
1373 lastRule.pop = true | |
1374 state.Set(stackName, stack) | |
1375 | |
1376 // Call replaceRule to use the last rule | |
1377 err := replaceRule(lastRule)(state) | |
1378 if err != nil { | |
1379 panic(err) | |
1380 } | |
1381 } | |
1382 | |
1383 return nil | |
1384 } | |
1385 } | |
1386 | |
1387 // Replaces a state's rule based on the rule config and position | |
1388 func replaceRule(rule ruleReplacingConfig) MutatorFunc { | |
1389 return func(state *LexerState) error { | |
1390 stateName := rule.stateName | |
1391 stackName := genStackName(rule.stateName, rule.rulePosition) | |
1392 | |
1393 stack, ok := state.Get(stackName).([]ruleReplacingConfig) | |
1394 if !ok { | |
1395 stack = []ruleReplacingConfig{} | |
1396 } | |
1397 | |
1398 // If state-stack is empty fill it with the placeholder rule | |
1399 if len(stack) == 0 { | |
1400 stack = []ruleReplacingConfig{ | |
1401 { | |
1402 // Placeholder, will be overwritten by mutators, DO NOT REMOVE! | |
1403 pattern: `\A\z`, | |
1404 tokenType: nil, | |
1405 mutator: nil, | |
1406 stateName: stateName, | |
1407 rulePosition: rule.rulePosition, | |
1408 }, | |
1409 } | |
1410 state.Set(stackName, stack) | |
1411 } | |
1412 | |
1413 var mutator Mutator | |
1414 mutators := []Mutator{} | |
1415 | |
1416 switch { | |
1417 case rule.rulePosition == topRule && rule.mutator == nil: | |
1418 // Default mutator for top rule | |
1419 mutators = []Mutator{Pop(1), popRule(rule)} | |
1420 case rule.rulePosition == topRule && rule.mutator != nil: | |
1421 // Default mutator for top rule, when rule.mutator is set | |
1422 mutators = []Mutator{rule.mutator, popRule(rule)} | |
1423 case rule.mutator != nil: | |
1424 mutators = []Mutator{rule.mutator} | |
1425 } | |
1426 | |
1427 if rule.appendMutator != nil { | |
1428 mutators = append(mutators, rule.appendMutator) | |
1429 } | |
1430 | |
1431 if len(mutators) > 0 { | |
1432 mutator = Mutators(mutators...) | |
1433 } else { | |
1434 mutator = nil | |
1435 } | |
1436 | |
1437 ruleConfig := ruleMakingConfig{ | |
1438 pattern: rule.pattern, | |
1439 delimiter: rule.delimiter, | |
1440 numberOfDelimiterChars: rule.numberOfDelimiterChars, | |
1441 tokenType: rule.tokenType, | |
1442 mutator: mutator, | |
1443 } | |
1444 | |
1445 cRule := makeRule(ruleConfig) | |
1446 | |
1447 switch rule.rulePosition { | |
1448 case topRule: | |
1449 state.Rules[stateName][0] = cRule | |
1450 case bottomRule: | |
1451 state.Rules[stateName][len(state.Rules[stateName])-1] = cRule | |
1452 } | |
1453 | |
1454 // Pop state name from stack if asked. State should be popped first before Pushing | |
1455 if rule.popState { | |
1456 err := Pop(1).Mutate(state) | |
1457 if err != nil { | |
1458 panic(err) | |
1459 } | |
1460 } | |
1461 | |
1462 // Push state name to stack if asked | |
1463 if rule.pushState { | |
1464 err := Push(stateName).Mutate(state) | |
1465 if err != nil { | |
1466 panic(err) | |
1467 } | |
1468 } | |
1469 | |
1470 if !rule.pop { | |
1471 state.Set(stackName, append(stack, rule)) | |
1472 } | |
1473 | |
1474 return nil | |
1475 } | |
1476 } | |
1477 | |
1478 // Generates rule replacing stack using state name and rule position | |
1479 func genStackName(stateName string, rulePosition rulePosition) (stackName string) { | |
1480 switch rulePosition { | |
1481 case topRule: | |
1482 stackName = stateName + `-top-stack` | |
1483 case bottomRule: | |
1484 stackName = stateName + `-bottom-stack` | |
1485 } | |
1486 return | |
1487 } | |
1488 | |
1489 // Makes a compiled rule and returns it | |
1490 func makeRule(config ruleMakingConfig) *CompiledRule { | |
1491 var rePattern string | |
1492 | |
1493 if len(config.delimiter) > 0 { | |
1494 delimiter := string(config.delimiter) | |
1495 | |
1496 if config.numberOfDelimiterChars > 1 { | |
1497 delimiter = strings.Repeat(delimiter, config.numberOfDelimiterChars) | |
1498 } | |
1499 | |
1500 rePattern = `(?<!(?<!\\)\\)` + regexp2.Escape(delimiter) | |
1501 } else { | |
1502 rePattern = config.pattern | |
1503 } | |
1504 | |
1505 regex := regexp2.MustCompile(rePattern, regexp2.None) | |
1506 | |
1507 cRule := &CompiledRule{ | |
1508 Rule: Rule{rePattern, config.tokenType, config.mutator}, | |
1509 Regexp: regex, | |
1510 } | |
1511 | |
1512 return cRule | |
1513 } | |
1514 | |
1515 // Emitter for colon pairs, changes token state based on key and brackets | |
1516 func colonPair(tokenClass TokenType) Emitter { | |
1517 return EmitterFunc(func(groups []string, state *LexerState) Iterator { | |
1518 iterators := []Iterator{} | |
1519 tokens := []Token{ | |
1520 {Punctuation, state.NamedGroups[`colon`]}, | |
1521 {Punctuation, state.NamedGroups[`opening_delimiters`]}, | |
1522 {Punctuation, state.NamedGroups[`closing_delimiters`]}, | |
1523 } | |
1524 | |
1525 // Append colon | |
1526 iterators = append(iterators, Literator(tokens[0])) | |
1527 | |
1528 if tokenClass == NameAttribute { | |
1529 iterators = append(iterators, Literator(Token{NameAttribute, state.NamedGroups[`key`]})) | |
1530 } else { | |
1531 var keyTokenState string | |
1532 keyre := regexp.MustCompile(`^\d+$`) | |
1533 if keyre.MatchString(state.NamedGroups[`key`]) { | |
1534 keyTokenState = "common" | |
1535 } else { | |
1536 keyTokenState = "Q" | |
1537 } | |
1538 | |
1539 // Use token state to Tokenise key | |
1540 if keyTokenState != "" { | |
1541 iterator, err := state.Lexer.Tokenise( | |
1542 &TokeniseOptions{ | |
1543 State: keyTokenState, | |
1544 Nested: true, | |
1545 }, state.NamedGroups[`key`]) | |
1546 | |
1547 if err != nil { | |
1548 panic(err) | |
1549 } else { | |
1550 // Append key | |
1551 iterators = append(iterators, iterator) | |
1552 } | |
1553 } | |
1554 } | |
1555 | |
1556 // Append punctuation | |
1557 iterators = append(iterators, Literator(tokens[1])) | |
1558 | |
1559 var valueTokenState string | |
1560 | |
1561 switch state.NamedGroups[`opening_delimiters`] { | |
1562 case "(", "{", "[": | |
1563 valueTokenState = "root" | |
1564 case "<<", "«": | |
1565 valueTokenState = "ww" | |
1566 case "<": | |
1567 valueTokenState = "Q" | |
1568 } | |
1569 | |
1570 // Use token state to Tokenise value | |
1571 if valueTokenState != "" { | |
1572 iterator, err := state.Lexer.Tokenise( | |
1573 &TokeniseOptions{ | |
1574 State: valueTokenState, | |
1575 Nested: true, | |
1576 }, state.NamedGroups[`value`]) | |
1577 | |
1578 if err != nil { | |
1579 panic(err) | |
1580 } else { | |
1581 // Append value | |
1582 iterators = append(iterators, iterator) | |
1583 } | |
1584 } | |
1585 // Append last punctuation | |
1586 iterators = append(iterators, Literator(tokens[2])) | |
1587 | |
1588 return Concaterator(iterators...) | |
1589 }) | |
1590 } | |
1591 | |
1592 // Emitter for quoting constructs, changes token state based on quote name and adverbs | |
1593 func quote(groups []string, state *LexerState) Iterator { | |
1594 keyword := state.NamedGroups[`keyword`] | |
1595 adverbsStr := state.NamedGroups[`adverbs`] | |
1596 iterators := []Iterator{} | |
1597 tokens := []Token{ | |
1598 {Keyword, keyword}, | |
1599 {StringAffix, adverbsStr}, | |
1600 {Text, state.NamedGroups[`ws`]}, | |
1601 {Punctuation, state.NamedGroups[`opening_delimiters`]}, | |
1602 {Punctuation, state.NamedGroups[`closing_delimiters`]}, | |
1603 } | |
1604 | |
1605 // Append all tokens before dealing with the main string | |
1606 iterators = append(iterators, Literator(tokens[:4]...)) | |
1607 | |
1608 var tokenStates []string | |
1609 | |
1610 // Set tokenStates based on adverbs | |
1611 adverbs := strings.Split(adverbsStr, ":") | |
1612 for _, adverb := range adverbs { | |
1613 switch adverb { | |
1614 case "c", "closure": | |
1615 tokenStates = append(tokenStates, "Q-closure") | |
1616 case "qq": | |
1617 tokenStates = append(tokenStates, "qq") | |
1618 case "ww": | |
1619 tokenStates = append(tokenStates, "ww") | |
1620 case "s", "scalar", "a", "array", "h", "hash", "f", "function": | |
1621 tokenStates = append(tokenStates, "Q-variable") | |
1622 } | |
1623 } | |
1624 | |
1625 var tokenState string | |
1626 | |
1627 switch { | |
1628 case keyword == "qq" || contains(tokenStates, "qq"): | |
1629 tokenState = "qq" | |
1630 case adverbsStr == "ww" || contains(tokenStates, "ww"): | |
1631 tokenState = "ww" | |
1632 case contains(tokenStates, "Q-closure") && contains(tokenStates, "Q-variable"): | |
1633 tokenState = "qq" | |
1634 case contains(tokenStates, "Q-closure"): | |
1635 tokenState = "Q-closure" | |
1636 case contains(tokenStates, "Q-variable"): | |
1637 tokenState = "Q-variable" | |
1638 default: | |
1639 tokenState = "Q" | |
1640 } | |
1641 | |
1642 iterator, err := state.Lexer.Tokenise( | |
1643 &TokeniseOptions{ | |
1644 State: tokenState, | |
1645 Nested: true, | |
1646 }, state.NamedGroups[`value`]) | |
1647 | |
1648 if err != nil { | |
1649 panic(err) | |
1650 } else { | |
1651 iterators = append(iterators, iterator) | |
1652 } | |
1653 | |
1654 // Append the last punctuation | |
1655 iterators = append(iterators, Literator(tokens[4])) | |
1656 | |
1657 return Concaterator(iterators...) | |
1658 } | |
1659 | |
1660 // Emitter for pod config, tokenises the properties with "colon-pair-attribute" state | |
1661 func podConfig(groups []string, state *LexerState) Iterator { | |
1662 // Tokenise pod config | |
1663 iterator, err := state.Lexer.Tokenise( | |
1664 &TokeniseOptions{ | |
1665 State: "colon-pair-attribute", | |
1666 Nested: true, | |
1667 }, groups[0]) | |
1668 | |
1669 if err != nil { | |
1670 panic(err) | |
1671 } else { | |
1672 return iterator | |
1673 } | |
1674 } | |
1675 | |
1676 // Emitter for pod code, tokenises the code based on the lang specified | |
1677 func podCode(groups []string, state *LexerState) Iterator { | |
1678 iterators := []Iterator{} | |
1679 tokens := []Token{ | |
1680 {Comment, state.NamedGroups[`ws`]}, | |
1681 {Keyword, state.NamedGroups[`keyword`]}, | |
1682 {Keyword, state.NamedGroups[`ws2`]}, | |
1683 {Keyword, state.NamedGroups[`name`]}, | |
1684 {StringDoc, state.NamedGroups[`value`]}, | |
1685 {Comment, state.NamedGroups[`ws3`]}, | |
1686 {Keyword, state.NamedGroups[`end_keyword`]}, | |
1687 {Keyword, state.NamedGroups[`ws4`]}, | |
1688 {Keyword, state.NamedGroups[`name`]}, | |
1689 } | |
1690 | |
1691 // Append all tokens before dealing with the pod config | |
1692 iterators = append(iterators, Literator(tokens[:4]...)) | |
1693 | |
1694 // Tokenise pod config | |
1695 iterators = append(iterators, podConfig([]string{state.NamedGroups[`config`]}, state)) | |
1696 | |
1697 langMatch := regexp.MustCompile(`:lang\W+(\w+)`).FindStringSubmatch(state.NamedGroups[`config`]) | |
1698 var lang string | |
1699 if len(langMatch) > 1 { | |
1700 lang = langMatch[1] | |
1701 } | |
1702 | |
1703 // Tokenise code based on lang property | |
1704 sublexer := Get(lang) | |
1705 if sublexer != nil { | |
1706 iterator, err := sublexer.Tokenise(nil, state.NamedGroups[`value`]) | |
1707 | |
1708 if err != nil { | |
1709 panic(err) | |
1710 } else { | |
1711 iterators = append(iterators, iterator) | |
1712 } | |
1713 } else { | |
1714 iterators = append(iterators, Literator(tokens[4])) | |
1715 } | |
1716 | |
1717 // Append the rest of the tokens | |
1718 iterators = append(iterators, Literator(tokens[5:]...)) | |
1719 | |
1720 return Concaterator(iterators...) | |
1721 } |