66
|
1 package chroma
|
|
2
|
|
3 import (
|
|
4 "fmt"
|
|
5 )
|
|
6
|
|
// An Emitter takes group matches and returns tokens.
type Emitter interface {
	// Emit tokens for the given regex groups.
	//
	// groups[0] is the full match; groups[1:] are the capture groups.
	Emit(groups []string, state *LexerState) Iterator
}
|
|
12
|
|
13 // SerialisableEmitter is an Emitter that can be serialised and deserialised to/from JSON.
|
|
14 type SerialisableEmitter interface {
|
|
15 Emitter
|
|
16 EmitterKind() string
|
|
17 }
|
|
18
|
|
// EmitterFunc is a function that is an Emitter.
type EmitterFunc func(groups []string, state *LexerState) Iterator
|
|
21
|
|
// Emit tokens for groups by calling the function itself.
func (e EmitterFunc) Emit(groups []string, state *LexerState) Iterator {
	return e(groups, state)
}
|
|
26
|
|
27 type Emitters []Emitter
|
|
28
|
|
// byGroupsEmitter emits one token per regex capture group, using the
// positionally corresponding Emitter for each group.
type byGroupsEmitter struct {
	Emitters
}
|
|
32
|
|
33 // ByGroups emits a token for each matching group in the rule's regex.
|
|
34 func ByGroups(emitters ...Emitter) Emitter {
|
|
35 return &byGroupsEmitter{Emitters: emitters}
|
|
36 }
|
|
37
|
|
38 func (b *byGroupsEmitter) EmitterKind() string { return "bygroups" }
|
|
39
|
|
40 func (b *byGroupsEmitter) Emit(groups []string, state *LexerState) Iterator {
|
|
41 iterators := make([]Iterator, 0, len(groups)-1)
|
|
42 if len(b.Emitters) != len(groups)-1 {
|
|
43 iterators = append(iterators, Error.Emit(groups, state))
|
|
44 // panic(errors.Errorf("number of groups %q does not match number of emitters %v", groups, emitters))
|
|
45 } else {
|
|
46 for i, group := range groups[1:] {
|
|
47 if b.Emitters[i] != nil {
|
|
48 iterators = append(iterators, b.Emitters[i].Emit([]string{group}, state))
|
|
49 }
|
|
50 }
|
|
51 }
|
|
52 return Concaterator(iterators...)
|
|
53 }
|
|
54
|
|
55 // ByGroupNames emits a token for each named matching group in the rule's regex.
|
|
56 func ByGroupNames(emitters map[string]Emitter) Emitter {
|
|
57 return EmitterFunc(func(groups []string, state *LexerState) Iterator {
|
|
58 iterators := make([]Iterator, 0, len(state.NamedGroups)-1)
|
|
59 if len(state.NamedGroups)-1 == 0 {
|
|
60 if emitter, ok := emitters[`0`]; ok {
|
|
61 iterators = append(iterators, emitter.Emit(groups, state))
|
|
62 } else {
|
|
63 iterators = append(iterators, Error.Emit(groups, state))
|
|
64 }
|
|
65 } else {
|
|
66 ruleRegex := state.Rules[state.State][state.Rule].Regexp
|
|
67 for i := 1; i < len(state.NamedGroups); i++ {
|
|
68 groupName := ruleRegex.GroupNameFromNumber(i)
|
|
69 group := state.NamedGroups[groupName]
|
|
70 if emitter, ok := emitters[groupName]; ok {
|
|
71 if emitter != nil {
|
|
72 iterators = append(iterators, emitter.Emit([]string{group}, state))
|
|
73 }
|
|
74 } else {
|
|
75 iterators = append(iterators, Error.Emit([]string{group}, state))
|
|
76 }
|
|
77 }
|
|
78 }
|
|
79 return Concaterator(iterators...)
|
|
80 })
|
|
81 }
|
|
82
|
|
// UsingByGroup emits tokens for the matched groups in the regex using a
// "sublexer". Used when lexing code blocks where the name of a sublexer is
// contained within the block, for example on a Markdown text block or SQL
// language block.
//
// The sublexer will be retrieved from the LexerState's registry using the
// captured value of the matched sublexerNameGroup.
//
// If the registry returns a non-nil lexer for the captured sublexerNameGroup,
// then tokens for the matched codeGroup will be emitted using the retrieved
// lexer. Otherwise, if the sublexer is nil, then tokens will be emitted from
// the passed emitter.
//
// Example:
//
//	var Markdown = internal.Register(MustNewLexer(
//		&Config{
//			Name:      "markdown",
//			Aliases:   []string{"md", "mkd"},
//			Filenames: []string{"*.md", "*.mkd", "*.markdown"},
//			MimeTypes: []string{"text/x-markdown"},
//		},
//		Rules{
//			"root": {
//				{"^(```)(\\w+)(\\n)([\\w\\W]*?)(^```$)",
//					UsingByGroup(2, 4, String, String, String, Text, String),
//					nil,
//				},
//			},
//		},
//	))
//
// See the lexers/m/markdown.go for the complete example.
//
// Note: panics if the number of emitters does not equal the number of matched
// groups in the regex.
func UsingByGroup(sublexerNameGroup, codeGroup int, emitters ...Emitter) Emitter {
	return &usingByGroup{
		SublexerNameGroup: sublexerNameGroup,
		CodeGroup:         codeGroup,
		Emitters:          emitters,
	}
}
|
|
130
|
|
// usingByGroup is the serialisable state behind UsingByGroup: which capture
// group names the sublexer, which holds the code, and the per-group Emitters.
type usingByGroup struct {
	SublexerNameGroup int      `xml:"sublexer_name_group"`
	CodeGroup         int      `xml:"code_group"`
	Emitters          Emitters `xml:"emitters"`
}
|
|
136
|
|
137 func (u *usingByGroup) EmitterKind() string { return "usingbygroup" }
|
|
138 func (u *usingByGroup) Emit(groups []string, state *LexerState) Iterator {
|
|
139 // bounds check
|
|
140 if len(u.Emitters) != len(groups)-1 {
|
|
141 panic("UsingByGroup expects number of emitters to be the same as len(groups)-1")
|
|
142 }
|
|
143
|
|
144 // grab sublexer
|
|
145 sublexer := state.Registry.Get(groups[u.SublexerNameGroup])
|
|
146
|
|
147 // build iterators
|
|
148 iterators := make([]Iterator, len(groups)-1)
|
|
149 for i, group := range groups[1:] {
|
|
150 if i == u.CodeGroup-1 && sublexer != nil {
|
|
151 var err error
|
|
152 iterators[i], err = sublexer.Tokenise(nil, groups[u.CodeGroup])
|
|
153 if err != nil {
|
|
154 panic(err)
|
|
155 }
|
|
156 } else if u.Emitters[i] != nil {
|
|
157 iterators[i] = u.Emitters[i].Emit([]string{group}, state)
|
|
158 }
|
|
159 }
|
|
160 return Concaterator(iterators...)
|
|
161 }
|
|
162
|
|
163 // UsingLexer returns an Emitter that uses a given Lexer for parsing and emitting.
|
|
164 //
|
|
165 // This Emitter is not serialisable.
|
|
166 func UsingLexer(lexer Lexer) Emitter {
|
|
167 return EmitterFunc(func(groups []string, _ *LexerState) Iterator {
|
|
168 it, err := lexer.Tokenise(&TokeniseOptions{State: "root", Nested: true}, groups[0])
|
|
169 if err != nil {
|
|
170 panic(err)
|
|
171 }
|
|
172 return it
|
|
173 })
|
|
174 }
|
|
175
|
|
// usingEmitter is the serialisable state behind Using: the name of the lexer
// to look up in the LexerRegistry at emit time.
type usingEmitter struct {
	Lexer string `xml:"lexer,attr"`
}
|
|
179
|
|
180 func (u *usingEmitter) EmitterKind() string { return "using" }
|
|
181
|
|
182 func (u *usingEmitter) Emit(groups []string, state *LexerState) Iterator {
|
|
183 if state.Registry == nil {
|
|
184 panic(fmt.Sprintf("no LexerRegistry available for Using(%q)", u.Lexer))
|
|
185 }
|
|
186 lexer := state.Registry.Get(u.Lexer)
|
|
187 if lexer == nil {
|
|
188 panic(fmt.Sprintf("no such lexer %q", u.Lexer))
|
|
189 }
|
|
190 it, err := lexer.Tokenise(&TokeniseOptions{State: "root", Nested: true}, groups[0])
|
|
191 if err != nil {
|
|
192 panic(err)
|
|
193 }
|
|
194 return it
|
|
195 }
|
|
196
|
|
197 // Using returns an Emitter that uses a given Lexer reference for parsing and emitting.
|
|
198 //
|
|
199 // The referenced lexer must be stored in the same LexerRegistry.
|
|
200 func Using(lexer string) Emitter {
|
|
201 return &usingEmitter{Lexer: lexer}
|
|
202 }
|
|
203
|
|
// usingSelfEmitter is the serialisable state behind UsingSelf: the state name
// of the current lexer to re-enter when emitting.
type usingSelfEmitter struct {
	State string `xml:"state,attr"`
}
|
|
207
|
|
208 func (u *usingSelfEmitter) EmitterKind() string { return "usingself" }
|
|
209
|
|
210 func (u *usingSelfEmitter) Emit(groups []string, state *LexerState) Iterator {
|
|
211 it, err := state.Lexer.Tokenise(&TokeniseOptions{State: u.State, Nested: true}, groups[0])
|
|
212 if err != nil {
|
|
213 panic(err)
|
|
214 }
|
|
215 return it
|
|
216 }
|
|
217
|
|
218 // UsingSelf is like Using, but uses the current Lexer.
|
|
219 func UsingSelf(stateName string) Emitter {
|
|
220 return &usingSelfEmitter{stateName}
|
|
221 }
|