Mercurial > yakumo_izuru > aya
comparison vendor/github.com/alecthomas/chroma/v2/serialise.go @ 66:787b5ee0289d draft
Use vendored modules
Signed-off-by: Izuru Yakumo <yakumo.izuru@chaotic.ninja>
author | yakumo.izuru |
---|---|
date | Sun, 23 Jul 2023 13:18:53 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
65:6d985efa0f7a | 66:787b5ee0289d |
---|---|
1 package chroma | |
2 | |
import (
	"compress/gzip"
	"encoding/xml"
	"errors"
	"fmt"
	"io"
	"io/fs"
	"path/filepath"
	"reflect"
	"regexp"
	"sort"
	"strings"
)
15 | |
16 // Serialisation of Chroma rules to XML. The format is: | |
17 // | |
18 // <rules> | |
19 // <state name="$STATE"> | |
20 // <rule [pattern="$PATTERN"]> | |
21 // [<$EMITTER ...>] | |
22 // [<$MUTATOR ...>] | |
23 // </rule> | |
24 // </state> | |
25 // </rules> | |
26 // | |
27 // eg. Include("String") would become: | |
28 // | |
29 // <rule> | |
30 // <include state="String" /> | |
31 // </rule> | |
32 // | |
33 // [null, null, {"kind": "include", "state": "String"}] | |
34 // | |
35 // eg. Rule{`\d+`, Text, nil} would become: | |
36 // | |
37 // <rule pattern="\\d+"> | |
38 // <token type="Text"/> | |
39 // </rule> | |
40 // | |
41 // eg. Rule{`"`, String, Push("String")} | |
42 // | |
43 // <rule pattern="\""> | |
44 // <token type="String" /> | |
45 // <push state="String" /> | |
46 // </rule> | |
47 // | |
48 // eg. Rule{`(\w+)(\n)`, ByGroups(Keyword, Whitespace), nil}, | |
49 // | |
50 // <rule pattern="(\\w+)(\\n)"> | |
51 // <bygroups token="Keyword" token="Whitespace" /> | |
52 // <push state="String" /> | |
53 // </rule> | |
54 var ( | |
55 // ErrNotSerialisable is returned if a lexer contains Rules that cannot be serialised. | |
56 ErrNotSerialisable = fmt.Errorf("not serialisable") | |
57 emitterTemplates = func() map[string]SerialisableEmitter { | |
58 out := map[string]SerialisableEmitter{} | |
59 for _, emitter := range []SerialisableEmitter{ | |
60 &byGroupsEmitter{}, | |
61 &usingSelfEmitter{}, | |
62 TokenType(0), | |
63 &usingEmitter{}, | |
64 &usingByGroup{}, | |
65 } { | |
66 out[emitter.EmitterKind()] = emitter | |
67 } | |
68 return out | |
69 }() | |
70 mutatorTemplates = func() map[string]SerialisableMutator { | |
71 out := map[string]SerialisableMutator{} | |
72 for _, mutator := range []SerialisableMutator{ | |
73 &includeMutator{}, | |
74 &combinedMutator{}, | |
75 &multiMutator{}, | |
76 &pushMutator{}, | |
77 &popMutator{}, | |
78 } { | |
79 out[mutator.MutatorKind()] = mutator | |
80 } | |
81 return out | |
82 }() | |
83 ) | |
84 | |
85 // fastUnmarshalConfig unmarshals only the Config from a serialised lexer. | |
86 func fastUnmarshalConfig(from fs.FS, path string) (*Config, error) { | |
87 r, err := from.Open(path) | |
88 if err != nil { | |
89 return nil, err | |
90 } | |
91 defer r.Close() | |
92 dec := xml.NewDecoder(r) | |
93 for { | |
94 token, err := dec.Token() | |
95 if err != nil { | |
96 if errors.Is(err, io.EOF) { | |
97 return nil, fmt.Errorf("could not find <config> element") | |
98 } | |
99 return nil, err | |
100 } | |
101 switch se := token.(type) { | |
102 case xml.StartElement: | |
103 if se.Name.Local != "config" { | |
104 break | |
105 } | |
106 | |
107 var config Config | |
108 err = dec.DecodeElement(&config, &se) | |
109 if err != nil { | |
110 panic(err) | |
111 } | |
112 return &config, nil | |
113 } | |
114 } | |
115 } | |
116 | |
117 // MustNewXMLLexer constructs a new RegexLexer from an XML file or panics. | |
118 func MustNewXMLLexer(from fs.FS, path string) *RegexLexer { | |
119 lex, err := NewXMLLexer(from, path) | |
120 if err != nil { | |
121 panic(err) | |
122 } | |
123 return lex | |
124 } | |
125 | |
126 // NewXMLLexer creates a new RegexLexer from a serialised RegexLexer. | |
127 func NewXMLLexer(from fs.FS, path string) (*RegexLexer, error) { | |
128 config, err := fastUnmarshalConfig(from, path) | |
129 if err != nil { | |
130 return nil, err | |
131 } | |
132 for _, glob := range append(config.Filenames, config.AliasFilenames...) { | |
133 _, err := filepath.Match(glob, "") | |
134 if err != nil { | |
135 return nil, fmt.Errorf("%s: %q is not a valid glob: %w", config.Name, glob, err) | |
136 } | |
137 } | |
138 return &RegexLexer{ | |
139 config: config, | |
140 fetchRulesFunc: func() (Rules, error) { | |
141 var lexer struct { | |
142 Config | |
143 Rules Rules `xml:"rules"` | |
144 } | |
145 // Try to open .xml fallback to .xml.gz | |
146 fr, err := from.Open(path) | |
147 if err != nil { | |
148 if errors.Is(err, fs.ErrNotExist) { | |
149 path += ".gz" | |
150 fr, err = from.Open(path) | |
151 if err != nil { | |
152 return nil, err | |
153 } | |
154 } else { | |
155 return nil, err | |
156 } | |
157 } | |
158 defer fr.Close() | |
159 var r io.Reader = fr | |
160 if strings.HasSuffix(path, ".gz") { | |
161 r, err = gzip.NewReader(r) | |
162 if err != nil { | |
163 return nil, fmt.Errorf("%s: %w", path, err) | |
164 } | |
165 } | |
166 err = xml.NewDecoder(r).Decode(&lexer) | |
167 if err != nil { | |
168 return nil, fmt.Errorf("%s: %w", path, err) | |
169 } | |
170 return lexer.Rules, nil | |
171 }, | |
172 }, nil | |
173 } | |
174 | |
175 // Marshal a RegexLexer to XML. | |
176 func Marshal(l *RegexLexer) ([]byte, error) { | |
177 type lexer struct { | |
178 Config Config `xml:"config"` | |
179 Rules Rules `xml:"rules"` | |
180 } | |
181 | |
182 rules, err := l.Rules() | |
183 if err != nil { | |
184 return nil, err | |
185 } | |
186 root := &lexer{ | |
187 Config: *l.Config(), | |
188 Rules: rules, | |
189 } | |
190 data, err := xml.MarshalIndent(root, "", " ") | |
191 if err != nil { | |
192 return nil, err | |
193 } | |
194 re := regexp.MustCompile(`></[a-zA-Z]+>`) | |
195 data = re.ReplaceAll(data, []byte(`/>`)) | |
196 return data, nil | |
197 } | |
198 | |
199 // Unmarshal a RegexLexer from XML. | |
200 func Unmarshal(data []byte) (*RegexLexer, error) { | |
201 type lexer struct { | |
202 Config Config `xml:"config"` | |
203 Rules Rules `xml:"rules"` | |
204 } | |
205 root := &lexer{} | |
206 err := xml.Unmarshal(data, root) | |
207 if err != nil { | |
208 return nil, fmt.Errorf("invalid Lexer XML: %w", err) | |
209 } | |
210 lex, err := NewLexer(&root.Config, func() Rules { return root.Rules }) | |
211 if err != nil { | |
212 return nil, err | |
213 } | |
214 return lex, nil | |
215 } | |
216 | |
217 func marshalMutator(e *xml.Encoder, mutator Mutator) error { | |
218 if mutator == nil { | |
219 return nil | |
220 } | |
221 smutator, ok := mutator.(SerialisableMutator) | |
222 if !ok { | |
223 return fmt.Errorf("unsupported mutator: %w", ErrNotSerialisable) | |
224 } | |
225 return e.EncodeElement(mutator, xml.StartElement{Name: xml.Name{Local: smutator.MutatorKind()}}) | |
226 } | |
227 | |
228 func unmarshalMutator(d *xml.Decoder, start xml.StartElement) (Mutator, error) { | |
229 kind := start.Name.Local | |
230 mutator, ok := mutatorTemplates[kind] | |
231 if !ok { | |
232 return nil, fmt.Errorf("unknown mutator %q: %w", kind, ErrNotSerialisable) | |
233 } | |
234 value, target := newFromTemplate(mutator) | |
235 if err := d.DecodeElement(target, &start); err != nil { | |
236 return nil, err | |
237 } | |
238 return value().(SerialisableMutator), nil | |
239 } | |
240 | |
241 func marshalEmitter(e *xml.Encoder, emitter Emitter) error { | |
242 if emitter == nil { | |
243 return nil | |
244 } | |
245 semitter, ok := emitter.(SerialisableEmitter) | |
246 if !ok { | |
247 return fmt.Errorf("unsupported emitter %T: %w", emitter, ErrNotSerialisable) | |
248 } | |
249 return e.EncodeElement(emitter, xml.StartElement{ | |
250 Name: xml.Name{Local: semitter.EmitterKind()}, | |
251 }) | |
252 } | |
253 | |
254 func unmarshalEmitter(d *xml.Decoder, start xml.StartElement) (Emitter, error) { | |
255 kind := start.Name.Local | |
256 mutator, ok := emitterTemplates[kind] | |
257 if !ok { | |
258 return nil, fmt.Errorf("unknown emitter %q: %w", kind, ErrNotSerialisable) | |
259 } | |
260 value, target := newFromTemplate(mutator) | |
261 if err := d.DecodeElement(target, &start); err != nil { | |
262 return nil, err | |
263 } | |
264 return value().(SerialisableEmitter), nil | |
265 } | |
266 | |
267 func (r Rule) MarshalXML(e *xml.Encoder, _ xml.StartElement) error { | |
268 start := xml.StartElement{ | |
269 Name: xml.Name{Local: "rule"}, | |
270 } | |
271 if r.Pattern != "" { | |
272 start.Attr = append(start.Attr, xml.Attr{ | |
273 Name: xml.Name{Local: "pattern"}, | |
274 Value: r.Pattern, | |
275 }) | |
276 } | |
277 if err := e.EncodeToken(start); err != nil { | |
278 return err | |
279 } | |
280 if err := marshalEmitter(e, r.Type); err != nil { | |
281 return err | |
282 } | |
283 if err := marshalMutator(e, r.Mutator); err != nil { | |
284 return err | |
285 } | |
286 return e.EncodeToken(xml.EndElement{Name: start.Name}) | |
287 } | |
288 | |
289 func (r *Rule) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { | |
290 for _, attr := range start.Attr { | |
291 if attr.Name.Local == "pattern" { | |
292 r.Pattern = attr.Value | |
293 break | |
294 } | |
295 } | |
296 for { | |
297 token, err := d.Token() | |
298 if err != nil { | |
299 return err | |
300 } | |
301 switch token := token.(type) { | |
302 case xml.StartElement: | |
303 mutator, err := unmarshalMutator(d, token) | |
304 if err != nil && !errors.Is(err, ErrNotSerialisable) { | |
305 return err | |
306 } else if err == nil { | |
307 if r.Mutator != nil { | |
308 return fmt.Errorf("duplicate mutator") | |
309 } | |
310 r.Mutator = mutator | |
311 continue | |
312 } | |
313 emitter, err := unmarshalEmitter(d, token) | |
314 if err != nil && !errors.Is(err, ErrNotSerialisable) { // nolint: gocritic | |
315 return err | |
316 } else if err == nil { | |
317 if r.Type != nil { | |
318 return fmt.Errorf("duplicate emitter") | |
319 } | |
320 r.Type = emitter | |
321 continue | |
322 } else { | |
323 return err | |
324 } | |
325 | |
326 case xml.EndElement: | |
327 return nil | |
328 } | |
329 } | |
330 } | |
331 | |
332 type xmlRuleState struct { | |
333 Name string `xml:"name,attr"` | |
334 Rules []Rule `xml:"rule"` | |
335 } | |
336 | |
337 type xmlRules struct { | |
338 States []xmlRuleState `xml:"state"` | |
339 } | |
340 | |
341 func (r Rules) MarshalXML(e *xml.Encoder, _ xml.StartElement) error { | |
342 xr := xmlRules{} | |
343 for state, rules := range r { | |
344 xr.States = append(xr.States, xmlRuleState{ | |
345 Name: state, | |
346 Rules: rules, | |
347 }) | |
348 } | |
349 return e.EncodeElement(xr, xml.StartElement{Name: xml.Name{Local: "rules"}}) | |
350 } | |
351 | |
352 func (r *Rules) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { | |
353 xr := xmlRules{} | |
354 if err := d.DecodeElement(&xr, &start); err != nil { | |
355 return err | |
356 } | |
357 if *r == nil { | |
358 *r = Rules{} | |
359 } | |
360 for _, state := range xr.States { | |
361 (*r)[state.Name] = state.Rules | |
362 } | |
363 return nil | |
364 } | |
365 | |
366 type xmlTokenType struct { | |
367 Type string `xml:"type,attr"` | |
368 } | |
369 | |
370 func (t *TokenType) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { | |
371 el := xmlTokenType{} | |
372 if err := d.DecodeElement(&el, &start); err != nil { | |
373 return err | |
374 } | |
375 for tt, text := range _TokenType_map { | |
376 if text == el.Type { | |
377 *t = tt | |
378 return nil | |
379 } | |
380 } | |
381 return fmt.Errorf("unknown TokenType %q", el.Type) | |
382 } | |
383 | |
384 func (t TokenType) MarshalXML(e *xml.Encoder, start xml.StartElement) error { | |
385 start.Attr = append(start.Attr, xml.Attr{Name: xml.Name{Local: "type"}, Value: t.String()}) | |
386 if err := e.EncodeToken(start); err != nil { | |
387 return err | |
388 } | |
389 return e.EncodeToken(xml.EndElement{Name: start.Name}) | |
390 } | |
391 | |
// newFromTemplate allocates a fresh zero value of template's dynamic type.
//
// target is always a pointer to the new value, suitable for decoding into.
// value, called afterwards, returns the result in the same form as template:
// the pointer itself when template is a pointer, otherwise the pointed-to
// value. The indirection is a bit unfortunate, but without it non-pointer
// templates such as TokenType could not be deserialised into.
func newFromTemplate(template interface{}) (value func() interface{}, target interface{}) {
	typ := reflect.TypeOf(template)
	if typ.Kind() != reflect.Ptr {
		// Value template: decode through a pointer, hand back the value.
		ptr := reflect.New(typ)
		target = ptr.Interface()
		value = func() interface{} { return ptr.Elem().Interface() }
		return value, target
	}
	// Pointer template: allocate the pointee and hand back the pointer.
	ptr := reflect.New(typ.Elem())
	return ptr.Interface, ptr.Interface()
}
402 | |
403 func (b *Emitters) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { | |
404 for { | |
405 token, err := d.Token() | |
406 if err != nil { | |
407 return err | |
408 } | |
409 switch token := token.(type) { | |
410 case xml.StartElement: | |
411 emitter, err := unmarshalEmitter(d, token) | |
412 if err != nil { | |
413 return err | |
414 } | |
415 *b = append(*b, emitter) | |
416 | |
417 case xml.EndElement: | |
418 return nil | |
419 } | |
420 } | |
421 } | |
422 | |
423 func (b Emitters) MarshalXML(e *xml.Encoder, start xml.StartElement) error { | |
424 if err := e.EncodeToken(start); err != nil { | |
425 return err | |
426 } | |
427 for _, m := range b { | |
428 if err := marshalEmitter(e, m); err != nil { | |
429 return err | |
430 } | |
431 } | |
432 return e.EncodeToken(xml.EndElement{Name: start.Name}) | |
433 } |