Mercurial > yakumo_izuru > aya
comparison vendor/github.com/alecthomas/chroma/v2/serialise.go @ 66:787b5ee0289d draft
Use vendored modules
Signed-off-by: Izuru Yakumo <yakumo.izuru@chaotic.ninja>
| author | yakumo.izuru |
|---|---|
| date | Sun, 23 Jul 2023 13:18:53 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 65:6d985efa0f7a | 66:787b5ee0289d |
|---|---|
| 1 package chroma | |
| 2 | |
import (
	"compress/gzip"
	"encoding/xml"
	"errors"
	"fmt"
	"io"
	"io/fs"
	"path/filepath"
	"reflect"
	"regexp"
	"sort"
	"strings"
)
| 15 | |
| 16 // Serialisation of Chroma rules to XML. The format is: | |
| 17 // | |
| 18 // <rules> | |
| 19 // <state name="$STATE"> | |
| 20 // <rule [pattern="$PATTERN"]> | |
| 21 // [<$EMITTER ...>] | |
| 22 // [<$MUTATOR ...>] | |
| 23 // </rule> | |
| 24 // </state> | |
| 25 // </rules> | |
| 26 // | |
| 27 // eg. Include("String") would become: | |
| 28 // | |
| 29 // <rule> | |
| 30 // <include state="String" /> | |
| 31 // </rule> | |
| 32 // | |
| 33 // [null, null, {"kind": "include", "state": "String"}] | |
| 34 // | |
| 35 // eg. Rule{`\d+`, Text, nil} would become: | |
| 36 // | |
| 37 // <rule pattern="\\d+"> | |
| 38 // <token type="Text"/> | |
| 39 // </rule> | |
| 40 // | |
| 41 // eg. Rule{`"`, String, Push("String")} | |
| 42 // | |
| 43 // <rule pattern="\""> | |
| 44 // <token type="String" /> | |
| 45 // <push state="String" /> | |
| 46 // </rule> | |
| 47 // | |
| 48 // eg. Rule{`(\w+)(\n)`, ByGroups(Keyword, Whitespace), nil}, | |
| 49 // | |
| 50 // <rule pattern="(\\w+)(\\n)"> | |
| 51 // <bygroups token="Keyword" token="Whitespace" /> | |
| 52 // <push state="String" /> | |
| 53 // </rule> | |
| 54 var ( | |
| 55 // ErrNotSerialisable is returned if a lexer contains Rules that cannot be serialised. | |
| 56 ErrNotSerialisable = fmt.Errorf("not serialisable") | |
| 57 emitterTemplates = func() map[string]SerialisableEmitter { | |
| 58 out := map[string]SerialisableEmitter{} | |
| 59 for _, emitter := range []SerialisableEmitter{ | |
| 60 &byGroupsEmitter{}, | |
| 61 &usingSelfEmitter{}, | |
| 62 TokenType(0), | |
| 63 &usingEmitter{}, | |
| 64 &usingByGroup{}, | |
| 65 } { | |
| 66 out[emitter.EmitterKind()] = emitter | |
| 67 } | |
| 68 return out | |
| 69 }() | |
| 70 mutatorTemplates = func() map[string]SerialisableMutator { | |
| 71 out := map[string]SerialisableMutator{} | |
| 72 for _, mutator := range []SerialisableMutator{ | |
| 73 &includeMutator{}, | |
| 74 &combinedMutator{}, | |
| 75 &multiMutator{}, | |
| 76 &pushMutator{}, | |
| 77 &popMutator{}, | |
| 78 } { | |
| 79 out[mutator.MutatorKind()] = mutator | |
| 80 } | |
| 81 return out | |
| 82 }() | |
| 83 ) | |
| 84 | |
| 85 // fastUnmarshalConfig unmarshals only the Config from a serialised lexer. | |
| 86 func fastUnmarshalConfig(from fs.FS, path string) (*Config, error) { | |
| 87 r, err := from.Open(path) | |
| 88 if err != nil { | |
| 89 return nil, err | |
| 90 } | |
| 91 defer r.Close() | |
| 92 dec := xml.NewDecoder(r) | |
| 93 for { | |
| 94 token, err := dec.Token() | |
| 95 if err != nil { | |
| 96 if errors.Is(err, io.EOF) { | |
| 97 return nil, fmt.Errorf("could not find <config> element") | |
| 98 } | |
| 99 return nil, err | |
| 100 } | |
| 101 switch se := token.(type) { | |
| 102 case xml.StartElement: | |
| 103 if se.Name.Local != "config" { | |
| 104 break | |
| 105 } | |
| 106 | |
| 107 var config Config | |
| 108 err = dec.DecodeElement(&config, &se) | |
| 109 if err != nil { | |
| 110 panic(err) | |
| 111 } | |
| 112 return &config, nil | |
| 113 } | |
| 114 } | |
| 115 } | |
| 116 | |
| 117 // MustNewXMLLexer constructs a new RegexLexer from an XML file or panics. | |
| 118 func MustNewXMLLexer(from fs.FS, path string) *RegexLexer { | |
| 119 lex, err := NewXMLLexer(from, path) | |
| 120 if err != nil { | |
| 121 panic(err) | |
| 122 } | |
| 123 return lex | |
| 124 } | |
| 125 | |
| 126 // NewXMLLexer creates a new RegexLexer from a serialised RegexLexer. | |
| 127 func NewXMLLexer(from fs.FS, path string) (*RegexLexer, error) { | |
| 128 config, err := fastUnmarshalConfig(from, path) | |
| 129 if err != nil { | |
| 130 return nil, err | |
| 131 } | |
| 132 for _, glob := range append(config.Filenames, config.AliasFilenames...) { | |
| 133 _, err := filepath.Match(glob, "") | |
| 134 if err != nil { | |
| 135 return nil, fmt.Errorf("%s: %q is not a valid glob: %w", config.Name, glob, err) | |
| 136 } | |
| 137 } | |
| 138 return &RegexLexer{ | |
| 139 config: config, | |
| 140 fetchRulesFunc: func() (Rules, error) { | |
| 141 var lexer struct { | |
| 142 Config | |
| 143 Rules Rules `xml:"rules"` | |
| 144 } | |
| 145 // Try to open .xml fallback to .xml.gz | |
| 146 fr, err := from.Open(path) | |
| 147 if err != nil { | |
| 148 if errors.Is(err, fs.ErrNotExist) { | |
| 149 path += ".gz" | |
| 150 fr, err = from.Open(path) | |
| 151 if err != nil { | |
| 152 return nil, err | |
| 153 } | |
| 154 } else { | |
| 155 return nil, err | |
| 156 } | |
| 157 } | |
| 158 defer fr.Close() | |
| 159 var r io.Reader = fr | |
| 160 if strings.HasSuffix(path, ".gz") { | |
| 161 r, err = gzip.NewReader(r) | |
| 162 if err != nil { | |
| 163 return nil, fmt.Errorf("%s: %w", path, err) | |
| 164 } | |
| 165 } | |
| 166 err = xml.NewDecoder(r).Decode(&lexer) | |
| 167 if err != nil { | |
| 168 return nil, fmt.Errorf("%s: %w", path, err) | |
| 169 } | |
| 170 return lexer.Rules, nil | |
| 171 }, | |
| 172 }, nil | |
| 173 } | |
| 174 | |
| 175 // Marshal a RegexLexer to XML. | |
| 176 func Marshal(l *RegexLexer) ([]byte, error) { | |
| 177 type lexer struct { | |
| 178 Config Config `xml:"config"` | |
| 179 Rules Rules `xml:"rules"` | |
| 180 } | |
| 181 | |
| 182 rules, err := l.Rules() | |
| 183 if err != nil { | |
| 184 return nil, err | |
| 185 } | |
| 186 root := &lexer{ | |
| 187 Config: *l.Config(), | |
| 188 Rules: rules, | |
| 189 } | |
| 190 data, err := xml.MarshalIndent(root, "", " ") | |
| 191 if err != nil { | |
| 192 return nil, err | |
| 193 } | |
| 194 re := regexp.MustCompile(`></[a-zA-Z]+>`) | |
| 195 data = re.ReplaceAll(data, []byte(`/>`)) | |
| 196 return data, nil | |
| 197 } | |
| 198 | |
| 199 // Unmarshal a RegexLexer from XML. | |
| 200 func Unmarshal(data []byte) (*RegexLexer, error) { | |
| 201 type lexer struct { | |
| 202 Config Config `xml:"config"` | |
| 203 Rules Rules `xml:"rules"` | |
| 204 } | |
| 205 root := &lexer{} | |
| 206 err := xml.Unmarshal(data, root) | |
| 207 if err != nil { | |
| 208 return nil, fmt.Errorf("invalid Lexer XML: %w", err) | |
| 209 } | |
| 210 lex, err := NewLexer(&root.Config, func() Rules { return root.Rules }) | |
| 211 if err != nil { | |
| 212 return nil, err | |
| 213 } | |
| 214 return lex, nil | |
| 215 } | |
| 216 | |
| 217 func marshalMutator(e *xml.Encoder, mutator Mutator) error { | |
| 218 if mutator == nil { | |
| 219 return nil | |
| 220 } | |
| 221 smutator, ok := mutator.(SerialisableMutator) | |
| 222 if !ok { | |
| 223 return fmt.Errorf("unsupported mutator: %w", ErrNotSerialisable) | |
| 224 } | |
| 225 return e.EncodeElement(mutator, xml.StartElement{Name: xml.Name{Local: smutator.MutatorKind()}}) | |
| 226 } | |
| 227 | |
| 228 func unmarshalMutator(d *xml.Decoder, start xml.StartElement) (Mutator, error) { | |
| 229 kind := start.Name.Local | |
| 230 mutator, ok := mutatorTemplates[kind] | |
| 231 if !ok { | |
| 232 return nil, fmt.Errorf("unknown mutator %q: %w", kind, ErrNotSerialisable) | |
| 233 } | |
| 234 value, target := newFromTemplate(mutator) | |
| 235 if err := d.DecodeElement(target, &start); err != nil { | |
| 236 return nil, err | |
| 237 } | |
| 238 return value().(SerialisableMutator), nil | |
| 239 } | |
| 240 | |
| 241 func marshalEmitter(e *xml.Encoder, emitter Emitter) error { | |
| 242 if emitter == nil { | |
| 243 return nil | |
| 244 } | |
| 245 semitter, ok := emitter.(SerialisableEmitter) | |
| 246 if !ok { | |
| 247 return fmt.Errorf("unsupported emitter %T: %w", emitter, ErrNotSerialisable) | |
| 248 } | |
| 249 return e.EncodeElement(emitter, xml.StartElement{ | |
| 250 Name: xml.Name{Local: semitter.EmitterKind()}, | |
| 251 }) | |
| 252 } | |
| 253 | |
| 254 func unmarshalEmitter(d *xml.Decoder, start xml.StartElement) (Emitter, error) { | |
| 255 kind := start.Name.Local | |
| 256 mutator, ok := emitterTemplates[kind] | |
| 257 if !ok { | |
| 258 return nil, fmt.Errorf("unknown emitter %q: %w", kind, ErrNotSerialisable) | |
| 259 } | |
| 260 value, target := newFromTemplate(mutator) | |
| 261 if err := d.DecodeElement(target, &start); err != nil { | |
| 262 return nil, err | |
| 263 } | |
| 264 return value().(SerialisableEmitter), nil | |
| 265 } | |
| 266 | |
| 267 func (r Rule) MarshalXML(e *xml.Encoder, _ xml.StartElement) error { | |
| 268 start := xml.StartElement{ | |
| 269 Name: xml.Name{Local: "rule"}, | |
| 270 } | |
| 271 if r.Pattern != "" { | |
| 272 start.Attr = append(start.Attr, xml.Attr{ | |
| 273 Name: xml.Name{Local: "pattern"}, | |
| 274 Value: r.Pattern, | |
| 275 }) | |
| 276 } | |
| 277 if err := e.EncodeToken(start); err != nil { | |
| 278 return err | |
| 279 } | |
| 280 if err := marshalEmitter(e, r.Type); err != nil { | |
| 281 return err | |
| 282 } | |
| 283 if err := marshalMutator(e, r.Mutator); err != nil { | |
| 284 return err | |
| 285 } | |
| 286 return e.EncodeToken(xml.EndElement{Name: start.Name}) | |
| 287 } | |
| 288 | |
| 289 func (r *Rule) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { | |
| 290 for _, attr := range start.Attr { | |
| 291 if attr.Name.Local == "pattern" { | |
| 292 r.Pattern = attr.Value | |
| 293 break | |
| 294 } | |
| 295 } | |
| 296 for { | |
| 297 token, err := d.Token() | |
| 298 if err != nil { | |
| 299 return err | |
| 300 } | |
| 301 switch token := token.(type) { | |
| 302 case xml.StartElement: | |
| 303 mutator, err := unmarshalMutator(d, token) | |
| 304 if err != nil && !errors.Is(err, ErrNotSerialisable) { | |
| 305 return err | |
| 306 } else if err == nil { | |
| 307 if r.Mutator != nil { | |
| 308 return fmt.Errorf("duplicate mutator") | |
| 309 } | |
| 310 r.Mutator = mutator | |
| 311 continue | |
| 312 } | |
| 313 emitter, err := unmarshalEmitter(d, token) | |
| 314 if err != nil && !errors.Is(err, ErrNotSerialisable) { // nolint: gocritic | |
| 315 return err | |
| 316 } else if err == nil { | |
| 317 if r.Type != nil { | |
| 318 return fmt.Errorf("duplicate emitter") | |
| 319 } | |
| 320 r.Type = emitter | |
| 321 continue | |
| 322 } else { | |
| 323 return err | |
| 324 } | |
| 325 | |
| 326 case xml.EndElement: | |
| 327 return nil | |
| 328 } | |
| 329 } | |
| 330 } | |
| 331 | |
| 332 type xmlRuleState struct { | |
| 333 Name string `xml:"name,attr"` | |
| 334 Rules []Rule `xml:"rule"` | |
| 335 } | |
| 336 | |
| 337 type xmlRules struct { | |
| 338 States []xmlRuleState `xml:"state"` | |
| 339 } | |
| 340 | |
| 341 func (r Rules) MarshalXML(e *xml.Encoder, _ xml.StartElement) error { | |
| 342 xr := xmlRules{} | |
| 343 for state, rules := range r { | |
| 344 xr.States = append(xr.States, xmlRuleState{ | |
| 345 Name: state, | |
| 346 Rules: rules, | |
| 347 }) | |
| 348 } | |
| 349 return e.EncodeElement(xr, xml.StartElement{Name: xml.Name{Local: "rules"}}) | |
| 350 } | |
| 351 | |
| 352 func (r *Rules) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { | |
| 353 xr := xmlRules{} | |
| 354 if err := d.DecodeElement(&xr, &start); err != nil { | |
| 355 return err | |
| 356 } | |
| 357 if *r == nil { | |
| 358 *r = Rules{} | |
| 359 } | |
| 360 for _, state := range xr.States { | |
| 361 (*r)[state.Name] = state.Rules | |
| 362 } | |
| 363 return nil | |
| 364 } | |
| 365 | |
| 366 type xmlTokenType struct { | |
| 367 Type string `xml:"type,attr"` | |
| 368 } | |
| 369 | |
| 370 func (t *TokenType) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { | |
| 371 el := xmlTokenType{} | |
| 372 if err := d.DecodeElement(&el, &start); err != nil { | |
| 373 return err | |
| 374 } | |
| 375 for tt, text := range _TokenType_map { | |
| 376 if text == el.Type { | |
| 377 *t = tt | |
| 378 return nil | |
| 379 } | |
| 380 } | |
| 381 return fmt.Errorf("unknown TokenType %q", el.Type) | |
| 382 } | |
| 383 | |
| 384 func (t TokenType) MarshalXML(e *xml.Encoder, start xml.StartElement) error { | |
| 385 start.Attr = append(start.Attr, xml.Attr{Name: xml.Name{Local: "type"}, Value: t.String()}) | |
| 386 if err := e.EncodeToken(start); err != nil { | |
| 387 return err | |
| 388 } | |
| 389 return e.EncodeToken(xml.EndElement{Name: start.Name}) | |
| 390 } | |
| 391 | |
// newFromTemplate allocates a fresh zero value of template's type and returns
// a decode target for it plus an accessor for the decoded result.
//
// For a pointer template (*T) both target and value() are the same new *T.
// For a non-pointer template (T) target is a new *T while value() yields the
// pointed-to T; this indirection is what makes it possible to deserialise
// into value types such as TokenType.
func newFromTemplate(template interface{}) (value func() interface{}, target interface{}) {
	typ := reflect.TypeOf(template)
	if typ.Kind() != reflect.Ptr {
		holder := reflect.New(typ)
		value = func() interface{} { return holder.Elem().Interface() }
		return value, holder.Interface()
	}
	fresh := reflect.New(typ.Elem())
	return fresh.Interface, fresh.Interface()
}
| 402 | |
| 403 func (b *Emitters) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { | |
| 404 for { | |
| 405 token, err := d.Token() | |
| 406 if err != nil { | |
| 407 return err | |
| 408 } | |
| 409 switch token := token.(type) { | |
| 410 case xml.StartElement: | |
| 411 emitter, err := unmarshalEmitter(d, token) | |
| 412 if err != nil { | |
| 413 return err | |
| 414 } | |
| 415 *b = append(*b, emitter) | |
| 416 | |
| 417 case xml.EndElement: | |
| 418 return nil | |
| 419 } | |
| 420 } | |
| 421 } | |
| 422 | |
| 423 func (b Emitters) MarshalXML(e *xml.Encoder, start xml.StartElement) error { | |
| 424 if err := e.EncodeToken(start); err != nil { | |
| 425 return err | |
| 426 } | |
| 427 for _, m := range b { | |
| 428 if err := marshalEmitter(e, m); err != nil { | |
| 429 return err | |
| 430 } | |
| 431 } | |
| 432 return e.EncodeToken(xml.EndElement{Name: start.Name}) | |
| 433 } |
