package chroma

import (
	"fmt"
	"strings"
)

var (
	defaultOptions = &TokeniseOptions{
		State:    "root",
		EnsureLF: true,
	}
)

// Config for a lexer.
type Config struct {
	// Name of the lexer.
	Name string `xml:"name,omitempty"`

	// Shortcuts for the lexer.
	Aliases []string `xml:"alias,omitempty"`

	// File name globs.
	Filenames []string `xml:"filename,omitempty"`

	// Secondary file name globs.
	AliasFilenames []string `xml:"alias_filename,omitempty"`

	// MIME types.
	MimeTypes []string `xml:"mime_type,omitempty"`

	// Regex matching is case-insensitive.
	CaseInsensitive bool `xml:"case_insensitive,omitempty"`

	// Regex matches all characters.
	DotAll bool `xml:"dot_all,omitempty"`

	// Regex does not match across lines ($ matches EOL).
	//
	// Defaults to multiline.
	NotMultiline bool `xml:"not_multiline,omitempty"`

	// Don't strip leading and trailing newlines from the input.
	// DontStripNL bool

	// Strip all leading and trailing whitespace from the input.
	// StripAll bool

	// Make sure that the input ends with a newline. This is required for
	// some lexers that consume input linewise.
	EnsureNL bool `xml:"ensure_nl,omitempty"`

	// If given and greater than 0, expand tabs in the input.
	// TabSize int

	// Priority of lexer.
	//
	// If this is 0 it will be treated as a default of 1.
	Priority float32 `xml:"priority,omitempty"`
}
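
// A minimal Config literal, for illustration; the field values below are
// hypothetical and not taken from any real lexer definition:
//
//	cfg := Config{
//		Name:      "Go",
//		Aliases:   []string{"go", "golang"},
//		Filenames: []string{"*.go"},
//		MimeTypes: []string{"text/x-gosrc"},
//	}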

// Token output to formatter.
type Token struct {
	Type  TokenType `json:"type"`
	Value string    `json:"value"`
}

func (t *Token) String() string   { return t.Value }
func (t *Token) GoString() string { return fmt.Sprintf("&Token{%s, %q}", t.Type, t.Value) }

// Clone returns a clone of the Token.
func (t *Token) Clone() Token {
	return *t
}

// EOF is returned by lexers at the end of input.
var EOF Token

// TokeniseOptions contains options for tokenisers.
type TokeniseOptions struct {
	// State to start tokenisation in. Defaults to "root".
	State string
	// Nested tokenisation.
	Nested bool

	// If true, all EOLs are converted into LF
	// by replacing CRLF and CR.
	EnsureLF bool
}
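
// A sketch of the normalisation EnsureLF implies, assuming a simple
// strings-based implementation (the real one may differ):
//
//	text = strings.ReplaceAll(text, "\r\n", "\n")
//	text = strings.ReplaceAll(text, "\r", "\n")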

// A Lexer for tokenising source code.
type Lexer interface {
	// Config describing the features of the Lexer.
	Config() *Config
	// Tokenise returns an Iterator over tokens in text.
	Tokenise(options *TokeniseOptions, text string) (Iterator, error)
	// SetRegistry sets the registry this Lexer is associated with.
	//
	// The registry should be used by the Lexer if it needs to look up other
	// lexers.
	SetRegistry(registry *LexerRegistry) Lexer
	// SetAnalyser sets a function the Lexer should use for scoring how
	// likely a fragment of text is to match this lexer, between 0.0 and 1.0.
	// A value of 1 indicates high confidence.
	//
	// Lexers may ignore this if they implement their own analysers.
	SetAnalyser(analyser func(text string) float32) Lexer
	// AnalyseText scores how likely a fragment of text is to match
	// this lexer, between 0.0 and 1.0. A value of 1 indicates high confidence.
	AnalyseText(text string) float32
}
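
// Usage sketch, assuming a lexer obtained elsewhere (e.g. from a registry)
// and that Iterator is a function returning successive Tokens, yielding EOF
// once the input is exhausted:
//
//	it, err := lexer.Tokenise(nil, source) // nil options fall back to defaultOptions
//	if err != nil {
//		// handle the error
//	}
//	for token := it(); token != EOF; token = it() {
//		fmt.Println(token.Type, token.Value)
//	}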

// Lexers is a slice of lexers sortable by name.
type Lexers []Lexer

func (l Lexers) Len() int      { return len(l) }
func (l Lexers) Swap(i, j int) { l[i], l[j] = l[j], l[i] }
func (l Lexers) Less(i, j int) bool {
	return strings.ToLower(l[i].Config().Name) < strings.ToLower(l[j].Config().Name)
}

// PrioritisedLexers is a slice of lexers sortable by priority, highest first.
type PrioritisedLexers []Lexer

func (l PrioritisedLexers) Len() int      { return len(l) }
func (l PrioritisedLexers) Swap(i, j int) { l[i], l[j] = l[j], l[i] }
func (l PrioritisedLexers) Less(i, j int) bool {
	ip := l[i].Config().Priority
	if ip == 0 {
		ip = 1
	}
	jp := l[j].Config().Priority
	if jp == 0 {
		jp = 1
	}
	return ip > jp
}
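
// Both slice types satisfy sort.Interface, so they can be ordered with the
// standard library; a sketch, assuming a populated lexers value of type []Lexer:
//
//	sort.Sort(Lexers(lexers))            // ascending by case-insensitive name
//	sort.Sort(PrioritisedLexers(lexers)) // descending by priority (0 treated as 1)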

// Analyser determines how appropriate this lexer is for the given text.
type Analyser interface {
	AnalyseText(text string) float32
}