Mercurial > yakumo_izuru > aya
comparison vendor/github.com/dlclark/regexp2/regexp.go @ 66:787b5ee0289d draft
Use vendored modules
Signed-off-by: Izuru Yakumo <yakumo.izuru@chaotic.ninja>
author | yakumo.izuru |
---|---|
date | Sun, 23 Jul 2023 13:18:53 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
65:6d985efa0f7a | 66:787b5ee0289d |
---|---|
1 /* | |
2 Package regexp2 is a regexp package that has an interface similar to Go's standard library regexp package but uses a | |
3 more full-featured regex engine behind the scenes. | |
4 | |
5 It doesn't have constant time guarantees, but it allows backtracking and is compatible with Perl5 and .NET. | |
6 You'll likely be better off with the RE2 engine from the regexp package and should only use this if you | |
7 need to write very complex patterns or require compatibility with .NET. | |
8 */ | |
9 package regexp2 | |
10 | |
11 import ( | |
12 "errors" | |
13 "math" | |
14 "strconv" | |
15 "sync" | |
16 "time" | |
17 | |
18 "github.com/dlclark/regexp2/syntax" | |
19 ) | |
20 | |
// DefaultMatchTimeout is the timeout Compile stores in Regexp.MatchTimeout.
// It is the largest representable duration, i.e. "forever".
var DefaultMatchTimeout time.Duration = math.MaxInt64
23 | |
24 // Regexp is the representation of a compiled regular expression. | |
25 // A Regexp is safe for concurrent use by multiple goroutines. | |
26 type Regexp struct { | |
27 //timeout when trying to find matches | |
28 MatchTimeout time.Duration | |
29 | |
30 // read-only after Compile | |
31 pattern string // as passed to Compile | |
32 options RegexOptions // options | |
33 | |
34 caps map[int]int // capnum->index | |
35 capnames map[string]int //capture group name -> index | |
36 capslist []string //sorted list of capture group names | |
37 capsize int // size of the capture array | |
38 | |
39 code *syntax.Code // compiled program | |
40 | |
41 // cache of machines for running regexp | |
42 muRun sync.Mutex | |
43 runner []*runner | |
44 } | |
45 | |
46 // Compile parses a regular expression and returns, if successful, | |
47 // a Regexp object that can be used to match against text. | |
48 func Compile(expr string, opt RegexOptions) (*Regexp, error) { | |
49 // parse it | |
50 tree, err := syntax.Parse(expr, syntax.RegexOptions(opt)) | |
51 if err != nil { | |
52 return nil, err | |
53 } | |
54 | |
55 // translate it to code | |
56 code, err := syntax.Write(tree) | |
57 if err != nil { | |
58 return nil, err | |
59 } | |
60 | |
61 // return it | |
62 return &Regexp{ | |
63 pattern: expr, | |
64 options: opt, | |
65 caps: code.Caps, | |
66 capnames: tree.Capnames, | |
67 capslist: tree.Caplist, | |
68 capsize: code.Capsize, | |
69 code: code, | |
70 MatchTimeout: DefaultMatchTimeout, | |
71 }, nil | |
72 } | |
73 | |
74 // MustCompile is like Compile but panics if the expression cannot be parsed. | |
75 // It simplifies safe initialization of global variables holding compiled regular | |
76 // expressions. | |
77 func MustCompile(str string, opt RegexOptions) *Regexp { | |
78 regexp, error := Compile(str, opt) | |
79 if error != nil { | |
80 panic(`regexp2: Compile(` + quote(str) + `): ` + error.Error()) | |
81 } | |
82 return regexp | |
83 } | |
84 | |
85 // Escape adds backslashes to any special characters in the input string | |
86 func Escape(input string) string { | |
87 return syntax.Escape(input) | |
88 } | |
89 | |
90 // Unescape removes any backslashes from previously-escaped special characters in the input string | |
91 func Unescape(input string) (string, error) { | |
92 return syntax.Unescape(input) | |
93 } | |
94 | |
95 // String returns the source text used to compile the regular expression. | |
96 func (re *Regexp) String() string { | |
97 return re.pattern | |
98 } | |
99 | |
// quote renders s as a Go string literal. A raw (backquoted) literal is used
// when strconv.CanBackquote allows it; otherwise the string is formatted with
// strconv.Quote.
func quote(s string) string {
	if !strconv.CanBackquote(s) {
		return strconv.Quote(s)
	}
	return "`" + s + "`"
}
106 | |
// RegexOptions impact the runtime and parsing behavior for each specific
// regex. They are settable in code as well as in the regex pattern itself;
// the quoted letter next to each constant is its flag.
type RegexOptions int32

// Option bits. Only None is declared with the RegexOptions type; the other
// names are untyped integer constants.
const (
	None                    RegexOptions = 0
	IgnoreCase                           = 1 << 0 // 0x0001, "i"
	Multiline                            = 1 << 1 // 0x0002, "m"
	ExplicitCapture                      = 1 << 2 // 0x0004, "n"
	Compiled                             = 1 << 3 // 0x0008, "c"
	Singleline                           = 1 << 4 // 0x0010, "s"
	IgnorePatternWhitespace              = 1 << 5 // 0x0020, "x"
	RightToLeft                          = 1 << 6 // 0x0040, "r"
	Debug                                = 1 << 7 // 0x0080, "d"
	ECMAScript                           = 1 << 8 // 0x0100, "e"
	RE2                                  = 1 << 9 // 0x0200, RE2 (regexp package) compatibility mode
)
125 | |
126 func (re *Regexp) RightToLeft() bool { | |
127 return re.options&RightToLeft != 0 | |
128 } | |
129 | |
130 func (re *Regexp) Debug() bool { | |
131 return re.options&Debug != 0 | |
132 } | |
133 | |
134 // Replace searches the input string and replaces each match found with the replacement text. | |
135 // Count will limit the number of matches attempted and startAt will allow | |
136 // us to skip past possible matches at the start of the input (left or right depending on RightToLeft option). | |
137 // Set startAt and count to -1 to go through the whole string | |
138 func (re *Regexp) Replace(input, replacement string, startAt, count int) (string, error) { | |
139 data, err := syntax.NewReplacerData(replacement, re.caps, re.capsize, re.capnames, syntax.RegexOptions(re.options)) | |
140 if err != nil { | |
141 return "", err | |
142 } | |
143 //TODO: cache ReplacerData | |
144 | |
145 return replace(re, data, nil, input, startAt, count) | |
146 } | |
147 | |
148 // ReplaceFunc searches the input string and replaces each match found using the string from the evaluator | |
149 // Count will limit the number of matches attempted and startAt will allow | |
150 // us to skip past possible matches at the start of the input (left or right depending on RightToLeft option). | |
151 // Set startAt and count to -1 to go through the whole string. | |
152 func (re *Regexp) ReplaceFunc(input string, evaluator MatchEvaluator, startAt, count int) (string, error) { | |
153 return replace(re, nil, evaluator, input, startAt, count) | |
154 } | |
155 | |
156 // FindStringMatch searches the input string for a Regexp match | |
157 func (re *Regexp) FindStringMatch(s string) (*Match, error) { | |
158 // convert string to runes | |
159 return re.run(false, -1, getRunes(s)) | |
160 } | |
161 | |
162 // FindRunesMatch searches the input rune slice for a Regexp match | |
163 func (re *Regexp) FindRunesMatch(r []rune) (*Match, error) { | |
164 return re.run(false, -1, r) | |
165 } | |
166 | |
167 // FindStringMatchStartingAt searches the input string for a Regexp match starting at the startAt index | |
168 func (re *Regexp) FindStringMatchStartingAt(s string, startAt int) (*Match, error) { | |
169 if startAt > len(s) { | |
170 return nil, errors.New("startAt must be less than the length of the input string") | |
171 } | |
172 r, startAt := re.getRunesAndStart(s, startAt) | |
173 if startAt == -1 { | |
174 // we didn't find our start index in the string -- that's a problem | |
175 return nil, errors.New("startAt must align to the start of a valid rune in the input string") | |
176 } | |
177 | |
178 return re.run(false, startAt, r) | |
179 } | |
180 | |
181 // FindRunesMatchStartingAt searches the input rune slice for a Regexp match starting at the startAt index | |
182 func (re *Regexp) FindRunesMatchStartingAt(r []rune, startAt int) (*Match, error) { | |
183 return re.run(false, startAt, r) | |
184 } | |
185 | |
186 // FindNextMatch returns the next match in the same input string as the match parameter. | |
187 // Will return nil if there is no next match or if given a nil match. | |
188 func (re *Regexp) FindNextMatch(m *Match) (*Match, error) { | |
189 if m == nil { | |
190 return nil, nil | |
191 } | |
192 | |
193 // If previous match was empty, advance by one before matching to prevent | |
194 // infinite loop | |
195 startAt := m.textpos | |
196 if m.Length == 0 { | |
197 if m.textpos == len(m.text) { | |
198 return nil, nil | |
199 } | |
200 | |
201 if re.RightToLeft() { | |
202 startAt-- | |
203 } else { | |
204 startAt++ | |
205 } | |
206 } | |
207 return re.run(false, startAt, m.text) | |
208 } | |
209 | |
210 // MatchString return true if the string matches the regex | |
211 // error will be set if a timeout occurs | |
212 func (re *Regexp) MatchString(s string) (bool, error) { | |
213 m, err := re.run(true, -1, getRunes(s)) | |
214 if err != nil { | |
215 return false, err | |
216 } | |
217 return m != nil, nil | |
218 } | |
219 | |
220 func (re *Regexp) getRunesAndStart(s string, startAt int) ([]rune, int) { | |
221 if startAt < 0 { | |
222 if re.RightToLeft() { | |
223 r := getRunes(s) | |
224 return r, len(r) | |
225 } | |
226 return getRunes(s), 0 | |
227 } | |
228 ret := make([]rune, len(s)) | |
229 i := 0 | |
230 runeIdx := -1 | |
231 for strIdx, r := range s { | |
232 if strIdx == startAt { | |
233 runeIdx = i | |
234 } | |
235 ret[i] = r | |
236 i++ | |
237 } | |
238 if startAt == len(s) { | |
239 runeIdx = i | |
240 } | |
241 return ret[:i], runeIdx | |
242 } | |
243 | |
// getRunes converts s to a slice of runes.
func getRunes(s string) []rune {
	runes := []rune(s)
	return runes
}
247 | |
248 // MatchRunes return true if the runes matches the regex | |
249 // error will be set if a timeout occurs | |
250 func (re *Regexp) MatchRunes(r []rune) (bool, error) { | |
251 m, err := re.run(true, -1, r) | |
252 if err != nil { | |
253 return false, err | |
254 } | |
255 return m != nil, nil | |
256 } | |
257 | |
258 // GetGroupNames Returns the set of strings used to name capturing groups in the expression. | |
259 func (re *Regexp) GetGroupNames() []string { | |
260 var result []string | |
261 | |
262 if re.capslist == nil { | |
263 result = make([]string, re.capsize) | |
264 | |
265 for i := 0; i < len(result); i++ { | |
266 result[i] = strconv.Itoa(i) | |
267 } | |
268 } else { | |
269 result = make([]string, len(re.capslist)) | |
270 copy(result, re.capslist) | |
271 } | |
272 | |
273 return result | |
274 } | |
275 | |
276 // GetGroupNumbers returns the integer group numbers corresponding to a group name. | |
277 func (re *Regexp) GetGroupNumbers() []int { | |
278 var result []int | |
279 | |
280 if re.caps == nil { | |
281 result = make([]int, re.capsize) | |
282 | |
283 for i := 0; i < len(result); i++ { | |
284 result[i] = i | |
285 } | |
286 } else { | |
287 result = make([]int, len(re.caps)) | |
288 | |
289 for k, v := range re.caps { | |
290 result[v] = k | |
291 } | |
292 } | |
293 | |
294 return result | |
295 } | |
296 | |
297 // GroupNameFromNumber retrieves a group name that corresponds to a group number. | |
298 // It will return "" for and unknown group number. Unnamed groups automatically | |
299 // receive a name that is the decimal string equivalent of its number. | |
300 func (re *Regexp) GroupNameFromNumber(i int) string { | |
301 if re.capslist == nil { | |
302 if i >= 0 && i < re.capsize { | |
303 return strconv.Itoa(i) | |
304 } | |
305 | |
306 return "" | |
307 } | |
308 | |
309 if re.caps != nil { | |
310 var ok bool | |
311 if i, ok = re.caps[i]; !ok { | |
312 return "" | |
313 } | |
314 } | |
315 | |
316 if i >= 0 && i < len(re.capslist) { | |
317 return re.capslist[i] | |
318 } | |
319 | |
320 return "" | |
321 } | |
322 | |
323 // GroupNumberFromName returns a group number that corresponds to a group name. | |
324 // Returns -1 if the name is not a recognized group name. Numbered groups | |
325 // automatically get a group name that is the decimal string equivalent of its number. | |
326 func (re *Regexp) GroupNumberFromName(name string) int { | |
327 // look up name if we have a hashtable of names | |
328 if re.capnames != nil { | |
329 if k, ok := re.capnames[name]; ok { | |
330 return k | |
331 } | |
332 | |
333 return -1 | |
334 } | |
335 | |
336 // convert to an int if it looks like a number | |
337 result := 0 | |
338 for i := 0; i < len(name); i++ { | |
339 ch := name[i] | |
340 | |
341 if ch > '9' || ch < '0' { | |
342 return -1 | |
343 } | |
344 | |
345 result *= 10 | |
346 result += int(ch - '0') | |
347 } | |
348 | |
349 // return int if it's in range | |
350 if result >= 0 && result < re.capsize { | |
351 return result | |
352 } | |
353 | |
354 return -1 | |
355 } |