comparison vendor/github.com/dlclark/regexp2/regexp.go @ 66:787b5ee0289d draft

Use vendored modules Signed-off-by: Izuru Yakumo <yakumo.izuru@chaotic.ninja>
author yakumo.izuru
date Sun, 23 Jul 2023 13:18:53 +0000
parents
children
comparison
equal deleted inserted replaced
65:6d985efa0f7a 66:787b5ee0289d
1 /*
2 Package regexp2 is a regexp package that has an interface similar to Go's framework regexp engine but uses a
3 more full-featured regex engine behind the scenes.
4
5 It doesn't have constant time guarantees, but it allows backtracking and is compatible with Perl5 and .NET.
6 You'll likely be better off with the RE2 engine from the regexp package and should only use this if you
7 need to write very complex patterns or require compatibility with .NET.
8 */
9 package regexp2
10
11 import (
12 "errors"
13 "math"
14 "strconv"
15 "sync"
16 "time"
17
18 "github.com/dlclark/regexp2/syntax"
19 )
20
// DefaultMatchTimeout is the match timeout given to every Regexp returned by
// Compile. It is the largest representable duration, i.e. "forever".
var DefaultMatchTimeout time.Duration = math.MaxInt64
23
24 // Regexp is the representation of a compiled regular expression.
25 // A Regexp is safe for concurrent use by multiple goroutines.
26 type Regexp struct {
27 //timeout when trying to find matches
28 MatchTimeout time.Duration
29
30 // read-only after Compile
31 pattern string // as passed to Compile
32 options RegexOptions // options
33
34 caps map[int]int // capnum->index
35 capnames map[string]int //capture group name -> index
36 capslist []string //sorted list of capture group names
37 capsize int // size of the capture array
38
39 code *syntax.Code // compiled program
40
41 // cache of machines for running regexp
42 muRun sync.Mutex
43 runner []*runner
44 }
45
46 // Compile parses a regular expression and returns, if successful,
47 // a Regexp object that can be used to match against text.
48 func Compile(expr string, opt RegexOptions) (*Regexp, error) {
49 // parse it
50 tree, err := syntax.Parse(expr, syntax.RegexOptions(opt))
51 if err != nil {
52 return nil, err
53 }
54
55 // translate it to code
56 code, err := syntax.Write(tree)
57 if err != nil {
58 return nil, err
59 }
60
61 // return it
62 return &Regexp{
63 pattern: expr,
64 options: opt,
65 caps: code.Caps,
66 capnames: tree.Capnames,
67 capslist: tree.Caplist,
68 capsize: code.Capsize,
69 code: code,
70 MatchTimeout: DefaultMatchTimeout,
71 }, nil
72 }
73
74 // MustCompile is like Compile but panics if the expression cannot be parsed.
75 // It simplifies safe initialization of global variables holding compiled regular
76 // expressions.
77 func MustCompile(str string, opt RegexOptions) *Regexp {
78 regexp, error := Compile(str, opt)
79 if error != nil {
80 panic(`regexp2: Compile(` + quote(str) + `): ` + error.Error())
81 }
82 return regexp
83 }
84
85 // Escape adds backslashes to any special characters in the input string
86 func Escape(input string) string {
87 return syntax.Escape(input)
88 }
89
90 // Unescape removes any backslashes from previously-escaped special characters in the input string
91 func Unescape(input string) (string, error) {
92 return syntax.Unescape(input)
93 }
94
95 // String returns the source text used to compile the regular expression.
96 func (re *Regexp) String() string {
97 return re.pattern
98 }
99
// quote renders s as a Go string literal: a raw (backquoted) literal when s can
// be represented that way unchanged, otherwise an escaped double-quoted literal.
func quote(s string) string {
	if !strconv.CanBackquote(s) {
		return strconv.Quote(s)
	}
	return "`" + s + "`"
}
106
// RegexOptions is a bit set that affects the parsing and runtime behavior of a
// single regex. Options can be set in code as well as in the regex pattern
// itself (using the letter shown next to each flag).
type RegexOptions int32

// Option flags. Only None carries the RegexOptions type; the remaining flags
// are untyped integer constants, so they combine freely with RegexOptions
// values and with plain integers.
const (
	None                    RegexOptions = 0
	IgnoreCase                           = 1 << 0 // "i"
	Multiline                            = 1 << 1 // "m"
	ExplicitCapture                      = 1 << 2 // "n"
	Compiled                             = 1 << 3 // "c"
	Singleline                           = 1 << 4 // "s"
	IgnorePatternWhitespace              = 1 << 5 // "x"
	RightToLeft                          = 1 << 6 // "r"
	Debug                                = 1 << 7 // "d"
	ECMAScript                           = 1 << 8 // "e"
	RE2                                  = 1 << 9 // RE2 (regexp package) compatibility mode
)
125
126 func (re *Regexp) RightToLeft() bool {
127 return re.options&RightToLeft != 0
128 }
129
130 func (re *Regexp) Debug() bool {
131 return re.options&Debug != 0
132 }
133
134 // Replace searches the input string and replaces each match found with the replacement text.
135 // Count will limit the number of matches attempted and startAt will allow
136 // us to skip past possible matches at the start of the input (left or right depending on RightToLeft option).
137 // Set startAt and count to -1 to go through the whole string
138 func (re *Regexp) Replace(input, replacement string, startAt, count int) (string, error) {
139 data, err := syntax.NewReplacerData(replacement, re.caps, re.capsize, re.capnames, syntax.RegexOptions(re.options))
140 if err != nil {
141 return "", err
142 }
143 //TODO: cache ReplacerData
144
145 return replace(re, data, nil, input, startAt, count)
146 }
147
148 // ReplaceFunc searches the input string and replaces each match found using the string from the evaluator
149 // Count will limit the number of matches attempted and startAt will allow
150 // us to skip past possible matches at the start of the input (left or right depending on RightToLeft option).
151 // Set startAt and count to -1 to go through the whole string.
152 func (re *Regexp) ReplaceFunc(input string, evaluator MatchEvaluator, startAt, count int) (string, error) {
153 return replace(re, nil, evaluator, input, startAt, count)
154 }
155
156 // FindStringMatch searches the input string for a Regexp match
157 func (re *Regexp) FindStringMatch(s string) (*Match, error) {
158 // convert string to runes
159 return re.run(false, -1, getRunes(s))
160 }
161
162 // FindRunesMatch searches the input rune slice for a Regexp match
163 func (re *Regexp) FindRunesMatch(r []rune) (*Match, error) {
164 return re.run(false, -1, r)
165 }
166
167 // FindStringMatchStartingAt searches the input string for a Regexp match starting at the startAt index
168 func (re *Regexp) FindStringMatchStartingAt(s string, startAt int) (*Match, error) {
169 if startAt > len(s) {
170 return nil, errors.New("startAt must be less than the length of the input string")
171 }
172 r, startAt := re.getRunesAndStart(s, startAt)
173 if startAt == -1 {
174 // we didn't find our start index in the string -- that's a problem
175 return nil, errors.New("startAt must align to the start of a valid rune in the input string")
176 }
177
178 return re.run(false, startAt, r)
179 }
180
181 // FindRunesMatchStartingAt searches the input rune slice for a Regexp match starting at the startAt index
182 func (re *Regexp) FindRunesMatchStartingAt(r []rune, startAt int) (*Match, error) {
183 return re.run(false, startAt, r)
184 }
185
186 // FindNextMatch returns the next match in the same input string as the match parameter.
187 // Will return nil if there is no next match or if given a nil match.
188 func (re *Regexp) FindNextMatch(m *Match) (*Match, error) {
189 if m == nil {
190 return nil, nil
191 }
192
193 // If previous match was empty, advance by one before matching to prevent
194 // infinite loop
195 startAt := m.textpos
196 if m.Length == 0 {
197 if m.textpos == len(m.text) {
198 return nil, nil
199 }
200
201 if re.RightToLeft() {
202 startAt--
203 } else {
204 startAt++
205 }
206 }
207 return re.run(false, startAt, m.text)
208 }
209
210 // MatchString return true if the string matches the regex
211 // error will be set if a timeout occurs
212 func (re *Regexp) MatchString(s string) (bool, error) {
213 m, err := re.run(true, -1, getRunes(s))
214 if err != nil {
215 return false, err
216 }
217 return m != nil, nil
218 }
219
220 func (re *Regexp) getRunesAndStart(s string, startAt int) ([]rune, int) {
221 if startAt < 0 {
222 if re.RightToLeft() {
223 r := getRunes(s)
224 return r, len(r)
225 }
226 return getRunes(s), 0
227 }
228 ret := make([]rune, len(s))
229 i := 0
230 runeIdx := -1
231 for strIdx, r := range s {
232 if strIdx == startAt {
233 runeIdx = i
234 }
235 ret[i] = r
236 i++
237 }
238 if startAt == len(s) {
239 runeIdx = i
240 }
241 return ret[:i], runeIdx
242 }
243
// getRunes converts s into its sequence of runes.
func getRunes(s string) []rune {
	runes := []rune(s)
	return runes
}
247
248 // MatchRunes return true if the runes matches the regex
249 // error will be set if a timeout occurs
250 func (re *Regexp) MatchRunes(r []rune) (bool, error) {
251 m, err := re.run(true, -1, r)
252 if err != nil {
253 return false, err
254 }
255 return m != nil, nil
256 }
257
258 // GetGroupNames Returns the set of strings used to name capturing groups in the expression.
259 func (re *Regexp) GetGroupNames() []string {
260 var result []string
261
262 if re.capslist == nil {
263 result = make([]string, re.capsize)
264
265 for i := 0; i < len(result); i++ {
266 result[i] = strconv.Itoa(i)
267 }
268 } else {
269 result = make([]string, len(re.capslist))
270 copy(result, re.capslist)
271 }
272
273 return result
274 }
275
276 // GetGroupNumbers returns the integer group numbers corresponding to a group name.
277 func (re *Regexp) GetGroupNumbers() []int {
278 var result []int
279
280 if re.caps == nil {
281 result = make([]int, re.capsize)
282
283 for i := 0; i < len(result); i++ {
284 result[i] = i
285 }
286 } else {
287 result = make([]int, len(re.caps))
288
289 for k, v := range re.caps {
290 result[v] = k
291 }
292 }
293
294 return result
295 }
296
297 // GroupNameFromNumber retrieves a group name that corresponds to a group number.
298 // It will return "" for and unknown group number. Unnamed groups automatically
299 // receive a name that is the decimal string equivalent of its number.
300 func (re *Regexp) GroupNameFromNumber(i int) string {
301 if re.capslist == nil {
302 if i >= 0 && i < re.capsize {
303 return strconv.Itoa(i)
304 }
305
306 return ""
307 }
308
309 if re.caps != nil {
310 var ok bool
311 if i, ok = re.caps[i]; !ok {
312 return ""
313 }
314 }
315
316 if i >= 0 && i < len(re.capslist) {
317 return re.capslist[i]
318 }
319
320 return ""
321 }
322
323 // GroupNumberFromName returns a group number that corresponds to a group name.
324 // Returns -1 if the name is not a recognized group name. Numbered groups
325 // automatically get a group name that is the decimal string equivalent of its number.
326 func (re *Regexp) GroupNumberFromName(name string) int {
327 // look up name if we have a hashtable of names
328 if re.capnames != nil {
329 if k, ok := re.capnames[name]; ok {
330 return k
331 }
332
333 return -1
334 }
335
336 // convert to an int if it looks like a number
337 result := 0
338 for i := 0; i < len(name); i++ {
339 ch := name[i]
340
341 if ch > '9' || ch < '0' {
342 return -1
343 }
344
345 result *= 10
346 result += int(ch - '0')
347 }
348
349 // return int if it's in range
350 if result >= 0 && result < re.capsize {
351 return result
352 }
353
354 return -1
355 }