66
|
1 package regexp2
|
|
2
|
|
3 import (
|
|
4 "bytes"
|
|
5 "errors"
|
|
6 "fmt"
|
|
7 "math"
|
|
8 "strconv"
|
|
9 "strings"
|
|
10 "time"
|
|
11 "unicode"
|
|
12
|
|
13 "github.com/dlclark/regexp2/syntax"
|
|
14 )
|
|
15
|
|
16 type runner struct {
|
|
17 re *Regexp
|
|
18 code *syntax.Code
|
|
19
|
|
20 runtextstart int // starting point for search
|
|
21
|
|
22 runtext []rune // text to search
|
|
23 runtextpos int // current position in text
|
|
24 runtextend int
|
|
25
|
|
26 // The backtracking stack. Opcodes use this to store data regarding
|
|
27 // what they have matched and where to backtrack to. Each "frame" on
|
|
28 // the stack takes the form of [CodePosition Data1 Data2...], where
|
|
29 // CodePosition is the position of the current opcode and
|
|
30 // the data values are all optional. The CodePosition can be negative, and
|
|
31 // these values (also called "back2") are used by the BranchMark family of opcodes
|
|
32 // to indicate whether they are backtracking after a successful or failed
|
|
33 // match.
|
|
34 // When we backtrack, we pop the CodePosition off the stack, set the current
|
|
35 // instruction pointer to that code position, and mark the opcode
|
|
36 // with a backtracking flag ("Back"). Each opcode then knows how to
|
|
37 // handle its own data.
|
|
38 runtrack []int
|
|
39 runtrackpos int
|
|
40
|
|
41 // This stack is used to track text positions across different opcodes.
|
|
42 // For example, in /(a*b)+/, the parentheses result in a SetMark/CaptureMark
|
|
43 // pair. SetMark records the text position before we match a*b. Then
|
|
44 // CaptureMark uses that position to figure out where the capture starts.
|
|
45 // Opcodes which push onto this stack are always paired with other opcodes
|
|
46 // which will pop the value from it later. A successful match should mean
|
|
47 // that this stack is empty.
|
|
48 runstack []int
|
|
49 runstackpos int
|
|
50
|
|
51 // The crawl stack is used to keep track of captures. Every time a group
|
|
52 // has a capture, we push its group number onto the runcrawl stack. In
|
|
53 // the case of a balanced match, we push BOTH groups onto the stack.
|
|
54 runcrawl []int
|
|
55 runcrawlpos int
|
|
56
|
|
57 runtrackcount int // count of states that may do backtracking
|
|
58
|
|
59 runmatch *Match // result object
|
|
60
|
|
61 ignoreTimeout bool
|
|
62 timeout time.Duration // timeout in milliseconds (needed for actual)
|
|
63 timeoutChecksToSkip int
|
|
64 timeoutAt time.Time
|
|
65
|
|
66 operator syntax.InstOp
|
|
67 codepos int
|
|
68 rightToLeft bool
|
|
69 caseInsensitive bool
|
|
70 }
|
|
71
|
|
72 // run searches for matches and can continue from the previous match
|
|
73 //
|
|
74 // quick is usually false, but can be true to not return matches, just put it in caches
|
|
75 // textstart is -1 to start at the "beginning" (depending on Right-To-Left), otherwise an index in input
|
|
76 // input is the string to search for our regex pattern
|
|
77 func (re *Regexp) run(quick bool, textstart int, input []rune) (*Match, error) {
|
|
78
|
|
79 // get a cached runner
|
|
80 runner := re.getRunner()
|
|
81 defer re.putRunner(runner)
|
|
82
|
|
83 if textstart < 0 {
|
|
84 if re.RightToLeft() {
|
|
85 textstart = len(input)
|
|
86 } else {
|
|
87 textstart = 0
|
|
88 }
|
|
89 }
|
|
90
|
|
91 return runner.scan(input, textstart, quick, re.MatchTimeout)
|
|
92 }
|
|
93
|
|
94 // Scans the string to find the first match. Uses the Match object
|
|
95 // both to feed text in and as a place to store matches that come out.
|
|
96 //
|
|
97 // All the action is in the Go() method. Our
|
|
98 // responsibility is to load up the class members before
|
|
99 // calling Go.
|
|
100 //
|
|
101 // The optimizer can compute a set of candidate starting characters,
|
|
102 // and we could use a separate method Skip() that will quickly scan past
|
|
103 // any characters that we know can't match.
|
|
104 func (r *runner) scan(rt []rune, textstart int, quick bool, timeout time.Duration) (*Match, error) {
|
|
105 r.timeout = timeout
|
|
106 r.ignoreTimeout = (time.Duration(math.MaxInt64) == timeout)
|
|
107 r.runtextstart = textstart
|
|
108 r.runtext = rt
|
|
109 r.runtextend = len(rt)
|
|
110
|
|
111 stoppos := r.runtextend
|
|
112 bump := 1
|
|
113
|
|
114 if r.re.RightToLeft() {
|
|
115 bump = -1
|
|
116 stoppos = 0
|
|
117 }
|
|
118
|
|
119 r.runtextpos = textstart
|
|
120 initted := false
|
|
121
|
|
122 r.startTimeoutWatch()
|
|
123 for {
|
|
124 if r.re.Debug() {
|
|
125 //fmt.Printf("\nSearch content: %v\n", string(r.runtext))
|
|
126 fmt.Printf("\nSearch range: from 0 to %v\n", r.runtextend)
|
|
127 fmt.Printf("Firstchar search starting at %v stopping at %v\n", r.runtextpos, stoppos)
|
|
128 }
|
|
129
|
|
130 if r.findFirstChar() {
|
|
131 if err := r.checkTimeout(); err != nil {
|
|
132 return nil, err
|
|
133 }
|
|
134
|
|
135 if !initted {
|
|
136 r.initMatch()
|
|
137 initted = true
|
|
138 }
|
|
139
|
|
140 if r.re.Debug() {
|
|
141 fmt.Printf("Executing engine starting at %v\n\n", r.runtextpos)
|
|
142 }
|
|
143
|
|
144 if err := r.execute(); err != nil {
|
|
145 return nil, err
|
|
146 }
|
|
147
|
|
148 if r.runmatch.matchcount[0] > 0 {
|
|
149 // We'll return a match even if it touches a previous empty match
|
|
150 return r.tidyMatch(quick), nil
|
|
151 }
|
|
152
|
|
153 // reset state for another go
|
|
154 r.runtrackpos = len(r.runtrack)
|
|
155 r.runstackpos = len(r.runstack)
|
|
156 r.runcrawlpos = len(r.runcrawl)
|
|
157 }
|
|
158
|
|
159 // failure!
|
|
160
|
|
161 if r.runtextpos == stoppos {
|
|
162 r.tidyMatch(true)
|
|
163 return nil, nil
|
|
164 }
|
|
165
|
|
166 // Recognize leading []* and various anchors, and bump on failure accordingly
|
|
167
|
|
168 // r.bump by one and start again
|
|
169
|
|
170 r.runtextpos += bump
|
|
171 }
|
|
172 // We never get here
|
|
173 }
|
|
174
|
|
175 func (r *runner) execute() error {
|
|
176
|
|
177 r.goTo(0)
|
|
178
|
|
179 for {
|
|
180
|
|
181 if r.re.Debug() {
|
|
182 r.dumpState()
|
|
183 }
|
|
184
|
|
185 if err := r.checkTimeout(); err != nil {
|
|
186 return err
|
|
187 }
|
|
188
|
|
189 switch r.operator {
|
|
190 case syntax.Stop:
|
|
191 return nil
|
|
192
|
|
193 case syntax.Nothing:
|
|
194 break
|
|
195
|
|
196 case syntax.Goto:
|
|
197 r.goTo(r.operand(0))
|
|
198 continue
|
|
199
|
|
200 case syntax.Testref:
|
|
201 if !r.runmatch.isMatched(r.operand(0)) {
|
|
202 break
|
|
203 }
|
|
204 r.advance(1)
|
|
205 continue
|
|
206
|
|
207 case syntax.Lazybranch:
|
|
208 r.trackPush1(r.textPos())
|
|
209 r.advance(1)
|
|
210 continue
|
|
211
|
|
212 case syntax.Lazybranch | syntax.Back:
|
|
213 r.trackPop()
|
|
214 r.textto(r.trackPeek())
|
|
215 r.goTo(r.operand(0))
|
|
216 continue
|
|
217
|
|
218 case syntax.Setmark:
|
|
219 r.stackPush(r.textPos())
|
|
220 r.trackPush()
|
|
221 r.advance(0)
|
|
222 continue
|
|
223
|
|
224 case syntax.Nullmark:
|
|
225 r.stackPush(-1)
|
|
226 r.trackPush()
|
|
227 r.advance(0)
|
|
228 continue
|
|
229
|
|
230 case syntax.Setmark | syntax.Back, syntax.Nullmark | syntax.Back:
|
|
231 r.stackPop()
|
|
232 break
|
|
233
|
|
234 case syntax.Getmark:
|
|
235 r.stackPop()
|
|
236 r.trackPush1(r.stackPeek())
|
|
237 r.textto(r.stackPeek())
|
|
238 r.advance(0)
|
|
239 continue
|
|
240
|
|
241 case syntax.Getmark | syntax.Back:
|
|
242 r.trackPop()
|
|
243 r.stackPush(r.trackPeek())
|
|
244 break
|
|
245
|
|
246 case syntax.Capturemark:
|
|
247 if r.operand(1) != -1 && !r.runmatch.isMatched(r.operand(1)) {
|
|
248 break
|
|
249 }
|
|
250 r.stackPop()
|
|
251 if r.operand(1) != -1 {
|
|
252 r.transferCapture(r.operand(0), r.operand(1), r.stackPeek(), r.textPos())
|
|
253 } else {
|
|
254 r.capture(r.operand(0), r.stackPeek(), r.textPos())
|
|
255 }
|
|
256 r.trackPush1(r.stackPeek())
|
|
257
|
|
258 r.advance(2)
|
|
259
|
|
260 continue
|
|
261
|
|
262 case syntax.Capturemark | syntax.Back:
|
|
263 r.trackPop()
|
|
264 r.stackPush(r.trackPeek())
|
|
265 r.uncapture()
|
|
266 if r.operand(0) != -1 && r.operand(1) != -1 {
|
|
267 r.uncapture()
|
|
268 }
|
|
269
|
|
270 break
|
|
271
|
|
272 case syntax.Branchmark:
|
|
273 r.stackPop()
|
|
274
|
|
275 matched := r.textPos() - r.stackPeek()
|
|
276
|
|
277 if matched != 0 { // Nonempty match -> loop now
|
|
278 r.trackPush2(r.stackPeek(), r.textPos()) // Save old mark, textpos
|
|
279 r.stackPush(r.textPos()) // Make new mark
|
|
280 r.goTo(r.operand(0)) // Loop
|
|
281 } else { // Empty match -> straight now
|
|
282 r.trackPushNeg1(r.stackPeek()) // Save old mark
|
|
283 r.advance(1) // Straight
|
|
284 }
|
|
285 continue
|
|
286
|
|
287 case syntax.Branchmark | syntax.Back:
|
|
288 r.trackPopN(2)
|
|
289 r.stackPop()
|
|
290 r.textto(r.trackPeekN(1)) // Recall position
|
|
291 r.trackPushNeg1(r.trackPeek()) // Save old mark
|
|
292 r.advance(1) // Straight
|
|
293 continue
|
|
294
|
|
295 case syntax.Branchmark | syntax.Back2:
|
|
296 r.trackPop()
|
|
297 r.stackPush(r.trackPeek()) // Recall old mark
|
|
298 break // Backtrack
|
|
299
|
|
300 case syntax.Lazybranchmark:
|
|
301 {
|
|
302 // We hit this the first time through a lazy loop and after each
|
|
303 // successful match of the inner expression. It simply continues
|
|
304 // on and doesn't loop.
|
|
305 r.stackPop()
|
|
306
|
|
307 oldMarkPos := r.stackPeek()
|
|
308
|
|
309 if r.textPos() != oldMarkPos { // Nonempty match -> try to loop again by going to 'back' state
|
|
310 if oldMarkPos != -1 {
|
|
311 r.trackPush2(oldMarkPos, r.textPos()) // Save old mark, textpos
|
|
312 } else {
|
|
313 r.trackPush2(r.textPos(), r.textPos())
|
|
314 }
|
|
315 } else {
|
|
316 // The inner expression found an empty match, so we'll go directly to 'back2' if we
|
|
317 // backtrack. In this case, we need to push something on the stack, since back2 pops.
|
|
318 // However, in the case of ()+? or similar, this empty match may be legitimate, so push the text
|
|
319 // position associated with that empty match.
|
|
320 r.stackPush(oldMarkPos)
|
|
321
|
|
322 r.trackPushNeg1(r.stackPeek()) // Save old mark
|
|
323 }
|
|
324 r.advance(1)
|
|
325 continue
|
|
326 }
|
|
327
|
|
328 case syntax.Lazybranchmark | syntax.Back:
|
|
329
|
|
330 // After the first time, Lazybranchmark | syntax.Back occurs
|
|
331 // with each iteration of the loop, and therefore with every attempted
|
|
332 // match of the inner expression. We'll try to match the inner expression,
|
|
333 // then go back to Lazybranchmark if successful. If the inner expression
|
|
334 // fails, we go to Lazybranchmark | syntax.Back2
|
|
335
|
|
336 r.trackPopN(2)
|
|
337 pos := r.trackPeekN(1)
|
|
338 r.trackPushNeg1(r.trackPeek()) // Save old mark
|
|
339 r.stackPush(pos) // Make new mark
|
|
340 r.textto(pos) // Recall position
|
|
341 r.goTo(r.operand(0)) // Loop
|
|
342 continue
|
|
343
|
|
344 case syntax.Lazybranchmark | syntax.Back2:
|
|
345 // The lazy loop has failed. We'll do a true backtrack and
|
|
346 // start over before the lazy loop.
|
|
347 r.stackPop()
|
|
348 r.trackPop()
|
|
349 r.stackPush(r.trackPeek()) // Recall old mark
|
|
350 break
|
|
351
|
|
352 case syntax.Setcount:
|
|
353 r.stackPush2(r.textPos(), r.operand(0))
|
|
354 r.trackPush()
|
|
355 r.advance(1)
|
|
356 continue
|
|
357
|
|
358 case syntax.Nullcount:
|
|
359 r.stackPush2(-1, r.operand(0))
|
|
360 r.trackPush()
|
|
361 r.advance(1)
|
|
362 continue
|
|
363
|
|
364 case syntax.Setcount | syntax.Back:
|
|
365 r.stackPopN(2)
|
|
366 break
|
|
367
|
|
368 case syntax.Nullcount | syntax.Back:
|
|
369 r.stackPopN(2)
|
|
370 break
|
|
371
|
|
372 case syntax.Branchcount:
|
|
373 // r.stackPush:
|
|
374 // 0: Mark
|
|
375 // 1: Count
|
|
376
|
|
377 r.stackPopN(2)
|
|
378 mark := r.stackPeek()
|
|
379 count := r.stackPeekN(1)
|
|
380 matched := r.textPos() - mark
|
|
381
|
|
382 if count >= r.operand(1) || (matched == 0 && count >= 0) { // Max loops or empty match -> straight now
|
|
383 r.trackPushNeg2(mark, count) // Save old mark, count
|
|
384 r.advance(2) // Straight
|
|
385 } else { // Nonempty match -> count+loop now
|
|
386 r.trackPush1(mark) // remember mark
|
|
387 r.stackPush2(r.textPos(), count+1) // Make new mark, incr count
|
|
388 r.goTo(r.operand(0)) // Loop
|
|
389 }
|
|
390 continue
|
|
391
|
|
392 case syntax.Branchcount | syntax.Back:
|
|
393 // r.trackPush:
|
|
394 // 0: Previous mark
|
|
395 // r.stackPush:
|
|
396 // 0: Mark (= current pos, discarded)
|
|
397 // 1: Count
|
|
398 r.trackPop()
|
|
399 r.stackPopN(2)
|
|
400 if r.stackPeekN(1) > 0 { // Positive -> can go straight
|
|
401 r.textto(r.stackPeek()) // Zap to mark
|
|
402 r.trackPushNeg2(r.trackPeek(), r.stackPeekN(1)-1) // Save old mark, old count
|
|
403 r.advance(2) // Straight
|
|
404 continue
|
|
405 }
|
|
406 r.stackPush2(r.trackPeek(), r.stackPeekN(1)-1) // recall old mark, old count
|
|
407 break
|
|
408
|
|
409 case syntax.Branchcount | syntax.Back2:
|
|
410 // r.trackPush:
|
|
411 // 0: Previous mark
|
|
412 // 1: Previous count
|
|
413 r.trackPopN(2)
|
|
414 r.stackPush2(r.trackPeek(), r.trackPeekN(1)) // Recall old mark, old count
|
|
415 break // Backtrack
|
|
416
|
|
417 case syntax.Lazybranchcount:
|
|
418 // r.stackPush:
|
|
419 // 0: Mark
|
|
420 // 1: Count
|
|
421
|
|
422 r.stackPopN(2)
|
|
423 mark := r.stackPeek()
|
|
424 count := r.stackPeekN(1)
|
|
425
|
|
426 if count < 0 { // Negative count -> loop now
|
|
427 r.trackPushNeg1(mark) // Save old mark
|
|
428 r.stackPush2(r.textPos(), count+1) // Make new mark, incr count
|
|
429 r.goTo(r.operand(0)) // Loop
|
|
430 } else { // Nonneg count -> straight now
|
|
431 r.trackPush3(mark, count, r.textPos()) // Save mark, count, position
|
|
432 r.advance(2) // Straight
|
|
433 }
|
|
434 continue
|
|
435
|
|
436 case syntax.Lazybranchcount | syntax.Back:
|
|
437 // r.trackPush:
|
|
438 // 0: Mark
|
|
439 // 1: Count
|
|
440 // 2: r.textPos
|
|
441
|
|
442 r.trackPopN(3)
|
|
443 mark := r.trackPeek()
|
|
444 textpos := r.trackPeekN(2)
|
|
445
|
|
446 if r.trackPeekN(1) < r.operand(1) && textpos != mark { // Under limit and not empty match -> loop
|
|
447 r.textto(textpos) // Recall position
|
|
448 r.stackPush2(textpos, r.trackPeekN(1)+1) // Make new mark, incr count
|
|
449 r.trackPushNeg1(mark) // Save old mark
|
|
450 r.goTo(r.operand(0)) // Loop
|
|
451 continue
|
|
452 } else { // Max loops or empty match -> backtrack
|
|
453 r.stackPush2(r.trackPeek(), r.trackPeekN(1)) // Recall old mark, count
|
|
454 break // backtrack
|
|
455 }
|
|
456
|
|
457 case syntax.Lazybranchcount | syntax.Back2:
|
|
458 // r.trackPush:
|
|
459 // 0: Previous mark
|
|
460 // r.stackPush:
|
|
461 // 0: Mark (== current pos, discarded)
|
|
462 // 1: Count
|
|
463 r.trackPop()
|
|
464 r.stackPopN(2)
|
|
465 r.stackPush2(r.trackPeek(), r.stackPeekN(1)-1) // Recall old mark, count
|
|
466 break // Backtrack
|
|
467
|
|
468 case syntax.Setjump:
|
|
469 r.stackPush2(r.trackpos(), r.crawlpos())
|
|
470 r.trackPush()
|
|
471 r.advance(0)
|
|
472 continue
|
|
473
|
|
474 case syntax.Setjump | syntax.Back:
|
|
475 r.stackPopN(2)
|
|
476 break
|
|
477
|
|
478 case syntax.Backjump:
|
|
479 // r.stackPush:
|
|
480 // 0: Saved trackpos
|
|
481 // 1: r.crawlpos
|
|
482 r.stackPopN(2)
|
|
483 r.trackto(r.stackPeek())
|
|
484
|
|
485 for r.crawlpos() != r.stackPeekN(1) {
|
|
486 r.uncapture()
|
|
487 }
|
|
488
|
|
489 break
|
|
490
|
|
491 case syntax.Forejump:
|
|
492 // r.stackPush:
|
|
493 // 0: Saved trackpos
|
|
494 // 1: r.crawlpos
|
|
495 r.stackPopN(2)
|
|
496 r.trackto(r.stackPeek())
|
|
497 r.trackPush1(r.stackPeekN(1))
|
|
498 r.advance(0)
|
|
499 continue
|
|
500
|
|
501 case syntax.Forejump | syntax.Back:
|
|
502 // r.trackPush:
|
|
503 // 0: r.crawlpos
|
|
504 r.trackPop()
|
|
505
|
|
506 for r.crawlpos() != r.trackPeek() {
|
|
507 r.uncapture()
|
|
508 }
|
|
509
|
|
510 break
|
|
511
|
|
512 case syntax.Bol:
|
|
513 if r.leftchars() > 0 && r.charAt(r.textPos()-1) != '\n' {
|
|
514 break
|
|
515 }
|
|
516 r.advance(0)
|
|
517 continue
|
|
518
|
|
519 case syntax.Eol:
|
|
520 if r.rightchars() > 0 && r.charAt(r.textPos()) != '\n' {
|
|
521 break
|
|
522 }
|
|
523 r.advance(0)
|
|
524 continue
|
|
525
|
|
526 case syntax.Boundary:
|
|
527 if !r.isBoundary(r.textPos(), 0, r.runtextend) {
|
|
528 break
|
|
529 }
|
|
530 r.advance(0)
|
|
531 continue
|
|
532
|
|
533 case syntax.Nonboundary:
|
|
534 if r.isBoundary(r.textPos(), 0, r.runtextend) {
|
|
535 break
|
|
536 }
|
|
537 r.advance(0)
|
|
538 continue
|
|
539
|
|
540 case syntax.ECMABoundary:
|
|
541 if !r.isECMABoundary(r.textPos(), 0, r.runtextend) {
|
|
542 break
|
|
543 }
|
|
544 r.advance(0)
|
|
545 continue
|
|
546
|
|
547 case syntax.NonECMABoundary:
|
|
548 if r.isECMABoundary(r.textPos(), 0, r.runtextend) {
|
|
549 break
|
|
550 }
|
|
551 r.advance(0)
|
|
552 continue
|
|
553
|
|
554 case syntax.Beginning:
|
|
555 if r.leftchars() > 0 {
|
|
556 break
|
|
557 }
|
|
558 r.advance(0)
|
|
559 continue
|
|
560
|
|
561 case syntax.Start:
|
|
562 if r.textPos() != r.textstart() {
|
|
563 break
|
|
564 }
|
|
565 r.advance(0)
|
|
566 continue
|
|
567
|
|
568 case syntax.EndZ:
|
|
569 rchars := r.rightchars()
|
|
570 if rchars > 1 {
|
|
571 break
|
|
572 }
|
|
573 // RE2 and EcmaScript define $ as "asserts position at the end of the string"
|
|
574 // PCRE/.NET adds "or before the line terminator right at the end of the string (if any)"
|
|
575 if (r.re.options & (RE2 | ECMAScript)) != 0 {
|
|
576 // RE2/Ecmascript mode
|
|
577 if rchars > 0 {
|
|
578 break
|
|
579 }
|
|
580 } else if rchars == 1 && r.charAt(r.textPos()) != '\n' {
|
|
581 // "regular" mode
|
|
582 break
|
|
583 }
|
|
584
|
|
585 r.advance(0)
|
|
586 continue
|
|
587
|
|
588 case syntax.End:
|
|
589 if r.rightchars() > 0 {
|
|
590 break
|
|
591 }
|
|
592 r.advance(0)
|
|
593 continue
|
|
594
|
|
595 case syntax.One:
|
|
596 if r.forwardchars() < 1 || r.forwardcharnext() != rune(r.operand(0)) {
|
|
597 break
|
|
598 }
|
|
599
|
|
600 r.advance(1)
|
|
601 continue
|
|
602
|
|
603 case syntax.Notone:
|
|
604 if r.forwardchars() < 1 || r.forwardcharnext() == rune(r.operand(0)) {
|
|
605 break
|
|
606 }
|
|
607
|
|
608 r.advance(1)
|
|
609 continue
|
|
610
|
|
611 case syntax.Set:
|
|
612
|
|
613 if r.forwardchars() < 1 || !r.code.Sets[r.operand(0)].CharIn(r.forwardcharnext()) {
|
|
614 break
|
|
615 }
|
|
616
|
|
617 r.advance(1)
|
|
618 continue
|
|
619
|
|
620 case syntax.Multi:
|
|
621 if !r.runematch(r.code.Strings[r.operand(0)]) {
|
|
622 break
|
|
623 }
|
|
624
|
|
625 r.advance(1)
|
|
626 continue
|
|
627
|
|
628 case syntax.Ref:
|
|
629
|
|
630 capnum := r.operand(0)
|
|
631
|
|
632 if r.runmatch.isMatched(capnum) {
|
|
633 if !r.refmatch(r.runmatch.matchIndex(capnum), r.runmatch.matchLength(capnum)) {
|
|
634 break
|
|
635 }
|
|
636 } else {
|
|
637 if (r.re.options & ECMAScript) == 0 {
|
|
638 break
|
|
639 }
|
|
640 }
|
|
641
|
|
642 r.advance(1)
|
|
643 continue
|
|
644
|
|
645 case syntax.Onerep:
|
|
646
|
|
647 c := r.operand(1)
|
|
648
|
|
649 if r.forwardchars() < c {
|
|
650 break
|
|
651 }
|
|
652
|
|
653 ch := rune(r.operand(0))
|
|
654
|
|
655 for c > 0 {
|
|
656 if r.forwardcharnext() != ch {
|
|
657 goto BreakBackward
|
|
658 }
|
|
659 c--
|
|
660 }
|
|
661
|
|
662 r.advance(2)
|
|
663 continue
|
|
664
|
|
665 case syntax.Notonerep:
|
|
666
|
|
667 c := r.operand(1)
|
|
668
|
|
669 if r.forwardchars() < c {
|
|
670 break
|
|
671 }
|
|
672 ch := rune(r.operand(0))
|
|
673
|
|
674 for c > 0 {
|
|
675 if r.forwardcharnext() == ch {
|
|
676 goto BreakBackward
|
|
677 }
|
|
678 c--
|
|
679 }
|
|
680
|
|
681 r.advance(2)
|
|
682 continue
|
|
683
|
|
684 case syntax.Setrep:
|
|
685
|
|
686 c := r.operand(1)
|
|
687
|
|
688 if r.forwardchars() < c {
|
|
689 break
|
|
690 }
|
|
691
|
|
692 set := r.code.Sets[r.operand(0)]
|
|
693
|
|
694 for c > 0 {
|
|
695 if !set.CharIn(r.forwardcharnext()) {
|
|
696 goto BreakBackward
|
|
697 }
|
|
698 c--
|
|
699 }
|
|
700
|
|
701 r.advance(2)
|
|
702 continue
|
|
703
|
|
704 case syntax.Oneloop:
|
|
705
|
|
706 c := r.operand(1)
|
|
707
|
|
708 if c > r.forwardchars() {
|
|
709 c = r.forwardchars()
|
|
710 }
|
|
711
|
|
712 ch := rune(r.operand(0))
|
|
713 i := c
|
|
714
|
|
715 for ; i > 0; i-- {
|
|
716 if r.forwardcharnext() != ch {
|
|
717 r.backwardnext()
|
|
718 break
|
|
719 }
|
|
720 }
|
|
721
|
|
722 if c > i {
|
|
723 r.trackPush2(c-i-1, r.textPos()-r.bump())
|
|
724 }
|
|
725
|
|
726 r.advance(2)
|
|
727 continue
|
|
728
|
|
729 case syntax.Notoneloop:
|
|
730
|
|
731 c := r.operand(1)
|
|
732
|
|
733 if c > r.forwardchars() {
|
|
734 c = r.forwardchars()
|
|
735 }
|
|
736
|
|
737 ch := rune(r.operand(0))
|
|
738 i := c
|
|
739
|
|
740 for ; i > 0; i-- {
|
|
741 if r.forwardcharnext() == ch {
|
|
742 r.backwardnext()
|
|
743 break
|
|
744 }
|
|
745 }
|
|
746
|
|
747 if c > i {
|
|
748 r.trackPush2(c-i-1, r.textPos()-r.bump())
|
|
749 }
|
|
750
|
|
751 r.advance(2)
|
|
752 continue
|
|
753
|
|
754 case syntax.Setloop:
|
|
755
|
|
756 c := r.operand(1)
|
|
757
|
|
758 if c > r.forwardchars() {
|
|
759 c = r.forwardchars()
|
|
760 }
|
|
761
|
|
762 set := r.code.Sets[r.operand(0)]
|
|
763 i := c
|
|
764
|
|
765 for ; i > 0; i-- {
|
|
766 if !set.CharIn(r.forwardcharnext()) {
|
|
767 r.backwardnext()
|
|
768 break
|
|
769 }
|
|
770 }
|
|
771
|
|
772 if c > i {
|
|
773 r.trackPush2(c-i-1, r.textPos()-r.bump())
|
|
774 }
|
|
775
|
|
776 r.advance(2)
|
|
777 continue
|
|
778
|
|
779 case syntax.Oneloop | syntax.Back, syntax.Notoneloop | syntax.Back:
|
|
780
|
|
781 r.trackPopN(2)
|
|
782 i := r.trackPeek()
|
|
783 pos := r.trackPeekN(1)
|
|
784
|
|
785 r.textto(pos)
|
|
786
|
|
787 if i > 0 {
|
|
788 r.trackPush2(i-1, pos-r.bump())
|
|
789 }
|
|
790
|
|
791 r.advance(2)
|
|
792 continue
|
|
793
|
|
794 case syntax.Setloop | syntax.Back:
|
|
795
|
|
796 r.trackPopN(2)
|
|
797 i := r.trackPeek()
|
|
798 pos := r.trackPeekN(1)
|
|
799
|
|
800 r.textto(pos)
|
|
801
|
|
802 if i > 0 {
|
|
803 r.trackPush2(i-1, pos-r.bump())
|
|
804 }
|
|
805
|
|
806 r.advance(2)
|
|
807 continue
|
|
808
|
|
809 case syntax.Onelazy, syntax.Notonelazy:
|
|
810
|
|
811 c := r.operand(1)
|
|
812
|
|
813 if c > r.forwardchars() {
|
|
814 c = r.forwardchars()
|
|
815 }
|
|
816
|
|
817 if c > 0 {
|
|
818 r.trackPush2(c-1, r.textPos())
|
|
819 }
|
|
820
|
|
821 r.advance(2)
|
|
822 continue
|
|
823
|
|
824 case syntax.Setlazy:
|
|
825
|
|
826 c := r.operand(1)
|
|
827
|
|
828 if c > r.forwardchars() {
|
|
829 c = r.forwardchars()
|
|
830 }
|
|
831
|
|
832 if c > 0 {
|
|
833 r.trackPush2(c-1, r.textPos())
|
|
834 }
|
|
835
|
|
836 r.advance(2)
|
|
837 continue
|
|
838
|
|
839 case syntax.Onelazy | syntax.Back:
|
|
840
|
|
841 r.trackPopN(2)
|
|
842 pos := r.trackPeekN(1)
|
|
843 r.textto(pos)
|
|
844
|
|
845 if r.forwardcharnext() != rune(r.operand(0)) {
|
|
846 break
|
|
847 }
|
|
848
|
|
849 i := r.trackPeek()
|
|
850
|
|
851 if i > 0 {
|
|
852 r.trackPush2(i-1, pos+r.bump())
|
|
853 }
|
|
854
|
|
855 r.advance(2)
|
|
856 continue
|
|
857
|
|
858 case syntax.Notonelazy | syntax.Back:
|
|
859
|
|
860 r.trackPopN(2)
|
|
861 pos := r.trackPeekN(1)
|
|
862 r.textto(pos)
|
|
863
|
|
864 if r.forwardcharnext() == rune(r.operand(0)) {
|
|
865 break
|
|
866 }
|
|
867
|
|
868 i := r.trackPeek()
|
|
869
|
|
870 if i > 0 {
|
|
871 r.trackPush2(i-1, pos+r.bump())
|
|
872 }
|
|
873
|
|
874 r.advance(2)
|
|
875 continue
|
|
876
|
|
877 case syntax.Setlazy | syntax.Back:
|
|
878
|
|
879 r.trackPopN(2)
|
|
880 pos := r.trackPeekN(1)
|
|
881 r.textto(pos)
|
|
882
|
|
883 if !r.code.Sets[r.operand(0)].CharIn(r.forwardcharnext()) {
|
|
884 break
|
|
885 }
|
|
886
|
|
887 i := r.trackPeek()
|
|
888
|
|
889 if i > 0 {
|
|
890 r.trackPush2(i-1, pos+r.bump())
|
|
891 }
|
|
892
|
|
893 r.advance(2)
|
|
894 continue
|
|
895
|
|
896 default:
|
|
897 return errors.New("unknown state in regex runner")
|
|
898 }
|
|
899
|
|
900 BreakBackward:
|
|
901 ;
|
|
902
|
|
903 // "break Backward" comes here:
|
|
904 r.backtrack()
|
|
905 }
|
|
906 }
|
|
907
|
|
908 // increase the size of stack and track storage
|
|
909 func (r *runner) ensureStorage() {
|
|
910 if r.runstackpos < r.runtrackcount*4 {
|
|
911 doubleIntSlice(&r.runstack, &r.runstackpos)
|
|
912 }
|
|
913 if r.runtrackpos < r.runtrackcount*4 {
|
|
914 doubleIntSlice(&r.runtrack, &r.runtrackpos)
|
|
915 }
|
|
916 }
|
|
917
|
|
918 func doubleIntSlice(s *[]int, pos *int) {
|
|
919 oldLen := len(*s)
|
|
920 newS := make([]int, oldLen*2)
|
|
921
|
|
922 copy(newS[oldLen:], *s)
|
|
923 *pos += oldLen
|
|
924 *s = newS
|
|
925 }
|
|
926
|
|
927 // Save a number on the longjump unrolling stack
|
|
928 func (r *runner) crawl(i int) {
|
|
929 if r.runcrawlpos == 0 {
|
|
930 doubleIntSlice(&r.runcrawl, &r.runcrawlpos)
|
|
931 }
|
|
932 r.runcrawlpos--
|
|
933 r.runcrawl[r.runcrawlpos] = i
|
|
934 }
|
|
935
|
|
936 // Remove a number from the longjump unrolling stack
|
|
937 func (r *runner) popcrawl() int {
|
|
938 val := r.runcrawl[r.runcrawlpos]
|
|
939 r.runcrawlpos++
|
|
940 return val
|
|
941 }
|
|
942
|
|
943 // Get the height of the stack
|
|
944 func (r *runner) crawlpos() int {
|
|
945 return len(r.runcrawl) - r.runcrawlpos
|
|
946 }
|
|
947
|
|
948 func (r *runner) advance(i int) {
|
|
949 r.codepos += (i + 1)
|
|
950 r.setOperator(r.code.Codes[r.codepos])
|
|
951 }
|
|
952
|
|
953 func (r *runner) goTo(newpos int) {
|
|
954 // when branching backward or in place, ensure storage
|
|
955 if newpos <= r.codepos {
|
|
956 r.ensureStorage()
|
|
957 }
|
|
958
|
|
959 r.setOperator(r.code.Codes[newpos])
|
|
960 r.codepos = newpos
|
|
961 }
|
|
962
|
|
963 func (r *runner) textto(newpos int) {
|
|
964 r.runtextpos = newpos
|
|
965 }
|
|
966
|
|
967 func (r *runner) trackto(newpos int) {
|
|
968 r.runtrackpos = len(r.runtrack) - newpos
|
|
969 }
|
|
970
|
|
971 func (r *runner) textstart() int {
|
|
972 return r.runtextstart
|
|
973 }
|
|
974
|
|
975 func (r *runner) textPos() int {
|
|
976 return r.runtextpos
|
|
977 }
|
|
978
|
|
979 // push onto the backtracking stack
|
|
980 func (r *runner) trackpos() int {
|
|
981 return len(r.runtrack) - r.runtrackpos
|
|
982 }
|
|
983
|
|
984 func (r *runner) trackPush() {
|
|
985 r.runtrackpos--
|
|
986 r.runtrack[r.runtrackpos] = r.codepos
|
|
987 }
|
|
988
|
|
989 func (r *runner) trackPush1(I1 int) {
|
|
990 r.runtrackpos--
|
|
991 r.runtrack[r.runtrackpos] = I1
|
|
992 r.runtrackpos--
|
|
993 r.runtrack[r.runtrackpos] = r.codepos
|
|
994 }
|
|
995
|
|
996 func (r *runner) trackPush2(I1, I2 int) {
|
|
997 r.runtrackpos--
|
|
998 r.runtrack[r.runtrackpos] = I1
|
|
999 r.runtrackpos--
|
|
1000 r.runtrack[r.runtrackpos] = I2
|
|
1001 r.runtrackpos--
|
|
1002 r.runtrack[r.runtrackpos] = r.codepos
|
|
1003 }
|
|
1004
|
|
1005 func (r *runner) trackPush3(I1, I2, I3 int) {
|
|
1006 r.runtrackpos--
|
|
1007 r.runtrack[r.runtrackpos] = I1
|
|
1008 r.runtrackpos--
|
|
1009 r.runtrack[r.runtrackpos] = I2
|
|
1010 r.runtrackpos--
|
|
1011 r.runtrack[r.runtrackpos] = I3
|
|
1012 r.runtrackpos--
|
|
1013 r.runtrack[r.runtrackpos] = r.codepos
|
|
1014 }
|
|
1015
|
|
1016 func (r *runner) trackPushNeg1(I1 int) {
|
|
1017 r.runtrackpos--
|
|
1018 r.runtrack[r.runtrackpos] = I1
|
|
1019 r.runtrackpos--
|
|
1020 r.runtrack[r.runtrackpos] = -r.codepos
|
|
1021 }
|
|
1022
|
|
1023 func (r *runner) trackPushNeg2(I1, I2 int) {
|
|
1024 r.runtrackpos--
|
|
1025 r.runtrack[r.runtrackpos] = I1
|
|
1026 r.runtrackpos--
|
|
1027 r.runtrack[r.runtrackpos] = I2
|
|
1028 r.runtrackpos--
|
|
1029 r.runtrack[r.runtrackpos] = -r.codepos
|
|
1030 }
|
|
1031
|
|
1032 func (r *runner) backtrack() {
|
|
1033 newpos := r.runtrack[r.runtrackpos]
|
|
1034 r.runtrackpos++
|
|
1035
|
|
1036 if r.re.Debug() {
|
|
1037 if newpos < 0 {
|
|
1038 fmt.Printf(" Backtracking (back2) to code position %v\n", -newpos)
|
|
1039 } else {
|
|
1040 fmt.Printf(" Backtracking to code position %v\n", newpos)
|
|
1041 }
|
|
1042 }
|
|
1043
|
|
1044 if newpos < 0 {
|
|
1045 newpos = -newpos
|
|
1046 r.setOperator(r.code.Codes[newpos] | syntax.Back2)
|
|
1047 } else {
|
|
1048 r.setOperator(r.code.Codes[newpos] | syntax.Back)
|
|
1049 }
|
|
1050
|
|
1051 // When branching backward, ensure storage
|
|
1052 if newpos < r.codepos {
|
|
1053 r.ensureStorage()
|
|
1054 }
|
|
1055
|
|
1056 r.codepos = newpos
|
|
1057 }
|
|
1058
|
|
1059 func (r *runner) setOperator(op int) {
|
|
1060 r.caseInsensitive = (0 != (op & syntax.Ci))
|
|
1061 r.rightToLeft = (0 != (op & syntax.Rtl))
|
|
1062 r.operator = syntax.InstOp(op & ^(syntax.Rtl | syntax.Ci))
|
|
1063 }
|
|
1064
|
|
1065 func (r *runner) trackPop() {
|
|
1066 r.runtrackpos++
|
|
1067 }
|
|
1068
|
|
1069 // pop framesize items from the backtracking stack
|
|
1070 func (r *runner) trackPopN(framesize int) {
|
|
1071 r.runtrackpos += framesize
|
|
1072 }
|
|
1073
|
|
1074 // Technically we are actually peeking at items already popped. So if you want to
|
|
1075 // get and pop the top item from the stack, you do
|
|
1076 // r.trackPop();
|
|
1077 // r.trackPeek();
|
|
1078 func (r *runner) trackPeek() int {
|
|
1079 return r.runtrack[r.runtrackpos-1]
|
|
1080 }
|
|
1081
|
|
1082 // get the ith element down on the backtracking stack
|
|
1083 func (r *runner) trackPeekN(i int) int {
|
|
1084 return r.runtrack[r.runtrackpos-i-1]
|
|
1085 }
|
|
1086
|
|
1087 // Push onto the grouping stack
|
|
1088 func (r *runner) stackPush(I1 int) {
|
|
1089 r.runstackpos--
|
|
1090 r.runstack[r.runstackpos] = I1
|
|
1091 }
|
|
1092
|
|
1093 func (r *runner) stackPush2(I1, I2 int) {
|
|
1094 r.runstackpos--
|
|
1095 r.runstack[r.runstackpos] = I1
|
|
1096 r.runstackpos--
|
|
1097 r.runstack[r.runstackpos] = I2
|
|
1098 }
|
|
1099
|
|
1100 func (r *runner) stackPop() {
|
|
1101 r.runstackpos++
|
|
1102 }
|
|
1103
|
|
1104 // pop framesize items from the grouping stack
|
|
1105 func (r *runner) stackPopN(framesize int) {
|
|
1106 r.runstackpos += framesize
|
|
1107 }
|
|
1108
|
|
1109 // Technically we are actually peeking at items already popped. So if you want to
|
|
1110 // get and pop the top item from the stack, you do
|
|
1111 // r.stackPop();
|
|
1112 // r.stackPeek();
|
|
1113 func (r *runner) stackPeek() int {
|
|
1114 return r.runstack[r.runstackpos-1]
|
|
1115 }
|
|
1116
|
|
1117 // get the ith element down on the grouping stack
|
|
1118 func (r *runner) stackPeekN(i int) int {
|
|
1119 return r.runstack[r.runstackpos-i-1]
|
|
1120 }
|
|
1121
|
|
1122 func (r *runner) operand(i int) int {
|
|
1123 return r.code.Codes[r.codepos+i+1]
|
|
1124 }
|
|
1125
|
|
1126 func (r *runner) leftchars() int {
|
|
1127 return r.runtextpos
|
|
1128 }
|
|
1129
|
|
1130 func (r *runner) rightchars() int {
|
|
1131 return r.runtextend - r.runtextpos
|
|
1132 }
|
|
1133
|
|
1134 func (r *runner) bump() int {
|
|
1135 if r.rightToLeft {
|
|
1136 return -1
|
|
1137 }
|
|
1138 return 1
|
|
1139 }
|
|
1140
|
|
1141 func (r *runner) forwardchars() int {
|
|
1142 if r.rightToLeft {
|
|
1143 return r.runtextpos
|
|
1144 }
|
|
1145 return r.runtextend - r.runtextpos
|
|
1146 }
|
|
1147
|
|
1148 func (r *runner) forwardcharnext() rune {
|
|
1149 var ch rune
|
|
1150 if r.rightToLeft {
|
|
1151 r.runtextpos--
|
|
1152 ch = r.runtext[r.runtextpos]
|
|
1153 } else {
|
|
1154 ch = r.runtext[r.runtextpos]
|
|
1155 r.runtextpos++
|
|
1156 }
|
|
1157
|
|
1158 if r.caseInsensitive {
|
|
1159 return unicode.ToLower(ch)
|
|
1160 }
|
|
1161 return ch
|
|
1162 }
|
|
1163
|
|
1164 func (r *runner) runematch(str []rune) bool {
|
|
1165 var pos int
|
|
1166
|
|
1167 c := len(str)
|
|
1168 if !r.rightToLeft {
|
|
1169 if r.runtextend-r.runtextpos < c {
|
|
1170 return false
|
|
1171 }
|
|
1172
|
|
1173 pos = r.runtextpos + c
|
|
1174 } else {
|
|
1175 if r.runtextpos-0 < c {
|
|
1176 return false
|
|
1177 }
|
|
1178
|
|
1179 pos = r.runtextpos
|
|
1180 }
|
|
1181
|
|
1182 if !r.caseInsensitive {
|
|
1183 for c != 0 {
|
|
1184 c--
|
|
1185 pos--
|
|
1186 if str[c] != r.runtext[pos] {
|
|
1187 return false
|
|
1188 }
|
|
1189 }
|
|
1190 } else {
|
|
1191 for c != 0 {
|
|
1192 c--
|
|
1193 pos--
|
|
1194 if str[c] != unicode.ToLower(r.runtext[pos]) {
|
|
1195 return false
|
|
1196 }
|
|
1197 }
|
|
1198 }
|
|
1199
|
|
1200 if !r.rightToLeft {
|
|
1201 pos += len(str)
|
|
1202 }
|
|
1203
|
|
1204 r.runtextpos = pos
|
|
1205
|
|
1206 return true
|
|
1207 }
|
|
1208
|
|
1209 func (r *runner) refmatch(index, len int) bool {
|
|
1210 var c, pos, cmpos int
|
|
1211
|
|
1212 if !r.rightToLeft {
|
|
1213 if r.runtextend-r.runtextpos < len {
|
|
1214 return false
|
|
1215 }
|
|
1216
|
|
1217 pos = r.runtextpos + len
|
|
1218 } else {
|
|
1219 if r.runtextpos-0 < len {
|
|
1220 return false
|
|
1221 }
|
|
1222
|
|
1223 pos = r.runtextpos
|
|
1224 }
|
|
1225 cmpos = index + len
|
|
1226
|
|
1227 c = len
|
|
1228
|
|
1229 if !r.caseInsensitive {
|
|
1230 for c != 0 {
|
|
1231 c--
|
|
1232 cmpos--
|
|
1233 pos--
|
|
1234 if r.runtext[cmpos] != r.runtext[pos] {
|
|
1235 return false
|
|
1236 }
|
|
1237
|
|
1238 }
|
|
1239 } else {
|
|
1240 for c != 0 {
|
|
1241 c--
|
|
1242 cmpos--
|
|
1243 pos--
|
|
1244
|
|
1245 if unicode.ToLower(r.runtext[cmpos]) != unicode.ToLower(r.runtext[pos]) {
|
|
1246 return false
|
|
1247 }
|
|
1248 }
|
|
1249 }
|
|
1250
|
|
1251 if !r.rightToLeft {
|
|
1252 pos += len
|
|
1253 }
|
|
1254
|
|
1255 r.runtextpos = pos
|
|
1256
|
|
1257 return true
|
|
1258 }
|
|
1259
|
|
1260 func (r *runner) backwardnext() {
|
|
1261 if r.rightToLeft {
|
|
1262 r.runtextpos++
|
|
1263 } else {
|
|
1264 r.runtextpos--
|
|
1265 }
|
|
1266 }
|
|
1267
|
|
1268 func (r *runner) charAt(j int) rune {
|
|
1269 return r.runtext[j]
|
|
1270 }
|
|
1271
|
|
1272 func (r *runner) findFirstChar() bool {
|
|
1273
|
|
1274 if 0 != (r.code.Anchors & (syntax.AnchorBeginning | syntax.AnchorStart | syntax.AnchorEndZ | syntax.AnchorEnd)) {
|
|
1275 if !r.code.RightToLeft {
|
|
1276 if (0 != (r.code.Anchors&syntax.AnchorBeginning) && r.runtextpos > 0) ||
|
|
1277 (0 != (r.code.Anchors&syntax.AnchorStart) && r.runtextpos > r.runtextstart) {
|
|
1278 r.runtextpos = r.runtextend
|
|
1279 return false
|
|
1280 }
|
|
1281 if 0 != (r.code.Anchors&syntax.AnchorEndZ) && r.runtextpos < r.runtextend-1 {
|
|
1282 r.runtextpos = r.runtextend - 1
|
|
1283 } else if 0 != (r.code.Anchors&syntax.AnchorEnd) && r.runtextpos < r.runtextend {
|
|
1284 r.runtextpos = r.runtextend
|
|
1285 }
|
|
1286 } else {
|
|
1287 if (0 != (r.code.Anchors&syntax.AnchorEnd) && r.runtextpos < r.runtextend) ||
|
|
1288 (0 != (r.code.Anchors&syntax.AnchorEndZ) && (r.runtextpos < r.runtextend-1 ||
|
|
1289 (r.runtextpos == r.runtextend-1 && r.charAt(r.runtextpos) != '\n'))) ||
|
|
1290 (0 != (r.code.Anchors&syntax.AnchorStart) && r.runtextpos < r.runtextstart) {
|
|
1291 r.runtextpos = 0
|
|
1292 return false
|
|
1293 }
|
|
1294 if 0 != (r.code.Anchors&syntax.AnchorBeginning) && r.runtextpos > 0 {
|
|
1295 r.runtextpos = 0
|
|
1296 }
|
|
1297 }
|
|
1298
|
|
1299 if r.code.BmPrefix != nil {
|
|
1300 return r.code.BmPrefix.IsMatch(r.runtext, r.runtextpos, 0, r.runtextend)
|
|
1301 }
|
|
1302
|
|
1303 return true // found a valid start or end anchor
|
|
1304 } else if r.code.BmPrefix != nil {
|
|
1305 r.runtextpos = r.code.BmPrefix.Scan(r.runtext, r.runtextpos, 0, r.runtextend)
|
|
1306
|
|
1307 if r.runtextpos == -1 {
|
|
1308 if r.code.RightToLeft {
|
|
1309 r.runtextpos = 0
|
|
1310 } else {
|
|
1311 r.runtextpos = r.runtextend
|
|
1312 }
|
|
1313 return false
|
|
1314 }
|
|
1315
|
|
1316 return true
|
|
1317 } else if r.code.FcPrefix == nil {
|
|
1318 return true
|
|
1319 }
|
|
1320
|
|
1321 r.rightToLeft = r.code.RightToLeft
|
|
1322 r.caseInsensitive = r.code.FcPrefix.CaseInsensitive
|
|
1323
|
|
1324 set := r.code.FcPrefix.PrefixSet
|
|
1325 if set.IsSingleton() {
|
|
1326 ch := set.SingletonChar()
|
|
1327 for i := r.forwardchars(); i > 0; i-- {
|
|
1328 if ch == r.forwardcharnext() {
|
|
1329 r.backwardnext()
|
|
1330 return true
|
|
1331 }
|
|
1332 }
|
|
1333 } else {
|
|
1334 for i := r.forwardchars(); i > 0; i-- {
|
|
1335 n := r.forwardcharnext()
|
|
1336 //fmt.Printf("%v in %v: %v\n", string(n), set.String(), set.CharIn(n))
|
|
1337 if set.CharIn(n) {
|
|
1338 r.backwardnext()
|
|
1339 return true
|
|
1340 }
|
|
1341 }
|
|
1342 }
|
|
1343
|
|
1344 return false
|
|
1345 }
|
|
1346
|
|
1347 func (r *runner) initMatch() {
|
|
1348 // Use a hashtable'ed Match object if the capture numbers are sparse
|
|
1349
|
|
1350 if r.runmatch == nil {
|
|
1351 if r.re.caps != nil {
|
|
1352 r.runmatch = newMatchSparse(r.re, r.re.caps, r.re.capsize, r.runtext, r.runtextstart)
|
|
1353 } else {
|
|
1354 r.runmatch = newMatch(r.re, r.re.capsize, r.runtext, r.runtextstart)
|
|
1355 }
|
|
1356 } else {
|
|
1357 r.runmatch.reset(r.runtext, r.runtextstart)
|
|
1358 }
|
|
1359
|
|
1360 // note we test runcrawl, because it is the last one to be allocated
|
|
1361 // If there is an alloc failure in the middle of the three allocations,
|
|
1362 // we may still return to reuse this instance, and we want to behave
|
|
1363 // as if the allocations didn't occur. (we used to test _trackcount != 0)
|
|
1364
|
|
1365 if r.runcrawl != nil {
|
|
1366 r.runtrackpos = len(r.runtrack)
|
|
1367 r.runstackpos = len(r.runstack)
|
|
1368 r.runcrawlpos = len(r.runcrawl)
|
|
1369 return
|
|
1370 }
|
|
1371
|
|
1372 r.initTrackCount()
|
|
1373
|
|
1374 tracksize := r.runtrackcount * 8
|
|
1375 stacksize := r.runtrackcount * 8
|
|
1376
|
|
1377 if tracksize < 32 {
|
|
1378 tracksize = 32
|
|
1379 }
|
|
1380 if stacksize < 16 {
|
|
1381 stacksize = 16
|
|
1382 }
|
|
1383
|
|
1384 r.runtrack = make([]int, tracksize)
|
|
1385 r.runtrackpos = tracksize
|
|
1386
|
|
1387 r.runstack = make([]int, stacksize)
|
|
1388 r.runstackpos = stacksize
|
|
1389
|
|
1390 r.runcrawl = make([]int, 32)
|
|
1391 r.runcrawlpos = 32
|
|
1392 }
|
|
1393
|
|
1394 func (r *runner) tidyMatch(quick bool) *Match {
|
|
1395 if !quick {
|
|
1396 match := r.runmatch
|
|
1397
|
|
1398 r.runmatch = nil
|
|
1399
|
|
1400 match.tidy(r.runtextpos)
|
|
1401 return match
|
|
1402 } else {
|
|
1403 // send back our match -- it's not leaving the package, so it's safe to not clean it up
|
|
1404 // this reduces allocs for frequent calls to the "IsMatch" bool-only functions
|
|
1405 return r.runmatch
|
|
1406 }
|
|
1407 }
|
|
1408
|
|
1409 // capture captures a subexpression. Note that the
|
|
1410 // capnum used here has already been mapped to a non-sparse
|
|
1411 // index (by the code generator RegexWriter).
|
|
1412 func (r *runner) capture(capnum, start, end int) {
|
|
1413 if end < start {
|
|
1414 T := end
|
|
1415 end = start
|
|
1416 start = T
|
|
1417 }
|
|
1418
|
|
1419 r.crawl(capnum)
|
|
1420 r.runmatch.addMatch(capnum, start, end-start)
|
|
1421 }
|
|
1422
|
|
1423 // transferCapture captures a subexpression. Note that the
|
|
1424 // capnum used here has already been mapped to a non-sparse
|
|
1425 // index (by the code generator RegexWriter).
|
|
1426 func (r *runner) transferCapture(capnum, uncapnum, start, end int) {
|
|
1427 var start2, end2 int
|
|
1428
|
|
1429 // these are the two intervals that are cancelling each other
|
|
1430
|
|
1431 if end < start {
|
|
1432 T := end
|
|
1433 end = start
|
|
1434 start = T
|
|
1435 }
|
|
1436
|
|
1437 start2 = r.runmatch.matchIndex(uncapnum)
|
|
1438 end2 = start2 + r.runmatch.matchLength(uncapnum)
|
|
1439
|
|
1440 // The new capture gets the innermost defined interval
|
|
1441
|
|
1442 if start >= end2 {
|
|
1443 end = start
|
|
1444 start = end2
|
|
1445 } else if end <= start2 {
|
|
1446 start = start2
|
|
1447 } else {
|
|
1448 if end > end2 {
|
|
1449 end = end2
|
|
1450 }
|
|
1451 if start2 > start {
|
|
1452 start = start2
|
|
1453 }
|
|
1454 }
|
|
1455
|
|
1456 r.crawl(uncapnum)
|
|
1457 r.runmatch.balanceMatch(uncapnum)
|
|
1458
|
|
1459 if capnum != -1 {
|
|
1460 r.crawl(capnum)
|
|
1461 r.runmatch.addMatch(capnum, start, end-start)
|
|
1462 }
|
|
1463 }
|
|
1464
|
|
1465 // revert the last capture
|
|
1466 func (r *runner) uncapture() {
|
|
1467 capnum := r.popcrawl()
|
|
1468 r.runmatch.removeMatch(capnum)
|
|
1469 }
|
|
1470
|
|
1471 //debug
|
|
1472
|
|
1473 func (r *runner) dumpState() {
|
|
1474 back := ""
|
|
1475 if r.operator&syntax.Back != 0 {
|
|
1476 back = " Back"
|
|
1477 }
|
|
1478 if r.operator&syntax.Back2 != 0 {
|
|
1479 back += " Back2"
|
|
1480 }
|
|
1481 fmt.Printf("Text: %v\nTrack: %v\nStack: %v\n %s%s\n\n",
|
|
1482 r.textposDescription(),
|
|
1483 r.stackDescription(r.runtrack, r.runtrackpos),
|
|
1484 r.stackDescription(r.runstack, r.runstackpos),
|
|
1485 r.code.OpcodeDescription(r.codepos),
|
|
1486 back)
|
|
1487 }
|
|
1488
|
|
1489 func (r *runner) stackDescription(a []int, index int) string {
|
|
1490 buf := &bytes.Buffer{}
|
|
1491
|
|
1492 fmt.Fprintf(buf, "%v/%v", len(a)-index, len(a))
|
|
1493 if buf.Len() < 8 {
|
|
1494 buf.WriteString(strings.Repeat(" ", 8-buf.Len()))
|
|
1495 }
|
|
1496
|
|
1497 buf.WriteRune('(')
|
|
1498 for i := index; i < len(a); i++ {
|
|
1499 if i > index {
|
|
1500 buf.WriteRune(' ')
|
|
1501 }
|
|
1502
|
|
1503 buf.WriteString(strconv.Itoa(a[i]))
|
|
1504 }
|
|
1505
|
|
1506 buf.WriteRune(')')
|
|
1507
|
|
1508 return buf.String()
|
|
1509 }
|
|
1510
|
|
1511 func (r *runner) textposDescription() string {
|
|
1512 buf := &bytes.Buffer{}
|
|
1513
|
|
1514 buf.WriteString(strconv.Itoa(r.runtextpos))
|
|
1515
|
|
1516 if buf.Len() < 8 {
|
|
1517 buf.WriteString(strings.Repeat(" ", 8-buf.Len()))
|
|
1518 }
|
|
1519
|
|
1520 if r.runtextpos > 0 {
|
|
1521 buf.WriteString(syntax.CharDescription(r.runtext[r.runtextpos-1]))
|
|
1522 } else {
|
|
1523 buf.WriteRune('^')
|
|
1524 }
|
|
1525
|
|
1526 buf.WriteRune('>')
|
|
1527
|
|
1528 for i := r.runtextpos; i < r.runtextend; i++ {
|
|
1529 buf.WriteString(syntax.CharDescription(r.runtext[i]))
|
|
1530 }
|
|
1531 if buf.Len() >= 64 {
|
|
1532 buf.Truncate(61)
|
|
1533 buf.WriteString("...")
|
|
1534 } else {
|
|
1535 buf.WriteRune('$')
|
|
1536 }
|
|
1537
|
|
1538 return buf.String()
|
|
1539 }
|
|
1540
|
|
1541 // decide whether the pos
|
|
1542 // at the specified index is a boundary or not. It's just not worth
|
|
1543 // emitting inline code for this logic.
|
|
1544 func (r *runner) isBoundary(index, startpos, endpos int) bool {
|
|
1545 return (index > startpos && syntax.IsWordChar(r.runtext[index-1])) !=
|
|
1546 (index < endpos && syntax.IsWordChar(r.runtext[index]))
|
|
1547 }
|
|
1548
|
|
1549 func (r *runner) isECMABoundary(index, startpos, endpos int) bool {
|
|
1550 return (index > startpos && syntax.IsECMAWordChar(r.runtext[index-1])) !=
|
|
1551 (index < endpos && syntax.IsECMAWordChar(r.runtext[index]))
|
|
1552 }
|
|
1553
|
|
1554 // this seems like a comment to justify randomly picking 1000 :-P
|
|
1555 // We have determined this value in a series of experiments where x86 retail
|
|
1556 // builds (ono-lab-optimized) were run on different pattern/input pairs. Larger values
|
|
1557 // of TimeoutCheckFrequency did not tend to increase performance; smaller values
|
|
1558 // of TimeoutCheckFrequency tended to slow down the execution.
|
|
1559 const timeoutCheckFrequency int = 1000
|
|
1560
|
|
1561 func (r *runner) startTimeoutWatch() {
|
|
1562 if r.ignoreTimeout {
|
|
1563 return
|
|
1564 }
|
|
1565
|
|
1566 r.timeoutChecksToSkip = timeoutCheckFrequency
|
|
1567 r.timeoutAt = time.Now().Add(r.timeout)
|
|
1568 }
|
|
1569
|
|
1570 func (r *runner) checkTimeout() error {
|
|
1571 if r.ignoreTimeout {
|
|
1572 return nil
|
|
1573 }
|
|
1574 r.timeoutChecksToSkip--
|
|
1575 if r.timeoutChecksToSkip != 0 {
|
|
1576 return nil
|
|
1577 }
|
|
1578
|
|
1579 r.timeoutChecksToSkip = timeoutCheckFrequency
|
|
1580 return r.doCheckTimeout()
|
|
1581 }
|
|
1582
|
|
1583 func (r *runner) doCheckTimeout() error {
|
|
1584 current := time.Now()
|
|
1585
|
|
1586 if current.Before(r.timeoutAt) {
|
|
1587 return nil
|
|
1588 }
|
|
1589
|
|
1590 if r.re.Debug() {
|
|
1591 //Debug.WriteLine("")
|
|
1592 //Debug.WriteLine("RegEx match timeout occurred!")
|
|
1593 //Debug.WriteLine("Specified timeout: " + TimeSpan.FromMilliseconds(_timeout).ToString())
|
|
1594 //Debug.WriteLine("Timeout check frequency: " + TimeoutCheckFrequency)
|
|
1595 //Debug.WriteLine("Search pattern: " + _runregex._pattern)
|
|
1596 //Debug.WriteLine("Input: " + r.runtext)
|
|
1597 //Debug.WriteLine("About to throw RegexMatchTimeoutException.")
|
|
1598 }
|
|
1599
|
|
1600 return fmt.Errorf("match timeout after %v on input `%v`", r.timeout, string(r.runtext))
|
|
1601 }
|
|
1602
|
|
1603 func (r *runner) initTrackCount() {
|
|
1604 r.runtrackcount = r.code.TrackCount
|
|
1605 }
|
|
1606
|
|
1607 // getRunner returns a run to use for matching re.
|
|
1608 // It uses the re's runner cache if possible, to avoid
|
|
1609 // unnecessary allocation.
|
|
1610 func (re *Regexp) getRunner() *runner {
|
|
1611 re.muRun.Lock()
|
|
1612 if n := len(re.runner); n > 0 {
|
|
1613 z := re.runner[n-1]
|
|
1614 re.runner = re.runner[:n-1]
|
|
1615 re.muRun.Unlock()
|
|
1616 return z
|
|
1617 }
|
|
1618 re.muRun.Unlock()
|
|
1619 z := &runner{
|
|
1620 re: re,
|
|
1621 code: re.code,
|
|
1622 }
|
|
1623 return z
|
|
1624 }
|
|
1625
|
|
1626 // putRunner returns a runner to the re's cache.
|
|
1627 // There is no attempt to limit the size of the cache, so it will
|
|
1628 // grow to the maximum number of simultaneous matches
|
|
1629 // run using re. (The cache empties when re gets garbage collected.)
|
|
1630 func (re *Regexp) putRunner(r *runner) {
|
|
1631 re.muRun.Lock()
|
|
1632 re.runner = append(re.runner, r)
|
|
1633 re.muRun.Unlock()
|
|
1634 }
|