Mercurial > yakumo_izuru > aya
comparison vendor/github.com/dlclark/regexp2/runner.go @ 66:787b5ee0289d draft
Use vendored modules
Signed-off-by: Izuru Yakumo <yakumo.izuru@chaotic.ninja>
author | yakumo.izuru |
---|---|
date | Sun, 23 Jul 2023 13:18:53 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
65:6d985efa0f7a | 66:787b5ee0289d |
---|---|
1 package regexp2 | |
2 | |
3 import ( | |
4 "bytes" | |
5 "errors" | |
6 "fmt" | |
7 "math" | |
8 "strconv" | |
9 "strings" | |
10 "time" | |
11 "unicode" | |
12 | |
13 "github.com/dlclark/regexp2/syntax" | |
14 ) | |
15 | |
16 type runner struct { | |
17 re *Regexp | |
18 code *syntax.Code | |
19 | |
20 runtextstart int // starting point for search | |
21 | |
22 runtext []rune // text to search | |
23 runtextpos int // current position in text | |
24 runtextend int | |
25 | |
26 // The backtracking stack. Opcodes use this to store data regarding | |
27 // what they have matched and where to backtrack to. Each "frame" on | |
28 // the stack takes the form of [CodePosition Data1 Data2...], where | |
29 // CodePosition is the position of the current opcode and | |
30 // the data values are all optional. The CodePosition can be negative, and | |
31 // these values (also called "back2") are used by the BranchMark family of opcodes | |
32 // to indicate whether they are backtracking after a successful or failed | |
33 // match. | |
34 // When we backtrack, we pop the CodePosition off the stack, set the current | |
35 // instruction pointer to that code position, and mark the opcode | |
36 // with a backtracking flag ("Back"). Each opcode then knows how to | |
37 // handle its own data. | |
38 runtrack []int | |
39 runtrackpos int | |
40 | |
41 // This stack is used to track text positions across different opcodes. | |
42 // For example, in /(a*b)+/, the parentheses result in a SetMark/CaptureMark | |
43 // pair. SetMark records the text position before we match a*b. Then | |
44 // CaptureMark uses that position to figure out where the capture starts. | |
45 // Opcodes which push onto this stack are always paired with other opcodes | |
46 // which will pop the value from it later. A successful match should mean | |
47 // that this stack is empty. | |
48 runstack []int | |
49 runstackpos int | |
50 | |
51 // The crawl stack is used to keep track of captures. Every time a group | |
52 // has a capture, we push its group number onto the runcrawl stack. In | |
53 // the case of a balanced match, we push BOTH groups onto the stack. | |
54 runcrawl []int | |
55 runcrawlpos int | |
56 | |
57 runtrackcount int // count of states that may do backtracking | |
58 | |
59 runmatch *Match // result object | |
60 | |
61 ignoreTimeout bool | |
62 timeout time.Duration // timeout in milliseconds (needed for actual) | |
63 timeoutChecksToSkip int | |
64 timeoutAt time.Time | |
65 | |
66 operator syntax.InstOp | |
67 codepos int | |
68 rightToLeft bool | |
69 caseInsensitive bool | |
70 } | |
71 | |
72 // run searches for matches and can continue from the previous match | |
73 // | |
74 // quick is usually false, but can be true to not return matches, just put it in caches | |
75 // textstart is -1 to start at the "beginning" (depending on Right-To-Left), otherwise an index in input | |
76 // input is the string to search for our regex pattern | |
77 func (re *Regexp) run(quick bool, textstart int, input []rune) (*Match, error) { | |
78 | |
79 // get a cached runner | |
80 runner := re.getRunner() | |
81 defer re.putRunner(runner) | |
82 | |
83 if textstart < 0 { | |
84 if re.RightToLeft() { | |
85 textstart = len(input) | |
86 } else { | |
87 textstart = 0 | |
88 } | |
89 } | |
90 | |
91 return runner.scan(input, textstart, quick, re.MatchTimeout) | |
92 } | |
93 | |
94 // Scans the string to find the first match. Uses the Match object | |
95 // both to feed text in and as a place to store matches that come out. | |
96 // | |
97 // All the action is in the Go() method. Our | |
98 // responsibility is to load up the class members before | |
99 // calling Go. | |
100 // | |
101 // The optimizer can compute a set of candidate starting characters, | |
102 // and we could use a separate method Skip() that will quickly scan past | |
103 // any characters that we know can't match. | |
104 func (r *runner) scan(rt []rune, textstart int, quick bool, timeout time.Duration) (*Match, error) { | |
105 r.timeout = timeout | |
106 r.ignoreTimeout = (time.Duration(math.MaxInt64) == timeout) | |
107 r.runtextstart = textstart | |
108 r.runtext = rt | |
109 r.runtextend = len(rt) | |
110 | |
111 stoppos := r.runtextend | |
112 bump := 1 | |
113 | |
114 if r.re.RightToLeft() { | |
115 bump = -1 | |
116 stoppos = 0 | |
117 } | |
118 | |
119 r.runtextpos = textstart | |
120 initted := false | |
121 | |
122 r.startTimeoutWatch() | |
123 for { | |
124 if r.re.Debug() { | |
125 //fmt.Printf("\nSearch content: %v\n", string(r.runtext)) | |
126 fmt.Printf("\nSearch range: from 0 to %v\n", r.runtextend) | |
127 fmt.Printf("Firstchar search starting at %v stopping at %v\n", r.runtextpos, stoppos) | |
128 } | |
129 | |
130 if r.findFirstChar() { | |
131 if err := r.checkTimeout(); err != nil { | |
132 return nil, err | |
133 } | |
134 | |
135 if !initted { | |
136 r.initMatch() | |
137 initted = true | |
138 } | |
139 | |
140 if r.re.Debug() { | |
141 fmt.Printf("Executing engine starting at %v\n\n", r.runtextpos) | |
142 } | |
143 | |
144 if err := r.execute(); err != nil { | |
145 return nil, err | |
146 } | |
147 | |
148 if r.runmatch.matchcount[0] > 0 { | |
149 // We'll return a match even if it touches a previous empty match | |
150 return r.tidyMatch(quick), nil | |
151 } | |
152 | |
153 // reset state for another go | |
154 r.runtrackpos = len(r.runtrack) | |
155 r.runstackpos = len(r.runstack) | |
156 r.runcrawlpos = len(r.runcrawl) | |
157 } | |
158 | |
159 // failure! | |
160 | |
161 if r.runtextpos == stoppos { | |
162 r.tidyMatch(true) | |
163 return nil, nil | |
164 } | |
165 | |
166 // Recognize leading []* and various anchors, and bump on failure accordingly | |
167 | |
168 // r.bump by one and start again | |
169 | |
170 r.runtextpos += bump | |
171 } | |
172 // We never get here | |
173 } | |
174 | |
175 func (r *runner) execute() error { | |
176 | |
177 r.goTo(0) | |
178 | |
179 for { | |
180 | |
181 if r.re.Debug() { | |
182 r.dumpState() | |
183 } | |
184 | |
185 if err := r.checkTimeout(); err != nil { | |
186 return err | |
187 } | |
188 | |
189 switch r.operator { | |
190 case syntax.Stop: | |
191 return nil | |
192 | |
193 case syntax.Nothing: | |
194 break | |
195 | |
196 case syntax.Goto: | |
197 r.goTo(r.operand(0)) | |
198 continue | |
199 | |
200 case syntax.Testref: | |
201 if !r.runmatch.isMatched(r.operand(0)) { | |
202 break | |
203 } | |
204 r.advance(1) | |
205 continue | |
206 | |
207 case syntax.Lazybranch: | |
208 r.trackPush1(r.textPos()) | |
209 r.advance(1) | |
210 continue | |
211 | |
212 case syntax.Lazybranch | syntax.Back: | |
213 r.trackPop() | |
214 r.textto(r.trackPeek()) | |
215 r.goTo(r.operand(0)) | |
216 continue | |
217 | |
218 case syntax.Setmark: | |
219 r.stackPush(r.textPos()) | |
220 r.trackPush() | |
221 r.advance(0) | |
222 continue | |
223 | |
224 case syntax.Nullmark: | |
225 r.stackPush(-1) | |
226 r.trackPush() | |
227 r.advance(0) | |
228 continue | |
229 | |
230 case syntax.Setmark | syntax.Back, syntax.Nullmark | syntax.Back: | |
231 r.stackPop() | |
232 break | |
233 | |
234 case syntax.Getmark: | |
235 r.stackPop() | |
236 r.trackPush1(r.stackPeek()) | |
237 r.textto(r.stackPeek()) | |
238 r.advance(0) | |
239 continue | |
240 | |
241 case syntax.Getmark | syntax.Back: | |
242 r.trackPop() | |
243 r.stackPush(r.trackPeek()) | |
244 break | |
245 | |
246 case syntax.Capturemark: | |
247 if r.operand(1) != -1 && !r.runmatch.isMatched(r.operand(1)) { | |
248 break | |
249 } | |
250 r.stackPop() | |
251 if r.operand(1) != -1 { | |
252 r.transferCapture(r.operand(0), r.operand(1), r.stackPeek(), r.textPos()) | |
253 } else { | |
254 r.capture(r.operand(0), r.stackPeek(), r.textPos()) | |
255 } | |
256 r.trackPush1(r.stackPeek()) | |
257 | |
258 r.advance(2) | |
259 | |
260 continue | |
261 | |
262 case syntax.Capturemark | syntax.Back: | |
263 r.trackPop() | |
264 r.stackPush(r.trackPeek()) | |
265 r.uncapture() | |
266 if r.operand(0) != -1 && r.operand(1) != -1 { | |
267 r.uncapture() | |
268 } | |
269 | |
270 break | |
271 | |
272 case syntax.Branchmark: | |
273 r.stackPop() | |
274 | |
275 matched := r.textPos() - r.stackPeek() | |
276 | |
277 if matched != 0 { // Nonempty match -> loop now | |
278 r.trackPush2(r.stackPeek(), r.textPos()) // Save old mark, textpos | |
279 r.stackPush(r.textPos()) // Make new mark | |
280 r.goTo(r.operand(0)) // Loop | |
281 } else { // Empty match -> straight now | |
282 r.trackPushNeg1(r.stackPeek()) // Save old mark | |
283 r.advance(1) // Straight | |
284 } | |
285 continue | |
286 | |
287 case syntax.Branchmark | syntax.Back: | |
288 r.trackPopN(2) | |
289 r.stackPop() | |
290 r.textto(r.trackPeekN(1)) // Recall position | |
291 r.trackPushNeg1(r.trackPeek()) // Save old mark | |
292 r.advance(1) // Straight | |
293 continue | |
294 | |
295 case syntax.Branchmark | syntax.Back2: | |
296 r.trackPop() | |
297 r.stackPush(r.trackPeek()) // Recall old mark | |
298 break // Backtrack | |
299 | |
300 case syntax.Lazybranchmark: | |
301 { | |
302 // We hit this the first time through a lazy loop and after each | |
303 // successful match of the inner expression. It simply continues | |
304 // on and doesn't loop. | |
305 r.stackPop() | |
306 | |
307 oldMarkPos := r.stackPeek() | |
308 | |
309 if r.textPos() != oldMarkPos { // Nonempty match -> try to loop again by going to 'back' state | |
310 if oldMarkPos != -1 { | |
311 r.trackPush2(oldMarkPos, r.textPos()) // Save old mark, textpos | |
312 } else { | |
313 r.trackPush2(r.textPos(), r.textPos()) | |
314 } | |
315 } else { | |
316 // The inner expression found an empty match, so we'll go directly to 'back2' if we | |
317 // backtrack. In this case, we need to push something on the stack, since back2 pops. | |
318 // However, in the case of ()+? or similar, this empty match may be legitimate, so push the text | |
319 // position associated with that empty match. | |
320 r.stackPush(oldMarkPos) | |
321 | |
322 r.trackPushNeg1(r.stackPeek()) // Save old mark | |
323 } | |
324 r.advance(1) | |
325 continue | |
326 } | |
327 | |
328 case syntax.Lazybranchmark | syntax.Back: | |
329 | |
330 // After the first time, Lazybranchmark | syntax.Back occurs | |
331 // with each iteration of the loop, and therefore with every attempted | |
332 // match of the inner expression. We'll try to match the inner expression, | |
333 // then go back to Lazybranchmark if successful. If the inner expression | |
334 // fails, we go to Lazybranchmark | syntax.Back2 | |
335 | |
336 r.trackPopN(2) | |
337 pos := r.trackPeekN(1) | |
338 r.trackPushNeg1(r.trackPeek()) // Save old mark | |
339 r.stackPush(pos) // Make new mark | |
340 r.textto(pos) // Recall position | |
341 r.goTo(r.operand(0)) // Loop | |
342 continue | |
343 | |
344 case syntax.Lazybranchmark | syntax.Back2: | |
345 // The lazy loop has failed. We'll do a true backtrack and | |
346 // start over before the lazy loop. | |
347 r.stackPop() | |
348 r.trackPop() | |
349 r.stackPush(r.trackPeek()) // Recall old mark | |
350 break | |
351 | |
352 case syntax.Setcount: | |
353 r.stackPush2(r.textPos(), r.operand(0)) | |
354 r.trackPush() | |
355 r.advance(1) | |
356 continue | |
357 | |
358 case syntax.Nullcount: | |
359 r.stackPush2(-1, r.operand(0)) | |
360 r.trackPush() | |
361 r.advance(1) | |
362 continue | |
363 | |
364 case syntax.Setcount | syntax.Back: | |
365 r.stackPopN(2) | |
366 break | |
367 | |
368 case syntax.Nullcount | syntax.Back: | |
369 r.stackPopN(2) | |
370 break | |
371 | |
372 case syntax.Branchcount: | |
373 // r.stackPush: | |
374 // 0: Mark | |
375 // 1: Count | |
376 | |
377 r.stackPopN(2) | |
378 mark := r.stackPeek() | |
379 count := r.stackPeekN(1) | |
380 matched := r.textPos() - mark | |
381 | |
382 if count >= r.operand(1) || (matched == 0 && count >= 0) { // Max loops or empty match -> straight now | |
383 r.trackPushNeg2(mark, count) // Save old mark, count | |
384 r.advance(2) // Straight | |
385 } else { // Nonempty match -> count+loop now | |
386 r.trackPush1(mark) // remember mark | |
387 r.stackPush2(r.textPos(), count+1) // Make new mark, incr count | |
388 r.goTo(r.operand(0)) // Loop | |
389 } | |
390 continue | |
391 | |
392 case syntax.Branchcount | syntax.Back: | |
393 // r.trackPush: | |
394 // 0: Previous mark | |
395 // r.stackPush: | |
396 // 0: Mark (= current pos, discarded) | |
397 // 1: Count | |
398 r.trackPop() | |
399 r.stackPopN(2) | |
400 if r.stackPeekN(1) > 0 { // Positive -> can go straight | |
401 r.textto(r.stackPeek()) // Zap to mark | |
402 r.trackPushNeg2(r.trackPeek(), r.stackPeekN(1)-1) // Save old mark, old count | |
403 r.advance(2) // Straight | |
404 continue | |
405 } | |
406 r.stackPush2(r.trackPeek(), r.stackPeekN(1)-1) // recall old mark, old count | |
407 break | |
408 | |
409 case syntax.Branchcount | syntax.Back2: | |
410 // r.trackPush: | |
411 // 0: Previous mark | |
412 // 1: Previous count | |
413 r.trackPopN(2) | |
414 r.stackPush2(r.trackPeek(), r.trackPeekN(1)) // Recall old mark, old count | |
415 break // Backtrack | |
416 | |
417 case syntax.Lazybranchcount: | |
418 // r.stackPush: | |
419 // 0: Mark | |
420 // 1: Count | |
421 | |
422 r.stackPopN(2) | |
423 mark := r.stackPeek() | |
424 count := r.stackPeekN(1) | |
425 | |
426 if count < 0 { // Negative count -> loop now | |
427 r.trackPushNeg1(mark) // Save old mark | |
428 r.stackPush2(r.textPos(), count+1) // Make new mark, incr count | |
429 r.goTo(r.operand(0)) // Loop | |
430 } else { // Nonneg count -> straight now | |
431 r.trackPush3(mark, count, r.textPos()) // Save mark, count, position | |
432 r.advance(2) // Straight | |
433 } | |
434 continue | |
435 | |
436 case syntax.Lazybranchcount | syntax.Back: | |
437 // r.trackPush: | |
438 // 0: Mark | |
439 // 1: Count | |
440 // 2: r.textPos | |
441 | |
442 r.trackPopN(3) | |
443 mark := r.trackPeek() | |
444 textpos := r.trackPeekN(2) | |
445 | |
446 if r.trackPeekN(1) < r.operand(1) && textpos != mark { // Under limit and not empty match -> loop | |
447 r.textto(textpos) // Recall position | |
448 r.stackPush2(textpos, r.trackPeekN(1)+1) // Make new mark, incr count | |
449 r.trackPushNeg1(mark) // Save old mark | |
450 r.goTo(r.operand(0)) // Loop | |
451 continue | |
452 } else { // Max loops or empty match -> backtrack | |
453 r.stackPush2(r.trackPeek(), r.trackPeekN(1)) // Recall old mark, count | |
454 break // backtrack | |
455 } | |
456 | |
457 case syntax.Lazybranchcount | syntax.Back2: | |
458 // r.trackPush: | |
459 // 0: Previous mark | |
460 // r.stackPush: | |
461 // 0: Mark (== current pos, discarded) | |
462 // 1: Count | |
463 r.trackPop() | |
464 r.stackPopN(2) | |
465 r.stackPush2(r.trackPeek(), r.stackPeekN(1)-1) // Recall old mark, count | |
466 break // Backtrack | |
467 | |
468 case syntax.Setjump: | |
469 r.stackPush2(r.trackpos(), r.crawlpos()) | |
470 r.trackPush() | |
471 r.advance(0) | |
472 continue | |
473 | |
474 case syntax.Setjump | syntax.Back: | |
475 r.stackPopN(2) | |
476 break | |
477 | |
478 case syntax.Backjump: | |
479 // r.stackPush: | |
480 // 0: Saved trackpos | |
481 // 1: r.crawlpos | |
482 r.stackPopN(2) | |
483 r.trackto(r.stackPeek()) | |
484 | |
485 for r.crawlpos() != r.stackPeekN(1) { | |
486 r.uncapture() | |
487 } | |
488 | |
489 break | |
490 | |
491 case syntax.Forejump: | |
492 // r.stackPush: | |
493 // 0: Saved trackpos | |
494 // 1: r.crawlpos | |
495 r.stackPopN(2) | |
496 r.trackto(r.stackPeek()) | |
497 r.trackPush1(r.stackPeekN(1)) | |
498 r.advance(0) | |
499 continue | |
500 | |
501 case syntax.Forejump | syntax.Back: | |
502 // r.trackPush: | |
503 // 0: r.crawlpos | |
504 r.trackPop() | |
505 | |
506 for r.crawlpos() != r.trackPeek() { | |
507 r.uncapture() | |
508 } | |
509 | |
510 break | |
511 | |
512 case syntax.Bol: | |
513 if r.leftchars() > 0 && r.charAt(r.textPos()-1) != '\n' { | |
514 break | |
515 } | |
516 r.advance(0) | |
517 continue | |
518 | |
519 case syntax.Eol: | |
520 if r.rightchars() > 0 && r.charAt(r.textPos()) != '\n' { | |
521 break | |
522 } | |
523 r.advance(0) | |
524 continue | |
525 | |
526 case syntax.Boundary: | |
527 if !r.isBoundary(r.textPos(), 0, r.runtextend) { | |
528 break | |
529 } | |
530 r.advance(0) | |
531 continue | |
532 | |
533 case syntax.Nonboundary: | |
534 if r.isBoundary(r.textPos(), 0, r.runtextend) { | |
535 break | |
536 } | |
537 r.advance(0) | |
538 continue | |
539 | |
540 case syntax.ECMABoundary: | |
541 if !r.isECMABoundary(r.textPos(), 0, r.runtextend) { | |
542 break | |
543 } | |
544 r.advance(0) | |
545 continue | |
546 | |
547 case syntax.NonECMABoundary: | |
548 if r.isECMABoundary(r.textPos(), 0, r.runtextend) { | |
549 break | |
550 } | |
551 r.advance(0) | |
552 continue | |
553 | |
554 case syntax.Beginning: | |
555 if r.leftchars() > 0 { | |
556 break | |
557 } | |
558 r.advance(0) | |
559 continue | |
560 | |
561 case syntax.Start: | |
562 if r.textPos() != r.textstart() { | |
563 break | |
564 } | |
565 r.advance(0) | |
566 continue | |
567 | |
568 case syntax.EndZ: | |
569 rchars := r.rightchars() | |
570 if rchars > 1 { | |
571 break | |
572 } | |
573 // RE2 and EcmaScript define $ as "asserts position at the end of the string" | |
574 // PCRE/.NET adds "or before the line terminator right at the end of the string (if any)" | |
575 if (r.re.options & (RE2 | ECMAScript)) != 0 { | |
576 // RE2/Ecmascript mode | |
577 if rchars > 0 { | |
578 break | |
579 } | |
580 } else if rchars == 1 && r.charAt(r.textPos()) != '\n' { | |
581 // "regular" mode | |
582 break | |
583 } | |
584 | |
585 r.advance(0) | |
586 continue | |
587 | |
588 case syntax.End: | |
589 if r.rightchars() > 0 { | |
590 break | |
591 } | |
592 r.advance(0) | |
593 continue | |
594 | |
595 case syntax.One: | |
596 if r.forwardchars() < 1 || r.forwardcharnext() != rune(r.operand(0)) { | |
597 break | |
598 } | |
599 | |
600 r.advance(1) | |
601 continue | |
602 | |
603 case syntax.Notone: | |
604 if r.forwardchars() < 1 || r.forwardcharnext() == rune(r.operand(0)) { | |
605 break | |
606 } | |
607 | |
608 r.advance(1) | |
609 continue | |
610 | |
611 case syntax.Set: | |
612 | |
613 if r.forwardchars() < 1 || !r.code.Sets[r.operand(0)].CharIn(r.forwardcharnext()) { | |
614 break | |
615 } | |
616 | |
617 r.advance(1) | |
618 continue | |
619 | |
620 case syntax.Multi: | |
621 if !r.runematch(r.code.Strings[r.operand(0)]) { | |
622 break | |
623 } | |
624 | |
625 r.advance(1) | |
626 continue | |
627 | |
628 case syntax.Ref: | |
629 | |
630 capnum := r.operand(0) | |
631 | |
632 if r.runmatch.isMatched(capnum) { | |
633 if !r.refmatch(r.runmatch.matchIndex(capnum), r.runmatch.matchLength(capnum)) { | |
634 break | |
635 } | |
636 } else { | |
637 if (r.re.options & ECMAScript) == 0 { | |
638 break | |
639 } | |
640 } | |
641 | |
642 r.advance(1) | |
643 continue | |
644 | |
645 case syntax.Onerep: | |
646 | |
647 c := r.operand(1) | |
648 | |
649 if r.forwardchars() < c { | |
650 break | |
651 } | |
652 | |
653 ch := rune(r.operand(0)) | |
654 | |
655 for c > 0 { | |
656 if r.forwardcharnext() != ch { | |
657 goto BreakBackward | |
658 } | |
659 c-- | |
660 } | |
661 | |
662 r.advance(2) | |
663 continue | |
664 | |
665 case syntax.Notonerep: | |
666 | |
667 c := r.operand(1) | |
668 | |
669 if r.forwardchars() < c { | |
670 break | |
671 } | |
672 ch := rune(r.operand(0)) | |
673 | |
674 for c > 0 { | |
675 if r.forwardcharnext() == ch { | |
676 goto BreakBackward | |
677 } | |
678 c-- | |
679 } | |
680 | |
681 r.advance(2) | |
682 continue | |
683 | |
684 case syntax.Setrep: | |
685 | |
686 c := r.operand(1) | |
687 | |
688 if r.forwardchars() < c { | |
689 break | |
690 } | |
691 | |
692 set := r.code.Sets[r.operand(0)] | |
693 | |
694 for c > 0 { | |
695 if !set.CharIn(r.forwardcharnext()) { | |
696 goto BreakBackward | |
697 } | |
698 c-- | |
699 } | |
700 | |
701 r.advance(2) | |
702 continue | |
703 | |
704 case syntax.Oneloop: | |
705 | |
706 c := r.operand(1) | |
707 | |
708 if c > r.forwardchars() { | |
709 c = r.forwardchars() | |
710 } | |
711 | |
712 ch := rune(r.operand(0)) | |
713 i := c | |
714 | |
715 for ; i > 0; i-- { | |
716 if r.forwardcharnext() != ch { | |
717 r.backwardnext() | |
718 break | |
719 } | |
720 } | |
721 | |
722 if c > i { | |
723 r.trackPush2(c-i-1, r.textPos()-r.bump()) | |
724 } | |
725 | |
726 r.advance(2) | |
727 continue | |
728 | |
729 case syntax.Notoneloop: | |
730 | |
731 c := r.operand(1) | |
732 | |
733 if c > r.forwardchars() { | |
734 c = r.forwardchars() | |
735 } | |
736 | |
737 ch := rune(r.operand(0)) | |
738 i := c | |
739 | |
740 for ; i > 0; i-- { | |
741 if r.forwardcharnext() == ch { | |
742 r.backwardnext() | |
743 break | |
744 } | |
745 } | |
746 | |
747 if c > i { | |
748 r.trackPush2(c-i-1, r.textPos()-r.bump()) | |
749 } | |
750 | |
751 r.advance(2) | |
752 continue | |
753 | |
754 case syntax.Setloop: | |
755 | |
756 c := r.operand(1) | |
757 | |
758 if c > r.forwardchars() { | |
759 c = r.forwardchars() | |
760 } | |
761 | |
762 set := r.code.Sets[r.operand(0)] | |
763 i := c | |
764 | |
765 for ; i > 0; i-- { | |
766 if !set.CharIn(r.forwardcharnext()) { | |
767 r.backwardnext() | |
768 break | |
769 } | |
770 } | |
771 | |
772 if c > i { | |
773 r.trackPush2(c-i-1, r.textPos()-r.bump()) | |
774 } | |
775 | |
776 r.advance(2) | |
777 continue | |
778 | |
779 case syntax.Oneloop | syntax.Back, syntax.Notoneloop | syntax.Back: | |
780 | |
781 r.trackPopN(2) | |
782 i := r.trackPeek() | |
783 pos := r.trackPeekN(1) | |
784 | |
785 r.textto(pos) | |
786 | |
787 if i > 0 { | |
788 r.trackPush2(i-1, pos-r.bump()) | |
789 } | |
790 | |
791 r.advance(2) | |
792 continue | |
793 | |
794 case syntax.Setloop | syntax.Back: | |
795 | |
796 r.trackPopN(2) | |
797 i := r.trackPeek() | |
798 pos := r.trackPeekN(1) | |
799 | |
800 r.textto(pos) | |
801 | |
802 if i > 0 { | |
803 r.trackPush2(i-1, pos-r.bump()) | |
804 } | |
805 | |
806 r.advance(2) | |
807 continue | |
808 | |
809 case syntax.Onelazy, syntax.Notonelazy: | |
810 | |
811 c := r.operand(1) | |
812 | |
813 if c > r.forwardchars() { | |
814 c = r.forwardchars() | |
815 } | |
816 | |
817 if c > 0 { | |
818 r.trackPush2(c-1, r.textPos()) | |
819 } | |
820 | |
821 r.advance(2) | |
822 continue | |
823 | |
824 case syntax.Setlazy: | |
825 | |
826 c := r.operand(1) | |
827 | |
828 if c > r.forwardchars() { | |
829 c = r.forwardchars() | |
830 } | |
831 | |
832 if c > 0 { | |
833 r.trackPush2(c-1, r.textPos()) | |
834 } | |
835 | |
836 r.advance(2) | |
837 continue | |
838 | |
839 case syntax.Onelazy | syntax.Back: | |
840 | |
841 r.trackPopN(2) | |
842 pos := r.trackPeekN(1) | |
843 r.textto(pos) | |
844 | |
845 if r.forwardcharnext() != rune(r.operand(0)) { | |
846 break | |
847 } | |
848 | |
849 i := r.trackPeek() | |
850 | |
851 if i > 0 { | |
852 r.trackPush2(i-1, pos+r.bump()) | |
853 } | |
854 | |
855 r.advance(2) | |
856 continue | |
857 | |
858 case syntax.Notonelazy | syntax.Back: | |
859 | |
860 r.trackPopN(2) | |
861 pos := r.trackPeekN(1) | |
862 r.textto(pos) | |
863 | |
864 if r.forwardcharnext() == rune(r.operand(0)) { | |
865 break | |
866 } | |
867 | |
868 i := r.trackPeek() | |
869 | |
870 if i > 0 { | |
871 r.trackPush2(i-1, pos+r.bump()) | |
872 } | |
873 | |
874 r.advance(2) | |
875 continue | |
876 | |
877 case syntax.Setlazy | syntax.Back: | |
878 | |
879 r.trackPopN(2) | |
880 pos := r.trackPeekN(1) | |
881 r.textto(pos) | |
882 | |
883 if !r.code.Sets[r.operand(0)].CharIn(r.forwardcharnext()) { | |
884 break | |
885 } | |
886 | |
887 i := r.trackPeek() | |
888 | |
889 if i > 0 { | |
890 r.trackPush2(i-1, pos+r.bump()) | |
891 } | |
892 | |
893 r.advance(2) | |
894 continue | |
895 | |
896 default: | |
897 return errors.New("unknown state in regex runner") | |
898 } | |
899 | |
900 BreakBackward: | |
901 ; | |
902 | |
903 // "break Backward" comes here: | |
904 r.backtrack() | |
905 } | |
906 } | |
907 | |
908 // increase the size of stack and track storage | |
909 func (r *runner) ensureStorage() { | |
910 if r.runstackpos < r.runtrackcount*4 { | |
911 doubleIntSlice(&r.runstack, &r.runstackpos) | |
912 } | |
913 if r.runtrackpos < r.runtrackcount*4 { | |
914 doubleIntSlice(&r.runtrack, &r.runtrackpos) | |
915 } | |
916 } | |
917 | |
918 func doubleIntSlice(s *[]int, pos *int) { | |
919 oldLen := len(*s) | |
920 newS := make([]int, oldLen*2) | |
921 | |
922 copy(newS[oldLen:], *s) | |
923 *pos += oldLen | |
924 *s = newS | |
925 } | |
926 | |
927 // Save a number on the longjump unrolling stack | |
928 func (r *runner) crawl(i int) { | |
929 if r.runcrawlpos == 0 { | |
930 doubleIntSlice(&r.runcrawl, &r.runcrawlpos) | |
931 } | |
932 r.runcrawlpos-- | |
933 r.runcrawl[r.runcrawlpos] = i | |
934 } | |
935 | |
936 // Remove a number from the longjump unrolling stack | |
937 func (r *runner) popcrawl() int { | |
938 val := r.runcrawl[r.runcrawlpos] | |
939 r.runcrawlpos++ | |
940 return val | |
941 } | |
942 | |
943 // Get the height of the stack | |
944 func (r *runner) crawlpos() int { | |
945 return len(r.runcrawl) - r.runcrawlpos | |
946 } | |
947 | |
948 func (r *runner) advance(i int) { | |
949 r.codepos += (i + 1) | |
950 r.setOperator(r.code.Codes[r.codepos]) | |
951 } | |
952 | |
953 func (r *runner) goTo(newpos int) { | |
954 // when branching backward or in place, ensure storage | |
955 if newpos <= r.codepos { | |
956 r.ensureStorage() | |
957 } | |
958 | |
959 r.setOperator(r.code.Codes[newpos]) | |
960 r.codepos = newpos | |
961 } | |
962 | |
963 func (r *runner) textto(newpos int) { | |
964 r.runtextpos = newpos | |
965 } | |
966 | |
967 func (r *runner) trackto(newpos int) { | |
968 r.runtrackpos = len(r.runtrack) - newpos | |
969 } | |
970 | |
971 func (r *runner) textstart() int { | |
972 return r.runtextstart | |
973 } | |
974 | |
975 func (r *runner) textPos() int { | |
976 return r.runtextpos | |
977 } | |
978 | |
979 // push onto the backtracking stack | |
980 func (r *runner) trackpos() int { | |
981 return len(r.runtrack) - r.runtrackpos | |
982 } | |
983 | |
984 func (r *runner) trackPush() { | |
985 r.runtrackpos-- | |
986 r.runtrack[r.runtrackpos] = r.codepos | |
987 } | |
988 | |
989 func (r *runner) trackPush1(I1 int) { | |
990 r.runtrackpos-- | |
991 r.runtrack[r.runtrackpos] = I1 | |
992 r.runtrackpos-- | |
993 r.runtrack[r.runtrackpos] = r.codepos | |
994 } | |
995 | |
996 func (r *runner) trackPush2(I1, I2 int) { | |
997 r.runtrackpos-- | |
998 r.runtrack[r.runtrackpos] = I1 | |
999 r.runtrackpos-- | |
1000 r.runtrack[r.runtrackpos] = I2 | |
1001 r.runtrackpos-- | |
1002 r.runtrack[r.runtrackpos] = r.codepos | |
1003 } | |
1004 | |
1005 func (r *runner) trackPush3(I1, I2, I3 int) { | |
1006 r.runtrackpos-- | |
1007 r.runtrack[r.runtrackpos] = I1 | |
1008 r.runtrackpos-- | |
1009 r.runtrack[r.runtrackpos] = I2 | |
1010 r.runtrackpos-- | |
1011 r.runtrack[r.runtrackpos] = I3 | |
1012 r.runtrackpos-- | |
1013 r.runtrack[r.runtrackpos] = r.codepos | |
1014 } | |
1015 | |
1016 func (r *runner) trackPushNeg1(I1 int) { | |
1017 r.runtrackpos-- | |
1018 r.runtrack[r.runtrackpos] = I1 | |
1019 r.runtrackpos-- | |
1020 r.runtrack[r.runtrackpos] = -r.codepos | |
1021 } | |
1022 | |
1023 func (r *runner) trackPushNeg2(I1, I2 int) { | |
1024 r.runtrackpos-- | |
1025 r.runtrack[r.runtrackpos] = I1 | |
1026 r.runtrackpos-- | |
1027 r.runtrack[r.runtrackpos] = I2 | |
1028 r.runtrackpos-- | |
1029 r.runtrack[r.runtrackpos] = -r.codepos | |
1030 } | |
1031 | |
1032 func (r *runner) backtrack() { | |
1033 newpos := r.runtrack[r.runtrackpos] | |
1034 r.runtrackpos++ | |
1035 | |
1036 if r.re.Debug() { | |
1037 if newpos < 0 { | |
1038 fmt.Printf(" Backtracking (back2) to code position %v\n", -newpos) | |
1039 } else { | |
1040 fmt.Printf(" Backtracking to code position %v\n", newpos) | |
1041 } | |
1042 } | |
1043 | |
1044 if newpos < 0 { | |
1045 newpos = -newpos | |
1046 r.setOperator(r.code.Codes[newpos] | syntax.Back2) | |
1047 } else { | |
1048 r.setOperator(r.code.Codes[newpos] | syntax.Back) | |
1049 } | |
1050 | |
1051 // When branching backward, ensure storage | |
1052 if newpos < r.codepos { | |
1053 r.ensureStorage() | |
1054 } | |
1055 | |
1056 r.codepos = newpos | |
1057 } | |
1058 | |
1059 func (r *runner) setOperator(op int) { | |
1060 r.caseInsensitive = (0 != (op & syntax.Ci)) | |
1061 r.rightToLeft = (0 != (op & syntax.Rtl)) | |
1062 r.operator = syntax.InstOp(op & ^(syntax.Rtl | syntax.Ci)) | |
1063 } | |
1064 | |
1065 func (r *runner) trackPop() { | |
1066 r.runtrackpos++ | |
1067 } | |
1068 | |
1069 // pop framesize items from the backtracking stack | |
1070 func (r *runner) trackPopN(framesize int) { | |
1071 r.runtrackpos += framesize | |
1072 } | |
1073 | |
1074 // Technically we are actually peeking at items already popped. So if you want to | |
1075 // get and pop the top item from the stack, you do | |
1076 // r.trackPop(); | |
1077 // r.trackPeek(); | |
1078 func (r *runner) trackPeek() int { | |
1079 return r.runtrack[r.runtrackpos-1] | |
1080 } | |
1081 | |
1082 // get the ith element down on the backtracking stack | |
1083 func (r *runner) trackPeekN(i int) int { | |
1084 return r.runtrack[r.runtrackpos-i-1] | |
1085 } | |
1086 | |
1087 // Push onto the grouping stack | |
1088 func (r *runner) stackPush(I1 int) { | |
1089 r.runstackpos-- | |
1090 r.runstack[r.runstackpos] = I1 | |
1091 } | |
1092 | |
1093 func (r *runner) stackPush2(I1, I2 int) { | |
1094 r.runstackpos-- | |
1095 r.runstack[r.runstackpos] = I1 | |
1096 r.runstackpos-- | |
1097 r.runstack[r.runstackpos] = I2 | |
1098 } | |
1099 | |
1100 func (r *runner) stackPop() { | |
1101 r.runstackpos++ | |
1102 } | |
1103 | |
1104 // pop framesize items from the grouping stack | |
1105 func (r *runner) stackPopN(framesize int) { | |
1106 r.runstackpos += framesize | |
1107 } | |
1108 | |
1109 // Technically we are actually peeking at items already popped. So if you want to | |
1110 // get and pop the top item from the stack, you do | |
1111 // r.stackPop(); | |
1112 // r.stackPeek(); | |
1113 func (r *runner) stackPeek() int { | |
1114 return r.runstack[r.runstackpos-1] | |
1115 } | |
1116 | |
1117 // get the ith element down on the grouping stack | |
1118 func (r *runner) stackPeekN(i int) int { | |
1119 return r.runstack[r.runstackpos-i-1] | |
1120 } | |
1121 | |
1122 func (r *runner) operand(i int) int { | |
1123 return r.code.Codes[r.codepos+i+1] | |
1124 } | |
1125 | |
1126 func (r *runner) leftchars() int { | |
1127 return r.runtextpos | |
1128 } | |
1129 | |
1130 func (r *runner) rightchars() int { | |
1131 return r.runtextend - r.runtextpos | |
1132 } | |
1133 | |
1134 func (r *runner) bump() int { | |
1135 if r.rightToLeft { | |
1136 return -1 | |
1137 } | |
1138 return 1 | |
1139 } | |
1140 | |
1141 func (r *runner) forwardchars() int { | |
1142 if r.rightToLeft { | |
1143 return r.runtextpos | |
1144 } | |
1145 return r.runtextend - r.runtextpos | |
1146 } | |
1147 | |
1148 func (r *runner) forwardcharnext() rune { | |
1149 var ch rune | |
1150 if r.rightToLeft { | |
1151 r.runtextpos-- | |
1152 ch = r.runtext[r.runtextpos] | |
1153 } else { | |
1154 ch = r.runtext[r.runtextpos] | |
1155 r.runtextpos++ | |
1156 } | |
1157 | |
1158 if r.caseInsensitive { | |
1159 return unicode.ToLower(ch) | |
1160 } | |
1161 return ch | |
1162 } | |
1163 | |
1164 func (r *runner) runematch(str []rune) bool { | |
1165 var pos int | |
1166 | |
1167 c := len(str) | |
1168 if !r.rightToLeft { | |
1169 if r.runtextend-r.runtextpos < c { | |
1170 return false | |
1171 } | |
1172 | |
1173 pos = r.runtextpos + c | |
1174 } else { | |
1175 if r.runtextpos-0 < c { | |
1176 return false | |
1177 } | |
1178 | |
1179 pos = r.runtextpos | |
1180 } | |
1181 | |
1182 if !r.caseInsensitive { | |
1183 for c != 0 { | |
1184 c-- | |
1185 pos-- | |
1186 if str[c] != r.runtext[pos] { | |
1187 return false | |
1188 } | |
1189 } | |
1190 } else { | |
1191 for c != 0 { | |
1192 c-- | |
1193 pos-- | |
1194 if str[c] != unicode.ToLower(r.runtext[pos]) { | |
1195 return false | |
1196 } | |
1197 } | |
1198 } | |
1199 | |
1200 if !r.rightToLeft { | |
1201 pos += len(str) | |
1202 } | |
1203 | |
1204 r.runtextpos = pos | |
1205 | |
1206 return true | |
1207 } | |
1208 | |
1209 func (r *runner) refmatch(index, len int) bool { | |
1210 var c, pos, cmpos int | |
1211 | |
1212 if !r.rightToLeft { | |
1213 if r.runtextend-r.runtextpos < len { | |
1214 return false | |
1215 } | |
1216 | |
1217 pos = r.runtextpos + len | |
1218 } else { | |
1219 if r.runtextpos-0 < len { | |
1220 return false | |
1221 } | |
1222 | |
1223 pos = r.runtextpos | |
1224 } | |
1225 cmpos = index + len | |
1226 | |
1227 c = len | |
1228 | |
1229 if !r.caseInsensitive { | |
1230 for c != 0 { | |
1231 c-- | |
1232 cmpos-- | |
1233 pos-- | |
1234 if r.runtext[cmpos] != r.runtext[pos] { | |
1235 return false | |
1236 } | |
1237 | |
1238 } | |
1239 } else { | |
1240 for c != 0 { | |
1241 c-- | |
1242 cmpos-- | |
1243 pos-- | |
1244 | |
1245 if unicode.ToLower(r.runtext[cmpos]) != unicode.ToLower(r.runtext[pos]) { | |
1246 return false | |
1247 } | |
1248 } | |
1249 } | |
1250 | |
1251 if !r.rightToLeft { | |
1252 pos += len | |
1253 } | |
1254 | |
1255 r.runtextpos = pos | |
1256 | |
1257 return true | |
1258 } | |
1259 | |
1260 func (r *runner) backwardnext() { | |
1261 if r.rightToLeft { | |
1262 r.runtextpos++ | |
1263 } else { | |
1264 r.runtextpos-- | |
1265 } | |
1266 } | |
1267 | |
1268 func (r *runner) charAt(j int) rune { | |
1269 return r.runtext[j] | |
1270 } | |
1271 | |
1272 func (r *runner) findFirstChar() bool { | |
1273 | |
1274 if 0 != (r.code.Anchors & (syntax.AnchorBeginning | syntax.AnchorStart | syntax.AnchorEndZ | syntax.AnchorEnd)) { | |
1275 if !r.code.RightToLeft { | |
1276 if (0 != (r.code.Anchors&syntax.AnchorBeginning) && r.runtextpos > 0) || | |
1277 (0 != (r.code.Anchors&syntax.AnchorStart) && r.runtextpos > r.runtextstart) { | |
1278 r.runtextpos = r.runtextend | |
1279 return false | |
1280 } | |
1281 if 0 != (r.code.Anchors&syntax.AnchorEndZ) && r.runtextpos < r.runtextend-1 { | |
1282 r.runtextpos = r.runtextend - 1 | |
1283 } else if 0 != (r.code.Anchors&syntax.AnchorEnd) && r.runtextpos < r.runtextend { | |
1284 r.runtextpos = r.runtextend | |
1285 } | |
1286 } else { | |
1287 if (0 != (r.code.Anchors&syntax.AnchorEnd) && r.runtextpos < r.runtextend) || | |
1288 (0 != (r.code.Anchors&syntax.AnchorEndZ) && (r.runtextpos < r.runtextend-1 || | |
1289 (r.runtextpos == r.runtextend-1 && r.charAt(r.runtextpos) != '\n'))) || | |
1290 (0 != (r.code.Anchors&syntax.AnchorStart) && r.runtextpos < r.runtextstart) { | |
1291 r.runtextpos = 0 | |
1292 return false | |
1293 } | |
1294 if 0 != (r.code.Anchors&syntax.AnchorBeginning) && r.runtextpos > 0 { | |
1295 r.runtextpos = 0 | |
1296 } | |
1297 } | |
1298 | |
1299 if r.code.BmPrefix != nil { | |
1300 return r.code.BmPrefix.IsMatch(r.runtext, r.runtextpos, 0, r.runtextend) | |
1301 } | |
1302 | |
1303 return true // found a valid start or end anchor | |
1304 } else if r.code.BmPrefix != nil { | |
1305 r.runtextpos = r.code.BmPrefix.Scan(r.runtext, r.runtextpos, 0, r.runtextend) | |
1306 | |
1307 if r.runtextpos == -1 { | |
1308 if r.code.RightToLeft { | |
1309 r.runtextpos = 0 | |
1310 } else { | |
1311 r.runtextpos = r.runtextend | |
1312 } | |
1313 return false | |
1314 } | |
1315 | |
1316 return true | |
1317 } else if r.code.FcPrefix == nil { | |
1318 return true | |
1319 } | |
1320 | |
1321 r.rightToLeft = r.code.RightToLeft | |
1322 r.caseInsensitive = r.code.FcPrefix.CaseInsensitive | |
1323 | |
1324 set := r.code.FcPrefix.PrefixSet | |
1325 if set.IsSingleton() { | |
1326 ch := set.SingletonChar() | |
1327 for i := r.forwardchars(); i > 0; i-- { | |
1328 if ch == r.forwardcharnext() { | |
1329 r.backwardnext() | |
1330 return true | |
1331 } | |
1332 } | |
1333 } else { | |
1334 for i := r.forwardchars(); i > 0; i-- { | |
1335 n := r.forwardcharnext() | |
1336 //fmt.Printf("%v in %v: %v\n", string(n), set.String(), set.CharIn(n)) | |
1337 if set.CharIn(n) { | |
1338 r.backwardnext() | |
1339 return true | |
1340 } | |
1341 } | |
1342 } | |
1343 | |
1344 return false | |
1345 } | |
1346 | |
1347 func (r *runner) initMatch() { | |
1348 // Use a hashtable'ed Match object if the capture numbers are sparse | |
1349 | |
1350 if r.runmatch == nil { | |
1351 if r.re.caps != nil { | |
1352 r.runmatch = newMatchSparse(r.re, r.re.caps, r.re.capsize, r.runtext, r.runtextstart) | |
1353 } else { | |
1354 r.runmatch = newMatch(r.re, r.re.capsize, r.runtext, r.runtextstart) | |
1355 } | |
1356 } else { | |
1357 r.runmatch.reset(r.runtext, r.runtextstart) | |
1358 } | |
1359 | |
1360 // note we test runcrawl, because it is the last one to be allocated | |
1361 // If there is an alloc failure in the middle of the three allocations, | |
1362 // we may still return to reuse this instance, and we want to behave | |
1363 // as if the allocations didn't occur. (we used to test _trackcount != 0) | |
1364 | |
1365 if r.runcrawl != nil { | |
1366 r.runtrackpos = len(r.runtrack) | |
1367 r.runstackpos = len(r.runstack) | |
1368 r.runcrawlpos = len(r.runcrawl) | |
1369 return | |
1370 } | |
1371 | |
1372 r.initTrackCount() | |
1373 | |
1374 tracksize := r.runtrackcount * 8 | |
1375 stacksize := r.runtrackcount * 8 | |
1376 | |
1377 if tracksize < 32 { | |
1378 tracksize = 32 | |
1379 } | |
1380 if stacksize < 16 { | |
1381 stacksize = 16 | |
1382 } | |
1383 | |
1384 r.runtrack = make([]int, tracksize) | |
1385 r.runtrackpos = tracksize | |
1386 | |
1387 r.runstack = make([]int, stacksize) | |
1388 r.runstackpos = stacksize | |
1389 | |
1390 r.runcrawl = make([]int, 32) | |
1391 r.runcrawlpos = 32 | |
1392 } | |
1393 | |
1394 func (r *runner) tidyMatch(quick bool) *Match { | |
1395 if !quick { | |
1396 match := r.runmatch | |
1397 | |
1398 r.runmatch = nil | |
1399 | |
1400 match.tidy(r.runtextpos) | |
1401 return match | |
1402 } else { | |
1403 // send back our match -- it's not leaving the package, so it's safe to not clean it up | |
1404 // this reduces allocs for frequent calls to the "IsMatch" bool-only functions | |
1405 return r.runmatch | |
1406 } | |
1407 } | |
1408 | |
1409 // capture captures a subexpression. Note that the | |
1410 // capnum used here has already been mapped to a non-sparse | |
1411 // index (by the code generator RegexWriter). | |
1412 func (r *runner) capture(capnum, start, end int) { | |
1413 if end < start { | |
1414 T := end | |
1415 end = start | |
1416 start = T | |
1417 } | |
1418 | |
1419 r.crawl(capnum) | |
1420 r.runmatch.addMatch(capnum, start, end-start) | |
1421 } | |
1422 | |
1423 // transferCapture captures a subexpression. Note that the | |
1424 // capnum used here has already been mapped to a non-sparse | |
1425 // index (by the code generator RegexWriter). | |
1426 func (r *runner) transferCapture(capnum, uncapnum, start, end int) { | |
1427 var start2, end2 int | |
1428 | |
1429 // these are the two intervals that are cancelling each other | |
1430 | |
1431 if end < start { | |
1432 T := end | |
1433 end = start | |
1434 start = T | |
1435 } | |
1436 | |
1437 start2 = r.runmatch.matchIndex(uncapnum) | |
1438 end2 = start2 + r.runmatch.matchLength(uncapnum) | |
1439 | |
1440 // The new capture gets the innermost defined interval | |
1441 | |
1442 if start >= end2 { | |
1443 end = start | |
1444 start = end2 | |
1445 } else if end <= start2 { | |
1446 start = start2 | |
1447 } else { | |
1448 if end > end2 { | |
1449 end = end2 | |
1450 } | |
1451 if start2 > start { | |
1452 start = start2 | |
1453 } | |
1454 } | |
1455 | |
1456 r.crawl(uncapnum) | |
1457 r.runmatch.balanceMatch(uncapnum) | |
1458 | |
1459 if capnum != -1 { | |
1460 r.crawl(capnum) | |
1461 r.runmatch.addMatch(capnum, start, end-start) | |
1462 } | |
1463 } | |
1464 | |
1465 // revert the last capture | |
1466 func (r *runner) uncapture() { | |
1467 capnum := r.popcrawl() | |
1468 r.runmatch.removeMatch(capnum) | |
1469 } | |
1470 | |
1471 //debug | |
1472 | |
1473 func (r *runner) dumpState() { | |
1474 back := "" | |
1475 if r.operator&syntax.Back != 0 { | |
1476 back = " Back" | |
1477 } | |
1478 if r.operator&syntax.Back2 != 0 { | |
1479 back += " Back2" | |
1480 } | |
1481 fmt.Printf("Text: %v\nTrack: %v\nStack: %v\n %s%s\n\n", | |
1482 r.textposDescription(), | |
1483 r.stackDescription(r.runtrack, r.runtrackpos), | |
1484 r.stackDescription(r.runstack, r.runstackpos), | |
1485 r.code.OpcodeDescription(r.codepos), | |
1486 back) | |
1487 } | |
1488 | |
1489 func (r *runner) stackDescription(a []int, index int) string { | |
1490 buf := &bytes.Buffer{} | |
1491 | |
1492 fmt.Fprintf(buf, "%v/%v", len(a)-index, len(a)) | |
1493 if buf.Len() < 8 { | |
1494 buf.WriteString(strings.Repeat(" ", 8-buf.Len())) | |
1495 } | |
1496 | |
1497 buf.WriteRune('(') | |
1498 for i := index; i < len(a); i++ { | |
1499 if i > index { | |
1500 buf.WriteRune(' ') | |
1501 } | |
1502 | |
1503 buf.WriteString(strconv.Itoa(a[i])) | |
1504 } | |
1505 | |
1506 buf.WriteRune(')') | |
1507 | |
1508 return buf.String() | |
1509 } | |
1510 | |
1511 func (r *runner) textposDescription() string { | |
1512 buf := &bytes.Buffer{} | |
1513 | |
1514 buf.WriteString(strconv.Itoa(r.runtextpos)) | |
1515 | |
1516 if buf.Len() < 8 { | |
1517 buf.WriteString(strings.Repeat(" ", 8-buf.Len())) | |
1518 } | |
1519 | |
1520 if r.runtextpos > 0 { | |
1521 buf.WriteString(syntax.CharDescription(r.runtext[r.runtextpos-1])) | |
1522 } else { | |
1523 buf.WriteRune('^') | |
1524 } | |
1525 | |
1526 buf.WriteRune('>') | |
1527 | |
1528 for i := r.runtextpos; i < r.runtextend; i++ { | |
1529 buf.WriteString(syntax.CharDescription(r.runtext[i])) | |
1530 } | |
1531 if buf.Len() >= 64 { | |
1532 buf.Truncate(61) | |
1533 buf.WriteString("...") | |
1534 } else { | |
1535 buf.WriteRune('$') | |
1536 } | |
1537 | |
1538 return buf.String() | |
1539 } | |
1540 | |
1541 // decide whether the pos | |
1542 // at the specified index is a boundary or not. It's just not worth | |
1543 // emitting inline code for this logic. | |
1544 func (r *runner) isBoundary(index, startpos, endpos int) bool { | |
1545 return (index > startpos && syntax.IsWordChar(r.runtext[index-1])) != | |
1546 (index < endpos && syntax.IsWordChar(r.runtext[index])) | |
1547 } | |
1548 | |
1549 func (r *runner) isECMABoundary(index, startpos, endpos int) bool { | |
1550 return (index > startpos && syntax.IsECMAWordChar(r.runtext[index-1])) != | |
1551 (index < endpos && syntax.IsECMAWordChar(r.runtext[index])) | |
1552 } | |
1553 | |
1554 // this seems like a comment to justify randomly picking 1000 :-P | |
1555 // We have determined this value in a series of experiments where x86 retail | |
1556 // builds (ono-lab-optimized) were run on different pattern/input pairs. Larger values | |
1557 // of TimeoutCheckFrequency did not tend to increase performance; smaller values | |
1558 // of TimeoutCheckFrequency tended to slow down the execution. | |
1559 const timeoutCheckFrequency int = 1000 | |
1560 | |
1561 func (r *runner) startTimeoutWatch() { | |
1562 if r.ignoreTimeout { | |
1563 return | |
1564 } | |
1565 | |
1566 r.timeoutChecksToSkip = timeoutCheckFrequency | |
1567 r.timeoutAt = time.Now().Add(r.timeout) | |
1568 } | |
1569 | |
1570 func (r *runner) checkTimeout() error { | |
1571 if r.ignoreTimeout { | |
1572 return nil | |
1573 } | |
1574 r.timeoutChecksToSkip-- | |
1575 if r.timeoutChecksToSkip != 0 { | |
1576 return nil | |
1577 } | |
1578 | |
1579 r.timeoutChecksToSkip = timeoutCheckFrequency | |
1580 return r.doCheckTimeout() | |
1581 } | |
1582 | |
1583 func (r *runner) doCheckTimeout() error { | |
1584 current := time.Now() | |
1585 | |
1586 if current.Before(r.timeoutAt) { | |
1587 return nil | |
1588 } | |
1589 | |
1590 if r.re.Debug() { | |
1591 //Debug.WriteLine("") | |
1592 //Debug.WriteLine("RegEx match timeout occurred!") | |
1593 //Debug.WriteLine("Specified timeout: " + TimeSpan.FromMilliseconds(_timeout).ToString()) | |
1594 //Debug.WriteLine("Timeout check frequency: " + TimeoutCheckFrequency) | |
1595 //Debug.WriteLine("Search pattern: " + _runregex._pattern) | |
1596 //Debug.WriteLine("Input: " + r.runtext) | |
1597 //Debug.WriteLine("About to throw RegexMatchTimeoutException.") | |
1598 } | |
1599 | |
1600 return fmt.Errorf("match timeout after %v on input `%v`", r.timeout, string(r.runtext)) | |
1601 } | |
1602 | |
1603 func (r *runner) initTrackCount() { | |
1604 r.runtrackcount = r.code.TrackCount | |
1605 } | |
1606 | |
1607 // getRunner returns a run to use for matching re. | |
1608 // It uses the re's runner cache if possible, to avoid | |
1609 // unnecessary allocation. | |
1610 func (re *Regexp) getRunner() *runner { | |
1611 re.muRun.Lock() | |
1612 if n := len(re.runner); n > 0 { | |
1613 z := re.runner[n-1] | |
1614 re.runner = re.runner[:n-1] | |
1615 re.muRun.Unlock() | |
1616 return z | |
1617 } | |
1618 re.muRun.Unlock() | |
1619 z := &runner{ | |
1620 re: re, | |
1621 code: re.code, | |
1622 } | |
1623 return z | |
1624 } | |
1625 | |
1626 // putRunner returns a runner to the re's cache. | |
1627 // There is no attempt to limit the size of the cache, so it will | |
1628 // grow to the maximum number of simultaneous matches | |
1629 // run using re. (The cache empties when re gets garbage collected.) | |
1630 func (re *Regexp) putRunner(r *runner) { | |
1631 re.muRun.Lock() | |
1632 re.runner = append(re.runner, r) | |
1633 re.muRun.Unlock() | |
1634 } |