66
|
1 package regexp2
|
|
2
|
|
3 import (
|
|
4 "bytes"
|
|
5 "fmt"
|
|
6 )
|
|
7
|
|
8 // Match is a single regex result match that contains groups and repeated captures
|
|
9 // -Groups
|
|
10 // -Capture
|
|
11 type Match struct {
|
|
12 Group //embeded group 0
|
|
13
|
|
14 regex *Regexp
|
|
15 otherGroups []Group
|
|
16
|
|
17 // input to the match
|
|
18 textpos int
|
|
19 textstart int
|
|
20
|
|
21 capcount int
|
|
22 caps []int
|
|
23 sparseCaps map[int]int
|
|
24
|
|
25 // output from the match
|
|
26 matches [][]int
|
|
27 matchcount []int
|
|
28
|
|
29 // whether we've done any balancing with this match. If we
|
|
30 // have done balancing, we'll need to do extra work in Tidy().
|
|
31 balancing bool
|
|
32 }
|
|
33
|
|
34 // Group is an explicit or implit (group 0) matched group within the pattern
|
|
35 type Group struct {
|
|
36 Capture // the last capture of this group is embeded for ease of use
|
|
37
|
|
38 Name string // group name
|
|
39 Captures []Capture // captures of this group
|
|
40 }
|
|
41
|
|
42 // Capture is a single capture of text within the larger original string
|
|
43 type Capture struct {
|
|
44 // the original string
|
|
45 text []rune
|
|
46 // the position in the original string where the first character of
|
|
47 // captured substring was found.
|
|
48 Index int
|
|
49 // the length of the captured substring.
|
|
50 Length int
|
|
51 }
|
|
52
|
|
53 // String returns the captured text as a String
|
|
54 func (c *Capture) String() string {
|
|
55 return string(c.text[c.Index : c.Index+c.Length])
|
|
56 }
|
|
57
|
|
58 // Runes returns the captured text as a rune slice
|
|
59 func (c *Capture) Runes() []rune {
|
|
60 return c.text[c.Index : c.Index+c.Length]
|
|
61 }
|
|
62
|
|
63 func newMatch(regex *Regexp, capcount int, text []rune, startpos int) *Match {
|
|
64 m := Match{
|
|
65 regex: regex,
|
|
66 matchcount: make([]int, capcount),
|
|
67 matches: make([][]int, capcount),
|
|
68 textstart: startpos,
|
|
69 balancing: false,
|
|
70 }
|
|
71 m.Name = "0"
|
|
72 m.text = text
|
|
73 m.matches[0] = make([]int, 2)
|
|
74 return &m
|
|
75 }
|
|
76
|
|
77 func newMatchSparse(regex *Regexp, caps map[int]int, capcount int, text []rune, startpos int) *Match {
|
|
78 m := newMatch(regex, capcount, text, startpos)
|
|
79 m.sparseCaps = caps
|
|
80 return m
|
|
81 }
|
|
82
|
|
83 func (m *Match) reset(text []rune, textstart int) {
|
|
84 m.text = text
|
|
85 m.textstart = textstart
|
|
86 for i := 0; i < len(m.matchcount); i++ {
|
|
87 m.matchcount[i] = 0
|
|
88 }
|
|
89 m.balancing = false
|
|
90 }
|
|
91
|
|
92 func (m *Match) tidy(textpos int) {
|
|
93
|
|
94 interval := m.matches[0]
|
|
95 m.Index = interval[0]
|
|
96 m.Length = interval[1]
|
|
97 m.textpos = textpos
|
|
98 m.capcount = m.matchcount[0]
|
|
99 //copy our root capture to the list
|
|
100 m.Group.Captures = []Capture{m.Group.Capture}
|
|
101
|
|
102 if m.balancing {
|
|
103 // The idea here is that we want to compact all of our unbalanced captures. To do that we
|
|
104 // use j basically as a count of how many unbalanced captures we have at any given time
|
|
105 // (really j is an index, but j/2 is the count). First we skip past all of the real captures
|
|
106 // until we find a balance captures. Then we check each subsequent entry. If it's a balance
|
|
107 // capture (it's negative), we decrement j. If it's a real capture, we increment j and copy
|
|
108 // it down to the last free position.
|
|
109 for cap := 0; cap < len(m.matchcount); cap++ {
|
|
110 limit := m.matchcount[cap] * 2
|
|
111 matcharray := m.matches[cap]
|
|
112
|
|
113 var i, j int
|
|
114
|
|
115 for i = 0; i < limit; i++ {
|
|
116 if matcharray[i] < 0 {
|
|
117 break
|
|
118 }
|
|
119 }
|
|
120
|
|
121 for j = i; i < limit; i++ {
|
|
122 if matcharray[i] < 0 {
|
|
123 // skip negative values
|
|
124 j--
|
|
125 } else {
|
|
126 // but if we find something positive (an actual capture), copy it back to the last
|
|
127 // unbalanced position.
|
|
128 if i != j {
|
|
129 matcharray[j] = matcharray[i]
|
|
130 }
|
|
131 j++
|
|
132 }
|
|
133 }
|
|
134
|
|
135 m.matchcount[cap] = j / 2
|
|
136 }
|
|
137
|
|
138 m.balancing = false
|
|
139 }
|
|
140 }
|
|
141
|
|
142 // isMatched tells if a group was matched by capnum
|
|
143 func (m *Match) isMatched(cap int) bool {
|
|
144 return cap < len(m.matchcount) && m.matchcount[cap] > 0 && m.matches[cap][m.matchcount[cap]*2-1] != (-3+1)
|
|
145 }
|
|
146
|
|
147 // matchIndex returns the index of the last specified matched group by capnum
|
|
148 func (m *Match) matchIndex(cap int) int {
|
|
149 i := m.matches[cap][m.matchcount[cap]*2-2]
|
|
150 if i >= 0 {
|
|
151 return i
|
|
152 }
|
|
153
|
|
154 return m.matches[cap][-3-i]
|
|
155 }
|
|
156
|
|
157 // matchLength returns the length of the last specified matched group by capnum
|
|
158 func (m *Match) matchLength(cap int) int {
|
|
159 i := m.matches[cap][m.matchcount[cap]*2-1]
|
|
160 if i >= 0 {
|
|
161 return i
|
|
162 }
|
|
163
|
|
164 return m.matches[cap][-3-i]
|
|
165 }
|
|
166
|
|
167 // Nonpublic builder: add a capture to the group specified by "c"
|
|
168 func (m *Match) addMatch(c, start, l int) {
|
|
169
|
|
170 if m.matches[c] == nil {
|
|
171 m.matches[c] = make([]int, 2)
|
|
172 }
|
|
173
|
|
174 capcount := m.matchcount[c]
|
|
175
|
|
176 if capcount*2+2 > len(m.matches[c]) {
|
|
177 oldmatches := m.matches[c]
|
|
178 newmatches := make([]int, capcount*8)
|
|
179 copy(newmatches, oldmatches[:capcount*2])
|
|
180 m.matches[c] = newmatches
|
|
181 }
|
|
182
|
|
183 m.matches[c][capcount*2] = start
|
|
184 m.matches[c][capcount*2+1] = l
|
|
185 m.matchcount[c] = capcount + 1
|
|
186 //log.Printf("addMatch: c=%v, i=%v, l=%v ... matches: %v", c, start, l, m.matches)
|
|
187 }
|
|
188
|
|
189 // Nonpublic builder: Add a capture to balance the specified group. This is used by the
|
|
190 // balanced match construct. (?<foo-foo2>...)
|
|
191 //
|
|
192 // If there were no such thing as backtracking, this would be as simple as calling RemoveMatch(c).
|
|
193 // However, since we have backtracking, we need to keep track of everything.
|
|
194 func (m *Match) balanceMatch(c int) {
|
|
195 m.balancing = true
|
|
196
|
|
197 // we'll look at the last capture first
|
|
198 capcount := m.matchcount[c]
|
|
199 target := capcount*2 - 2
|
|
200
|
|
201 // first see if it is negative, and therefore is a reference to the next available
|
|
202 // capture group for balancing. If it is, we'll reset target to point to that capture.
|
|
203 if m.matches[c][target] < 0 {
|
|
204 target = -3 - m.matches[c][target]
|
|
205 }
|
|
206
|
|
207 // move back to the previous capture
|
|
208 target -= 2
|
|
209
|
|
210 // if the previous capture is a reference, just copy that reference to the end. Otherwise, point to it.
|
|
211 if target >= 0 && m.matches[c][target] < 0 {
|
|
212 m.addMatch(c, m.matches[c][target], m.matches[c][target+1])
|
|
213 } else {
|
|
214 m.addMatch(c, -3-target, -4-target /* == -3 - (target + 1) */)
|
|
215 }
|
|
216 }
|
|
217
|
|
218 // Nonpublic builder: removes a group match by capnum
|
|
219 func (m *Match) removeMatch(c int) {
|
|
220 m.matchcount[c]--
|
|
221 }
|
|
222
|
|
223 // GroupCount returns the number of groups this match has matched
|
|
224 func (m *Match) GroupCount() int {
|
|
225 return len(m.matchcount)
|
|
226 }
|
|
227
|
|
228 // GroupByName returns a group based on the name of the group, or nil if the group name does not exist
|
|
229 func (m *Match) GroupByName(name string) *Group {
|
|
230 num := m.regex.GroupNumberFromName(name)
|
|
231 if num < 0 {
|
|
232 return nil
|
|
233 }
|
|
234 return m.GroupByNumber(num)
|
|
235 }
|
|
236
|
|
237 // GroupByNumber returns a group based on the number of the group, or nil if the group number does not exist
|
|
238 func (m *Match) GroupByNumber(num int) *Group {
|
|
239 // check our sparse map
|
|
240 if m.sparseCaps != nil {
|
|
241 if newNum, ok := m.sparseCaps[num]; ok {
|
|
242 num = newNum
|
|
243 }
|
|
244 }
|
|
245 if num >= len(m.matchcount) || num < 0 {
|
|
246 return nil
|
|
247 }
|
|
248
|
|
249 if num == 0 {
|
|
250 return &m.Group
|
|
251 }
|
|
252
|
|
253 m.populateOtherGroups()
|
|
254
|
|
255 return &m.otherGroups[num-1]
|
|
256 }
|
|
257
|
|
258 // Groups returns all the capture groups, starting with group 0 (the full match)
|
|
259 func (m *Match) Groups() []Group {
|
|
260 m.populateOtherGroups()
|
|
261 g := make([]Group, len(m.otherGroups)+1)
|
|
262 g[0] = m.Group
|
|
263 copy(g[1:], m.otherGroups)
|
|
264 return g
|
|
265 }
|
|
266
|
|
267 func (m *Match) populateOtherGroups() {
|
|
268 // Construct all the Group objects first time called
|
|
269 if m.otherGroups == nil {
|
|
270 m.otherGroups = make([]Group, len(m.matchcount)-1)
|
|
271 for i := 0; i < len(m.otherGroups); i++ {
|
|
272 m.otherGroups[i] = newGroup(m.regex.GroupNameFromNumber(i+1), m.text, m.matches[i+1], m.matchcount[i+1])
|
|
273 }
|
|
274 }
|
|
275 }
|
|
276
|
|
277 func (m *Match) groupValueAppendToBuf(groupnum int, buf *bytes.Buffer) {
|
|
278 c := m.matchcount[groupnum]
|
|
279 if c == 0 {
|
|
280 return
|
|
281 }
|
|
282
|
|
283 matches := m.matches[groupnum]
|
|
284
|
|
285 index := matches[(c-1)*2]
|
|
286 last := index + matches[(c*2)-1]
|
|
287
|
|
288 for ; index < last; index++ {
|
|
289 buf.WriteRune(m.text[index])
|
|
290 }
|
|
291 }
|
|
292
|
|
293 func newGroup(name string, text []rune, caps []int, capcount int) Group {
|
|
294 g := Group{}
|
|
295 g.text = text
|
|
296 if capcount > 0 {
|
|
297 g.Index = caps[(capcount-1)*2]
|
|
298 g.Length = caps[(capcount*2)-1]
|
|
299 }
|
|
300 g.Name = name
|
|
301 g.Captures = make([]Capture, capcount)
|
|
302 for i := 0; i < capcount; i++ {
|
|
303 g.Captures[i] = Capture{
|
|
304 text: text,
|
|
305 Index: caps[i*2],
|
|
306 Length: caps[i*2+1],
|
|
307 }
|
|
308 }
|
|
309 //log.Printf("newGroup! capcount %v, %+v", capcount, g)
|
|
310
|
|
311 return g
|
|
312 }
|
|
313
|
|
314 func (m *Match) dump() string {
|
|
315 buf := &bytes.Buffer{}
|
|
316 buf.WriteRune('\n')
|
|
317 if len(m.sparseCaps) > 0 {
|
|
318 for k, v := range m.sparseCaps {
|
|
319 fmt.Fprintf(buf, "Slot %v -> %v\n", k, v)
|
|
320 }
|
|
321 }
|
|
322
|
|
323 for i, g := range m.Groups() {
|
|
324 fmt.Fprintf(buf, "Group %v (%v), %v caps:\n", i, g.Name, len(g.Captures))
|
|
325
|
|
326 for _, c := range g.Captures {
|
|
327 fmt.Fprintf(buf, " (%v, %v) %v\n", c.Index, c.Length, c.String())
|
|
328 }
|
|
329 }
|
|
330 /*
|
|
331 for i := 0; i < len(m.matchcount); i++ {
|
|
332 fmt.Fprintf(buf, "\nGroup %v (%v):\n", i, m.regex.GroupNameFromNumber(i))
|
|
333
|
|
334 for j := 0; j < m.matchcount[i]; j++ {
|
|
335 text := ""
|
|
336
|
|
337 if m.matches[i][j*2] >= 0 {
|
|
338 start := m.matches[i][j*2]
|
|
339 text = m.text[start : start+m.matches[i][j*2+1]]
|
|
340 }
|
|
341
|
|
342 fmt.Fprintf(buf, " (%v, %v) %v\n", m.matches[i][j*2], m.matches[i][j*2+1], text)
|
|
343 }
|
|
344 }
|
|
345 */
|
|
346 return buf.String()
|
|
347 }
|