Mercurial > yakumo_izuru > aya
comparison vendor/github.com/dlclark/regexp2/match.go @ 66:787b5ee0289d draft
Use vendored modules
Signed-off-by: Izuru Yakumo <yakumo.izuru@chaotic.ninja>
| author | yakumo.izuru |
|---|---|
| date | Sun, 23 Jul 2023 13:18:53 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 65:6d985efa0f7a | 66:787b5ee0289d |
|---|---|
| 1 package regexp2 | |
| 2 | |
| 3 import ( | |
| 4 "bytes" | |
| 5 "fmt" | |
| 6 ) | |
| 7 | |
| 8 // Match is a single regex result match that contains groups and repeated captures | |
| 9 // -Groups | |
| 10 // -Capture | |
| 11 type Match struct { | |
| 12 Group //embedded group 0 | |
| 13 | |
| 14 regex *Regexp | |
| 15 otherGroups []Group | |
| 16 | |
| 17 // input to the match | |
| 18 textpos int | |
| 19 textstart int | |
| 20 | |
| 21 capcount int | |
| 22 caps []int | |
| 23 sparseCaps map[int]int | |
| 24 | |
| 25 // output from the match | |
| 26 matches [][]int | |
| 27 matchcount []int | |
| 28 | |
| 29 // whether we've done any balancing with this match. If we | |
| 30 // have done balancing, we'll need to do extra work in tidy(). | |
| 31 balancing bool | |
| 32 } | |
| 33 | |
| 34 // Group is an explicit or implicit (group 0) matched group within the pattern | |
| 35 type Group struct { | |
| 36 Capture // the last capture of this group is embedded for ease of use | |
| 37 | |
| 38 Name string // group name | |
| 39 Captures []Capture // captures of this group | |
| 40 } | |
| 41 | |
| 42 // Capture is a single capture of text within the larger original string | |
| 43 type Capture struct { | |
| 44 // the original string | |
| 45 text []rune | |
| 46 // the position in the original string where the first character of the | |
| 47 // captured substring was found. | |
| 48 Index int | |
| 49 // the length of the captured substring. | |
| 50 Length int | |
| 51 } | |
| 52 | |
| 53 // String returns the captured text as a string | |
| 54 func (c *Capture) String() string { | |
| 55 return string(c.text[c.Index : c.Index+c.Length]) | |
| 56 } | |
| 57 | |
| 58 // Runes returns the captured text as a rune slice | |
| 59 func (c *Capture) Runes() []rune { | |
| 60 return c.text[c.Index : c.Index+c.Length] | |
| 61 } | |
| 62 | |
| 63 func newMatch(regex *Regexp, capcount int, text []rune, startpos int) *Match { | |
| 64 m := Match{ | |
| 65 regex: regex, | |
| 66 matchcount: make([]int, capcount), | |
| 67 matches: make([][]int, capcount), | |
| 68 textstart: startpos, | |
| 69 balancing: false, | |
| 70 } | |
| 71 m.Name = "0" | |
| 72 m.text = text | |
| 73 m.matches[0] = make([]int, 2) | |
| 74 return &m | |
| 75 } | |
| 76 | |
| 77 func newMatchSparse(regex *Regexp, caps map[int]int, capcount int, text []rune, startpos int) *Match { | |
| 78 m := newMatch(regex, capcount, text, startpos) | |
| 79 m.sparseCaps = caps | |
| 80 return m | |
| 81 } | |
| 82 | |
| 83 func (m *Match) reset(text []rune, textstart int) { | |
| 84 m.text = text | |
| 85 m.textstart = textstart | |
| 86 for i := 0; i < len(m.matchcount); i++ { | |
| 87 m.matchcount[i] = 0 | |
| 88 } | |
| 89 m.balancing = false | |
| 90 } | |
| 91 | |
| 92 func (m *Match) tidy(textpos int) { | |
| 93 | |
| 94 interval := m.matches[0] | |
| 95 m.Index = interval[0] | |
| 96 m.Length = interval[1] | |
| 97 m.textpos = textpos | |
| 98 m.capcount = m.matchcount[0] | |
| 99 //copy our root capture to the list | |
| 100 m.Group.Captures = []Capture{m.Group.Capture} | |
| 101 | |
| 102 if m.balancing { | |
| 103 // The idea here is that we want to compact all of our unbalanced captures. To do that we | |
| 104 // use j basically as a count of how many unbalanced captures we have at any given time | |
| 105 // (really j is an index, but j/2 is the count). First we skip past all of the real captures | |
| 106 // until we find a balance capture. Then we check each subsequent entry. If it's a balance | |
| 107 // capture (it's negative), we decrement j. If it's a real capture, we increment j and copy | |
| 108 // it down to the last free position. | |
| 109 for cap := 0; cap < len(m.matchcount); cap++ { | |
| 110 limit := m.matchcount[cap] * 2 | |
| 111 matcharray := m.matches[cap] | |
| 112 | |
| 113 var i, j int | |
| 114 | |
| 115 for i = 0; i < limit; i++ { | |
| 116 if matcharray[i] < 0 { | |
| 117 break | |
| 118 } | |
| 119 } | |
| 120 | |
| 121 for j = i; i < limit; i++ { | |
| 122 if matcharray[i] < 0 { | |
| 123 // skip negative values | |
| 124 j-- | |
| 125 } else { | |
| 126 // but if we find something positive (an actual capture), copy it back to the last | |
| 127 // unbalanced position. | |
| 128 if i != j { | |
| 129 matcharray[j] = matcharray[i] | |
| 130 } | |
| 131 j++ | |
| 132 } | |
| 133 } | |
| 134 | |
| 135 m.matchcount[cap] = j / 2 | |
| 136 } | |
| 137 | |
| 138 m.balancing = false | |
| 139 } | |
| 140 } | |
| 141 | |
| 142 // isMatched tells if a group was matched by capnum | |
| 143 func (m *Match) isMatched(cap int) bool { | |
| 144 return cap < len(m.matchcount) && m.matchcount[cap] > 0 && m.matches[cap][m.matchcount[cap]*2-1] != (-3+1) | |
| 145 } | |
| 146 | |
| 147 // matchIndex returns the index of the last specified matched group by capnum | |
| 148 func (m *Match) matchIndex(cap int) int { | |
| 149 i := m.matches[cap][m.matchcount[cap]*2-2] | |
| 150 if i >= 0 { | |
| 151 return i | |
| 152 } | |
| 153 | |
| 154 return m.matches[cap][-3-i] | |
| 155 } | |
| 156 | |
| 157 // matchLength returns the length of the last specified matched group by capnum | |
| 158 func (m *Match) matchLength(cap int) int { | |
| 159 i := m.matches[cap][m.matchcount[cap]*2-1] | |
| 160 if i >= 0 { | |
| 161 return i | |
| 162 } | |
| 163 | |
| 164 return m.matches[cap][-3-i] | |
| 165 } | |
| 166 | |
| 167 // Nonpublic builder: add a capture to the group specified by "c" | |
| 168 func (m *Match) addMatch(c, start, l int) { | |
| 169 | |
| 170 if m.matches[c] == nil { | |
| 171 m.matches[c] = make([]int, 2) | |
| 172 } | |
| 173 | |
| 174 capcount := m.matchcount[c] | |
| 175 | |
| 176 if capcount*2+2 > len(m.matches[c]) { | |
| 177 oldmatches := m.matches[c] | |
| 178 newmatches := make([]int, capcount*8) | |
| 179 copy(newmatches, oldmatches[:capcount*2]) | |
| 180 m.matches[c] = newmatches | |
| 181 } | |
| 182 | |
| 183 m.matches[c][capcount*2] = start | |
| 184 m.matches[c][capcount*2+1] = l | |
| 185 m.matchcount[c] = capcount + 1 | |
| 186 //log.Printf("addMatch: c=%v, i=%v, l=%v ... matches: %v", c, start, l, m.matches) | |
| 187 } | |
| 188 | |
| 189 // Nonpublic builder: Add a capture to balance the specified group. This is used by the | |
| 190 // balanced match construct. (?<foo-foo2>...) | |
| 191 // | |
| 192 // If there were no such thing as backtracking, this would be as simple as calling removeMatch(c). | |
| 193 // However, since we have backtracking, we need to keep track of everything. | |
| 194 func (m *Match) balanceMatch(c int) { | |
| 195 m.balancing = true | |
| 196 | |
| 197 // we'll look at the last capture first | |
| 198 capcount := m.matchcount[c] | |
| 199 target := capcount*2 - 2 | |
| 200 | |
| 201 // first see if it is negative, and therefore is a reference to the next available | |
| 202 // capture group for balancing. If it is, we'll reset target to point to that capture. | |
| 203 if m.matches[c][target] < 0 { | |
| 204 target = -3 - m.matches[c][target] | |
| 205 } | |
| 206 | |
| 207 // move back to the previous capture | |
| 208 target -= 2 | |
| 209 | |
| 210 // if the previous capture is a reference, just copy that reference to the end. Otherwise, point to it. | |
| 211 if target >= 0 && m.matches[c][target] < 0 { | |
| 212 m.addMatch(c, m.matches[c][target], m.matches[c][target+1]) | |
| 213 } else { | |
| 214 m.addMatch(c, -3-target, -4-target /* == -3 - (target + 1) */) | |
| 215 } | |
| 216 } | |
| 217 | |
| 218 // Nonpublic builder: removes a group match by capnum | |
| 219 func (m *Match) removeMatch(c int) { | |
| 220 m.matchcount[c]-- | |
| 221 } | |
| 222 | |
| 223 // GroupCount returns the number of groups this match has matched | |
| 224 func (m *Match) GroupCount() int { | |
| 225 return len(m.matchcount) | |
| 226 } | |
| 227 | |
| 228 // GroupByName returns a group based on the name of the group, or nil if the group name does not exist | |
| 229 func (m *Match) GroupByName(name string) *Group { | |
| 230 num := m.regex.GroupNumberFromName(name) | |
| 231 if num < 0 { | |
| 232 return nil | |
| 233 } | |
| 234 return m.GroupByNumber(num) | |
| 235 } | |
| 236 | |
| 237 // GroupByNumber returns a group based on the number of the group, or nil if the group number does not exist | |
| 238 func (m *Match) GroupByNumber(num int) *Group { | |
| 239 // check our sparse map | |
| 240 if m.sparseCaps != nil { | |
| 241 if newNum, ok := m.sparseCaps[num]; ok { | |
| 242 num = newNum | |
| 243 } | |
| 244 } | |
| 245 if num >= len(m.matchcount) || num < 0 { | |
| 246 return nil | |
| 247 } | |
| 248 | |
| 249 if num == 0 { | |
| 250 return &m.Group | |
| 251 } | |
| 252 | |
| 253 m.populateOtherGroups() | |
| 254 | |
| 255 return &m.otherGroups[num-1] | |
| 256 } | |
| 257 | |
| 258 // Groups returns all the capture groups, starting with group 0 (the full match) | |
| 259 func (m *Match) Groups() []Group { | |
| 260 m.populateOtherGroups() | |
| 261 g := make([]Group, len(m.otherGroups)+1) | |
| 262 g[0] = m.Group | |
| 263 copy(g[1:], m.otherGroups) | |
| 264 return g | |
| 265 } | |
| 266 | |
| 267 func (m *Match) populateOtherGroups() { | |
| 268 // Construct all the Group objects the first time this is called | |
| 269 if m.otherGroups == nil { | |
| 270 m.otherGroups = make([]Group, len(m.matchcount)-1) | |
| 271 for i := 0; i < len(m.otherGroups); i++ { | |
| 272 m.otherGroups[i] = newGroup(m.regex.GroupNameFromNumber(i+1), m.text, m.matches[i+1], m.matchcount[i+1]) | |
| 273 } | |
| 274 } | |
| 275 } | |
| 276 | |
| 277 func (m *Match) groupValueAppendToBuf(groupnum int, buf *bytes.Buffer) { | |
| 278 c := m.matchcount[groupnum] | |
| 279 if c == 0 { | |
| 280 return | |
| 281 } | |
| 282 | |
| 283 matches := m.matches[groupnum] | |
| 284 | |
| 285 index := matches[(c-1)*2] | |
| 286 last := index + matches[(c*2)-1] | |
| 287 | |
| 288 for ; index < last; index++ { | |
| 289 buf.WriteRune(m.text[index]) | |
| 290 } | |
| 291 } | |
| 292 | |
| 293 func newGroup(name string, text []rune, caps []int, capcount int) Group { | |
| 294 g := Group{} | |
| 295 g.text = text | |
| 296 if capcount > 0 { | |
| 297 g.Index = caps[(capcount-1)*2] | |
| 298 g.Length = caps[(capcount*2)-1] | |
| 299 } | |
| 300 g.Name = name | |
| 301 g.Captures = make([]Capture, capcount) | |
| 302 for i := 0; i < capcount; i++ { | |
| 303 g.Captures[i] = Capture{ | |
| 304 text: text, | |
| 305 Index: caps[i*2], | |
| 306 Length: caps[i*2+1], | |
| 307 } | |
| 308 } | |
| 309 //log.Printf("newGroup! capcount %v, %+v", capcount, g) | |
| 310 | |
| 311 return g | |
| 312 } | |
| 313 | |
| 314 func (m *Match) dump() string { | |
| 315 buf := &bytes.Buffer{} | |
| 316 buf.WriteRune('\n') | |
| 317 if len(m.sparseCaps) > 0 { | |
| 318 for k, v := range m.sparseCaps { | |
| 319 fmt.Fprintf(buf, "Slot %v -> %v\n", k, v) | |
| 320 } | |
| 321 } | |
| 322 | |
| 323 for i, g := range m.Groups() { | |
| 324 fmt.Fprintf(buf, "Group %v (%v), %v caps:\n", i, g.Name, len(g.Captures)) | |
| 325 | |
| 326 for _, c := range g.Captures { | |
| 327 fmt.Fprintf(buf, " (%v, %v) %v\n", c.Index, c.Length, c.String()) | |
| 328 } | |
| 329 } | |
| 330 /* | |
| 331 for i := 0; i < len(m.matchcount); i++ { | |
| 332 fmt.Fprintf(buf, "\nGroup %v (%v):\n", i, m.regex.GroupNameFromNumber(i)) | |
| 333 | |
| 334 for j := 0; j < m.matchcount[i]; j++ { | |
| 335 text := "" | |
| 336 | |
| 337 if m.matches[i][j*2] >= 0 { | |
| 338 start := m.matches[i][j*2] | |
| 339 text = m.text[start : start+m.matches[i][j*2+1]] | |
| 340 } | |
| 341 | |
| 342 fmt.Fprintf(buf, " (%v, %v) %v\n", m.matches[i][j*2], m.matches[i][j*2+1], text) | |
| 343 } | |
| 344 } | |
| 345 */ | |
| 346 return buf.String() | |
| 347 } |
