Mercurial > yakumo_izuru > aya
diff vendor/github.com/dlclark/regexp2/match.go @ 66:787b5ee0289d draft
Use vendored modules
Signed-off-by: Izuru Yakumo <yakumo.izuru@chaotic.ninja>
author | yakumo.izuru |
---|---|
date | Sun, 23 Jul 2023 13:18:53 +0000 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/vendor/github.com/dlclark/regexp2/match.go Sun Jul 23 13:18:53 2023 +0000 @@ -0,0 +1,347 @@ +package regexp2 + +import ( + "bytes" + "fmt" +) + +// Match is a single regex result match that contains groups and repeated captures +// -Groups +// -Capture +type Match struct { + Group //embeded group 0 + + regex *Regexp + otherGroups []Group + + // input to the match + textpos int + textstart int + + capcount int + caps []int + sparseCaps map[int]int + + // output from the match + matches [][]int + matchcount []int + + // whether we've done any balancing with this match. If we + // have done balancing, we'll need to do extra work in Tidy(). + balancing bool +} + +// Group is an explicit or implit (group 0) matched group within the pattern +type Group struct { + Capture // the last capture of this group is embeded for ease of use + + Name string // group name + Captures []Capture // captures of this group +} + +// Capture is a single capture of text within the larger original string +type Capture struct { + // the original string + text []rune + // the position in the original string where the first character of + // captured substring was found. + Index int + // the length of the captured substring. + Length int +} + +// String returns the captured text as a String +func (c *Capture) String() string { + return string(c.text[c.Index : c.Index+c.Length]) +} + +// Runes returns the captured text as a rune slice +func (c *Capture) Runes() []rune { + return c.text[c.Index : c.Index+c.Length] +} + +func newMatch(regex *Regexp, capcount int, text []rune, startpos int) *Match { + m := Match{ + regex: regex, + matchcount: make([]int, capcount), + matches: make([][]int, capcount), + textstart: startpos, + balancing: false, + } + m.Name = "0" + m.text = text + m.matches[0] = make([]int, 2) + return &m +} + +func newMatchSparse(regex *Regexp, caps map[int]int, capcount int, text []rune, startpos int) *Match { + m := newMatch(regex, capcount, text, startpos) + m.sparseCaps = caps + return m +} + +func (m *Match) reset(text []rune, textstart int) { + m.text = text + m.textstart = textstart + for i := 0; i < len(m.matchcount); i++ { + m.matchcount[i] = 0 + } + m.balancing = false +} + +func (m *Match) tidy(textpos int) { + + interval := m.matches[0] + m.Index = interval[0] + m.Length = interval[1] + m.textpos = textpos + m.capcount = m.matchcount[0] + //copy our root capture to the list + m.Group.Captures = []Capture{m.Group.Capture} + + if m.balancing { + // The idea here is that we want to compact all of our unbalanced captures. To do that we + // use j basically as a count of how many unbalanced captures we have at any given time + // (really j is an index, but j/2 is the count). First we skip past all of the real captures + // until we find a balance captures. Then we check each subsequent entry. If it's a balance + // capture (it's negative), we decrement j. If it's a real capture, we increment j and copy + // it down to the last free position. + for cap := 0; cap < len(m.matchcount); cap++ { + limit := m.matchcount[cap] * 2 + matcharray := m.matches[cap] + + var i, j int + + for i = 0; i < limit; i++ { + if matcharray[i] < 0 { + break + } + } + + for j = i; i < limit; i++ { + if matcharray[i] < 0 { + // skip negative values + j-- + } else { + // but if we find something positive (an actual capture), copy it back to the last + // unbalanced position. + if i != j { + matcharray[j] = matcharray[i] + } + j++ + } + } + + m.matchcount[cap] = j / 2 + } + + m.balancing = false + } +} + +// isMatched tells if a group was matched by capnum +func (m *Match) isMatched(cap int) bool { + return cap < len(m.matchcount) && m.matchcount[cap] > 0 && m.matches[cap][m.matchcount[cap]*2-1] != (-3+1) +} + +// matchIndex returns the index of the last specified matched group by capnum +func (m *Match) matchIndex(cap int) int { + i := m.matches[cap][m.matchcount[cap]*2-2] + if i >= 0 { + return i + } + + return m.matches[cap][-3-i] +} + +// matchLength returns the length of the last specified matched group by capnum +func (m *Match) matchLength(cap int) int { + i := m.matches[cap][m.matchcount[cap]*2-1] + if i >= 0 { + return i + } + + return m.matches[cap][-3-i] +} + +// Nonpublic builder: add a capture to the group specified by "c" +func (m *Match) addMatch(c, start, l int) { + + if m.matches[c] == nil { + m.matches[c] = make([]int, 2) + } + + capcount := m.matchcount[c] + + if capcount*2+2 > len(m.matches[c]) { + oldmatches := m.matches[c] + newmatches := make([]int, capcount*8) + copy(newmatches, oldmatches[:capcount*2]) + m.matches[c] = newmatches + } + + m.matches[c][capcount*2] = start + m.matches[c][capcount*2+1] = l + m.matchcount[c] = capcount + 1 + //log.Printf("addMatch: c=%v, i=%v, l=%v ... matches: %v", c, start, l, m.matches) +} + +// Nonpublic builder: Add a capture to balance the specified group. This is used by the +// balanced match construct. (?<foo-foo2>...) +// +// If there were no such thing as backtracking, this would be as simple as calling RemoveMatch(c). +// However, since we have backtracking, we need to keep track of everything. +func (m *Match) balanceMatch(c int) { + m.balancing = true + + // we'll look at the last capture first + capcount := m.matchcount[c] + target := capcount*2 - 2 + + // first see if it is negative, and therefore is a reference to the next available + // capture group for balancing. If it is, we'll reset target to point to that capture. + if m.matches[c][target] < 0 { + target = -3 - m.matches[c][target] + } + + // move back to the previous capture + target -= 2 + + // if the previous capture is a reference, just copy that reference to the end. Otherwise, point to it. + if target >= 0 && m.matches[c][target] < 0 { + m.addMatch(c, m.matches[c][target], m.matches[c][target+1]) + } else { + m.addMatch(c, -3-target, -4-target /* == -3 - (target + 1) */) + } +} + +// Nonpublic builder: removes a group match by capnum +func (m *Match) removeMatch(c int) { + m.matchcount[c]-- +} + +// GroupCount returns the number of groups this match has matched +func (m *Match) GroupCount() int { + return len(m.matchcount) +} + +// GroupByName returns a group based on the name of the group, or nil if the group name does not exist +func (m *Match) GroupByName(name string) *Group { + num := m.regex.GroupNumberFromName(name) + if num < 0 { + return nil + } + return m.GroupByNumber(num) +} + +// GroupByNumber returns a group based on the number of the group, or nil if the group number does not exist +func (m *Match) GroupByNumber(num int) *Group { + // check our sparse map + if m.sparseCaps != nil { + if newNum, ok := m.sparseCaps[num]; ok { + num = newNum + } + } + if num >= len(m.matchcount) || num < 0 { + return nil + } + + if num == 0 { + return &m.Group + } + + m.populateOtherGroups() + + return &m.otherGroups[num-1] +} + +// Groups returns all the capture groups, starting with group 0 (the full match) +func (m *Match) Groups() []Group { + m.populateOtherGroups() + g := make([]Group, len(m.otherGroups)+1) + g[0] = m.Group + copy(g[1:], m.otherGroups) + return g +} + +func (m *Match) populateOtherGroups() { + // Construct all the Group objects first time called + if m.otherGroups == nil { + m.otherGroups = make([]Group, len(m.matchcount)-1) + for i := 0; i < len(m.otherGroups); i++ { + m.otherGroups[i] = newGroup(m.regex.GroupNameFromNumber(i+1), m.text, m.matches[i+1], m.matchcount[i+1]) + } + } +} + +func (m *Match) groupValueAppendToBuf(groupnum int, buf *bytes.Buffer) { + c := m.matchcount[groupnum] + if c == 0 { + return + } + + matches := m.matches[groupnum] + + index := matches[(c-1)*2] + last := index + matches[(c*2)-1] + + for ; index < last; index++ { + buf.WriteRune(m.text[index]) + } +} + +func newGroup(name string, text []rune, caps []int, capcount int) Group { + g := Group{} + g.text = text + if capcount > 0 { + g.Index = caps[(capcount-1)*2] + g.Length = caps[(capcount*2)-1] + } + g.Name = name + g.Captures = make([]Capture, capcount) + for i := 0; i < capcount; i++ { + g.Captures[i] = Capture{ + text: text, + Index: caps[i*2], + Length: caps[i*2+1], + } + } + //log.Printf("newGroup! capcount %v, %+v", capcount, g) + + return g +} + +func (m *Match) dump() string { + buf := &bytes.Buffer{} + buf.WriteRune('\n') + if len(m.sparseCaps) > 0 { + for k, v := range m.sparseCaps { + fmt.Fprintf(buf, "Slot %v -> %v\n", k, v) + } + } + + for i, g := range m.Groups() { + fmt.Fprintf(buf, "Group %v (%v), %v caps:\n", i, g.Name, len(g.Captures)) + + for _, c := range g.Captures { + fmt.Fprintf(buf, " (%v, %v) %v\n", c.Index, c.Length, c.String()) + } + } + /* + for i := 0; i < len(m.matchcount); i++ { + fmt.Fprintf(buf, "\nGroup %v (%v):\n", i, m.regex.GroupNameFromNumber(i)) + + for j := 0; j < m.matchcount[i]; j++ { + text := "" + + if m.matches[i][j*2] >= 0 { + start := m.matches[i][j*2] + text = m.text[start : start+m.matches[i][j*2+1]] + } + + fmt.Fprintf(buf, " (%v, %v) %v\n", m.matches[i][j*2], m.matches[i][j*2+1], text) + } + } + */ + return buf.String() +}