Mercurial > yakumo_izuru > aya
diff vendor/github.com/dlclark/regexp2/syntax/code.go @ 66:787b5ee0289d draft
Use vendored modules
Signed-off-by: Izuru Yakumo <yakumo.izuru@chaotic.ninja>
author | yakumo.izuru |
---|---|
date | Sun, 23 Jul 2023 13:18:53 +0000 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/vendor/github.com/dlclark/regexp2/syntax/code.go Sun Jul 23 13:18:53 2023 +0000 @@ -0,0 +1,274 @@ +package syntax + +import ( + "bytes" + "fmt" + "math" +) + +// similar to prog.go in the go regex package...also with comment 'may not belong in this package' + +// File provides operator constants for use by the Builder and the Machine. + +// Implementation notes: +// +// Regexps are built into RegexCodes, which contain an operation array, +// a string table, and some constants. +// +// Each operation is one of the codes below, followed by the integer +// operands specified for each op. +// +// Strings and sets are indices into a string table. + +type InstOp int + +const ( + // lef/back operands description + + Onerep InstOp = 0 // lef,back char,min,max a {n} + Notonerep = 1 // lef,back char,min,max .{n} + Setrep = 2 // lef,back set,min,max [\d]{n} + + Oneloop = 3 // lef,back char,min,max a {,n} + Notoneloop = 4 // lef,back char,min,max .{,n} + Setloop = 5 // lef,back set,min,max [\d]{,n} + + Onelazy = 6 // lef,back char,min,max a {,n}? + Notonelazy = 7 // lef,back char,min,max .{,n}? + Setlazy = 8 // lef,back set,min,max [\d]{,n}? + + One = 9 // lef char a + Notone = 10 // lef char [^a] + Set = 11 // lef set [a-z\s] \w \s \d + + Multi = 12 // lef string abcd + Ref = 13 // lef group \# + + Bol = 14 // ^ + Eol = 15 // $ + Boundary = 16 // \b + Nonboundary = 17 // \B + Beginning = 18 // \A + Start = 19 // \G + EndZ = 20 // \Z + End = 21 // \Z + + Nothing = 22 // Reject! + + // Primitive control structures + + Lazybranch = 23 // back jump straight first + Branchmark = 24 // back jump branch first for loop + Lazybranchmark = 25 // back jump straight first for loop + Nullcount = 26 // back val set counter, null mark + Setcount = 27 // back val set counter, make mark + Branchcount = 28 // back jump,limit branch++ if zero<=c<limit + Lazybranchcount = 29 // back jump,limit same, but straight first + Nullmark = 30 // back save position + Setmark = 31 // back save position + Capturemark = 32 // back group define group + Getmark = 33 // back recall position + Setjump = 34 // back save backtrack state + Backjump = 35 // zap back to saved state + Forejump = 36 // zap backtracking state + Testref = 37 // backtrack if ref undefined + Goto = 38 // jump just go + + Prune = 39 // prune it baby + Stop = 40 // done! + + ECMABoundary = 41 // \b + NonECMABoundary = 42 // \B + + // Modifiers for alternate modes + + Mask = 63 // Mask to get unmodified ordinary operator + Rtl = 64 // bit to indicate that we're reverse scanning. + Back = 128 // bit to indicate that we're backtracking. + Back2 = 256 // bit to indicate that we're backtracking on a second branch. + Ci = 512 // bit to indicate that we're case-insensitive. +) + +type Code struct { + Codes []int // the code + Strings [][]rune // string table + Sets []*CharSet //character set table + TrackCount int // how many instructions use backtracking + Caps map[int]int // mapping of user group numbers -> impl group slots + Capsize int // number of impl group slots + FcPrefix *Prefix // the set of candidate first characters (may be null) + BmPrefix *BmPrefix // the fixed prefix string as a Boyer-Moore machine (may be null) + Anchors AnchorLoc // the set of zero-length start anchors (RegexFCD.Bol, etc) + RightToLeft bool // true if right to left +} + +func opcodeBacktracks(op InstOp) bool { + op &= Mask + + switch op { + case Oneloop, Notoneloop, Setloop, Onelazy, Notonelazy, Setlazy, Lazybranch, Branchmark, Lazybranchmark, + Nullcount, Setcount, Branchcount, Lazybranchcount, Setmark, Capturemark, Getmark, Setjump, Backjump, + Forejump, Goto: + return true + + default: + return false + } +} + +func opcodeSize(op InstOp) int { + op &= Mask + + switch op { + case Nothing, Bol, Eol, Boundary, Nonboundary, ECMABoundary, NonECMABoundary, Beginning, Start, EndZ, + End, Nullmark, Setmark, Getmark, Setjump, Backjump, Forejump, Stop: + return 1 + + case One, Notone, Multi, Ref, Testref, Goto, Nullcount, Setcount, Lazybranch, Branchmark, Lazybranchmark, + Prune, Set: + return 2 + + case Capturemark, Branchcount, Lazybranchcount, Onerep, Notonerep, Oneloop, Notoneloop, Onelazy, Notonelazy, + Setlazy, Setrep, Setloop: + return 3 + + default: + panic(fmt.Errorf("Unexpected op code: %v", op)) + } +} + +var codeStr = []string{ + "Onerep", "Notonerep", "Setrep", + "Oneloop", "Notoneloop", "Setloop", + "Onelazy", "Notonelazy", "Setlazy", + "One", "Notone", "Set", + "Multi", "Ref", + "Bol", "Eol", "Boundary", "Nonboundary", "Beginning", "Start", "EndZ", "End", + "Nothing", + "Lazybranch", "Branchmark", "Lazybranchmark", + "Nullcount", "Setcount", "Branchcount", "Lazybranchcount", + "Nullmark", "Setmark", "Capturemark", "Getmark", + "Setjump", "Backjump", "Forejump", "Testref", "Goto", + "Prune", "Stop", + "ECMABoundary", "NonECMABoundary", +} + +func operatorDescription(op InstOp) string { + desc := codeStr[op&Mask] + if (op & Ci) != 0 { + desc += "-Ci" + } + if (op & Rtl) != 0 { + desc += "-Rtl" + } + if (op & Back) != 0 { + desc += "-Back" + } + if (op & Back2) != 0 { + desc += "-Back2" + } + + return desc +} + +// OpcodeDescription is a humman readable string of the specific offset +func (c *Code) OpcodeDescription(offset int) string { + buf := &bytes.Buffer{} + + op := InstOp(c.Codes[offset]) + fmt.Fprintf(buf, "%06d ", offset) + + if opcodeBacktracks(op & Mask) { + buf.WriteString("*") + } else { + buf.WriteString(" ") + } + buf.WriteString(operatorDescription(op)) + buf.WriteString("(") + op &= Mask + + switch op { + case One, Notone, Onerep, Notonerep, Oneloop, Notoneloop, Onelazy, Notonelazy: + buf.WriteString("Ch = ") + buf.WriteString(CharDescription(rune(c.Codes[offset+1]))) + + case Set, Setrep, Setloop, Setlazy: + buf.WriteString("Set = ") + buf.WriteString(c.Sets[c.Codes[offset+1]].String()) + + case Multi: + fmt.Fprintf(buf, "String = %s", string(c.Strings[c.Codes[offset+1]])) + + case Ref, Testref: + fmt.Fprintf(buf, "Index = %d", c.Codes[offset+1]) + + case Capturemark: + fmt.Fprintf(buf, "Index = %d", c.Codes[offset+1]) + if c.Codes[offset+2] != -1 { + fmt.Fprintf(buf, ", Unindex = %d", c.Codes[offset+2]) + } + + case Nullcount, Setcount: + fmt.Fprintf(buf, "Value = %d", c.Codes[offset+1]) + + case Goto, Lazybranch, Branchmark, Lazybranchmark, Branchcount, Lazybranchcount: + fmt.Fprintf(buf, "Addr = %d", c.Codes[offset+1]) + } + + switch op { + case Onerep, Notonerep, Oneloop, Notoneloop, Onelazy, Notonelazy, Setrep, Setloop, Setlazy: + buf.WriteString(", Rep = ") + if c.Codes[offset+2] == math.MaxInt32 { + buf.WriteString("inf") + } else { + fmt.Fprintf(buf, "%d", c.Codes[offset+2]) + } + + case Branchcount, Lazybranchcount: + buf.WriteString(", Limit = ") + if c.Codes[offset+2] == math.MaxInt32 { + buf.WriteString("inf") + } else { + fmt.Fprintf(buf, "%d", c.Codes[offset+2]) + } + + } + + buf.WriteString(")") + + return buf.String() +} + +func (c *Code) Dump() string { + buf := &bytes.Buffer{} + + if c.RightToLeft { + fmt.Fprintln(buf, "Direction: right-to-left") + } else { + fmt.Fprintln(buf, "Direction: left-to-right") + } + if c.FcPrefix == nil { + fmt.Fprintln(buf, "Firstchars: n/a") + } else { + fmt.Fprintf(buf, "Firstchars: %v\n", c.FcPrefix.PrefixSet.String()) + } + + if c.BmPrefix == nil { + fmt.Fprintln(buf, "Prefix: n/a") + } else { + fmt.Fprintf(buf, "Prefix: %v\n", Escape(c.BmPrefix.String())) + } + + fmt.Fprintf(buf, "Anchors: %v\n", c.Anchors) + fmt.Fprintln(buf) + + if c.BmPrefix != nil { + fmt.Fprintln(buf, "BoyerMoore:") + fmt.Fprintln(buf, c.BmPrefix.Dump(" ")) + } + for i := 0; i < len(c.Codes); i += opcodeSize(InstOp(c.Codes[i])) { + fmt.Fprintln(buf, c.OpcodeDescription(i)) + } + + return buf.String() +}