Mercurial > yakumo_izuru > aya
comparison vendor/github.com/dlclark/regexp2/syntax/code.go @ 66:787b5ee0289d draft
Use vendored modules
Signed-off-by: Izuru Yakumo <yakumo.izuru@chaotic.ninja>
author | yakumo.izuru |
---|---|
date | Sun, 23 Jul 2023 13:18:53 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
65:6d985efa0f7a | 66:787b5ee0289d |
---|---|
1 package syntax | |
2 | |
3 import ( | |
4 "bytes" | |
5 "fmt" | |
6 "math" | |
7 ) | |
8 | |
9 // similar to prog.go in the go regex package...also with comment 'may not belong in this package' | |
10 | |
11 // File provides operator constants for use by the Builder and the Machine. | |
12 | |
13 // Implementation notes: | |
14 // | |
15 // Regexps are built into RegexCodes, which contain an operation array, | |
16 // a string table, and some constants. | |
17 // | |
18 // Each operation is one of the codes below, followed by the integer | |
19 // operands specified for each op. | |
20 // | |
21 // Strings and sets are indices into a string table. | |
22 | |
23 type InstOp int | |
24 | |
25 const ( | |
26 // lef/back operands description | |
27 | |
28 Onerep InstOp = 0 // lef,back char,min,max a {n} | |
29 Notonerep = 1 // lef,back char,min,max .{n} | |
30 Setrep = 2 // lef,back set,min,max [\d]{n} | |
31 | |
32 Oneloop = 3 // lef,back char,min,max a {,n} | |
33 Notoneloop = 4 // lef,back char,min,max .{,n} | |
34 Setloop = 5 // lef,back set,min,max [\d]{,n} | |
35 | |
36 Onelazy = 6 // lef,back char,min,max a {,n}? | |
37 Notonelazy = 7 // lef,back char,min,max .{,n}? | |
38 Setlazy = 8 // lef,back set,min,max [\d]{,n}? | |
39 | |
40 One = 9 // lef char a | |
41 Notone = 10 // lef char [^a] | |
42 Set = 11 // lef set [a-z\s] \w \s \d | |
43 | |
44 Multi = 12 // lef string abcd | |
45 Ref = 13 // lef group \# | |
46 | |
47 Bol = 14 // ^ | |
48 Eol = 15 // $ | |
49 Boundary = 16 // \b | |
50 Nonboundary = 17 // \B | |
51 Beginning = 18 // \A | |
52 Start = 19 // \G | |
53 EndZ = 20 // \Z | |
54 End = 21 // \Z | |
55 | |
56 Nothing = 22 // Reject! | |
57 | |
58 // Primitive control structures | |
59 | |
60 Lazybranch = 23 // back jump straight first | |
61 Branchmark = 24 // back jump branch first for loop | |
62 Lazybranchmark = 25 // back jump straight first for loop | |
63 Nullcount = 26 // back val set counter, null mark | |
64 Setcount = 27 // back val set counter, make mark | |
65 Branchcount = 28 // back jump,limit branch++ if zero<=c<limit | |
66 Lazybranchcount = 29 // back jump,limit same, but straight first | |
67 Nullmark = 30 // back save position | |
68 Setmark = 31 // back save position | |
69 Capturemark = 32 // back group define group | |
70 Getmark = 33 // back recall position | |
71 Setjump = 34 // back save backtrack state | |
72 Backjump = 35 // zap back to saved state | |
73 Forejump = 36 // zap backtracking state | |
74 Testref = 37 // backtrack if ref undefined | |
75 Goto = 38 // jump just go | |
76 | |
77 Prune = 39 // prune it baby | |
78 Stop = 40 // done! | |
79 | |
80 ECMABoundary = 41 // \b | |
81 NonECMABoundary = 42 // \B | |
82 | |
83 // Modifiers for alternate modes | |
84 | |
85 Mask = 63 // Mask to get unmodified ordinary operator | |
86 Rtl = 64 // bit to indicate that we're reverse scanning. | |
87 Back = 128 // bit to indicate that we're backtracking. | |
88 Back2 = 256 // bit to indicate that we're backtracking on a second branch. | |
89 Ci = 512 // bit to indicate that we're case-insensitive. | |
90 ) | |
91 | |
92 type Code struct { | |
93 Codes []int // the code | |
94 Strings [][]rune // string table | |
95 Sets []*CharSet //character set table | |
96 TrackCount int // how many instructions use backtracking | |
97 Caps map[int]int // mapping of user group numbers -> impl group slots | |
98 Capsize int // number of impl group slots | |
99 FcPrefix *Prefix // the set of candidate first characters (may be null) | |
100 BmPrefix *BmPrefix // the fixed prefix string as a Boyer-Moore machine (may be null) | |
101 Anchors AnchorLoc // the set of zero-length start anchors (RegexFCD.Bol, etc) | |
102 RightToLeft bool // true if right to left | |
103 } | |
104 | |
105 func opcodeBacktracks(op InstOp) bool { | |
106 op &= Mask | |
107 | |
108 switch op { | |
109 case Oneloop, Notoneloop, Setloop, Onelazy, Notonelazy, Setlazy, Lazybranch, Branchmark, Lazybranchmark, | |
110 Nullcount, Setcount, Branchcount, Lazybranchcount, Setmark, Capturemark, Getmark, Setjump, Backjump, | |
111 Forejump, Goto: | |
112 return true | |
113 | |
114 default: | |
115 return false | |
116 } | |
117 } | |
118 | |
119 func opcodeSize(op InstOp) int { | |
120 op &= Mask | |
121 | |
122 switch op { | |
123 case Nothing, Bol, Eol, Boundary, Nonboundary, ECMABoundary, NonECMABoundary, Beginning, Start, EndZ, | |
124 End, Nullmark, Setmark, Getmark, Setjump, Backjump, Forejump, Stop: | |
125 return 1 | |
126 | |
127 case One, Notone, Multi, Ref, Testref, Goto, Nullcount, Setcount, Lazybranch, Branchmark, Lazybranchmark, | |
128 Prune, Set: | |
129 return 2 | |
130 | |
131 case Capturemark, Branchcount, Lazybranchcount, Onerep, Notonerep, Oneloop, Notoneloop, Onelazy, Notonelazy, | |
132 Setlazy, Setrep, Setloop: | |
133 return 3 | |
134 | |
135 default: | |
136 panic(fmt.Errorf("Unexpected op code: %v", op)) | |
137 } | |
138 } | |
139 | |
140 var codeStr = []string{ | |
141 "Onerep", "Notonerep", "Setrep", | |
142 "Oneloop", "Notoneloop", "Setloop", | |
143 "Onelazy", "Notonelazy", "Setlazy", | |
144 "One", "Notone", "Set", | |
145 "Multi", "Ref", | |
146 "Bol", "Eol", "Boundary", "Nonboundary", "Beginning", "Start", "EndZ", "End", | |
147 "Nothing", | |
148 "Lazybranch", "Branchmark", "Lazybranchmark", | |
149 "Nullcount", "Setcount", "Branchcount", "Lazybranchcount", | |
150 "Nullmark", "Setmark", "Capturemark", "Getmark", | |
151 "Setjump", "Backjump", "Forejump", "Testref", "Goto", | |
152 "Prune", "Stop", | |
153 "ECMABoundary", "NonECMABoundary", | |
154 } | |
155 | |
156 func operatorDescription(op InstOp) string { | |
157 desc := codeStr[op&Mask] | |
158 if (op & Ci) != 0 { | |
159 desc += "-Ci" | |
160 } | |
161 if (op & Rtl) != 0 { | |
162 desc += "-Rtl" | |
163 } | |
164 if (op & Back) != 0 { | |
165 desc += "-Back" | |
166 } | |
167 if (op & Back2) != 0 { | |
168 desc += "-Back2" | |
169 } | |
170 | |
171 return desc | |
172 } | |
173 | |
174 // OpcodeDescription is a humman readable string of the specific offset | |
175 func (c *Code) OpcodeDescription(offset int) string { | |
176 buf := &bytes.Buffer{} | |
177 | |
178 op := InstOp(c.Codes[offset]) | |
179 fmt.Fprintf(buf, "%06d ", offset) | |
180 | |
181 if opcodeBacktracks(op & Mask) { | |
182 buf.WriteString("*") | |
183 } else { | |
184 buf.WriteString(" ") | |
185 } | |
186 buf.WriteString(operatorDescription(op)) | |
187 buf.WriteString("(") | |
188 op &= Mask | |
189 | |
190 switch op { | |
191 case One, Notone, Onerep, Notonerep, Oneloop, Notoneloop, Onelazy, Notonelazy: | |
192 buf.WriteString("Ch = ") | |
193 buf.WriteString(CharDescription(rune(c.Codes[offset+1]))) | |
194 | |
195 case Set, Setrep, Setloop, Setlazy: | |
196 buf.WriteString("Set = ") | |
197 buf.WriteString(c.Sets[c.Codes[offset+1]].String()) | |
198 | |
199 case Multi: | |
200 fmt.Fprintf(buf, "String = %s", string(c.Strings[c.Codes[offset+1]])) | |
201 | |
202 case Ref, Testref: | |
203 fmt.Fprintf(buf, "Index = %d", c.Codes[offset+1]) | |
204 | |
205 case Capturemark: | |
206 fmt.Fprintf(buf, "Index = %d", c.Codes[offset+1]) | |
207 if c.Codes[offset+2] != -1 { | |
208 fmt.Fprintf(buf, ", Unindex = %d", c.Codes[offset+2]) | |
209 } | |
210 | |
211 case Nullcount, Setcount: | |
212 fmt.Fprintf(buf, "Value = %d", c.Codes[offset+1]) | |
213 | |
214 case Goto, Lazybranch, Branchmark, Lazybranchmark, Branchcount, Lazybranchcount: | |
215 fmt.Fprintf(buf, "Addr = %d", c.Codes[offset+1]) | |
216 } | |
217 | |
218 switch op { | |
219 case Onerep, Notonerep, Oneloop, Notoneloop, Onelazy, Notonelazy, Setrep, Setloop, Setlazy: | |
220 buf.WriteString(", Rep = ") | |
221 if c.Codes[offset+2] == math.MaxInt32 { | |
222 buf.WriteString("inf") | |
223 } else { | |
224 fmt.Fprintf(buf, "%d", c.Codes[offset+2]) | |
225 } | |
226 | |
227 case Branchcount, Lazybranchcount: | |
228 buf.WriteString(", Limit = ") | |
229 if c.Codes[offset+2] == math.MaxInt32 { | |
230 buf.WriteString("inf") | |
231 } else { | |
232 fmt.Fprintf(buf, "%d", c.Codes[offset+2]) | |
233 } | |
234 | |
235 } | |
236 | |
237 buf.WriteString(")") | |
238 | |
239 return buf.String() | |
240 } | |
241 | |
242 func (c *Code) Dump() string { | |
243 buf := &bytes.Buffer{} | |
244 | |
245 if c.RightToLeft { | |
246 fmt.Fprintln(buf, "Direction: right-to-left") | |
247 } else { | |
248 fmt.Fprintln(buf, "Direction: left-to-right") | |
249 } | |
250 if c.FcPrefix == nil { | |
251 fmt.Fprintln(buf, "Firstchars: n/a") | |
252 } else { | |
253 fmt.Fprintf(buf, "Firstchars: %v\n", c.FcPrefix.PrefixSet.String()) | |
254 } | |
255 | |
256 if c.BmPrefix == nil { | |
257 fmt.Fprintln(buf, "Prefix: n/a") | |
258 } else { | |
259 fmt.Fprintf(buf, "Prefix: %v\n", Escape(c.BmPrefix.String())) | |
260 } | |
261 | |
262 fmt.Fprintf(buf, "Anchors: %v\n", c.Anchors) | |
263 fmt.Fprintln(buf) | |
264 | |
265 if c.BmPrefix != nil { | |
266 fmt.Fprintln(buf, "BoyerMoore:") | |
267 fmt.Fprintln(buf, c.BmPrefix.Dump(" ")) | |
268 } | |
269 for i := 0; i < len(c.Codes); i += opcodeSize(InstOp(c.Codes[i])) { | |
270 fmt.Fprintln(buf, c.OpcodeDescription(i)) | |
271 } | |
272 | |
273 return buf.String() | |
274 } |