66
|
1 //
|
|
2 // Blackfriday Markdown Processor
|
|
3 // Available at http://github.com/russross/blackfriday
|
|
4 //
|
|
5 // Copyright © 2011 Russ Ross <russ@russross.com>.
|
|
6 // Distributed under the Simplified BSD License.
|
|
7 // See README.md for details.
|
|
8 //
|
|
9
|
|
10 //
|
|
11 // Functions to parse block-level elements.
|
|
12 //
|
|
13
|
|
14 package blackfriday
|
|
15
|
|
16 import (
|
|
17 "bytes"
|
|
18 "html"
|
|
19 "regexp"
|
|
20 "strings"
|
|
21 "unicode"
|
|
22 )
|
|
23
|
|
const (
	// charEntity is a regular expression source matching an HTML character
	// reference: hexadecimal (&#x41;), decimal (&#65;) or named (&amp;).
	charEntity = "&(?:#x[a-f0-9]{1,8}|#[0-9]{1,8}|[a-z][a-z0-9]{1,31});"
	// escapable is a regular expression character class listing the ASCII
	// punctuation characters that may follow a backslash escape.
	escapable = "[!\"#$%&'()*+,./:;<=>?@[\\\\\\]^_`{|}~-]"
)

var (
	// reBackslashOrAmp is a cheap pre-check used by unescapeString: it
	// matches any input that contains a backslash or an ampersand.
	//
	// The backslash has to be doubled at the regexp level: the previous
	// pattern "[\\&]" compiled to the class [\&], i.e. an escaped ampersand,
	// and therefore never matched a backslash at all.
	reBackslashOrAmp = regexp.MustCompile("[\\\\&]")
	// reEntityOrEscapedChar matches (case-insensitively) either a backslash
	// followed by an escapable character or a character reference.
	reEntityOrEscapedChar = regexp.MustCompile("(?i)\\\\" + escapable + "|" + charEntity)
)
|
|
33
|
|
34 // Parse block-level data.
|
|
35 // Note: this function and many that it calls assume that
|
|
36 // the input buffer ends with a newline.
|
|
37 func (p *Markdown) block(data []byte) {
|
|
38 // this is called recursively: enforce a maximum depth
|
|
39 if p.nesting >= p.maxNesting {
|
|
40 return
|
|
41 }
|
|
42 p.nesting++
|
|
43
|
|
44 // parse out one block-level construct at a time
|
|
45 for len(data) > 0 {
|
|
46 // prefixed heading:
|
|
47 //
|
|
48 // # Heading 1
|
|
49 // ## Heading 2
|
|
50 // ...
|
|
51 // ###### Heading 6
|
|
52 if p.isPrefixHeading(data) {
|
|
53 data = data[p.prefixHeading(data):]
|
|
54 continue
|
|
55 }
|
|
56
|
|
57 // block of preformatted HTML:
|
|
58 //
|
|
59 // <div>
|
|
60 // ...
|
|
61 // </div>
|
|
62 if data[0] == '<' {
|
|
63 if i := p.html(data, true); i > 0 {
|
|
64 data = data[i:]
|
|
65 continue
|
|
66 }
|
|
67 }
|
|
68
|
|
69 // title block
|
|
70 //
|
|
71 // % stuff
|
|
72 // % more stuff
|
|
73 // % even more stuff
|
|
74 if p.extensions&Titleblock != 0 {
|
|
75 if data[0] == '%' {
|
|
76 if i := p.titleBlock(data, true); i > 0 {
|
|
77 data = data[i:]
|
|
78 continue
|
|
79 }
|
|
80 }
|
|
81 }
|
|
82
|
|
83 // blank lines. note: returns the # of bytes to skip
|
|
84 if i := p.isEmpty(data); i > 0 {
|
|
85 data = data[i:]
|
|
86 continue
|
|
87 }
|
|
88
|
|
89 // indented code block:
|
|
90 //
|
|
91 // func max(a, b int) int {
|
|
92 // if a > b {
|
|
93 // return a
|
|
94 // }
|
|
95 // return b
|
|
96 // }
|
|
97 if p.codePrefix(data) > 0 {
|
|
98 data = data[p.code(data):]
|
|
99 continue
|
|
100 }
|
|
101
|
|
102 // fenced code block:
|
|
103 //
|
|
104 // ``` go
|
|
105 // func fact(n int) int {
|
|
106 // if n <= 1 {
|
|
107 // return n
|
|
108 // }
|
|
109 // return n * fact(n-1)
|
|
110 // }
|
|
111 // ```
|
|
112 if p.extensions&FencedCode != 0 {
|
|
113 if i := p.fencedCodeBlock(data, true); i > 0 {
|
|
114 data = data[i:]
|
|
115 continue
|
|
116 }
|
|
117 }
|
|
118
|
|
119 // horizontal rule:
|
|
120 //
|
|
121 // ------
|
|
122 // or
|
|
123 // ******
|
|
124 // or
|
|
125 // ______
|
|
126 if p.isHRule(data) {
|
|
127 p.addBlock(HorizontalRule, nil)
|
|
128 var i int
|
|
129 for i = 0; i < len(data) && data[i] != '\n'; i++ {
|
|
130 }
|
|
131 data = data[i:]
|
|
132 continue
|
|
133 }
|
|
134
|
|
135 // block quote:
|
|
136 //
|
|
137 // > A big quote I found somewhere
|
|
138 // > on the web
|
|
139 if p.quotePrefix(data) > 0 {
|
|
140 data = data[p.quote(data):]
|
|
141 continue
|
|
142 }
|
|
143
|
|
144 // table:
|
|
145 //
|
|
146 // Name | Age | Phone
|
|
147 // ------|-----|---------
|
|
148 // Bob | 31 | 555-1234
|
|
149 // Alice | 27 | 555-4321
|
|
150 if p.extensions&Tables != 0 {
|
|
151 if i := p.table(data); i > 0 {
|
|
152 data = data[i:]
|
|
153 continue
|
|
154 }
|
|
155 }
|
|
156
|
|
157 // an itemized/unordered list:
|
|
158 //
|
|
159 // * Item 1
|
|
160 // * Item 2
|
|
161 //
|
|
162 // also works with + or -
|
|
163 if p.uliPrefix(data) > 0 {
|
|
164 data = data[p.list(data, 0):]
|
|
165 continue
|
|
166 }
|
|
167
|
|
168 // a numbered/ordered list:
|
|
169 //
|
|
170 // 1. Item 1
|
|
171 // 2. Item 2
|
|
172 if p.oliPrefix(data) > 0 {
|
|
173 data = data[p.list(data, ListTypeOrdered):]
|
|
174 continue
|
|
175 }
|
|
176
|
|
177 // definition lists:
|
|
178 //
|
|
179 // Term 1
|
|
180 // : Definition a
|
|
181 // : Definition b
|
|
182 //
|
|
183 // Term 2
|
|
184 // : Definition c
|
|
185 if p.extensions&DefinitionLists != 0 {
|
|
186 if p.dliPrefix(data) > 0 {
|
|
187 data = data[p.list(data, ListTypeDefinition):]
|
|
188 continue
|
|
189 }
|
|
190 }
|
|
191
|
|
192 // anything else must look like a normal paragraph
|
|
193 // note: this finds underlined headings, too
|
|
194 data = data[p.paragraph(data):]
|
|
195 }
|
|
196
|
|
197 p.nesting--
|
|
198 }
|
|
199
|
|
200 func (p *Markdown) addBlock(typ NodeType, content []byte) *Node {
|
|
201 p.closeUnmatchedBlocks()
|
|
202 container := p.addChild(typ, 0)
|
|
203 container.content = content
|
|
204 return container
|
|
205 }
|
|
206
|
|
207 func (p *Markdown) isPrefixHeading(data []byte) bool {
|
|
208 if data[0] != '#' {
|
|
209 return false
|
|
210 }
|
|
211
|
|
212 if p.extensions&SpaceHeadings != 0 {
|
|
213 level := 0
|
|
214 for level < 6 && level < len(data) && data[level] == '#' {
|
|
215 level++
|
|
216 }
|
|
217 if level == len(data) || data[level] != ' ' {
|
|
218 return false
|
|
219 }
|
|
220 }
|
|
221 return true
|
|
222 }
|
|
223
|
|
224 func (p *Markdown) prefixHeading(data []byte) int {
|
|
225 level := 0
|
|
226 for level < 6 && level < len(data) && data[level] == '#' {
|
|
227 level++
|
|
228 }
|
|
229 i := skipChar(data, level, ' ')
|
|
230 end := skipUntilChar(data, i, '\n')
|
|
231 skip := end
|
|
232 id := ""
|
|
233 if p.extensions&HeadingIDs != 0 {
|
|
234 j, k := 0, 0
|
|
235 // find start/end of heading id
|
|
236 for j = i; j < end-1 && (data[j] != '{' || data[j+1] != '#'); j++ {
|
|
237 }
|
|
238 for k = j + 1; k < end && data[k] != '}'; k++ {
|
|
239 }
|
|
240 // extract heading id iff found
|
|
241 if j < end && k < end {
|
|
242 id = string(data[j+2 : k])
|
|
243 end = j
|
|
244 skip = k + 1
|
|
245 for end > 0 && data[end-1] == ' ' {
|
|
246 end--
|
|
247 }
|
|
248 }
|
|
249 }
|
|
250 for end > 0 && data[end-1] == '#' {
|
|
251 if isBackslashEscaped(data, end-1) {
|
|
252 break
|
|
253 }
|
|
254 end--
|
|
255 }
|
|
256 for end > 0 && data[end-1] == ' ' {
|
|
257 end--
|
|
258 }
|
|
259 if end > i {
|
|
260 if id == "" && p.extensions&AutoHeadingIDs != 0 {
|
|
261 id = SanitizedAnchorName(string(data[i:end]))
|
|
262 }
|
|
263 block := p.addBlock(Heading, data[i:end])
|
|
264 block.HeadingID = id
|
|
265 block.Level = level
|
|
266 }
|
|
267 return skip
|
|
268 }
|
|
269
|
|
270 func (p *Markdown) isUnderlinedHeading(data []byte) int {
|
|
271 // test of level 1 heading
|
|
272 if data[0] == '=' {
|
|
273 i := skipChar(data, 1, '=')
|
|
274 i = skipChar(data, i, ' ')
|
|
275 if i < len(data) && data[i] == '\n' {
|
|
276 return 1
|
|
277 }
|
|
278 return 0
|
|
279 }
|
|
280
|
|
281 // test of level 2 heading
|
|
282 if data[0] == '-' {
|
|
283 i := skipChar(data, 1, '-')
|
|
284 i = skipChar(data, i, ' ')
|
|
285 if i < len(data) && data[i] == '\n' {
|
|
286 return 2
|
|
287 }
|
|
288 return 0
|
|
289 }
|
|
290
|
|
291 return 0
|
|
292 }
|
|
293
|
|
294 func (p *Markdown) titleBlock(data []byte, doRender bool) int {
|
|
295 if data[0] != '%' {
|
|
296 return 0
|
|
297 }
|
|
298 splitData := bytes.Split(data, []byte("\n"))
|
|
299 var i int
|
|
300 for idx, b := range splitData {
|
|
301 if !bytes.HasPrefix(b, []byte("%")) {
|
|
302 i = idx // - 1
|
|
303 break
|
|
304 }
|
|
305 }
|
|
306
|
|
307 data = bytes.Join(splitData[0:i], []byte("\n"))
|
|
308 consumed := len(data)
|
|
309 data = bytes.TrimPrefix(data, []byte("% "))
|
|
310 data = bytes.Replace(data, []byte("\n% "), []byte("\n"), -1)
|
|
311 block := p.addBlock(Heading, data)
|
|
312 block.Level = 1
|
|
313 block.IsTitleblock = true
|
|
314
|
|
315 return consumed
|
|
316 }
|
|
317
|
|
318 func (p *Markdown) html(data []byte, doRender bool) int {
|
|
319 var i, j int
|
|
320
|
|
321 // identify the opening tag
|
|
322 if data[0] != '<' {
|
|
323 return 0
|
|
324 }
|
|
325 curtag, tagfound := p.htmlFindTag(data[1:])
|
|
326
|
|
327 // handle special cases
|
|
328 if !tagfound {
|
|
329 // check for an HTML comment
|
|
330 if size := p.htmlComment(data, doRender); size > 0 {
|
|
331 return size
|
|
332 }
|
|
333
|
|
334 // check for an <hr> tag
|
|
335 if size := p.htmlHr(data, doRender); size > 0 {
|
|
336 return size
|
|
337 }
|
|
338
|
|
339 // no special case recognized
|
|
340 return 0
|
|
341 }
|
|
342
|
|
343 // look for an unindented matching closing tag
|
|
344 // followed by a blank line
|
|
345 found := false
|
|
346 /*
|
|
347 closetag := []byte("\n</" + curtag + ">")
|
|
348 j = len(curtag) + 1
|
|
349 for !found {
|
|
350 // scan for a closing tag at the beginning of a line
|
|
351 if skip := bytes.Index(data[j:], closetag); skip >= 0 {
|
|
352 j += skip + len(closetag)
|
|
353 } else {
|
|
354 break
|
|
355 }
|
|
356
|
|
357 // see if it is the only thing on the line
|
|
358 if skip := p.isEmpty(data[j:]); skip > 0 {
|
|
359 // see if it is followed by a blank line/eof
|
|
360 j += skip
|
|
361 if j >= len(data) {
|
|
362 found = true
|
|
363 i = j
|
|
364 } else {
|
|
365 if skip := p.isEmpty(data[j:]); skip > 0 {
|
|
366 j += skip
|
|
367 found = true
|
|
368 i = j
|
|
369 }
|
|
370 }
|
|
371 }
|
|
372 }
|
|
373 */
|
|
374
|
|
375 // if not found, try a second pass looking for indented match
|
|
376 // but not if tag is "ins" or "del" (following original Markdown.pl)
|
|
377 if !found && curtag != "ins" && curtag != "del" {
|
|
378 i = 1
|
|
379 for i < len(data) {
|
|
380 i++
|
|
381 for i < len(data) && !(data[i-1] == '<' && data[i] == '/') {
|
|
382 i++
|
|
383 }
|
|
384
|
|
385 if i+2+len(curtag) >= len(data) {
|
|
386 break
|
|
387 }
|
|
388
|
|
389 j = p.htmlFindEnd(curtag, data[i-1:])
|
|
390
|
|
391 if j > 0 {
|
|
392 i += j - 1
|
|
393 found = true
|
|
394 break
|
|
395 }
|
|
396 }
|
|
397 }
|
|
398
|
|
399 if !found {
|
|
400 return 0
|
|
401 }
|
|
402
|
|
403 // the end of the block has been found
|
|
404 if doRender {
|
|
405 // trim newlines
|
|
406 end := i
|
|
407 for end > 0 && data[end-1] == '\n' {
|
|
408 end--
|
|
409 }
|
|
410 finalizeHTMLBlock(p.addBlock(HTMLBlock, data[:end]))
|
|
411 }
|
|
412
|
|
413 return i
|
|
414 }
|
|
415
|
|
416 func finalizeHTMLBlock(block *Node) {
|
|
417 block.Literal = block.content
|
|
418 block.content = nil
|
|
419 }
|
|
420
|
|
421 // HTML comment, lax form
|
|
422 func (p *Markdown) htmlComment(data []byte, doRender bool) int {
|
|
423 i := p.inlineHTMLComment(data)
|
|
424 // needs to end with a blank line
|
|
425 if j := p.isEmpty(data[i:]); j > 0 {
|
|
426 size := i + j
|
|
427 if doRender {
|
|
428 // trim trailing newlines
|
|
429 end := size
|
|
430 for end > 0 && data[end-1] == '\n' {
|
|
431 end--
|
|
432 }
|
|
433 block := p.addBlock(HTMLBlock, data[:end])
|
|
434 finalizeHTMLBlock(block)
|
|
435 }
|
|
436 return size
|
|
437 }
|
|
438 return 0
|
|
439 }
|
|
440
|
|
441 // HR, which is the only self-closing block tag considered
|
|
442 func (p *Markdown) htmlHr(data []byte, doRender bool) int {
|
|
443 if len(data) < 4 {
|
|
444 return 0
|
|
445 }
|
|
446 if data[0] != '<' || (data[1] != 'h' && data[1] != 'H') || (data[2] != 'r' && data[2] != 'R') {
|
|
447 return 0
|
|
448 }
|
|
449 if data[3] != ' ' && data[3] != '/' && data[3] != '>' {
|
|
450 // not an <hr> tag after all; at least not a valid one
|
|
451 return 0
|
|
452 }
|
|
453 i := 3
|
|
454 for i < len(data) && data[i] != '>' && data[i] != '\n' {
|
|
455 i++
|
|
456 }
|
|
457 if i < len(data) && data[i] == '>' {
|
|
458 i++
|
|
459 if j := p.isEmpty(data[i:]); j > 0 {
|
|
460 size := i + j
|
|
461 if doRender {
|
|
462 // trim newlines
|
|
463 end := size
|
|
464 for end > 0 && data[end-1] == '\n' {
|
|
465 end--
|
|
466 }
|
|
467 finalizeHTMLBlock(p.addBlock(HTMLBlock, data[:end]))
|
|
468 }
|
|
469 return size
|
|
470 }
|
|
471 }
|
|
472 return 0
|
|
473 }
|
|
474
|
|
475 func (p *Markdown) htmlFindTag(data []byte) (string, bool) {
|
|
476 i := 0
|
|
477 for i < len(data) && isalnum(data[i]) {
|
|
478 i++
|
|
479 }
|
|
480 key := string(data[:i])
|
|
481 if _, ok := blockTags[key]; ok {
|
|
482 return key, true
|
|
483 }
|
|
484 return "", false
|
|
485 }
|
|
486
|
|
487 func (p *Markdown) htmlFindEnd(tag string, data []byte) int {
|
|
488 // assume data[0] == '<' && data[1] == '/' already tested
|
|
489 if tag == "hr" {
|
|
490 return 2
|
|
491 }
|
|
492 // check if tag is a match
|
|
493 closetag := []byte("</" + tag + ">")
|
|
494 if !bytes.HasPrefix(data, closetag) {
|
|
495 return 0
|
|
496 }
|
|
497 i := len(closetag)
|
|
498
|
|
499 // check that the rest of the line is blank
|
|
500 skip := 0
|
|
501 if skip = p.isEmpty(data[i:]); skip == 0 {
|
|
502 return 0
|
|
503 }
|
|
504 i += skip
|
|
505 skip = 0
|
|
506
|
|
507 if i >= len(data) {
|
|
508 return i
|
|
509 }
|
|
510
|
|
511 if p.extensions&LaxHTMLBlocks != 0 {
|
|
512 return i
|
|
513 }
|
|
514 if skip = p.isEmpty(data[i:]); skip == 0 {
|
|
515 // following line must be blank
|
|
516 return 0
|
|
517 }
|
|
518
|
|
519 return i + skip
|
|
520 }
|
|
521
|
|
522 func (*Markdown) isEmpty(data []byte) int {
|
|
523 // it is okay to call isEmpty on an empty buffer
|
|
524 if len(data) == 0 {
|
|
525 return 0
|
|
526 }
|
|
527
|
|
528 var i int
|
|
529 for i = 0; i < len(data) && data[i] != '\n'; i++ {
|
|
530 if data[i] != ' ' && data[i] != '\t' {
|
|
531 return 0
|
|
532 }
|
|
533 }
|
|
534 if i < len(data) && data[i] == '\n' {
|
|
535 i++
|
|
536 }
|
|
537 return i
|
|
538 }
|
|
539
|
|
540 func (*Markdown) isHRule(data []byte) bool {
|
|
541 i := 0
|
|
542
|
|
543 // skip up to three spaces
|
|
544 for i < 3 && data[i] == ' ' {
|
|
545 i++
|
|
546 }
|
|
547
|
|
548 // look at the hrule char
|
|
549 if data[i] != '*' && data[i] != '-' && data[i] != '_' {
|
|
550 return false
|
|
551 }
|
|
552 c := data[i]
|
|
553
|
|
554 // the whole line must be the char or whitespace
|
|
555 n := 0
|
|
556 for i < len(data) && data[i] != '\n' {
|
|
557 switch {
|
|
558 case data[i] == c:
|
|
559 n++
|
|
560 case data[i] != ' ':
|
|
561 return false
|
|
562 }
|
|
563 i++
|
|
564 }
|
|
565
|
|
566 return n >= 3
|
|
567 }
|
|
568
|
|
569 // isFenceLine checks if there's a fence line (e.g., ``` or ``` go) at the beginning of data,
|
|
570 // and returns the end index if so, or 0 otherwise. It also returns the marker found.
|
|
571 // If info is not nil, it gets set to the syntax specified in the fence line.
|
|
572 func isFenceLine(data []byte, info *string, oldmarker string) (end int, marker string) {
|
|
573 i, size := 0, 0
|
|
574
|
|
575 // skip up to three spaces
|
|
576 for i < len(data) && i < 3 && data[i] == ' ' {
|
|
577 i++
|
|
578 }
|
|
579
|
|
580 // check for the marker characters: ~ or `
|
|
581 if i >= len(data) {
|
|
582 return 0, ""
|
|
583 }
|
|
584 if data[i] != '~' && data[i] != '`' {
|
|
585 return 0, ""
|
|
586 }
|
|
587
|
|
588 c := data[i]
|
|
589
|
|
590 // the whole line must be the same char or whitespace
|
|
591 for i < len(data) && data[i] == c {
|
|
592 size++
|
|
593 i++
|
|
594 }
|
|
595
|
|
596 // the marker char must occur at least 3 times
|
|
597 if size < 3 {
|
|
598 return 0, ""
|
|
599 }
|
|
600 marker = string(data[i-size : i])
|
|
601
|
|
602 // if this is the end marker, it must match the beginning marker
|
|
603 if oldmarker != "" && marker != oldmarker {
|
|
604 return 0, ""
|
|
605 }
|
|
606
|
|
607 // TODO(shurcooL): It's probably a good idea to simplify the 2 code paths here
|
|
608 // into one, always get the info string, and discard it if the caller doesn't care.
|
|
609 if info != nil {
|
|
610 infoLength := 0
|
|
611 i = skipChar(data, i, ' ')
|
|
612
|
|
613 if i >= len(data) {
|
|
614 if i == len(data) {
|
|
615 return i, marker
|
|
616 }
|
|
617 return 0, ""
|
|
618 }
|
|
619
|
|
620 infoStart := i
|
|
621
|
|
622 if data[i] == '{' {
|
|
623 i++
|
|
624 infoStart++
|
|
625
|
|
626 for i < len(data) && data[i] != '}' && data[i] != '\n' {
|
|
627 infoLength++
|
|
628 i++
|
|
629 }
|
|
630
|
|
631 if i >= len(data) || data[i] != '}' {
|
|
632 return 0, ""
|
|
633 }
|
|
634
|
|
635 // strip all whitespace at the beginning and the end
|
|
636 // of the {} block
|
|
637 for infoLength > 0 && isspace(data[infoStart]) {
|
|
638 infoStart++
|
|
639 infoLength--
|
|
640 }
|
|
641
|
|
642 for infoLength > 0 && isspace(data[infoStart+infoLength-1]) {
|
|
643 infoLength--
|
|
644 }
|
|
645 i++
|
|
646 i = skipChar(data, i, ' ')
|
|
647 } else {
|
|
648 for i < len(data) && !isverticalspace(data[i]) {
|
|
649 infoLength++
|
|
650 i++
|
|
651 }
|
|
652 }
|
|
653
|
|
654 *info = strings.TrimSpace(string(data[infoStart : infoStart+infoLength]))
|
|
655 }
|
|
656
|
|
657 if i == len(data) {
|
|
658 return i, marker
|
|
659 }
|
|
660 if i > len(data) || data[i] != '\n' {
|
|
661 return 0, ""
|
|
662 }
|
|
663 return i + 1, marker // Take newline into account.
|
|
664 }
|
|
665
|
|
666 // fencedCodeBlock returns the end index if data contains a fenced code block at the beginning,
|
|
667 // or 0 otherwise. It writes to out if doRender is true, otherwise it has no side effects.
|
|
668 // If doRender is true, a final newline is mandatory to recognize the fenced code block.
|
|
669 func (p *Markdown) fencedCodeBlock(data []byte, doRender bool) int {
|
|
670 var info string
|
|
671 beg, marker := isFenceLine(data, &info, "")
|
|
672 if beg == 0 || beg >= len(data) {
|
|
673 return 0
|
|
674 }
|
|
675 fenceLength := beg - 1
|
|
676
|
|
677 var work bytes.Buffer
|
|
678 work.Write([]byte(info))
|
|
679 work.WriteByte('\n')
|
|
680
|
|
681 for {
|
|
682 // safe to assume beg < len(data)
|
|
683
|
|
684 // check for the end of the code block
|
|
685 fenceEnd, _ := isFenceLine(data[beg:], nil, marker)
|
|
686 if fenceEnd != 0 {
|
|
687 beg += fenceEnd
|
|
688 break
|
|
689 }
|
|
690
|
|
691 // copy the current line
|
|
692 end := skipUntilChar(data, beg, '\n') + 1
|
|
693
|
|
694 // did we reach the end of the buffer without a closing marker?
|
|
695 if end >= len(data) {
|
|
696 return 0
|
|
697 }
|
|
698
|
|
699 // verbatim copy to the working buffer
|
|
700 if doRender {
|
|
701 work.Write(data[beg:end])
|
|
702 }
|
|
703 beg = end
|
|
704 }
|
|
705
|
|
706 if doRender {
|
|
707 block := p.addBlock(CodeBlock, work.Bytes()) // TODO: get rid of temp buffer
|
|
708 block.IsFenced = true
|
|
709 block.FenceLength = fenceLength
|
|
710 finalizeCodeBlock(block)
|
|
711 }
|
|
712
|
|
713 return beg
|
|
714 }
|
|
715
|
|
// unescapeChar converts a single escape sequence to the text it stands for.
// A sequence starting with a backslash yields the byte that follows it;
// anything else is passed through html.UnescapeString. str must hold at
// least one byte, and at least two when it starts with a backslash.
func unescapeChar(str []byte) []byte {
	if str[0] != '\\' {
		return []byte(html.UnescapeString(string(str)))
	}
	return []byte{str[1]}
}
|
|
722
|
|
723 func unescapeString(str []byte) []byte {
|
|
724 if reBackslashOrAmp.Match(str) {
|
|
725 return reEntityOrEscapedChar.ReplaceAllFunc(str, unescapeChar)
|
|
726 }
|
|
727 return str
|
|
728 }
|
|
729
|
|
730 func finalizeCodeBlock(block *Node) {
|
|
731 if block.IsFenced {
|
|
732 newlinePos := bytes.IndexByte(block.content, '\n')
|
|
733 firstLine := block.content[:newlinePos]
|
|
734 rest := block.content[newlinePos+1:]
|
|
735 block.Info = unescapeString(bytes.Trim(firstLine, "\n"))
|
|
736 block.Literal = rest
|
|
737 } else {
|
|
738 block.Literal = block.content
|
|
739 }
|
|
740 block.content = nil
|
|
741 }
|
|
742
|
|
743 func (p *Markdown) table(data []byte) int {
|
|
744 table := p.addBlock(Table, nil)
|
|
745 i, columns := p.tableHeader(data)
|
|
746 if i == 0 {
|
|
747 p.tip = table.Parent
|
|
748 table.Unlink()
|
|
749 return 0
|
|
750 }
|
|
751
|
|
752 p.addBlock(TableBody, nil)
|
|
753
|
|
754 for i < len(data) {
|
|
755 pipes, rowStart := 0, i
|
|
756 for ; i < len(data) && data[i] != '\n'; i++ {
|
|
757 if data[i] == '|' {
|
|
758 pipes++
|
|
759 }
|
|
760 }
|
|
761
|
|
762 if pipes == 0 {
|
|
763 i = rowStart
|
|
764 break
|
|
765 }
|
|
766
|
|
767 // include the newline in data sent to tableRow
|
|
768 if i < len(data) && data[i] == '\n' {
|
|
769 i++
|
|
770 }
|
|
771 p.tableRow(data[rowStart:i], columns, false)
|
|
772 }
|
|
773
|
|
774 return i
|
|
775 }
|
|
776
|
|
// isBackslashEscaped reports whether the byte at position i of data is
// immediately preceded by an odd number of backslashes.
func isBackslashEscaped(data []byte, i int) bool {
	count := 0
	for j := i - 1; j >= 0 && data[j] == '\\'; j-- {
		count++
	}
	return count%2 == 1
}
|
|
785
|
|
786 func (p *Markdown) tableHeader(data []byte) (size int, columns []CellAlignFlags) {
|
|
787 i := 0
|
|
788 colCount := 1
|
|
789 for i = 0; i < len(data) && data[i] != '\n'; i++ {
|
|
790 if data[i] == '|' && !isBackslashEscaped(data, i) {
|
|
791 colCount++
|
|
792 }
|
|
793 }
|
|
794
|
|
795 // doesn't look like a table header
|
|
796 if colCount == 1 {
|
|
797 return
|
|
798 }
|
|
799
|
|
800 // include the newline in the data sent to tableRow
|
|
801 j := i
|
|
802 if j < len(data) && data[j] == '\n' {
|
|
803 j++
|
|
804 }
|
|
805 header := data[:j]
|
|
806
|
|
807 // column count ignores pipes at beginning or end of line
|
|
808 if data[0] == '|' {
|
|
809 colCount--
|
|
810 }
|
|
811 if i > 2 && data[i-1] == '|' && !isBackslashEscaped(data, i-1) {
|
|
812 colCount--
|
|
813 }
|
|
814
|
|
815 columns = make([]CellAlignFlags, colCount)
|
|
816
|
|
817 // move on to the header underline
|
|
818 i++
|
|
819 if i >= len(data) {
|
|
820 return
|
|
821 }
|
|
822
|
|
823 if data[i] == '|' && !isBackslashEscaped(data, i) {
|
|
824 i++
|
|
825 }
|
|
826 i = skipChar(data, i, ' ')
|
|
827
|
|
828 // each column header is of form: / *:?-+:? *|/ with # dashes + # colons >= 3
|
|
829 // and trailing | optional on last column
|
|
830 col := 0
|
|
831 for i < len(data) && data[i] != '\n' {
|
|
832 dashes := 0
|
|
833
|
|
834 if data[i] == ':' {
|
|
835 i++
|
|
836 columns[col] |= TableAlignmentLeft
|
|
837 dashes++
|
|
838 }
|
|
839 for i < len(data) && data[i] == '-' {
|
|
840 i++
|
|
841 dashes++
|
|
842 }
|
|
843 if i < len(data) && data[i] == ':' {
|
|
844 i++
|
|
845 columns[col] |= TableAlignmentRight
|
|
846 dashes++
|
|
847 }
|
|
848 for i < len(data) && data[i] == ' ' {
|
|
849 i++
|
|
850 }
|
|
851 if i == len(data) {
|
|
852 return
|
|
853 }
|
|
854 // end of column test is messy
|
|
855 switch {
|
|
856 case dashes < 3:
|
|
857 // not a valid column
|
|
858 return
|
|
859
|
|
860 case data[i] == '|' && !isBackslashEscaped(data, i):
|
|
861 // marker found, now skip past trailing whitespace
|
|
862 col++
|
|
863 i++
|
|
864 for i < len(data) && data[i] == ' ' {
|
|
865 i++
|
|
866 }
|
|
867
|
|
868 // trailing junk found after last column
|
|
869 if col >= colCount && i < len(data) && data[i] != '\n' {
|
|
870 return
|
|
871 }
|
|
872
|
|
873 case (data[i] != '|' || isBackslashEscaped(data, i)) && col+1 < colCount:
|
|
874 // something else found where marker was required
|
|
875 return
|
|
876
|
|
877 case data[i] == '\n':
|
|
878 // marker is optional for the last column
|
|
879 col++
|
|
880
|
|
881 default:
|
|
882 // trailing junk found after last column
|
|
883 return
|
|
884 }
|
|
885 }
|
|
886 if col != colCount {
|
|
887 return
|
|
888 }
|
|
889
|
|
890 p.addBlock(TableHead, nil)
|
|
891 p.tableRow(header, columns, true)
|
|
892 size = i
|
|
893 if size < len(data) && data[size] == '\n' {
|
|
894 size++
|
|
895 }
|
|
896 return
|
|
897 }
|
|
898
|
|
899 func (p *Markdown) tableRow(data []byte, columns []CellAlignFlags, header bool) {
|
|
900 p.addBlock(TableRow, nil)
|
|
901 i, col := 0, 0
|
|
902
|
|
903 if data[i] == '|' && !isBackslashEscaped(data, i) {
|
|
904 i++
|
|
905 }
|
|
906
|
|
907 for col = 0; col < len(columns) && i < len(data); col++ {
|
|
908 for i < len(data) && data[i] == ' ' {
|
|
909 i++
|
|
910 }
|
|
911
|
|
912 cellStart := i
|
|
913
|
|
914 for i < len(data) && (data[i] != '|' || isBackslashEscaped(data, i)) && data[i] != '\n' {
|
|
915 i++
|
|
916 }
|
|
917
|
|
918 cellEnd := i
|
|
919
|
|
920 // skip the end-of-cell marker, possibly taking us past end of buffer
|
|
921 i++
|
|
922
|
|
923 for cellEnd > cellStart && cellEnd-1 < len(data) && data[cellEnd-1] == ' ' {
|
|
924 cellEnd--
|
|
925 }
|
|
926
|
|
927 cell := p.addBlock(TableCell, data[cellStart:cellEnd])
|
|
928 cell.IsHeader = header
|
|
929 cell.Align = columns[col]
|
|
930 }
|
|
931
|
|
932 // pad it out with empty columns to get the right number
|
|
933 for ; col < len(columns); col++ {
|
|
934 cell := p.addBlock(TableCell, nil)
|
|
935 cell.IsHeader = header
|
|
936 cell.Align = columns[col]
|
|
937 }
|
|
938
|
|
939 // silently ignore rows with too many cells
|
|
940 }
|
|
941
|
|
942 // returns blockquote prefix length
|
|
943 func (p *Markdown) quotePrefix(data []byte) int {
|
|
944 i := 0
|
|
945 for i < 3 && i < len(data) && data[i] == ' ' {
|
|
946 i++
|
|
947 }
|
|
948 if i < len(data) && data[i] == '>' {
|
|
949 if i+1 < len(data) && data[i+1] == ' ' {
|
|
950 return i + 2
|
|
951 }
|
|
952 return i + 1
|
|
953 }
|
|
954 return 0
|
|
955 }
|
|
956
|
|
957 // blockquote ends with at least one blank line
|
|
958 // followed by something without a blockquote prefix
|
|
959 func (p *Markdown) terminateBlockquote(data []byte, beg, end int) bool {
|
|
960 if p.isEmpty(data[beg:]) <= 0 {
|
|
961 return false
|
|
962 }
|
|
963 if end >= len(data) {
|
|
964 return true
|
|
965 }
|
|
966 return p.quotePrefix(data[end:]) == 0 && p.isEmpty(data[end:]) == 0
|
|
967 }
|
|
968
|
|
969 // parse a blockquote fragment
|
|
970 func (p *Markdown) quote(data []byte) int {
|
|
971 block := p.addBlock(BlockQuote, nil)
|
|
972 var raw bytes.Buffer
|
|
973 beg, end := 0, 0
|
|
974 for beg < len(data) {
|
|
975 end = beg
|
|
976 // Step over whole lines, collecting them. While doing that, check for
|
|
977 // fenced code and if one's found, incorporate it altogether,
|
|
978 // irregardless of any contents inside it
|
|
979 for end < len(data) && data[end] != '\n' {
|
|
980 if p.extensions&FencedCode != 0 {
|
|
981 if i := p.fencedCodeBlock(data[end:], false); i > 0 {
|
|
982 // -1 to compensate for the extra end++ after the loop:
|
|
983 end += i - 1
|
|
984 break
|
|
985 }
|
|
986 }
|
|
987 end++
|
|
988 }
|
|
989 if end < len(data) && data[end] == '\n' {
|
|
990 end++
|
|
991 }
|
|
992 if pre := p.quotePrefix(data[beg:]); pre > 0 {
|
|
993 // skip the prefix
|
|
994 beg += pre
|
|
995 } else if p.terminateBlockquote(data, beg, end) {
|
|
996 break
|
|
997 }
|
|
998 // this line is part of the blockquote
|
|
999 raw.Write(data[beg:end])
|
|
1000 beg = end
|
|
1001 }
|
|
1002 p.block(raw.Bytes())
|
|
1003 p.finalize(block)
|
|
1004 return end
|
|
1005 }
|
|
1006
|
|
1007 // returns prefix length for block code
|
|
1008 func (p *Markdown) codePrefix(data []byte) int {
|
|
1009 if len(data) >= 1 && data[0] == '\t' {
|
|
1010 return 1
|
|
1011 }
|
|
1012 if len(data) >= 4 && data[0] == ' ' && data[1] == ' ' && data[2] == ' ' && data[3] == ' ' {
|
|
1013 return 4
|
|
1014 }
|
|
1015 return 0
|
|
1016 }
|
|
1017
|
|
1018 func (p *Markdown) code(data []byte) int {
|
|
1019 var work bytes.Buffer
|
|
1020
|
|
1021 i := 0
|
|
1022 for i < len(data) {
|
|
1023 beg := i
|
|
1024 for i < len(data) && data[i] != '\n' {
|
|
1025 i++
|
|
1026 }
|
|
1027 if i < len(data) && data[i] == '\n' {
|
|
1028 i++
|
|
1029 }
|
|
1030
|
|
1031 blankline := p.isEmpty(data[beg:i]) > 0
|
|
1032 if pre := p.codePrefix(data[beg:i]); pre > 0 {
|
|
1033 beg += pre
|
|
1034 } else if !blankline {
|
|
1035 // non-empty, non-prefixed line breaks the pre
|
|
1036 i = beg
|
|
1037 break
|
|
1038 }
|
|
1039
|
|
1040 // verbatim copy to the working buffer
|
|
1041 if blankline {
|
|
1042 work.WriteByte('\n')
|
|
1043 } else {
|
|
1044 work.Write(data[beg:i])
|
|
1045 }
|
|
1046 }
|
|
1047
|
|
1048 // trim all the \n off the end of work
|
|
1049 workbytes := work.Bytes()
|
|
1050 eol := len(workbytes)
|
|
1051 for eol > 0 && workbytes[eol-1] == '\n' {
|
|
1052 eol--
|
|
1053 }
|
|
1054 if eol != len(workbytes) {
|
|
1055 work.Truncate(eol)
|
|
1056 }
|
|
1057
|
|
1058 work.WriteByte('\n')
|
|
1059
|
|
1060 block := p.addBlock(CodeBlock, work.Bytes()) // TODO: get rid of temp buffer
|
|
1061 block.IsFenced = false
|
|
1062 finalizeCodeBlock(block)
|
|
1063
|
|
1064 return i
|
|
1065 }
|
|
1066
|
|
1067 // returns unordered list item prefix
|
|
1068 func (p *Markdown) uliPrefix(data []byte) int {
|
|
1069 i := 0
|
|
1070 // start with up to 3 spaces
|
|
1071 for i < len(data) && i < 3 && data[i] == ' ' {
|
|
1072 i++
|
|
1073 }
|
|
1074 if i >= len(data)-1 {
|
|
1075 return 0
|
|
1076 }
|
|
1077 // need one of {'*', '+', '-'} followed by a space or a tab
|
|
1078 if (data[i] != '*' && data[i] != '+' && data[i] != '-') ||
|
|
1079 (data[i+1] != ' ' && data[i+1] != '\t') {
|
|
1080 return 0
|
|
1081 }
|
|
1082 return i + 2
|
|
1083 }
|
|
1084
|
|
1085 // returns ordered list item prefix
|
|
1086 func (p *Markdown) oliPrefix(data []byte) int {
|
|
1087 i := 0
|
|
1088
|
|
1089 // start with up to 3 spaces
|
|
1090 for i < 3 && i < len(data) && data[i] == ' ' {
|
|
1091 i++
|
|
1092 }
|
|
1093
|
|
1094 // count the digits
|
|
1095 start := i
|
|
1096 for i < len(data) && data[i] >= '0' && data[i] <= '9' {
|
|
1097 i++
|
|
1098 }
|
|
1099 if start == i || i >= len(data)-1 {
|
|
1100 return 0
|
|
1101 }
|
|
1102
|
|
1103 // we need >= 1 digits followed by a dot and a space or a tab
|
|
1104 if data[i] != '.' || !(data[i+1] == ' ' || data[i+1] == '\t') {
|
|
1105 return 0
|
|
1106 }
|
|
1107 return i + 2
|
|
1108 }
|
|
1109
|
|
1110 // returns definition list item prefix
|
|
1111 func (p *Markdown) dliPrefix(data []byte) int {
|
|
1112 if len(data) < 2 {
|
|
1113 return 0
|
|
1114 }
|
|
1115 i := 0
|
|
1116 // need a ':' followed by a space or a tab
|
|
1117 if data[i] != ':' || !(data[i+1] == ' ' || data[i+1] == '\t') {
|
|
1118 return 0
|
|
1119 }
|
|
1120 for i < len(data) && data[i] == ' ' {
|
|
1121 i++
|
|
1122 }
|
|
1123 return i + 2
|
|
1124 }
|
|
1125
|
|
1126 // parse ordered or unordered list block
|
|
1127 func (p *Markdown) list(data []byte, flags ListType) int {
|
|
1128 i := 0
|
|
1129 flags |= ListItemBeginningOfList
|
|
1130 block := p.addBlock(List, nil)
|
|
1131 block.ListFlags = flags
|
|
1132 block.Tight = true
|
|
1133
|
|
1134 for i < len(data) {
|
|
1135 skip := p.listItem(data[i:], &flags)
|
|
1136 if flags&ListItemContainsBlock != 0 {
|
|
1137 block.ListData.Tight = false
|
|
1138 }
|
|
1139 i += skip
|
|
1140 if skip == 0 || flags&ListItemEndOfList != 0 {
|
|
1141 break
|
|
1142 }
|
|
1143 flags &= ^ListItemBeginningOfList
|
|
1144 }
|
|
1145
|
|
1146 above := block.Parent
|
|
1147 finalizeList(block)
|
|
1148 p.tip = above
|
|
1149 return i
|
|
1150 }
|
|
1151
|
|
1152 // Returns true if the list item is not the same type as its parent list
|
|
1153 func (p *Markdown) listTypeChanged(data []byte, flags *ListType) bool {
|
|
1154 if p.dliPrefix(data) > 0 && *flags&ListTypeDefinition == 0 {
|
|
1155 return true
|
|
1156 } else if p.oliPrefix(data) > 0 && *flags&ListTypeOrdered == 0 {
|
|
1157 return true
|
|
1158 } else if p.uliPrefix(data) > 0 && (*flags&ListTypeOrdered != 0 || *flags&ListTypeDefinition != 0) {
|
|
1159 return true
|
|
1160 }
|
|
1161 return false
|
|
1162 }
|
|
1163
|
|
1164 // Returns true if block ends with a blank line, descending if needed
|
|
1165 // into lists and sublists.
|
|
1166 func endsWithBlankLine(block *Node) bool {
|
|
1167 // TODO: figure this out. Always false now.
|
|
1168 for block != nil {
|
|
1169 //if block.lastLineBlank {
|
|
1170 //return true
|
|
1171 //}
|
|
1172 t := block.Type
|
|
1173 if t == List || t == Item {
|
|
1174 block = block.LastChild
|
|
1175 } else {
|
|
1176 break
|
|
1177 }
|
|
1178 }
|
|
1179 return false
|
|
1180 }
|
|
1181
|
|
1182 func finalizeList(block *Node) {
|
|
1183 block.open = false
|
|
1184 item := block.FirstChild
|
|
1185 for item != nil {
|
|
1186 // check for non-final list item ending with blank line:
|
|
1187 if endsWithBlankLine(item) && item.Next != nil {
|
|
1188 block.ListData.Tight = false
|
|
1189 break
|
|
1190 }
|
|
1191 // recurse into children of list item, to see if there are spaces
|
|
1192 // between any of them:
|
|
1193 subItem := item.FirstChild
|
|
1194 for subItem != nil {
|
|
1195 if endsWithBlankLine(subItem) && (item.Next != nil || subItem.Next != nil) {
|
|
1196 block.ListData.Tight = false
|
|
1197 break
|
|
1198 }
|
|
1199 subItem = subItem.Next
|
|
1200 }
|
|
1201 item = item.Next
|
|
1202 }
|
|
1203 }
|
|
1204
|
|
1205 // Parse a single list item.
|
|
1206 // Assumes initial prefix is already removed if this is a sublist.
|
|
1207 func (p *Markdown) listItem(data []byte, flags *ListType) int {
|
|
1208 // keep track of the indentation of the first line
|
|
1209 itemIndent := 0
|
|
1210 if data[0] == '\t' {
|
|
1211 itemIndent += 4
|
|
1212 } else {
|
|
1213 for itemIndent < 3 && data[itemIndent] == ' ' {
|
|
1214 itemIndent++
|
|
1215 }
|
|
1216 }
|
|
1217
|
|
1218 var bulletChar byte = '*'
|
|
1219 i := p.uliPrefix(data)
|
|
1220 if i == 0 {
|
|
1221 i = p.oliPrefix(data)
|
|
1222 } else {
|
|
1223 bulletChar = data[i-2]
|
|
1224 }
|
|
1225 if i == 0 {
|
|
1226 i = p.dliPrefix(data)
|
|
1227 // reset definition term flag
|
|
1228 if i > 0 {
|
|
1229 *flags &= ^ListTypeTerm
|
|
1230 }
|
|
1231 }
|
|
1232 if i == 0 {
|
|
1233 // if in definition list, set term flag and continue
|
|
1234 if *flags&ListTypeDefinition != 0 {
|
|
1235 *flags |= ListTypeTerm
|
|
1236 } else {
|
|
1237 return 0
|
|
1238 }
|
|
1239 }
|
|
1240
|
|
1241 // skip leading whitespace on first line
|
|
1242 for i < len(data) && data[i] == ' ' {
|
|
1243 i++
|
|
1244 }
|
|
1245
|
|
1246 // find the end of the line
|
|
1247 line := i
|
|
1248 for i > 0 && i < len(data) && data[i-1] != '\n' {
|
|
1249 i++
|
|
1250 }
|
|
1251
|
|
1252 // get working buffer
|
|
1253 var raw bytes.Buffer
|
|
1254
|
|
1255 // put the first line into the working buffer
|
|
1256 raw.Write(data[line:i])
|
|
1257 line = i
|
|
1258
|
|
1259 // process the following lines
|
|
1260 containsBlankLine := false
|
|
1261 sublist := 0
|
|
1262 codeBlockMarker := ""
|
|
1263
|
|
1264 gatherlines:
|
|
1265 for line < len(data) {
|
|
1266 i++
|
|
1267
|
|
1268 // find the end of this line
|
|
1269 for i < len(data) && data[i-1] != '\n' {
|
|
1270 i++
|
|
1271 }
|
|
1272
|
|
1273 // if it is an empty line, guess that it is part of this item
|
|
1274 // and move on to the next line
|
|
1275 if p.isEmpty(data[line:i]) > 0 {
|
|
1276 containsBlankLine = true
|
|
1277 line = i
|
|
1278 continue
|
|
1279 }
|
|
1280
|
|
1281 // calculate the indentation
|
|
1282 indent := 0
|
|
1283 indentIndex := 0
|
|
1284 if data[line] == '\t' {
|
|
1285 indentIndex++
|
|
1286 indent += 4
|
|
1287 } else {
|
|
1288 for indent < 4 && line+indent < i && data[line+indent] == ' ' {
|
|
1289 indent++
|
|
1290 indentIndex++
|
|
1291 }
|
|
1292 }
|
|
1293
|
|
1294 chunk := data[line+indentIndex : i]
|
|
1295
|
|
1296 if p.extensions&FencedCode != 0 {
|
|
1297 // determine if in or out of codeblock
|
|
1298 // if in codeblock, ignore normal list processing
|
|
1299 _, marker := isFenceLine(chunk, nil, codeBlockMarker)
|
|
1300 if marker != "" {
|
|
1301 if codeBlockMarker == "" {
|
|
1302 // start of codeblock
|
|
1303 codeBlockMarker = marker
|
|
1304 } else {
|
|
1305 // end of codeblock.
|
|
1306 codeBlockMarker = ""
|
|
1307 }
|
|
1308 }
|
|
1309 // we are in a codeblock, write line, and continue
|
|
1310 if codeBlockMarker != "" || marker != "" {
|
|
1311 raw.Write(data[line+indentIndex : i])
|
|
1312 line = i
|
|
1313 continue gatherlines
|
|
1314 }
|
|
1315 }
|
|
1316
|
|
1317 // evaluate how this line fits in
|
|
1318 switch {
|
|
1319 // is this a nested list item?
|
|
1320 case (p.uliPrefix(chunk) > 0 && !p.isHRule(chunk)) ||
|
|
1321 p.oliPrefix(chunk) > 0 ||
|
|
1322 p.dliPrefix(chunk) > 0:
|
|
1323
|
|
1324 // to be a nested list, it must be indented more
|
|
1325 // if not, it is either a different kind of list
|
|
1326 // or the next item in the same list
|
|
1327 if indent <= itemIndent {
|
|
1328 if p.listTypeChanged(chunk, flags) {
|
|
1329 *flags |= ListItemEndOfList
|
|
1330 } else if containsBlankLine {
|
|
1331 *flags |= ListItemContainsBlock
|
|
1332 }
|
|
1333
|
|
1334 break gatherlines
|
|
1335 }
|
|
1336
|
|
1337 if containsBlankLine {
|
|
1338 *flags |= ListItemContainsBlock
|
|
1339 }
|
|
1340
|
|
1341 // is this the first item in the nested list?
|
|
1342 if sublist == 0 {
|
|
1343 sublist = raw.Len()
|
|
1344 }
|
|
1345
|
|
1346 // is this a nested prefix heading?
|
|
1347 case p.isPrefixHeading(chunk):
|
|
1348 // if the heading is not indented, it is not nested in the list
|
|
1349 // and thus ends the list
|
|
1350 if containsBlankLine && indent < 4 {
|
|
1351 *flags |= ListItemEndOfList
|
|
1352 break gatherlines
|
|
1353 }
|
|
1354 *flags |= ListItemContainsBlock
|
|
1355
|
|
1356 // anything following an empty line is only part
|
|
1357 // of this item if it is indented 4 spaces
|
|
1358 // (regardless of the indentation of the beginning of the item)
|
|
1359 case containsBlankLine && indent < 4:
|
|
1360 if *flags&ListTypeDefinition != 0 && i < len(data)-1 {
|
|
1361 // is the next item still a part of this list?
|
|
1362 next := i
|
|
1363 for next < len(data) && data[next] != '\n' {
|
|
1364 next++
|
|
1365 }
|
|
1366 for next < len(data)-1 && data[next] == '\n' {
|
|
1367 next++
|
|
1368 }
|
|
1369 if i < len(data)-1 && data[i] != ':' && data[next] != ':' {
|
|
1370 *flags |= ListItemEndOfList
|
|
1371 }
|
|
1372 } else {
|
|
1373 *flags |= ListItemEndOfList
|
|
1374 }
|
|
1375 break gatherlines
|
|
1376
|
|
1377 // a blank line means this should be parsed as a block
|
|
1378 case containsBlankLine:
|
|
1379 raw.WriteByte('\n')
|
|
1380 *flags |= ListItemContainsBlock
|
|
1381 }
|
|
1382
|
|
1383 // if this line was preceded by one or more blanks,
|
|
1384 // re-introduce the blank into the buffer
|
|
1385 if containsBlankLine {
|
|
1386 containsBlankLine = false
|
|
1387 raw.WriteByte('\n')
|
|
1388 }
|
|
1389
|
|
1390 // add the line into the working buffer without prefix
|
|
1391 raw.Write(data[line+indentIndex : i])
|
|
1392
|
|
1393 line = i
|
|
1394 }
|
|
1395
|
|
1396 rawBytes := raw.Bytes()
|
|
1397
|
|
1398 block := p.addBlock(Item, nil)
|
|
1399 block.ListFlags = *flags
|
|
1400 block.Tight = false
|
|
1401 block.BulletChar = bulletChar
|
|
1402 block.Delimiter = '.' // Only '.' is possible in Markdown, but ')' will also be possible in CommonMark
|
|
1403
|
|
1404 // render the contents of the list item
|
|
1405 if *flags&ListItemContainsBlock != 0 && *flags&ListTypeTerm == 0 {
|
|
1406 // intermediate render of block item, except for definition term
|
|
1407 if sublist > 0 {
|
|
1408 p.block(rawBytes[:sublist])
|
|
1409 p.block(rawBytes[sublist:])
|
|
1410 } else {
|
|
1411 p.block(rawBytes)
|
|
1412 }
|
|
1413 } else {
|
|
1414 // intermediate render of inline item
|
|
1415 if sublist > 0 {
|
|
1416 child := p.addChild(Paragraph, 0)
|
|
1417 child.content = rawBytes[:sublist]
|
|
1418 p.block(rawBytes[sublist:])
|
|
1419 } else {
|
|
1420 child := p.addChild(Paragraph, 0)
|
|
1421 child.content = rawBytes
|
|
1422 }
|
|
1423 }
|
|
1424 return line
|
|
1425 }
|
|
1426
|
|
1427 // render a single paragraph that has already been parsed out
|
|
1428 func (p *Markdown) renderParagraph(data []byte) {
|
|
1429 if len(data) == 0 {
|
|
1430 return
|
|
1431 }
|
|
1432
|
|
1433 // trim leading spaces
|
|
1434 beg := 0
|
|
1435 for data[beg] == ' ' {
|
|
1436 beg++
|
|
1437 }
|
|
1438
|
|
1439 end := len(data)
|
|
1440 // trim trailing newline
|
|
1441 if data[len(data)-1] == '\n' {
|
|
1442 end--
|
|
1443 }
|
|
1444
|
|
1445 // trim trailing spaces
|
|
1446 for end > beg && data[end-1] == ' ' {
|
|
1447 end--
|
|
1448 }
|
|
1449
|
|
1450 p.addBlock(Paragraph, data[beg:end])
|
|
1451 }
|
|
1452
|
|
1453 func (p *Markdown) paragraph(data []byte) int {
|
|
1454 // prev: index of 1st char of previous line
|
|
1455 // line: index of 1st char of current line
|
|
1456 // i: index of cursor/end of current line
|
|
1457 var prev, line, i int
|
|
1458 tabSize := TabSizeDefault
|
|
1459 if p.extensions&TabSizeEight != 0 {
|
|
1460 tabSize = TabSizeDouble
|
|
1461 }
|
|
1462 // keep going until we find something to mark the end of the paragraph
|
|
1463 for i < len(data) {
|
|
1464 // mark the beginning of the current line
|
|
1465 prev = line
|
|
1466 current := data[i:]
|
|
1467 line = i
|
|
1468
|
|
1469 // did we find a reference or a footnote? If so, end a paragraph
|
|
1470 // preceding it and report that we have consumed up to the end of that
|
|
1471 // reference:
|
|
1472 if refEnd := isReference(p, current, tabSize); refEnd > 0 {
|
|
1473 p.renderParagraph(data[:i])
|
|
1474 return i + refEnd
|
|
1475 }
|
|
1476
|
|
1477 // did we find a blank line marking the end of the paragraph?
|
|
1478 if n := p.isEmpty(current); n > 0 {
|
|
1479 // did this blank line followed by a definition list item?
|
|
1480 if p.extensions&DefinitionLists != 0 {
|
|
1481 if i < len(data)-1 && data[i+1] == ':' {
|
|
1482 return p.list(data[prev:], ListTypeDefinition)
|
|
1483 }
|
|
1484 }
|
|
1485
|
|
1486 p.renderParagraph(data[:i])
|
|
1487 return i + n
|
|
1488 }
|
|
1489
|
|
1490 // an underline under some text marks a heading, so our paragraph ended on prev line
|
|
1491 if i > 0 {
|
|
1492 if level := p.isUnderlinedHeading(current); level > 0 {
|
|
1493 // render the paragraph
|
|
1494 p.renderParagraph(data[:prev])
|
|
1495
|
|
1496 // ignore leading and trailing whitespace
|
|
1497 eol := i - 1
|
|
1498 for prev < eol && data[prev] == ' ' {
|
|
1499 prev++
|
|
1500 }
|
|
1501 for eol > prev && data[eol-1] == ' ' {
|
|
1502 eol--
|
|
1503 }
|
|
1504
|
|
1505 id := ""
|
|
1506 if p.extensions&AutoHeadingIDs != 0 {
|
|
1507 id = SanitizedAnchorName(string(data[prev:eol]))
|
|
1508 }
|
|
1509
|
|
1510 block := p.addBlock(Heading, data[prev:eol])
|
|
1511 block.Level = level
|
|
1512 block.HeadingID = id
|
|
1513
|
|
1514 // find the end of the underline
|
|
1515 for i < len(data) && data[i] != '\n' {
|
|
1516 i++
|
|
1517 }
|
|
1518 return i
|
|
1519 }
|
|
1520 }
|
|
1521
|
|
1522 // if the next line starts a block of HTML, then the paragraph ends here
|
|
1523 if p.extensions&LaxHTMLBlocks != 0 {
|
|
1524 if data[i] == '<' && p.html(current, false) > 0 {
|
|
1525 // rewind to before the HTML block
|
|
1526 p.renderParagraph(data[:i])
|
|
1527 return i
|
|
1528 }
|
|
1529 }
|
|
1530
|
|
1531 // if there's a prefixed heading or a horizontal rule after this, paragraph is over
|
|
1532 if p.isPrefixHeading(current) || p.isHRule(current) {
|
|
1533 p.renderParagraph(data[:i])
|
|
1534 return i
|
|
1535 }
|
|
1536
|
|
1537 // if there's a fenced code block, paragraph is over
|
|
1538 if p.extensions&FencedCode != 0 {
|
|
1539 if p.fencedCodeBlock(current, false) > 0 {
|
|
1540 p.renderParagraph(data[:i])
|
|
1541 return i
|
|
1542 }
|
|
1543 }
|
|
1544
|
|
1545 // if there's a definition list item, prev line is a definition term
|
|
1546 if p.extensions&DefinitionLists != 0 {
|
|
1547 if p.dliPrefix(current) != 0 {
|
|
1548 ret := p.list(data[prev:], ListTypeDefinition)
|
|
1549 return ret
|
|
1550 }
|
|
1551 }
|
|
1552
|
|
1553 // if there's a list after this, paragraph is over
|
|
1554 if p.extensions&NoEmptyLineBeforeBlock != 0 {
|
|
1555 if p.uliPrefix(current) != 0 ||
|
|
1556 p.oliPrefix(current) != 0 ||
|
|
1557 p.quotePrefix(current) != 0 ||
|
|
1558 p.codePrefix(current) != 0 {
|
|
1559 p.renderParagraph(data[:i])
|
|
1560 return i
|
|
1561 }
|
|
1562 }
|
|
1563
|
|
1564 // otherwise, scan to the beginning of the next line
|
|
1565 nl := bytes.IndexByte(data[i:], '\n')
|
|
1566 if nl >= 0 {
|
|
1567 i += nl + 1
|
|
1568 } else {
|
|
1569 i += len(data[i:])
|
|
1570 }
|
|
1571 }
|
|
1572
|
|
1573 p.renderParagraph(data[:i])
|
|
1574 return i
|
|
1575 }
|
|
1576
|
|
// skipChar returns the index of the first byte at or after start that is not
// equal to char. It returns len(data) when every remaining byte matches, and
// start itself when start is already at or past the end of data.
func skipChar(data []byte, start int, char byte) int {
	pos := start
	for ; pos < len(data); pos++ {
		if data[pos] != char {
			break
		}
	}
	return pos
}
|
|
1584
|
|
// skipUntilChar returns the index of the first occurrence of char in text at
// or after start. It returns len(text) when char does not occur, and start
// itself when start is already at or past the end of text.
func skipUntilChar(text []byte, start int, char byte) int {
	pos := start
	for pos < len(text) {
		if text[pos] == char {
			return pos
		}
		pos++
	}
	return pos
}
|
|
1592
|
|
// SanitizedAnchorName returns a sanitized anchor name for the given text.
//
// It implements the algorithm specified in the package comment: letters and
// numbers are kept and lowercased, and every run of other characters between
// two kept characters is replaced by a single dash. Leading and trailing runs
// produce no dash.
func SanitizedAnchorName(text string) string {
	var name []rune
	pendingDash := false
	for _, r := range text {
		if !unicode.IsLetter(r) && !unicode.IsNumber(r) {
			// remember the separator; it is only emitted if another
			// letter or number follows
			pendingDash = true
			continue
		}
		if pendingDash && len(name) > 0 {
			name = append(name, '-')
		}
		pendingDash = false
		name = append(name, unicode.ToLower(r))
	}
	return string(name)
}
|