66
|
1 //
|
|
2 // Blackfriday Markdown Processor
|
|
3 // Available at http://github.com/russross/blackfriday
|
|
4 //
|
|
5 // Copyright © 2011 Russ Ross <russ@russross.com>.
|
|
6 // Distributed under the Simplified BSD License.
|
|
7 // See README.md for details.
|
|
8 //
|
|
9
|
|
10 //
|
|
11 // Functions to parse inline elements.
|
|
12 //
|
|
13
|
|
14 package blackfriday
|
|
15
|
|
16 import (
|
|
17 "bytes"
|
|
18 "regexp"
|
|
19 "strconv"
|
|
20 )
|
|
21
|
|
// urlRe is the regular-expression source for a URL as it may appear inside a
// literal anchor element: an http, https or ftp scheme (or a single leading
// slash) followed by a run of URL characters.
var urlRe = `((https?|ftp):\/\/|\/)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)]+`

// anchorRe matches, at the start of the input only, a complete anchor
// element whose href and whose text are both URLs, with an optional title
// attribute: <a href="URL" title="...">URL</a>.
var anchorRe = regexp.MustCompile(`^(<a\shref="` + urlRe + `"(\stitle="[^"<>]+")?\s?>` + urlRe + `<\/a>)`)

// htmlEntityRe matches an HTML character reference.
//
// https://www.w3.org/TR/html5/syntax.html#character-references
// The highest unicode code point in 17 planes (2^20) is 1,114,112d, which
// needs 7 decimal digits or 6 hex digits. Named entity references can be
// 2-31 characters long, with "&lt;" at one end and
// "&CounterClockwiseContourIntegral;" at the other. Some names end in
// digits (see "&frac34;" and "&blk12;"); this is not inherent in the
// specification, but digits never appear anywhere else in current names.
// https://www.w3.org/TR/html5/syntax.html#named-character-references
//
//	entity      := "&" (named group | number ref) ";"
//	named group := [a-zA-Z]{2,31}[0-9]{0,2}
//	number ref  := "#" (dec ref | hex ref)
//	dec ref     := [0-9]{1,7}
//	hex ref     := ("x" | "X") [0-9a-fA-F]{1,6}
var htmlEntityRe = regexp.MustCompile(`&([a-zA-Z]{2,31}[0-9]{0,2}|#([0-9]{1,7}|[xX][0-9a-fA-F]{1,6}));`)
|
|
43
|
|
44 // Functions to parse text within a block
|
|
45 // Each function returns the number of chars taken care of
|
|
46 // data is the complete block being rendered
|
|
47 // offset is the number of valid chars before the current cursor
|
|
48
|
|
49 func (p *Markdown) inline(currBlock *Node, data []byte) {
|
|
50 // handlers might call us recursively: enforce a maximum depth
|
|
51 if p.nesting >= p.maxNesting || len(data) == 0 {
|
|
52 return
|
|
53 }
|
|
54 p.nesting++
|
|
55 beg, end := 0, 0
|
|
56 for end < len(data) {
|
|
57 handler := p.inlineCallback[data[end]]
|
|
58 if handler != nil {
|
|
59 if consumed, node := handler(p, data, end); consumed == 0 {
|
|
60 // No action from the callback.
|
|
61 end++
|
|
62 } else {
|
|
63 // Copy inactive chars into the output.
|
|
64 currBlock.AppendChild(text(data[beg:end]))
|
|
65 if node != nil {
|
|
66 currBlock.AppendChild(node)
|
|
67 }
|
|
68 // Skip past whatever the callback used.
|
|
69 beg = end + consumed
|
|
70 end = beg
|
|
71 }
|
|
72 } else {
|
|
73 end++
|
|
74 }
|
|
75 }
|
|
76 if beg < len(data) {
|
|
77 if data[end-1] == '\n' {
|
|
78 end--
|
|
79 }
|
|
80 currBlock.AppendChild(text(data[beg:end]))
|
|
81 }
|
|
82 p.nesting--
|
|
83 }
|
|
84
|
|
85 // single and double emphasis parsing
|
|
86 func emphasis(p *Markdown, data []byte, offset int) (int, *Node) {
|
|
87 data = data[offset:]
|
|
88 c := data[0]
|
|
89
|
|
90 if len(data) > 2 && data[1] != c {
|
|
91 // whitespace cannot follow an opening emphasis;
|
|
92 // strikethrough only takes two characters '~~'
|
|
93 if c == '~' || isspace(data[1]) {
|
|
94 return 0, nil
|
|
95 }
|
|
96 ret, node := helperEmphasis(p, data[1:], c)
|
|
97 if ret == 0 {
|
|
98 return 0, nil
|
|
99 }
|
|
100
|
|
101 return ret + 1, node
|
|
102 }
|
|
103
|
|
104 if len(data) > 3 && data[1] == c && data[2] != c {
|
|
105 if isspace(data[2]) {
|
|
106 return 0, nil
|
|
107 }
|
|
108 ret, node := helperDoubleEmphasis(p, data[2:], c)
|
|
109 if ret == 0 {
|
|
110 return 0, nil
|
|
111 }
|
|
112
|
|
113 return ret + 2, node
|
|
114 }
|
|
115
|
|
116 if len(data) > 4 && data[1] == c && data[2] == c && data[3] != c {
|
|
117 if c == '~' || isspace(data[3]) {
|
|
118 return 0, nil
|
|
119 }
|
|
120 ret, node := helperTripleEmphasis(p, data, 3, c)
|
|
121 if ret == 0 {
|
|
122 return 0, nil
|
|
123 }
|
|
124
|
|
125 return ret + 3, node
|
|
126 }
|
|
127
|
|
128 return 0, nil
|
|
129 }
|
|
130
|
|
131 func codeSpan(p *Markdown, data []byte, offset int) (int, *Node) {
|
|
132 data = data[offset:]
|
|
133
|
|
134 nb := 0
|
|
135
|
|
136 // count the number of backticks in the delimiter
|
|
137 for nb < len(data) && data[nb] == '`' {
|
|
138 nb++
|
|
139 }
|
|
140
|
|
141 // find the next delimiter
|
|
142 i, end := 0, 0
|
|
143 for end = nb; end < len(data) && i < nb; end++ {
|
|
144 if data[end] == '`' {
|
|
145 i++
|
|
146 } else {
|
|
147 i = 0
|
|
148 }
|
|
149 }
|
|
150
|
|
151 // no matching delimiter?
|
|
152 if i < nb && end >= len(data) {
|
|
153 return 0, nil
|
|
154 }
|
|
155
|
|
156 // trim outside whitespace
|
|
157 fBegin := nb
|
|
158 for fBegin < end && data[fBegin] == ' ' {
|
|
159 fBegin++
|
|
160 }
|
|
161
|
|
162 fEnd := end - nb
|
|
163 for fEnd > fBegin && data[fEnd-1] == ' ' {
|
|
164 fEnd--
|
|
165 }
|
|
166
|
|
167 // render the code span
|
|
168 if fBegin != fEnd {
|
|
169 code := NewNode(Code)
|
|
170 code.Literal = data[fBegin:fEnd]
|
|
171 return end, code
|
|
172 }
|
|
173
|
|
174 return end, nil
|
|
175 }
|
|
176
|
|
177 // newline preceded by two spaces becomes <br>
|
|
178 func maybeLineBreak(p *Markdown, data []byte, offset int) (int, *Node) {
|
|
179 origOffset := offset
|
|
180 for offset < len(data) && data[offset] == ' ' {
|
|
181 offset++
|
|
182 }
|
|
183
|
|
184 if offset < len(data) && data[offset] == '\n' {
|
|
185 if offset-origOffset >= 2 {
|
|
186 return offset - origOffset + 1, NewNode(Hardbreak)
|
|
187 }
|
|
188 return offset - origOffset, nil
|
|
189 }
|
|
190 return 0, nil
|
|
191 }
|
|
192
|
|
193 // newline without two spaces works when HardLineBreak is enabled
|
|
194 func lineBreak(p *Markdown, data []byte, offset int) (int, *Node) {
|
|
195 if p.extensions&HardLineBreak != 0 {
|
|
196 return 1, NewNode(Hardbreak)
|
|
197 }
|
|
198 return 0, nil
|
|
199 }
|
|
200
|
|
// linkType identifies which bracketed construct the link parser is handling.
type linkType int

const (
	// linkNormal is a regular link: [text].
	linkNormal linkType = iota
	// linkImg is an image: ![alt].
	linkImg
	// linkDeferredFootnote is a footnote reference: [^refId].
	linkDeferredFootnote
	// linkInlineFootnote is an inline footnote: ^[text].
	linkInlineFootnote
)
|
|
209
|
|
210 func isReferenceStyleLink(data []byte, pos int, t linkType) bool {
|
|
211 if t == linkDeferredFootnote {
|
|
212 return false
|
|
213 }
|
|
214 return pos < len(data)-1 && data[pos] == '[' && data[pos+1] != '^'
|
|
215 }
|
|
216
|
|
217 func maybeImage(p *Markdown, data []byte, offset int) (int, *Node) {
|
|
218 if offset < len(data)-1 && data[offset+1] == '[' {
|
|
219 return link(p, data, offset)
|
|
220 }
|
|
221 return 0, nil
|
|
222 }
|
|
223
|
|
224 func maybeInlineFootnote(p *Markdown, data []byte, offset int) (int, *Node) {
|
|
225 if offset < len(data)-1 && data[offset+1] == '[' {
|
|
226 return link(p, data, offset)
|
|
227 }
|
|
228 return 0, nil
|
|
229 }
|
|
230
|
|
231 // '[': parse a link or an image or a footnote
|
|
232 func link(p *Markdown, data []byte, offset int) (int, *Node) {
|
|
233 // no links allowed inside regular links, footnote, and deferred footnotes
|
|
234 if p.insideLink && (offset > 0 && data[offset-1] == '[' || len(data)-1 > offset && data[offset+1] == '^') {
|
|
235 return 0, nil
|
|
236 }
|
|
237
|
|
238 var t linkType
|
|
239 switch {
|
|
240 // special case: ![^text] == deferred footnote (that follows something with
|
|
241 // an exclamation point)
|
|
242 case p.extensions&Footnotes != 0 && len(data)-1 > offset && data[offset+1] == '^':
|
|
243 t = linkDeferredFootnote
|
|
244 // ![alt] == image
|
|
245 case offset >= 0 && data[offset] == '!':
|
|
246 t = linkImg
|
|
247 offset++
|
|
248 // ^[text] == inline footnote
|
|
249 // [^refId] == deferred footnote
|
|
250 case p.extensions&Footnotes != 0:
|
|
251 if offset >= 0 && data[offset] == '^' {
|
|
252 t = linkInlineFootnote
|
|
253 offset++
|
|
254 } else if len(data)-1 > offset && data[offset+1] == '^' {
|
|
255 t = linkDeferredFootnote
|
|
256 }
|
|
257 // [text] == regular link
|
|
258 default:
|
|
259 t = linkNormal
|
|
260 }
|
|
261
|
|
262 data = data[offset:]
|
|
263
|
|
264 var (
|
|
265 i = 1
|
|
266 noteID int
|
|
267 title, link, altContent []byte
|
|
268 textHasNl = false
|
|
269 )
|
|
270
|
|
271 if t == linkDeferredFootnote {
|
|
272 i++
|
|
273 }
|
|
274
|
|
275 // look for the matching closing bracket
|
|
276 for level := 1; level > 0 && i < len(data); i++ {
|
|
277 switch {
|
|
278 case data[i] == '\n':
|
|
279 textHasNl = true
|
|
280
|
|
281 case isBackslashEscaped(data, i):
|
|
282 continue
|
|
283
|
|
284 case data[i] == '[':
|
|
285 level++
|
|
286
|
|
287 case data[i] == ']':
|
|
288 level--
|
|
289 if level <= 0 {
|
|
290 i-- // compensate for extra i++ in for loop
|
|
291 }
|
|
292 }
|
|
293 }
|
|
294
|
|
295 if i >= len(data) {
|
|
296 return 0, nil
|
|
297 }
|
|
298
|
|
299 txtE := i
|
|
300 i++
|
|
301 var footnoteNode *Node
|
|
302
|
|
303 // skip any amount of whitespace or newline
|
|
304 // (this is much more lax than original markdown syntax)
|
|
305 for i < len(data) && isspace(data[i]) {
|
|
306 i++
|
|
307 }
|
|
308
|
|
309 // inline style link
|
|
310 switch {
|
|
311 case i < len(data) && data[i] == '(':
|
|
312 // skip initial whitespace
|
|
313 i++
|
|
314
|
|
315 for i < len(data) && isspace(data[i]) {
|
|
316 i++
|
|
317 }
|
|
318
|
|
319 linkB := i
|
|
320
|
|
321 // look for link end: ' " )
|
|
322 findlinkend:
|
|
323 for i < len(data) {
|
|
324 switch {
|
|
325 case data[i] == '\\':
|
|
326 i += 2
|
|
327
|
|
328 case data[i] == ')' || data[i] == '\'' || data[i] == '"':
|
|
329 break findlinkend
|
|
330
|
|
331 default:
|
|
332 i++
|
|
333 }
|
|
334 }
|
|
335
|
|
336 if i >= len(data) {
|
|
337 return 0, nil
|
|
338 }
|
|
339 linkE := i
|
|
340
|
|
341 // look for title end if present
|
|
342 titleB, titleE := 0, 0
|
|
343 if data[i] == '\'' || data[i] == '"' {
|
|
344 i++
|
|
345 titleB = i
|
|
346
|
|
347 findtitleend:
|
|
348 for i < len(data) {
|
|
349 switch {
|
|
350 case data[i] == '\\':
|
|
351 i += 2
|
|
352
|
|
353 case data[i] == ')':
|
|
354 break findtitleend
|
|
355
|
|
356 default:
|
|
357 i++
|
|
358 }
|
|
359 }
|
|
360
|
|
361 if i >= len(data) {
|
|
362 return 0, nil
|
|
363 }
|
|
364
|
|
365 // skip whitespace after title
|
|
366 titleE = i - 1
|
|
367 for titleE > titleB && isspace(data[titleE]) {
|
|
368 titleE--
|
|
369 }
|
|
370
|
|
371 // check for closing quote presence
|
|
372 if data[titleE] != '\'' && data[titleE] != '"' {
|
|
373 titleB, titleE = 0, 0
|
|
374 linkE = i
|
|
375 }
|
|
376 }
|
|
377
|
|
378 // remove whitespace at the end of the link
|
|
379 for linkE > linkB && isspace(data[linkE-1]) {
|
|
380 linkE--
|
|
381 }
|
|
382
|
|
383 // remove optional angle brackets around the link
|
|
384 if data[linkB] == '<' {
|
|
385 linkB++
|
|
386 }
|
|
387 if data[linkE-1] == '>' {
|
|
388 linkE--
|
|
389 }
|
|
390
|
|
391 // build escaped link and title
|
|
392 if linkE > linkB {
|
|
393 link = data[linkB:linkE]
|
|
394 }
|
|
395
|
|
396 if titleE > titleB {
|
|
397 title = data[titleB:titleE]
|
|
398 }
|
|
399
|
|
400 i++
|
|
401
|
|
402 // reference style link
|
|
403 case isReferenceStyleLink(data, i, t):
|
|
404 var id []byte
|
|
405 altContentConsidered := false
|
|
406
|
|
407 // look for the id
|
|
408 i++
|
|
409 linkB := i
|
|
410 for i < len(data) && data[i] != ']' {
|
|
411 i++
|
|
412 }
|
|
413 if i >= len(data) {
|
|
414 return 0, nil
|
|
415 }
|
|
416 linkE := i
|
|
417
|
|
418 // find the reference
|
|
419 if linkB == linkE {
|
|
420 if textHasNl {
|
|
421 var b bytes.Buffer
|
|
422
|
|
423 for j := 1; j < txtE; j++ {
|
|
424 switch {
|
|
425 case data[j] != '\n':
|
|
426 b.WriteByte(data[j])
|
|
427 case data[j-1] != ' ':
|
|
428 b.WriteByte(' ')
|
|
429 }
|
|
430 }
|
|
431
|
|
432 id = b.Bytes()
|
|
433 } else {
|
|
434 id = data[1:txtE]
|
|
435 altContentConsidered = true
|
|
436 }
|
|
437 } else {
|
|
438 id = data[linkB:linkE]
|
|
439 }
|
|
440
|
|
441 // find the reference with matching id
|
|
442 lr, ok := p.getRef(string(id))
|
|
443 if !ok {
|
|
444 return 0, nil
|
|
445 }
|
|
446
|
|
447 // keep link and title from reference
|
|
448 link = lr.link
|
|
449 title = lr.title
|
|
450 if altContentConsidered {
|
|
451 altContent = lr.text
|
|
452 }
|
|
453 i++
|
|
454
|
|
455 // shortcut reference style link or reference or inline footnote
|
|
456 default:
|
|
457 var id []byte
|
|
458
|
|
459 // craft the id
|
|
460 if textHasNl {
|
|
461 var b bytes.Buffer
|
|
462
|
|
463 for j := 1; j < txtE; j++ {
|
|
464 switch {
|
|
465 case data[j] != '\n':
|
|
466 b.WriteByte(data[j])
|
|
467 case data[j-1] != ' ':
|
|
468 b.WriteByte(' ')
|
|
469 }
|
|
470 }
|
|
471
|
|
472 id = b.Bytes()
|
|
473 } else {
|
|
474 if t == linkDeferredFootnote {
|
|
475 id = data[2:txtE] // get rid of the ^
|
|
476 } else {
|
|
477 id = data[1:txtE]
|
|
478 }
|
|
479 }
|
|
480
|
|
481 footnoteNode = NewNode(Item)
|
|
482 if t == linkInlineFootnote {
|
|
483 // create a new reference
|
|
484 noteID = len(p.notes) + 1
|
|
485
|
|
486 var fragment []byte
|
|
487 if len(id) > 0 {
|
|
488 if len(id) < 16 {
|
|
489 fragment = make([]byte, len(id))
|
|
490 } else {
|
|
491 fragment = make([]byte, 16)
|
|
492 }
|
|
493 copy(fragment, slugify(id))
|
|
494 } else {
|
|
495 fragment = append([]byte("footnote-"), []byte(strconv.Itoa(noteID))...)
|
|
496 }
|
|
497
|
|
498 ref := &reference{
|
|
499 noteID: noteID,
|
|
500 hasBlock: false,
|
|
501 link: fragment,
|
|
502 title: id,
|
|
503 footnote: footnoteNode,
|
|
504 }
|
|
505
|
|
506 p.notes = append(p.notes, ref)
|
|
507
|
|
508 link = ref.link
|
|
509 title = ref.title
|
|
510 } else {
|
|
511 // find the reference with matching id
|
|
512 lr, ok := p.getRef(string(id))
|
|
513 if !ok {
|
|
514 return 0, nil
|
|
515 }
|
|
516
|
|
517 if t == linkDeferredFootnote {
|
|
518 lr.noteID = len(p.notes) + 1
|
|
519 lr.footnote = footnoteNode
|
|
520 p.notes = append(p.notes, lr)
|
|
521 }
|
|
522
|
|
523 // keep link and title from reference
|
|
524 link = lr.link
|
|
525 // if inline footnote, title == footnote contents
|
|
526 title = lr.title
|
|
527 noteID = lr.noteID
|
|
528 }
|
|
529
|
|
530 // rewind the whitespace
|
|
531 i = txtE + 1
|
|
532 }
|
|
533
|
|
534 var uLink []byte
|
|
535 if t == linkNormal || t == linkImg {
|
|
536 if len(link) > 0 {
|
|
537 var uLinkBuf bytes.Buffer
|
|
538 unescapeText(&uLinkBuf, link)
|
|
539 uLink = uLinkBuf.Bytes()
|
|
540 }
|
|
541
|
|
542 // links need something to click on and somewhere to go
|
|
543 if len(uLink) == 0 || (t == linkNormal && txtE <= 1) {
|
|
544 return 0, nil
|
|
545 }
|
|
546 }
|
|
547
|
|
548 // call the relevant rendering function
|
|
549 var linkNode *Node
|
|
550 switch t {
|
|
551 case linkNormal:
|
|
552 linkNode = NewNode(Link)
|
|
553 linkNode.Destination = normalizeURI(uLink)
|
|
554 linkNode.Title = title
|
|
555 if len(altContent) > 0 {
|
|
556 linkNode.AppendChild(text(altContent))
|
|
557 } else {
|
|
558 // links cannot contain other links, so turn off link parsing
|
|
559 // temporarily and recurse
|
|
560 insideLink := p.insideLink
|
|
561 p.insideLink = true
|
|
562 p.inline(linkNode, data[1:txtE])
|
|
563 p.insideLink = insideLink
|
|
564 }
|
|
565
|
|
566 case linkImg:
|
|
567 linkNode = NewNode(Image)
|
|
568 linkNode.Destination = uLink
|
|
569 linkNode.Title = title
|
|
570 linkNode.AppendChild(text(data[1:txtE]))
|
|
571 i++
|
|
572
|
|
573 case linkInlineFootnote, linkDeferredFootnote:
|
|
574 linkNode = NewNode(Link)
|
|
575 linkNode.Destination = link
|
|
576 linkNode.Title = title
|
|
577 linkNode.NoteID = noteID
|
|
578 linkNode.Footnote = footnoteNode
|
|
579 if t == linkInlineFootnote {
|
|
580 i++
|
|
581 }
|
|
582
|
|
583 default:
|
|
584 return 0, nil
|
|
585 }
|
|
586
|
|
587 return i, linkNode
|
|
588 }
|
|
589
|
|
590 func (p *Markdown) inlineHTMLComment(data []byte) int {
|
|
591 if len(data) < 5 {
|
|
592 return 0
|
|
593 }
|
|
594 if data[0] != '<' || data[1] != '!' || data[2] != '-' || data[3] != '-' {
|
|
595 return 0
|
|
596 }
|
|
597 i := 5
|
|
598 // scan for an end-of-comment marker, across lines if necessary
|
|
599 for i < len(data) && !(data[i-2] == '-' && data[i-1] == '-' && data[i] == '>') {
|
|
600 i++
|
|
601 }
|
|
602 // no end-of-comment marker
|
|
603 if i >= len(data) {
|
|
604 return 0
|
|
605 }
|
|
606 return i + 1
|
|
607 }
|
|
608
|
|
// stripMailto returns link without a leading "mailto://" or "mailto:" scheme.
// A link with neither prefix is returned unchanged.
func stripMailto(link []byte) []byte {
	// the longer form is tried first so that "//" is not left behind
	for _, scheme := range [...]string{"mailto://", "mailto:"} {
		if bytes.HasPrefix(link, []byte(scheme)) {
			return link[len(scheme):]
		}
	}
	return link
}
|
|
618
|
|
// autolinkType specifies a kind of autolink that gets detected.
type autolinkType int

// These are the possible flag values for the autolink renderer.
const (
	// notAutolink means no autolink was detected.
	notAutolink autolinkType = iota
	// normalAutolink is an autolink introduced by a URI scheme.
	normalAutolink
	// emailAutolink is an autolink that holds an e-mail address.
	emailAutolink
)
|
|
628
|
|
629 // '<' when tags or autolinks are allowed
|
|
630 func leftAngle(p *Markdown, data []byte, offset int) (int, *Node) {
|
|
631 data = data[offset:]
|
|
632 altype, end := tagLength(data)
|
|
633 if size := p.inlineHTMLComment(data); size > 0 {
|
|
634 end = size
|
|
635 }
|
|
636 if end > 2 {
|
|
637 if altype != notAutolink {
|
|
638 var uLink bytes.Buffer
|
|
639 unescapeText(&uLink, data[1:end+1-2])
|
|
640 if uLink.Len() > 0 {
|
|
641 link := uLink.Bytes()
|
|
642 node := NewNode(Link)
|
|
643 node.Destination = link
|
|
644 if altype == emailAutolink {
|
|
645 node.Destination = append([]byte("mailto:"), link...)
|
|
646 }
|
|
647 node.AppendChild(text(stripMailto(link)))
|
|
648 return end, node
|
|
649 }
|
|
650 } else {
|
|
651 htmlTag := NewNode(HTMLSpan)
|
|
652 htmlTag.Literal = data[:end]
|
|
653 return end, htmlTag
|
|
654 }
|
|
655 }
|
|
656
|
|
657 return end, nil
|
|
658 }
|
|
659
|
|
660 // '\\' backslash escape
|
|
661 var escapeChars = []byte("\\`*_{}[]()#+-.!:|&<>~")
|
|
662
|
|
663 func escape(p *Markdown, data []byte, offset int) (int, *Node) {
|
|
664 data = data[offset:]
|
|
665
|
|
666 if len(data) > 1 {
|
|
667 if p.extensions&BackslashLineBreak != 0 && data[1] == '\n' {
|
|
668 return 2, NewNode(Hardbreak)
|
|
669 }
|
|
670 if bytes.IndexByte(escapeChars, data[1]) < 0 {
|
|
671 return 0, nil
|
|
672 }
|
|
673
|
|
674 return 2, text(data[1:2])
|
|
675 }
|
|
676
|
|
677 return 2, nil
|
|
678 }
|
|
679
|
|
// unescapeText copies src to ob with backslash escapes removed: each
// backslash is dropped and the byte after it is copied verbatim. A backslash
// that is the last byte of src is dropped.
func unescapeText(ob *bytes.Buffer, src []byte) {
	for pos := 0; pos < len(src); {
		rel := bytes.IndexByte(src[pos:], '\\')
		if rel < 0 {
			// no more escapes: copy the remainder and stop
			ob.Write(src[pos:])
			return
		}

		slash := pos + rel
		if rel > 0 {
			ob.Write(src[pos:slash])
		}

		// a trailing backslash has nothing to escape
		if slash+1 >= len(src) {
			return
		}

		ob.WriteByte(src[slash+1])
		pos = slash + 2
	}
}
|
|
700
|
|
701 // '&' escaped when it doesn't belong to an entity
|
|
702 // valid entities are assumed to be anything matching &#?[A-Za-z0-9]+;
|
|
703 func entity(p *Markdown, data []byte, offset int) (int, *Node) {
|
|
704 data = data[offset:]
|
|
705
|
|
706 end := 1
|
|
707
|
|
708 if end < len(data) && data[end] == '#' {
|
|
709 end++
|
|
710 }
|
|
711
|
|
712 for end < len(data) && isalnum(data[end]) {
|
|
713 end++
|
|
714 }
|
|
715
|
|
716 if end < len(data) && data[end] == ';' {
|
|
717 end++ // real entity
|
|
718 } else {
|
|
719 return 0, nil // lone '&'
|
|
720 }
|
|
721
|
|
722 ent := data[:end]
|
|
723 // undo & escaping or it will be converted to &amp; by another
|
|
724 // escaper in the renderer
|
|
725 if bytes.Equal(ent, []byte("&")) {
|
|
726 ent = []byte{'&'}
|
|
727 }
|
|
728
|
|
729 return end, text(ent)
|
|
730 }
|
|
731
|
|
732 func linkEndsWithEntity(data []byte, linkEnd int) bool {
|
|
733 entityRanges := htmlEntityRe.FindAllIndex(data[:linkEnd], -1)
|
|
734 return entityRanges != nil && entityRanges[len(entityRanges)-1][1] == linkEnd
|
|
735 }
|
|
736
|
|
// hasPrefixCaseInsensitive is a custom implementation of
// strings.HasPrefix(strings.ToLower(s), prefix)
// we rolled our own because ToLower pulls in a huge machinery of lowercasing
// anything from Unicode and that's very slow. Since this func will only be
// used on ASCII protocol prefixes, we can take shortcuts.
//
// A byte of s matches the corresponding prefix byte if it is identical, or
// if it is an ASCII uppercase letter whose lowercase form is the prefix byte.
func hasPrefixCaseInsensitive(s, prefix []byte) bool {
	if len(s) < len(prefix) {
		return false
	}
	delta := byte('a' - 'A')
	for i, want := range prefix {
		got := s[i]
		if got == want {
			continue
		}
		// Only fold real uppercase letters. Adding delta to arbitrary bytes
		// would let control bytes pass for punctuation, e.g. 0x1a for ':'
		// and 0x0f for '/'.
		if 'A' <= got && got <= 'Z' && got+delta == want {
			continue
		}
		return false
	}
	return true
}
|
|
754
|
|
// protocolPrefixes lists the lowercase scheme prefixes that are checked
// before a more expensive autolink parse is attempted.
var protocolPrefixes = [][]byte{
	[]byte("http://"),
	[]byte("https://"),
	[]byte("ftp://"),
	[]byte("file://"),
	[]byte("mailto:"),
}

// shortestPrefix is the length of the shortest entry in protocolPrefixes.
const shortestPrefix = len("ftp://")
|
|
764
|
|
765 func maybeAutoLink(p *Markdown, data []byte, offset int) (int, *Node) {
|
|
766 // quick check to rule out most false hits
|
|
767 if p.insideLink || len(data) < offset+shortestPrefix {
|
|
768 return 0, nil
|
|
769 }
|
|
770 for _, prefix := range protocolPrefixes {
|
|
771 endOfHead := offset + 8 // 8 is the len() of the longest prefix
|
|
772 if endOfHead > len(data) {
|
|
773 endOfHead = len(data)
|
|
774 }
|
|
775 if hasPrefixCaseInsensitive(data[offset:endOfHead], prefix) {
|
|
776 return autoLink(p, data, offset)
|
|
777 }
|
|
778 }
|
|
779 return 0, nil
|
|
780 }
|
|
781
|
|
782 func autoLink(p *Markdown, data []byte, offset int) (int, *Node) {
|
|
783 // Now a more expensive check to see if we're not inside an anchor element
|
|
784 anchorStart := offset
|
|
785 offsetFromAnchor := 0
|
|
786 for anchorStart > 0 && data[anchorStart] != '<' {
|
|
787 anchorStart--
|
|
788 offsetFromAnchor++
|
|
789 }
|
|
790
|
|
791 anchorStr := anchorRe.Find(data[anchorStart:])
|
|
792 if anchorStr != nil {
|
|
793 anchorClose := NewNode(HTMLSpan)
|
|
794 anchorClose.Literal = anchorStr[offsetFromAnchor:]
|
|
795 return len(anchorStr) - offsetFromAnchor, anchorClose
|
|
796 }
|
|
797
|
|
798 // scan backward for a word boundary
|
|
799 rewind := 0
|
|
800 for offset-rewind > 0 && rewind <= 7 && isletter(data[offset-rewind-1]) {
|
|
801 rewind++
|
|
802 }
|
|
803 if rewind > 6 { // longest supported protocol is "mailto" which has 6 letters
|
|
804 return 0, nil
|
|
805 }
|
|
806
|
|
807 origData := data
|
|
808 data = data[offset-rewind:]
|
|
809
|
|
810 if !isSafeLink(data) {
|
|
811 return 0, nil
|
|
812 }
|
|
813
|
|
814 linkEnd := 0
|
|
815 for linkEnd < len(data) && !isEndOfLink(data[linkEnd]) {
|
|
816 linkEnd++
|
|
817 }
|
|
818
|
|
819 // Skip punctuation at the end of the link
|
|
820 if (data[linkEnd-1] == '.' || data[linkEnd-1] == ',') && data[linkEnd-2] != '\\' {
|
|
821 linkEnd--
|
|
822 }
|
|
823
|
|
824 // But don't skip semicolon if it's a part of escaped entity:
|
|
825 if data[linkEnd-1] == ';' && data[linkEnd-2] != '\\' && !linkEndsWithEntity(data, linkEnd) {
|
|
826 linkEnd--
|
|
827 }
|
|
828
|
|
829 // See if the link finishes with a punctuation sign that can be closed.
|
|
830 var copen byte
|
|
831 switch data[linkEnd-1] {
|
|
832 case '"':
|
|
833 copen = '"'
|
|
834 case '\'':
|
|
835 copen = '\''
|
|
836 case ')':
|
|
837 copen = '('
|
|
838 case ']':
|
|
839 copen = '['
|
|
840 case '}':
|
|
841 copen = '{'
|
|
842 default:
|
|
843 copen = 0
|
|
844 }
|
|
845
|
|
846 if copen != 0 {
|
|
847 bufEnd := offset - rewind + linkEnd - 2
|
|
848
|
|
849 openDelim := 1
|
|
850
|
|
851 /* Try to close the final punctuation sign in this same line;
|
|
852 * if we managed to close it outside of the URL, that means that it's
|
|
853 * not part of the URL. If it closes inside the URL, that means it
|
|
854 * is part of the URL.
|
|
855 *
|
|
856 * Examples:
|
|
857 *
|
|
858 * foo http://www.pokemon.com/Pikachu_(Electric) bar
|
|
859 * => http://www.pokemon.com/Pikachu_(Electric)
|
|
860 *
|
|
861 * foo (http://www.pokemon.com/Pikachu_(Electric)) bar
|
|
862 * => http://www.pokemon.com/Pikachu_(Electric)
|
|
863 *
|
|
864 * foo http://www.pokemon.com/Pikachu_(Electric)) bar
|
|
865 * => http://www.pokemon.com/Pikachu_(Electric))
|
|
866 *
|
|
867 * (foo http://www.pokemon.com/Pikachu_(Electric)) bar
|
|
868 * => foo http://www.pokemon.com/Pikachu_(Electric)
|
|
869 */
|
|
870
|
|
871 for bufEnd >= 0 && origData[bufEnd] != '\n' && openDelim != 0 {
|
|
872 if origData[bufEnd] == data[linkEnd-1] {
|
|
873 openDelim++
|
|
874 }
|
|
875
|
|
876 if origData[bufEnd] == copen {
|
|
877 openDelim--
|
|
878 }
|
|
879
|
|
880 bufEnd--
|
|
881 }
|
|
882
|
|
883 if openDelim == 0 {
|
|
884 linkEnd--
|
|
885 }
|
|
886 }
|
|
887
|
|
888 var uLink bytes.Buffer
|
|
889 unescapeText(&uLink, data[:linkEnd])
|
|
890
|
|
891 if uLink.Len() > 0 {
|
|
892 node := NewNode(Link)
|
|
893 node.Destination = uLink.Bytes()
|
|
894 node.AppendChild(text(uLink.Bytes()))
|
|
895 return linkEnd, node
|
|
896 }
|
|
897
|
|
898 return linkEnd, nil
|
|
899 }
|
|
900
|
|
901 func isEndOfLink(char byte) bool {
|
|
902 return isspace(char) || char == '<'
|
|
903 }
|
|
904
|
|
// validUris holds the lowercase scheme prefixes and validPaths the path
// prefixes that a link may start with.
var (
	validUris = [][]byte{
		[]byte("http://"),
		[]byte("https://"),
		[]byte("ftp://"),
		[]byte("mailto://"),
	}
	validPaths = [][]byte{
		[]byte("/"),
		[]byte("./"),
		[]byte("../"),
	}
)
|
|
907
|
|
908 func isSafeLink(link []byte) bool {
|
|
909 for _, path := range validPaths {
|
|
910 if len(link) >= len(path) && bytes.Equal(link[:len(path)], path) {
|
|
911 if len(link) == len(path) {
|
|
912 return true
|
|
913 } else if isalnum(link[len(path)]) {
|
|
914 return true
|
|
915 }
|
|
916 }
|
|
917 }
|
|
918
|
|
919 for _, prefix := range validUris {
|
|
920 // TODO: handle unicode here
|
|
921 // case-insensitive prefix test
|
|
922 if len(link) > len(prefix) && bytes.Equal(bytes.ToLower(link[:len(prefix)]), prefix) && isalnum(link[len(prefix)]) {
|
|
923 return true
|
|
924 }
|
|
925 }
|
|
926
|
|
927 return false
|
|
928 }
|
|
929
|
|
930 // return the length of the given tag, or 0 is it's not valid
|
|
931 func tagLength(data []byte) (autolink autolinkType, end int) {
|
|
932 var i, j int
|
|
933
|
|
934 // a valid tag can't be shorter than 3 chars
|
|
935 if len(data) < 3 {
|
|
936 return notAutolink, 0
|
|
937 }
|
|
938
|
|
939 // begins with a '<' optionally followed by '/', followed by letter or number
|
|
940 if data[0] != '<' {
|
|
941 return notAutolink, 0
|
|
942 }
|
|
943 if data[1] == '/' {
|
|
944 i = 2
|
|
945 } else {
|
|
946 i = 1
|
|
947 }
|
|
948
|
|
949 if !isalnum(data[i]) {
|
|
950 return notAutolink, 0
|
|
951 }
|
|
952
|
|
953 // scheme test
|
|
954 autolink = notAutolink
|
|
955
|
|
956 // try to find the beginning of an URI
|
|
957 for i < len(data) && (isalnum(data[i]) || data[i] == '.' || data[i] == '+' || data[i] == '-') {
|
|
958 i++
|
|
959 }
|
|
960
|
|
961 if i > 1 && i < len(data) && data[i] == '@' {
|
|
962 if j = isMailtoAutoLink(data[i:]); j != 0 {
|
|
963 return emailAutolink, i + j
|
|
964 }
|
|
965 }
|
|
966
|
|
967 if i > 2 && i < len(data) && data[i] == ':' {
|
|
968 autolink = normalAutolink
|
|
969 i++
|
|
970 }
|
|
971
|
|
972 // complete autolink test: no whitespace or ' or "
|
|
973 switch {
|
|
974 case i >= len(data):
|
|
975 autolink = notAutolink
|
|
976 case autolink != notAutolink:
|
|
977 j = i
|
|
978
|
|
979 for i < len(data) {
|
|
980 if data[i] == '\\' {
|
|
981 i += 2
|
|
982 } else if data[i] == '>' || data[i] == '\'' || data[i] == '"' || isspace(data[i]) {
|
|
983 break
|
|
984 } else {
|
|
985 i++
|
|
986 }
|
|
987
|
|
988 }
|
|
989
|
|
990 if i >= len(data) {
|
|
991 return autolink, 0
|
|
992 }
|
|
993 if i > j && data[i] == '>' {
|
|
994 return autolink, i + 1
|
|
995 }
|
|
996
|
|
997 // one of the forbidden chars has been found
|
|
998 autolink = notAutolink
|
|
999 }
|
|
1000 i += bytes.IndexByte(data[i:], '>')
|
|
1001 if i < 0 {
|
|
1002 return autolink, 0
|
|
1003 }
|
|
1004 return autolink, i + 1
|
|
1005 }
|
|
1006
|
|
1007 // look for the address part of a mail autolink and '>'
|
|
1008 // this is less strict than the original markdown e-mail address matching
|
|
1009 func isMailtoAutoLink(data []byte) int {
|
|
1010 nb := 0
|
|
1011
|
|
1012 // address is assumed to be: [-@._a-zA-Z0-9]+ with exactly one '@'
|
|
1013 for i := 0; i < len(data); i++ {
|
|
1014 if isalnum(data[i]) {
|
|
1015 continue
|
|
1016 }
|
|
1017
|
|
1018 switch data[i] {
|
|
1019 case '@':
|
|
1020 nb++
|
|
1021
|
|
1022 case '-', '.', '_':
|
|
1023 break
|
|
1024
|
|
1025 case '>':
|
|
1026 if nb == 1 {
|
|
1027 return i + 1
|
|
1028 }
|
|
1029 return 0
|
|
1030 default:
|
|
1031 return 0
|
|
1032 }
|
|
1033 }
|
|
1034
|
|
1035 return 0
|
|
1036 }
|
|
1037
|
|
// helperFindEmphChar looks for the next emphasis char c in data, skipping
// code spans and links. It returns the index of the char, or 0 if none is
// found. Backslash-escaped chars are not counted.
//
// A c found inside a code span or link text is only remembered; it is
// returned if the input ends, or if the bracketed text turns out not to be a
// link.
func helperFindEmphChar(data []byte, c byte) int {
	i := 0

	for i < len(data) {
		for i < len(data) && data[i] != c && data[i] != '`' && data[i] != '[' {
			i++
		}
		if i >= len(data) {
			return 0
		}
		// do not count escaped chars
		if i != 0 && data[i-1] == '\\' {
			i++
			continue
		}
		if data[i] == c {
			return i
		}

		if data[i] == '`' {
			// skip a code span
			tmpI := 0
			i++
			for i < len(data) && data[i] != '`' {
				if tmpI == 0 && data[i] == c {
					tmpI = i
				}
				i++
			}
			if i >= len(data) {
				return tmpI
			}
			i++
		} else if data[i] == '[' {
			// skip a link
			tmpI := 0
			i++
			for i < len(data) && data[i] != ']' {
				if tmpI == 0 && data[i] == c {
					tmpI = i
				}
				i++
			}
			i++
			for i < len(data) && (data[i] == ' ' || data[i] == '\n') {
				i++
			}
			if i >= len(data) {
				return tmpI
			}
			if data[i] != '[' && data[i] != '(' { // not a link
				if tmpI > 0 {
					return tmpI
				}
				continue
			}

			// The second part of the link ends at the bracket that CLOSES
			// it. Scanning for another opening bracket would run past the
			// link and skip the escape and code-span handling above.
			closing := byte(')')
			if data[i] == '[' {
				closing = ']'
			}
			i++
			for i < len(data) && data[i] != closing {
				if tmpI == 0 && data[i] == c {
					return i
				}
				i++
			}
			if i >= len(data) {
				return tmpI
			}
			i++
		}
	}
	return 0
}
|
|
1111
|
|
1112 func helperEmphasis(p *Markdown, data []byte, c byte) (int, *Node) {
|
|
1113 i := 0
|
|
1114
|
|
1115 // skip one symbol if coming from emph3
|
|
1116 if len(data) > 1 && data[0] == c && data[1] == c {
|
|
1117 i = 1
|
|
1118 }
|
|
1119
|
|
1120 for i < len(data) {
|
|
1121 length := helperFindEmphChar(data[i:], c)
|
|
1122 if length == 0 {
|
|
1123 return 0, nil
|
|
1124 }
|
|
1125 i += length
|
|
1126 if i >= len(data) {
|
|
1127 return 0, nil
|
|
1128 }
|
|
1129
|
|
1130 if i+1 < len(data) && data[i+1] == c {
|
|
1131 i++
|
|
1132 continue
|
|
1133 }
|
|
1134
|
|
1135 if data[i] == c && !isspace(data[i-1]) {
|
|
1136
|
|
1137 if p.extensions&NoIntraEmphasis != 0 {
|
|
1138 if !(i+1 == len(data) || isspace(data[i+1]) || ispunct(data[i+1])) {
|
|
1139 continue
|
|
1140 }
|
|
1141 }
|
|
1142
|
|
1143 emph := NewNode(Emph)
|
|
1144 p.inline(emph, data[:i])
|
|
1145 return i + 1, emph
|
|
1146 }
|
|
1147 }
|
|
1148
|
|
1149 return 0, nil
|
|
1150 }
|
|
1151
|
|
1152 func helperDoubleEmphasis(p *Markdown, data []byte, c byte) (int, *Node) {
|
|
1153 i := 0
|
|
1154
|
|
1155 for i < len(data) {
|
|
1156 length := helperFindEmphChar(data[i:], c)
|
|
1157 if length == 0 {
|
|
1158 return 0, nil
|
|
1159 }
|
|
1160 i += length
|
|
1161
|
|
1162 if i+1 < len(data) && data[i] == c && data[i+1] == c && i > 0 && !isspace(data[i-1]) {
|
|
1163 nodeType := Strong
|
|
1164 if c == '~' {
|
|
1165 nodeType = Del
|
|
1166 }
|
|
1167 node := NewNode(nodeType)
|
|
1168 p.inline(node, data[:i])
|
|
1169 return i + 2, node
|
|
1170 }
|
|
1171 i++
|
|
1172 }
|
|
1173 return 0, nil
|
|
1174 }
|
|
1175
|
|
1176 func helperTripleEmphasis(p *Markdown, data []byte, offset int, c byte) (int, *Node) {
|
|
1177 i := 0
|
|
1178 origData := data
|
|
1179 data = data[offset:]
|
|
1180
|
|
1181 for i < len(data) {
|
|
1182 length := helperFindEmphChar(data[i:], c)
|
|
1183 if length == 0 {
|
|
1184 return 0, nil
|
|
1185 }
|
|
1186 i += length
|
|
1187
|
|
1188 // skip whitespace preceded symbols
|
|
1189 if data[i] != c || isspace(data[i-1]) {
|
|
1190 continue
|
|
1191 }
|
|
1192
|
|
1193 switch {
|
|
1194 case i+2 < len(data) && data[i+1] == c && data[i+2] == c:
|
|
1195 // triple symbol found
|
|
1196 strong := NewNode(Strong)
|
|
1197 em := NewNode(Emph)
|
|
1198 strong.AppendChild(em)
|
|
1199 p.inline(em, data[:i])
|
|
1200 return i + 3, strong
|
|
1201 case (i+1 < len(data) && data[i+1] == c):
|
|
1202 // double symbol found, hand over to emph1
|
|
1203 length, node := helperEmphasis(p, origData[offset-2:], c)
|
|
1204 if length == 0 {
|
|
1205 return 0, nil
|
|
1206 }
|
|
1207 return length - 2, node
|
|
1208 default:
|
|
1209 // single symbol found, hand over to emph2
|
|
1210 length, node := helperDoubleEmphasis(p, origData[offset-1:], c)
|
|
1211 if length == 0 {
|
|
1212 return 0, nil
|
|
1213 }
|
|
1214 return length - 1, node
|
|
1215 }
|
|
1216 }
|
|
1217 return 0, nil
|
|
1218 }
|
|
1219
|
|
1220 func text(s []byte) *Node {
|
|
1221 node := NewNode(Text)
|
|
1222 node.Literal = s
|
|
1223 return node
|
|
1224 }
|
|
1225
|
|
// normalizeURI is a placeholder for URI normalization. It currently returns
// s unchanged.
func normalizeURI(s []byte) []byte {
	// TODO: implement
	return s
}
|