Mercurial > yakumo_izuru > aya
comparison vendor/github.com/russross/blackfriday/v2/inline.go @ 66:787b5ee0289d draft
Use vendored modules
Signed-off-by: Izuru Yakumo <yakumo.izuru@chaotic.ninja>
| author | yakumo.izuru |
|---|---|
| date | Sun, 23 Jul 2023 13:18:53 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 65:6d985efa0f7a | 66:787b5ee0289d |
|---|---|
| 1 // | |
| 2 // Blackfriday Markdown Processor | |
| 3 // Available at http://github.com/russross/blackfriday | |
| 4 // | |
| 5 // Copyright © 2011 Russ Ross <russ@russross.com>. | |
| 6 // Distributed under the Simplified BSD License. | |
| 7 // See README.md for details. | |
| 8 // | |
| 9 | |
| 10 // | |
| 11 // Functions to parse inline elements. | |
| 12 // | |
| 13 | |
| 14 package blackfriday | |
| 15 | |
| 16 import ( | |
| 17 "bytes" | |
| 18 "regexp" | |
| 19 "strconv" | |
| 20 ) | |
| 21 | |
var (
	// urlRe is a regexp fragment (not compiled on its own) matching either an
	// http/https/ftp URL or a path starting with '/'.
	urlRe = `((https?|ftp):\/\/|\/)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)]+`
	// anchorRe matches, anchored at the start of the input, a complete
	// <a href="URL" [title="..."]>URL</a> element.
	anchorRe = regexp.MustCompile(`^(<a\shref="` + urlRe + `"(\stitle="[^"<>]+")?\s?>` + urlRe + `<\/a>)`)

	// https://www.w3.org/TR/html5/syntax.html#character-references
	// highest unicode code point in 17 planes (2^20): 1,114,112d =
	// 7 dec digits or 6 hex digits
	// named entity references can be 2-31 characters with stuff like &lt;
	// at one end and &CounterClockwiseContourIntegral; at the other. There
	// are also sometimes numbers at the end, although this isn't inherent
	// in the specification; there are never numbers anywhere else in
	// current character references, though; see &frac34; and &blk12;, etc.
	// https://www.w3.org/TR/html5/syntax.html#named-character-references
	//
	// entity := "&" (named group | number ref) ";"
	// named group := [a-zA-Z]{2,31}[0-9]{0,2}
	// number ref := "#" (dec ref | hex ref)
	// dec ref := [0-9]{1,7}
	// hex ref := ("x" | "X") [0-9a-fA-F]{1,6}
	htmlEntityRe = regexp.MustCompile(`&([a-zA-Z]{2,31}[0-9]{0,2}|#([0-9]{1,7}|[xX][0-9a-fA-F]{1,6}));`)
)
| 43 | |
| 44 // Functions to parse text within a block | |
| 45 // Each function returns the number of chars taken care of | |
| 46 // data is the complete block being rendered | |
| 47 // offset is the number of valid chars before the current cursor | |
| 48 | |
| 49 func (p *Markdown) inline(currBlock *Node, data []byte) { | |
| 50 // handlers might call us recursively: enforce a maximum depth | |
| 51 if p.nesting >= p.maxNesting || len(data) == 0 { | |
| 52 return | |
| 53 } | |
| 54 p.nesting++ | |
| 55 beg, end := 0, 0 | |
| 56 for end < len(data) { | |
| 57 handler := p.inlineCallback[data[end]] | |
| 58 if handler != nil { | |
| 59 if consumed, node := handler(p, data, end); consumed == 0 { | |
| 60 // No action from the callback. | |
| 61 end++ | |
| 62 } else { | |
| 63 // Copy inactive chars into the output. | |
| 64 currBlock.AppendChild(text(data[beg:end])) | |
| 65 if node != nil { | |
| 66 currBlock.AppendChild(node) | |
| 67 } | |
| 68 // Skip past whatever the callback used. | |
| 69 beg = end + consumed | |
| 70 end = beg | |
| 71 } | |
| 72 } else { | |
| 73 end++ | |
| 74 } | |
| 75 } | |
| 76 if beg < len(data) { | |
| 77 if data[end-1] == '\n' { | |
| 78 end-- | |
| 79 } | |
| 80 currBlock.AppendChild(text(data[beg:end])) | |
| 81 } | |
| 82 p.nesting-- | |
| 83 } | |
| 84 | |
| 85 // single and double emphasis parsing | |
| 86 func emphasis(p *Markdown, data []byte, offset int) (int, *Node) { | |
| 87 data = data[offset:] | |
| 88 c := data[0] | |
| 89 | |
| 90 if len(data) > 2 && data[1] != c { | |
| 91 // whitespace cannot follow an opening emphasis; | |
| 92 // strikethrough only takes two characters '~~' | |
| 93 if c == '~' || isspace(data[1]) { | |
| 94 return 0, nil | |
| 95 } | |
| 96 ret, node := helperEmphasis(p, data[1:], c) | |
| 97 if ret == 0 { | |
| 98 return 0, nil | |
| 99 } | |
| 100 | |
| 101 return ret + 1, node | |
| 102 } | |
| 103 | |
| 104 if len(data) > 3 && data[1] == c && data[2] != c { | |
| 105 if isspace(data[2]) { | |
| 106 return 0, nil | |
| 107 } | |
| 108 ret, node := helperDoubleEmphasis(p, data[2:], c) | |
| 109 if ret == 0 { | |
| 110 return 0, nil | |
| 111 } | |
| 112 | |
| 113 return ret + 2, node | |
| 114 } | |
| 115 | |
| 116 if len(data) > 4 && data[1] == c && data[2] == c && data[3] != c { | |
| 117 if c == '~' || isspace(data[3]) { | |
| 118 return 0, nil | |
| 119 } | |
| 120 ret, node := helperTripleEmphasis(p, data, 3, c) | |
| 121 if ret == 0 { | |
| 122 return 0, nil | |
| 123 } | |
| 124 | |
| 125 return ret + 3, node | |
| 126 } | |
| 127 | |
| 128 return 0, nil | |
| 129 } | |
| 130 | |
| 131 func codeSpan(p *Markdown, data []byte, offset int) (int, *Node) { | |
| 132 data = data[offset:] | |
| 133 | |
| 134 nb := 0 | |
| 135 | |
| 136 // count the number of backticks in the delimiter | |
| 137 for nb < len(data) && data[nb] == '`' { | |
| 138 nb++ | |
| 139 } | |
| 140 | |
| 141 // find the next delimiter | |
| 142 i, end := 0, 0 | |
| 143 for end = nb; end < len(data) && i < nb; end++ { | |
| 144 if data[end] == '`' { | |
| 145 i++ | |
| 146 } else { | |
| 147 i = 0 | |
| 148 } | |
| 149 } | |
| 150 | |
| 151 // no matching delimiter? | |
| 152 if i < nb && end >= len(data) { | |
| 153 return 0, nil | |
| 154 } | |
| 155 | |
| 156 // trim outside whitespace | |
| 157 fBegin := nb | |
| 158 for fBegin < end && data[fBegin] == ' ' { | |
| 159 fBegin++ | |
| 160 } | |
| 161 | |
| 162 fEnd := end - nb | |
| 163 for fEnd > fBegin && data[fEnd-1] == ' ' { | |
| 164 fEnd-- | |
| 165 } | |
| 166 | |
| 167 // render the code span | |
| 168 if fBegin != fEnd { | |
| 169 code := NewNode(Code) | |
| 170 code.Literal = data[fBegin:fEnd] | |
| 171 return end, code | |
| 172 } | |
| 173 | |
| 174 return end, nil | |
| 175 } | |
| 176 | |
| 177 // newline preceded by two spaces becomes <br> | |
| 178 func maybeLineBreak(p *Markdown, data []byte, offset int) (int, *Node) { | |
| 179 origOffset := offset | |
| 180 for offset < len(data) && data[offset] == ' ' { | |
| 181 offset++ | |
| 182 } | |
| 183 | |
| 184 if offset < len(data) && data[offset] == '\n' { | |
| 185 if offset-origOffset >= 2 { | |
| 186 return offset - origOffset + 1, NewNode(Hardbreak) | |
| 187 } | |
| 188 return offset - origOffset, nil | |
| 189 } | |
| 190 return 0, nil | |
| 191 } | |
| 192 | |
| 193 // newline without two spaces works when HardLineBreak is enabled | |
| 194 func lineBreak(p *Markdown, data []byte, offset int) (int, *Node) { | |
| 195 if p.extensions&HardLineBreak != 0 { | |
| 196 return 1, NewNode(Hardbreak) | |
| 197 } | |
| 198 return 0, nil | |
| 199 } | |
| 200 | |
// linkType classifies the bracketed construct that link() is parsing.
type linkType int

const (
	linkNormal           linkType = iota // [text] regular link
	linkImg                              // ![alt] image
	linkDeferredFootnote                 // [^refId] footnote defined elsewhere
	linkInlineFootnote                   // ^[text] footnote defined in place
)
| 209 | |
| 210 func isReferenceStyleLink(data []byte, pos int, t linkType) bool { | |
| 211 if t == linkDeferredFootnote { | |
| 212 return false | |
| 213 } | |
| 214 return pos < len(data)-1 && data[pos] == '[' && data[pos+1] != '^' | |
| 215 } | |
| 216 | |
| 217 func maybeImage(p *Markdown, data []byte, offset int) (int, *Node) { | |
| 218 if offset < len(data)-1 && data[offset+1] == '[' { | |
| 219 return link(p, data, offset) | |
| 220 } | |
| 221 return 0, nil | |
| 222 } | |
| 223 | |
| 224 func maybeInlineFootnote(p *Markdown, data []byte, offset int) (int, *Node) { | |
| 225 if offset < len(data)-1 && data[offset+1] == '[' { | |
| 226 return link(p, data, offset) | |
| 227 } | |
| 228 return 0, nil | |
| 229 } | |
| 230 | |
// '[': parse a link or an image or a footnote
//
// link is entered at data[offset], which is '[' for links and deferred
// footnotes, '!' for images and '^' for inline footnotes. It first classifies
// the construct (linkType), then finds the closing ']' of the text part, and
// finally resolves the destination in one of three ways: an inline
// "(url "title")" part, a reference "[id]" part, or a shortcut reference /
// footnote with nothing following.
//
// It returns the number of bytes consumed and the resulting Link or Image
// node, or (0, nil) when the input does not form a valid construct.
func link(p *Markdown, data []byte, offset int) (int, *Node) {
	// no links allowed inside regular links, footnote, and deferred footnotes
	if p.insideLink && (offset > 0 && data[offset-1] == '[' || len(data)-1 > offset && data[offset+1] == '^') {
		return 0, nil
	}

	var t linkType
	switch {
	// special case: ![^text] == deferred footnote (that follows something with
	// an exclamation point)
	case p.extensions&Footnotes != 0 && len(data)-1 > offset && data[offset+1] == '^':
		t = linkDeferredFootnote
	// ![alt] == image
	case offset >= 0 && data[offset] == '!':
		t = linkImg
		offset++
	// ^[text] == inline footnote
	// [^refId] == deferred footnote
	case p.extensions&Footnotes != 0:
		if offset >= 0 && data[offset] == '^' {
			t = linkInlineFootnote
			offset++
		} else if len(data)-1 > offset && data[offset+1] == '^' {
			t = linkDeferredFootnote
		}
	// [text] == regular link
	default:
		t = linkNormal
	}

	// From here on all indices are relative to the '['. For images and inline
	// footnotes offset was advanced past the leading '!' / '^', which is
	// compensated for by an extra i++ before returning.
	data = data[offset:]

	var (
		i                       = 1
		noteID                  int
		title, link, altContent []byte
		textHasNl               = false
	)

	// skip the '^' of "[^"
	if t == linkDeferredFootnote {
		i++
	}

	// look for the matching closing bracket
	for level := 1; level > 0 && i < len(data); i++ {
		switch {
		case data[i] == '\n':
			textHasNl = true

		case isBackslashEscaped(data, i):
			continue

		case data[i] == '[':
			level++

		case data[i] == ']':
			level--
			if level <= 0 {
				i-- // compensate for extra i++ in for loop
			}
		}
	}

	if i >= len(data) {
		return 0, nil
	}

	// txtE is the index of the closing ']' of the text part.
	txtE := i
	i++
	var footnoteNode *Node

	// skip any amount of whitespace or newline
	// (this is much more lax than original markdown syntax)
	for i < len(data) && isspace(data[i]) {
		i++
	}

	// inline style link
	switch {
	case i < len(data) && data[i] == '(':
		// skip initial whitespace
		i++

		for i < len(data) && isspace(data[i]) {
			i++
		}

		linkB := i

		// look for link end: ' " )
	findlinkend:
		for i < len(data) {
			switch {
			case data[i] == '\\':
				// skip the backslash and the byte it escapes
				i += 2

			case data[i] == ')' || data[i] == '\'' || data[i] == '"':
				break findlinkend

			default:
				i++
			}
		}

		if i >= len(data) {
			return 0, nil
		}
		linkE := i

		// look for title end if present
		titleB, titleE := 0, 0
		if data[i] == '\'' || data[i] == '"' {
			i++
			titleB = i

		findtitleend:
			for i < len(data) {
				switch {
				case data[i] == '\\':
					i += 2

				case data[i] == ')':
					break findtitleend

				default:
					i++
				}
			}

			if i >= len(data) {
				return 0, nil
			}

			// skip whitespace after title
			titleE = i - 1
			for titleE > titleB && isspace(data[titleE]) {
				titleE--
			}

			// check for closing quote presence
			if data[titleE] != '\'' && data[titleE] != '"' {
				// no closing quote: there is no title, and everything up to
				// the ')' belongs to the link
				titleB, titleE = 0, 0
				linkE = i
			}
		}

		// remove whitespace at the end of the link
		for linkE > linkB && isspace(data[linkE-1]) {
			linkE--
		}

		// remove optional angle brackets around the link
		if data[linkB] == '<' {
			linkB++
		}
		if data[linkE-1] == '>' {
			linkE--
		}

		// build escaped link and title
		if linkE > linkB {
			link = data[linkB:linkE]
		}

		if titleE > titleB {
			title = data[titleB:titleE]
		}

		// step past the closing ')'
		i++

	// reference style link
	case isReferenceStyleLink(data, i, t):
		var id []byte
		altContentConsidered := false

		// look for the id
		i++
		linkB := i
		for i < len(data) && data[i] != ']' {
			i++
		}
		if i >= len(data) {
			return 0, nil
		}
		linkE := i

		// find the reference
		if linkB == linkE {
			// empty "[]": the link text itself serves as the id
			if textHasNl {
				var b bytes.Buffer

				// copy the text, replacing each newline by a single space
				// unless it is already preceded by one
				for j := 1; j < txtE; j++ {
					switch {
					case data[j] != '\n':
						b.WriteByte(data[j])
					case data[j-1] != ' ':
						b.WriteByte(' ')
					}
				}

				id = b.Bytes()
			} else {
				id = data[1:txtE]
				altContentConsidered = true
			}
		} else {
			id = data[linkB:linkE]
		}

		// find the reference with matching id
		lr, ok := p.getRef(string(id))
		if !ok {
			return 0, nil
		}

		// keep link and title from reference
		link = lr.link
		title = lr.title
		if altContentConsidered {
			altContent = lr.text
		}
		i++

	// shortcut reference style link or reference or inline footnote
	default:
		var id []byte

		// craft the id
		if textHasNl {
			var b bytes.Buffer

			// same newline-to-space normalisation as in the reference case
			for j := 1; j < txtE; j++ {
				switch {
				case data[j] != '\n':
					b.WriteByte(data[j])
				case data[j-1] != ' ':
					b.WriteByte(' ')
				}
			}

			id = b.Bytes()
		} else {
			if t == linkDeferredFootnote {
				id = data[2:txtE] // get rid of the ^
			} else {
				id = data[1:txtE]
			}
		}

		footnoteNode = NewNode(Item)
		if t == linkInlineFootnote {
			// create a new reference
			noteID = len(p.notes) + 1

			// the fragment is the slugified id, capped at 16 bytes, or
			// "footnote-<noteID>" when the footnote text is empty
			var fragment []byte
			if len(id) > 0 {
				if len(id) < 16 {
					fragment = make([]byte, len(id))
				} else {
					fragment = make([]byte, 16)
				}
				copy(fragment, slugify(id))
			} else {
				fragment = append([]byte("footnote-"), []byte(strconv.Itoa(noteID))...)
			}

			ref := &reference{
				noteID:   noteID,
				hasBlock: false,
				link:     fragment,
				title:    id,
				footnote: footnoteNode,
			}

			p.notes = append(p.notes, ref)

			link = ref.link
			title = ref.title
		} else {
			// find the reference with matching id
			lr, ok := p.getRef(string(id))
			if !ok {
				return 0, nil
			}

			if t == linkDeferredFootnote {
				lr.noteID = len(p.notes) + 1
				lr.footnote = footnoteNode
				p.notes = append(p.notes, lr)
			}

			// keep link and title from reference
			link = lr.link
			// if inline footnote, title == footnote contents
			title = lr.title
			noteID = lr.noteID
		}

		// rewind the whitespace
		i = txtE + 1
	}

	var uLink []byte
	if t == linkNormal || t == linkImg {
		if len(link) > 0 {
			var uLinkBuf bytes.Buffer
			unescapeText(&uLinkBuf, link)
			uLink = uLinkBuf.Bytes()
		}

		// links need something to click on and somewhere to go
		if len(uLink) == 0 || (t == linkNormal && txtE <= 1) {
			return 0, nil
		}
	}

	// call the relevant rendering function
	var linkNode *Node
	switch t {
	case linkNormal:
		linkNode = NewNode(Link)
		linkNode.Destination = normalizeURI(uLink)
		linkNode.Title = title
		if len(altContent) > 0 {
			linkNode.AppendChild(text(altContent))
		} else {
			// links cannot contain other links, so turn off link parsing
			// temporarily and recurse
			insideLink := p.insideLink
			p.insideLink = true
			p.inline(linkNode, data[1:txtE])
			p.insideLink = insideLink
		}

	case linkImg:
		linkNode = NewNode(Image)
		linkNode.Destination = uLink
		linkNode.Title = title
		linkNode.AppendChild(text(data[1:txtE]))
		// account for the leading '!' skipped before re-slicing data
		i++

	case linkInlineFootnote, linkDeferredFootnote:
		linkNode = NewNode(Link)
		linkNode.Destination = link
		linkNode.Title = title
		linkNode.NoteID = noteID
		linkNode.Footnote = footnoteNode
		if t == linkInlineFootnote {
			// account for the leading '^' skipped before re-slicing data
			i++
		}

	default:
		return 0, nil
	}

	return i, linkNode
}
| 589 | |
| 590 func (p *Markdown) inlineHTMLComment(data []byte) int { | |
| 591 if len(data) < 5 { | |
| 592 return 0 | |
| 593 } | |
| 594 if data[0] != '<' || data[1] != '!' || data[2] != '-' || data[3] != '-' { | |
| 595 return 0 | |
| 596 } | |
| 597 i := 5 | |
| 598 // scan for an end-of-comment marker, across lines if necessary | |
| 599 for i < len(data) && !(data[i-2] == '-' && data[i-1] == '-' && data[i] == '>') { | |
| 600 i++ | |
| 601 } | |
| 602 // no end-of-comment marker | |
| 603 if i >= len(data) { | |
| 604 return 0 | |
| 605 } | |
| 606 return i + 1 | |
| 607 } | |
| 608 | |
// stripMailto returns link without a leading "mailto://" or "mailto:" scheme
// (checked in that order, case-sensitively). Links without such a prefix are
// returned unchanged.
func stripMailto(link []byte) []byte {
	switch {
	case bytes.HasPrefix(link, []byte("mailto://")):
		return link[len("mailto://"):]
	case bytes.HasPrefix(link, []byte("mailto:")):
		return link[len("mailto:"):]
	}
	return link
}
| 618 | |
// autolinkType specifies a kind of autolink that gets detected.
type autolinkType int

// These are the possible flag values for the autolink renderer.
const (
	notAutolink    autolinkType = iota // not an autolink (e.g. a plain HTML tag)
	normalAutolink                     // <scheme:...> style autolink
	emailAutolink                      // <user@host> style autolink
)
| 628 | |
| 629 // '<' when tags or autolinks are allowed | |
| 630 func leftAngle(p *Markdown, data []byte, offset int) (int, *Node) { | |
| 631 data = data[offset:] | |
| 632 altype, end := tagLength(data) | |
| 633 if size := p.inlineHTMLComment(data); size > 0 { | |
| 634 end = size | |
| 635 } | |
| 636 if end > 2 { | |
| 637 if altype != notAutolink { | |
| 638 var uLink bytes.Buffer | |
| 639 unescapeText(&uLink, data[1:end+1-2]) | |
| 640 if uLink.Len() > 0 { | |
| 641 link := uLink.Bytes() | |
| 642 node := NewNode(Link) | |
| 643 node.Destination = link | |
| 644 if altype == emailAutolink { | |
| 645 node.Destination = append([]byte("mailto:"), link...) | |
| 646 } | |
| 647 node.AppendChild(text(stripMailto(link))) | |
| 648 return end, node | |
| 649 } | |
| 650 } else { | |
| 651 htmlTag := NewNode(HTMLSpan) | |
| 652 htmlTag.Literal = data[:end] | |
| 653 return end, htmlTag | |
| 654 } | |
| 655 } | |
| 656 | |
| 657 return end, nil | |
| 658 } | |
| 659 | |
| 660 // '\\' backslash escape | |
| 661 var escapeChars = []byte("\\`*_{}[]()#+-.!:|&<>~") | |
| 662 | |
| 663 func escape(p *Markdown, data []byte, offset int) (int, *Node) { | |
| 664 data = data[offset:] | |
| 665 | |
| 666 if len(data) > 1 { | |
| 667 if p.extensions&BackslashLineBreak != 0 && data[1] == '\n' { | |
| 668 return 2, NewNode(Hardbreak) | |
| 669 } | |
| 670 if bytes.IndexByte(escapeChars, data[1]) < 0 { | |
| 671 return 0, nil | |
| 672 } | |
| 673 | |
| 674 return 2, text(data[1:2]) | |
| 675 } | |
| 676 | |
| 677 return 2, nil | |
| 678 } | |
| 679 | |
// unescapeText writes src to ob with backslash escapes removed: every
// backslash is dropped and the byte following it is copied verbatim. A
// backslash that is the last byte of src is dropped without replacement.
func unescapeText(ob *bytes.Buffer, src []byte) {
	for len(src) > 0 {
		slash := bytes.IndexByte(src, '\\')
		if slash < 0 {
			// No more escapes: the remainder is copied as is.
			ob.Write(src)
			return
		}
		if slash > 0 {
			ob.Write(src[:slash])
		}
		if slash+1 >= len(src) {
			// Trailing backslash with nothing to escape.
			return
		}
		ob.WriteByte(src[slash+1])
		src = src[slash+2:]
	}
}
| 700 | |
| 701 // '&' escaped when it doesn't belong to an entity | |
| 702 // valid entities are assumed to be anything matching &#?[A-Za-z0-9]+; | |
| 703 func entity(p *Markdown, data []byte, offset int) (int, *Node) { | |
| 704 data = data[offset:] | |
| 705 | |
| 706 end := 1 | |
| 707 | |
| 708 if end < len(data) && data[end] == '#' { | |
| 709 end++ | |
| 710 } | |
| 711 | |
| 712 for end < len(data) && isalnum(data[end]) { | |
| 713 end++ | |
| 714 } | |
| 715 | |
| 716 if end < len(data) && data[end] == ';' { | |
| 717 end++ // real entity | |
| 718 } else { | |
| 719 return 0, nil // lone '&' | |
| 720 } | |
| 721 | |
| 722 ent := data[:end] | |
| 723 // undo & escaping or it will be converted to &amp; by another | |
| 724 // escaper in the renderer | |
| 725 if bytes.Equal(ent, []byte("&")) { | |
| 726 ent = []byte{'&'} | |
| 727 } | |
| 728 | |
| 729 return end, text(ent) | |
| 730 } | |
| 731 | |
| 732 func linkEndsWithEntity(data []byte, linkEnd int) bool { | |
| 733 entityRanges := htmlEntityRe.FindAllIndex(data[:linkEnd], -1) | |
| 734 return entityRanges != nil && entityRanges[len(entityRanges)-1][1] == linkEnd | |
| 735 } | |
| 736 | |
// hasPrefixCaseInsensitive is a custom implementation of
// strings.HasPrefix(strings.ToLower(s), prefix)
// we rolled our own because ToLower pulls in a huge machinery of lowercasing
// anything from Unicode and that's very slow. Since this func will only be
// used on ASCII protocol prefixes, we can take shortcuts.
//
// prefix is expected to be lower-case ASCII. A byte of s matches the
// corresponding prefix byte when it is identical, or when it is an upper-case
// ASCII letter whose lower-case form equals the prefix byte.
func hasPrefixCaseInsensitive(s, prefix []byte) bool {
	if len(s) < len(prefix) {
		return false
	}
	const delta = 'a' - 'A'
	for i, want := range prefix {
		got := s[i]
		if got == want {
			continue
		}
		// Only A-Z may be folded. Adding delta to arbitrary bytes would
		// also equate, for example, 0x0F with '/' and 0x1A with ':'.
		if got < 'A' || got > 'Z' || got+delta != want {
			return false
		}
	}
	return true
}
| 754 | |
// protocolPrefixes are the lower-case scheme prefixes that maybeAutoLink
// checks for (case-insensitively) before handing off to autoLink.
var protocolPrefixes = [][]byte{
	[]byte("http://"),
	[]byte("https://"),
	[]byte("ftp://"),
	[]byte("file://"),
	[]byte("mailto:"),
}

// shortestPrefix lets maybeAutoLink bail out early when fewer bytes than any
// prefix needs remain after the current offset.
const shortestPrefix = 6 // len("ftp://"), the shortest of the above
| 764 | |
| 765 func maybeAutoLink(p *Markdown, data []byte, offset int) (int, *Node) { | |
| 766 // quick check to rule out most false hits | |
| 767 if p.insideLink || len(data) < offset+shortestPrefix { | |
| 768 return 0, nil | |
| 769 } | |
| 770 for _, prefix := range protocolPrefixes { | |
| 771 endOfHead := offset + 8 // 8 is the len() of the longest prefix | |
| 772 if endOfHead > len(data) { | |
| 773 endOfHead = len(data) | |
| 774 } | |
| 775 if hasPrefixCaseInsensitive(data[offset:endOfHead], prefix) { | |
| 776 return autoLink(p, data, offset) | |
| 777 } | |
| 778 } | |
| 779 return 0, nil | |
| 780 } | |
| 781 | |
// autoLink turns a bare URL beginning at (or a few letters before)
// data[offset] into a Link node.
//
// If the URL sits inside an <a href="...">...</a> element matched by
// anchorRe, the remainder of that element is emitted verbatim as an HTMLSpan
// instead. Otherwise the link is extended up to the next whitespace or '<',
// and trailing punctuation that does not belong to the URL is trimmed.
//
// It returns the number of bytes consumed and the node, or (0, nil) when no
// safe link starts here.
func autoLink(p *Markdown, data []byte, offset int) (int, *Node) {
	// Now a more expensive check to see if we're not inside an anchor element
	anchorStart := offset
	offsetFromAnchor := 0
	for anchorStart > 0 && data[anchorStart] != '<' {
		anchorStart--
		offsetFromAnchor++
	}

	anchorStr := anchorRe.Find(data[anchorStart:])
	if anchorStr != nil {
		// emit only the part of the anchor from offset onward; what precedes
		// it is skipped by slicing at offsetFromAnchor
		anchorClose := NewNode(HTMLSpan)
		anchorClose.Literal = anchorStr[offsetFromAnchor:]
		return len(anchorStr) - offsetFromAnchor, anchorClose
	}

	// scan backward for a word boundary
	rewind := 0
	for offset-rewind > 0 && rewind <= 7 && isletter(data[offset-rewind-1]) {
		rewind++
	}
	if rewind > 6 { // longest supported protocol is "mailto" which has 6 letters
		return 0, nil
	}

	// origData keeps the full buffer for the backward delimiter scan below;
	// data now starts at the (rewound) beginning of the candidate link
	origData := data
	data = data[offset-rewind:]

	if !isSafeLink(data) {
		return 0, nil
	}

	linkEnd := 0
	for linkEnd < len(data) && !isEndOfLink(data[linkEnd]) {
		linkEnd++
	}

	// Skip punctuation at the end of the link
	if (data[linkEnd-1] == '.' || data[linkEnd-1] == ',') && data[linkEnd-2] != '\\' {
		linkEnd--
	}

	// But don't skip semicolon if it's a part of escaped entity:
	if data[linkEnd-1] == ';' && data[linkEnd-2] != '\\' && !linkEndsWithEntity(data, linkEnd) {
		linkEnd--
	}

	// See if the link finishes with a punctuation sign that can be closed.
	var copen byte
	switch data[linkEnd-1] {
	case '"':
		copen = '"'
	case '\'':
		copen = '\''
	case ')':
		copen = '('
	case ']':
		copen = '['
	case '}':
		copen = '{'
	default:
		copen = 0
	}

	if copen != 0 {
		// index in origData of the byte just before the closing delimiter
		bufEnd := offset - rewind + linkEnd - 2

		openDelim := 1

		/* Try to close the final punctuation sign in this same line;
		 * if we managed to close it outside of the URL, that means that it's
		 * not part of the URL. If it closes inside the URL, that means it
		 * is part of the URL.
		 *
		 * Examples:
		 *
		 * foo http://www.pokemon.com/Pikachu_(Electric) bar
		 * => http://www.pokemon.com/Pikachu_(Electric)
		 *
		 * foo (http://www.pokemon.com/Pikachu_(Electric)) bar
		 * => http://www.pokemon.com/Pikachu_(Electric)
		 *
		 * foo http://www.pokemon.com/Pikachu_(Electric)) bar
		 * => http://www.pokemon.com/Pikachu_(Electric))
		 *
		 * (foo http://www.pokemon.com/Pikachu_(Electric)) bar
		 * => foo http://www.pokemon.com/Pikachu_(Electric)
		 */

		// walk backward through the current line, counting closers up and
		// openers down, until the delimiters balance
		for bufEnd >= 0 && origData[bufEnd] != '\n' && openDelim != 0 {
			if origData[bufEnd] == data[linkEnd-1] {
				openDelim++
			}

			if origData[bufEnd] == copen {
				openDelim--
			}

			bufEnd--
		}

		if openDelim == 0 {
			linkEnd--
		}
	}

	var uLink bytes.Buffer
	unescapeText(&uLink, data[:linkEnd])

	if uLink.Len() > 0 {
		node := NewNode(Link)
		node.Destination = uLink.Bytes()
		node.AppendChild(text(uLink.Bytes()))
		return linkEnd, node
	}

	return linkEnd, nil
}
| 900 | |
| 901 func isEndOfLink(char byte) bool { | |
| 902 return isspace(char) || char == '<' | |
| 903 } | |
| 904 | |
// validUris are the lower-case scheme prefixes isSafeLink accepts.
var validUris = [][]byte{[]byte("http://"), []byte("https://"), []byte("ftp://"), []byte("mailto://")}

// validPaths are the relative/absolute path prefixes isSafeLink accepts.
var validPaths = [][]byte{[]byte("/"), []byte("./"), []byte("../")}
| 907 | |
| 908 func isSafeLink(link []byte) bool { | |
| 909 for _, path := range validPaths { | |
| 910 if len(link) >= len(path) && bytes.Equal(link[:len(path)], path) { | |
| 911 if len(link) == len(path) { | |
| 912 return true | |
| 913 } else if isalnum(link[len(path)]) { | |
| 914 return true | |
| 915 } | |
| 916 } | |
| 917 } | |
| 918 | |
| 919 for _, prefix := range validUris { | |
| 920 // TODO: handle unicode here | |
| 921 // case-insensitive prefix test | |
| 922 if len(link) > len(prefix) && bytes.Equal(bytes.ToLower(link[:len(prefix)]), prefix) && isalnum(link[len(prefix)]) { | |
| 923 return true | |
| 924 } | |
| 925 } | |
| 926 | |
| 927 return false | |
| 928 } | |
| 929 | |
| 930 // return the length of the given tag, or 0 is it's not valid | |
| 931 func tagLength(data []byte) (autolink autolinkType, end int) { | |
| 932 var i, j int | |
| 933 | |
| 934 // a valid tag can't be shorter than 3 chars | |
| 935 if len(data) < 3 { | |
| 936 return notAutolink, 0 | |
| 937 } | |
| 938 | |
| 939 // begins with a '<' optionally followed by '/', followed by letter or number | |
| 940 if data[0] != '<' { | |
| 941 return notAutolink, 0 | |
| 942 } | |
| 943 if data[1] == '/' { | |
| 944 i = 2 | |
| 945 } else { | |
| 946 i = 1 | |
| 947 } | |
| 948 | |
| 949 if !isalnum(data[i]) { | |
| 950 return notAutolink, 0 | |
| 951 } | |
| 952 | |
| 953 // scheme test | |
| 954 autolink = notAutolink | |
| 955 | |
| 956 // try to find the beginning of an URI | |
| 957 for i < len(data) && (isalnum(data[i]) || data[i] == '.' || data[i] == '+' || data[i] == '-') { | |
| 958 i++ | |
| 959 } | |
| 960 | |
| 961 if i > 1 && i < len(data) && data[i] == '@' { | |
| 962 if j = isMailtoAutoLink(data[i:]); j != 0 { | |
| 963 return emailAutolink, i + j | |
| 964 } | |
| 965 } | |
| 966 | |
| 967 if i > 2 && i < len(data) && data[i] == ':' { | |
| 968 autolink = normalAutolink | |
| 969 i++ | |
| 970 } | |
| 971 | |
| 972 // complete autolink test: no whitespace or ' or " | |
| 973 switch { | |
| 974 case i >= len(data): | |
| 975 autolink = notAutolink | |
| 976 case autolink != notAutolink: | |
| 977 j = i | |
| 978 | |
| 979 for i < len(data) { | |
| 980 if data[i] == '\\' { | |
| 981 i += 2 | |
| 982 } else if data[i] == '>' || data[i] == '\'' || data[i] == '"' || isspace(data[i]) { | |
| 983 break | |
| 984 } else { | |
| 985 i++ | |
| 986 } | |
| 987 | |
| 988 } | |
| 989 | |
| 990 if i >= len(data) { | |
| 991 return autolink, 0 | |
| 992 } | |
| 993 if i > j && data[i] == '>' { | |
| 994 return autolink, i + 1 | |
| 995 } | |
| 996 | |
| 997 // one of the forbidden chars has been found | |
| 998 autolink = notAutolink | |
| 999 } | |
| 1000 i += bytes.IndexByte(data[i:], '>') | |
| 1001 if i < 0 { | |
| 1002 return autolink, 0 | |
| 1003 } | |
| 1004 return autolink, i + 1 | |
| 1005 } | |
| 1006 | |
| 1007 // look for the address part of a mail autolink and '>' | |
| 1008 // this is less strict than the original markdown e-mail address matching | |
| 1009 func isMailtoAutoLink(data []byte) int { | |
| 1010 nb := 0 | |
| 1011 | |
| 1012 // address is assumed to be: [-@._a-zA-Z0-9]+ with exactly one '@' | |
| 1013 for i := 0; i < len(data); i++ { | |
| 1014 if isalnum(data[i]) { | |
| 1015 continue | |
| 1016 } | |
| 1017 | |
| 1018 switch data[i] { | |
| 1019 case '@': | |
| 1020 nb++ | |
| 1021 | |
| 1022 case '-', '.', '_': | |
| 1023 break | |
| 1024 | |
| 1025 case '>': | |
| 1026 if nb == 1 { | |
| 1027 return i + 1 | |
| 1028 } | |
| 1029 return 0 | |
| 1030 default: | |
| 1031 return 0 | |
| 1032 } | |
| 1033 } | |
| 1034 | |
| 1035 return 0 | |
| 1036 } | |
| 1037 | |
// look for the next emph char, skipping other constructs
//
// helperFindEmphChar returns the index in data of the next occurrence of c
// that is not preceded by a backslash, stepping over code spans and
// link-like bracket constructs. It returns 0 when no candidate is found;
// note that 0 is also what is returned when data[0] itself is c, so callers
// cannot tell the two apart.
func helperFindEmphChar(data []byte, c byte) int {
	i := 0

	for i < len(data) {
		// advance to the next byte of interest: c, a backtick or '['
		for i < len(data) && data[i] != c && data[i] != '`' && data[i] != '[' {
			i++
		}
		if i >= len(data) {
			return 0
		}
		// do not count escaped chars
		if i != 0 && data[i-1] == '\\' {
			i++
			continue
		}
		if data[i] == c {
			return i
		}

		if data[i] == '`' {
			// skip a code span
			// tmpI remembers the first c seen inside the span, used as the
			// result if the span turns out to be unterminated
			tmpI := 0
			i++
			for i < len(data) && data[i] != '`' {
				if tmpI == 0 && data[i] == c {
					tmpI = i
				}
				i++
			}
			if i >= len(data) {
				return tmpI
			}
			i++
		} else if data[i] == '[' {
			// skip a link
			// tmpI remembers the first c seen inside the brackets
			tmpI := 0
			i++
			for i < len(data) && data[i] != ']' {
				if tmpI == 0 && data[i] == c {
					tmpI = i
				}
				i++
			}
			i++
			for i < len(data) && (data[i] == ' ' || data[i] == '\n') {
				i++
			}
			if i >= len(data) {
				return tmpI
			}
			if data[i] != '[' && data[i] != '(' { // not a link
				if tmpI > 0 {
					return tmpI
				}
				continue
			}
			// NOTE(review): cc is the opening delimiter itself, so the scan
			// below stops at the next '[' or '(' rather than at the matching
			// ']' or ')' — confirm whether the closing delimiter was intended.
			cc := data[i]
			i++
			for i < len(data) && data[i] != cc {
				if tmpI == 0 && data[i] == c {
					return i
				}
				i++
			}
			if i >= len(data) {
				return tmpI
			}
			i++
		}
	}
	return 0
}
| 1111 | |
| 1112 func helperEmphasis(p *Markdown, data []byte, c byte) (int, *Node) { | |
| 1113 i := 0 | |
| 1114 | |
| 1115 // skip one symbol if coming from emph3 | |
| 1116 if len(data) > 1 && data[0] == c && data[1] == c { | |
| 1117 i = 1 | |
| 1118 } | |
| 1119 | |
| 1120 for i < len(data) { | |
| 1121 length := helperFindEmphChar(data[i:], c) | |
| 1122 if length == 0 { | |
| 1123 return 0, nil | |
| 1124 } | |
| 1125 i += length | |
| 1126 if i >= len(data) { | |
| 1127 return 0, nil | |
| 1128 } | |
| 1129 | |
| 1130 if i+1 < len(data) && data[i+1] == c { | |
| 1131 i++ | |
| 1132 continue | |
| 1133 } | |
| 1134 | |
| 1135 if data[i] == c && !isspace(data[i-1]) { | |
| 1136 | |
| 1137 if p.extensions&NoIntraEmphasis != 0 { | |
| 1138 if !(i+1 == len(data) || isspace(data[i+1]) || ispunct(data[i+1])) { | |
| 1139 continue | |
| 1140 } | |
| 1141 } | |
| 1142 | |
| 1143 emph := NewNode(Emph) | |
| 1144 p.inline(emph, data[:i]) | |
| 1145 return i + 1, emph | |
| 1146 } | |
| 1147 } | |
| 1148 | |
| 1149 return 0, nil | |
| 1150 } | |
| 1151 | |
| 1152 func helperDoubleEmphasis(p *Markdown, data []byte, c byte) (int, *Node) { | |
| 1153 i := 0 | |
| 1154 | |
| 1155 for i < len(data) { | |
| 1156 length := helperFindEmphChar(data[i:], c) | |
| 1157 if length == 0 { | |
| 1158 return 0, nil | |
| 1159 } | |
| 1160 i += length | |
| 1161 | |
| 1162 if i+1 < len(data) && data[i] == c && data[i+1] == c && i > 0 && !isspace(data[i-1]) { | |
| 1163 nodeType := Strong | |
| 1164 if c == '~' { | |
| 1165 nodeType = Del | |
| 1166 } | |
| 1167 node := NewNode(nodeType) | |
| 1168 p.inline(node, data[:i]) | |
| 1169 return i + 2, node | |
| 1170 } | |
| 1171 i++ | |
| 1172 } | |
| 1173 return 0, nil | |
| 1174 } | |
| 1175 | |
| 1176 func helperTripleEmphasis(p *Markdown, data []byte, offset int, c byte) (int, *Node) { | |
| 1177 i := 0 | |
| 1178 origData := data | |
| 1179 data = data[offset:] | |
| 1180 | |
| 1181 for i < len(data) { | |
| 1182 length := helperFindEmphChar(data[i:], c) | |
| 1183 if length == 0 { | |
| 1184 return 0, nil | |
| 1185 } | |
| 1186 i += length | |
| 1187 | |
| 1188 // skip whitespace preceded symbols | |
| 1189 if data[i] != c || isspace(data[i-1]) { | |
| 1190 continue | |
| 1191 } | |
| 1192 | |
| 1193 switch { | |
| 1194 case i+2 < len(data) && data[i+1] == c && data[i+2] == c: | |
| 1195 // triple symbol found | |
| 1196 strong := NewNode(Strong) | |
| 1197 em := NewNode(Emph) | |
| 1198 strong.AppendChild(em) | |
| 1199 p.inline(em, data[:i]) | |
| 1200 return i + 3, strong | |
| 1201 case (i+1 < len(data) && data[i+1] == c): | |
| 1202 // double symbol found, hand over to emph1 | |
| 1203 length, node := helperEmphasis(p, origData[offset-2:], c) | |
| 1204 if length == 0 { | |
| 1205 return 0, nil | |
| 1206 } | |
| 1207 return length - 2, node | |
| 1208 default: | |
| 1209 // single symbol found, hand over to emph2 | |
| 1210 length, node := helperDoubleEmphasis(p, origData[offset-1:], c) | |
| 1211 if length == 0 { | |
| 1212 return 0, nil | |
| 1213 } | |
| 1214 return length - 1, node | |
| 1215 } | |
| 1216 } | |
| 1217 return 0, nil | |
| 1218 } | |
| 1219 | |
| 1220 func text(s []byte) *Node { | |
| 1221 node := NewNode(Text) | |
| 1222 node.Literal = s | |
| 1223 return node | |
| 1224 } | |
| 1225 | |
// normalizeURI is a placeholder for URI normalization: it currently returns s
// unchanged (the very same slice, not a copy).
func normalizeURI(s []byte) []byte {
	// TODO: implement
	return s
}
