Mercurial > yakumo_izuru > aya
comparison vendor/github.com/russross/blackfriday/v2/block.go @ 66:787b5ee0289d draft
Use vendored modules
Signed-off-by: Izuru Yakumo <yakumo.izuru@chaotic.ninja>
| author | yakumo.izuru |
|---|---|
| date | Sun, 23 Jul 2023 13:18:53 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 65:6d985efa0f7a | 66:787b5ee0289d |
|---|---|
| 1 // | |
| 2 // Blackfriday Markdown Processor | |
| 3 // Available at http://github.com/russross/blackfriday | |
| 4 // | |
| 5 // Copyright © 2011 Russ Ross <russ@russross.com>. | |
| 6 // Distributed under the Simplified BSD License. | |
| 7 // See README.md for details. | |
| 8 // | |
| 9 | |
| 10 // | |
| 11 // Functions to parse block-level elements. | |
| 12 // | |
| 13 | |
| 14 package blackfriday | |
| 15 | |
| 16 import ( | |
| 17 "bytes" | |
| 18 "html" | |
| 19 "regexp" | |
| 20 "strings" | |
| 21 "unicode" | |
| 22 ) | |
| 23 | |
const (
	// charEntity is a regular-expression fragment matching a hexadecimal,
	// decimal or named HTML character reference, e.g. "&#x26;", "&#38;" or
	// "&amp;".
	charEntity = "&(?:#x[a-f0-9]{1,8}|#[0-9]{1,8}|[a-z][a-z0-9]{1,31});"
	// escapable is a regular-expression character class listing the ASCII
	// punctuation characters that may follow a backslash escape.
	escapable = "[!\"#$%&'()*+,./:;<=>?@[\\\\\\]^_`{|}~-]"
)

var (
	// reBackslashOrAmp is a cheap pre-filter: it reports whether a buffer
	// contains a backslash or an ampersand at all, i.e. whether it is worth
	// running reEntityOrEscapedChar over it.
	//
	// The pattern must reach the regexp engine as `[\\&]`. Written as the Go
	// string "[\\&]" it would compile to the class `[\&]`, which only matches
	// '&' and silently skips inputs that contain backslash escapes only.
	reBackslashOrAmp = regexp.MustCompile("[\\\\&]")
	// reEntityOrEscapedChar matches either a backslash followed by an
	// escapable punctuation character, or a character reference
	// (case-insensitively).
	reEntityOrEscapedChar = regexp.MustCompile("(?i)\\\\" + escapable + "|" + charEntity)
)
| 33 | |
// block parses block-level data, consuming one block-level construct per
// loop iteration until data is exhausted. The checks run in a fixed order
// and the first one that recognizes the start of data wins; anything left
// over is handed to p.paragraph.
//
// The function is called recursively (for example from quote), so p.nesting
// tracks the depth and parsing is abandoned once p.maxNesting is reached.
//
// Note: this function and many that it calls assume that
// the input buffer ends with a newline.
func (p *Markdown) block(data []byte) {
	// this is called recursively: enforce a maximum depth
	if p.nesting >= p.maxNesting {
		return
	}
	p.nesting++

	// parse out one block-level construct at a time
	for len(data) > 0 {
		// prefixed heading:
		//
		//     # Heading 1
		//     ## Heading 2
		//     ...
		//     ###### Heading 6
		if p.isPrefixHeading(data) {
			data = data[p.prefixHeading(data):]
			continue
		}

		// block of preformatted HTML:
		//
		//     <div>
		//         ...
		//     </div>
		if data[0] == '<' {
			// a zero result means "not an HTML block"; fall through to the
			// remaining checks
			if i := p.html(data, true); i > 0 {
				data = data[i:]
				continue
			}
		}

		// title block (only with the Titleblock extension)
		//
		//     % stuff
		//     % more stuff
		//     % even more stuff
		if p.extensions&Titleblock != 0 {
			if data[0] == '%' {
				if i := p.titleBlock(data, true); i > 0 {
					data = data[i:]
					continue
				}
			}
		}

		// blank lines. note: returns the # of bytes to skip
		if i := p.isEmpty(data); i > 0 {
			data = data[i:]
			continue
		}

		// indented code block:
		//
		//     func max(a, b int) int {
		//         if a > b {
		//             return a
		//         }
		//         return b
		//     }
		if p.codePrefix(data) > 0 {
			data = data[p.code(data):]
			continue
		}

		// fenced code block (only with the FencedCode extension):
		//
		//     ``` go
		//     func fact(n int) int {
		//         if n <= 1 {
		//             return n
		//         }
		//         return n * fact(n-1)
		//     }
		//     ```
		if p.extensions&FencedCode != 0 {
			if i := p.fencedCodeBlock(data, true); i > 0 {
				data = data[i:]
				continue
			}
		}

		// horizontal rule:
		//
		//     ------
		// or
		//     ******
		// or
		//     ______
		if p.isHRule(data) {
			p.addBlock(HorizontalRule, nil)
			// skip to the end of the rule line; the newline itself is left
			// in data and is consumed by the blank-line check on the next
			// iteration
			var i int
			for i = 0; i < len(data) && data[i] != '\n'; i++ {
			}
			data = data[i:]
			continue
		}

		// block quote:
		//
		//     > A big quote I found somewhere
		//     > on the web
		if p.quotePrefix(data) > 0 {
			data = data[p.quote(data):]
			continue
		}

		// table (only with the Tables extension):
		//
		//     Name  | Age | Phone
		//     ------|-----|---------
		//     Bob   | 31  | 555-1234
		//     Alice | 27  | 555-4321
		if p.extensions&Tables != 0 {
			if i := p.table(data); i > 0 {
				data = data[i:]
				continue
			}
		}

		// an itemized/unordered list:
		//
		//     * Item 1
		//     * Item 2
		//
		// also works with + or -
		if p.uliPrefix(data) > 0 {
			data = data[p.list(data, 0):]
			continue
		}

		// a numbered/ordered list:
		//
		//     1. Item 1
		//     2. Item 2
		if p.oliPrefix(data) > 0 {
			data = data[p.list(data, ListTypeOrdered):]
			continue
		}

		// definition lists (only with the DefinitionLists extension):
		//
		//     Term 1
		//     :   Definition a
		//     :   Definition b
		//
		//     Term 2
		//     :   Definition c
		if p.extensions&DefinitionLists != 0 {
			if p.dliPrefix(data) > 0 {
				data = data[p.list(data, ListTypeDefinition):]
				continue
			}
		}

		// anything else must look like a normal paragraph
		// note: this finds underlined headings, too
		data = data[p.paragraph(data):]
	}

	p.nesting--
}
| 199 | |
| 200 func (p *Markdown) addBlock(typ NodeType, content []byte) *Node { | |
| 201 p.closeUnmatchedBlocks() | |
| 202 container := p.addChild(typ, 0) | |
| 203 container.content = content | |
| 204 return container | |
| 205 } | |
| 206 | |
| 207 func (p *Markdown) isPrefixHeading(data []byte) bool { | |
| 208 if data[0] != '#' { | |
| 209 return false | |
| 210 } | |
| 211 | |
| 212 if p.extensions&SpaceHeadings != 0 { | |
| 213 level := 0 | |
| 214 for level < 6 && level < len(data) && data[level] == '#' { | |
| 215 level++ | |
| 216 } | |
| 217 if level == len(data) || data[level] != ' ' { | |
| 218 return false | |
| 219 } | |
| 220 } | |
| 221 return true | |
| 222 } | |
| 223 | |
// prefixHeading parses a '#'-prefixed heading at the start of data, adds a
// Heading block for it when the heading text is non-empty, and returns the
// number of bytes the caller should skip.
//
// The heading level is the number of leading '#' characters (at most six).
// With the HeadingIDs extension, a "{#id}" suffix on the line supplies the
// heading ID; otherwise, with AutoHeadingIDs, the ID is derived from the
// heading text via SanitizedAnchorName.
func (p *Markdown) prefixHeading(data []byte) int {
	// count the leading '#' characters, capped at six
	level := 0
	for level < 6 && level < len(data) && data[level] == '#' {
		level++
	}
	// i: start of the heading text; end: end of the heading line
	i := skipChar(data, level, ' ')
	end := skipUntilChar(data, i, '\n')
	skip := end
	id := ""
	if p.extensions&HeadingIDs != 0 {
		j, k := 0, 0
		// find start/end of heading id
		// j stops at the first "{#" on the line (or at end-1 if there is none)
		for j = i; j < end-1 && (data[j] != '{' || data[j+1] != '#'); j++ {
		}
		// k stops at the first '}' after j (or at end if there is none)
		for k = j + 1; k < end && data[k] != '}'; k++ {
		}
		// extract heading id iff found
		if j < end && k < end {
			id = string(data[j+2 : k])
			// the heading text now ends where the "{#" begins, and the
			// caller resumes right after the closing '}'
			end = j
			skip = k + 1
			for end > 0 && data[end-1] == ' ' {
				end--
			}
		}
	}
	// strip trailing '#' characters, stopping at one that is backslash-escaped
	for end > 0 && data[end-1] == '#' {
		if isBackslashEscaped(data, end-1) {
			break
		}
		end--
	}
	// strip trailing spaces
	for end > 0 && data[end-1] == ' ' {
		end--
	}
	// only emit a block when some heading text remains
	if end > i {
		if id == "" && p.extensions&AutoHeadingIDs != 0 {
			id = SanitizedAnchorName(string(data[i:end]))
		}
		block := p.addBlock(Heading, data[i:end])
		block.HeadingID = id
		block.Level = level
	}
	return skip
}
| 269 | |
| 270 func (p *Markdown) isUnderlinedHeading(data []byte) int { | |
| 271 // test of level 1 heading | |
| 272 if data[0] == '=' { | |
| 273 i := skipChar(data, 1, '=') | |
| 274 i = skipChar(data, i, ' ') | |
| 275 if i < len(data) && data[i] == '\n' { | |
| 276 return 1 | |
| 277 } | |
| 278 return 0 | |
| 279 } | |
| 280 | |
| 281 // test of level 2 heading | |
| 282 if data[0] == '-' { | |
| 283 i := skipChar(data, 1, '-') | |
| 284 i = skipChar(data, i, ' ') | |
| 285 if i < len(data) && data[i] == '\n' { | |
| 286 return 2 | |
| 287 } | |
| 288 return 0 | |
| 289 } | |
| 290 | |
| 291 return 0 | |
| 292 } | |
| 293 | |
| 294 func (p *Markdown) titleBlock(data []byte, doRender bool) int { | |
| 295 if data[0] != '%' { | |
| 296 return 0 | |
| 297 } | |
| 298 splitData := bytes.Split(data, []byte("\n")) | |
| 299 var i int | |
| 300 for idx, b := range splitData { | |
| 301 if !bytes.HasPrefix(b, []byte("%")) { | |
| 302 i = idx // - 1 | |
| 303 break | |
| 304 } | |
| 305 } | |
| 306 | |
| 307 data = bytes.Join(splitData[0:i], []byte("\n")) | |
| 308 consumed := len(data) | |
| 309 data = bytes.TrimPrefix(data, []byte("% ")) | |
| 310 data = bytes.Replace(data, []byte("\n% "), []byte("\n"), -1) | |
| 311 block := p.addBlock(Heading, data) | |
| 312 block.Level = 1 | |
| 313 block.IsTitleblock = true | |
| 314 | |
| 315 return consumed | |
| 316 } | |
| 317 | |
// html tries to parse a block of raw HTML at the start of data and returns
// the number of bytes it spans, or 0 if data does not start with a
// recognized HTML block. When doRender is true the block is also added to
// the tree as an HTMLBlock node (with trailing newlines trimmed).
//
// If the opening tag is not one of the known block tags, the function falls
// back to recognizing an HTML comment or an <hr> tag.
func (p *Markdown) html(data []byte, doRender bool) int {
	var i, j int

	// identify the opening tag
	if data[0] != '<' {
		return 0
	}
	curtag, tagfound := p.htmlFindTag(data[1:])

	// handle special cases
	if !tagfound {
		// check for an HTML comment
		if size := p.htmlComment(data, doRender); size > 0 {
			return size
		}

		// check for an <hr> tag
		if size := p.htmlHr(data, doRender); size > 0 {
			return size
		}

		// no special case recognized
		return 0
	}

	// look for an unindented matching closing tag
	// followed by a blank line
	// (this first pass is disabled below, so found is always false when the
	// second pass is reached)
	found := false
	/*
		closetag := []byte("\n</" + curtag + ">")
		j = len(curtag) + 1
		for !found {
			// scan for a closing tag at the beginning of a line
			if skip := bytes.Index(data[j:], closetag); skip >= 0 {
				j += skip + len(closetag)
			} else {
				break
			}

			// see if it is the only thing on the line
			if skip := p.isEmpty(data[j:]); skip > 0 {
				// see if it is followed by a blank line/eof
				j += skip
				if j >= len(data) {
					found = true
					i = j
				} else {
					if skip := p.isEmpty(data[j:]); skip > 0 {
						j += skip
						found = true
						i = j
					}
				}
			}
		}
	*/

	// if not found, try a second pass looking for indented match
	// but not if tag is "ins" or "del" (following original Markdown.pl)
	if !found && curtag != "ins" && curtag != "del" {
		i = 1
		for i < len(data) {
			// advance until data[i-1:i+1] is "</", the start of some closing tag
			i++
			for i < len(data) && !(data[i-1] == '<' && data[i] == '/') {
				i++
			}

			// not enough room left for "</" + curtag + ">"
			if i+2+len(curtag) >= len(data) {
				break
			}

			// j > 0 means the closing tag matches curtag and ends the block;
			// j is measured from the '<' at i-1
			j = p.htmlFindEnd(curtag, data[i-1:])

			if j > 0 {
				i += j - 1
				found = true
				break
			}
		}
	}

	if !found {
		return 0
	}

	// the end of the block has been found
	if doRender {
		// trim newlines
		end := i
		for end > 0 && data[end-1] == '\n' {
			end--
		}
		finalizeHTMLBlock(p.addBlock(HTMLBlock, data[:end]))
	}

	return i
}
| 415 | |
| 416 func finalizeHTMLBlock(block *Node) { | |
| 417 block.Literal = block.content | |
| 418 block.content = nil | |
| 419 } | |
| 420 | |
| 421 // HTML comment, lax form | |
| 422 func (p *Markdown) htmlComment(data []byte, doRender bool) int { | |
| 423 i := p.inlineHTMLComment(data) | |
| 424 // needs to end with a blank line | |
| 425 if j := p.isEmpty(data[i:]); j > 0 { | |
| 426 size := i + j | |
| 427 if doRender { | |
| 428 // trim trailing newlines | |
| 429 end := size | |
| 430 for end > 0 && data[end-1] == '\n' { | |
| 431 end-- | |
| 432 } | |
| 433 block := p.addBlock(HTMLBlock, data[:end]) | |
| 434 finalizeHTMLBlock(block) | |
| 435 } | |
| 436 return size | |
| 437 } | |
| 438 return 0 | |
| 439 } | |
| 440 | |
| 441 // HR, which is the only self-closing block tag considered | |
| 442 func (p *Markdown) htmlHr(data []byte, doRender bool) int { | |
| 443 if len(data) < 4 { | |
| 444 return 0 | |
| 445 } | |
| 446 if data[0] != '<' || (data[1] != 'h' && data[1] != 'H') || (data[2] != 'r' && data[2] != 'R') { | |
| 447 return 0 | |
| 448 } | |
| 449 if data[3] != ' ' && data[3] != '/' && data[3] != '>' { | |
| 450 // not an <hr> tag after all; at least not a valid one | |
| 451 return 0 | |
| 452 } | |
| 453 i := 3 | |
| 454 for i < len(data) && data[i] != '>' && data[i] != '\n' { | |
| 455 i++ | |
| 456 } | |
| 457 if i < len(data) && data[i] == '>' { | |
| 458 i++ | |
| 459 if j := p.isEmpty(data[i:]); j > 0 { | |
| 460 size := i + j | |
| 461 if doRender { | |
| 462 // trim newlines | |
| 463 end := size | |
| 464 for end > 0 && data[end-1] == '\n' { | |
| 465 end-- | |
| 466 } | |
| 467 finalizeHTMLBlock(p.addBlock(HTMLBlock, data[:end])) | |
| 468 } | |
| 469 return size | |
| 470 } | |
| 471 } | |
| 472 return 0 | |
| 473 } | |
| 474 | |
| 475 func (p *Markdown) htmlFindTag(data []byte) (string, bool) { | |
| 476 i := 0 | |
| 477 for i < len(data) && isalnum(data[i]) { | |
| 478 i++ | |
| 479 } | |
| 480 key := string(data[:i]) | |
| 481 if _, ok := blockTags[key]; ok { | |
| 482 return key, true | |
| 483 } | |
| 484 return "", false | |
| 485 } | |
| 486 | |
| 487 func (p *Markdown) htmlFindEnd(tag string, data []byte) int { | |
| 488 // assume data[0] == '<' && data[1] == '/' already tested | |
| 489 if tag == "hr" { | |
| 490 return 2 | |
| 491 } | |
| 492 // check if tag is a match | |
| 493 closetag := []byte("</" + tag + ">") | |
| 494 if !bytes.HasPrefix(data, closetag) { | |
| 495 return 0 | |
| 496 } | |
| 497 i := len(closetag) | |
| 498 | |
| 499 // check that the rest of the line is blank | |
| 500 skip := 0 | |
| 501 if skip = p.isEmpty(data[i:]); skip == 0 { | |
| 502 return 0 | |
| 503 } | |
| 504 i += skip | |
| 505 skip = 0 | |
| 506 | |
| 507 if i >= len(data) { | |
| 508 return i | |
| 509 } | |
| 510 | |
| 511 if p.extensions&LaxHTMLBlocks != 0 { | |
| 512 return i | |
| 513 } | |
| 514 if skip = p.isEmpty(data[i:]); skip == 0 { | |
| 515 // following line must be blank | |
| 516 return 0 | |
| 517 } | |
| 518 | |
| 519 return i + skip | |
| 520 } | |
| 521 | |
| 522 func (*Markdown) isEmpty(data []byte) int { | |
| 523 // it is okay to call isEmpty on an empty buffer | |
| 524 if len(data) == 0 { | |
| 525 return 0 | |
| 526 } | |
| 527 | |
| 528 var i int | |
| 529 for i = 0; i < len(data) && data[i] != '\n'; i++ { | |
| 530 if data[i] != ' ' && data[i] != '\t' { | |
| 531 return 0 | |
| 532 } | |
| 533 } | |
| 534 if i < len(data) && data[i] == '\n' { | |
| 535 i++ | |
| 536 } | |
| 537 return i | |
| 538 } | |
| 539 | |
| 540 func (*Markdown) isHRule(data []byte) bool { | |
| 541 i := 0 | |
| 542 | |
| 543 // skip up to three spaces | |
| 544 for i < 3 && data[i] == ' ' { | |
| 545 i++ | |
| 546 } | |
| 547 | |
| 548 // look at the hrule char | |
| 549 if data[i] != '*' && data[i] != '-' && data[i] != '_' { | |
| 550 return false | |
| 551 } | |
| 552 c := data[i] | |
| 553 | |
| 554 // the whole line must be the char or whitespace | |
| 555 n := 0 | |
| 556 for i < len(data) && data[i] != '\n' { | |
| 557 switch { | |
| 558 case data[i] == c: | |
| 559 n++ | |
| 560 case data[i] != ' ': | |
| 561 return false | |
| 562 } | |
| 563 i++ | |
| 564 } | |
| 565 | |
| 566 return n >= 3 | |
| 567 } | |
| 568 | |
// isFenceLine checks if there's a fence line (e.g., ``` or ``` go) at the beginning of data,
// and returns the end index if so, or 0 otherwise. It also returns the marker found.
// If info is not nil, it gets set to the syntax specified in the fence line.
//
// A fence line is up to three spaces followed by a run of at least three
// identical '~' or '`' characters. If oldmarker is non-empty, the run must
// equal it exactly; this is how a closing fence is matched to its opening
// fence. The returned end index includes the terminating newline when there
// is one.
func isFenceLine(data []byte, info *string, oldmarker string) (end int, marker string) {
	i, size := 0, 0

	// skip up to three spaces
	for i < len(data) && i < 3 && data[i] == ' ' {
		i++
	}

	// check for the marker characters: ~ or `
	if i >= len(data) {
		return 0, ""
	}
	if data[i] != '~' && data[i] != '`' {
		return 0, ""
	}

	c := data[i]

	// count the run of marker characters
	for i < len(data) && data[i] == c {
		size++
		i++
	}

	// the marker char must occur at least 3 times
	if size < 3 {
		return 0, ""
	}
	marker = string(data[i-size : i])

	// if this is the end marker, it must match the beginning marker
	if oldmarker != "" && marker != oldmarker {
		return 0, ""
	}

	// TODO(shurcooL): It's probably a good idea to simplify the 2 code paths here
	// into one, always get the info string, and discard it if the caller doesn't care.
	if info != nil {
		infoLength := 0
		i = skipChar(data, i, ' ')

		// input ended right after the marker (and optional spaces)
		if i >= len(data) {
			if i == len(data) {
				return i, marker
			}
			return 0, ""
		}

		infoStart := i

		if data[i] == '{' {
			// info string wrapped in braces: take everything up to '}' on
			// this line
			i++
			infoStart++

			for i < len(data) && data[i] != '}' && data[i] != '\n' {
				infoLength++
				i++
			}

			// the closing brace is mandatory
			if i >= len(data) || data[i] != '}' {
				return 0, ""
			}

			// strip all whitespace at the beginning and the end
			// of the {} block
			for infoLength > 0 && isspace(data[infoStart]) {
				infoStart++
				infoLength--
			}

			for infoLength > 0 && isspace(data[infoStart+infoLength-1]) {
				infoLength--
			}
			// step over '}' and any spaces that follow it
			i++
			i = skipChar(data, i, ' ')
		} else {
			// bare info string: runs until the first byte for which
			// isverticalspace is true
			for i < len(data) && !isverticalspace(data[i]) {
				infoLength++
				i++
			}
		}

		*info = strings.TrimSpace(string(data[infoStart : infoStart+infoLength]))
	}

	// the fence line may end the input without a newline
	if i == len(data) {
		return i, marker
	}
	// anything other than a newline here means this is not a fence line
	if i > len(data) || data[i] != '\n' {
		return 0, ""
	}
	return i + 1, marker // Take newline into account.
}
| 665 | |
// fencedCodeBlock returns the end index if data contains a fenced code block at the beginning,
// or 0 otherwise. When doRender is true it adds a fenced CodeBlock node to the tree;
// otherwise it adds nothing.
// A closing fence is required: if the buffer runs out before one is found, the result is 0.
func (p *Markdown) fencedCodeBlock(data []byte, doRender bool) int {
	var info string
	beg, marker := isFenceLine(data, &info, "")
	// no opening fence, or nothing follows it
	if beg == 0 || beg >= len(data) {
		return 0
	}
	// NOTE(review): beg spans the whole opening line, so this is the line
	// length without its newline, not only the marker run — confirm how
	// FenceLength is consumed.
	fenceLength := beg - 1

	// work holds the info string on its first line, followed by the code;
	// finalizeCodeBlock splits the two apart again
	var work bytes.Buffer
	work.Write([]byte(info))
	work.WriteByte('\n')

	for {
		// safe to assume beg < len(data)

		// check for the end of the code block
		fenceEnd, _ := isFenceLine(data[beg:], nil, marker)
		if fenceEnd != 0 {
			beg += fenceEnd
			break
		}

		// copy the current line
		end := skipUntilChar(data, beg, '\n') + 1

		// did we reach the end of the buffer without a closing marker?
		if end >= len(data) {
			return 0
		}

		// verbatim copy to the working buffer
		if doRender {
			work.Write(data[beg:end])
		}
		beg = end
	}

	if doRender {
		block := p.addBlock(CodeBlock, work.Bytes()) // TODO: get rid of temp buffer
		block.IsFenced = true
		block.FenceLength = fenceLength
		finalizeCodeBlock(block)
	}

	return beg
}
| 715 | |
// unescapeChar converts a single escape sequence to the bytes it stands for.
// A backslash followed by at least one byte yields that following byte;
// anything else is treated as HTML text and passed through
// html.UnescapeString, which resolves character references such as "&amp;".
//
// Input too short to be a backslash escape (empty, or a lone backslash) no
// longer panics: it takes the html.UnescapeString path and comes back
// unchanged.
func unescapeChar(str []byte) []byte {
	if len(str) >= 2 && str[0] == '\\' {
		return []byte{str[1]}
	}
	return []byte(html.UnescapeString(string(str)))
}
| 722 | |
| 723 func unescapeString(str []byte) []byte { | |
| 724 if reBackslashOrAmp.Match(str) { | |
| 725 return reEntityOrEscapedChar.ReplaceAllFunc(str, unescapeChar) | |
| 726 } | |
| 727 return str | |
| 728 } | |
| 729 | |
| 730 func finalizeCodeBlock(block *Node) { | |
| 731 if block.IsFenced { | |
| 732 newlinePos := bytes.IndexByte(block.content, '\n') | |
| 733 firstLine := block.content[:newlinePos] | |
| 734 rest := block.content[newlinePos+1:] | |
| 735 block.Info = unescapeString(bytes.Trim(firstLine, "\n")) | |
| 736 block.Literal = rest | |
| 737 } else { | |
| 738 block.Literal = block.content | |
| 739 } | |
| 740 block.content = nil | |
| 741 } | |
| 742 | |
| 743 func (p *Markdown) table(data []byte) int { | |
| 744 table := p.addBlock(Table, nil) | |
| 745 i, columns := p.tableHeader(data) | |
| 746 if i == 0 { | |
| 747 p.tip = table.Parent | |
| 748 table.Unlink() | |
| 749 return 0 | |
| 750 } | |
| 751 | |
| 752 p.addBlock(TableBody, nil) | |
| 753 | |
| 754 for i < len(data) { | |
| 755 pipes, rowStart := 0, i | |
| 756 for ; i < len(data) && data[i] != '\n'; i++ { | |
| 757 if data[i] == '|' { | |
| 758 pipes++ | |
| 759 } | |
| 760 } | |
| 761 | |
| 762 if pipes == 0 { | |
| 763 i = rowStart | |
| 764 break | |
| 765 } | |
| 766 | |
| 767 // include the newline in data sent to tableRow | |
| 768 if i < len(data) && data[i] == '\n' { | |
| 769 i++ | |
| 770 } | |
| 771 p.tableRow(data[rowStart:i], columns, false) | |
| 772 } | |
| 773 | |
| 774 return i | |
| 775 } | |
| 776 | |
// isBackslashEscaped reports whether the byte at position i of data is
// preceded by an odd number of consecutive backslashes.
func isBackslashEscaped(data []byte, i int) bool {
	count := 0
	for j := i - 1; j >= 0 && data[j] == '\\'; j-- {
		count++
	}
	return count%2 == 1
}
| 785 | |
// tableHeader tries to parse a table header at the start of data: one line
// of '|'-separated cells followed by an underline line that describes each
// column. On success it adds a TableHead block, emits the header row through
// tableRow and returns the number of bytes consumed together with the
// per-column alignment flags.
//
// On failure size is 0. Both results are named and every early exit is a
// bare return, so columns may already be allocated when size is 0; callers
// should treat size == 0 as "no table".
func (p *Markdown) tableHeader(data []byte) (size int, columns []CellAlignFlags) {
	// count the unescaped pipes on the first line; n pipes separate n+1 cells
	i := 0
	colCount := 1
	for i = 0; i < len(data) && data[i] != '\n'; i++ {
		if data[i] == '|' && !isBackslashEscaped(data, i) {
			colCount++
		}
	}

	// doesn't look like a table header
	if colCount == 1 {
		return
	}

	// include the newline in the data sent to tableRow
	j := i
	if j < len(data) && data[j] == '\n' {
		j++
	}
	header := data[:j]

	// column count ignores pipes at beginning or end of line
	if data[0] == '|' {
		colCount--
	}
	if i > 2 && data[i-1] == '|' && !isBackslashEscaped(data, i-1) {
		colCount--
	}

	columns = make([]CellAlignFlags, colCount)

	// move on to the header underline
	i++
	if i >= len(data) {
		return
	}

	// an optional leading pipe and spaces before the first column
	if data[i] == '|' && !isBackslashEscaped(data, i) {
		i++
	}
	i = skipChar(data, i, ' ')

	// each column header is of form: / *:?-+:? *|/ with # dashes + # colons >= 3
	// and trailing | optional on last column
	col := 0
	for i < len(data) && data[i] != '\n' {
		dashes := 0

		// a leading colon requests left alignment and counts toward the
		// minimum width
		if data[i] == ':' {
			i++
			columns[col] |= TableAlignmentLeft
			dashes++
		}
		for i < len(data) && data[i] == '-' {
			i++
			dashes++
		}
		// a trailing colon requests right alignment; both flags may be set
		// on the same column
		if i < len(data) && data[i] == ':' {
			i++
			columns[col] |= TableAlignmentRight
			dashes++
		}
		for i < len(data) && data[i] == ' ' {
			i++
		}
		// the underline ran into the end of the buffer
		if i == len(data) {
			return
		}
		// end of column test is messy
		switch {
		case dashes < 3:
			// not a valid column
			return

		case data[i] == '|' && !isBackslashEscaped(data, i):
			// marker found, now skip past trailing whitespace
			col++
			i++
			for i < len(data) && data[i] == ' ' {
				i++
			}

			// trailing junk found after last column
			if col >= colCount && i < len(data) && data[i] != '\n' {
				return
			}

		case (data[i] != '|' || isBackslashEscaped(data, i)) && col+1 < colCount:
			// something else found where marker was required
			return

		case data[i] == '\n':
			// marker is optional for the last column
			col++

		default:
			// trailing junk found after last column
			return
		}
	}
	// the underline must describe exactly as many columns as the header has
	if col != colCount {
		return
	}

	p.addBlock(TableHead, nil)
	p.tableRow(header, columns, true)
	// consume the underline's newline as well
	size = i
	if size < len(data) && data[size] == '\n' {
		size++
	}
	return
}
| 898 | |
// tableRow adds a TableRow block for one line of a table and one TableCell
// block per column. data is the row's text; callers in this file pass it
// with the trailing newline included, and it must be non-empty. Cells are
// separated by unescaped '|' characters and have surrounding spaces trimmed.
// Each cell receives the alignment of its column and is marked as a header
// cell when header is true.
//
// Rows with fewer cells than columns are padded with empty cells; cells
// beyond the last column are ignored.
func (p *Markdown) tableRow(data []byte, columns []CellAlignFlags, header bool) {
	p.addBlock(TableRow, nil)
	i, col := 0, 0

	// an optional leading pipe does not start a cell
	if data[i] == '|' && !isBackslashEscaped(data, i) {
		i++
	}

	for col = 0; col < len(columns) && i < len(data); col++ {
		// skip leading spaces of the cell
		for i < len(data) && data[i] == ' ' {
			i++
		}

		cellStart := i

		// the cell runs up to the next unescaped pipe or the end of the line
		for i < len(data) && (data[i] != '|' || isBackslashEscaped(data, i)) && data[i] != '\n' {
			i++
		}

		cellEnd := i

		// skip the end-of-cell marker, possibly taking us past end of buffer
		i++

		// trim trailing spaces of the cell
		for cellEnd > cellStart && cellEnd-1 < len(data) && data[cellEnd-1] == ' ' {
			cellEnd--
		}

		cell := p.addBlock(TableCell, data[cellStart:cellEnd])
		cell.IsHeader = header
		cell.Align = columns[col]
	}

	// pad it out with empty columns to get the right number
	for ; col < len(columns); col++ {
		cell := p.addBlock(TableCell, nil)
		cell.IsHeader = header
		cell.Align = columns[col]
	}

	// silently ignore rows with too many cells
}
| 941 | |
| 942 // returns blockquote prefix length | |
| 943 func (p *Markdown) quotePrefix(data []byte) int { | |
| 944 i := 0 | |
| 945 for i < 3 && i < len(data) && data[i] == ' ' { | |
| 946 i++ | |
| 947 } | |
| 948 if i < len(data) && data[i] == '>' { | |
| 949 if i+1 < len(data) && data[i+1] == ' ' { | |
| 950 return i + 2 | |
| 951 } | |
| 952 return i + 1 | |
| 953 } | |
| 954 return 0 | |
| 955 } | |
| 956 | |
| 957 // blockquote ends with at least one blank line | |
| 958 // followed by something without a blockquote prefix | |
| 959 func (p *Markdown) terminateBlockquote(data []byte, beg, end int) bool { | |
| 960 if p.isEmpty(data[beg:]) <= 0 { | |
| 961 return false | |
| 962 } | |
| 963 if end >= len(data) { | |
| 964 return true | |
| 965 } | |
| 966 return p.quotePrefix(data[end:]) == 0 && p.isEmpty(data[end:]) == 0 | |
| 967 } | |
| 968 | |
// quote parses a blockquote fragment at the start of data. It adds a
// BlockQuote block, collects the quoted lines with their prefixes stripped,
// runs the block parser recursively over the collected text, finalizes the
// BlockQuote and returns the number of bytes consumed.
func (p *Markdown) quote(data []byte) int {
	block := p.addBlock(BlockQuote, nil)
	// raw accumulates the blockquote's content without the '>' prefixes
	var raw bytes.Buffer
	beg, end := 0, 0
	for beg < len(data) {
		end = beg
		// Step over whole lines, collecting them. While doing that, check for
		// fenced code and if one's found, incorporate it altogether,
		// regardless of any contents inside it
		for end < len(data) && data[end] != '\n' {
			if p.extensions&FencedCode != 0 {
				// doRender is false: only measure the fenced block here
				if i := p.fencedCodeBlock(data[end:], false); i > 0 {
					// -1 to compensate for the extra end++ after the loop:
					end += i - 1
					break
				}
			}
			end++
		}
		if end < len(data) && data[end] == '\n' {
			end++
		}
		if pre := p.quotePrefix(data[beg:]); pre > 0 {
			// skip the prefix
			beg += pre
		} else if p.terminateBlockquote(data, beg, end) {
			break
		}
		// this line is part of the blockquote
		raw.Write(data[beg:end])
		beg = end
	}
	p.block(raw.Bytes())
	p.finalize(block)
	return end
}
| 1006 | |
| 1007 // returns prefix length for block code | |
| 1008 func (p *Markdown) codePrefix(data []byte) int { | |
| 1009 if len(data) >= 1 && data[0] == '\t' { | |
| 1010 return 1 | |
| 1011 } | |
| 1012 if len(data) >= 4 && data[0] == ' ' && data[1] == ' ' && data[2] == ' ' && data[3] == ' ' { | |
| 1013 return 4 | |
| 1014 } | |
| 1015 return 0 | |
| 1016 } | |
| 1017 | |
// code parses an indented code block at the start of data, adds it as a
// non-fenced CodeBlock and returns the number of bytes consumed.
//
// Lines belong to the block as long as they carry a code prefix (see
// codePrefix) or are blank. The prefix is stripped from each line, blank
// lines are reduced to a bare newline, and all trailing newlines are
// collapsed to a single one.
func (p *Markdown) code(data []byte) int {
	var work bytes.Buffer

	i := 0
	for i < len(data) {
		// [beg, i) becomes the current line, including its newline
		beg := i
		for i < len(data) && data[i] != '\n' {
			i++
		}
		if i < len(data) && data[i] == '\n' {
			i++
		}

		blankline := p.isEmpty(data[beg:i]) > 0
		if pre := p.codePrefix(data[beg:i]); pre > 0 {
			beg += pre
		} else if !blankline {
			// non-empty, non-prefixed line breaks the pre
			// rewind so the caller sees this line again
			i = beg
			break
		}

		// verbatim copy to the working buffer
		if blankline {
			work.WriteByte('\n')
		} else {
			work.Write(data[beg:i])
		}
	}

	// trim all the \n off the end of work
	workbytes := work.Bytes()
	eol := len(workbytes)
	for eol > 0 && workbytes[eol-1] == '\n' {
		eol--
	}
	if eol != len(workbytes) {
		work.Truncate(eol)
	}

	// and put exactly one back
	work.WriteByte('\n')

	block := p.addBlock(CodeBlock, work.Bytes()) // TODO: get rid of temp buffer
	block.IsFenced = false
	finalizeCodeBlock(block)

	return i
}
| 1066 | |
| 1067 // returns unordered list item prefix | |
| 1068 func (p *Markdown) uliPrefix(data []byte) int { | |
| 1069 i := 0 | |
| 1070 // start with up to 3 spaces | |
| 1071 for i < len(data) && i < 3 && data[i] == ' ' { | |
| 1072 i++ | |
| 1073 } | |
| 1074 if i >= len(data)-1 { | |
| 1075 return 0 | |
| 1076 } | |
| 1077 // need one of {'*', '+', '-'} followed by a space or a tab | |
| 1078 if (data[i] != '*' && data[i] != '+' && data[i] != '-') || | |
| 1079 (data[i+1] != ' ' && data[i+1] != '\t') { | |
| 1080 return 0 | |
| 1081 } | |
| 1082 return i + 2 | |
| 1083 } | |
| 1084 | |
| 1085 // returns ordered list item prefix | |
| 1086 func (p *Markdown) oliPrefix(data []byte) int { | |
| 1087 i := 0 | |
| 1088 | |
| 1089 // start with up to 3 spaces | |
| 1090 for i < 3 && i < len(data) && data[i] == ' ' { | |
| 1091 i++ | |
| 1092 } | |
| 1093 | |
| 1094 // count the digits | |
| 1095 start := i | |
| 1096 for i < len(data) && data[i] >= '0' && data[i] <= '9' { | |
| 1097 i++ | |
| 1098 } | |
| 1099 if start == i || i >= len(data)-1 { | |
| 1100 return 0 | |
| 1101 } | |
| 1102 | |
| 1103 // we need >= 1 digits followed by a dot and a space or a tab | |
| 1104 if data[i] != '.' || !(data[i+1] == ' ' || data[i+1] == '\t') { | |
| 1105 return 0 | |
| 1106 } | |
| 1107 return i + 2 | |
| 1108 } | |
| 1109 | |
| 1110 // returns definition list item prefix | |
| 1111 func (p *Markdown) dliPrefix(data []byte) int { | |
| 1112 if len(data) < 2 { | |
| 1113 return 0 | |
| 1114 } | |
| 1115 i := 0 | |
| 1116 // need a ':' followed by a space or a tab | |
| 1117 if data[i] != ':' || !(data[i+1] == ' ' || data[i+1] == '\t') { | |
| 1118 return 0 | |
| 1119 } | |
| 1120 for i < len(data) && data[i] == ' ' { | |
| 1121 i++ | |
| 1122 } | |
| 1123 return i + 2 | |
| 1124 } | |
| 1125 | |
| 1126 // parse ordered or unordered list block | |
| 1127 func (p *Markdown) list(data []byte, flags ListType) int { | |
| 1128 i := 0 | |
| 1129 flags |= ListItemBeginningOfList | |
| 1130 block := p.addBlock(List, nil) | |
| 1131 block.ListFlags = flags | |
| 1132 block.Tight = true | |
| 1133 | |
| 1134 for i < len(data) { | |
| 1135 skip := p.listItem(data[i:], &flags) | |
| 1136 if flags&ListItemContainsBlock != 0 { | |
| 1137 block.ListData.Tight = false | |
| 1138 } | |
| 1139 i += skip | |
| 1140 if skip == 0 || flags&ListItemEndOfList != 0 { | |
| 1141 break | |
| 1142 } | |
| 1143 flags &= ^ListItemBeginningOfList | |
| 1144 } | |
| 1145 | |
| 1146 above := block.Parent | |
| 1147 finalizeList(block) | |
| 1148 p.tip = above | |
| 1149 return i | |
| 1150 } | |
| 1151 | |
| 1152 // Returns true if the list item is not the same type as its parent list | |
| 1153 func (p *Markdown) listTypeChanged(data []byte, flags *ListType) bool { | |
| 1154 if p.dliPrefix(data) > 0 && *flags&ListTypeDefinition == 0 { | |
| 1155 return true | |
| 1156 } else if p.oliPrefix(data) > 0 && *flags&ListTypeOrdered == 0 { | |
| 1157 return true | |
| 1158 } else if p.uliPrefix(data) > 0 && (*flags&ListTypeOrdered != 0 || *flags&ListTypeDefinition != 0) { | |
| 1159 return true | |
| 1160 } | |
| 1161 return false | |
| 1162 } | |
| 1163 | |
| 1164 // Returns true if block ends with a blank line, descending if needed | |
| 1165 // into lists and sublists. | |
| 1166 func endsWithBlankLine(block *Node) bool { | |
| 1167 // TODO: figure this out. Always false now. | |
| 1168 for block != nil { | |
| 1169 //if block.lastLineBlank { | |
| 1170 //return true | |
| 1171 //} | |
| 1172 t := block.Type | |
| 1173 if t == List || t == Item { | |
| 1174 block = block.LastChild | |
| 1175 } else { | |
| 1176 break | |
| 1177 } | |
| 1178 } | |
| 1179 return false | |
| 1180 } | |
| 1181 | |
| 1182 func finalizeList(block *Node) { | |
| 1183 block.open = false | |
| 1184 item := block.FirstChild | |
| 1185 for item != nil { | |
| 1186 // check for non-final list item ending with blank line: | |
| 1187 if endsWithBlankLine(item) && item.Next != nil { | |
| 1188 block.ListData.Tight = false | |
| 1189 break | |
| 1190 } | |
| 1191 // recurse into children of list item, to see if there are spaces | |
| 1192 // between any of them: | |
| 1193 subItem := item.FirstChild | |
| 1194 for subItem != nil { | |
| 1195 if endsWithBlankLine(subItem) && (item.Next != nil || subItem.Next != nil) { | |
| 1196 block.ListData.Tight = false | |
| 1197 break | |
| 1198 } | |
| 1199 subItem = subItem.Next | |
| 1200 } | |
| 1201 item = item.Next | |
| 1202 } | |
| 1203 } | |
| 1204 | |
// Parse a single list item.
// Assumes initial prefix is already removed if this is a sublist.
//
// data is the input starting at the item's first line. flags carries the
// list type in and is updated in place: ListTypeTerm is set or cleared for
// definition lists, ListItemContainsBlock is set when the item holds block
// content, and ListItemEndOfList is set when the line following the item
// does not continue the same list.
//
// An Item node is added and the gathered item text is rendered into it,
// either as blocks or as a single Paragraph child. The return value is the
// offset in data of the first line that is not part of this item. It is 0
// when data does not start with a list prefix and flags does not describe a
// definition list.
func (p *Markdown) listItem(data []byte, flags *ListType) int {
	// keep track of the indentation of the first line
	// (a leading tab counts as 4, otherwise up to 3 leading spaces)
	// NOTE(review): the space loop has no length check; it relies on a
	// non-space byte (e.g. the trailing newline block parsers assume)
	// appearing within the first three bytes - confirm against callers.
	itemIndent := 0
	if data[0] == '\t' {
		itemIndent += 4
	} else {
		for itemIndent < 3 && data[itemIndent] == ' ' {
			itemIndent++
		}
	}

	// try the prefixes in order: unordered, ordered, definition.
	// i ends up as the length of whichever prefix matched, or 0.
	var bulletChar byte = '*'
	i := p.uliPrefix(data)
	if i == 0 {
		i = p.oliPrefix(data)
	} else {
		// uliPrefix returns the index just past "marker + whitespace",
		// so the marker itself is two bytes back
		bulletChar = data[i-2]
	}
	if i == 0 {
		i = p.dliPrefix(data)
		// reset definition term flag
		if i > 0 {
			*flags &= ^ListTypeTerm
		}
	}
	if i == 0 {
		// if in definition list, set term flag and continue
		if *flags&ListTypeDefinition != 0 {
			*flags |= ListTypeTerm
		} else {
			return 0
		}
	}

	// skip leading whitespace on first line
	for i < len(data) && data[i] == ' ' {
		i++
	}

	// find the end of the line
	// (when i is 0, i.e. a definition term with no prefix and no leading
	// spaces, this loop does not run and the first-line write below is empty;
	// the term text is then picked up by the gatherlines loop)
	line := i
	for i > 0 && i < len(data) && data[i-1] != '\n' {
		i++
	}

	// get working buffer
	var raw bytes.Buffer

	// put the first line into the working buffer
	raw.Write(data[line:i])
	line = i

	// process the following lines
	containsBlankLine := false // a blank line was seen since the last content line
	sublist := 0               // offset in raw where the first nested list item starts, 0 if none
	codeBlockMarker := ""      // fence marker of the code block currently open, "" if none

gatherlines:
	for line < len(data) {
		i++

		// find the end of this line
		for i < len(data) && data[i-1] != '\n' {
			i++
		}

		// if it is an empty line, guess that it is part of this item
		// and move on to the next line
		if p.isEmpty(data[line:i]) > 0 {
			containsBlankLine = true
			line = i
			continue
		}

		// calculate the indentation
		// indent is the logical width (a tab counts as 4, spaces are capped
		// at 4); indentIndex is the number of bytes to strip from the line
		indent := 0
		indentIndex := 0
		if data[line] == '\t' {
			indentIndex++
			indent += 4
		} else {
			for indent < 4 && line+indent < i && data[line+indent] == ' ' {
				indent++
				indentIndex++
			}
		}

		// the current line with its indentation removed
		chunk := data[line+indentIndex : i]

		if p.extensions&FencedCode != 0 {
			// determine if in or out of codeblock
			// if in codeblock, ignore normal list processing
			_, marker := isFenceLine(chunk, nil, codeBlockMarker)
			if marker != "" {
				if codeBlockMarker == "" {
					// start of codeblock
					codeBlockMarker = marker
				} else {
					// end of codeblock.
					codeBlockMarker = ""
				}
			}
			// we are in a codeblock, write line, and continue
			// (marker != "" also covers the closing fence line itself)
			if codeBlockMarker != "" || marker != "" {
				raw.Write(data[line+indentIndex : i])
				line = i
				continue gatherlines
			}
		}

		// evaluate how this line fits in
		switch {
		// is this a nested list item?
		case (p.uliPrefix(chunk) > 0 && !p.isHRule(chunk)) ||
			p.oliPrefix(chunk) > 0 ||
			p.dliPrefix(chunk) > 0:

			// to be a nested list, it must be indented more
			// if not, it is either a different kind of list
			// or the next item in the same list
			if indent <= itemIndent {
				if p.listTypeChanged(chunk, flags) {
					*flags |= ListItemEndOfList
				} else if containsBlankLine {
					*flags |= ListItemContainsBlock
				}

				break gatherlines
			}

			if containsBlankLine {
				*flags |= ListItemContainsBlock
			}

			// is this the first item in the nested list?
			// remember where it starts so the text before it can be
			// rendered separately from the nested list
			if sublist == 0 {
				sublist = raw.Len()
			}

		// is this a nested prefix heading?
		case p.isPrefixHeading(chunk):
			// if the heading is not indented, it is not nested in the list
			// and thus ends the list
			if containsBlankLine && indent < 4 {
				*flags |= ListItemEndOfList
				break gatherlines
			}
			*flags |= ListItemContainsBlock

		// anything following an empty line is only part
		// of this item if it is indented 4 spaces
		// (regardless of the indentation of the beginning of the item)
		case containsBlankLine && indent < 4:
			if *flags&ListTypeDefinition != 0 && i < len(data)-1 {
				// is the next item still a part of this list?
				// advance next to the end of the line starting at i and
				// past the newline run after it, then keep the list open
				// if either position holds a ':'
				next := i
				for next < len(data) && data[next] != '\n' {
					next++
				}
				for next < len(data)-1 && data[next] == '\n' {
					next++
				}
				if i < len(data)-1 && data[i] != ':' && data[next] != ':' {
					*flags |= ListItemEndOfList
				}
			} else {
				*flags |= ListItemEndOfList
			}
			break gatherlines

		// a blank line means this should be parsed as a block
		// (this newline is in addition to the one re-introduced below)
		case containsBlankLine:
			raw.WriteByte('\n')
			*flags |= ListItemContainsBlock
		}

		// if this line was preceded by one or more blanks,
		// re-introduce the blank into the buffer
		if containsBlankLine {
			containsBlankLine = false
			raw.WriteByte('\n')
		}

		// add the line into the working buffer without prefix
		raw.Write(data[line+indentIndex : i])

		line = i
	}

	rawBytes := raw.Bytes()

	block := p.addBlock(Item, nil)
	block.ListFlags = *flags
	block.Tight = false
	block.BulletChar = bulletChar
	block.Delimiter = '.' // Only '.' is possible in Markdown, but ')' will also be possible in CommonMark

	// render the contents of the list item
	if *flags&ListItemContainsBlock != 0 && *flags&ListTypeTerm == 0 {
		// intermediate render of block item, except for definition term
		// (text before the nested list and the nested list itself are
		// parsed as two separate block runs)
		if sublist > 0 {
			p.block(rawBytes[:sublist])
			p.block(rawBytes[sublist:])
		} else {
			p.block(rawBytes)
		}
	} else {
		// intermediate render of inline item
		// (the item text becomes a single Paragraph child; only the nested
		// list, if any, goes through block parsing)
		if sublist > 0 {
			child := p.addChild(Paragraph, 0)
			child.content = rawBytes[:sublist]
			p.block(rawBytes[sublist:])
		} else {
			child := p.addChild(Paragraph, 0)
			child.content = rawBytes
		}
	}
	return line
}
| 1426 | |
| 1427 // render a single paragraph that has already been parsed out | |
| 1428 func (p *Markdown) renderParagraph(data []byte) { | |
| 1429 if len(data) == 0 { | |
| 1430 return | |
| 1431 } | |
| 1432 | |
| 1433 // trim leading spaces | |
| 1434 beg := 0 | |
| 1435 for data[beg] == ' ' { | |
| 1436 beg++ | |
| 1437 } | |
| 1438 | |
| 1439 end := len(data) | |
| 1440 // trim trailing newline | |
| 1441 if data[len(data)-1] == '\n' { | |
| 1442 end-- | |
| 1443 } | |
| 1444 | |
| 1445 // trim trailing spaces | |
| 1446 for end > beg && data[end-1] == ' ' { | |
| 1447 end-- | |
| 1448 } | |
| 1449 | |
| 1450 p.addBlock(Paragraph, data[beg:end]) | |
| 1451 } | |
| 1452 | |
| 1453 func (p *Markdown) paragraph(data []byte) int { | |
| 1454 // prev: index of 1st char of previous line | |
| 1455 // line: index of 1st char of current line | |
| 1456 // i: index of cursor/end of current line | |
| 1457 var prev, line, i int | |
| 1458 tabSize := TabSizeDefault | |
| 1459 if p.extensions&TabSizeEight != 0 { | |
| 1460 tabSize = TabSizeDouble | |
| 1461 } | |
| 1462 // keep going until we find something to mark the end of the paragraph | |
| 1463 for i < len(data) { | |
| 1464 // mark the beginning of the current line | |
| 1465 prev = line | |
| 1466 current := data[i:] | |
| 1467 line = i | |
| 1468 | |
| 1469 // did we find a reference or a footnote? If so, end a paragraph | |
| 1470 // preceding it and report that we have consumed up to the end of that | |
| 1471 // reference: | |
| 1472 if refEnd := isReference(p, current, tabSize); refEnd > 0 { | |
| 1473 p.renderParagraph(data[:i]) | |
| 1474 return i + refEnd | |
| 1475 } | |
| 1476 | |
| 1477 // did we find a blank line marking the end of the paragraph? | |
| 1478 if n := p.isEmpty(current); n > 0 { | |
| 1479 // did this blank line followed by a definition list item? | |
| 1480 if p.extensions&DefinitionLists != 0 { | |
| 1481 if i < len(data)-1 && data[i+1] == ':' { | |
| 1482 return p.list(data[prev:], ListTypeDefinition) | |
| 1483 } | |
| 1484 } | |
| 1485 | |
| 1486 p.renderParagraph(data[:i]) | |
| 1487 return i + n | |
| 1488 } | |
| 1489 | |
| 1490 // an underline under some text marks a heading, so our paragraph ended on prev line | |
| 1491 if i > 0 { | |
| 1492 if level := p.isUnderlinedHeading(current); level > 0 { | |
| 1493 // render the paragraph | |
| 1494 p.renderParagraph(data[:prev]) | |
| 1495 | |
| 1496 // ignore leading and trailing whitespace | |
| 1497 eol := i - 1 | |
| 1498 for prev < eol && data[prev] == ' ' { | |
| 1499 prev++ | |
| 1500 } | |
| 1501 for eol > prev && data[eol-1] == ' ' { | |
| 1502 eol-- | |
| 1503 } | |
| 1504 | |
| 1505 id := "" | |
| 1506 if p.extensions&AutoHeadingIDs != 0 { | |
| 1507 id = SanitizedAnchorName(string(data[prev:eol])) | |
| 1508 } | |
| 1509 | |
| 1510 block := p.addBlock(Heading, data[prev:eol]) | |
| 1511 block.Level = level | |
| 1512 block.HeadingID = id | |
| 1513 | |
| 1514 // find the end of the underline | |
| 1515 for i < len(data) && data[i] != '\n' { | |
| 1516 i++ | |
| 1517 } | |
| 1518 return i | |
| 1519 } | |
| 1520 } | |
| 1521 | |
| 1522 // if the next line starts a block of HTML, then the paragraph ends here | |
| 1523 if p.extensions&LaxHTMLBlocks != 0 { | |
| 1524 if data[i] == '<' && p.html(current, false) > 0 { | |
| 1525 // rewind to before the HTML block | |
| 1526 p.renderParagraph(data[:i]) | |
| 1527 return i | |
| 1528 } | |
| 1529 } | |
| 1530 | |
| 1531 // if there's a prefixed heading or a horizontal rule after this, paragraph is over | |
| 1532 if p.isPrefixHeading(current) || p.isHRule(current) { | |
| 1533 p.renderParagraph(data[:i]) | |
| 1534 return i | |
| 1535 } | |
| 1536 | |
| 1537 // if there's a fenced code block, paragraph is over | |
| 1538 if p.extensions&FencedCode != 0 { | |
| 1539 if p.fencedCodeBlock(current, false) > 0 { | |
| 1540 p.renderParagraph(data[:i]) | |
| 1541 return i | |
| 1542 } | |
| 1543 } | |
| 1544 | |
| 1545 // if there's a definition list item, prev line is a definition term | |
| 1546 if p.extensions&DefinitionLists != 0 { | |
| 1547 if p.dliPrefix(current) != 0 { | |
| 1548 ret := p.list(data[prev:], ListTypeDefinition) | |
| 1549 return ret | |
| 1550 } | |
| 1551 } | |
| 1552 | |
| 1553 // if there's a list after this, paragraph is over | |
| 1554 if p.extensions&NoEmptyLineBeforeBlock != 0 { | |
| 1555 if p.uliPrefix(current) != 0 || | |
| 1556 p.oliPrefix(current) != 0 || | |
| 1557 p.quotePrefix(current) != 0 || | |
| 1558 p.codePrefix(current) != 0 { | |
| 1559 p.renderParagraph(data[:i]) | |
| 1560 return i | |
| 1561 } | |
| 1562 } | |
| 1563 | |
| 1564 // otherwise, scan to the beginning of the next line | |
| 1565 nl := bytes.IndexByte(data[i:], '\n') | |
| 1566 if nl >= 0 { | |
| 1567 i += nl + 1 | |
| 1568 } else { | |
| 1569 i += len(data[i:]) | |
| 1570 } | |
| 1571 } | |
| 1572 | |
| 1573 p.renderParagraph(data[:i]) | |
| 1574 return i | |
| 1575 } | |
| 1576 | |
// skipChar returns the index of the first byte at or after start in data
// that is not equal to char. It returns len(data) if every byte from start
// onward equals char, and start itself when start is already at or beyond
// len(data).
func skipChar(data []byte, start int, char byte) int {
	pos := start
	for ; pos < len(data); pos++ {
		if data[pos] != char {
			break
		}
	}
	return pos
}
| 1584 | |
// skipUntilChar returns the index of the first occurrence of char in text at
// or after start. It returns len(text) if char does not occur there, and
// start itself when start is already at or beyond len(text).
func skipUntilChar(text []byte, start int, char byte) int {
	pos := start
	for ; pos < len(text); pos++ {
		if text[pos] == char {
			break
		}
	}
	return pos
}
| 1592 | |
// SanitizedAnchorName returns a sanitized anchor name for the given text.
//
// It implements the algorithm specified in the package comment: letters and
// numbers are kept in lower case, every run of other characters between them
// becomes a single '-', and such runs at the start or end are dropped.
func SanitizedAnchorName(text string) string {
	var out strings.Builder
	pendingDash := false
	for _, r := range text {
		if !unicode.IsLetter(r) && !unicode.IsNumber(r) {
			// remember the separator; it is only emitted if more text follows
			pendingDash = true
			continue
		}
		if pendingDash && out.Len() > 0 {
			out.WriteByte('-')
		}
		pendingDash = false
		out.WriteRune(unicode.ToLower(r))
	}
	return out.String()
}
