Mercurial > yakumo_izuru > aya
comparison vendor/github.com/russross/blackfriday/v2/markdown.go @ 66:787b5ee0289d draft
Use vendored modules
Signed-off-by: Izuru Yakumo <yakumo.izuru@chaotic.ninja>
| author | yakumo.izuru |
|---|---|
| date | Sun, 23 Jul 2023 13:18:53 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 65:6d985efa0f7a | 66:787b5ee0289d |
|---|---|
| 1 // Blackfriday Markdown Processor | |
| 2 // Available at http://github.com/russross/blackfriday | |
| 3 // | |
| 4 // Copyright © 2011 Russ Ross <russ@russross.com>. | |
| 5 // Distributed under the Simplified BSD License. | |
| 6 // See README.md for details. | |
| 7 | |
| 8 package blackfriday | |
| 9 | |
| 10 import ( | |
| 11 "bytes" | |
| 12 "fmt" | |
| 13 "io" | |
| 14 "strings" | |
| 15 "unicode/utf8" | |
| 16 ) | |
| 17 | |
| 18 // | |
| 19 // Markdown parsing and processing | |
| 20 // | |
| 21 | |
// Version is the version string of the package. It appears in the rendered
// document when the CompletePage flag is on.
const Version = "2.0"
| 25 | |
| 26 // Extensions is a bitwise or'ed collection of enabled Blackfriday's | |
| 27 // extensions. | |
| 28 type Extensions int | |
| 29 | |
| 30 // These are the supported markdown parsing extensions. | |
| 31 // OR these values together to select multiple extensions. | |
| 32 const ( | |
| 33 NoExtensions Extensions = 0 | |
| 34 NoIntraEmphasis Extensions = 1 << iota // Ignore emphasis markers inside words | |
| 35 Tables // Render tables | |
| 36 FencedCode // Render fenced code blocks | |
| 37 Autolink // Detect embedded URLs that are not explicitly marked | |
| 38 Strikethrough // Strikethrough text using ~~test~~ | |
| 39 LaxHTMLBlocks // Loosen up HTML block parsing rules | |
| 40 SpaceHeadings // Be strict about prefix heading rules | |
| 41 HardLineBreak // Translate newlines into line breaks | |
| 42 TabSizeEight // Expand tabs to eight spaces instead of four | |
| 43 Footnotes // Pandoc-style footnotes | |
| 44 NoEmptyLineBeforeBlock // No need to insert an empty line to start a (code, quote, ordered list, unordered list) block | |
| 45 HeadingIDs // specify heading IDs with {#id} | |
| 46 Titleblock // Titleblock ala pandoc | |
| 47 AutoHeadingIDs // Create the heading ID from the text | |
| 48 BackslashLineBreak // Translate trailing backslashes into line breaks | |
| 49 DefinitionLists // Render definition lists | |
| 50 | |
| 51 CommonHTMLFlags HTMLFlags = UseXHTML | Smartypants | | |
| 52 SmartypantsFractions | SmartypantsDashes | SmartypantsLatexDashes | |
| 53 | |
| 54 CommonExtensions Extensions = NoIntraEmphasis | Tables | FencedCode | | |
| 55 Autolink | Strikethrough | SpaceHeadings | HeadingIDs | | |
| 56 BackslashLineBreak | DefinitionLists | |
| 57 ) | |
| 58 | |
// ListType is a bit set of flags describing a list or a list item.
type ListType int

// Flag values handed to the ListItem renderer. Several of them may be ORed
// together. They mostly matter when writing a new output format.
const (
	// Flags describing the kind of list.
	ListTypeOrdered ListType = 1 << iota
	ListTypeDefinition
	ListTypeTerm

	// Flags describing an individual item.
	ListItemContainsBlock
	ListItemBeginningOfList // TODO: figure out if this is of any use now
	ListItemEndOfList
)
| 74 | |
// CellAlignFlags describes the alignment of a table cell.
type CellAlignFlags int

// Alignment values handed to the table cell renderer. A cell carries exactly
// one of them; they are not meant to be ORed together by callers. They mostly
// matter when writing a new output format.
const (
	TableAlignmentLeft CellAlignFlags = 1 << iota
	TableAlignmentRight
	// TableAlignmentCenter is encoded as both the left and the right bit.
	TableAlignmentCenter = (TableAlignmentLeft | TableAlignmentRight)
)
| 86 | |
// Tab stop widths, in columns.
const (
	TabSizeDefault = 4
	TabSizeDouble  = 8
)
| 92 | |
// blockTags is the set of tag names recognized as HTML block tags. Any of
// them can be included in markdown text without special escaping.
var blockTags = map[string]struct{}{
	// Classic HTML block-level elements.
	"blockquote": {},
	"del":        {},
	"div":        {},
	"dl":         {},
	"fieldset":   {},
	"form":       {},
	"h1":         {},
	"h2":         {},
	"h3":         {},
	"h4":         {},
	"h5":         {},
	"h6":         {},
	"iframe":     {},
	"ins":        {},
	"math":       {},
	"noscript":   {},
	"ol":         {},
	"p":          {},
	"pre":        {},
	"script":     {},
	"style":      {},
	"table":      {},
	"ul":         {},

	// Elements introduced with HTML5.
	"address":    {},
	"article":    {},
	"aside":      {},
	"canvas":     {},
	"figcaption": {},
	"figure":     {},
	"footer":     {},
	"header":     {},
	"hgroup":     {},
	"main":       {},
	"nav":        {},
	"output":     {},
	"progress":   {},
	"section":    {},
	"video":      {},
}
| 137 | |
| 138 // Renderer is the rendering interface. This is mostly of interest if you are | |
| 139 // implementing a new rendering format. | |
| 140 // | |
| 141 // Only an HTML implementation is provided in this repository, see the README | |
| 142 // for external implementations. | |
| 143 type Renderer interface { | |
| 144 // RenderNode is the main rendering method. It will be called once for | |
| 145 // every leaf node and twice for every non-leaf node (first with | |
| 146 // entering=true, then with entering=false). The method should write its | |
| 147 // rendition of the node to the supplied writer w. | |
| 148 RenderNode(w io.Writer, node *Node, entering bool) WalkStatus | |
| 149 | |
| 150 // RenderHeader is a method that allows the renderer to produce some | |
| 151 // content preceding the main body of the output document. The header is | |
| 152 // understood in the broad sense here. For example, the default HTML | |
| 153 // renderer will write not only the HTML document preamble, but also the | |
| 154 // table of contents if it was requested. | |
| 155 // | |
| 156 // The method will be passed an entire document tree, in case a particular | |
| 157 // implementation needs to inspect it to produce output. | |
| 158 // | |
| 159 // The output should be written to the supplied writer w. If your | |
| 160 // implementation has no header to write, supply an empty implementation. | |
| 161 RenderHeader(w io.Writer, ast *Node) | |
| 162 | |
| 163 // RenderFooter is a symmetric counterpart of RenderHeader. | |
| 164 RenderFooter(w io.Writer, ast *Node) | |
| 165 } | |
| 166 | |
| 167 // Callback functions for inline parsing. One such function is defined | |
| 168 // for each character that triggers a response when parsing inline data. | |
| 169 type inlineParser func(p *Markdown, data []byte, offset int) (int, *Node) | |
| 170 | |
| 171 // Markdown is a type that holds extensions and the runtime state used by | |
| 172 // Parse, and the renderer. You can not use it directly, construct it with New. | |
| 173 type Markdown struct { | |
| 174 renderer Renderer | |
| 175 referenceOverride ReferenceOverrideFunc | |
| 176 refs map[string]*reference | |
| 177 inlineCallback [256]inlineParser | |
| 178 extensions Extensions | |
| 179 nesting int | |
| 180 maxNesting int | |
| 181 insideLink bool | |
| 182 | |
| 183 // Footnotes need to be ordered as well as available to quickly check for | |
| 184 // presence. If a ref is also a footnote, it's stored both in refs and here | |
| 185 // in notes. Slice is nil if footnotes not enabled. | |
| 186 notes []*reference | |
| 187 | |
| 188 doc *Node | |
| 189 tip *Node // = doc | |
| 190 oldTip *Node | |
| 191 lastMatchedContainer *Node // = doc | |
| 192 allClosed bool | |
| 193 } | |
| 194 | |
| 195 func (p *Markdown) getRef(refid string) (ref *reference, found bool) { | |
| 196 if p.referenceOverride != nil { | |
| 197 r, overridden := p.referenceOverride(refid) | |
| 198 if overridden { | |
| 199 if r == nil { | |
| 200 return nil, false | |
| 201 } | |
| 202 return &reference{ | |
| 203 link: []byte(r.Link), | |
| 204 title: []byte(r.Title), | |
| 205 noteID: 0, | |
| 206 hasBlock: false, | |
| 207 text: []byte(r.Text)}, true | |
| 208 } | |
| 209 } | |
| 210 // refs are case insensitive | |
| 211 ref, found = p.refs[strings.ToLower(refid)] | |
| 212 return ref, found | |
| 213 } | |
| 214 | |
| 215 func (p *Markdown) finalize(block *Node) { | |
| 216 above := block.Parent | |
| 217 block.open = false | |
| 218 p.tip = above | |
| 219 } | |
| 220 | |
| 221 func (p *Markdown) addChild(node NodeType, offset uint32) *Node { | |
| 222 return p.addExistingChild(NewNode(node), offset) | |
| 223 } | |
| 224 | |
| 225 func (p *Markdown) addExistingChild(node *Node, offset uint32) *Node { | |
| 226 for !p.tip.canContain(node.Type) { | |
| 227 p.finalize(p.tip) | |
| 228 } | |
| 229 p.tip.AppendChild(node) | |
| 230 p.tip = node | |
| 231 return node | |
| 232 } | |
| 233 | |
| 234 func (p *Markdown) closeUnmatchedBlocks() { | |
| 235 if !p.allClosed { | |
| 236 for p.oldTip != p.lastMatchedContainer { | |
| 237 parent := p.oldTip.Parent | |
| 238 p.finalize(p.oldTip) | |
| 239 p.oldTip = parent | |
| 240 } | |
| 241 p.allClosed = true | |
| 242 } | |
| 243 } | |
| 244 | |
| 245 // | |
| 246 // | |
| 247 // Public interface | |
| 248 // | |
| 249 // | |
| 250 | |
// Reference carries the details of a link.
// See the documentation in Options for more details on use-case.
type Reference struct {
	// Link is usually the URL the reference points to.
	Link string

	// Title is the alternate text describing the link in more detail.
	Title string

	// Text is optional. It overrides the ref when the syntax used was
	// [refid][].
	Text string
}
| 262 | |
| 263 // ReferenceOverrideFunc is expected to be called with a reference string and | |
| 264 // return either a valid Reference type that the reference string maps to or | |
| 265 // nil. If overridden is false, the default reference logic will be executed. | |
| 266 // See the documentation in Options for more details on use-case. | |
| 267 type ReferenceOverrideFunc func(reference string) (ref *Reference, overridden bool) | |
| 268 | |
| 269 // New constructs a Markdown processor. You can use the same With* functions as | |
| 270 // for Run() to customize parser's behavior and the renderer. | |
| 271 func New(opts ...Option) *Markdown { | |
| 272 var p Markdown | |
| 273 for _, opt := range opts { | |
| 274 opt(&p) | |
| 275 } | |
| 276 p.refs = make(map[string]*reference) | |
| 277 p.maxNesting = 16 | |
| 278 p.insideLink = false | |
| 279 docNode := NewNode(Document) | |
| 280 p.doc = docNode | |
| 281 p.tip = docNode | |
| 282 p.oldTip = docNode | |
| 283 p.lastMatchedContainer = docNode | |
| 284 p.allClosed = true | |
| 285 // register inline parsers | |
| 286 p.inlineCallback[' '] = maybeLineBreak | |
| 287 p.inlineCallback['*'] = emphasis | |
| 288 p.inlineCallback['_'] = emphasis | |
| 289 if p.extensions&Strikethrough != 0 { | |
| 290 p.inlineCallback['~'] = emphasis | |
| 291 } | |
| 292 p.inlineCallback['`'] = codeSpan | |
| 293 p.inlineCallback['\n'] = lineBreak | |
| 294 p.inlineCallback['['] = link | |
| 295 p.inlineCallback['<'] = leftAngle | |
| 296 p.inlineCallback['\\'] = escape | |
| 297 p.inlineCallback['&'] = entity | |
| 298 p.inlineCallback['!'] = maybeImage | |
| 299 p.inlineCallback['^'] = maybeInlineFootnote | |
| 300 if p.extensions&Autolink != 0 { | |
| 301 p.inlineCallback['h'] = maybeAutoLink | |
| 302 p.inlineCallback['m'] = maybeAutoLink | |
| 303 p.inlineCallback['f'] = maybeAutoLink | |
| 304 p.inlineCallback['H'] = maybeAutoLink | |
| 305 p.inlineCallback['M'] = maybeAutoLink | |
| 306 p.inlineCallback['F'] = maybeAutoLink | |
| 307 } | |
| 308 if p.extensions&Footnotes != 0 { | |
| 309 p.notes = make([]*reference, 0) | |
| 310 } | |
| 311 return &p | |
| 312 } | |
| 313 | |
| 314 // Option customizes the Markdown processor's default behavior. | |
| 315 type Option func(*Markdown) | |
| 316 | |
| 317 // WithRenderer allows you to override the default renderer. | |
| 318 func WithRenderer(r Renderer) Option { | |
| 319 return func(p *Markdown) { | |
| 320 p.renderer = r | |
| 321 } | |
| 322 } | |
| 323 | |
| 324 // WithExtensions allows you to pick some of the many extensions provided by | |
| 325 // Blackfriday. You can bitwise OR them. | |
| 326 func WithExtensions(e Extensions) Option { | |
| 327 return func(p *Markdown) { | |
| 328 p.extensions = e | |
| 329 } | |
| 330 } | |
| 331 | |
| 332 // WithNoExtensions turns off all extensions and custom behavior. | |
| 333 func WithNoExtensions() Option { | |
| 334 return func(p *Markdown) { | |
| 335 p.extensions = NoExtensions | |
| 336 p.renderer = NewHTMLRenderer(HTMLRendererParameters{ | |
| 337 Flags: HTMLFlagsNone, | |
| 338 }) | |
| 339 } | |
| 340 } | |
| 341 | |
| 342 // WithRefOverride sets an optional function callback that is called every | |
| 343 // time a reference is resolved. | |
| 344 // | |
| 345 // In Markdown, the link reference syntax can be made to resolve a link to | |
| 346 // a reference instead of an inline URL, in one of the following ways: | |
| 347 // | |
| 348 // * [link text][refid] | |
| 349 // * [refid][] | |
| 350 // | |
| 351 // Usually, the refid is defined at the bottom of the Markdown document. If | |
| 352 // this override function is provided, the refid is passed to the override | |
| 353 // function first, before consulting the defined refids at the bottom. If | |
| 354 // the override function indicates an override did not occur, the refids at | |
| 355 // the bottom will be used to fill in the link details. | |
| 356 func WithRefOverride(o ReferenceOverrideFunc) Option { | |
| 357 return func(p *Markdown) { | |
| 358 p.referenceOverride = o | |
| 359 } | |
| 360 } | |
| 361 | |
| 362 // Run is the main entry point to Blackfriday. It parses and renders a | |
| 363 // block of markdown-encoded text. | |
| 364 // | |
| 365 // The simplest invocation of Run takes one argument, input: | |
| 366 // output := Run(input) | |
| 367 // This will parse the input with CommonExtensions enabled and render it with | |
| 368 // the default HTMLRenderer (with CommonHTMLFlags). | |
| 369 // | |
| 370 // Variadic arguments opts can customize the default behavior. Since Markdown | |
| 371 // type does not contain exported fields, you can not use it directly. Instead, | |
| 372 // use the With* functions. For example, this will call the most basic | |
| 373 // functionality, with no extensions: | |
| 374 // output := Run(input, WithNoExtensions()) | |
| 375 // | |
| 376 // You can use any number of With* arguments, even contradicting ones. They | |
| 377 // will be applied in order of appearance and the latter will override the | |
| 378 // former: | |
| 379 // output := Run(input, WithNoExtensions(), WithExtensions(exts), | |
| 380 // WithRenderer(yourRenderer)) | |
| 381 func Run(input []byte, opts ...Option) []byte { | |
| 382 r := NewHTMLRenderer(HTMLRendererParameters{ | |
| 383 Flags: CommonHTMLFlags, | |
| 384 }) | |
| 385 optList := []Option{WithRenderer(r), WithExtensions(CommonExtensions)} | |
| 386 optList = append(optList, opts...) | |
| 387 parser := New(optList...) | |
| 388 ast := parser.Parse(input) | |
| 389 var buf bytes.Buffer | |
| 390 parser.renderer.RenderHeader(&buf, ast) | |
| 391 ast.Walk(func(node *Node, entering bool) WalkStatus { | |
| 392 return parser.renderer.RenderNode(&buf, node, entering) | |
| 393 }) | |
| 394 parser.renderer.RenderFooter(&buf, ast) | |
| 395 return buf.Bytes() | |
| 396 } | |
| 397 | |
| 398 // Parse is an entry point to the parsing part of Blackfriday. It takes an | |
| 399 // input markdown document and produces a syntax tree for its contents. This | |
| 400 // tree can then be rendered with a default or custom renderer, or | |
| 401 // analyzed/transformed by the caller to whatever non-standard needs they have. | |
| 402 // The return value is the root node of the syntax tree. | |
| 403 func (p *Markdown) Parse(input []byte) *Node { | |
| 404 p.block(input) | |
| 405 // Walk the tree and finish up some of unfinished blocks | |
| 406 for p.tip != nil { | |
| 407 p.finalize(p.tip) | |
| 408 } | |
| 409 // Walk the tree again and process inline markdown in each block | |
| 410 p.doc.Walk(func(node *Node, entering bool) WalkStatus { | |
| 411 if node.Type == Paragraph || node.Type == Heading || node.Type == TableCell { | |
| 412 p.inline(node, node.content) | |
| 413 node.content = nil | |
| 414 } | |
| 415 return GoToNext | |
| 416 }) | |
| 417 p.parseRefsToAST() | |
| 418 return p.doc | |
| 419 } | |
| 420 | |
| 421 func (p *Markdown) parseRefsToAST() { | |
| 422 if p.extensions&Footnotes == 0 || len(p.notes) == 0 { | |
| 423 return | |
| 424 } | |
| 425 p.tip = p.doc | |
| 426 block := p.addBlock(List, nil) | |
| 427 block.IsFootnotesList = true | |
| 428 block.ListFlags = ListTypeOrdered | |
| 429 flags := ListItemBeginningOfList | |
| 430 // Note: this loop is intentionally explicit, not range-form. This is | |
| 431 // because the body of the loop will append nested footnotes to p.notes and | |
| 432 // we need to process those late additions. Range form would only walk over | |
| 433 // the fixed initial set. | |
| 434 for i := 0; i < len(p.notes); i++ { | |
| 435 ref := p.notes[i] | |
| 436 p.addExistingChild(ref.footnote, 0) | |
| 437 block := ref.footnote | |
| 438 block.ListFlags = flags | ListTypeOrdered | |
| 439 block.RefLink = ref.link | |
| 440 if ref.hasBlock { | |
| 441 flags |= ListItemContainsBlock | |
| 442 p.block(ref.title) | |
| 443 } else { | |
| 444 p.inline(block, ref.title) | |
| 445 } | |
| 446 flags &^= ListItemBeginningOfList | ListItemContainsBlock | |
| 447 } | |
| 448 above := block.Parent | |
| 449 finalizeList(block) | |
| 450 p.tip = above | |
| 451 block.Walk(func(node *Node, entering bool) WalkStatus { | |
| 452 if node.Type == Paragraph || node.Type == Heading { | |
| 453 p.inline(node, node.content) | |
| 454 node.content = nil | |
| 455 } | |
| 456 return GoToNext | |
| 457 }) | |
| 458 } | |
| 459 | |
| 460 // | |
| 461 // Link references | |
| 462 // | |
| 463 // This section implements support for references that (usually) appear | |
| 464 // as footnotes in a document, and can be referenced anywhere in the document. | |
| 465 // The basic format is: | |
| 466 // | |
| 467 // [1]: http://www.google.com/ "Google" | |
| 468 // [2]: http://www.github.com/ "Github" | |
| 469 // | |
| 470 // Anywhere in the document, the reference can be linked by referring to its | |
| 471 // label, i.e., 1 and 2 in this example, as in: | |
| 472 // | |
| 473 // This library is hosted on [Github][2], a git hosting site. | |
| 474 // | |
| 475 // Actual footnotes as specified in Pandoc and supported by some other Markdown | |
| 476 // libraries such as php-markdown are also taken care of. They look like this: | |
| 477 // | |
| 478 // This sentence needs a bit of further explanation.[^note] | |
| 479 // | |
| 480 // [^note]: This is the explanation. | |
| 481 // | |
| 482 // Footnotes are rendered at the end of the document as an ordered list. | |
| 483 // Finally, there are inline footnotes such as: | |
| 484 // | |
| 485 // Inline footnotes^[Also supported.] provide a quick inline explanation, | |
| 486 // but are rendered at the bottom of the document. | |
| 487 // | |
| 488 | |
| 489 // reference holds all information necessary for a reference-style links or | |
| 490 // footnotes. | |
| 491 // | |
| 492 // Consider this markdown with reference-style links: | |
| 493 // | |
| 494 // [link][ref] | |
| 495 // | |
| 496 // [ref]: /url/ "tooltip title" | |
| 497 // | |
| 498 // It will be ultimately converted to this HTML: | |
| 499 // | |
| 500 // <p><a href=\"/url/\" title=\"title\">link</a></p> | |
| 501 // | |
| 502 // And a reference structure will be populated as follows: | |
| 503 // | |
| 504 // p.refs["ref"] = &reference{ | |
| 505 // link: "/url/", | |
| 506 // title: "tooltip title", | |
| 507 // } | |
| 508 // | |
| 509 // Alternatively, reference can contain information about a footnote. Consider | |
| 510 // this markdown: | |
| 511 // | |
| 512 // Text needing a footnote.[^a] | |
| 513 // | |
| 514 // [^a]: This is the note | |
| 515 // | |
| 516 // A reference structure will be populated as follows: | |
| 517 // | |
| 518 // p.refs["a"] = &reference{ | |
| 519 // link: "a", | |
| 520 // title: "This is the note", | |
| 521 // noteID: <some positive int>, | |
| 522 // } | |
| 523 // | |
| 524 // TODO: As you can see, it begs for splitting into two dedicated structures | |
| 525 // for refs and for footnotes. | |
| 526 type reference struct { | |
| 527 link []byte | |
| 528 title []byte | |
| 529 noteID int // 0 if not a footnote ref | |
| 530 hasBlock bool | |
| 531 footnote *Node // a link to the Item node within a list of footnotes | |
| 532 | |
| 533 text []byte // only gets populated by refOverride feature with Reference.Text | |
| 534 } | |
| 535 | |
| 536 func (r *reference) String() string { | |
| 537 return fmt.Sprintf("{link: %q, title: %q, text: %q, noteID: %d, hasBlock: %v}", | |
| 538 r.link, r.title, r.text, r.noteID, r.hasBlock) | |
| 539 } | |
| 540 | |
| 541 // Check whether or not data starts with a reference link. | |
| 542 // If so, it is parsed and stored in the list of references | |
| 543 // (in the render struct). | |
| 544 // Returns the number of bytes to skip to move past it, | |
| 545 // or zero if the first line is not a reference. | |
| 546 func isReference(p *Markdown, data []byte, tabSize int) int { | |
| 547 // up to 3 optional leading spaces | |
| 548 if len(data) < 4 { | |
| 549 return 0 | |
| 550 } | |
| 551 i := 0 | |
| 552 for i < 3 && data[i] == ' ' { | |
| 553 i++ | |
| 554 } | |
| 555 | |
| 556 noteID := 0 | |
| 557 | |
| 558 // id part: anything but a newline between brackets | |
| 559 if data[i] != '[' { | |
| 560 return 0 | |
| 561 } | |
| 562 i++ | |
| 563 if p.extensions&Footnotes != 0 { | |
| 564 if i < len(data) && data[i] == '^' { | |
| 565 // we can set it to anything here because the proper noteIds will | |
| 566 // be assigned later during the second pass. It just has to be != 0 | |
| 567 noteID = 1 | |
| 568 i++ | |
| 569 } | |
| 570 } | |
| 571 idOffset := i | |
| 572 for i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != ']' { | |
| 573 i++ | |
| 574 } | |
| 575 if i >= len(data) || data[i] != ']' { | |
| 576 return 0 | |
| 577 } | |
| 578 idEnd := i | |
| 579 // footnotes can have empty ID, like this: [^], but a reference can not be | |
| 580 // empty like this: []. Break early if it's not a footnote and there's no ID | |
| 581 if noteID == 0 && idOffset == idEnd { | |
| 582 return 0 | |
| 583 } | |
| 584 // spacer: colon (space | tab)* newline? (space | tab)* | |
| 585 i++ | |
| 586 if i >= len(data) || data[i] != ':' { | |
| 587 return 0 | |
| 588 } | |
| 589 i++ | |
| 590 for i < len(data) && (data[i] == ' ' || data[i] == '\t') { | |
| 591 i++ | |
| 592 } | |
| 593 if i < len(data) && (data[i] == '\n' || data[i] == '\r') { | |
| 594 i++ | |
| 595 if i < len(data) && data[i] == '\n' && data[i-1] == '\r' { | |
| 596 i++ | |
| 597 } | |
| 598 } | |
| 599 for i < len(data) && (data[i] == ' ' || data[i] == '\t') { | |
| 600 i++ | |
| 601 } | |
| 602 if i >= len(data) { | |
| 603 return 0 | |
| 604 } | |
| 605 | |
| 606 var ( | |
| 607 linkOffset, linkEnd int | |
| 608 titleOffset, titleEnd int | |
| 609 lineEnd int | |
| 610 raw []byte | |
| 611 hasBlock bool | |
| 612 ) | |
| 613 | |
| 614 if p.extensions&Footnotes != 0 && noteID != 0 { | |
| 615 linkOffset, linkEnd, raw, hasBlock = scanFootnote(p, data, i, tabSize) | |
| 616 lineEnd = linkEnd | |
| 617 } else { | |
| 618 linkOffset, linkEnd, titleOffset, titleEnd, lineEnd = scanLinkRef(p, data, i) | |
| 619 } | |
| 620 if lineEnd == 0 { | |
| 621 return 0 | |
| 622 } | |
| 623 | |
| 624 // a valid ref has been found | |
| 625 | |
| 626 ref := &reference{ | |
| 627 noteID: noteID, | |
| 628 hasBlock: hasBlock, | |
| 629 } | |
| 630 | |
| 631 if noteID > 0 { | |
| 632 // reusing the link field for the id since footnotes don't have links | |
| 633 ref.link = data[idOffset:idEnd] | |
| 634 // if footnote, it's not really a title, it's the contained text | |
| 635 ref.title = raw | |
| 636 } else { | |
| 637 ref.link = data[linkOffset:linkEnd] | |
| 638 ref.title = data[titleOffset:titleEnd] | |
| 639 } | |
| 640 | |
| 641 // id matches are case-insensitive | |
| 642 id := string(bytes.ToLower(data[idOffset:idEnd])) | |
| 643 | |
| 644 p.refs[id] = ref | |
| 645 | |
| 646 return lineEnd | |
| 647 } | |
| 648 | |
| 649 func scanLinkRef(p *Markdown, data []byte, i int) (linkOffset, linkEnd, titleOffset, titleEnd, lineEnd int) { | |
| 650 // link: whitespace-free sequence, optionally between angle brackets | |
| 651 if data[i] == '<' { | |
| 652 i++ | |
| 653 } | |
| 654 linkOffset = i | |
| 655 for i < len(data) && data[i] != ' ' && data[i] != '\t' && data[i] != '\n' && data[i] != '\r' { | |
| 656 i++ | |
| 657 } | |
| 658 linkEnd = i | |
| 659 if data[linkOffset] == '<' && data[linkEnd-1] == '>' { | |
| 660 linkOffset++ | |
| 661 linkEnd-- | |
| 662 } | |
| 663 | |
| 664 // optional spacer: (space | tab)* (newline | '\'' | '"' | '(' ) | |
| 665 for i < len(data) && (data[i] == ' ' || data[i] == '\t') { | |
| 666 i++ | |
| 667 } | |
| 668 if i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != '\'' && data[i] != '"' && data[i] != '(' { | |
| 669 return | |
| 670 } | |
| 671 | |
| 672 // compute end-of-line | |
| 673 if i >= len(data) || data[i] == '\r' || data[i] == '\n' { | |
| 674 lineEnd = i | |
| 675 } | |
| 676 if i+1 < len(data) && data[i] == '\r' && data[i+1] == '\n' { | |
| 677 lineEnd++ | |
| 678 } | |
| 679 | |
| 680 // optional (space|tab)* spacer after a newline | |
| 681 if lineEnd > 0 { | |
| 682 i = lineEnd + 1 | |
| 683 for i < len(data) && (data[i] == ' ' || data[i] == '\t') { | |
| 684 i++ | |
| 685 } | |
| 686 } | |
| 687 | |
| 688 // optional title: any non-newline sequence enclosed in '"() alone on its line | |
| 689 if i+1 < len(data) && (data[i] == '\'' || data[i] == '"' || data[i] == '(') { | |
| 690 i++ | |
| 691 titleOffset = i | |
| 692 | |
| 693 // look for EOL | |
| 694 for i < len(data) && data[i] != '\n' && data[i] != '\r' { | |
| 695 i++ | |
| 696 } | |
| 697 if i+1 < len(data) && data[i] == '\n' && data[i+1] == '\r' { | |
| 698 titleEnd = i + 1 | |
| 699 } else { | |
| 700 titleEnd = i | |
| 701 } | |
| 702 | |
| 703 // step back | |
| 704 i-- | |
| 705 for i > titleOffset && (data[i] == ' ' || data[i] == '\t') { | |
| 706 i-- | |
| 707 } | |
| 708 if i > titleOffset && (data[i] == '\'' || data[i] == '"' || data[i] == ')') { | |
| 709 lineEnd = titleEnd | |
| 710 titleEnd = i | |
| 711 } | |
| 712 } | |
| 713 | |
| 714 return | |
| 715 } | |
| 716 | |
| 717 // The first bit of this logic is the same as Parser.listItem, but the rest | |
| 718 // is much simpler. This function simply finds the entire block and shifts it | |
| 719 // over by one tab if it is indeed a block (just returns the line if it's not). | |
| 720 // blockEnd is the end of the section in the input buffer, and contents is the | |
| 721 // extracted text that was shifted over one tab. It will need to be rendered at | |
| 722 // the end of the document. | |
| 723 func scanFootnote(p *Markdown, data []byte, i, indentSize int) (blockStart, blockEnd int, contents []byte, hasBlock bool) { | |
| 724 if i == 0 || len(data) == 0 { | |
| 725 return | |
| 726 } | |
| 727 | |
| 728 // skip leading whitespace on first line | |
| 729 for i < len(data) && data[i] == ' ' { | |
| 730 i++ | |
| 731 } | |
| 732 | |
| 733 blockStart = i | |
| 734 | |
| 735 // find the end of the line | |
| 736 blockEnd = i | |
| 737 for i < len(data) && data[i-1] != '\n' { | |
| 738 i++ | |
| 739 } | |
| 740 | |
| 741 // get working buffer | |
| 742 var raw bytes.Buffer | |
| 743 | |
| 744 // put the first line into the working buffer | |
| 745 raw.Write(data[blockEnd:i]) | |
| 746 blockEnd = i | |
| 747 | |
| 748 // process the following lines | |
| 749 containsBlankLine := false | |
| 750 | |
| 751 gatherLines: | |
| 752 for blockEnd < len(data) { | |
| 753 i++ | |
| 754 | |
| 755 // find the end of this line | |
| 756 for i < len(data) && data[i-1] != '\n' { | |
| 757 i++ | |
| 758 } | |
| 759 | |
| 760 // if it is an empty line, guess that it is part of this item | |
| 761 // and move on to the next line | |
| 762 if p.isEmpty(data[blockEnd:i]) > 0 { | |
| 763 containsBlankLine = true | |
| 764 blockEnd = i | |
| 765 continue | |
| 766 } | |
| 767 | |
| 768 n := 0 | |
| 769 if n = isIndented(data[blockEnd:i], indentSize); n == 0 { | |
| 770 // this is the end of the block. | |
| 771 // we don't want to include this last line in the index. | |
| 772 break gatherLines | |
| 773 } | |
| 774 | |
| 775 // if there were blank lines before this one, insert a new one now | |
| 776 if containsBlankLine { | |
| 777 raw.WriteByte('\n') | |
| 778 containsBlankLine = false | |
| 779 } | |
| 780 | |
| 781 // get rid of that first tab, write to buffer | |
| 782 raw.Write(data[blockEnd+n : i]) | |
| 783 hasBlock = true | |
| 784 | |
| 785 blockEnd = i | |
| 786 } | |
| 787 | |
| 788 if data[blockEnd-1] != '\n' { | |
| 789 raw.WriteByte('\n') | |
| 790 } | |
| 791 | |
| 792 contents = raw.Bytes() | |
| 793 | |
| 794 return | |
| 795 } | |
| 796 | |
| 797 // | |
| 798 // | |
| 799 // Miscellaneous helper functions | |
| 800 // | |
| 801 // | |
| 802 | |
// ispunct reports whether c is an ASCII punctuation symbol, i.e. one of
// !"#$%&'()*+,-./:;<=>?@[\]^_`{|}~ (the same set used by a private function
// in the stdlib's regexp package).
func ispunct(c byte) bool {
	// The punctuation symbols form four contiguous ASCII ranges.
	switch {
	case '!' <= c && c <= '/',
		':' <= c && c <= '@',
		'[' <= c && c <= '`',
		'{' <= c && c <= '~':
		return true
	}
	return false
}
| 813 | |
// isspace reports whether c is a whitespace character: space, tab, newline,
// carriage return, form feed or vertical tab.
func isspace(c byte) bool {
	switch c {
	case ' ', '\t', '\n', '\r', '\f', '\v':
		return true
	}
	return false
}
| 818 | |
// ishorizontalspace reports whether c is a horizontal whitespace character,
// i.e. a space or a tab.
func ishorizontalspace(c byte) bool {
	switch c {
	case ' ', '\t':
		return true
	}
	return false
}
| 823 | |
// isverticalspace reports whether c is a vertical whitespace character:
// newline, carriage return, form feed or vertical tab.
func isverticalspace(c byte) bool {
	switch c {
	case '\n', '\r', '\f', '\v':
		return true
	}
	return false
}
| 828 | |
// isletter reports whether c is an ASCII letter (a-z or A-Z).
func isletter(c byte) bool {
	// Setting bit 0x20 folds the upper-case range onto the lower-case one;
	// no other byte lands in a-z.
	lower := c | 0x20
	return 'a' <= lower && lower <= 'z'
}
| 833 | |
// isalnum reports whether c is an ASCII letter or digit.
// TODO: check when this is looking for ASCII alnum and when it should use unicode
func isalnum(c byte) bool {
	switch {
	case '0' <= c && c <= '9',
		'a' <= c && c <= 'z',
		'A' <= c && c <= 'Z':
		return true
	}
	return false
}
| 839 | |
// expandTabs writes line to out with every tab character replaced by spaces,
// aligning to the next multiple of tabSize columns. Columns are counted in
// runes, not bytes. Nothing is appended beyond the expanded line: the output
// ends with a newline only if line does.
func expandTabs(out *bytes.Buffer, line []byte, tabSize int) {
	// Common cases first: no tabs at all, or tabs only at the beginning of
	// the line. Neither needs rune decoding.
	prefix := 0
	for prefix < len(line) && line[prefix] == '\t' {
		prefix++
	}
	rest := line[prefix:]
	if bytes.IndexByte(rest, '\t') < 0 {
		for pad := prefix * tabSize; pad > 0; pad-- {
			out.WriteByte(' ')
		}
		out.Write(rest)
		return
	}

	// The slow case: runes have to be counted to figure out how many spaces
	// each tab stands for.
	column := 0
	for pos := 0; pos < len(line); {
		// copy the run of non-tab text, counting one column per rune
		segStart := pos
		for pos < len(line) && line[pos] != '\t' {
			_, width := utf8.DecodeRune(line[pos:])
			pos += width
			column++
		}
		if pos > segStart {
			out.Write(line[segStart:pos])
		}
		if pos >= len(line) {
			break
		}

		// pad with at least one space, up to the next tab stop
		out.WriteByte(' ')
		column++
		for column%tabSize != 0 {
			out.WriteByte(' ')
			column++
		}

		// step over the tab itself
		pos++
	}
}
| 897 | |
// isIndented reports whether a line counts as indented. It returns the
// number of bytes making up the indent: 1 for a leading tab, indentSize for
// indentSize leading spaces, and 0 when the line is not indented.
func isIndented(data []byte, indentSize int) int {
	switch {
	case len(data) == 0:
		return 0
	case data[0] == '\t':
		return 1
	case len(data) < indentSize:
		return 0
	}

	// every one of the first indentSize bytes has to be a space
	for k := 0; k < indentSize; k++ {
		if data[k] != ' ' {
			return 0
		}
	}
	return indentSize
}
| 917 | |
// slugify creates a url-safe slug for fragments. ASCII letters and digits
// are kept as they are, every run of other bytes collapses into a single
// '-', and dashes at either end of the result are trimmed. An input made up
// only of non-alphanumeric bytes yields "-". Empty input is returned as is.
func slugify(in []byte) []byte {
	if len(in) == 0 {
		return in
	}

	slug := make([]byte, 0, len(in))
	inSeparator := false
	for _, c := range in {
		alnum := (c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
		switch {
		case alnum:
			slug = append(slug, c)
			inSeparator = false
		case !inSeparator:
			slug = append(slug, '-')
			inSeparator = true
		}
	}

	// Trim dashes from both ends. Each scan stops at the last remaining
	// byte, so a slug that is all dashes keeps one of them.
	first := 0
	for first < len(slug)-1 && slug[first] == '-' {
		first++
	}
	last := len(slug) - 1
	for last > 0 && slug[last] == '-' {
		last--
	}
	return slug[first : last+1]
}
| 950 } |
