66
|
1 // Blackfriday Markdown Processor
|
|
2 // Available at http://github.com/russross/blackfriday
|
|
3 //
|
|
4 // Copyright © 2011 Russ Ross <russ@russross.com>.
|
|
5 // Distributed under the Simplified BSD License.
|
|
6 // See README.md for details.
|
|
7
|
|
8 package blackfriday
|
|
9
|
|
10 import (
|
|
11 "bytes"
|
|
12 "fmt"
|
|
13 "io"
|
|
14 "strings"
|
|
15 "unicode/utf8"
|
|
16 )
|
|
17
|
|
18 //
|
|
19 // Markdown parsing and processing
|
|
20 //
|
|
21
|
|
// Version is the version string of the package. It appears in the rendered
// document when the CompletePage flag is on.
const Version = "2.0"
|
|
25
|
|
26 // Extensions is a bitwise or'ed collection of enabled Blackfriday's
|
|
27 // extensions.
|
|
28 type Extensions int
|
|
29
|
|
30 // These are the supported markdown parsing extensions.
|
|
31 // OR these values together to select multiple extensions.
|
|
32 const (
|
|
33 NoExtensions Extensions = 0
|
|
34 NoIntraEmphasis Extensions = 1 << iota // Ignore emphasis markers inside words
|
|
35 Tables // Render tables
|
|
36 FencedCode // Render fenced code blocks
|
|
37 Autolink // Detect embedded URLs that are not explicitly marked
|
|
38 Strikethrough // Strikethrough text using ~~test~~
|
|
39 LaxHTMLBlocks // Loosen up HTML block parsing rules
|
|
40 SpaceHeadings // Be strict about prefix heading rules
|
|
41 HardLineBreak // Translate newlines into line breaks
|
|
42 TabSizeEight // Expand tabs to eight spaces instead of four
|
|
43 Footnotes // Pandoc-style footnotes
|
|
44 NoEmptyLineBeforeBlock // No need to insert an empty line to start a (code, quote, ordered list, unordered list) block
|
|
45 HeadingIDs // specify heading IDs with {#id}
|
|
46 Titleblock // Titleblock ala pandoc
|
|
47 AutoHeadingIDs // Create the heading ID from the text
|
|
48 BackslashLineBreak // Translate trailing backslashes into line breaks
|
|
49 DefinitionLists // Render definition lists
|
|
50
|
|
51 CommonHTMLFlags HTMLFlags = UseXHTML | Smartypants |
|
|
52 SmartypantsFractions | SmartypantsDashes | SmartypantsLatexDashes
|
|
53
|
|
54 CommonExtensions Extensions = NoIntraEmphasis | Tables | FencedCode |
|
|
55 Autolink | Strikethrough | SpaceHeadings | HeadingIDs |
|
|
56 BackslashLineBreak | DefinitionLists
|
|
57 )
|
|
58
|
|
// ListType contains bitwise OR'ed flags for list and list item objects.
type ListType int

// These are the possible flag values for the ListItem renderer.
// Multiple flag values may be ORed together.
// These are mostly of interest if you are writing a new output format.
const (
	// Flags describing the kind of list.
	ListTypeOrdered ListType = 1 << iota
	ListTypeDefinition
	ListTypeTerm

	// Flags describing an individual list item.
	ListItemContainsBlock
	ListItemBeginningOfList // TODO: figure out if this is of any use now
	ListItemEndOfList
)
|
|
74
|
|
// CellAlignFlags holds a type of alignment in a table cell.
type CellAlignFlags int

// These are the possible flag values for the table cell renderer.
// Only a single one of these values will be used; they are not ORed together.
// These are mostly of interest if you are writing a new output format.
const (
	TableAlignmentLeft CellAlignFlags = 1 << iota
	TableAlignmentRight
	// TableAlignmentCenter is expressed as the combination of the left and
	// right bits.
	TableAlignmentCenter = TableAlignmentLeft | TableAlignmentRight
)
|
|
86
|
|
// The size of a tab stop, in columns.
const (
	// TabSizeDefault is the regular tab width.
	TabSizeDefault = 4
	// TabSizeDouble is the doubled tab width (eight columns).
	TabSizeDouble = 8
)
|
|
92
|
|
// blockTags is the set of tag names that are recognized as HTML block tags.
// Any of these can be included in markdown text without special escaping.
// The map is used purely as a set, hence the empty struct values.
var blockTags = map[string]struct{}{
	// Classic HTML block-level tags.
	"blockquote": struct{}{},
	"del":        struct{}{},
	"div":        struct{}{},
	"dl":         struct{}{},
	"fieldset":   struct{}{},
	"form":       struct{}{},
	"h1":         struct{}{},
	"h2":         struct{}{},
	"h3":         struct{}{},
	"h4":         struct{}{},
	"h5":         struct{}{},
	"h6":         struct{}{},
	"iframe":     struct{}{},
	"ins":        struct{}{},
	"math":       struct{}{},
	"noscript":   struct{}{},
	"ol":         struct{}{},
	"p":          struct{}{},
	"pre":        struct{}{},
	"script":     struct{}{},
	"style":      struct{}{},
	"table":      struct{}{},
	"ul":         struct{}{},

	// HTML5 additions.
	"address":    struct{}{},
	"article":    struct{}{},
	"aside":      struct{}{},
	"canvas":     struct{}{},
	"figcaption": struct{}{},
	"figure":     struct{}{},
	"footer":     struct{}{},
	"header":     struct{}{},
	"hgroup":     struct{}{},
	"main":       struct{}{},
	"nav":        struct{}{},
	"output":     struct{}{},
	"progress":   struct{}{},
	"section":    struct{}{},
	"video":      struct{}{},
}
|
|
137
|
|
138 // Renderer is the rendering interface. This is mostly of interest if you are
|
|
139 // implementing a new rendering format.
|
|
140 //
|
|
141 // Only an HTML implementation is provided in this repository, see the README
|
|
142 // for external implementations.
|
|
143 type Renderer interface {
|
|
144 // RenderNode is the main rendering method. It will be called once for
|
|
145 // every leaf node and twice for every non-leaf node (first with
|
|
146 // entering=true, then with entering=false). The method should write its
|
|
147 // rendition of the node to the supplied writer w.
|
|
148 RenderNode(w io.Writer, node *Node, entering bool) WalkStatus
|
|
149
|
|
150 // RenderHeader is a method that allows the renderer to produce some
|
|
151 // content preceding the main body of the output document. The header is
|
|
152 // understood in the broad sense here. For example, the default HTML
|
|
153 // renderer will write not only the HTML document preamble, but also the
|
|
154 // table of contents if it was requested.
|
|
155 //
|
|
156 // The method will be passed an entire document tree, in case a particular
|
|
157 // implementation needs to inspect it to produce output.
|
|
158 //
|
|
159 // The output should be written to the supplied writer w. If your
|
|
160 // implementation has no header to write, supply an empty implementation.
|
|
161 RenderHeader(w io.Writer, ast *Node)
|
|
162
|
|
163 // RenderFooter is a symmetric counterpart of RenderHeader.
|
|
164 RenderFooter(w io.Writer, ast *Node)
|
|
165 }
|
|
166
|
|
167 // Callback functions for inline parsing. One such function is defined
|
|
168 // for each character that triggers a response when parsing inline data.
|
|
169 type inlineParser func(p *Markdown, data []byte, offset int) (int, *Node)
|
|
170
|
|
171 // Markdown is a type that holds extensions and the runtime state used by
|
|
172 // Parse, and the renderer. You can not use it directly, construct it with New.
|
|
173 type Markdown struct {
|
|
174 renderer Renderer
|
|
175 referenceOverride ReferenceOverrideFunc
|
|
176 refs map[string]*reference
|
|
177 inlineCallback [256]inlineParser
|
|
178 extensions Extensions
|
|
179 nesting int
|
|
180 maxNesting int
|
|
181 insideLink bool
|
|
182
|
|
183 // Footnotes need to be ordered as well as available to quickly check for
|
|
184 // presence. If a ref is also a footnote, it's stored both in refs and here
|
|
185 // in notes. Slice is nil if footnotes not enabled.
|
|
186 notes []*reference
|
|
187
|
|
188 doc *Node
|
|
189 tip *Node // = doc
|
|
190 oldTip *Node
|
|
191 lastMatchedContainer *Node // = doc
|
|
192 allClosed bool
|
|
193 }
|
|
194
|
|
195 func (p *Markdown) getRef(refid string) (ref *reference, found bool) {
|
|
196 if p.referenceOverride != nil {
|
|
197 r, overridden := p.referenceOverride(refid)
|
|
198 if overridden {
|
|
199 if r == nil {
|
|
200 return nil, false
|
|
201 }
|
|
202 return &reference{
|
|
203 link: []byte(r.Link),
|
|
204 title: []byte(r.Title),
|
|
205 noteID: 0,
|
|
206 hasBlock: false,
|
|
207 text: []byte(r.Text)}, true
|
|
208 }
|
|
209 }
|
|
210 // refs are case insensitive
|
|
211 ref, found = p.refs[strings.ToLower(refid)]
|
|
212 return ref, found
|
|
213 }
|
|
214
|
|
215 func (p *Markdown) finalize(block *Node) {
|
|
216 above := block.Parent
|
|
217 block.open = false
|
|
218 p.tip = above
|
|
219 }
|
|
220
|
|
221 func (p *Markdown) addChild(node NodeType, offset uint32) *Node {
|
|
222 return p.addExistingChild(NewNode(node), offset)
|
|
223 }
|
|
224
|
|
225 func (p *Markdown) addExistingChild(node *Node, offset uint32) *Node {
|
|
226 for !p.tip.canContain(node.Type) {
|
|
227 p.finalize(p.tip)
|
|
228 }
|
|
229 p.tip.AppendChild(node)
|
|
230 p.tip = node
|
|
231 return node
|
|
232 }
|
|
233
|
|
234 func (p *Markdown) closeUnmatchedBlocks() {
|
|
235 if !p.allClosed {
|
|
236 for p.oldTip != p.lastMatchedContainer {
|
|
237 parent := p.oldTip.Parent
|
|
238 p.finalize(p.oldTip)
|
|
239 p.oldTip = parent
|
|
240 }
|
|
241 p.allClosed = true
|
|
242 }
|
|
243 }
|
|
244
|
|
245 //
|
|
246 //
|
|
247 // Public interface
|
|
248 //
|
|
249 //
|
|
250
|
|
// Reference represents the details of a link.
// See the documentation in Options for more details on use-case.
type Reference struct {
	// Link is usually the URL the reference points to.
	Link string

	// Title is the alternate text describing the link in more detail.
	Title string

	// Text is the optional text to override the ref with if the syntax used
	// was [refid][].
	Text string
}
|
|
262
|
|
263 // ReferenceOverrideFunc is expected to be called with a reference string and
|
|
264 // return either a valid Reference type that the reference string maps to or
|
|
265 // nil. If overridden is false, the default reference logic will be executed.
|
|
266 // See the documentation in Options for more details on use-case.
|
|
267 type ReferenceOverrideFunc func(reference string) (ref *Reference, overridden bool)
|
|
268
|
|
269 // New constructs a Markdown processor. You can use the same With* functions as
|
|
270 // for Run() to customize parser's behavior and the renderer.
|
|
271 func New(opts ...Option) *Markdown {
|
|
272 var p Markdown
|
|
273 for _, opt := range opts {
|
|
274 opt(&p)
|
|
275 }
|
|
276 p.refs = make(map[string]*reference)
|
|
277 p.maxNesting = 16
|
|
278 p.insideLink = false
|
|
279 docNode := NewNode(Document)
|
|
280 p.doc = docNode
|
|
281 p.tip = docNode
|
|
282 p.oldTip = docNode
|
|
283 p.lastMatchedContainer = docNode
|
|
284 p.allClosed = true
|
|
285 // register inline parsers
|
|
286 p.inlineCallback[' '] = maybeLineBreak
|
|
287 p.inlineCallback['*'] = emphasis
|
|
288 p.inlineCallback['_'] = emphasis
|
|
289 if p.extensions&Strikethrough != 0 {
|
|
290 p.inlineCallback['~'] = emphasis
|
|
291 }
|
|
292 p.inlineCallback['`'] = codeSpan
|
|
293 p.inlineCallback['\n'] = lineBreak
|
|
294 p.inlineCallback['['] = link
|
|
295 p.inlineCallback['<'] = leftAngle
|
|
296 p.inlineCallback['\\'] = escape
|
|
297 p.inlineCallback['&'] = entity
|
|
298 p.inlineCallback['!'] = maybeImage
|
|
299 p.inlineCallback['^'] = maybeInlineFootnote
|
|
300 if p.extensions&Autolink != 0 {
|
|
301 p.inlineCallback['h'] = maybeAutoLink
|
|
302 p.inlineCallback['m'] = maybeAutoLink
|
|
303 p.inlineCallback['f'] = maybeAutoLink
|
|
304 p.inlineCallback['H'] = maybeAutoLink
|
|
305 p.inlineCallback['M'] = maybeAutoLink
|
|
306 p.inlineCallback['F'] = maybeAutoLink
|
|
307 }
|
|
308 if p.extensions&Footnotes != 0 {
|
|
309 p.notes = make([]*reference, 0)
|
|
310 }
|
|
311 return &p
|
|
312 }
|
|
313
|
|
314 // Option customizes the Markdown processor's default behavior.
|
|
315 type Option func(*Markdown)
|
|
316
|
|
317 // WithRenderer allows you to override the default renderer.
|
|
318 func WithRenderer(r Renderer) Option {
|
|
319 return func(p *Markdown) {
|
|
320 p.renderer = r
|
|
321 }
|
|
322 }
|
|
323
|
|
324 // WithExtensions allows you to pick some of the many extensions provided by
|
|
325 // Blackfriday. You can bitwise OR them.
|
|
326 func WithExtensions(e Extensions) Option {
|
|
327 return func(p *Markdown) {
|
|
328 p.extensions = e
|
|
329 }
|
|
330 }
|
|
331
|
|
332 // WithNoExtensions turns off all extensions and custom behavior.
|
|
333 func WithNoExtensions() Option {
|
|
334 return func(p *Markdown) {
|
|
335 p.extensions = NoExtensions
|
|
336 p.renderer = NewHTMLRenderer(HTMLRendererParameters{
|
|
337 Flags: HTMLFlagsNone,
|
|
338 })
|
|
339 }
|
|
340 }
|
|
341
|
|
342 // WithRefOverride sets an optional function callback that is called every
|
|
343 // time a reference is resolved.
|
|
344 //
|
|
345 // In Markdown, the link reference syntax can be made to resolve a link to
|
|
346 // a reference instead of an inline URL, in one of the following ways:
|
|
347 //
|
|
348 // * [link text][refid]
|
|
349 // * [refid][]
|
|
350 //
|
|
351 // Usually, the refid is defined at the bottom of the Markdown document. If
|
|
352 // this override function is provided, the refid is passed to the override
|
|
353 // function first, before consulting the defined refids at the bottom. If
|
|
354 // the override function indicates an override did not occur, the refids at
|
|
355 // the bottom will be used to fill in the link details.
|
|
356 func WithRefOverride(o ReferenceOverrideFunc) Option {
|
|
357 return func(p *Markdown) {
|
|
358 p.referenceOverride = o
|
|
359 }
|
|
360 }
|
|
361
|
|
362 // Run is the main entry point to Blackfriday. It parses and renders a
|
|
363 // block of markdown-encoded text.
|
|
364 //
|
|
365 // The simplest invocation of Run takes one argument, input:
|
|
366 // output := Run(input)
|
|
367 // This will parse the input with CommonExtensions enabled and render it with
|
|
368 // the default HTMLRenderer (with CommonHTMLFlags).
|
|
369 //
|
|
370 // Variadic arguments opts can customize the default behavior. Since Markdown
|
|
371 // type does not contain exported fields, you can not use it directly. Instead,
|
|
372 // use the With* functions. For example, this will call the most basic
|
|
373 // functionality, with no extensions:
|
|
374 // output := Run(input, WithNoExtensions())
|
|
375 //
|
|
376 // You can use any number of With* arguments, even contradicting ones. They
|
|
377 // will be applied in order of appearance and the latter will override the
|
|
378 // former:
|
|
379 // output := Run(input, WithNoExtensions(), WithExtensions(exts),
|
|
380 // WithRenderer(yourRenderer))
|
|
381 func Run(input []byte, opts ...Option) []byte {
|
|
382 r := NewHTMLRenderer(HTMLRendererParameters{
|
|
383 Flags: CommonHTMLFlags,
|
|
384 })
|
|
385 optList := []Option{WithRenderer(r), WithExtensions(CommonExtensions)}
|
|
386 optList = append(optList, opts...)
|
|
387 parser := New(optList...)
|
|
388 ast := parser.Parse(input)
|
|
389 var buf bytes.Buffer
|
|
390 parser.renderer.RenderHeader(&buf, ast)
|
|
391 ast.Walk(func(node *Node, entering bool) WalkStatus {
|
|
392 return parser.renderer.RenderNode(&buf, node, entering)
|
|
393 })
|
|
394 parser.renderer.RenderFooter(&buf, ast)
|
|
395 return buf.Bytes()
|
|
396 }
|
|
397
|
|
398 // Parse is an entry point to the parsing part of Blackfriday. It takes an
|
|
399 // input markdown document and produces a syntax tree for its contents. This
|
|
400 // tree can then be rendered with a default or custom renderer, or
|
|
401 // analyzed/transformed by the caller to whatever non-standard needs they have.
|
|
402 // The return value is the root node of the syntax tree.
|
|
403 func (p *Markdown) Parse(input []byte) *Node {
|
|
404 p.block(input)
|
|
405 // Walk the tree and finish up some of unfinished blocks
|
|
406 for p.tip != nil {
|
|
407 p.finalize(p.tip)
|
|
408 }
|
|
409 // Walk the tree again and process inline markdown in each block
|
|
410 p.doc.Walk(func(node *Node, entering bool) WalkStatus {
|
|
411 if node.Type == Paragraph || node.Type == Heading || node.Type == TableCell {
|
|
412 p.inline(node, node.content)
|
|
413 node.content = nil
|
|
414 }
|
|
415 return GoToNext
|
|
416 })
|
|
417 p.parseRefsToAST()
|
|
418 return p.doc
|
|
419 }
|
|
420
|
|
421 func (p *Markdown) parseRefsToAST() {
|
|
422 if p.extensions&Footnotes == 0 || len(p.notes) == 0 {
|
|
423 return
|
|
424 }
|
|
425 p.tip = p.doc
|
|
426 block := p.addBlock(List, nil)
|
|
427 block.IsFootnotesList = true
|
|
428 block.ListFlags = ListTypeOrdered
|
|
429 flags := ListItemBeginningOfList
|
|
430 // Note: this loop is intentionally explicit, not range-form. This is
|
|
431 // because the body of the loop will append nested footnotes to p.notes and
|
|
432 // we need to process those late additions. Range form would only walk over
|
|
433 // the fixed initial set.
|
|
434 for i := 0; i < len(p.notes); i++ {
|
|
435 ref := p.notes[i]
|
|
436 p.addExistingChild(ref.footnote, 0)
|
|
437 block := ref.footnote
|
|
438 block.ListFlags = flags | ListTypeOrdered
|
|
439 block.RefLink = ref.link
|
|
440 if ref.hasBlock {
|
|
441 flags |= ListItemContainsBlock
|
|
442 p.block(ref.title)
|
|
443 } else {
|
|
444 p.inline(block, ref.title)
|
|
445 }
|
|
446 flags &^= ListItemBeginningOfList | ListItemContainsBlock
|
|
447 }
|
|
448 above := block.Parent
|
|
449 finalizeList(block)
|
|
450 p.tip = above
|
|
451 block.Walk(func(node *Node, entering bool) WalkStatus {
|
|
452 if node.Type == Paragraph || node.Type == Heading {
|
|
453 p.inline(node, node.content)
|
|
454 node.content = nil
|
|
455 }
|
|
456 return GoToNext
|
|
457 })
|
|
458 }
|
|
459
|
|
//
// Link references
//
// This section implements support for references that (usually) appear
// as footnotes in a document, and can be referenced anywhere in the document.
// The basic format is:
//
//    [1]: http://www.google.com/ "Google"
//    [2]: http://www.github.com/ "Github"
//
// Anywhere in the document, the reference can be linked by referring to its
// label, i.e., 1 and 2 in this example, as in:
//
//    This library is hosted on [Github][2], a git hosting site.
//
// Actual footnotes as specified in Pandoc and supported by some other Markdown
// libraries such as php-markdown are also taken care of. They look like this:
//
//    This sentence needs a bit of further explanation.[^note]
//
//    [^note]: This is the explanation.
//
// Footnotes should be placed at the end of the document in an ordered list.
// Finally, there are inline footnotes such as:
//
//    Inline footnotes^[Also supported.] provide a quick inline explanation,
//    but are rendered at the bottom of the document.
//
|
|
488
|
|
489 // reference holds all information necessary for a reference-style links or
|
|
490 // footnotes.
|
|
491 //
|
|
492 // Consider this markdown with reference-style links:
|
|
493 //
|
|
494 // [link][ref]
|
|
495 //
|
|
496 // [ref]: /url/ "tooltip title"
|
|
497 //
|
|
498 // It will be ultimately converted to this HTML:
|
|
499 //
|
|
500 // <p><a href=\"/url/\" title=\"title\">link</a></p>
|
|
501 //
|
|
502 // And a reference structure will be populated as follows:
|
|
503 //
|
|
504 // p.refs["ref"] = &reference{
|
|
505 // link: "/url/",
|
|
506 // title: "tooltip title",
|
|
507 // }
|
|
508 //
|
|
509 // Alternatively, reference can contain information about a footnote. Consider
|
|
510 // this markdown:
|
|
511 //
|
|
512 // Text needing a footnote.[^a]
|
|
513 //
|
|
514 // [^a]: This is the note
|
|
515 //
|
|
516 // A reference structure will be populated as follows:
|
|
517 //
|
|
518 // p.refs["a"] = &reference{
|
|
519 // link: "a",
|
|
520 // title: "This is the note",
|
|
521 // noteID: <some positive int>,
|
|
522 // }
|
|
523 //
|
|
524 // TODO: As you can see, it begs for splitting into two dedicated structures
|
|
525 // for refs and for footnotes.
|
|
526 type reference struct {
|
|
527 link []byte
|
|
528 title []byte
|
|
529 noteID int // 0 if not a footnote ref
|
|
530 hasBlock bool
|
|
531 footnote *Node // a link to the Item node within a list of footnotes
|
|
532
|
|
533 text []byte // only gets populated by refOverride feature with Reference.Text
|
|
534 }
|
|
535
|
|
536 func (r *reference) String() string {
|
|
537 return fmt.Sprintf("{link: %q, title: %q, text: %q, noteID: %d, hasBlock: %v}",
|
|
538 r.link, r.title, r.text, r.noteID, r.hasBlock)
|
|
539 }
|
|
540
|
|
541 // Check whether or not data starts with a reference link.
|
|
542 // If so, it is parsed and stored in the list of references
|
|
543 // (in the render struct).
|
|
544 // Returns the number of bytes to skip to move past it,
|
|
545 // or zero if the first line is not a reference.
|
|
546 func isReference(p *Markdown, data []byte, tabSize int) int {
|
|
547 // up to 3 optional leading spaces
|
|
548 if len(data) < 4 {
|
|
549 return 0
|
|
550 }
|
|
551 i := 0
|
|
552 for i < 3 && data[i] == ' ' {
|
|
553 i++
|
|
554 }
|
|
555
|
|
556 noteID := 0
|
|
557
|
|
558 // id part: anything but a newline between brackets
|
|
559 if data[i] != '[' {
|
|
560 return 0
|
|
561 }
|
|
562 i++
|
|
563 if p.extensions&Footnotes != 0 {
|
|
564 if i < len(data) && data[i] == '^' {
|
|
565 // we can set it to anything here because the proper noteIds will
|
|
566 // be assigned later during the second pass. It just has to be != 0
|
|
567 noteID = 1
|
|
568 i++
|
|
569 }
|
|
570 }
|
|
571 idOffset := i
|
|
572 for i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != ']' {
|
|
573 i++
|
|
574 }
|
|
575 if i >= len(data) || data[i] != ']' {
|
|
576 return 0
|
|
577 }
|
|
578 idEnd := i
|
|
579 // footnotes can have empty ID, like this: [^], but a reference can not be
|
|
580 // empty like this: []. Break early if it's not a footnote and there's no ID
|
|
581 if noteID == 0 && idOffset == idEnd {
|
|
582 return 0
|
|
583 }
|
|
584 // spacer: colon (space | tab)* newline? (space | tab)*
|
|
585 i++
|
|
586 if i >= len(data) || data[i] != ':' {
|
|
587 return 0
|
|
588 }
|
|
589 i++
|
|
590 for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
|
|
591 i++
|
|
592 }
|
|
593 if i < len(data) && (data[i] == '\n' || data[i] == '\r') {
|
|
594 i++
|
|
595 if i < len(data) && data[i] == '\n' && data[i-1] == '\r' {
|
|
596 i++
|
|
597 }
|
|
598 }
|
|
599 for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
|
|
600 i++
|
|
601 }
|
|
602 if i >= len(data) {
|
|
603 return 0
|
|
604 }
|
|
605
|
|
606 var (
|
|
607 linkOffset, linkEnd int
|
|
608 titleOffset, titleEnd int
|
|
609 lineEnd int
|
|
610 raw []byte
|
|
611 hasBlock bool
|
|
612 )
|
|
613
|
|
614 if p.extensions&Footnotes != 0 && noteID != 0 {
|
|
615 linkOffset, linkEnd, raw, hasBlock = scanFootnote(p, data, i, tabSize)
|
|
616 lineEnd = linkEnd
|
|
617 } else {
|
|
618 linkOffset, linkEnd, titleOffset, titleEnd, lineEnd = scanLinkRef(p, data, i)
|
|
619 }
|
|
620 if lineEnd == 0 {
|
|
621 return 0
|
|
622 }
|
|
623
|
|
624 // a valid ref has been found
|
|
625
|
|
626 ref := &reference{
|
|
627 noteID: noteID,
|
|
628 hasBlock: hasBlock,
|
|
629 }
|
|
630
|
|
631 if noteID > 0 {
|
|
632 // reusing the link field for the id since footnotes don't have links
|
|
633 ref.link = data[idOffset:idEnd]
|
|
634 // if footnote, it's not really a title, it's the contained text
|
|
635 ref.title = raw
|
|
636 } else {
|
|
637 ref.link = data[linkOffset:linkEnd]
|
|
638 ref.title = data[titleOffset:titleEnd]
|
|
639 }
|
|
640
|
|
641 // id matches are case-insensitive
|
|
642 id := string(bytes.ToLower(data[idOffset:idEnd]))
|
|
643
|
|
644 p.refs[id] = ref
|
|
645
|
|
646 return lineEnd
|
|
647 }
|
|
648
|
|
649 func scanLinkRef(p *Markdown, data []byte, i int) (linkOffset, linkEnd, titleOffset, titleEnd, lineEnd int) {
|
|
650 // link: whitespace-free sequence, optionally between angle brackets
|
|
651 if data[i] == '<' {
|
|
652 i++
|
|
653 }
|
|
654 linkOffset = i
|
|
655 for i < len(data) && data[i] != ' ' && data[i] != '\t' && data[i] != '\n' && data[i] != '\r' {
|
|
656 i++
|
|
657 }
|
|
658 linkEnd = i
|
|
659 if data[linkOffset] == '<' && data[linkEnd-1] == '>' {
|
|
660 linkOffset++
|
|
661 linkEnd--
|
|
662 }
|
|
663
|
|
664 // optional spacer: (space | tab)* (newline | '\'' | '"' | '(' )
|
|
665 for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
|
|
666 i++
|
|
667 }
|
|
668 if i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != '\'' && data[i] != '"' && data[i] != '(' {
|
|
669 return
|
|
670 }
|
|
671
|
|
672 // compute end-of-line
|
|
673 if i >= len(data) || data[i] == '\r' || data[i] == '\n' {
|
|
674 lineEnd = i
|
|
675 }
|
|
676 if i+1 < len(data) && data[i] == '\r' && data[i+1] == '\n' {
|
|
677 lineEnd++
|
|
678 }
|
|
679
|
|
680 // optional (space|tab)* spacer after a newline
|
|
681 if lineEnd > 0 {
|
|
682 i = lineEnd + 1
|
|
683 for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
|
|
684 i++
|
|
685 }
|
|
686 }
|
|
687
|
|
688 // optional title: any non-newline sequence enclosed in '"() alone on its line
|
|
689 if i+1 < len(data) && (data[i] == '\'' || data[i] == '"' || data[i] == '(') {
|
|
690 i++
|
|
691 titleOffset = i
|
|
692
|
|
693 // look for EOL
|
|
694 for i < len(data) && data[i] != '\n' && data[i] != '\r' {
|
|
695 i++
|
|
696 }
|
|
697 if i+1 < len(data) && data[i] == '\n' && data[i+1] == '\r' {
|
|
698 titleEnd = i + 1
|
|
699 } else {
|
|
700 titleEnd = i
|
|
701 }
|
|
702
|
|
703 // step back
|
|
704 i--
|
|
705 for i > titleOffset && (data[i] == ' ' || data[i] == '\t') {
|
|
706 i--
|
|
707 }
|
|
708 if i > titleOffset && (data[i] == '\'' || data[i] == '"' || data[i] == ')') {
|
|
709 lineEnd = titleEnd
|
|
710 titleEnd = i
|
|
711 }
|
|
712 }
|
|
713
|
|
714 return
|
|
715 }
|
|
716
|
|
717 // The first bit of this logic is the same as Parser.listItem, but the rest
|
|
718 // is much simpler. This function simply finds the entire block and shifts it
|
|
719 // over by one tab if it is indeed a block (just returns the line if it's not).
|
|
720 // blockEnd is the end of the section in the input buffer, and contents is the
|
|
721 // extracted text that was shifted over one tab. It will need to be rendered at
|
|
722 // the end of the document.
|
|
723 func scanFootnote(p *Markdown, data []byte, i, indentSize int) (blockStart, blockEnd int, contents []byte, hasBlock bool) {
|
|
724 if i == 0 || len(data) == 0 {
|
|
725 return
|
|
726 }
|
|
727
|
|
728 // skip leading whitespace on first line
|
|
729 for i < len(data) && data[i] == ' ' {
|
|
730 i++
|
|
731 }
|
|
732
|
|
733 blockStart = i
|
|
734
|
|
735 // find the end of the line
|
|
736 blockEnd = i
|
|
737 for i < len(data) && data[i-1] != '\n' {
|
|
738 i++
|
|
739 }
|
|
740
|
|
741 // get working buffer
|
|
742 var raw bytes.Buffer
|
|
743
|
|
744 // put the first line into the working buffer
|
|
745 raw.Write(data[blockEnd:i])
|
|
746 blockEnd = i
|
|
747
|
|
748 // process the following lines
|
|
749 containsBlankLine := false
|
|
750
|
|
751 gatherLines:
|
|
752 for blockEnd < len(data) {
|
|
753 i++
|
|
754
|
|
755 // find the end of this line
|
|
756 for i < len(data) && data[i-1] != '\n' {
|
|
757 i++
|
|
758 }
|
|
759
|
|
760 // if it is an empty line, guess that it is part of this item
|
|
761 // and move on to the next line
|
|
762 if p.isEmpty(data[blockEnd:i]) > 0 {
|
|
763 containsBlankLine = true
|
|
764 blockEnd = i
|
|
765 continue
|
|
766 }
|
|
767
|
|
768 n := 0
|
|
769 if n = isIndented(data[blockEnd:i], indentSize); n == 0 {
|
|
770 // this is the end of the block.
|
|
771 // we don't want to include this last line in the index.
|
|
772 break gatherLines
|
|
773 }
|
|
774
|
|
775 // if there were blank lines before this one, insert a new one now
|
|
776 if containsBlankLine {
|
|
777 raw.WriteByte('\n')
|
|
778 containsBlankLine = false
|
|
779 }
|
|
780
|
|
781 // get rid of that first tab, write to buffer
|
|
782 raw.Write(data[blockEnd+n : i])
|
|
783 hasBlock = true
|
|
784
|
|
785 blockEnd = i
|
|
786 }
|
|
787
|
|
788 if data[blockEnd-1] != '\n' {
|
|
789 raw.WriteByte('\n')
|
|
790 }
|
|
791
|
|
792 contents = raw.Bytes()
|
|
793
|
|
794 return
|
|
795 }
|
|
796
|
|
797 //
|
|
798 //
|
|
799 // Miscellaneous helper functions
|
|
800 //
|
|
801 //
|
|
802
|
|
// ispunct reports whether c is an ASCII punctuation symbol.
// Taken from a private function in regexp in the stdlib.
func ispunct(c byte) bool {
	const punctuation = "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
	return strings.IndexByte(punctuation, c) >= 0
}
|
|
813
|
|
// isspace reports whether c is a whitespace character, i.e. either a
// horizontal one (space, tab) or a vertical one (newline, carriage return,
// form feed, vertical tab).
func isspace(c byte) bool {
	switch c {
	case ' ', '\t':
		return true
	case '\n', '\r', '\f', '\v':
		return true
	}
	return false
}
|
|
818
|
|
// ishorizontalspace reports whether c is a horizontal whitespace character:
// a space or a tab.
func ishorizontalspace(c byte) bool {
	switch c {
	case ' ', '\t':
		return true
	default:
		return false
	}
}
|
|
823
|
|
// isverticalspace reports whether c is a vertical whitespace character:
// newline, carriage return, form feed or vertical tab.
func isverticalspace(c byte) bool {
	switch c {
	case '\n', '\r', '\f', '\v':
		return true
	default:
		return false
	}
}
|
|
828
|
|
// isletter reports whether c is an ASCII letter (a-z or A-Z).
func isletter(c byte) bool {
	// Setting bit 0x20 folds 'A'-'Z' onto 'a'-'z' and maps no other byte
	// into that range, so a single range check covers both cases.
	lower := c | 0x20
	return 'a' <= lower && lower <= 'z'
}
|
|
833
|
|
// isalnum reports whether c is an ASCII letter or an ASCII digit.
// TODO: check when this is looking for ASCII alnum and when it should use unicode
func isalnum(c byte) bool {
	switch {
	case '0' <= c && c <= '9':
		return true
	case 'a' <= c && c <= 'z':
		return true
	case 'A' <= c && c <= 'Z':
		return true
	}
	return false
}
|
|
839
|
|
840 // Replace tab characters with spaces, aligning to the next TAB_SIZE column.
|
|
841 // always ends output with a newline
|
|
842 func expandTabs(out *bytes.Buffer, line []byte, tabSize int) {
|
|
843 // first, check for common cases: no tabs, or only tabs at beginning of line
|
|
844 i, prefix := 0, 0
|
|
845 slowcase := false
|
|
846 for i = 0; i < len(line); i++ {
|
|
847 if line[i] == '\t' {
|
|
848 if prefix == i {
|
|
849 prefix++
|
|
850 } else {
|
|
851 slowcase = true
|
|
852 break
|
|
853 }
|
|
854 }
|
|
855 }
|
|
856
|
|
857 // no need to decode runes if all tabs are at the beginning of the line
|
|
858 if !slowcase {
|
|
859 for i = 0; i < prefix*tabSize; i++ {
|
|
860 out.WriteByte(' ')
|
|
861 }
|
|
862 out.Write(line[prefix:])
|
|
863 return
|
|
864 }
|
|
865
|
|
866 // the slow case: we need to count runes to figure out how
|
|
867 // many spaces to insert for each tab
|
|
868 column := 0
|
|
869 i = 0
|
|
870 for i < len(line) {
|
|
871 start := i
|
|
872 for i < len(line) && line[i] != '\t' {
|
|
873 _, size := utf8.DecodeRune(line[i:])
|
|
874 i += size
|
|
875 column++
|
|
876 }
|
|
877
|
|
878 if i > start {
|
|
879 out.Write(line[start:i])
|
|
880 }
|
|
881
|
|
882 if i >= len(line) {
|
|
883 break
|
|
884 }
|
|
885
|
|
886 for {
|
|
887 out.WriteByte(' ')
|
|
888 column++
|
|
889 if column%tabSize == 0 {
|
|
890 break
|
|
891 }
|
|
892 }
|
|
893
|
|
894 i++
|
|
895 }
|
|
896 }
|
|
897
|
|
// isIndented finds out whether a line counts as indented: it must start
// with a tab or with indentSize spaces.
// It returns the number of bytes the indent occupies (0 = not indented).
func isIndented(data []byte, indentSize int) int {
	switch {
	case len(data) == 0:
		return 0
	case data[0] == '\t':
		// a single tab is a full indent
		return 1
	case len(data) < indentSize:
		return 0
	}
	for pos := 0; pos < indentSize; pos++ {
		if data[pos] != ' ' {
			return 0
		}
	}
	return indentSize
}
|
|
917
|
|
918 // Create a url-safe slug for fragments
|
|
919 func slugify(in []byte) []byte {
|
|
920 if len(in) == 0 {
|
|
921 return in
|
|
922 }
|
|
923 out := make([]byte, 0, len(in))
|
|
924 sym := false
|
|
925
|
|
926 for _, ch := range in {
|
|
927 if isalnum(ch) {
|
|
928 sym = false
|
|
929 out = append(out, ch)
|
|
930 } else if sym {
|
|
931 continue
|
|
932 } else {
|
|
933 out = append(out, '-')
|
|
934 sym = true
|
|
935 }
|
|
936 }
|
|
937 var a, b int
|
|
938 var ch byte
|
|
939 for a, ch = range out {
|
|
940 if ch != '-' {
|
|
941 break
|
|
942 }
|
|
943 }
|
|
944 for b = len(out) - 1; b > 0; b-- {
|
|
945 if out[b] != '-' {
|
|
946 break
|
|
947 }
|
|
948 }
|
|
949 return out[a : b+1]
|
|
950 }
|