Mercurial > yakumo_izuru > aya
comparison vendor/github.com/russross/blackfriday/v2/markdown.go @ 66:787b5ee0289d draft
Use vendored modules
Signed-off-by: Izuru Yakumo <yakumo.izuru@chaotic.ninja>
author | yakumo.izuru |
---|---|
date | Sun, 23 Jul 2023 13:18:53 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
65:6d985efa0f7a | 66:787b5ee0289d |
---|---|
1 // Blackfriday Markdown Processor | |
2 // Available at http://github.com/russross/blackfriday | |
3 // | |
4 // Copyright © 2011 Russ Ross <russ@russross.com>. | |
5 // Distributed under the Simplified BSD License. | |
6 // See README.md for details. | |
7 | |
8 package blackfriday | |
9 | |
10 import ( | |
11 "bytes" | |
12 "fmt" | |
13 "io" | |
14 "strings" | |
15 "unicode/utf8" | |
16 ) | |
17 | |
18 // | |
19 // Markdown parsing and processing | |
20 // | |
21 | |
22 // Version string of the package. Appears in the rendered document when | |
23 // CompletePage flag is on. | |
24 const Version = "2.0" | |
25 | |
// Extensions is a bit set of optional Blackfriday parser features. Combine
// the individual extension constants with bitwise OR to enable several.
type Extensions int
29 | |
30 // These are the supported markdown parsing extensions. | |
31 // OR these values together to select multiple extensions. | |
32 const ( | |
33 NoExtensions Extensions = 0 | |
34 NoIntraEmphasis Extensions = 1 << iota // Ignore emphasis markers inside words | |
35 Tables // Render tables | |
36 FencedCode // Render fenced code blocks | |
37 Autolink // Detect embedded URLs that are not explicitly marked | |
38 Strikethrough // Strikethrough text using ~~test~~ | |
39 LaxHTMLBlocks // Loosen up HTML block parsing rules | |
40 SpaceHeadings // Be strict about prefix heading rules | |
41 HardLineBreak // Translate newlines into line breaks | |
42 TabSizeEight // Expand tabs to eight spaces instead of four | |
43 Footnotes // Pandoc-style footnotes | |
44 NoEmptyLineBeforeBlock // No need to insert an empty line to start a (code, quote, ordered list, unordered list) block | |
45 HeadingIDs // specify heading IDs with {#id} | |
46 Titleblock // Titleblock ala pandoc | |
47 AutoHeadingIDs // Create the heading ID from the text | |
48 BackslashLineBreak // Translate trailing backslashes into line breaks | |
49 DefinitionLists // Render definition lists | |
50 | |
51 CommonHTMLFlags HTMLFlags = UseXHTML | Smartypants | | |
52 SmartypantsFractions | SmartypantsDashes | SmartypantsLatexDashes | |
53 | |
54 CommonExtensions Extensions = NoIntraEmphasis | Tables | FencedCode | | |
55 Autolink | Strikethrough | SpaceHeadings | HeadingIDs | | |
56 BackslashLineBreak | DefinitionLists | |
57 ) | |
58 | |
// ListType is a bit set of flags describing a list or a list item.
type ListType int

// Flag values handed to the ListItem renderer. Several of them may be ORed
// together. They mainly matter when implementing a new output format.
const (
	ListTypeOrdered ListType = 1 << iota
	ListTypeDefinition
	ListTypeTerm

	ListItemContainsBlock
	ListItemBeginningOfList // TODO: figure out if this is of any use now
	ListItemEndOfList
)
74 | |
// CellAlignFlags describes how the content of a table cell is aligned.
type CellAlignFlags int

// Alignment values handed to the table cell renderer. A cell carries exactly
// one of them; they are not meant to be ORed together by callers. They mainly
// matter when implementing a new output format.
const (
	TableAlignmentLeft CellAlignFlags = 1 << iota
	TableAlignmentRight
	// TableAlignmentCenter is encoded as both the left and the right bit.
	TableAlignmentCenter = (TableAlignmentLeft | TableAlignmentRight)
)
86 | |
// Tab stop widths, measured in spaces.
const (
	TabSizeDefault = 4 // regular tab stop
	TabSizeDouble  = 8 // wide tab stop
)
92 | |
// blockTags is the set of tag names recognized as HTML block tags. Any of
// them can be included in markdown text without special escaping.
var blockTags = map[string]struct{}{
	// Classic block-level elements.
	"blockquote": {}, "del": {}, "div": {}, "dl": {},
	"fieldset": {}, "form": {},
	"h1": {}, "h2": {}, "h3": {}, "h4": {}, "h5": {}, "h6": {},
	"iframe": {}, "ins": {}, "math": {}, "noscript": {},
	"ol": {}, "p": {}, "pre": {},
	"script": {}, "style": {}, "table": {}, "ul": {},

	// HTML5 additions.
	"address": {}, "article": {}, "aside": {}, "canvas": {},
	"figcaption": {}, "figure": {}, "footer": {},
	"header": {}, "hgroup": {}, "main": {}, "nav": {},
	"output": {}, "progress": {}, "section": {}, "video": {},
}
137 | |
138 // Renderer is the rendering interface. This is mostly of interest if you are | |
139 // implementing a new rendering format. | |
140 // | |
141 // Only an HTML implementation is provided in this repository, see the README | |
142 // for external implementations. | |
143 type Renderer interface { | |
144 // RenderNode is the main rendering method. It will be called once for | |
145 // every leaf node and twice for every non-leaf node (first with | |
146 // entering=true, then with entering=false). The method should write its | |
147 // rendition of the node to the supplied writer w. | |
148 RenderNode(w io.Writer, node *Node, entering bool) WalkStatus | |
149 | |
150 // RenderHeader is a method that allows the renderer to produce some | |
151 // content preceding the main body of the output document. The header is | |
152 // understood in the broad sense here. For example, the default HTML | |
153 // renderer will write not only the HTML document preamble, but also the | |
154 // table of contents if it was requested. | |
155 // | |
156 // The method will be passed an entire document tree, in case a particular | |
157 // implementation needs to inspect it to produce output. | |
158 // | |
159 // The output should be written to the supplied writer w. If your | |
160 // implementation has no header to write, supply an empty implementation. | |
161 RenderHeader(w io.Writer, ast *Node) | |
162 | |
163 // RenderFooter is a symmetric counterpart of RenderHeader. | |
164 RenderFooter(w io.Writer, ast *Node) | |
165 } | |
166 | |
167 // Callback functions for inline parsing. One such function is defined | |
168 // for each character that triggers a response when parsing inline data. | |
169 type inlineParser func(p *Markdown, data []byte, offset int) (int, *Node) | |
170 | |
171 // Markdown is a type that holds extensions and the runtime state used by | |
172 // Parse, and the renderer. You can not use it directly, construct it with New. | |
173 type Markdown struct { | |
174 renderer Renderer | |
175 referenceOverride ReferenceOverrideFunc | |
176 refs map[string]*reference | |
177 inlineCallback [256]inlineParser | |
178 extensions Extensions | |
179 nesting int | |
180 maxNesting int | |
181 insideLink bool | |
182 | |
183 // Footnotes need to be ordered as well as available to quickly check for | |
184 // presence. If a ref is also a footnote, it's stored both in refs and here | |
185 // in notes. Slice is nil if footnotes not enabled. | |
186 notes []*reference | |
187 | |
188 doc *Node | |
189 tip *Node // = doc | |
190 oldTip *Node | |
191 lastMatchedContainer *Node // = doc | |
192 allClosed bool | |
193 } | |
194 | |
195 func (p *Markdown) getRef(refid string) (ref *reference, found bool) { | |
196 if p.referenceOverride != nil { | |
197 r, overridden := p.referenceOverride(refid) | |
198 if overridden { | |
199 if r == nil { | |
200 return nil, false | |
201 } | |
202 return &reference{ | |
203 link: []byte(r.Link), | |
204 title: []byte(r.Title), | |
205 noteID: 0, | |
206 hasBlock: false, | |
207 text: []byte(r.Text)}, true | |
208 } | |
209 } | |
210 // refs are case insensitive | |
211 ref, found = p.refs[strings.ToLower(refid)] | |
212 return ref, found | |
213 } | |
214 | |
215 func (p *Markdown) finalize(block *Node) { | |
216 above := block.Parent | |
217 block.open = false | |
218 p.tip = above | |
219 } | |
220 | |
221 func (p *Markdown) addChild(node NodeType, offset uint32) *Node { | |
222 return p.addExistingChild(NewNode(node), offset) | |
223 } | |
224 | |
225 func (p *Markdown) addExistingChild(node *Node, offset uint32) *Node { | |
226 for !p.tip.canContain(node.Type) { | |
227 p.finalize(p.tip) | |
228 } | |
229 p.tip.AppendChild(node) | |
230 p.tip = node | |
231 return node | |
232 } | |
233 | |
234 func (p *Markdown) closeUnmatchedBlocks() { | |
235 if !p.allClosed { | |
236 for p.oldTip != p.lastMatchedContainer { | |
237 parent := p.oldTip.Parent | |
238 p.finalize(p.oldTip) | |
239 p.oldTip = parent | |
240 } | |
241 p.allClosed = true | |
242 } | |
243 } | |
244 | |
245 // | |
246 // | |
247 // Public interface | |
248 // | |
249 // | |
250 | |
// Reference carries the details of a link as supplied by a
// ReferenceOverrideFunc. See the documentation in Options for more details on
// the use-case.
type Reference struct {
	// Link is usually the URL the reference points to.
	Link string
	// Title is the alternate text that describes the link in more detail.
	Title string
	// Text optionally overrides the link text when the syntax used was
	// [refid][].
	Text string
}
262 | |
263 // ReferenceOverrideFunc is expected to be called with a reference string and | |
264 // return either a valid Reference type that the reference string maps to or | |
265 // nil. If overridden is false, the default reference logic will be executed. | |
266 // See the documentation in Options for more details on use-case. | |
267 type ReferenceOverrideFunc func(reference string) (ref *Reference, overridden bool) | |
268 | |
269 // New constructs a Markdown processor. You can use the same With* functions as | |
270 // for Run() to customize parser's behavior and the renderer. | |
271 func New(opts ...Option) *Markdown { | |
272 var p Markdown | |
273 for _, opt := range opts { | |
274 opt(&p) | |
275 } | |
276 p.refs = make(map[string]*reference) | |
277 p.maxNesting = 16 | |
278 p.insideLink = false | |
279 docNode := NewNode(Document) | |
280 p.doc = docNode | |
281 p.tip = docNode | |
282 p.oldTip = docNode | |
283 p.lastMatchedContainer = docNode | |
284 p.allClosed = true | |
285 // register inline parsers | |
286 p.inlineCallback[' '] = maybeLineBreak | |
287 p.inlineCallback['*'] = emphasis | |
288 p.inlineCallback['_'] = emphasis | |
289 if p.extensions&Strikethrough != 0 { | |
290 p.inlineCallback['~'] = emphasis | |
291 } | |
292 p.inlineCallback['`'] = codeSpan | |
293 p.inlineCallback['\n'] = lineBreak | |
294 p.inlineCallback['['] = link | |
295 p.inlineCallback['<'] = leftAngle | |
296 p.inlineCallback['\\'] = escape | |
297 p.inlineCallback['&'] = entity | |
298 p.inlineCallback['!'] = maybeImage | |
299 p.inlineCallback['^'] = maybeInlineFootnote | |
300 if p.extensions&Autolink != 0 { | |
301 p.inlineCallback['h'] = maybeAutoLink | |
302 p.inlineCallback['m'] = maybeAutoLink | |
303 p.inlineCallback['f'] = maybeAutoLink | |
304 p.inlineCallback['H'] = maybeAutoLink | |
305 p.inlineCallback['M'] = maybeAutoLink | |
306 p.inlineCallback['F'] = maybeAutoLink | |
307 } | |
308 if p.extensions&Footnotes != 0 { | |
309 p.notes = make([]*reference, 0) | |
310 } | |
311 return &p | |
312 } | |
313 | |
314 // Option customizes the Markdown processor's default behavior. | |
315 type Option func(*Markdown) | |
316 | |
317 // WithRenderer allows you to override the default renderer. | |
318 func WithRenderer(r Renderer) Option { | |
319 return func(p *Markdown) { | |
320 p.renderer = r | |
321 } | |
322 } | |
323 | |
324 // WithExtensions allows you to pick some of the many extensions provided by | |
325 // Blackfriday. You can bitwise OR them. | |
326 func WithExtensions(e Extensions) Option { | |
327 return func(p *Markdown) { | |
328 p.extensions = e | |
329 } | |
330 } | |
331 | |
332 // WithNoExtensions turns off all extensions and custom behavior. | |
333 func WithNoExtensions() Option { | |
334 return func(p *Markdown) { | |
335 p.extensions = NoExtensions | |
336 p.renderer = NewHTMLRenderer(HTMLRendererParameters{ | |
337 Flags: HTMLFlagsNone, | |
338 }) | |
339 } | |
340 } | |
341 | |
342 // WithRefOverride sets an optional function callback that is called every | |
343 // time a reference is resolved. | |
344 // | |
345 // In Markdown, the link reference syntax can be made to resolve a link to | |
346 // a reference instead of an inline URL, in one of the following ways: | |
347 // | |
348 // * [link text][refid] | |
349 // * [refid][] | |
350 // | |
351 // Usually, the refid is defined at the bottom of the Markdown document. If | |
352 // this override function is provided, the refid is passed to the override | |
353 // function first, before consulting the defined refids at the bottom. If | |
354 // the override function indicates an override did not occur, the refids at | |
355 // the bottom will be used to fill in the link details. | |
356 func WithRefOverride(o ReferenceOverrideFunc) Option { | |
357 return func(p *Markdown) { | |
358 p.referenceOverride = o | |
359 } | |
360 } | |
361 | |
362 // Run is the main entry point to Blackfriday. It parses and renders a | |
363 // block of markdown-encoded text. | |
364 // | |
365 // The simplest invocation of Run takes one argument, input: | |
366 // output := Run(input) | |
367 // This will parse the input with CommonExtensions enabled and render it with | |
368 // the default HTMLRenderer (with CommonHTMLFlags). | |
369 // | |
370 // Variadic arguments opts can customize the default behavior. Since Markdown | |
371 // type does not contain exported fields, you can not use it directly. Instead, | |
372 // use the With* functions. For example, this will call the most basic | |
373 // functionality, with no extensions: | |
374 // output := Run(input, WithNoExtensions()) | |
375 // | |
376 // You can use any number of With* arguments, even contradicting ones. They | |
377 // will be applied in order of appearance and the latter will override the | |
378 // former: | |
379 // output := Run(input, WithNoExtensions(), WithExtensions(exts), | |
380 // WithRenderer(yourRenderer)) | |
381 func Run(input []byte, opts ...Option) []byte { | |
382 r := NewHTMLRenderer(HTMLRendererParameters{ | |
383 Flags: CommonHTMLFlags, | |
384 }) | |
385 optList := []Option{WithRenderer(r), WithExtensions(CommonExtensions)} | |
386 optList = append(optList, opts...) | |
387 parser := New(optList...) | |
388 ast := parser.Parse(input) | |
389 var buf bytes.Buffer | |
390 parser.renderer.RenderHeader(&buf, ast) | |
391 ast.Walk(func(node *Node, entering bool) WalkStatus { | |
392 return parser.renderer.RenderNode(&buf, node, entering) | |
393 }) | |
394 parser.renderer.RenderFooter(&buf, ast) | |
395 return buf.Bytes() | |
396 } | |
397 | |
398 // Parse is an entry point to the parsing part of Blackfriday. It takes an | |
399 // input markdown document and produces a syntax tree for its contents. This | |
400 // tree can then be rendered with a default or custom renderer, or | |
401 // analyzed/transformed by the caller to whatever non-standard needs they have. | |
402 // The return value is the root node of the syntax tree. | |
403 func (p *Markdown) Parse(input []byte) *Node { | |
404 p.block(input) | |
405 // Walk the tree and finish up some of unfinished blocks | |
406 for p.tip != nil { | |
407 p.finalize(p.tip) | |
408 } | |
409 // Walk the tree again and process inline markdown in each block | |
410 p.doc.Walk(func(node *Node, entering bool) WalkStatus { | |
411 if node.Type == Paragraph || node.Type == Heading || node.Type == TableCell { | |
412 p.inline(node, node.content) | |
413 node.content = nil | |
414 } | |
415 return GoToNext | |
416 }) | |
417 p.parseRefsToAST() | |
418 return p.doc | |
419 } | |
420 | |
421 func (p *Markdown) parseRefsToAST() { | |
422 if p.extensions&Footnotes == 0 || len(p.notes) == 0 { | |
423 return | |
424 } | |
425 p.tip = p.doc | |
426 block := p.addBlock(List, nil) | |
427 block.IsFootnotesList = true | |
428 block.ListFlags = ListTypeOrdered | |
429 flags := ListItemBeginningOfList | |
430 // Note: this loop is intentionally explicit, not range-form. This is | |
431 // because the body of the loop will append nested footnotes to p.notes and | |
432 // we need to process those late additions. Range form would only walk over | |
433 // the fixed initial set. | |
434 for i := 0; i < len(p.notes); i++ { | |
435 ref := p.notes[i] | |
436 p.addExistingChild(ref.footnote, 0) | |
437 block := ref.footnote | |
438 block.ListFlags = flags | ListTypeOrdered | |
439 block.RefLink = ref.link | |
440 if ref.hasBlock { | |
441 flags |= ListItemContainsBlock | |
442 p.block(ref.title) | |
443 } else { | |
444 p.inline(block, ref.title) | |
445 } | |
446 flags &^= ListItemBeginningOfList | ListItemContainsBlock | |
447 } | |
448 above := block.Parent | |
449 finalizeList(block) | |
450 p.tip = above | |
451 block.Walk(func(node *Node, entering bool) WalkStatus { | |
452 if node.Type == Paragraph || node.Type == Heading { | |
453 p.inline(node, node.content) | |
454 node.content = nil | |
455 } | |
456 return GoToNext | |
457 }) | |
458 } | |
459 | |
460 // | |
461 // Link references | |
462 // | |
463 // This section implements support for references that (usually) appear | |
464 // as footnotes in a document, and can be referenced anywhere in the document. | |
465 // The basic format is: | |
466 // | |
467 // [1]: http://www.google.com/ "Google" | |
468 // [2]: http://www.github.com/ "Github" | |
469 // | |
470 // Anywhere in the document, the reference can be linked by referring to its | |
471 // label, i.e., 1 and 2 in this example, as in: | |
472 // | |
473 // This library is hosted on [Github][2], a git hosting site. | |
474 // | |
475 // Actual footnotes as specified in Pandoc and supported by some other Markdown | |
476 // libraries such as php-markdown are also taken care of. They look like this: | |
477 // | |
478 // This sentence needs a bit of further explanation.[^note] | |
479 // | |
480 // [^note]: This is the explanation. | |
481 // | |
482 // Footnotes should be placed at the end of the document in an ordered list. | |
483 // Finally, there are inline footnotes such as: | |
484 // | |
485 // Inline footnotes^[Also supported.] provide a quick inline explanation, | |
486 // but are rendered at the bottom of the document. | |
487 // | |
488 | |
489 // reference holds all information necessary for a reference-style links or | |
490 // footnotes. | |
491 // | |
492 // Consider this markdown with reference-style links: | |
493 // | |
494 // [link][ref] | |
495 // | |
496 // [ref]: /url/ "tooltip title" | |
497 // | |
498 // It will be ultimately converted to this HTML: | |
499 // | |
500 // <p><a href=\"/url/\" title=\"title\">link</a></p> | |
501 // | |
502 // And a reference structure will be populated as follows: | |
503 // | |
504 // p.refs["ref"] = &reference{ | |
505 // link: "/url/", | |
506 // title: "tooltip title", | |
507 // } | |
508 // | |
509 // Alternatively, reference can contain information about a footnote. Consider | |
510 // this markdown: | |
511 // | |
512 // Text needing a footnote.[^a] | |
513 // | |
514 // [^a]: This is the note | |
515 // | |
516 // A reference structure will be populated as follows: | |
517 // | |
518 // p.refs["a"] = &reference{ | |
519 // link: "a", | |
520 // title: "This is the note", | |
521 // noteID: <some positive int>, | |
522 // } | |
523 // | |
524 // TODO: As you can see, it begs for splitting into two dedicated structures | |
525 // for refs and for footnotes. | |
526 type reference struct { | |
527 link []byte | |
528 title []byte | |
529 noteID int // 0 if not a footnote ref | |
530 hasBlock bool | |
531 footnote *Node // a link to the Item node within a list of footnotes | |
532 | |
533 text []byte // only gets populated by refOverride feature with Reference.Text | |
534 } | |
535 | |
536 func (r *reference) String() string { | |
537 return fmt.Sprintf("{link: %q, title: %q, text: %q, noteID: %d, hasBlock: %v}", | |
538 r.link, r.title, r.text, r.noteID, r.hasBlock) | |
539 } | |
540 | |
541 // Check whether or not data starts with a reference link. | |
542 // If so, it is parsed and stored in the list of references | |
543 // (in the render struct). | |
544 // Returns the number of bytes to skip to move past it, | |
545 // or zero if the first line is not a reference. | |
546 func isReference(p *Markdown, data []byte, tabSize int) int { | |
547 // up to 3 optional leading spaces | |
548 if len(data) < 4 { | |
549 return 0 | |
550 } | |
551 i := 0 | |
552 for i < 3 && data[i] == ' ' { | |
553 i++ | |
554 } | |
555 | |
556 noteID := 0 | |
557 | |
558 // id part: anything but a newline between brackets | |
559 if data[i] != '[' { | |
560 return 0 | |
561 } | |
562 i++ | |
563 if p.extensions&Footnotes != 0 { | |
564 if i < len(data) && data[i] == '^' { | |
565 // we can set it to anything here because the proper noteIds will | |
566 // be assigned later during the second pass. It just has to be != 0 | |
567 noteID = 1 | |
568 i++ | |
569 } | |
570 } | |
571 idOffset := i | |
572 for i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != ']' { | |
573 i++ | |
574 } | |
575 if i >= len(data) || data[i] != ']' { | |
576 return 0 | |
577 } | |
578 idEnd := i | |
579 // footnotes can have empty ID, like this: [^], but a reference can not be | |
580 // empty like this: []. Break early if it's not a footnote and there's no ID | |
581 if noteID == 0 && idOffset == idEnd { | |
582 return 0 | |
583 } | |
584 // spacer: colon (space | tab)* newline? (space | tab)* | |
585 i++ | |
586 if i >= len(data) || data[i] != ':' { | |
587 return 0 | |
588 } | |
589 i++ | |
590 for i < len(data) && (data[i] == ' ' || data[i] == '\t') { | |
591 i++ | |
592 } | |
593 if i < len(data) && (data[i] == '\n' || data[i] == '\r') { | |
594 i++ | |
595 if i < len(data) && data[i] == '\n' && data[i-1] == '\r' { | |
596 i++ | |
597 } | |
598 } | |
599 for i < len(data) && (data[i] == ' ' || data[i] == '\t') { | |
600 i++ | |
601 } | |
602 if i >= len(data) { | |
603 return 0 | |
604 } | |
605 | |
606 var ( | |
607 linkOffset, linkEnd int | |
608 titleOffset, titleEnd int | |
609 lineEnd int | |
610 raw []byte | |
611 hasBlock bool | |
612 ) | |
613 | |
614 if p.extensions&Footnotes != 0 && noteID != 0 { | |
615 linkOffset, linkEnd, raw, hasBlock = scanFootnote(p, data, i, tabSize) | |
616 lineEnd = linkEnd | |
617 } else { | |
618 linkOffset, linkEnd, titleOffset, titleEnd, lineEnd = scanLinkRef(p, data, i) | |
619 } | |
620 if lineEnd == 0 { | |
621 return 0 | |
622 } | |
623 | |
624 // a valid ref has been found | |
625 | |
626 ref := &reference{ | |
627 noteID: noteID, | |
628 hasBlock: hasBlock, | |
629 } | |
630 | |
631 if noteID > 0 { | |
632 // reusing the link field for the id since footnotes don't have links | |
633 ref.link = data[idOffset:idEnd] | |
634 // if footnote, it's not really a title, it's the contained text | |
635 ref.title = raw | |
636 } else { | |
637 ref.link = data[linkOffset:linkEnd] | |
638 ref.title = data[titleOffset:titleEnd] | |
639 } | |
640 | |
641 // id matches are case-insensitive | |
642 id := string(bytes.ToLower(data[idOffset:idEnd])) | |
643 | |
644 p.refs[id] = ref | |
645 | |
646 return lineEnd | |
647 } | |
648 | |
649 func scanLinkRef(p *Markdown, data []byte, i int) (linkOffset, linkEnd, titleOffset, titleEnd, lineEnd int) { | |
650 // link: whitespace-free sequence, optionally between angle brackets | |
651 if data[i] == '<' { | |
652 i++ | |
653 } | |
654 linkOffset = i | |
655 for i < len(data) && data[i] != ' ' && data[i] != '\t' && data[i] != '\n' && data[i] != '\r' { | |
656 i++ | |
657 } | |
658 linkEnd = i | |
659 if data[linkOffset] == '<' && data[linkEnd-1] == '>' { | |
660 linkOffset++ | |
661 linkEnd-- | |
662 } | |
663 | |
664 // optional spacer: (space | tab)* (newline | '\'' | '"' | '(' ) | |
665 for i < len(data) && (data[i] == ' ' || data[i] == '\t') { | |
666 i++ | |
667 } | |
668 if i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != '\'' && data[i] != '"' && data[i] != '(' { | |
669 return | |
670 } | |
671 | |
672 // compute end-of-line | |
673 if i >= len(data) || data[i] == '\r' || data[i] == '\n' { | |
674 lineEnd = i | |
675 } | |
676 if i+1 < len(data) && data[i] == '\r' && data[i+1] == '\n' { | |
677 lineEnd++ | |
678 } | |
679 | |
680 // optional (space|tab)* spacer after a newline | |
681 if lineEnd > 0 { | |
682 i = lineEnd + 1 | |
683 for i < len(data) && (data[i] == ' ' || data[i] == '\t') { | |
684 i++ | |
685 } | |
686 } | |
687 | |
688 // optional title: any non-newline sequence enclosed in '"() alone on its line | |
689 if i+1 < len(data) && (data[i] == '\'' || data[i] == '"' || data[i] == '(') { | |
690 i++ | |
691 titleOffset = i | |
692 | |
693 // look for EOL | |
694 for i < len(data) && data[i] != '\n' && data[i] != '\r' { | |
695 i++ | |
696 } | |
697 if i+1 < len(data) && data[i] == '\n' && data[i+1] == '\r' { | |
698 titleEnd = i + 1 | |
699 } else { | |
700 titleEnd = i | |
701 } | |
702 | |
703 // step back | |
704 i-- | |
705 for i > titleOffset && (data[i] == ' ' || data[i] == '\t') { | |
706 i-- | |
707 } | |
708 if i > titleOffset && (data[i] == '\'' || data[i] == '"' || data[i] == ')') { | |
709 lineEnd = titleEnd | |
710 titleEnd = i | |
711 } | |
712 } | |
713 | |
714 return | |
715 } | |
716 | |
717 // The first bit of this logic is the same as Parser.listItem, but the rest | |
718 // is much simpler. This function simply finds the entire block and shifts it | |
719 // over by one tab if it is indeed a block (just returns the line if it's not). | |
720 // blockEnd is the end of the section in the input buffer, and contents is the | |
721 // extracted text that was shifted over one tab. It will need to be rendered at | |
722 // the end of the document. | |
723 func scanFootnote(p *Markdown, data []byte, i, indentSize int) (blockStart, blockEnd int, contents []byte, hasBlock bool) { | |
724 if i == 0 || len(data) == 0 { | |
725 return | |
726 } | |
727 | |
728 // skip leading whitespace on first line | |
729 for i < len(data) && data[i] == ' ' { | |
730 i++ | |
731 } | |
732 | |
733 blockStart = i | |
734 | |
735 // find the end of the line | |
736 blockEnd = i | |
737 for i < len(data) && data[i-1] != '\n' { | |
738 i++ | |
739 } | |
740 | |
741 // get working buffer | |
742 var raw bytes.Buffer | |
743 | |
744 // put the first line into the working buffer | |
745 raw.Write(data[blockEnd:i]) | |
746 blockEnd = i | |
747 | |
748 // process the following lines | |
749 containsBlankLine := false | |
750 | |
751 gatherLines: | |
752 for blockEnd < len(data) { | |
753 i++ | |
754 | |
755 // find the end of this line | |
756 for i < len(data) && data[i-1] != '\n' { | |
757 i++ | |
758 } | |
759 | |
760 // if it is an empty line, guess that it is part of this item | |
761 // and move on to the next line | |
762 if p.isEmpty(data[blockEnd:i]) > 0 { | |
763 containsBlankLine = true | |
764 blockEnd = i | |
765 continue | |
766 } | |
767 | |
768 n := 0 | |
769 if n = isIndented(data[blockEnd:i], indentSize); n == 0 { | |
770 // this is the end of the block. | |
771 // we don't want to include this last line in the index. | |
772 break gatherLines | |
773 } | |
774 | |
775 // if there were blank lines before this one, insert a new one now | |
776 if containsBlankLine { | |
777 raw.WriteByte('\n') | |
778 containsBlankLine = false | |
779 } | |
780 | |
781 // get rid of that first tab, write to buffer | |
782 raw.Write(data[blockEnd+n : i]) | |
783 hasBlock = true | |
784 | |
785 blockEnd = i | |
786 } | |
787 | |
788 if data[blockEnd-1] != '\n' { | |
789 raw.WriteByte('\n') | |
790 } | |
791 | |
792 contents = raw.Bytes() | |
793 | |
794 return | |
795 } | |
796 | |
797 // | |
798 // | |
799 // Miscellaneous helper functions | |
800 // | |
801 // | |
802 | |
// ispunct reports whether c is an ASCII punctuation symbol.
// The set is taken from a private function in regexp in the stdlib.
func ispunct(c byte) bool {
	const punctuation = "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
	return strings.IndexByte(punctuation, c) >= 0
}
813 | |
814 // Test if a character is a whitespace character. | |
815 func isspace(c byte) bool { | |
816 return ishorizontalspace(c) || isverticalspace(c) | |
817 } | |
818 | |
// ishorizontalspace reports whether c is a space or a tab.
func ishorizontalspace(c byte) bool {
	switch c {
	case ' ', '\t':
		return true
	}
	return false
}
823 | |
// isverticalspace reports whether c is a vertical whitespace character:
// newline, carriage return, form feed or vertical tab.
func isverticalspace(c byte) bool {
	switch c {
	case '\n', '\r', '\f', '\v':
		return true
	}
	return false
}
828 | |
// isletter reports whether c is an ASCII letter.
func isletter(c byte) bool {
	switch {
	case 'a' <= c && c <= 'z':
		return true
	case 'A' <= c && c <= 'Z':
		return true
	}
	return false
}
833 | |
834 // Test if a character is a letter or a digit. | |
835 // TODO: check when this is looking for ASCII alnum and when it should use unicode | |
836 func isalnum(c byte) bool { | |
837 return (c >= '0' && c <= '9') || isletter(c) | |
838 } | |
839 | |
// expandTabs writes line to out with every tab replaced by spaces, padding
// up to the next multiple of tabSize columns. Columns are counted in runes,
// not bytes.
func expandTabs(out *bytes.Buffer, line []byte, tabSize int) {
	// Count the tabs at the very beginning of the line.
	leading := 0
	for leading < len(line) && line[leading] == '\t' {
		leading++
	}

	// Common case: no tabs at all, or tabs only at the beginning of the
	// line. Each leading tab is a full tab stop, so no runes need decoding.
	if bytes.IndexByte(line[leading:], '\t') < 0 {
		for pad := leading * tabSize; pad > 0; pad-- {
			out.WriteByte(' ')
		}
		out.Write(line[leading:])
		return
	}

	// Slow case: track the column, in runes, to know how many spaces each
	// tab expands to.
	column := 0
	for pos := 0; pos < len(line); pos++ {
		// Copy the run of non-tab text verbatim.
		start := pos
		for pos < len(line) && line[pos] != '\t' {
			_, size := utf8.DecodeRune(line[pos:])
			pos += size
			column++
		}
		if pos > start {
			out.Write(line[start:pos])
		}
		if pos >= len(line) {
			break
		}

		// line[pos] is a tab: emit at least one space, then pad to the next
		// tab stop. The loop's post statement steps over the tab itself.
		out.WriteByte(' ')
		column++
		for column%tabSize != 0 {
			out.WriteByte(' ')
			column++
		}
	}
}
897 | |
// isIndented determines whether a line counts as indented: it starts either
// with a tab or with indentSize spaces. It returns the number of bytes the
// indent occupies (1 for a tab, indentSize for spaces), or 0 if the line is
// not indented.
func isIndented(data []byte, indentSize int) int {
	switch {
	case len(data) == 0:
		return 0
	case data[0] == '\t':
		return 1
	case len(data) < indentSize:
		return 0
	}
	// Every one of the first indentSize bytes has to be a space.
	for i := indentSize - 1; i >= 0; i-- {
		if data[i] != ' ' {
			return 0
		}
	}
	return indentSize
}
917 | |
918 // Create a url-safe slug for fragments | |
919 func slugify(in []byte) []byte { | |
920 if len(in) == 0 { | |
921 return in | |
922 } | |
923 out := make([]byte, 0, len(in)) | |
924 sym := false | |
925 | |
926 for _, ch := range in { | |
927 if isalnum(ch) { | |
928 sym = false | |
929 out = append(out, ch) | |
930 } else if sym { | |
931 continue | |
932 } else { | |
933 out = append(out, '-') | |
934 sym = true | |
935 } | |
936 } | |
937 var a, b int | |
938 var ch byte | |
939 for a, ch = range out { | |
940 if ch != '-' { | |
941 break | |
942 } | |
943 } | |
944 for b = len(out) - 1; b > 0; b-- { | |
945 if out[b] != '-' { | |
946 break | |
947 } | |
948 } | |
949 return out[a : b+1] | |
950 } |