/**
	Markdown parser implementation

	Copyright: © 2012-2014 RejectedSoftware e.K.
	License: Subject to the terms of the MIT license, as written in the included LICENSE.txt file.
	Authors: Sönke Ludwig
*/
module dmarkdown.markdown;

import dmarkdown.html;
import dmarkdown.string;

// import vibe.utils.string;

import std.algorithm : canFind, countUntil, min;
import std.array;
import std.ascii : isAlpha, isWhite;
import std.format;
import std.range;
import std.string;

/*
	TODO:
		detect inline HTML tags
*/

// TODO (dmarkdown) detailed API docs and examples for everything

// Smoke test: renders a small document and prints the resulting HTML line by line.
unittest
{
	// Explicit `~` is used because implicit concatenation of adjacent string
	// literals is deprecated in D.
	auto text =
		"=======\n" ~
		"Heading\n" ~
		"=======\n" ~
		"\n" ~
		"**bold** *italic*\n" ~
		"\n" ~
		"List:\n" ~
		"\n" ~
		" * a\n" ~
		" * b\n" ~
		" * c\n";
	string result = filterMarkdown(text);
	import std.stdio;
	foreach( ln; splitLines(result) )
		writeln(ln);
}


/** Returns a Markdown filtered HTML string.

	Params:
		str = Markdown source text
		flags = Parser flags; wrapped in a temporary `MarkdownSettings`
		settings = Parser settings; `null` selects the defaults

	Returns:
		The generated HTML.
*/
string filterMarkdown()(string str, MarkdownFlags flags)
{
	scope settings = new MarkdownSettings;
	settings.flags = flags;
	return filterMarkdown(str, settings);
}
/// ditto
string filterMarkdown()(string str, scope MarkdownSettings settings = null)
@trusted { // Appender not @safe as of 2.065
	auto dst = appender!string();
	filterMarkdown(dst, str, settings);
	return dst.data;
}


/** Markdown filters the given string and writes the corresponding HTML to an output range.

	Params:
		dst = Output range receiving the generated HTML
		src = Markdown source text
		flags = Parser flags; wrapped in a temporary `MarkdownSettings`
		settings = Parser settings; `null` selects the defaults
*/
void filterMarkdown(R)(ref R dst, string src, MarkdownFlags flags)
{
	scope settings = new MarkdownSettings;
	settings.flags = flags;
	filterMarkdown(dst, src, settings);
}
/// ditto
void filterMarkdown(R)(ref R dst, string src, scope MarkdownSettings settings = null)
{
	auto defsettings = new MarkdownSettings;
	if (!settings) settings = defsettings;

	auto all_lines = splitLines(src);
	// scanForReferences also removes the reference definition lines from all_lines
	auto links = scanForReferences(all_lines);
	auto lines = parseLines(all_lines, settings);
	Block root_block;
	parseBlocks(root_block, lines, null, settings);
	writeBlock(dst, root_block, links, settings);
}

/** Settings controlling the Markdown to HTML conversion.
*/
final class MarkdownSettings {
	/// Feature flags, see `MarkdownFlags`.
	MarkdownFlags flags = MarkdownFlags.vanillaMarkdown;
	/// Heading level emitted for a top-level ("#") heading; deeper headings are shifted accordingly.
	size_t headingBaseLevel = 1;
}

/** Bit flags selecting optional parser behavior.
*/
enum MarkdownFlags {
	none = 0,
	/// Join the lines of a text block with "<br>" instead of a newline.
	keepLineBreaks = 1<<0,
	/// Recognize lines starting with "```" as code block delimiters.
	backtickCodeBlocks = 1<<1,
	/// Do not recognize HTML block lines and escape '<' and '>' in inline text.
	noInlineHtml = 1<<2,
	//noLinks = 1<<3,
	//allowUnsafeHtml = 1<<4,
	vanillaMarkdown = none,
	forumDefault = keepLineBreaks|backtickCodeBlocks|noInlineHtml
}

private {
	// Tag names that parseHtmlBlockLine accepts as HTML block tags.
	immutable s_blockTags = ["div", "ol", "p", "pre", "section", "table", "ul"];
}

// Kind of a single indentation level of a line.
private enum IndentType {
	White,
	Quote
}

// Classification of a source line, assigned by parseLines.
private enum LineType {
	Undefined,
	Blank,
	Plain,
	Hline,
	AtxHeader,
	SetextHeader,
	UList,
	OList,
	HtmlBlock,
	CodeBlockDelimiter
}

// A source line together with its indentation levels and classification.
private struct Line {
	LineType type;
	IndentType[] indent;  // indentation levels, outermost first
	string text;          // the raw line
	string unindented;    // the line with all indentation levels removed

	/** Returns `text` with the first `n` indentation levels stripped.

		A white level removes four characters when the line starts with a
		space and one character (the tab) otherwise; a quote level removes
		leading white space plus the quote marker.
	*/
	string unindent(size_t n)
	pure @safe {
		assert(n <= indent.length);
		string ln = text;
		foreach( i; 0 .. n ){
			final switch(indent[i]){
				case IndentType.White:
					if( ln[0] == ' ' ) ln = ln[4 .. $];
					else ln = ln[1 .. $];
					break;
				case IndentType.Quote:
					ln = ln.stripLeft()[1 .. $];
					break;
			}
		}
		return ln;
	}
}

/** Consumes all of `lines` and returns one classified `Line` per input line.

	Each leading tab or run of four spaces is recorded as a white indentation
	level and each '>' as a quote level; the remainder determines the line type.
*/
private Line[] parseLines(ref string[] lines, scope MarkdownSettings settings)
pure @safe {
	Line[] ret;
	while( !lines.empty ){
		auto ln = lines.front;
		lines.popFront();

		Line lninfo;
		lninfo.text = ln;

		while( ln.length > 0 ){
			if( ln[0] == '\t' ){
				lninfo.indent ~= IndentType.White;
				ln.popFront();
			} else if( ln.startsWith("    ") ){
				// one indentation level is four spaces; this must agree with
				// Line.unindent, which strips four characters per white level
				lninfo.indent ~= IndentType.White;
				ln.popFrontN(4);
			} else {
				ln = ln.stripLeft();
				if( ln.startsWith(">") ){
					lninfo.indent ~= IndentType.Quote;
					ln.popFront();
				} else break;
			}
		}
		lninfo.unindented = ln;

		// the order of these checks matters, the first match wins
		if( (settings.flags & MarkdownFlags.backtickCodeBlocks) && isCodeBlockDelimiter(ln) ) lninfo.type = LineType.CodeBlockDelimiter;
		else if( isAtxHeaderLine(ln) ) lninfo.type = LineType.AtxHeader;
		else if( isSetextHeaderLine(ln) ) lninfo.type = LineType.SetextHeader;
		else if( isHlineLine(ln) ) lninfo.type = LineType.Hline;
		else if( isOListLine(ln) ) lninfo.type = LineType.OList;
		else if( isUListLine(ln) ) lninfo.type = LineType.UList;
		else if( isLineBlank(ln) ) lninfo.type = LineType.Blank;
		else if( !(settings.flags & MarkdownFlags.noInlineHtml) && isHtmlBlockLine(ln) ) lninfo.type = LineType.HtmlBlock;
		else lninfo.type = LineType.Plain;

		ret ~= lninfo;
	}
	return ret;
}

// Kind of a node in the block tree built by parseBlocks.
private enum BlockType {
	Plain,
	Text,
	Paragraph,
	Header,
	OList,
	UList,
	ListItem,
	Code,
	Quote
}

// Node of the block tree: own text lines plus nested child blocks.
private struct Block {
	BlockType type;
	string[] text;
	Block[] blocks;
	size_t headerLevel;  // set for BlockType.Header only
}

/** Builds the block tree for all leading lines that share `base_indent`.

	Consumes lines from `lines` and appends the resulting blocks to
	`root.blocks`. Returns as soon as a line is encountered whose indentation
	does not start with `base_indent`, leaving that line unconsumed.
*/
private void parseBlocks(ref Block root, ref Line[] lines, IndentType[] base_indent, scope MarkdownSettings settings)
pure @safe {
	if( base_indent.length == 0 ) root.type = BlockType.Text;
	else if( base_indent[$-1] == IndentType.Quote ) root.type = BlockType.Quote;

	while( !lines.empty ){
		auto ln = lines.front;

		if( ln.type == LineType.Blank ){
			lines.popFront();
			continue;
		}

		if( ln.indent != base_indent ){
			// a line outside of the current indentation ends this block
			if( ln.indent.length < base_indent.length || ln.indent[0 .. base_indent.length] != base_indent )
				return;

			auto cindent = base_indent ~ IndentType.White;
			if( ln.indent == cindent ){
				// exactly one additional white level: indented code block
				Block cblock;
				cblock.type = BlockType.Code;
				while( !lines.empty && lines.front.indent.length >= cindent.length
						&& lines.front.indent[0 .. cindent.length] == cindent)
				{
					cblock.text ~= lines.front.unindent(cindent.length);
					lines.popFront();
				}
				root.blocks ~= cblock;
			} else {
				// any other deeper indentation: recurse one level down
				Block subblock;
				parseBlocks(subblock, lines, ln.indent[0 .. base_indent.length+1], settings);
				root.blocks ~= subblock;
			}
		} else {
			Block b;
			final switch(ln.type){
				case LineType.Undefined: assert(false);
				case LineType.Blank: assert(false);
				case LineType.Plain:
					if( lines.length >= 2 && lines[1].type == LineType.SetextHeader ){
						// text line underlined with '=' (level 1) or '-' (level 2)
						auto setln = lines[1].unindented;
						b.type = BlockType.Header;
						b.text = [ln.unindented];
						b.headerLevel = setln.strip()[0] == '=' ? 1 : 2;
						lines.popFrontN(2);
					} else {
						b.type = BlockType.Paragraph;
						b.text = skipText(lines, base_indent);
					}
					break;
				case LineType.Hline:
					b.type = BlockType.Plain;
					b.text = ["<hr>"];
					lines.popFront();
					break;
				case LineType.AtxHeader:
					b.type = BlockType.Header;
					string hl = ln.unindented;
					b.headerLevel = 0;
					// the number of leading '#' characters is the header level
					while( hl.length > 0 && hl[0] == '#' ){
						b.headerLevel++;
						hl = hl[1 .. $];
					}
					// drop closing '#' characters and trailing spaces
					while( hl.length > 0 && (hl[$-1] == '#' || hl[$-1] == ' ') )
						hl = hl[0 .. $-1];
					b.text = [hl];
					lines.popFront();
					break;
				case LineType.SetextHeader:
					// underline without a preceding text line: dropped, b stays an empty plain block
					lines.popFront();
					break;
				case LineType.UList:
				case LineType.OList:
					b.type = ln.type == LineType.UList ? BlockType.UList : BlockType.OList;
					auto itemindent = base_indent ~ IndentType.White;
					bool firstItem = true, paraMode = false;
					while(!lines.empty && lines.front.type == ln.type && lines.front.indent == base_indent ){
						Block itm;
						itm.text = skipText(lines, itemindent);
						itm.text[0] = removeListPrefix(itm.text[0], ln.type);

						// emit <p></p> if there are blank lines between the items
						if( firstItem && !lines.empty && lines.front.type == LineType.Blank )
							paraMode = true;
						firstItem = false;
						if( paraMode ){
							Block para;
							para.type = BlockType.Paragraph;
							para.text = itm.text;
							itm.blocks ~= para;
							itm.text = null;
						}

						// parseBlocks may set itm.type, so the item type is assigned afterwards
						parseBlocks(itm, lines, itemindent, settings);
						itm.type = BlockType.ListItem;
						b.blocks ~= itm;
					}
					break;
				case LineType.HtmlBlock:
					int nestlevel = 0;
					auto starttag = parseHtmlBlockLine(ln.unindented);
					if( !starttag.isHtmlBlock || !starttag.open )
						break;

					b.type = BlockType.Plain;
					// copy lines verbatim until the start tag has been closed again
					while(!lines.empty){
						if( lines.front.indent.length < base_indent.length ) break;
						if( lines.front.indent[0 .. base_indent.length] != base_indent ) break;

						auto str = lines.front.unindent(base_indent.length);
						auto taginfo = parseHtmlBlockLine(str);
						b.text ~= lines.front.unindent(base_indent.length);
						lines.popFront();
						if( taginfo.isHtmlBlock && taginfo.tagName == starttag.tagName )
							nestlevel += taginfo.open ? 1 : -1;
						if( nestlevel <= 0 ) break;
					}
					break;
				case LineType.CodeBlockDelimiter:
					lines.popFront(); // TODO: get language from line
					b.type = BlockType.Code;
					while(!lines.empty){
						if( lines.front.indent.length < base_indent.length ) break;
						if( lines.front.indent[0 .. base_indent.length] != base_indent ) break;
						if( lines.front.type == LineType.CodeBlockDelimiter ){
							lines.popFront();
							break;
						}
						b.text ~= lines.front.unindent(base_indent.length);
						lines.popFront();
					}
					break;
			}
			root.blocks ~= b;
		}
	}
}

/** Consumes the first line and all directly following plain lines whose
	indentation is compatible with `indent`.

	Returns the consumed lines with up to `indent.length` indentation levels
	removed. `lines` must not be empty.
*/
private string[] skipText(ref Line[] lines, IndentType[] indent)
pure @safe {
	// Tests whether `indent` is a prefix of `base_indent` that still covers
	// the innermost quote level of `base_indent` (if there is one).
	static bool matchesIndent(IndentType[] indent, IndentType[] base_indent)
	{
		if( indent.length > base_indent.length ) return false;
		if( indent != base_indent[0 .. indent.length] ) return false;
		sizediff_t qidx = -1;
		foreach_reverse (i, tp; base_indent) if (tp == IndentType.Quote) { qidx = i; break; }
		if( qidx >= 0 ){
			qidx = base_indent.length-1 - qidx;
			if( indent.length <= qidx ) return false;
		}
		return true;
	}

	string[] ret;

	while(true){
		ret ~= lines.front.unindent(min(indent.length, lines.front.indent.length));
		lines.popFront();

		if( lines.empty || !matchesIndent(lines.front.indent, indent) || lines.front.type != LineType.Plain )
			return ret;
	}
}

/// private
// Recursively writes the HTML for `block` and its children to `dst`.
private void writeBlock(R)(ref R dst, ref const Block block, LinkRef[string] links, scope MarkdownSettings settings)
{
	final switch(block.type){
		case BlockType.Plain:
			// plain blocks are written verbatim, without any escaping
			foreach( ln; block.text ){
				dst.put(ln);
				dst.put("\n");
			}
			foreach(b; block.blocks)
				writeBlock(dst, b, links, settings);
			break;
		case BlockType.Text:
			writeMarkdownEscaped(dst, block, links, settings);
			foreach(b; block.blocks)
				writeBlock(dst, b, links, settings);
			break;
		case BlockType.Paragraph:
			assert(block.blocks.length == 0);
			dst.put("<p>");
			writeMarkdownEscaped(dst, block, links, settings);
			dst.put("</p>\n");
			break;
		case BlockType.Header:
			assert(block.blocks.length == 0);
			auto hlvl = block.headerLevel + (settings ? settings.headingBaseLevel-1 : 0);
			dst.formattedWrite("<h%s>", hlvl);
			assert(block.text.length == 1);
			writeMarkdownEscaped(dst, block.text[0], links, settings);
			dst.formattedWrite("</h%s>\n", hlvl);
			break;
		case BlockType.OList:
			dst.put("<ol>\n");
			foreach(b; block.blocks)
				writeBlock(dst, b, links, settings);
			dst.put("</ol>\n");
			break;
		case BlockType.UList:
			dst.put("<ul>\n");
			foreach(b; block.blocks)
				writeBlock(dst, b, links, settings);
			dst.put("</ul>\n");
			break;
		case BlockType.ListItem:
			dst.put("<li>");
			writeMarkdownEscaped(dst, block, links, settings);
			foreach(b; block.blocks)
				writeBlock(dst, b, links, settings);
			dst.put("</li>\n");
			break;
		case BlockType.Code:
			assert(block.blocks.length == 0);
			dst.put("<pre class=\"prettyprint\"><code>");
			foreach(ln; block.text){
				filterHTMLEscape(dst, ln);
				dst.put("\n");
			}
			dst.put("</code></pre>");
			break;
		case BlockType.Quote:
			dst.put("<blockquote>");
			writeMarkdownEscaped(dst, block, links, settings);
			foreach(b; block.blocks)
				writeBlock(dst, b, links, settings);
			dst.put("</blockquote>\n");
			break;
	}
}

// Writes the text lines of `block` as inline Markdown, joined by "<br>" if
// keepLineBreaks is set and by a newline otherwise. A trailing newline is
// written if the block has any text lines.
private void writeMarkdownEscaped(R)(ref R dst, ref const Block block, in LinkRef[string] links, scope MarkdownSettings settings)
{
	auto lines = cast(string[])block.text;
	auto text = settings.flags & MarkdownFlags.keepLineBreaks ? lines.join("<br>") : lines.join("\n");
	writeMarkdownEscaped(dst, text, links, settings);
	if (lines.length) dst.put("\n");
}

/// private
// Writes `ln` to `dst`, converting inline Markdown (backslash escapes,
// emphasis, inline code, links, images and automatic links) to HTML.
// With MarkdownFlags.noInlineHtml, '<' and '>' are written as character
// references so that no raw HTML tags reach the output.
private void writeMarkdownEscaped(R)(ref R dst, string ln, in LinkRef[string] linkrefs, scope MarkdownSettings settings)
{
	// a hard line break is requested by two trailing spaces
	bool br = ln.endsWith("  ");
	while( ln.length > 0 ){
		switch( ln[0] ){
			default:
				dst.put(ln[0]);
				ln = ln[1 .. $];
				break;
			case '\\':
				if( ln.length >= 2 ){
					switch(ln[1]){
						default:
							// not an escapable character, keep the backslash
							dst.put(ln[0 .. 2]);
							ln = ln[2 .. $];
							break;
						case '\\', '\'', '`', '*', '_', '{', '}', '[', ']',
							'(', ')', '#', '+', '-', '.', '!':
							dst.put(ln[1]);
							ln = ln[2 .. $];
							break;
					}
				} else {
					dst.put(ln[0]);
					ln = ln[1 .. $];
				}
				break;
			case '_':
			case '*':
				string text;
				if( auto em = parseEmphasis(ln, text) ){
					dst.put(em == 1 ? "<em>" : em == 2 ? "<strong>" : "<strong><em>");
					filterHTMLEscape(dst, text, HTMLEscapeFlags.escapeMinimal);
					dst.put(em == 1 ? "</em>" : em == 2 ? "</strong>": "</em></strong>");
				} else {
					dst.put(ln[0]);
					ln = ln[1 .. $];
				}
				break;
			case '`':
				string code;
				if( parseInlineCode(ln, code) ){
					dst.put("<code class=\"prettyprint\">");
					filterHTMLEscape(dst, code, HTMLEscapeFlags.escapeMinimal);
					dst.put("</code>");
				} else {
					dst.put(ln[0]);
					ln = ln[1 .. $];
				}
				break;
			case '[':
				Link link;
				if( parseLink(ln, link, linkrefs) ){
					dst.put("<a href=\"");
					filterHTMLAttribEscape(dst, link.url);
					dst.put("\"");
					if( link.title.length ){
						dst.put(" title=\"");
						filterHTMLAttribEscape(dst, link.title);
						dst.put("\"");
					}
					dst.put(">");
					// the link text may itself contain inline Markdown (e.g. an image)
					writeMarkdownEscaped(dst, link.text, linkrefs, settings);
					dst.put("</a>");
				} else {
					dst.put(ln[0]);
					ln = ln[1 .. $];
				}
				break;
			case '!':
				Link link;
				if( parseLink(ln, link, linkrefs) ){
					dst.put("<img src=\"");
					filterHTMLAttribEscape(dst, link.url);
					dst.put("\" alt=\"");
					filterHTMLAttribEscape(dst, link.text);
					dst.put("\"");
					if( link.title.length ){
						dst.put(" title=\"");
						filterHTMLAttribEscape(dst, link.title);
						dst.put("\"");
					}
					dst.put(">");
				} else if( ln.length >= 2 ){
					dst.put(ln[0 .. 2]);
					ln = ln[2 .. $];
				} else {
					dst.put(ln[0]);
					ln = ln[1 .. $];
				}
				break;
			case '>':
				if( settings.flags & MarkdownFlags.noInlineHtml ) dst.put("&gt;");
				else dst.put(ln[0]);
				ln = ln[1 .. $];
				break;
			case '<':
				string url;
				if( parseAutoLink(ln, url) ){
					bool is_email = url.startsWith("mailto:");
					dst.put("<a href=\"");
					if( is_email ) filterHTMLAllEscape(dst, url);
					else filterHTMLAttribEscape(dst, url);
					dst.put("\">");
					if( is_email ) filterHTMLAllEscape(dst, url[7 .. $]);
					else filterHTMLEscape(dst, url, HTMLEscapeFlags.escapeMinimal);
					dst.put("</a>");
				} else {
					if (ln.startsWith("<br>")) {
						// always support line breaks, since we embed them here ourselves!
						dst.put("<br>");
						ln = ln[4 .. $];
					} else {
						if( settings.flags & MarkdownFlags.noInlineHtml ) dst.put("&lt;");
						else dst.put(ln[0]);
						ln = ln[1 .. $];
					}
				}
				break;
		}
	}
	if( br ) dst.put("<br/>");
}

// Tests whether `ln` consists only of spaces and tabs.
private bool isLineBlank(string ln)
pure @safe {
	return allOf(ln, " \t");
}

// Tests whether `ln` is a setext underline: a run of '=' or a run of '-',
// optionally surrounded by white space.
private bool isSetextHeaderLine(string ln)
pure @safe {
	ln = stripLeft(ln);
	if( ln.length < 1 ) return false;
	if( ln[0] == '=' ){
		while(!ln.empty && ln.front == '=') ln.popFront();
		return allOf(ln, " \t");
	}
	if( ln[0] == '-' ){
		while(!ln.empty && ln.front == '-') ln.popFront();
		return allOf(ln, " \t");
	}
	return false;
}

// Tests whether `ln` starts with one to six '#' characters followed by a space.
private bool isAtxHeaderLine(string ln)
pure @safe {
	ln = stripLeft(ln);
	size_t i = 0;
	while( i < ln.length && ln[i] == '#' ) i++;
	if( i < 1 || i > 6 || i >= ln.length ) return false;
	return ln[i] == ' ';
}

// Tests whether `ln` is a horizontal rule: at least three '-', '*' or '_'
// characters and otherwise only spaces.
private bool isHlineLine(string ln)
pure @safe {
	if( allOf(ln, " -") && count(ln, '-') >= 3 ) return true;
	if( allOf(ln, " *") && count(ln, '*') >= 3 ) return true;
	if( allOf(ln, " _") && count(ln, '_') >= 3 ) return true;
	return false;
}

// Tests whether the first non-white character of `ln` is '>'.
private bool isQuoteLine(string ln)
pure @safe {
	return ln.stripLeft().startsWith(">");
}

// Counts the leading '>' markers of `ln`, ignoring white space between them.
private size_t getQuoteLevel(string ln)
pure @safe {
	size_t level = 0;
	ln = stripLeft(ln);
	while( ln.length > 0 && ln[0] == '>' ){
		level++;
		ln = stripLeft(ln[1 .. $]);
	}
	return level;
}

// Tests whether `ln` starts with '*', '+' or '-' followed by a space or tab.
private bool isUListLine(string ln)
pure @safe {
	ln = stripLeft(ln);
	if (ln.length < 2) return false;
	if (!canFind("*+-", ln[0])) return false;
	if (ln[1] != ' ' && ln[1] != '\t') return false;
	return true;
}

// Tests whether `ln` starts with one or more digits, a '.' and a space or tab.
private bool isOListLine(string ln)
pure @safe {
	ln = stripLeft(ln);
	if( ln.length < 1 ) return false;
	if( ln[0] < '0' || ln[0] > '9' ) return false;
	ln = ln[1 .. $];
	while( ln.length > 0 && ln[0] >= '0' && ln[0] <= '9' )
		ln = ln[1 .. $];
	if( ln.length < 2 ) return false;
	if( ln[0] != '.' ) return false;
	if( ln[1] != ' ' && ln[1] != '\t' )
		return false;
	return true;
}

// Strips the list marker (number and '.', or bullet character) and the
// white space following it from the first line of a list item.
private string removeListPrefix(string str, LineType tp)
pure @safe {
	switch(tp){
		default: assert(false);
		case LineType.OList: // skip bullets and output using normal escaping
			auto idx = str.indexOfCT('.');
			assert(idx > 0);
			return str[idx+1 .. $].stripLeft();
		case LineType.UList:
			return stripLeft(str.stripLeft()[1 .. $]);
	}
}


/** Analyzes a line that may consist of a single HTML block tag.

	`isHtmlBlock` is set only if the stripped line is one opening or closing
	tag, ends with the tag's '>' and the tag name is listed in `s_blockTags`.
	`open` is false for a closing tag ("</...>").
*/
private auto parseHtmlBlockLine(string ln)
pure @safe {
	struct HtmlBlockInfo {
		bool isHtmlBlock;
		string tagName;
		bool open;
	}

	HtmlBlockInfo ret;
	ret.isHtmlBlock = false;
	ret.open = true;

	ln = strip(ln);
	if( ln.length < 3 ) return ret;
	if( ln[0] != '<' ) return ret;
	if( ln[1] == '/' ){
		ret.open = false;
		ln = ln[1 .. $];
	}
	// ln[0] is now '<' or '/', the tag name starts at index 1
	if( !std.ascii.isAlpha(ln[1]) ) return ret;
	ln = ln[1 .. $];
	size_t idx = 0;
	while( idx < ln.length && ln[idx] != ' ' && ln[idx] != '>' )
		idx++;
	ret.tagName = ln[0 .. idx];
	ln = ln[idx .. $];

	// the tag has to be the only thing on the line
	auto eidx = ln.indexOf('>');
	if( eidx < 0 ) return ret;
	if( eidx != ln.length-1 ) return ret;

	if (!s_blockTags.canFind(ret.tagName)) return ret;

	ret.isHtmlBlock = true;
	return ret;
}

// Tests whether `ln` is an opening HTML block tag.
private bool isHtmlBlockLine(string ln)
pure @safe {
	auto bi = parseHtmlBlockLine(ln);
	return bi.isHtmlBlock && bi.open;
}

// Tests whether `ln` is a closing HTML block tag.
private bool isHtmlBlockCloseLine(string ln)
pure @safe {
	auto bi = parseHtmlBlockLine(ln);
	return bi.isHtmlBlock && !bi.open;
}

// Tests whether `ln` starts with three backticks.
private bool isCodeBlockDelimiter(string ln)
pure @safe {
	return ln.startsWith("```");
}

// Returns the tag name that parseHtmlBlockLine extracts from `ln`.
private string getHtmlTagName(string ln)
pure @safe {
	return parseHtmlBlockLine(ln).tagName;
}

// Tests whether `ln` starts with a tab or with four spaces.
private bool isLineIndented(string ln)
pure @safe {
	return ln.startsWith("\t") || ln.startsWith("    ");
}

// Removes one indentation level (a tab or four spaces) from `ln`.
// The line must be indented, see isLineIndented.
private string unindentLine(string ln)
pure @safe {
	if( ln.startsWith("\t") ) return ln[1 .. $];
	if( ln.startsWith("    ") ) return ln[4 .. $];
	assert(false);
}

/** Parses an emphasis span at the start of `str`.

	On success, `text` receives the emphasized text, `str` is advanced past
	the closing delimiter and the delimiter length is returned (1 = emphasis,
	2 = strong, 3 = both). Returns 0 and leaves `str` and `text` untouched if
	there is no non-empty span closed by the same delimiter.
*/
private int parseEmphasis(ref string str, ref string text)
pure @safe {
	string pstr = str;
	if( pstr.length < 3 ) return 0;

	string ctag;
	if( pstr.startsWith("***") ) ctag = "***";
	else if( pstr.startsWith("**") ) ctag = "**";
	else if( pstr.startsWith("*") ) ctag = "*";
	else if( pstr.startsWith("___") ) ctag = "___";
	else if( pstr.startsWith("__") ) ctag = "__";
	else if( pstr.startsWith("_") ) ctag = "_";
	else return 0;

	pstr = pstr[ctag.length .. $];

	auto cidx = () @trusted { return pstr.indexOf(ctag); }();
	if( cidx < 1 ) return 0;

	text = pstr[0 .. cidx];

	str = pstr[cidx+ctag.length .. $];
	return cast(int)ctag.length;
}

/** Parses an inline code span delimited by "`" or "``" at the start of `str`.

	On success, `code` receives the span contents and `str` is advanced past
	the closing delimiter. Returns false and leaves both untouched otherwise.
*/
private bool parseInlineCode(ref string str, ref string code)
pure @safe {
	string pstr = str;
	if( pstr.length < 3 ) return false;
	string ctag;
	if( pstr.startsWith("``") ) ctag = "``";
	else if( pstr.startsWith("`") ) ctag = "`";
	else return false;
	pstr = pstr[ctag.length .. $];

	auto cidx = () @trusted { return pstr.indexOf(ctag); }();
	if( cidx < 1 ) return false;

	code = pstr[0 .. cidx];
	str = pstr[cidx+ctag.length .. $];
	return true;
}

/** Parses a link or image at the start of `str`.

	Accepted forms are `[text](url "title")`, `[text][refid]` and `[text][]`,
	each optionally prefixed with '!'. Reference style links are resolved
	using `linkrefs`, with the lower-cased id as key.

	On success `dst` is filled and `str` is advanced past the link. On failure
	false is returned and `str` is left untouched; `dst` may have been
	partially written.
*/
private bool parseLink(ref string str, ref Link dst, in LinkRef[string] linkrefs)
pure @safe {
	string pstr = str;
	if( pstr.length < 3 ) return false;
	// ignore img-link prefix
	if( pstr[0] == '!' ) pstr = pstr[1 .. $];

	// parse the text part [text]
	if( pstr[0] != '[' ) return false;
	auto cidx = pstr.matchBracket();
	if( cidx < 1 ) return false;
	string refid;
	dst.text = pstr[1 .. cidx];
	pstr = pstr[cidx+1 .. $];

	// parse either (link '['"title"']') or '[' ']'[refid]
	if( pstr.length < 2 ) return false;
	if( pstr[0] == '('){
		cidx = pstr.matchBracket();
		if( cidx < 1 ) return false;
		auto inner = pstr[1 .. cidx];
		immutable qidx = inner.indexOfCT('"');
		// a quote only starts a title if it is preceded by white space
		if( qidx > 1 && inner[qidx - 1].isWhite()){
			dst.url = inner[0 .. qidx].stripRight();
			immutable len = inner[qidx .. $].lastIndexOf('"');
			if( len == 0 ) return false; // no closing quote
			assert(len > 0);
			dst.title = inner[qidx + 1 .. qidx + len];
		} else {
			dst.url = inner.stripRight();
			dst.title = null;
		}
		if (dst.url.startsWith("<") && dst.url.endsWith(">"))
			dst.url = dst.url[1 .. $-1];
		pstr = pstr[cidx+1 .. $];
	} else {
		if( pstr[0] == ' ' ) pstr = pstr[1 .. $];
		if( pstr[0] != '[' ) return false;
		pstr = pstr[1 .. $];
		cidx = pstr.indexOfCT(']');
		if( cidx < 0 ) return false;
		// an empty reference ("[text][]") uses the link text as id
		if( cidx == 0 ) refid = dst.text;
		else refid = pstr[0 .. cidx];
		pstr = pstr[cidx+1 .. $];
	}


	if( refid.length > 0 ){
		auto pr = toLower(refid) in linkrefs;
		if( !pr ){
			// debug if (!__ctfe) logDebug("[LINK REF NOT FOUND: '%s'", refid);
			return false;
		}
		dst.url = pr.url;
		dst.title = pr.title;
	}

	str = pstr;
	return true;
}

@safe unittest
{
	static void testLink(string s, Link exp, in LinkRef[string] refs)
	{
		Link link;
		assert(parseLink(s, link, refs), s);
		assert(link == exp);
	}
	LinkRef[string] refs;
	refs["ref"] = LinkRef("ref", "target", "title");

	testLink(`[link](target)`, Link("link", "target"), null);
	testLink(`[link](target "title")`, Link("link", "target", "title"), null);
	testLink(`[link](target  "title")`, Link("link", "target", "title"), null);
	testLink(`[link](target "title"  )`, Link("link", "target", "title"), null);

	testLink(`[link][ref]`, Link("link", "target", "title"), refs);
	testLink(`[ref][]`, Link("ref", "target", "title"), refs);

	testLink(`[link[with brackets]](target)`, Link("link[with brackets]", "target"), null);
	testLink(`[link[with brackets]][ref]`, Link("link[with brackets]", "target", "title"), refs);

	testLink(`[link](/target with spaces )`, Link("link", "/target with spaces"), null);
	testLink(`[link](/target with spaces "title")`, Link("link", "/target with spaces", "title"), null);

	testLink(`[link](white-space  "around title" )`, Link("link", "white-space", "around title"), null);
	testLink("[link](tabs\t\"around title\"\t)", Link("link", "tabs", "around title"), null);

	testLink(`[link](target "")`, Link("link", "target", ""), null);
	testLink(`[link](target-no-title"foo" )`, Link("link", "target-no-title\"foo\"", ""), null);

	testLink(`[link](<target>)`, Link("link", "target"), null);

	auto failing = [
		`text`, `[link](target`, `[link]target)`, `[link]`,
		`[link(target)`, `link](target)`, `[link] (target)`,
		`[link][noref]`, `[noref][]`
	];
	Link link;
	foreach (s; failing)
		assert(!parseLink(s, link, refs), s);
}

/** Parses an automatic link of the form `<url>` or `<address@host>` at the
	start of `str`.

	The enclosed text must not contain spaces or tabs and has to contain ':'
	or '@'. On success `url` receives the target (prefixed with "mailto:" if
	it contains an '@' after its first character) and `str` is advanced past
	the closing '>'. On failure false is returned and `str` is left untouched;
	`url` may have been written.
*/
private bool parseAutoLink(ref string str, ref string url)
pure @safe {
	string pstr = str;
	if( pstr.length < 3 ) return false;
	if( pstr[0] != '<' ) return false;
	pstr = pstr[1 .. $];
	auto cidx = pstr.indexOf('>');
	if( cidx < 0 ) return false;
	url = pstr[0 .. cidx];
	if( anyOf(url, " \t") ) return false;
	if( !anyOf(url, ":@") ) return false;
	str = pstr[cidx+1 .. $];
	if( url.indexOf('@') > 0 ) url = "mailto:"~url;
	return true;
}

/** Collects all link reference definitions and removes their lines from `lines`.

	Returns the definitions as a map keyed by the lower-cased reference id.
*/
private LinkRef[string] scanForReferences(ref string[] lines)
pure @safe {
	LinkRef[string] ret;
	bool[size_t] reflines;

	// search for reference definitions:
	//   [refid] link "opt text"
	//   [refid] <link> "opt text"
	//   "opt text", 'opt text', (opt text)
	//   line must not be indented
	foreach( lnidx, ln; lines ){
		if( isLineIndented(ln) ) continue;
		ln = strip(ln);
		if( !ln.startsWith("[") ) continue;
		ln = ln[1 .. $];

		auto idx = () @trusted { return ln.indexOf("]:"); }();
		if( idx < 0 ) continue;
		string refid = ln[0 .. idx];
		ln = stripLeft(ln[idx+2 .. $]);

		string url;
		if( ln.startsWith("<") ){
			idx = ln.indexOfCT('>');
			if( idx < 0 ) continue;
			url = ln[1 .. idx];
			ln = ln[idx+1 .. $];
		} else {
			// the URL extends up to the first space or tab, or to the end of the line
			idx = ln.indexOfCT(' ');
			if( idx > 0 ){
				url = ln[0 .. idx];
				ln = ln[idx+1 .. $];
			} else {
				idx = ln.indexOfCT('\t');
				if( idx < 0 ){
					url = ln;
					ln = ln[$ .. $];
				} else {
					url = ln[0 .. idx];
					ln = ln[idx+1 .. $];
				}
			}
		}
		ln = stripLeft(ln);

		string title;
		if( ln.length >= 3 ){
			if( ln[0] == '(' && ln[$-1] == ')' || ln[0] == '\"' && ln[$-1] == '\"' || ln[0] == '\'' && ln[$-1] == '\'' )
				title = ln[1 .. $-1];
		}

		ret[toLower(refid)] = LinkRef(refid, url, title);
		reflines[lnidx] = true;

		// debug if (!__ctfe) logTrace("[detected ref on line %d]", lnidx+1);
	}

	// remove all lines containing references
	auto nonreflines = appender!(string[])();
	nonreflines.reserve(lines.length);
	foreach( i, ln; lines )
		if( i !in reflines )
			nonreflines.put(ln);
	lines = nonreflines.data();

	return ret;
}

// A link reference definition collected by scanForReferences.
private struct LinkRef {
	string id;
	string url;
	string title;
}

// An inline link or image as parsed by parseLink.
private struct Link {
	string text;
	string url;
	string title;
}

@safe unittest { // alt and title attributes
	assert(filterMarkdown("![alt](http://example.org/image)")
		== "<p><img src=\"http://example.org/image\" alt=\"alt\">\n</p>\n");
	assert(filterMarkdown("![alt](http://example.org/image \"Title\")")
		== "<p><img src=\"http://example.org/image\" alt=\"alt\" title=\"Title\">\n</p>\n");
}

@safe unittest { // complex links
	assert(filterMarkdown("their [install\ninstructions](<http://www.brew.sh>) and")
		== "<p>their <a href=\"http://www.brew.sh\">install\ninstructions</a> and\n</p>\n");
	assert(filterMarkdown("[![Build Status](https://travis-ci.org/rejectedsoftware/vibe.d.png)](https://travis-ci.org/rejectedsoftware/vibe.d)")
		== "<p><a href=\"https://travis-ci.org/rejectedsoftware/vibe.d\"><img src=\"https://travis-ci.org/rejectedsoftware/vibe.d.png\" alt=\"Build Status\"></a>\n</p>\n");
}

@safe unittest { // check CTFE-ability
	enum res = filterMarkdown("### some markdown\n[foo][]\n[foo]: /bar");
	assert(res == "<h3> some markdown</h3>\n<p><a href=\"/bar\">foo</a>\n</p>\n", res);
}

@safe unittest { // correct line breaks in restrictive mode
	auto res = filterMarkdown("hello\nworld", MarkdownFlags.forumDefault);
	assert(res == "<p>hello<br>world\n</p>\n", res);
}

// NOTE: the following test is disabled. Its expectations do not match the
// current output of writeBlock, which emits `<pre class="prettyprint">` and
// no newline after `</pre>`.
/*@safe unittest { // code blocks and blockquotes
	assert(filterMarkdown("\tthis\n\tis\n\tcode") ==
		"<pre><code>this\nis\ncode</code></pre>\n");
	assert(filterMarkdown("    this\n    is\n    code") ==
		"<pre><code>this\nis\ncode</code></pre>\n");
	assert(filterMarkdown("    this\n    is\n\tcode") ==
		"<pre><code>this\nis</code></pre>\n<pre><code>code</code></pre>\n");
	assert(filterMarkdown("\tthis\n\n\tcode") ==
		"<pre><code>this\n\ncode</code></pre>\n");
	assert(filterMarkdown("\t> this") ==
		"<pre><code>&gt; this</code></pre>\n");
	assert(filterMarkdown(">     this") ==
		"<blockquote><pre><code>this</code></pre></blockquote>\n");
	assert(filterMarkdown(">     this\n    is code") ==
		"<blockquote><pre><code>this\nis code</code></pre></blockquote>\n");
}*/