dmarkdown.markdown source code

1 /**
2 	Markdown parser implementation
3 
4 	Copyright: © 2012-2014 RejectedSoftware e.K.
5 	License: Subject to the terms of the MIT license, as written in the included LICENSE.txt file.
6 	Authors: Sönke Ludwig
7 */
8 module dmarkdown.markdown;
9 
10 import dmarkdown.html;
11 import dmarkdown.string;
12 
13 // import vibe.utils.string;
14 
15 import std.algorithm : canFind, countUntil, min;
16 import std.array;
17 import std.ascii : isAlpha, isWhite;
18 import std.format;
19 import std.range;
20 import std.string;
21 
22 /*
23 	TODO:
24 		detect inline HTML tags
25 */
26 
27 // TODO (dmarkdown) detailed API docs and examples for everything
28 
// Smoke test: renders a small sample document (heading, emphasis, bullet
// list) and prints the generated HTML line by line.
unittest
{
	import std.stdio : writeln;

	// The pieces are joined with an explicit `~`; adjacent string literals are
	// not concatenated implicitly by current D compilers.
	auto text =
	    "=======\n" ~
	    "Heading\n" ~
	    "=======\n" ~
	    "\n" ~
	    "**bold** *italic*\n" ~
	    "\n" ~
	    "List:\n" ~
	    "\n" ~
	    "  * a\n" ~
	    "  * b\n" ~
	    "  * c\n";
	string result = filterMarkdown(text);
	foreach( ln; splitLines(result) )
		writeln(ln);
}
48 
49 
/** Converts Markdown text to HTML and returns the result as a string.

	Params:
		str = Markdown source text
		flags = feature flags; they are stored in a freshly created settings
			object whose remaining fields keep their default values

	Returns: the generated HTML
*/
string filterMarkdown()(string str, MarkdownFlags flags)
{
	scope cfg = new MarkdownSettings;
	cfg.flags = flags;
	return filterMarkdown(str, cfg);
}
/// ditto
string filterMarkdown()(string str, scope MarkdownSettings settings = null)
@trusted { // Appender not @safe as of 2.065
	// Render into an in-memory appender and hand back what it collected.
	auto html = appender!string();
	filterMarkdown(html, str, settings);
	return html.data;
}
65 
66 
/** Converts Markdown text to HTML and writes the result to an output range.

	Params:
		dst = output range receiving the generated HTML
		src = Markdown source text
		flags = feature flags; they are stored in a freshly created settings
			object whose remaining fields keep their default values
*/
void filterMarkdown(R)(ref R dst, string src, MarkdownFlags flags)
{
	scope cfg = new MarkdownSettings;
	cfg.flags = flags;
	filterMarkdown(dst, src, cfg);
}
/// ditto
void filterMarkdown(R)(ref R dst, string src, scope MarkdownSettings settings = null)
{
	// Fall back to default settings only when the caller passed none, so that
	// no settings object is allocated needlessly.
	if (!settings) settings = new MarkdownSettings;

	auto all_lines = splitLines(src);
	// Reference definitions are collected first; scanForReferences also
	// removes those lines from all_lines so they are not rendered.
	auto links = scanForReferences(all_lines);
	auto lines = parseLines(all_lines, settings);
	Block root_block;
	parseBlocks(root_block, lines, null, settings);
	writeBlock(dst, root_block, links, settings);
}
88 
/// Options controlling how Markdown is converted to HTML.
final class MarkdownSettings {
	/// Feature flags; plain Markdown behavior by default.
	MarkdownFlags flags = MarkdownFlags.vanillaMarkdown;

	/// Heading level emitted for a top-level heading; deeper headings are
	/// shifted by the same amount (1 means "#" becomes <h1>).
	size_t headingBaseLevel = 1;
}
93 
/// Bit flags that switch individual conversion features on or off.
enum MarkdownFlags {
	/// No special behavior.
	none = 0,
	/// Join the lines of a text block with "<br>" instead of a newline.
	keepLineBreaks = 1<<0,
	/// Recognize lines starting with three backticks as code block delimiters.
	backtickCodeBlocks = 1<<1,
	/// Do not detect HTML blocks and escape '<' and '>' in running text.
	noInlineHtml = 1<<2,
	//noLinks = 1<<3,
	//allowUnsafeHtml = 1<<4,
	/// Alias for `none`.
	vanillaMarkdown = none,
	/// Combination of all three flags above.
	forumDefault = keepLineBreaks|backtickCodeBlocks|noInlineHtml
}
104 
private {
	// HTML tag names that parseHtmlBlockLine accepts as block-level elements.
	immutable s_blockTags = [
		"div", "ol", "p", "pre", "section", "table", "ul"
	];
}
108 
// Kind of a single indentation level found at the start of a line.
private enum IndentType {
	White, // one tab or four spaces
	Quote  // a '>' quote marker
}
113 
// Classification that parseLines assigns to every input line.
private enum LineType {
	Undefined,          // default value, never assigned by parseLines
	Blank,              // only spaces and tabs
	Plain,              // anything not matched by another category
	Hline,              // horizontal rule
	AtxHeader,          // heading introduced by '#' characters
	SetextHeader,       // underline made of '=' or '-' characters
	UList,              // unordered list item
	OList,              // ordered list item
	HtmlBlock,          // opening tag of a block-level HTML element
	CodeBlockDelimiter  // line starting with three backticks
}
126 
// A single input line together with the information parseLines derived from it.
private struct Line {
	LineType type;        // classification of the line
	IndentType[] indent;  // indentation levels found at the start of the line
	string text;          // the complete original line
	string unindented;    // the line with all indentation levels removed

	// Returns `text` with its first `n` indentation levels removed.
	string unindent(size_t n)
	pure @safe {
		assert(n <= indent.length);
		string rest = text;
		foreach( level; indent[0 .. n] ){
			final switch(level){
				case IndentType.White:
					// a leading space means a four-space indent, otherwise a single tab
					rest = rest[(rest[0] == ' ' ? 4 : 1) .. $];
					break;
				case IndentType.Quote:
					// drop optional leading whitespace plus the '>' marker
					rest = rest.stripLeft()[1 .. $];
					break;
			}
		}
		return rest;
	}
}
151 
// Consumes all of `lines` and returns one classified Line per input line.
// For every line the leading indentation levels (tab, four spaces or a '>'
// quote marker) are recorded, and the remaining text decides the line type.
private Line[] parseLines(ref string[] lines, scope MarkdownSettings settings)
pure @safe {
	Line[] result;
	for( ; !lines.empty; lines.popFront() ){
		string rest = lines.front;

		Line info;
		info.text = rest;

		// peel off indentation levels one at a time
		while( rest.length > 0 ){
			if( rest[0] == '\t' ){
				info.indent ~= IndentType.White;
				rest = rest[1 .. $];
				continue;
			}
			if( rest.startsWith("    ") ){
				info.indent ~= IndentType.White;
				rest = rest[4 .. $];
				continue;
			}
			// fewer than four spaces may still precede a quote marker
			rest = rest.stripLeft();
			if( !rest.startsWith(">") ) break;
			info.indent ~= IndentType.Quote;
			rest = rest[1 .. $];
		}
		info.unindented = rest;

		immutable fencedCode = (settings.flags & MarkdownFlags.backtickCodeBlocks) != 0;
		immutable htmlBlocks = (settings.flags & MarkdownFlags.noInlineHtml) == 0;

		// the first matching category wins, so the order below matters
		info.type =
			fencedCode && isCodeBlockDelimiter(rest) ? LineType.CodeBlockDelimiter :
			isAtxHeaderLine(rest) ? LineType.AtxHeader :
			isSetextHeaderLine(rest) ? LineType.SetextHeader :
			isHlineLine(rest) ? LineType.Hline :
			isOListLine(rest) ? LineType.OList :
			isUListLine(rest) ? LineType.UList :
			isLineBlank(rest) ? LineType.Blank :
			htmlBlocks && isHtmlBlockLine(rest) ? LineType.HtmlBlock :
			LineType.Plain;

		result ~= info;
	}
	return result;
}
193 
// Kind of a node in the block tree built by parseBlocks.
private enum BlockType {
	Plain,      // lines written out verbatim
	Text,       // root container at the outermost level
	Paragraph,  // rendered inside <p>
	Header,     // rendered inside <hN>
	OList,      // rendered inside <ol>
	UList,      // rendered inside <ul>
	ListItem,   // rendered inside <li>
	Code,       // rendered inside <pre><code>
	Quote       // rendered inside <blockquote>
}
205 
// Node of the block tree: its own text lines plus any nested blocks.
private struct Block {
	BlockType type;      // how the block is rendered
	string[] text;       // the block's own lines
	Block[] blocks;      // nested child blocks
	size_t headerLevel;  // heading level, set for Header blocks
}
212 
/*
	Groups classified lines into a tree of blocks below `root`.

	Lines are consumed from the front of `lines` for as long as their
	indentation starts with `base_indent`; the function returns when it meets a
	line that belongs to an outer level or when the input is exhausted.

	Params:
		root = block receiving the parsed child blocks; its type is set to Text
			at the outermost level and to Quote when the innermost level of
			base_indent is a quote marker
		lines = remaining input, advanced past everything that was consumed
		base_indent = indentation shared by all lines of this block
		settings = passed on unchanged to recursive calls
*/
private void parseBlocks(ref Block root, ref Line[] lines, IndentType[] base_indent, scope MarkdownSettings settings)
pure @safe {
	if( base_indent.length == 0 ) root.type = BlockType.Text;
	else if( base_indent[$-1] == IndentType.Quote ) root.type = BlockType.Quote;

	while( !lines.empty ){
		auto ln = lines.front;

		// blank lines only separate blocks
		if( ln.type == LineType.Blank ){
			lines.popFront();
			continue;
		}

		if( ln.indent != base_indent ){
			// a line that does not extend base_indent belongs to an outer block
			if( ln.indent.length < base_indent.length || ln.indent[0 .. base_indent.length] != base_indent )
				return;

			auto cindent = base_indent ~ IndentType.White;
			if( ln.indent == cindent ){
				// exactly one extra whitespace level: indented code block
				Block cblock;
				cblock.type = BlockType.Code;
				while( !lines.empty && lines.front.indent.length >= cindent.length
						&& lines.front.indent[0 .. cindent.length] == cindent)
				{
					cblock.text ~= lines.front.unindent(cindent.length);
					lines.popFront();
				}
				root.blocks ~= cblock;
			} else {
				// any other deeper indentation: parse one level further down
				Block subblock;
				parseBlocks(subblock, lines, ln.indent[0 .. base_indent.length+1], settings);
				root.blocks ~= subblock;
			}
		} else {
			Block b;
			final switch(ln.type){
				case LineType.Undefined: assert(false);
				case LineType.Blank: assert(false);
				case LineType.Plain:
					if( lines.length >= 2 && lines[1].type == LineType.SetextHeader ){
						// text followed by an underline: '=' gives level 1, '-' level 2
						auto setln = lines[1].unindented;
						b.type = BlockType.Header;
						b.text = [ln.unindented];
						b.headerLevel = setln.strip()[0] == '=' ? 1 : 2;
						lines.popFrontN(2);
					} else {
						b.type = BlockType.Paragraph;
						b.text = skipText(lines, base_indent);
					}
					break;
				case LineType.Hline:
					b.type = BlockType.Plain;
					b.text = ["<hr>"];
					lines.popFront();
					break;
				case LineType.AtxHeader:
					b.type = BlockType.Header;
					string hl = ln.unindented;
					b.headerLevel = 0;
					// the number of leading '#' characters is the heading level
					while( hl.length > 0 && hl[0] == '#' ){
						b.headerLevel++;
						hl = hl[1 .. $];
					}
					// drop optional closing '#' characters and trailing spaces
					while( hl.length > 0 && (hl[$-1] == '#' || hl[$-1] == ' ') )
						hl = hl[0 .. $-1];
					b.text = [hl];
					lines.popFront();
					break;
				case LineType.SetextHeader:
					// An underline without a text line above it. A run of dashes
					// that also qualifies as a horizontal rule (e.g. "---") is
					// rendered as one; any other stray underline is skipped.
					if( isHlineLine(ln.unindented) ){
						b.type = BlockType.Plain;
						b.text = ["<hr>"];
					}
					lines.popFront();
					break;
				case LineType.UList:
				case LineType.OList:
					b.type = ln.type == LineType.UList ? BlockType.UList : BlockType.OList;
					auto itemindent = base_indent ~ IndentType.White;
					bool firstItem = true, paraMode = false;
					while(!lines.empty && lines.front.type == ln.type && lines.front.indent == base_indent ){
						Block itm;
						itm.text = skipText(lines, itemindent);
						itm.text[0] = removeListPrefix(itm.text[0], ln.type);

						// emit <p></p> if there are blank lines between the items
						if( firstItem && !lines.empty && lines.front.type == LineType.Blank )
							paraMode = true;
						firstItem = false;
						if( paraMode ){
							Block para;
							para.type = BlockType.Paragraph;
							para.text = itm.text;
							itm.blocks ~= para;
							itm.text = null;
						}

						// content indented below the item becomes its children
						parseBlocks(itm, lines, itemindent, settings);
						itm.type = BlockType.ListItem;
						b.blocks ~= itm;
					}
					break;
				case LineType.HtmlBlock:
					int nestlevel = 0;
					auto starttag = parseHtmlBlockLine(ln.unindented);
					// parseLines assigns HtmlBlock only when this same check
					// succeeded on the same text, so this is purely defensive
					if( !starttag.isHtmlBlock || !starttag.open )
						break;

					b.type = BlockType.Plain;
					// copy lines verbatim until the matching closing tag is seen
					while(!lines.empty){
						if( lines.front.indent.length < base_indent.length ) break;
						if( lines.front.indent[0 .. base_indent.length] != base_indent ) break;

						auto str = lines.front.unindent(base_indent.length);
						auto taginfo = parseHtmlBlockLine(str);
						b.text ~= str;
						lines.popFront();
						if( taginfo.isHtmlBlock && taginfo.tagName == starttag.tagName )
							nestlevel += taginfo.open ? 1 : -1;
						if( nestlevel <= 0 ) break;
					}
					break;
				case LineType.CodeBlockDelimiter:
					lines.popFront(); // TODO: get language from line
					b.type = BlockType.Code;
					// collect lines up to the closing delimiter, which is consumed too
					while(!lines.empty){
						if( lines.front.indent.length < base_indent.length ) break;
						if( lines.front.indent[0 .. base_indent.length] != base_indent ) break;
						if( lines.front.type == LineType.CodeBlockDelimiter ){
							lines.popFront();
							break;
						}
						b.text ~= lines.front.unindent(base_indent.length);
						lines.popFront();
					}
					break;
			}
			root.blocks ~= b;
		}
	}
}
350 
// Consumes the first line of `lines` plus all directly following Plain lines
// whose indentation is accepted by matchesIndent, and returns them with up to
// indent.length indentation levels removed.
private string[] skipText(ref Line[] lines, IndentType[] indent)
pure @safe {
	// Decides whether a continuation line indented as `candidate` may still
	// belong to a text block whose indentation is `base`.
	static bool matchesIndent(IndentType[] candidate, IndentType[] base)
	{
		immutable n = candidate.length;
		// the candidate must be a prefix of the base indentation
		if( n > base.length || candidate != base[0 .. n] )
			return false;

		// position of the last quote level in the base indentation, if any
		sizediff_t lastQuote = -1;
		foreach_reverse( pos, kind; base ){
			if( kind == IndentType.Quote ){
				lastQuote = pos;
				break;
			}
		}
		if( lastQuote < 0 ) return true;

		// compare against the quote level's distance from the end
		immutable size_t fromEnd = base.length - 1 - lastQuote;
		return n > fromEnd;
	}

	string[] collected;
	do {
		auto cur = lines.front;
		collected ~= cur.unindent(min(indent.length, cur.indent.length));
		lines.popFront();
	} while( !lines.empty
		&& matchesIndent(lines.front.indent, indent)
		&& lines.front.type == LineType.Plain );
	return collected;
}
376 
/*
	Writes the HTML for `block` and, recursively, for its child blocks.

	Params:
		dst = output range receiving the HTML
		block = block to render
		links = reference definitions used to resolve reference-style links
		settings = conversion settings (flags and heading base level)
*/
private void writeBlock(R)(ref R dst, ref const Block block, LinkRef[string] links, scope MarkdownSettings settings)
{
	final switch(block.type){
		case BlockType.Plain:
			// verbatim output, no Markdown processing or escaping
			foreach( ln; block.text ){
				dst.put(ln);
				dst.put("\n");
			}
			foreach(b; block.blocks)
				writeBlock(dst, b, links, settings);
			break;
		case BlockType.Text:
			writeMarkdownEscaped(dst, block, links, settings);
			foreach(b; block.blocks)
				writeBlock(dst, b, links, settings);
			break;
		case BlockType.Paragraph:
			assert(block.blocks.length == 0);
			dst.put("<p>");
			writeMarkdownEscaped(dst, block, links, settings);
			dst.put("</p>\n");
			break;
		case BlockType.Header:
			assert(block.blocks.length == 0);
			auto hlvl = block.headerLevel + (settings ? settings.headingBaseLevel-1 : 0);
			// HTML only defines <h1> to <h6>; keep the shifted level in that range
			if( hlvl < 1 ) hlvl = 1;
			else if( hlvl > 6 ) hlvl = 6;
			dst.formattedWrite("<h%s>", hlvl);
			assert(block.text.length == 1);
			writeMarkdownEscaped(dst, block.text[0], links, settings);
			dst.formattedWrite("</h%s>\n", hlvl);
			break;
		case BlockType.OList:
			dst.put("<ol>\n");
			foreach(b; block.blocks)
				writeBlock(dst, b, links, settings);
			dst.put("</ol>\n");
			break;
		case BlockType.UList:
			dst.put("<ul>\n");
			foreach(b; block.blocks)
				writeBlock(dst, b, links, settings);
			dst.put("</ul>\n");
			break;
		case BlockType.ListItem:
			dst.put("<li>");
			writeMarkdownEscaped(dst, block, links, settings);
			foreach(b; block.blocks)
				writeBlock(dst, b, links, settings);
			dst.put("</li>\n");
			break;
		case BlockType.Code:
			assert(block.blocks.length == 0);
			dst.put("<pre class=\"prettyprint\"><code>");
			// code is HTML-escaped only, never interpreted as Markdown
			foreach(ln; block.text){
				filterHTMLEscape(dst, ln);
				dst.put("\n");
			}
			dst.put("</code></pre>");
			break;
		case BlockType.Quote:
			dst.put("<blockquote>");
			writeMarkdownEscaped(dst, block, links, settings);
			foreach(b; block.blocks)
				writeBlock(dst, b, links, settings);
			dst.put("</blockquote>\n");
			break;
	}
}
445 
// Writes the text lines of `block` as one piece of inline Markdown. The lines
// are joined with "<br>" when keepLineBreaks is set and with a newline
// otherwise; a trailing newline is written if the block has any lines.
private void writeMarkdownEscaped(R)(ref R dst, ref const Block block, in LinkRef[string] links, scope MarkdownSettings settings)
{
	auto lines = cast(string[])block.text;
	string joined;
	if( settings.flags & MarkdownFlags.keepLineBreaks )
		joined = lines.join("<br>");
	else
		joined = lines.join("\n");
	writeMarkdownEscaped(dst, joined, links, settings);
	if( lines.length > 0 )
		dst.put("\n");
}
453 
/*
	Writes `ln` to `dst`, converting inline Markdown to HTML: backslash
	escapes, emphasis, inline code, links, images and automatic links.

	Params:
		dst = output range receiving the HTML
		ln = inline Markdown text
		linkrefs = reference definitions used to resolve reference-style links
		settings = conversion settings; with noInlineHtml set, '<' and '>' are
			written as entities

	If `ln` ends in two spaces, "<br/>" is appended after the text.
*/
private void writeMarkdownEscaped(R)(ref R dst, string ln, in LinkRef[string] linkrefs, scope MarkdownSettings settings)
{
	bool br = ln.endsWith("  ");
	while( ln.length > 0 ){
		switch( ln[0] ){
			default:
				dst.put(ln[0]);
				ln = ln[1 .. $];
				break;
			case '\\':
				if( ln.length >= 2 ){
					switch(ln[1]){
						default:
							// Not an escape sequence: keep the backslash and let the
							// following character go through the regular handling,
							// so that e.g. '<' and '>' are still escaped when
							// noInlineHtml is set.
							dst.put(ln[0]);
							ln = ln[1 .. $];
							break;
						case '\\', '\'', '`', '*', '_', '{', '}', '[', ']',
							'(', ')', '#', '+', '-', '.', '!':
							// escaped character: write it literally
							dst.put(ln[1]);
							ln = ln[2 .. $];
							break;
					}
				} else {
					dst.put(ln[0]);
					ln = ln[1 .. $];
				}
				break;
			case '_':
			case '*':
				string text;
				if( auto em = parseEmphasis(ln, text) ){
					// em is the marker length: 1 = <em>, 2 = <strong>, 3 = both
					dst.put(em == 1 ? "<em>" : em == 2 ? "<strong>" : "<strong><em>");
					filterHTMLEscape(dst, text, HTMLEscapeFlags.escapeMinimal);
					dst.put(em == 1 ? "</em>" : em == 2 ? "</strong>": "</em></strong>");
				} else {
					dst.put(ln[0]);
					ln = ln[1 .. $];
				}
				break;
			case '`':
				string code;
				if( parseInlineCode(ln, code) ){
					dst.put("<code class=\"prettyprint\">");
					filterHTMLEscape(dst, code, HTMLEscapeFlags.escapeMinimal);
					dst.put("</code>");
				} else {
					dst.put(ln[0]);
					ln = ln[1 .. $];
				}
				break;
			case '[':
				Link link;
				if( parseLink(ln, link, linkrefs) ){
					dst.put("<a href=\"");
					filterHTMLAttribEscape(dst, link.url);
					dst.put("\"");
					if( link.title.length ){
						dst.put(" title=\"");
						filterHTMLAttribEscape(dst, link.title);
						dst.put("\"");
					}
					dst.put(">");
					// the link text may itself contain inline Markdown
					writeMarkdownEscaped(dst, link.text, linkrefs, settings);
					dst.put("</a>");
				} else {
					dst.put(ln[0]);
					ln = ln[1 .. $];
				}
				break;
			case '!':
				Link link;
				if( parseLink(ln, link, linkrefs) ){
					dst.put("<img src=\"");
					filterHTMLAttribEscape(dst, link.url);
					dst.put("\" alt=\"");
					filterHTMLAttribEscape(dst, link.text);
					dst.put("\"");
					if( link.title.length ){
						dst.put(" title=\"");
						filterHTMLAttribEscape(dst, link.title);
						dst.put("\"");
					}
					dst.put(">");
				} else {
					// Not an image: write only the '!' and process the next
					// character normally, so that it is neither emitted
					// unescaped nor excluded from emphasis/link handling.
					dst.put(ln[0]);
					ln = ln[1 .. $];
				}
				break;
			case '>':
				if( settings.flags & MarkdownFlags.noInlineHtml ) dst.put("&gt;");
				else dst.put(ln[0]);
				ln = ln[1 .. $];
				break;
			case '<':
				string url;
				if( parseAutoLink(ln, url) ){
					bool is_email = url.startsWith("mailto:");
					dst.put("<a href=\"");
					if( is_email ) filterHTMLAllEscape(dst, url);
					else filterHTMLAttribEscape(dst, url);
					dst.put("\">");
					// for e-mail links the visible text omits the "mailto:" prefix
					if( is_email ) filterHTMLAllEscape(dst, url[7 .. $]);
					else filterHTMLEscape(dst, url, HTMLEscapeFlags.escapeMinimal);
					dst.put("</a>");
				} else {
					if (ln.startsWith("<br>")) {
						// always support line breaks, since we embed them here ourselves!
						dst.put("<br>");
						ln = ln[4 .. $];
					} else {
						if( settings.flags & MarkdownFlags.noInlineHtml ) dst.put("&lt;");
						else dst.put(ln[0]);
						ln = ln[1 .. $];
					}
				}
				break;
		}
	}
	if( br ) dst.put("<br/>");
}
578 
// True if the line consists only of spaces and tabs.
private bool isLineBlank(string ln)
pure @safe {
	return ln.allOf(" \t");
}
583 
// True if the line, after optional leading whitespace, is a run of '='
// characters or a run of '-' characters followed only by spaces and tabs.
private bool isSetextHeaderLine(string ln)
pure @safe {
	auto rest = stripLeft(ln);
	if( rest.length < 1 ) return false;

	immutable marker = rest[0];
	if( marker != '=' && marker != '-' ) return false;

	// skip the run of underline characters
	size_t run = 0;
	while( run < rest.length && rest[run] == marker ) run++;
	return allOf(rest[run .. $], " \t");
}
598 
// True if the line, after optional leading whitespace, starts with one to six
// '#' characters that are directly followed by a space.
private bool isAtxHeaderLine(string ln)
pure @safe {
	auto s = stripLeft(ln);
	size_t hashes = 0;
	foreach( ch; s ){
		if( ch != '#' ) break;
		hashes++;
	}
	return hashes >= 1 && hashes <= 6 && hashes < s.length && s[hashes] == ' ';
}
607 
// True if the line consists only of spaces and one of the characters '-',
// '*' or '_', with that character occurring at least three times.
private bool isHlineLine(string ln)
pure @safe {
	static immutable string[3] patterns = [" -", " *", " _"];
	foreach( pat; patterns ){
		// pat[1] is the rule character allowed next to spaces
		if( allOf(ln, pat) && count(ln, pat[1]) >= 3 )
			return true;
	}
	return false;
}
615 
// True if the first non-whitespace character of the line is '>'.
private bool isQuoteLine(string ln)
pure @safe {
	auto trimmed = stripLeft(ln);
	return trimmed.length > 0 && trimmed[0] == '>';
}
620 
// Counts the leading '>' markers of a line; whitespace before and between the
// markers is ignored.
private size_t getQuoteLevel(string ln)
pure @safe {
	size_t depth = 0;
	for( auto rest = stripLeft(ln); rest.length > 0 && rest[0] == '>'; rest = stripLeft(rest[1 .. $]) )
		depth++;
	return depth;
}
631 
// True if the line, after optional leading whitespace, starts with a bullet
// ('*', '+' or '-') that is followed by a space or a tab.
private bool isUListLine(string ln)
pure @safe {
	auto s = stripLeft(ln);
	if( s.length < 2 ) return false;

	immutable bullet = s[0];
	immutable sep = s[1];
	immutable isBullet = bullet == '*' || bullet == '+' || bullet == '-';
	return isBullet && (sep == ' ' || sep == '\t');
}
640 
// True if the line, after optional leading whitespace, starts with one or
// more decimal digits followed by '.' and then a space or a tab.
private bool isOListLine(string ln)
pure @safe {
	auto s = stripLeft(ln);

	// length of the leading run of digits
	size_t digits = 0;
	while( digits < s.length && s[digits] >= '0' && s[digits] <= '9' )
		digits++;
	if( digits == 0 ) return false;

	auto tail = s[digits .. $];
	return tail.length >= 2 && tail[0] == '.' && (tail[1] == ' ' || tail[1] == '\t');
}
655 
// Strips the list marker from the first line of a list item. `tp` must be
// LineType.OList or LineType.UList; any other value is a programming error.
private string removeListPrefix(string str, LineType tp)
pure @safe {
	if( tp == LineType.OList ){
		// skip bullets and output using normal escaping
		auto dot = str.indexOfCT('.');
		assert(dot > 0);
		return str[dot+1 .. $].stripLeft();
	}
	if( tp == LineType.UList ){
		// drop the single bullet character and the whitespace around it
		auto afterBullet = str.stripLeft()[1 .. $];
		return stripLeft(afterBullet);
	}
	assert(false);
}
668 
669 
// Inspects a line that may consist of a single HTML tag.
//
// Returns a struct with:
//   isHtmlBlock - true if the whole line is one tag (its only '>' is the last
//                 character) and the tag name is listed in s_blockTags
//   tagName     - the tag name, set as soon as a tag start was recognized
//   open        - false if the tag starts with "</", true otherwise
private auto parseHtmlBlockLine(string ln)
pure @safe {
	struct HtmlBlockInfo {
		bool isHtmlBlock;
		string tagName;
		bool open;
	}

	HtmlBlockInfo ret;
	ret.isHtmlBlock = false;
	ret.open = true;

	ln = strip(ln);
	if( ln.length < 3 ) return ret;
	if( ln[0] != '<' ) return ret;
	if( ln[1] == '/' ){
		ret.open = false;
		ln = ln[1 .. $];
	}
	// Uses the selectively imported std.ascii.isAlpha; the fully qualified
	// name is not made available by a selective import.
	if( !isAlpha(ln[1]) ) return ret;
	ln = ln[1 .. $];

	// the tag name ends at the first space or '>'
	size_t idx = 0;
	while( idx < ln.length && ln[idx] != ' ' && ln[idx] != '>' )
		idx++;
	ret.tagName = ln[0 .. idx];
	ln = ln[idx .. $];

	// the tag has to be closed, and nothing may follow it on the line
	auto eidx = ln.indexOf('>');
	if( eidx < 0 ) return ret;
	if( eidx != ln.length-1 ) return ret;

	if (!s_blockTags.canFind(ret.tagName)) return ret;

	ret.isHtmlBlock = true;
	return ret;
}
706 
// True if the line is the opening tag of a block-level HTML element.
private bool isHtmlBlockLine(string ln)
pure @safe {
	auto info = parseHtmlBlockLine(ln);
	if( !info.isHtmlBlock ) return false;
	return info.open;
}
712 
// True if the line is the closing tag of a block-level HTML element.
private bool isHtmlBlockCloseLine(string ln)
pure @safe {
	auto info = parseHtmlBlockLine(ln);
	if( !info.isHtmlBlock ) return false;
	return !info.open;
}
718 
// True if the line begins with three backticks.
private bool isCodeBlockDelimiter(string ln)
pure @safe {
	return ln.length >= 3 && ln[0 .. 3] == "```";
}
723 
// Returns the tag name that parseHtmlBlockLine extracts from the line.
private string getHtmlTagName(string ln)
pure @safe {
	auto info = parseHtmlBlockLine(ln);
	return info.tagName;
}
728 
// True if the line starts with a tab or with four spaces.
private bool isLineIndented(string ln)
pure @safe {
	if( ln.length >= 1 && ln[0] == '\t' ) return true;
	return ln.length >= 4 && ln[0 .. 4] == "    ";
}
733 
// Removes one indentation level (a tab or four spaces) from the start of the
// line. Calling it on a line without such an indentation is a programming error.
private string unindentLine(string ln)
pure @safe {
	immutable size_t width =
		ln.startsWith("\t") ? 1 :
		ln.startsWith("    ") ? 4 : 0;
	if( width == 0 ) assert(false);
	return ln[width .. $];
}
740 
// Tries to parse an emphasis span at the start of `str`.
//
// The opening marker is a run of one to three '*' or '_' characters and the
// span ends at the next occurrence of the same marker. On success the
// enclosed text is stored in `text`, `str` is advanced past the closing
// marker and the marker length (1, 2 or 3) is returned. On failure 0 is
// returned and both arguments are left untouched.
private int parseEmphasis(ref string str, ref string text)
pure @safe {
	if( str.length < 3 ) return 0;

	immutable marker = str[0];
	if( marker != '*' && marker != '_' ) return 0;

	// the longest run of the marker character wins, capped at three
	size_t run = 1;
	while( run < 3 && str[run] == marker ) run++;
	string ctag = str[0 .. run];

	string rest = str[run .. $];
	auto closeIdx = () @trusted { return rest.indexOf(ctag); }();
	// the enclosed text must not be empty
	if( closeIdx < 1 ) return 0;

	text = rest[0 .. closeIdx];
	str = rest[closeIdx + run .. $];
	return cast(int)run;
}
765 
// Tries to parse an inline code span at the start of `str`.
//
// The span is delimited by single or double backticks. On success the
// enclosed code is stored in `code` and `str` is advanced past the closing
// delimiter. On failure both arguments are left untouched.
private bool parseInlineCode(ref string str, ref string code)
pure @safe {
	if( str.length < 3 || str[0] != '`' ) return false;

	// a double backtick takes precedence over a single one
	immutable size_t ticks = str[1] == '`' ? 2 : 1;
	string fence = str[0 .. ticks];
	string rest = str[ticks .. $];

	auto closeIdx = () @trusted { return rest.indexOf(fence); }();
	// the enclosed code must not be empty
	if( closeIdx < 1 ) return false;

	code = rest[0 .. closeIdx];
	str = rest[closeIdx + ticks .. $];
	return true;
}
783 
// Tries to parse a link or image at the start of `str`.
//
// Accepted forms are `[text](url "title")` and `[text][refid]` (an empty
// refid means that the text is used as the id), each optionally preceded by
// '!'. Reference ids are looked up in lower case in `linkrefs`.
//
// On success `dst` holds text, url and title, `str` is advanced past the
// link and true is returned. On failure `str` is unchanged; fields of `dst`
// may already have been overwritten.
private bool parseLink(ref string str, ref Link dst, in LinkRef[string] linkrefs)
pure @safe {
	if( str.length < 3 ) return false;

	// ignore img-link prefix
	string rest = str[0] == '!' ? str[1 .. $] : str;

	// parse the text part [text]
	if( rest[0] != '[' ) return false;
	auto closeIdx = rest.matchBracket();
	if( closeIdx < 1 ) return false;
	string refid;
	dst.text = rest[1 .. closeIdx];
	rest = rest[closeIdx+1 .. $];

	// parse either (link '['"title"']') or '[' ']'[refid]
	if( rest.length < 2 ) return false;
	if( rest[0] != '(' ){
		// reference form; a single space may separate the two bracket pairs
		if( rest[0] == ' ' ) rest = rest[1 .. $];
		if( rest[0] != '[' ) return false;
		rest = rest[1 .. $];
		closeIdx = rest.indexOfCT(']');
		if( closeIdx < 0 ) return false;
		refid = closeIdx == 0 ? dst.text : rest[0 .. closeIdx];
		rest = rest[closeIdx+1 .. $];
	} else {
		closeIdx = rest.matchBracket();
		if( closeIdx < 1 ) return false;
		auto inner = rest[1 .. closeIdx];
		immutable quoteIdx = inner.indexOfCT('"');
		// a title only counts if whitespace separates it from the URL
		if( quoteIdx > 1 && inner[quoteIdx - 1].isWhite() ){
			dst.url = inner[0 .. quoteIdx].stripRight();
			immutable len = inner[quoteIdx .. $].lastIndexOf('"');
			// the opening quote is the only one: unterminated title
			if( len == 0 ) return false;
			assert(len > 0);
			dst.title = inner[quoteIdx + 1 .. quoteIdx + len];
		} else {
			dst.url = inner.stripRight();
			dst.title = null;
		}
		// the URL may be wrapped in angle brackets
		if( dst.url.startsWith("<") && dst.url.endsWith(">") )
			dst.url = dst.url[1 .. $-1];
		rest = rest[closeIdx+1 .. $];
	}

	if( refid.length > 0 ){
		auto found = toLower(refid) in linkrefs;
		if( !found ){
			// debug if (!__ctfe) logDebug("[LINK REF NOT FOUND: '%s'", refid);
			return false;
		}
		dst.url = found.url;
		dst.title = found.title;
	}

	str = rest;
	return true;
}
844 
@safe unittest
{
    // Parses `input` and checks that exactly `expected` comes out.
    static void expectLink(string input, Link expected, in LinkRef[string] refs)
    {
        Link parsed;
        assert(parseLink(input, parsed, refs), input);
        assert(parsed == expected);
    }

    LinkRef[string] refs;
    refs["ref"] = LinkRef("ref", "target", "title");

    // inline links, with and without title
    expectLink(`[link](target)`, Link("link", "target"), null);
    expectLink(`[link](target "title")`, Link("link", "target", "title"), null);
    expectLink(`[link](target  "title")`, Link("link", "target", "title"), null);
    expectLink(`[link](target "title"  )`, Link("link", "target", "title"), null);

    expectLink(`[link](target)`, Link("link", "target"), null);
    expectLink(`[link](target "title")`, Link("link", "target", "title"), null);

    // reference links
    expectLink(`[link][ref]`, Link("link", "target", "title"), refs);
    expectLink(`[ref][]`, Link("ref", "target", "title"), refs);

    // nested brackets in the link text
    expectLink(`[link[with brackets]](target)`, Link("link[with brackets]", "target"), null);
    expectLink(`[link[with brackets]][ref]`, Link("link[with brackets]", "target", "title"), refs);

    // spaces inside the target
    expectLink(`[link](/target with spaces )`, Link("link", "/target with spaces"), null);
    expectLink(`[link](/target with spaces "title")`, Link("link", "/target with spaces", "title"), null);

    // whitespace (spaces and tabs) around the title
    expectLink(`[link](white-space  "around title" )`, Link("link", "white-space", "around title"), null);
    expectLink("[link](tabs\t\"around title\"\t)", Link("link", "tabs", "around title"), null);

    // empty title, and quotes that are part of the target
    expectLink(`[link](target "")`, Link("link", "target", ""), null);
    expectLink(`[link](target-no-title"foo" )`, Link("link", "target-no-title\"foo\"", ""), null);

    // target in angle brackets
    expectLink(`[link](<target>)`, Link("link", "target"), null);

    // inputs that must be rejected
    auto rejected = [
        `text`, `[link](target`, `[link]target)`, `[link]`,
        `[link(target)`, `link](target)`, `[link] (target)`,
        `[link][noref]`, `[noref][]`
    ];
    Link scratch;
    foreach (input; rejected)
        assert(!parseLink(input, scratch, refs), input);
}
890 
// Tries to parse an automatic link of the form `<url>` or `<user@host>` at
// the start of `str`.
//
// The text between the angle brackets must not contain spaces or tabs and
// has to contain a ':' or an '@'. On success `str` is advanced past the
// closing '>' and `url` holds the target; a bare e-mail address gets a
// "mailto:" prefix. On failure `str` is unchanged, but `url` may already
// have been overwritten.
private bool parseAutoLink(ref string str, ref string url)
pure @safe {
	string pstr = str;
	if( pstr.length < 3 ) return false;
	if( pstr[0] != '<' ) return false;
	pstr = pstr[1 .. $];
	auto cidx = pstr.indexOf('>');
	if( cidx < 0 ) return false;
	url = pstr[0 .. cidx];
	if( anyOf(url, " \t") ) return false;
	if( !anyOf(url, ":@") ) return false;
	str = pstr[cidx+1 .. $];

	// Add "mailto:" only to a bare address. If a ':' precedes the '@', the
	// text already carries a scheme (e.g. "mailto:a@b" or "http://user@host")
	// and must be left as it is.
	immutable atidx = url.indexOf('@');
	immutable colonidx = url.indexOf(':');
	if( atidx > 0 && (colonidx < 0 || colonidx > atidx) ) url = "mailto:"~url;
	return true;
}
906 
// Collects link reference definitions and removes their lines from `lines`.
//
// Returns the definitions keyed by the lower-cased reference id.
private LinkRef[string] scanForReferences(ref string[] lines)
pure @safe {
	LinkRef[string] ret;
	bool[size_t] reflines;

	// search for reference definitions:
	//   [refid] link "opt text"
	//   [refid] <link> "opt text"
	//   "opt text", 'opt text', (opt text)
	//   line must not be indented
	foreach( lnidx, ln; lines ){
		if( isLineIndented(ln) ) continue;
		ln = strip(ln);
		if( !ln.startsWith("[") ) continue;
		ln = ln[1 .. $];

		auto idx = () @trusted { return ln.indexOf("]:"); }();
		if( idx < 0 ) continue;
		string refid = ln[0 .. idx];
		ln = stripLeft(ln[idx+2 .. $]);

		string url;
		if( ln.startsWith("<") ){
			idx = ln.indexOfCT('>');
			if( idx < 0 ) continue;
			url = ln[1 .. idx];
			ln = ln[idx+1 .. $];
		} else {
			// The URL ends at the first space or tab, whichever comes first.
			// Looking for a space alone would cut in the wrong place when a
			// tab separates the URL from a title that contains spaces.
			immutable spidx = ln.indexOfCT(' ');
			immutable tabidx = ln.indexOfCT('\t');
			if( spidx < 0 || (tabidx >= 0 && tabidx < spidx) ) idx = tabidx;
			else idx = spidx;

			if( idx < 0 ){
				// no whitespace at all: the rest of the line is the URL
				url = ln;
				ln = ln[$ .. $];
			} else {
				url = ln[0 .. idx];
				ln = ln[idx+1 .. $];
			}
		}
		ln = stripLeft(ln);

		// optional title enclosed in (), "" or ''
		string title;
		if( ln.length >= 3 ){
			if( ln[0] == '(' && ln[$-1] == ')' || ln[0] == '\"' && ln[$-1] == '\"' || ln[0] == '\'' && ln[$-1] == '\'' )
				title = ln[1 .. $-1];
		}

		ret[toLower(refid)] = LinkRef(refid, url, title);
		reflines[lnidx] = true;

		// debug if (!__ctfe) logTrace("[detected ref on line %d]", lnidx+1);
	}

	// remove all lines containing references
	auto nonreflines = appender!(string[])();
	nonreflines.reserve(lines.length);
	foreach( i, ln; lines )
		if( i !in reflines )
			nonreflines.put(ln);
	lines = nonreflines.data();

	return ret;
}
974 
// A link reference definition collected by scanForReferences.
private struct LinkRef {
	string id;     // reference id as written in the document
	string url;    // link target
	string title;  // optional title, empty if none was given
}
980 
// Result of parsing a single link or image with parseLink.
private struct Link {
	string text;   // text between the square brackets
	string url;    // link target
	string title;  // optional title
}
986 
@safe unittest { // alt and title attributes
	// image without a title
	immutable plain = filterMarkdown("![alt](http://example.org/image)");
	assert(plain == "<p><img src=\"http://example.org/image\" alt=\"alt\">\n</p>\n");

	// image with a title
	immutable titled = filterMarkdown("![alt](http://example.org/image \"Title\")");
	assert(titled == "<p><img src=\"http://example.org/image\" alt=\"alt\" title=\"Title\">\n</p>\n");
}
993 
@safe unittest { // complex links
	// link text spanning two lines, target in angle brackets
	immutable multiline = filterMarkdown("their [install\ninstructions](<http://www.brew.sh>) and");
	assert(multiline == "<p>their <a href=\"http://www.brew.sh\">install\ninstructions</a> and\n</p>\n");

	// image nested inside a link
	immutable badge = filterMarkdown("[![Build Status](https://travis-ci.org/rejectedsoftware/vibe.d.png)](https://travis-ci.org/rejectedsoftware/vibe.d)");
	assert(badge == "<p><a href=\"https://travis-ci.org/rejectedsoftware/vibe.d\"><img src=\"https://travis-ci.org/rejectedsoftware/vibe.d.png\" alt=\"Build Status\"></a>\n</p>\n");
}
1000 
@safe unittest { // check CTFE-ability
    // `enum` forces the conversion to run at compile time
    enum res = filterMarkdown("### some markdown\n[foo][]\n[foo]: /bar");
    enum expected = "<h3> some markdown</h3>\n<p><a href=\"/bar\">foo</a>\n</p>\n";
    assert(res == expected, res);
}
1005 
@safe unittest { // correct line breaks in restrictive mode
	// with forumDefault a newline inside a paragraph becomes <br>
	immutable expected = "<p>hello<br>world\n</p>\n";
	auto res = filterMarkdown("hello\nworld", MarkdownFlags.forumDefault);
	assert(res == expected, res);
}
1010 
1011 /*@safe unittest { // code blocks and blockquotes
1012 	assert(filterMarkdown("\tthis\n\tis\n\tcode") ==
1013 		"<pre><code>this\nis\ncode</code></pre>\n");
1014 	assert(filterMarkdown("    this\n    is\n    code") ==
1015 		"<pre><code>this\nis\ncode</code></pre>\n");
1016 	assert(filterMarkdown("    this\n    is\n\tcode") ==
1017 		"<pre><code>this\nis</code></pre>\n<pre><code>code</code></pre>\n");
1018 	assert(filterMarkdown("\tthis\n\n\tcode") ==
1019 		"<pre><code>this\n\ncode</code></pre>\n");
1020 	assert(filterMarkdown("\t> this") ==
1021 		"<pre><code>&gt; this</code></pre>\n");
1022 	assert(filterMarkdown(">     this") ==
1023 		"<blockquote><pre><code>this</code></pre></blockquote>\n");
1024 	assert(filterMarkdown(">     this\n    is code") ==
1025 		"<blockquote><pre><code>this\nis code</code></pre></blockquote>\n");
1026 }*/