1 /**
2 	Markdown parser implementation
3 
4 	Copyright: © 2012-2014 RejectedSoftware e.K.
5 	License: Subject to the terms of the MIT license, as written in the included LICENSE.txt file.
6 	Authors: Sönke Ludwig
7 */
8 module dmarkdown.markdown;
9 
10 import dmarkdown.html;
11 import dmarkdown..string;
12 
13 import std.algorithm : joiner, map, canFind, countUntil, min;
14 import std.array;
15 import std.ascii : isAlpha, isWhite;
16 import std.format;
17 import std.range;
18 import std..string;
19 
20 /*
21 	TODO:
22 		detect inline HTML tags
23 */
24 
25 // TODO (dmarkdown) detailed API docs and examples for everything
26 
// Smoke test: renders a small document (heading, emphasis and a list) and
// prints both the Markdown source and the generated HTML to stdout.
unittest
{
	import std.stdio : writeln;

	enum separator = "===========";

	auto text =
	    "=======\n" ~
	    "Heading\n" ~
	    "=======\n" ~
	    "\n" ~
	    "**bold** *italic*\n" ~
	    "\n" ~
	    "List:\n" ~
	    "\n" ~
	    "  * a\n" ~
	    "  * b\n" ~
	    "  * c\n";

	writeln(separator);
	writeln(text);
	writeln(separator);
	writeln(filterMarkdown(text));
}
48 
// A plain line followed by a more deeply indented plain line must be rendered
// as one single paragraph, keeping the indentation of the second line.
unittest
{
	string markdown =
		"Merged prototype. The prototype is not locked, allowing to add more components.\n" ~
		"        To be used it must be locked by calling EntityPrototype.lockAndTrimMemory().";
	string html =
		"<p>Merged prototype. The prototype is not locked, allowing to add more components.\n" ~
		"        To be used it must be locked by calling EntityPrototype.lockAndTrimMemory().\n" ~
		"</p>\n";

	assert(filterMarkdown(markdown) == html);
}
62 
// '_' produces emphasis by default and is passed through verbatim when
// MarkdownFlags.disableUnderscoreEmphasis is set; '*' always works.
unittest
{
    enum markdown = `*stars* under_score_s`;
    enum withUnderscores    = "<p><em>stars</em> under<em>score</em>s\n</p>\n";
    enum withoutUnderscores = "<p><em>stars</em> under_score_s\n</p>\n";

    string defaultResult = filterMarkdown(markdown);
    string disabledResult = filterMarkdown(markdown, MarkdownFlags.disableUnderscoreEmphasis);

    assert(defaultResult == withUnderscores,
           "'%s' != '%s'".format(defaultResult, withUnderscores));
    assert(disabledResult == withoutUnderscores,
           "'%s' != '%s'".format(disabledResult, withoutUnderscores));
}
81 
/**
	Converts Markdown source to an HTML string.

	A fresh `MarkdownSettings` instance carrying only the given `flags`
	is created and passed on to the settings based overload.

	Params:
		str = the Markdown source text
		flags = parser flags to use

	Returns: the generated HTML
*/
string filterMarkdown()(string str, MarkdownFlags flags)
{
	scope flagSettings = new MarkdownSettings;
	flagSettings.flags = flags;
	return filterMarkdown(str, flagSettings);
}
/**
	Converts Markdown source to an HTML string.

	The HTML is collected in a string appender by the output range based
	overload and returned as a whole.

	Params:
		str = the Markdown source text
		settings = parser settings; `null` is passed on unchanged

	Returns: the generated HTML
*/
string filterMarkdown()(string str, scope MarkdownSettings settings = null)
@trusted { // Appender not @safe as of 2.065
	auto html = appender!string();
	filterMarkdown(html, str, settings);
	return html.data;
}
97 
98 
/**
	Converts Markdown source to HTML and writes it to an output range.

	A fresh `MarkdownSettings` instance carrying only the given `flags`
	is created and passed on to the settings based overload.

	Params:
		dst = output range receiving the HTML
		src = the Markdown source text
		flags = parser flags to use
*/
void filterMarkdown(R)(ref R dst, string src, MarkdownFlags flags)
{
	scope flagSettings = new MarkdownSettings;
	flagSettings.flags = flags;
	filterMarkdown(dst, src, flagSettings);
}
/**
	Converts Markdown source to HTML and writes it to an output range.

	The source is split into lines, scanned for link references, classified
	line by line, grouped into a block tree and finally written to `dst`.

	Params:
		dst = output range receiving the HTML
		src = the Markdown source text
		settings = parser settings; default settings are used if `null`
*/
void filterMarkdown(R)(ref R dst, string src, scope MarkdownSettings settings = null)
{
	if (settings is null) settings = new MarkdownSettings;

	auto source_lines = splitLines(src);
	auto link_refs = scanForReferences(source_lines);
	auto classified = parseLines(source_lines, settings);

	Block document;
	parseBlocks(document, classified, null, settings);
	writeBlock(dst, document, link_refs, settings);
}
119 
/**
	Returns the hierarchy of sections.

	Only header blocks directly below the document root are considered.
	Each header is appended to the deepest "last" section whose own last
	sub section has a heading level lower than the new header; otherwise it
	becomes a sibling on the level where the descent stopped.

	Params:
		markdown_source = the Markdown source text
		settings = parser settings; default settings are used if `null`

	Returns: the top-level sections with their nested sub sections
*/
Section[] getMarkdownOutline(string markdown_source, scope MarkdownSettings settings = null)
{
	import std.conv : to;

	if (settings is null) settings = new MarkdownSettings;

	auto source_lines = splitLines(markdown_source);
	auto classified = parseLines(source_lines, settings);
	Block document;
	parseBlocks(document, classified, null, settings);

	Section outline;
	foreach (ref blk; document.blocks) {
		if (blk.type != BlockType.Header) continue;

		// descend along the most recently added sections while they rank higher
		Section* parent = &outline;
		while (parent.subSections.length != 0
				&& parent.subSections[$-1].headingLevel < blk.headerLevel)
			parent = &parent.subSections[$-1];

		auto caption = blk.text[0];
		parent.subSections ~= Section(blk.headerLevel, caption, caption.asSlug.to!string);
	}

	return outline.subSections;
}
148 
///
unittest {
	import std.conv : to;

	// lower level headings nest below the preceding higher level heading
	auto outline = getMarkdownOutline("## first\n## second\n### third\n# fourth\n### fifth");

	auto expected = [
		Section(2, " first", "first"),
		Section(2, " second", "second", [
			Section(3, " third", "third")
		]),
		Section(1, " fourth", "fourth", [
			Section(3, " fifth", "fifth")
		])
	];

	assert(outline == expected);
}
164 
/**
	Bundles all options that influence parsing and HTML generation.
*/
final class MarkdownSettings {
	/// Feature flags controlling the capabilities of the parser.
	MarkdownFlags flags = MarkdownFlags.vanillaMarkdown;

	/** Level used for top-level headings.

		The emitted heading level is the parsed level plus
		`headingBaseLevel - 1`.
	*/
	size_t headingBaseLevel = 1;

	/** Optional filter applied to every link and image URL before it is written.

		The second argument is `true` for image URLs and `false` for links.
	*/
	string delegate(string url_or_path, bool is_image) urlFilter;

	/** Optional hook to post-process code blocks and inline code.

		The delegate receives the already HTML escaped code and its return
		value is written to the output as is. Useful to e.g. add code
		highlighting.
	*/
	string delegate(string) @safe nothrow processCode = null;
}
// Verifies that MarkdownSettings.processCode is applied to inline code as
// well as to code blocks.
unittest
{
	import std.algorithm: filter;

	auto markdown =
	    "`inline code`\n" ~
	    "block:\n" ~
	    "\n" ~
	    "    code block\n";
	auto wanted =
	    "<p><code class=\"prettyprint\">AAAAAAAAAAA</code>\n" ~
	    "block:\n" ~
	    "</p>\n" ~
	    "<pre class=\"prettyprint\"><code>" ~
	    "AAAAAAAAAA" ~
	    "</code></pre>";

	// Replaces every character except newlines by 'A'.
	string maskCode(string input) @safe nothrow
	{
		import std.conv;
		import std.exception: assumeWontThrow;
		// ignore newlines generated by code block processing
		input = input.filter!(ch => ch != '\n').array.to!string.assumeWontThrow;
		return 'A'.repeat(input.length).array.to!string.assumeWontThrow;
	}

	auto settings = new MarkdownSettings;
	settings.processCode = &maskCode;
	auto html = filterMarkdown(markdown, settings);

	assert(html == wanted, "Unexpected code processing result:\n" ~
	                       html ~ "\nExpected:\n" ~ wanted);
}
213 
/**
	Bit flags selecting optional parser features.

	Individual flags can be combined using the `|` operator.
*/
enum MarkdownFlags {
	/// No optional features.
	none                      = 0,
	/// Lines of a text block are joined by `<br>` instead of a newline.
	keepLineBreaks            = 1 << 0,
	/// Lines starting with three backticks delimit code blocks.
	backtickCodeBlocks        = 1 << 1,
	/// HTML block lines are not recognized and '<' / '>' get escaped.
	noInlineHtml              = 1 << 2,
	//noLinks = 1<<3,
	//allowUnsafeHtml = 1<<4,
	/// If used, subheadings are underlined by stars ('*') instead of dashes ('-')
	alternateSubheaders       = 1 << 5,
	/// If used, '_' may not be used for emphasis ('*' may still be used)
	disableUnderscoreEmphasis = 1 << 6,
	/// Alias for `none`.
	vanillaMarkdown           = none,
	/// Combination of `keepLineBreaks`, `backtickCodeBlocks` and `noInlineHtml`.
	forumDefault              = keepLineBreaks | backtickCodeBlocks | noInlineHtml
}
228 
/**
	A single entry of the outline returned by `getMarkdownOutline`.
*/
struct Section {
	/// Level of the heading as parsed from the source.
	size_t headingLevel;

	/// Text of the heading.
	string caption;

	/// Slug derived from the caption.
	string anchor;

	/// Sections nested below this one.
	Section[] subSections;
}
235 
private {
	// Names of the HTML tags that are recognized as HTML block lines.
	immutable s_blockTags = [
		"div", "ol", "p", "pre",
		"section", "table", "ul"
	];
}
239 
// Kind of a single indentation level found at the start of a line.
private enum IndentType {
	White, // a tab character or four spaces
	Quote  // a '>' quote marker
}
244 
// Classification assigned to each source line by parseLines().
private enum LineType {
	Undefined,          // default value, never assigned by parseLines()
	Blank,              // only spaces and tabs
	Plain,              // anything not matched by the other types
	Hline,              // horizontal rule
	AtxHeader,          // '#' prefixed header
	SetextHeader,       // underline of a header
	UList,              // unordered list item
	OList,              // ordered list item
	HtmlBlock,          // opening tag of an HTML block
	CodeBlockDelimiter  // line starting with three backticks
}
257 
// A source line together with its classification and indentation info.
private struct Line {
	LineType type;        // classification of the line
	IndentType[] indent;  // indentation levels found at the start of the line
	string text;          // the complete, unmodified line
	string unindented;    // the line with all indentation levels removed

	/*
		Returns `text` with the first `n` indentation levels removed.

		A whitespace level removes either four characters (if the remaining
		text starts with a space) or one character; a quote level removes
		leading whitespace plus one character.
	*/
	string unindent(size_t n)
	pure @safe {
		assert(n <= indent.length);

		string rest = text;
		foreach (level; indent[0 .. n]) {
			final switch (level) {
				case IndentType.White:
					rest = rest[0] == ' ' ? rest[4 .. $] : rest[1 .. $];
					break;
				case IndentType.Quote:
					rest = rest.stripLeft()[1 .. $];
					break;
			}
		}
		return rest;
	}
}
282 
/*
	Classifies every source line and records its indentation.

	All elements are consumed from `lines`. For each line the leading
	indentation levels (tab, four spaces or a '>' quote marker) are recorded,
	the remaining text is stored in `unindented` and a LineType is assigned.

	Params:
		lines = source lines; empty after the call
		settings = parser settings, only `flags` is read

	Returns: one Line entry per source line, in source order
*/
private Line[] parseLines(ref string[] lines, scope MarkdownSettings settings)
pure @safe {
	Line[] ret;
	// The flag has to be tested with a bitwise AND. A multiplication would be
	// non-zero for any set flag and would wrongly select '*' underlines.
	const subHeaderChar = (settings.flags & MarkdownFlags.alternateSubheaders) ? '*' : '-';
	while( !lines.empty ){
		auto ln = lines.front;
		lines.popFront();

		Line lninfo;
		lninfo.text = ln;

		// strip indentation levels one by one and remember their kind
		while( ln.length > 0 ){
			if( ln[0] == '\t' ){
				lninfo.indent ~= IndentType.White;
				ln.popFront();
			} else if( ln.startsWith("    ") ){
				lninfo.indent ~= IndentType.White;
				ln.popFrontN(4);
			} else {
				// less than four spaces are ignored in front of a quote marker
				ln = ln.stripLeft();
				if( ln.startsWith(">") ){
					lninfo.indent ~= IndentType.Quote;
					ln.popFront();
				} else break;
			}
		}
		lninfo.unindented = ln;

		// the order of the checks matters, the first match wins
		if( (settings.flags & MarkdownFlags.backtickCodeBlocks) && isCodeBlockDelimiter(ln) ) lninfo.type = LineType.CodeBlockDelimiter;
		else if( isAtxHeaderLine(ln) ) lninfo.type = LineType.AtxHeader;
		else if( isSetextHeaderLine(ln, subHeaderChar) ) lninfo.type = LineType.SetextHeader;
		else if( isHlineLine(ln) ) lninfo.type = LineType.Hline;
		else if( isOListLine(ln) ) lninfo.type = LineType.OList;
		else if( isUListLine(ln) ) lninfo.type = LineType.UList;
		else if( isLineBlank(ln) ) lninfo.type = LineType.Blank;
		else if( !(settings.flags & MarkdownFlags.noInlineHtml) && isHtmlBlockLine(ln) ) lninfo.type = LineType.HtmlBlock;
		else lninfo.type = LineType.Plain;

		ret ~= lninfo;
	}
	return ret;
}
325 
// Kind of a node in the block tree built by parseBlocks().
private enum BlockType {
	Plain,      // text lines written without any processing
	Text,       // root container for unindented content
	Paragraph,  // <p> element
	Header,     // <hN> element
	OList,      // <ol> element
	UList,      // <ul> element
	ListItem,   // <li> element
	Code,       // <pre><code> element
	Quote       // <blockquote> element
}
337 
// A node of the block tree: its kind, its own text lines and its children.
private struct Block {
	BlockType type;      // kind of the block
	string[] text;       // text lines belonging directly to this block
	Block[] blocks;      // nested child blocks
	size_t headerLevel;  // heading level, set for header blocks

	// A human-readable toString for debugging.
	string toString()
	{
		return toStringNested;
	}

	// toString implementation; indents nested blocks by two spaces per level.
	string toStringNested(uint depth = 0)
	{
		import std.conv: to;

		immutable string pad = " ".repeat(depth * 2).joiner.array.to!string;

		string dump = pad ~ "%s\n".format(type) ~
		              pad ~ "%s\n".format(text);
		foreach (ref child; blocks)
			dump ~= child.toStringNested(depth + 1);
		return dump ~ pad ~ "%s\n".format(headerLevel);
	}
}
361 
/*
	Groups classified lines into a tree of blocks below `root`.

	Lines are consumed from the front of `lines` as long as their indentation
	starts with `base_indent`; parsing stops at the first non-blank line that
	belongs to an outer level. Lines indented by exactly one additional
	whitespace level form a code block, any other additional indentation is
	parsed recursively as a nested block.

	Params:
		root = block receiving the parsed child blocks
		lines = remaining lines; consumed lines are removed
		base_indent = indentation of the block being parsed
		settings = parser settings, passed on to recursive calls
*/
private void parseBlocks(ref Block root, ref Line[] lines, IndentType[] base_indent, scope MarkdownSettings settings)
pure @safe {
	immutable depth = base_indent.length;

	if (depth == 0) root.type = BlockType.Text;
	else if (base_indent[$-1] == IndentType.Quote) root.type = BlockType.Quote;

	while (!lines.empty) {
		auto cur = lines.front;

		// blank lines never start a block on their own
		if (cur.type == LineType.Blank) {
			lines.popFront();
			continue;
		}

		if (cur.indent != base_indent) {
			// a line that is not nested below base_indent ends this block
			if (cur.indent.length < depth || cur.indent[0 .. depth] != base_indent)
				return;

			auto code_indent = base_indent ~ IndentType.White;
			if (cur.indent != code_indent) {
				// other additional indentation: parse as nested block
				Block nested;
				parseBlocks(nested, lines, cur.indent[0 .. depth+1], settings);
				root.blocks ~= nested;
				continue;
			}

			// exactly one additional whitespace level: indented code block
			Block code;
			code.type = BlockType.Code;
			while (!lines.empty) {
				auto candidate = lines.front;
				if (candidate.indent.length < code_indent.length) break;
				if (candidate.indent[0 .. code_indent.length] != code_indent) break;
				code.text ~= candidate.unindent(code_indent.length);
				lines.popFront();
			}
			root.blocks ~= code;
			continue;
		}

		Block b;
		final switch (cur.type) {
			case LineType.Undefined: assert(false);
			case LineType.Blank: assert(false);
			case LineType.Plain:
				immutable underlined = lines.length >= 2 && lines[1].type == LineType.SetextHeader;
				if (!underlined) {
					b.type = BlockType.Paragraph;
					b.text = skipText(lines, base_indent);
					break;
				}
				// plain line followed by an underline: '=' is level 1, anything else level 2
				b.type = BlockType.Header;
				b.text = [cur.unindented];
				b.headerLevel = lines[1].unindented.strip()[0] == '=' ? 1 : 2;
				lines.popFrontN(2);
				break;
			case LineType.Hline:
				lines.popFront();
				b.type = BlockType.Plain;
				b.text = ["<hr>"];
				break;
			case LineType.AtxHeader:
				b.type = BlockType.Header;
				string title = cur.unindented;
				// the number of leading '#' characters determines the level
				size_t level = 0;
				while (level < title.length && title[level] == '#')
					level++;
				b.headerLevel = level;
				title = title[level .. $];
				// drop closing '#' characters and trailing spaces
				while (title.length > 0 && (title[$-1] == '#' || title[$-1] == ' '))
					title = title[0 .. $-1];
				b.text = [title];
				lines.popFront();
				break;
			case LineType.SetextHeader:
				// underline without a preceding plain line: drop the line
				lines.popFront();
				break;
			case LineType.UList:
			case LineType.OList:
				immutable list_type = cur.type;
				b.type = list_type == LineType.UList ? BlockType.UList : BlockType.OList;
				auto item_indent = base_indent ~ IndentType.White;
				bool is_first = true;
				bool para_mode = false;
				while (!lines.empty && lines.front.type == list_type && lines.front.indent == base_indent) {
					Block item;
					item.text = skipText(lines, item_indent);
					item.text[0] = removeListPrefix(item.text[0], list_type);

					// emit <p></p> if there are blank lines between the items;
					// this is decided once, after the first item
					if (is_first) {
						para_mode = !lines.empty && lines.front.type == LineType.Blank;
						is_first = false;
					}
					if (para_mode) {
						Block para;
						para.type = BlockType.Paragraph;
						para.text = item.text;
						item.blocks ~= para;
						item.text = null;
					}

					parseBlocks(item, lines, item_indent, settings);
					item.type = BlockType.ListItem;
					b.blocks ~= item;
				}
				break;
			case LineType.HtmlBlock:
				auto start_tag = parseHtmlBlockLine(cur.unindented);
				if (!start_tag.isHtmlBlock || !start_tag.open)
					break;

				b.type = BlockType.Plain;
				// counts open tags with the same name as the start tag
				int nesting = 0;
				while (!lines.empty) {
					if (lines.front.indent.length < depth) break;
					if (lines.front.indent[0 .. depth] != base_indent) break;

					auto raw = lines.front.unindent(depth);
					auto tag = parseHtmlBlockLine(raw);
					b.text ~= raw;
					lines.popFront();
					if (tag.isHtmlBlock && tag.tagName == start_tag.tagName)
						nesting += tag.open ? 1 : -1;
					if (nesting <= 0) break;
				}
				break;
			case LineType.CodeBlockDelimiter:
				lines.popFront(); // TODO: get language from line
				b.type = BlockType.Code;
				while (!lines.empty) {
					if (lines.front.indent.length < depth) break;
					if (lines.front.indent[0 .. depth] != base_indent) break;
					// the closing delimiter is consumed but not part of the code
					if (lines.front.type == LineType.CodeBlockDelimiter) {
						lines.popFront();
						break;
					}
					b.text ~= lines.front.unindent(depth);
					lines.popFront();
				}
				break;
		}
		root.blocks ~= b;
	}
}
499 
/*
	Consumes the front line plus all directly following plain lines that
	match `indent` and returns their unindented text.

	The first line is always consumed, regardless of its type. Each line is
	unindented by at most `indent.length` levels.

	Params:
		lines = remaining lines, must not be empty; consumed lines are removed
		indent = indentation of the text block

	Returns: the collected text lines
*/
private string[] skipText(ref Line[] lines, IndentType[] indent)
pure @safe {
	static bool matchesIndent(IndentType[] indent, IndentType[] base_indent)
	{
		// Any *plain* line with a higher indent should still be a part of
		// a paragraph read by skipText(). Rejecting such lines resulted in
		// text such as:
		// ---
		// First line
		//         Second line
		// ---
		// being interpreted as a paragraph followed by a code block, even though
		// other Markdown processors would interpret it as a single paragraph.
		if (indent.length > base_indent.length) return true;
		if (indent != base_indent[0 .. indent.length]) return false;

		// Only the last quote level of base_indent is relevant: the line needs
		// more levels than there are levels following that quote level.
		foreach_reverse (i, tp; base_indent) {
			if (tp != IndentType.Quote) continue;
			immutable trailing = base_indent.length-1 - i;
			return indent.length > trailing;
		}
		return true;
	}

	string[] collected;

	do {
		auto first = lines.front;
		collected ~= first.unindent(min(indent.length, first.indent.length));
		lines.popFront();
	} while (!lines.empty
			&& matchesIndent(lines.front.indent, indent)
			&& lines.front.type == LineType.Plain);

	return collected;
}
536 
/// private
/*
	Writes the HTML for `block` and, recursively, for all of its children.

	Params:
		dst = output range receiving the HTML
		block = block to write
		links = link references used to resolve reference style links
		settings = parser settings
*/
private void writeBlock(R)(ref R dst, ref const Block block, LinkRef[string] links, scope MarkdownSettings settings)
{
	// Writes all child blocks in order.
	void writeChildren() {
		foreach (ref child; block.blocks)
			writeBlock(dst, child, links, settings);
	}

	final switch (block.type) {
		case BlockType.Plain:
			// plain text is written line by line without any escaping
			foreach (line; block.text) {
				dst.put(line);
				dst.put("\n");
			}
			writeChildren();
			break;
		case BlockType.Text:
			writeMarkdownEscaped(dst, block, links, settings);
			writeChildren();
			break;
		case BlockType.Paragraph:
			assert(block.blocks.length == 0);
			dst.put("<p>");
			writeMarkdownEscaped(dst, block, links, settings);
			dst.put("</p>\n");
			break;
		case BlockType.Header:
			assert(block.blocks.length == 0);
			// shift the level according to the configured base level
			auto level = block.headerLevel + (settings ? settings.headingBaseLevel-1 : 0);
			dst.formattedWrite("<h%s id=\"%s\">", level, block.text[0].asSlug);
			assert(block.text.length == 1);
			writeMarkdownEscaped(dst, block.text[0], links, settings);
			dst.formattedWrite("</h%s>\n", level);
			break;
		case BlockType.OList:
		case BlockType.UList:
			immutable ordered = block.type == BlockType.OList;
			dst.put(ordered ? "<ol>\n" : "<ul>\n");
			writeChildren();
			dst.put(ordered ? "</ol>\n" : "</ul>\n");
			break;
		case BlockType.ListItem:
			dst.put("<li>");
			writeMarkdownEscaped(dst, block, links, settings);
			writeChildren();
			dst.put("</li>\n");
			break;
		case BlockType.Code:
			assert(block.blocks.length == 0);
			dst.put("<pre class=\"prettyprint\"><code>");
			if (settings.processCode !is null) {
				// escape into a temporary buffer so the hook sees the whole block
				auto escaped = appender!string();
				foreach (line; block.text) {
					filterHTMLEscape(escaped, line);
					escaped.put("\n");
				}
				dst.put(settings.processCode(escaped.data));
			} else {
				foreach (line; block.text) {
					filterHTMLEscape(dst, line);
					dst.put("\n");
				}
			}
			dst.put("</code></pre>");
			break;
		case BlockType.Quote:
			dst.put("<blockquote>");
			writeMarkdownEscaped(dst, block, links, settings);
			writeChildren();
			dst.put("</blockquote>\n");
			break;
	}
}
617 
/*
	Writes the text lines of `block` with inline Markdown converted to HTML.

	The lines are joined by "<br>" if MarkdownFlags.keepLineBreaks is set and
	by a newline otherwise. A final newline is written unless the block has
	no text lines.
*/
private void writeMarkdownEscaped(R)(ref R dst, ref const Block block, in LinkRef[string] links, scope MarkdownSettings settings)
{
	auto lines = cast(string[])block.text;
	string separator = (settings.flags & MarkdownFlags.keepLineBreaks) ? "<br>" : "\n";
	writeMarkdownEscaped(dst, lines.join(separator), links, settings);
	if (lines.length > 0) dst.put("\n");
}
625 
/// private
/*
	Converts the inline Markdown elements of `ln` to HTML and writes the
	result to `dst`.

	Handles backslash escapes, emphasis, inline code, links, images, auto
	links and line breaks. A `<br/>` is appended if the text ends with two
	spaces.

	Params:
		dst = output range receiving the HTML
		ln = text to convert
		linkrefs = link references used to resolve reference style links
		settings = parser settings
*/
private void writeMarkdownEscaped(R)(ref R dst, string ln, in LinkRef[string] linkrefs, scope MarkdownSettings settings)
{
	// Applies the user supplied URL filter, if there is one.
	string filterLink(string lnk, bool is_image) {
		return settings.urlFilter ? settings.urlFilter(lnk, is_image) : lnk;
	}

	// Copies the next input character verbatim and consumes it.
	void copyChar() {
		dst.put(ln[0]);
		ln = ln[1 .. $];
	}

	// Writes the optional title attribute shared by links and images.
	void putTitle(string title) {
		if (!title.length) return;
		dst.put(" title=\"");
		filterHTMLAttribEscape(dst, title);
		dst.put("\"");
	}

	// opening/closing tags indexed by (emphasis level - 1)
	static immutable string[3] em_open = ["<em>", "<strong>", "<strong><em>"];
	static immutable string[3] em_close = ["</em>", "</strong>", "</em></strong>"];

	// has to be determined up front because ln is consumed below
	immutable trailing_break = ln.endsWith("  ");

	while (ln.length > 0) {
		switch (ln[0]) {
			case '\\':
				if (ln.length < 2) {
					copyChar();
					break;
				}
				switch (ln[1]) {
					case '\'', '`', '*', '_', '{', '}', '[', ']',
						'(', ')', '#', '+', '-', '.', '!':
						// escaped special character: drop the backslash
						dst.put(ln[1]);
						break;
					default:
						dst.put(ln[0 .. 2]);
						break;
				}
				ln = ln[2 .. $];
				break;
			case '_':
				if (settings.flags & MarkdownFlags.disableUnderscoreEmphasis) {
					copyChar();
					break;
				}
				goto case;
			case '*':
				string text;
				immutable em = parseEmphasis(ln, text);
				if (em == 0) {
					copyChar();
					break;
				}
				dst.put(em_open[em - 1]);
				filterHTMLEscape(dst, text, HTMLEscapeFlags.escapeMinimal);
				dst.put(em_close[em - 1]);
				break;
			case '`':
				string code;
				if (!parseInlineCode(ln, code)) {
					copyChar();
					break;
				}
				dst.put("<code class=\"prettyprint\">");
				if (settings.processCode !is null) {
					auto escaped = appender!string();
					filterHTMLEscape(escaped, code, HTMLEscapeFlags.escapeMinimal);
					dst.put(settings.processCode(escaped.data));
				} else {
					filterHTMLEscape(dst, code, HTMLEscapeFlags.escapeMinimal);
				}
				dst.put("</code>");
				break;
			case '[':
				Link link;
				if (!parseLink(ln, link, linkrefs)) {
					copyChar();
					break;
				}
				dst.put("<a href=\"");
				filterHTMLAttribEscape(dst, filterLink(link.url, false));
				dst.put("\"");
				putTitle(link.title);
				dst.put(">");
				// the link text may contain inline Markdown itself
				writeMarkdownEscaped(dst, link.text, linkrefs, settings);
				dst.put("</a>");
				break;
			case '!':
				Link image;
				if (parseLink(ln, image, linkrefs)) {
					dst.put("<img src=\"");
					filterHTMLAttribEscape(dst, filterLink(image.url, true));
					dst.put("\" alt=\"");
					filterHTMLAttribEscape(dst, image.text);
					dst.put("\"");
					putTitle(image.title);
					dst.put(">");
				} else if (ln.length >= 2) {
					// not an image: copy the '!' together with the next character
					dst.put(ln[0 .. 2]);
					ln = ln[2 .. $];
				} else {
					copyChar();
				}
				break;
			case '>':
				if (settings.flags & MarkdownFlags.noInlineHtml) dst.put("&gt;");
				else dst.put(ln[0]);
				ln = ln[1 .. $];
				break;
			case '<':
				string url;
				if (parseAutoLink(ln, url)) {
					immutable is_email = url.startsWith("mailto:");
					dst.put("<a href=\"");
					if (is_email) filterHTMLAllEscape(dst, url);
					else filterHTMLAttribEscape(dst, filterLink(url, false));
					dst.put("\">");
					// the "mailto:" prefix is not part of the visible text
					if (is_email) filterHTMLAllEscape(dst, url[7 .. $]);
					else filterHTMLEscape(dst, url, HTMLEscapeFlags.escapeMinimal);
					dst.put("</a>");
				} else if (ln.startsWith("<br>")) {
					// always support line breaks, since we embed them here ourselves!
					dst.put("<br/>");
					ln = ln[4 .. $];
				} else if (ln.startsWith("<br/>")) {
					dst.put("<br/>");
					ln = ln[5 .. $];
				} else {
					if (settings.flags & MarkdownFlags.noInlineHtml) dst.put("&lt;");
					else dst.put(ln[0]);
					ln = ln[1 .. $];
				}
				break;
			default:
				copyChar();
				break;
		}
	}

	if (trailing_break) dst.put("<br/>");
}
773 
// Tests if the line passes allOf() for the character set of spaces and tabs.
private bool isLineBlank(string ln)
pure @safe {
	enum blank_chars = " \t";
	return ln.allOf(blank_chars);
}
778 
/*
	Tests if the line is the underline of a setext style header.

	After optional leading whitespace, such a line consists of a run of '='
	or of `subHeaderChar` characters, followed by nothing but spaces and tabs.
*/
private bool isSetextHeaderLine(string ln, char subHeaderChar)
pure @safe {
	ln = stripLeft(ln);
	if (ln.length < 1) return false;

	immutable marker = ln[0];
	if (marker != '=' && marker != subHeaderChar) return false;

	// skip the run of underline characters
	while (!ln.empty && ln.front == marker)
		ln.popFront();
	return allOf(ln, " \t");
}
793 
/*
	Tests if the line is an ATX style header: after optional leading
	whitespace, one to six '#' characters followed by a space.
*/
private bool isAtxHeaderLine(string ln)
pure @safe {
	ln = stripLeft(ln);

	size_t hashes = 0;
	foreach (ch; ln) {
		if (ch != '#') break;
		hashes++;
	}

	if (hashes < 1 || hashes > 6) return false;
	return hashes < ln.length && ln[hashes] == ' ';
}
802 
/*
	Tests if the line is a horizontal rule: it consists only of spaces and
	one of the characters '-', '*' or '_', which has to occur at least three
	times.
*/
private bool isHlineLine(string ln)
pure @safe {
	// each entry: allowed characters, the second one being the rule character
	static immutable string[] rule_sets = [" -", " *", " _"];

	foreach (set; rule_sets) {
		if (allOf(ln, set) && count(ln, set[1]) >= 3)
			return true;
	}
	return false;
}
810 
// Tests if the first non-whitespace character of the line is '>'.
private bool isQuoteLine(string ln)
pure @safe {
	auto trimmed = stripLeft(ln);
	return trimmed.length > 0 && trimmed[0] == '>';
}
815 
/*
	Counts the '>' markers at the start of the line. Whitespace in front of
	and between the markers is ignored.
*/
private size_t getQuoteLevel(string ln)
pure @safe {
	size_t level = 0;
	for (ln = stripLeft(ln); ln.length > 0 && ln[0] == '>'; ln = stripLeft(ln[1 .. $]))
		level++;
	return level;
}
826 
/*
	Tests if the line starts an unordered list item: after optional leading
	whitespace, one of '*', '+' or '-' followed by a space or a tab.
*/
private bool isUListLine(string ln)
pure @safe {
	ln = stripLeft(ln);
	return ln.length >= 2
		&& canFind("*+-", ln[0])
		&& (ln[1] == ' ' || ln[1] == '\t');
}
835 
/*
	Tests if the line starts an ordered list item: after optional leading
	whitespace, one or more ASCII digits, a '.' and a space or a tab.
*/
private bool isOListLine(string ln)
pure @safe {
	ln = stripLeft(ln);

	// length of the leading run of digits
	size_t digits = 0;
	while (digits < ln.length && ln[digits] >= '0' && ln[digits] <= '9')
		digits++;
	if (digits == 0) return false;

	auto rest = ln[digits .. $];
	if (rest.length < 2 || rest[0] != '.') return false;
	return rest[1] == ' ' || rest[1] == '\t';
}
850 
/*
	Removes the list marker from the first line of a list item.

	For ordered lists everything up to and including the first '.' is
	removed, for unordered lists the leading whitespace and the bullet
	character. Whitespace following the marker is stripped in both cases.
	`tp` has to be LineType.OList or LineType.UList.
*/
private string removeListPrefix(string str, LineType tp)
pure @safe {
	if (tp == LineType.OList) {
		// skip bullets and output using normal escaping
		auto dot = str.indexOfCT('.');
		assert(dot > 0);
		return str[dot+1 .. $].stripLeft();
	}
	if (tp == LineType.UList) {
		auto bullet = str.stripLeft();
		return stripLeft(bullet[1 .. $]);
	}
	assert(false);
}
863 
864 
/*
	Analyzes a line that possibly consists of a single HTML block tag.

	Returns a struct with the following fields:
		isHtmlBlock = true if the stripped line is a single tag ending in '>'
			whose name is listed in s_blockTags
		tagName = name of the tag, as far as it could be parsed
		open = false if the tag starts with "</", true otherwise
*/
private auto parseHtmlBlockLine(string ln)
pure @safe {
	struct HtmlBlockInfo {
		bool isHtmlBlock;
		string tagName;
		bool open;
	}

	HtmlBlockInfo ret;
	ret.isHtmlBlock = false;
	ret.open = true;

	ln = strip(ln);
	if (ln.length < 3 || ln[0] != '<') return ret;

	// skip '<' and, for closing tags, the '/'
	ln = ln[1 .. $];
	if (ln[0] == '/') {
		ret.open = false;
		ln = ln[1 .. $];
	}
	if (!isAlpha(ln[0])) return ret;

	// the tag name extends up to the first space or '>'
	size_t name_len = 0;
	while (name_len < ln.length && ln[name_len] != ' ' && ln[name_len] != '>')
		name_len++;
	ret.tagName = ln[0 .. name_len];
	ln = ln[name_len .. $];

	// the first '>' has to be the last character of the line
	auto close = ln.indexOf('>');
	if (close < 0 || close != ln.length-1) return ret;

	ret.isHtmlBlock = s_blockTags.canFind(ret.tagName);
	return ret;
}
901 
// Tests if the line consists of a single opening HTML block tag.
private bool isHtmlBlockLine(string ln)
pure @safe {
	const info = parseHtmlBlockLine(ln);
	if (!info.isHtmlBlock) return false;
	return info.open;
}
907 
// Tests if the line consists of a single closing HTML block tag.
private bool isHtmlBlockCloseLine(string ln)
pure @safe {
	const info = parseHtmlBlockLine(ln);
	if (!info.isHtmlBlock) return false;
	return !info.open;
}
913 
// Tests if the line starts with three backticks.
private bool isCodeBlockDelimiter(string ln)
pure @safe {
	enum fence = "```";
	return ln.length >= fence.length && ln[0 .. fence.length] == fence;
}
918 
// Returns the tag name that parseHtmlBlockLine() extracts from the line.
private string getHtmlTagName(string ln)
pure @safe {
	auto info = parseHtmlBlockLine(ln);
	return info.tagName;
}
923 
// Tests if the line starts with a tab or with four spaces.
private bool isLineIndented(string ln)
pure @safe {
	// the multi-needle startsWith returns 0 if none of the prefixes matches
	return ln.startsWith("\t", "    ") != 0;
}
928 
/*
	Removes one indentation level (a tab or four spaces) from the start of
	the line. The line has to be indented.
*/
private string unindentLine(string ln)
pure @safe {
	if (ln.length >= 1 && ln[0] == '\t') return ln[1 .. $];
	if (ln.length >= 4 && ln[0 .. 4] == "    ") return ln[4 .. $];
	assert(false);
}
935 
/*
	Parses an emphasis span at the start of `str`.

	The span is delimited by one to three '*' or '_' characters. The closing
	delimiter has to equal the opening one and the enclosed text must not be
	empty.

	Params:
		str = input; advanced past the closing delimiter on success,
			unchanged otherwise
		text = receives the enclosed text on success

	Returns: the length of the delimiter (1 to 3) or 0 if there is no
		emphasis span
*/
private int parseEmphasis(ref string str, ref string text)
pure @safe {
	if (str.length < 3) return 0;

	immutable marker = str[0];
	if (marker != '*' && marker != '_') return 0;

	// the delimiter is the leading run of up to three marker characters
	size_t run = 1;
	while (run < 3 && str[run] == marker)
		run++;
	string delim = str[0 .. run];
	string rest = str[run .. $];

	auto end = () @trusted { return rest.indexOf(delim); }();
	if (end < 1) return 0;

	text = rest[0 .. end];
	str = rest[end + delim.length .. $];
	return cast(int)delim.length;
}
960 
/*
	Parses an inline code span at the start of `str`.

	The span is delimited by one or two backticks. The closing delimiter has
	to equal the opening one and the enclosed code must not be empty.

	Params:
		str = input; advanced past the closing delimiter on success,
			unchanged otherwise
		code = receives the enclosed code on success

	Returns: true if a code span was found
*/
private bool parseInlineCode(ref string str, ref string code)
pure @safe {
	if (str.length < 3 || str[0] != '`') return false;

	immutable size_t ticks = str[1] == '`' ? 2 : 1;
	string delim = str[0 .. ticks];
	string rest = str[ticks .. $];

	auto end = () @trusted { return rest.indexOf(delim); }();
	if (end < 1) return false;

	code = rest[0 .. end];
	str = rest[end + delim.length .. $];
	return true;
}
978 
/*
	Parses a link or an image at the start of `str`.

	Supported forms are `[text](url "title")` and the reference forms
	`[text][refid]` and `[text][]`, each optionally prefixed by '!'.
	Reference ids are converted to lower case and looked up in `linkrefs`.

	Params:
		str = input; advanced past the link on success, unchanged otherwise
		dst = receives text, URL and title; may be partially written even
			if parsing fails
		linkrefs = known link references

	Returns: true if a link was parsed successfully
*/
private bool parseLink(ref string str, ref Link dst, in LinkRef[string] linkrefs)
pure @safe {
	string rest = str;
	if (rest.length < 3) return false;
	// ignore img-link prefix
	if (rest[0] == '!') rest = rest[1 .. $];

	// parse the text part [text]
	if (rest[0] != '[') return false;
	auto text_end = rest.matchBracket();
	if (text_end < 1) return false;
	dst.text = rest[1 .. text_end];
	rest = rest[text_end+1 .. $];

	// parse either (link '['"title"']') or '[' ']'[refid]
	if (rest.length < 2) return false;

	string refid;
	if (rest[0] != '(') {
		// reference form, a single space is allowed in front of the id
		if (rest[0] == ' ') rest = rest[1 .. $];
		if (rest[0] != '[') return false;
		rest = rest[1 .. $];
		auto id_end = rest.indexOfCT(']');
		if (id_end < 0) return false;
		// an empty id means that the link text is used as the id
		refid = id_end == 0 ? dst.text : rest[0 .. id_end];
		rest = rest[id_end+1 .. $];
	} else {
		auto paren_end = rest.matchBracket();
		if (paren_end < 1) return false;
		auto inner = rest[1 .. paren_end];

		// a title has to be separated from the URL by whitespace
		immutable quote = inner.indexOfCT('"');
		if (quote > 1 && inner[quote - 1].isWhite()) {
			dst.url = inner[0 .. quote].stripRight();
			immutable len = inner[quote .. $].lastIndexOf('"');
			if (len == 0) return false;
			assert(len > 0);
			dst.title = inner[quote + 1 .. quote + len];
		} else {
			dst.url = inner.stripRight();
			dst.title = null;
		}

		// remove the angle brackets of <url>
		if (dst.url.startsWith("<") && dst.url.endsWith(">"))
			dst.url = dst.url[1 .. $-1];
		rest = rest[paren_end+1 .. $];
	}

	if (refid.length > 0) {
		auto found = toLower(refid) in linkrefs;
		if (!found) {
			// debug if (!__ctfe) logDebug("[LINK REF NOT FOUND: '%s'", refid);
			return false;
		}
		dst.url = found.url;
		dst.title = found.title;
	}

	str = rest;
	return true;
}
1039 
// Exercises parseLink() with inline links, reference links, titles, nested
// brackets and a list of inputs that must be rejected.
@safe unittest
{
	// Asserts that `s` parses successfully and yields exactly `exp`.
	static void expectLink(string s, Link exp, in LinkRef[string] refs)
	{
		Link parsed;
		assert(parseLink(s, parsed, refs), s);
		assert(parsed == exp);
	}

	LinkRef[string] refs;
	refs["ref"] = LinkRef("ref", "target", "title");

	// inline links with and without title
	expectLink(`[link](target)`, Link("link", "target"), null);
	expectLink(`[link](target "title")`, Link("link", "target", "title"), null);
	expectLink(`[link](target  "title")`, Link("link", "target", "title"), null);
	expectLink(`[link](target "title"  )`, Link("link", "target", "title"), null);

	expectLink(`[link](target)`, Link("link", "target"), null);
	expectLink(`[link](target "title")`, Link("link", "target", "title"), null);

	// reference links
	expectLink(`[link][ref]`, Link("link", "target", "title"), refs);
	expectLink(`[ref][]`, Link("ref", "target", "title"), refs);

	// nested brackets in the link text
	expectLink(`[link[with brackets]](target)`, Link("link[with brackets]", "target"), null);
	expectLink(`[link[with brackets]][ref]`, Link("link[with brackets]", "target", "title"), refs);

	// whitespace inside the URL part
	expectLink(`[link](/target with spaces )`, Link("link", "/target with spaces"), null);
	expectLink(`[link](/target with spaces "title")`, Link("link", "/target with spaces", "title"), null);

	expectLink(`[link](white-space  "around title" )`, Link("link", "white-space", "around title"), null);
	expectLink(`[link](tabs	"around title"	)`, Link("link", "tabs", "around title"), null);

	// empty title and quotes that are not separated from the URL
	expectLink(`[link](target "")`, Link("link", "target", ""), null);
	expectLink(`[link](target-no-title"foo" )`, Link("link", "target-no-title\"foo\"", ""), null);

	expectLink(`[link](<target>)`, Link("link", "target"), null);

	auto rejected = [
		`text`, `[link](target`, `[link]target)`, `[link]`,
		`[link(target)`, `link](target)`, `[link] (target)`,
		`[link][noref]`, `[noref][]`
	];
	Link scratch;
	foreach (input; rejected)
		assert(!parseLink(input, scratch, refs), input);
}
1085 
/*
	Tries to parse an automatic link of the form `<target>` at the start of `str`.

	The target must not contain spaces or tabs and must contain at least one
	':' or '@'. A target containing an '@' (not at the very start) is treated
	as an e-mail address and gets a "mailto:" prefix, unless the '@' is
	preceded by something that looks like a URI scheme (`scheme:`), as in
	`<mailto:foo@bar>` or `<http://user@host/>`.

	Params:
		str = Input text; on success it is advanced past the closing '>',
			on failure it is left untouched.
		url = Receives the link target on success; left untouched on failure.

	Returns: true if an automatic link was parsed.
*/
private bool parseAutoLink(ref string str, ref string url)
pure @safe {
	import std.algorithm.searching : all;
	import std.ascii : isAlphaNum;

	string pstr = str;
	if( pstr.length < 3 ) return false;
	if( pstr[0] != '<' ) return false;
	pstr = pstr[1 .. $];
	immutable cidx = pstr.indexOf('>');
	if( cidx < 0 ) return false;

	// work on a local so that `url` is only written once the link is accepted
	string target = pstr[0 .. cidx];
	if( target.indexOf(' ') >= 0 || target.indexOf('\t') >= 0 ) return false;
	immutable atidx = target.indexOf('@');
	immutable colonidx = target.indexOf(':');
	if( atidx < 0 && colonidx < 0 ) return false;

	if( atidx > 0 ){
		// A non-empty "scheme:" prefix in front of the '@' means that the
		// target already is a complete URI and must not be prefixed again.
		immutable hasScheme = colonidx > 0 && colonidx < atidx
			&& target[0 .. colonidx].all!(ch => isAlphaNum(ch) || ch == '+' || ch == '-' || ch == '.');
		if( !hasScheme ) target = "mailto:" ~ target;
	}

	str = pstr[cidx+1 .. $];
	url = target;
	return true;
}
1101 
/*
	Collects all link reference definitions and removes their lines from `lines`.

	A reference definition is a non-indented line of one of the forms

		[refid]: link "opt text"
		[refid]: <link> "opt text"

	where the optional title may be written as "opt text", 'opt text' or
	(opt text). The link and the title may be separated by spaces or tabs.

	Params:
		lines = The lines to scan; replaced by the lines that are not
			reference definitions, in their original order.

	Returns: The detected references, keyed by the lower-cased reference id.
*/
private LinkRef[string] scanForReferences(ref string[] lines)
pure @safe {
	LinkRef[string] ret;
	bool[size_t] reflines;

	foreach( lnidx, ln; lines ){
		if( isLineIndented(ln) ) continue;
		ln = strip(ln);
		if( !ln.startsWith("[") ) continue;
		ln = ln[1 .. $];

		auto idx = () @trusted { return ln.indexOf("]:"); }();
		if( idx < 0 ) continue;
		string refid = ln[0 .. idx];
		ln = stripLeft(ln[idx+2 .. $]);

		string url;
		if( ln.startsWith("<") ){
			// <link> form: the URL is everything between the angle brackets
			idx = ln.indexOfCT('>');
			if( idx < 0 ) continue;
			url = ln[1 .. idx];
			ln = ln[idx+1 .. $];
		} else {
			// Bare form: the URL ends at the first space or tab, whichever
			// comes first. Looking for spaces only would swallow a tab
			// separator (and part of the title) whenever the title itself
			// contains a space.
			immutable spidx = ln.indexOfCT(' ');
			immutable tabidx = ln.indexOfCT('\t');
			if( spidx < 0 ) idx = tabidx;
			else if( tabidx < 0 ) idx = spidx;
			else idx = spidx < tabidx ? spidx : tabidx;

			if( idx < 0 ){
				// no white space at all: the whole rest of the line is the URL
				url = ln;
				ln = ln[$ .. $];
			} else {
				url = ln[0 .. idx];
				ln = ln[idx+1 .. $];
			}
		}
		ln = stripLeft(ln);

		// the title needs a delimiter pair and at least one character between
		string title;
		if( ln.length >= 3 ){
			if( ln[0] == '(' && ln[$-1] == ')' || ln[0] == '\"' && ln[$-1] == '\"' || ln[0] == '\'' && ln[$-1] == '\'' )
				title = ln[1 .. $-1];
		}

		ret[toLower(refid)] = LinkRef(refid, url, title);
		reflines[lnidx] = true;

		// debug if (!__ctfe) logTrace("[detected ref on line %d]", lnidx+1);
	}

	// remove all lines containing references
	auto nonreflines = appender!(string[])();
	nonreflines.reserve(lines.length);
	foreach( i, ln; lines )
		if( i !in reflines )
			nonreflines.put(ln);
	lines = nonreflines.data();

	return ret;
}
1169 
1170 
/**
	Generates an identifier suitable for use within a URL.

	The returned range yields only lower case ASCII letters, ASCII digits and
	dashes (-). Upper case ASCII letters are converted to lower case and each
	run of other characters between two alphanumeric characters is collapsed
	into a single dash. Runs at the very beginning or end of the input are
	dropped, so the result never starts or ends with a dash.
*/
auto asSlug(R)(R text)
	if (isInputRange!R && is(typeof(R.init.front) == dchar))
{
	static struct SlugRange {
		private {
			R _source;
			bool _pendingDash; // a dash has to be emitted before the next input character
		}

		this(R source)
		{
			_source = source;
			// leading separators never produce a dash
			dropSeparators();
		}

		@property bool empty() const { return !_pendingDash && _source.empty; }

		@property char front() const {
			if (_pendingDash) return '-';

			// dropSeparators() guarantees an ASCII alphanumeric character here
			immutable ch = cast(char)_source.front;
			return (ch >= 'A' && ch <= 'Z') ? cast(char)(ch + ('a' - 'A')) : ch;
		}

		void popFront()
		{
			if (_pendingDash) {
				_pendingDash = false;
				return;
			}

			_source.popFront();
			// emit a dash only if more slug characters follow the skipped run
			if (dropSeparators() && !_source.empty)
				_pendingDash = true;
		}

		// Tells whether `ch` is passed through to the slug (ASCII letter or digit).
		private static bool isSlugChar(dchar ch)
		{
			return (ch >= 'a' && ch <= 'z')
				|| (ch >= 'A' && ch <= 'Z')
				|| (ch >= '0' && ch <= '9');
		}

		// Advances the input to the next slug character (or to its end) and
		// returns whether any character was skipped.
		private bool dropSeparators()
		{
			bool dropped = false;
			for (; !_source.empty && !isSlugChar(_source.front); _source.popFront())
				dropped = true;
			return dropped;
		}
	}
	return SlugRange(text);
}
1236 
unittest {
	import std.algorithm : equal;

	// pairs of [input, expected slug]
	string[2][] cases = [
		["", ""],
		[".,-", ""],
		["abc", "abc"],
		["aBc123", "abc123"],
		["....aBc...123...", "abc-123"]
	];
	foreach (pair; cases)
		assert(equal(asSlug(pair[0]), pair[1]));
}
1245 
// A link reference definition ("[id]: url "title"") as collected by
// scanForReferences.
private struct LinkRef {
	string id;    // reference id as written in the source (the lookup key is its lower-cased form)
	string url;   // link target
	string title; // optional title; null if the definition has none
}
1251 
// Result of parsing a single link, filled in by parseLink.
private struct Link {
	string text;  // link text; also serves as the reference id for links of the form "[text][]"
	string url;   // link target, either given inline or taken from the matching LinkRef
	string title; // optional title; null if none was given
}
1257 
@safe unittest { // images: alt text and optional title attribute
	auto withoutTitle = filterMarkdown("![alt](http://example.org/image)");
	assert(withoutTitle == "<p><img src=\"http://example.org/image\" alt=\"alt\">\n</p>\n");

	auto withTitle = filterMarkdown("![alt](http://example.org/image \"Title\")");
	assert(withTitle == "<p><img src=\"http://example.org/image\" alt=\"alt\" title=\"Title\">\n</p>\n");
}
1264 
@safe unittest { // complex links
	// link text spanning two lines, target wrapped in angle brackets
	auto multiLine = filterMarkdown("their [install\ninstructions](<http://www.brew.sh>) and");
	assert(multiLine == "<p>their <a href=\"http://www.brew.sh\">install\ninstructions</a> and\n</p>\n");

	// an image used as the text of a link
	auto badge = filterMarkdown("[![Build Status](https://travis-ci.org/rejectedsoftware/vibe.d.png)](https://travis-ci.org/rejectedsoftware/vibe.d)");
	assert(badge == "<p><a href=\"https://travis-ci.org/rejectedsoftware/vibe.d\"><img src=\"https://travis-ci.org/rejectedsoftware/vibe.d.png\" alt=\"Build Status\"></a>\n</p>\n");
}
1271 
@safe unittest { // check CTFE-ability
	// initializing an enum forces filterMarkdown to run at compile time
	enum html = filterMarkdown("### some markdown\n[foo][]\n[foo]: /bar");
	enum expected = "<h3 id=\"some-markdown\"> some markdown</h3>\n<p><a href=\"/bar\">foo</a>\n</p>\n";
	assert(html == expected, html);
}
1276 
@safe unittest { // correct line breaks in restrictive mode
	// with the forum defaults a single newline becomes a hard line break
	auto html = filterMarkdown("hello\nworld", MarkdownFlags.forumDefault);
	auto expected = "<p>hello<br/>world\n</p>\n";
	assert(html == expected, html);
}
1281 
1282 /*@safe unittest { // code blocks and blockquotes
1283 	assert(filterMarkdown("\tthis\n\tis\n\tcode") ==
1284 		"<pre><code>this\nis\ncode</code></pre>\n");
1285 	assert(filterMarkdown("    this\n    is\n    code") ==
1286 		"<pre><code>this\nis\ncode</code></pre>\n");
1287 	assert(filterMarkdown("    this\n    is\n\tcode") ==
1288 		"<pre><code>this\nis</code></pre>\n<pre><code>code</code></pre>\n");
1289 	assert(filterMarkdown("\tthis\n\n\tcode") ==
1290 		"<pre><code>this\n\ncode</code></pre>\n");
1291 	assert(filterMarkdown("\t> this") ==
1292 		"<pre><code>&gt; this</code></pre>\n");
1293 	assert(filterMarkdown(">     this") ==
1294 		"<blockquote><pre><code>this</code></pre></blockquote>\n");
1295 	assert(filterMarkdown(">     this\n    is code") ==
1296 		"<blockquote><pre><code>this\nis code</code></pre></blockquote>\n");
1297 }*/