markus / MarkusAutoUpdate / src / NetSparkle / Libraries / MarkdownSharp.cs @ 38d69491
이력 | 보기 | 이력해설 | 다운로드 (69.7 KB)
1 | d8f5045e | taeseongkim | /* |
---|---|---|---|
2 | * MarkdownSharp |
||
3 | * ------------- |
||
4 | * a C# Markdown processor |
||
5 | * |
||
6 | * Markdown is a text-to-HTML conversion tool for web writers |
||
7 | * Copyright (c) 2004 John Gruber |
||
8 | * http://daringfireball.net/projects/markdown/ |
||
9 | * |
||
10 | * Markdown.NET |
||
11 | * Copyright (c) 2004-2009 Milan Negovan |
||
12 | * http://www.aspnetresources.com |
||
13 | * http://aspnetresources.com/blog/markdown_announced.aspx |
||
14 | * |
||
15 | * MarkdownSharp |
||
16 | * Copyright (c) 2009-2011 Jeff Atwood |
||
17 | * http://stackoverflow.com |
||
18 | * http://www.codinghorror.com/blog/ |
||
19 | * http://code.google.com/p/markdownsharp/ |
||
20 | * |
||
21 | * History: Milan ported the Markdown processor to C#. He granted license to me so I can open source it |
||
22 | * and let the community contribute to and improve MarkdownSharp. |
||
23 | * |
||
24 | */ |
||
25 | |||
26 | #region Copyright and license |
||
27 | |||
28 | /* |
||
29 | |||
30 | Copyright (c) 2009 - 2010 Jeff Atwood |
||
31 | |||
32 | http://www.opensource.org/licenses/mit-license.php |
||
33 | |||
34 | Permission is hereby granted, free of charge, to any person obtaining a copy |
||
35 | of this software and associated documentation files (the "Software"), to deal |
||
36 | in the Software without restriction, including without limitation the rights |
||
37 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
||
38 | copies of the Software, and to permit persons to whom the Software is |
||
39 | furnished to do so, subject to the following conditions: |
||
40 | |||
41 | The above copyright notice and this permission notice shall be included in |
||
42 | all copies or substantial portions of the Software. |
||
43 | |||
44 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||
45 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||
46 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
||
47 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||
48 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
||
49 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
||
50 | THE SOFTWARE. |
||
51 | |||
52 | Copyright (c) 2003-2004 John Gruber |
||
53 | <http://daringfireball.net/> |
||
54 | All rights reserved. |
||
55 | |||
56 | Redistribution and use in source and binary forms, with or without |
||
57 | modification, are permitted provided that the following conditions are |
||
58 | met: |
||
59 | |||
60 | * Redistributions of source code must retain the above copyright notice, |
||
61 | this list of conditions and the following disclaimer. |
||
62 | |||
63 | * Redistributions in binary form must reproduce the above copyright |
||
64 | notice, this list of conditions and the following disclaimer in the |
||
65 | documentation and/or other materials provided with the distribution. |
||
66 | |||
67 | * Neither the name "Markdown" nor the names of its contributors may |
||
68 | be used to endorse or promote products derived from this software |
||
69 | without specific prior written permission. |
||
70 | |||
71 | This software is provided by the copyright holders and contributors "as |
||
72 | is" and any express or implied warranties, including, but not limited |
||
73 | to, the implied warranties of merchantability and fitness for a |
||
74 | particular purpose are disclaimed. In no event shall the copyright owner |
||
75 | or contributors be liable for any direct, indirect, incidental, special, |
||
76 | exemplary, or consequential damages (including, but not limited to, |
||
77 | procurement of substitute goods or services; loss of use, data, or |
||
78 | profits; or business interruption) however caused and on any theory of |
||
79 | liability, whether in contract, strict liability, or tort (including |
||
80 | negligence or otherwise) arising in any way out of the use of this |
||
81 | software, even if advised of the possibility of such damage. |
||
82 | */ |
||
83 | |||
84 | #endregion |
||
85 | |||
86 | using System; |
||
87 | using System.Collections.Generic; |
||
88 | using System.Configuration; |
||
89 | using System.Text; |
||
90 | using System.Text.RegularExpressions; |
||
91 | |||
92 | namespace MarkdownSharp |
||
93 | { |
||
94 | |||
/// <summary>
/// Option values that can be handed to the <c>Markdown(MarkdownOptions)</c> constructor
/// to configure a transform.
/// </summary>
/// <remarks>
/// Every property is a plain auto-property, so anything left unset keeps its CLR
/// default (<c>false</c> for the flags, <c>null</c> for the suffix).
/// </remarks>
public class MarkdownOptions
{
    /// <summary>
    /// When true, (most) bare plain URLs are auto-hyperlinked.
    /// WARNING: this is a significant deviation from the markdown spec.
    /// </summary>
    public bool AutoHyperlink { get; set; }

    /// <summary>
    /// When true, RETURN becomes a literal newline.
    /// WARNING: this is a significant deviation from the markdown spec.
    /// </summary>
    public bool AutoNewlines { get; set; }

    /// <summary>
    /// Suffix used to close empty elements: "&gt;" for HTML output, or " /&gt;" for XHTML output.
    /// </summary>
    public string EmptyElementSuffix { get; set; }

    /// <summary>
    /// When true, problematic URL characters like [, ], (, and so forth will be encoded.
    /// WARNING: this is a significant deviation from the markdown spec.
    /// </summary>
    public bool EncodeProblemUrlCharacters { get; set; }

    /// <summary>
    /// When false, email addresses will never be auto-linked.
    /// WARNING: this is a significant deviation from the markdown spec.
    /// </summary>
    public bool LinkEmails { get; set; }

    /// <summary>
    /// When true, bold and italic require non-word characters on either side.
    /// WARNING: this is a significant deviation from the markdown spec.
    /// </summary>
    public bool StrictBoldItalic { get; set; }
}
||
130 | |||
131 | |||
132 | /// <summary> |
||
133 | /// Markdown is a text-to-HTML conversion tool for web writers. |
||
134 | /// Markdown allows you to write using an easy-to-read, easy-to-write plain text format, |
||
135 | /// then convert it to structurally valid XHTML (or HTML). |
||
136 | /// </summary> |
||
137 | public class Markdown |
||
138 | { |
||
// Version string handed out by the Version property.
private const string _version = "1.13";

#region Constructors and Options

/// <summary>
/// Creates a Markdown processor that uses the built-in default option values.
/// Equivalent to calling the <c>Markdown(bool)</c> overload with <c>false</c>.
/// </summary>
public Markdown()
    : this(false)
{
}
||
150 | |||
/// <summary>
/// Creates a Markdown processor, nominally loading options from the appSettings
/// section of a configuration file. The recognised keys were meant to be:
///
///     Markdown.StrictBoldItalic           (true/false)
///     Markdown.EmptyElementSuffix         ("&gt;" or " /&gt;" without the quotes)
///     Markdown.LinkEmails                 (true/false)
///     Markdown.AutoNewLines               (true/false)
///     Markdown.AutoHyperlink              (true/false)
///     Markdown.EncodeProblemUrlCharacters (true/false)
///
/// </summary>
/// <remarks>
/// NOTE: in this copy of the file the loader below is commented out, so the flag
/// currently has no effect and the field defaults are always used.
/// </remarks>
/// <param name="loadOptionsFromConfigFile">
/// Requests loading of the appSettings values listed above (presently ignored).
/// </param>
public Markdown(bool loadOptionsFromConfigFile)
{
    if (loadOptionsFromConfigFile)
    {
        // Disabled configuration loader, kept for reference:
        //
        //            var settings = ConfigurationManager.AppSettings;
        //            foreach (string key in settings.Keys)
        //            {
        //                switch (key)
        //                {
        //                    case "Markdown.AutoHyperlink":
        //                        _autoHyperlink = Convert.ToBoolean(settings[key]);
        //                        break;
        //                    case "Markdown.AutoNewlines":
        //                        _autoNewlines = Convert.ToBoolean(settings[key]);
        //                        break;
        //                    case "Markdown.EmptyElementSuffix":
        //                        _emptyElementSuffix = settings[key];
        //                        break;
        //                    case "Markdown.EncodeProblemUrlCharacters":
        //                        _encodeProblemUrlCharacters = Convert.ToBoolean(settings[key]);
        //                        break;
        //                    case "Markdown.LinkEmails":
        //                        _linkEmails = Convert.ToBoolean(settings[key]);
        //                        break;
        //                    case "Markdown.StrictBoldItalic":
        //                        _strictBoldItalic = Convert.ToBoolean(settings[key]);
        //                        break;
        //                }
        //            }
    }
}
||
193 | |||
/// <summary>
/// Creates a Markdown processor whose settings are copied from <paramref name="options"/>.
/// </summary>
/// <remarks>
/// The boolean flags are copied verbatim, so a flag left unset on the options object
/// arrives here as <c>false</c> (this includes <c>LinkEmails</c>, whose field default is true).
/// A null or empty <c>EmptyElementSuffix</c> is ignored so the " /&gt;" default survives.
/// </remarks>
/// <param name="options">Option values to copy; must not be null.</param>
/// <exception cref="ArgumentNullException"><paramref name="options"/> is null.</exception>
public Markdown(MarkdownOptions options)
{
    if (options == null)
    {
        throw new ArgumentNullException("options");
    }

    _autoHyperlink = options.AutoHyperlink;
    _autoNewlines = options.AutoNewlines;
    _encodeProblemUrlCharacters = options.EncodeProblemUrlCharacters;
    _linkEmails = options.LinkEmails;
    _strictBoldItalic = options.StrictBoldItalic;

    // An options object that never set the suffix carries null; copying that would
    // wipe out the field's " />" default, so only take a real value.
    if (!String.IsNullOrEmpty(options.EmptyElementSuffix))
    {
        _emptyElementSuffix = options.EmptyElementSuffix;
    }
}
||
206 | |||
207 | |||
// Backing store for EmptyElementSuffix; XHTML-style by default.
private string _emptyElementSuffix = " />";

/// <summary>
/// Suffix used to close empty elements: "&gt;" for HTML output, or " /&gt;" for XHTML output.
/// </summary>
public string EmptyElementSuffix
{
    get
    {
        return _emptyElementSuffix;
    }
    set
    {
        _emptyElementSuffix = value;
    }
}

// Backing store for LinkEmails; on by default.
private bool _linkEmails = true;

/// <summary>
/// When false, email addresses will never be auto-linked.
/// WARNING: this is a significant deviation from the markdown spec.
/// </summary>
public bool LinkEmails
{
    get
    {
        return _linkEmails;
    }
    set
    {
        _linkEmails = value;
    }
}

// Backing store for StrictBoldItalic; off by default.
private bool _strictBoldItalic = false;

/// <summary>
/// When true, bold and italic require non-word characters on either side.
/// WARNING: this is a significant deviation from the markdown spec.
/// </summary>
public bool StrictBoldItalic
{
    get
    {
        return _strictBoldItalic;
    }
    set
    {
        _strictBoldItalic = value;
    }
}

// Backing store for AutoNewLines; off by default.
private bool _autoNewlines = false;

/// <summary>
/// When true, RETURN becomes a literal newline.
/// WARNING: this is a significant deviation from the markdown spec.
/// </summary>
public bool AutoNewLines
{
    get
    {
        return _autoNewlines;
    }
    set
    {
        _autoNewlines = value;
    }
}

// Backing store for AutoHyperlink; off by default.
private bool _autoHyperlink = false;

/// <summary>
/// When true, (most) bare plain URLs are auto-hyperlinked.
/// WARNING: this is a significant deviation from the markdown spec.
/// </summary>
public bool AutoHyperlink
{
    get
    {
        return _autoHyperlink;
    }
    set
    {
        _autoHyperlink = value;
    }
}

// Backing store for EncodeProblemUrlCharacters; off by default.
private bool _encodeProblemUrlCharacters = false;

/// <summary>
/// When true, problematic URL characters like [, ], (, and so forth will be encoded.
/// WARNING: this is a significant deviation from the markdown spec.
/// </summary>
public bool EncodeProblemUrlCharacters
{
    get
    {
        return _encodeProblemUrlCharacters;
    }
    set
    {
        _encodeProblemUrlCharacters = value;
    }
}
||
272 | |||
273 | #endregion |
||
274 | |||
/// <summary>
/// Distinguishes the two kinds of token produced by TokenizeHTML.
/// </summary>
private enum TokenType
{
    Text,
    Tag
}

/// <summary>
/// A piece of tokenized input: its kind plus the exact text it covers.
/// </summary>
private struct Token
{
    public TokenType Type;
    public string Value;

    public Token(TokenType type, string value)
    {
        Type = type;
        Value = value;
    }
}
||
287 | |||
/// <summary>
/// Maximum nesting depth of [] and () supported by the transform; implementation detail.
/// Also used as the repeat count when the nested-tag patterns are built.
/// </summary>
private const int _nestDepth = 6;

/// <summary>
/// Tabs are automatically converted to spaces as part of the transform;
/// this constant determines how "wide" those tabs become, in spaces.
/// </summary>
private const int _tabWidth = 4;

// Regex fragments recognising an unordered / ordered list marker.
private const string _markerUL = @"[*+-]";
private const string _markerOL = @"\d+[.]";

// Lookup tables built once by the static constructor:
//   character -> placeholder, placeholder -> character, "\" + character -> placeholder.
private static readonly Dictionary<string, string> _escapeTable;
private static readonly Dictionary<string, string> _invertedEscapeTable;
private static readonly Dictionary<string, string> _backslashEscapeTable;

// Per-transform state, emptied by Setup():
//   link id -> url, link id -> title, placeholder key -> hashed HTML block.
private readonly Dictionary<string, string> _urls = new Dictionary<string, string>();
private readonly Dictionary<string, string> _titles = new Dictionary<string, string>();
private readonly Dictionary<string, string> _htmlBlocks = new Dictionary<string, string>();

// Reset to 0 by Setup(); presumably tracks list nesting during DoLists - confirm there.
private int _listLevel;

// Temporarily replaces "://" where auto-linking shouldn't happen; swapped back in RunSpanGamut.
private static string AutoLinkPreventionMarker = "\x1AP";
||
312 | |||
/// <summary>
/// In the static constructor we initialize what stays the same across all transforms:
/// the three escape lookup tables and the regex that matches any backslash escape.
/// </summary>
static Markdown()
{
    // character -> placeholder, and the reverse mapping
    _escapeTable = new Dictionary<string, string>();
    _invertedEscapeTable = new Dictionary<string, string>();
    // backslash + character -> placeholder
    _backslashEscapeTable = new Dictionary<string, string>();

    const string escapableChars = @"\`*_{}[]()>#+-.!/";
    var alternatives = new List<string>(escapableChars.Length);

    foreach (char escapable in escapableChars)
    {
        string literal = escapable.ToString();
        string placeholder = GetHashKey(literal, isHtmlBlock: false);
        string backslashed = @"\" + literal;

        _escapeTable.Add(literal, placeholder);
        _invertedEscapeTable.Add(placeholder, literal);
        _backslashEscapeTable.Add(backslashed, placeholder);

        alternatives.Add(Regex.Escape(backslashed));
    }

    // One alternation covering every backslash-escaped character.
    _backslashEscapes = new Regex(string.Join("|", alternatives.ToArray()), RegexOptions.Compiled);
}
||
338 | |||
/// <summary>
/// Current version of MarkdownSharp;
/// see http://code.google.com/p/markdownsharp/ for the latest code or to contribute.
/// </summary>
public string Version
{
    get
    {
        return _version;
    }
}
||
347 | |||
/// <summary>
/// Transforms the provided Markdown-formatted text to HTML;
/// see http://en.wikipedia.org/wiki/Markdown
/// </summary>
/// <remarks>
/// The order in which the other steps are called here is essential. Link and image
/// substitutions need to happen before EscapeSpecialChars(), so that any *'s or _'s
/// in the a and img tags get encoded.
/// </remarks>
/// <param name="text">Markdown source; null or empty yields an empty string.</param>
/// <returns>The generated markup followed by a single trailing newline.</returns>
public string Transform(string text)
{
    if (String.IsNullOrEmpty(text))
    {
        return "";
    }

    // Start from clean per-transform state.
    Setup();

    string html = Normalize(text);
    html = HashHTMLBlocks(html);
    html = StripLinkDefinitions(html);
    html = RunBlockGamut(html);
    html = Unescape(html);

    // Drop the per-transform state again.
    Cleanup();

    return html + "\n";
}
||
375 | |||
376 | |||
/// <summary>
/// Perform transformations that form block-level tags like paragraphs, headers, and list items.
/// </summary>
/// <param name="text">Text to process.</param>
/// <param name="unhash">Passed through to FormParagraphs; when false, hashed HTML blocks stay hashed.</param>
/// <returns>The text after all block-level steps have run.</returns>
private string RunBlockGamut(string text, bool unhash = true)
{
    string result = DoHeaders(text);
    result = DoHorizontalRules(result);
    result = DoLists(result);
    result = DoCodeBlocks(result);
    result = DoBlockQuotes(result);

    // HashHTMLBlocks() already ran once in Transform(), but that pass was there to
    // protect raw HTML from the original Markdown source. This second pass protects
    // the markup we have just generated, so that FormParagraphs does not wrap
    // <p> tags around block-level tags.
    result = HashHTMLBlocks(result);

    return FormParagraphs(result, unhash: unhash);
}
||
398 | |||
399 | |||
/// <summary>
/// Perform transformations that occur *within* block-level tags like paragraphs, headers, and list items.
/// </summary>
/// <remarks>
/// The steps are order-dependent; see the inline notes before rearranging anything.
/// </remarks>
private string RunSpanGamut(string text)
{
    string span = DoCodeSpans(text);
    span = EscapeSpecialCharsWithinTagAttributes(span);
    span = EscapeBackslashes(span);

    // Images are handled before anchors, because ![foo][f] looks like an anchor.
    span = DoImages(span);
    span = DoAnchors(span);

    // Auto-links come after DoAnchors(), because < and > can be used as
    // delimiters in inline links like [this](<url>).
    span = DoAutoLinks(span);

    // Put back every "://" that was masked to keep it from being auto-linked.
    span = span.Replace(AutoLinkPreventionMarker, "://");

    span = EncodeAmpsAndAngles(span);
    span = DoItalicsAndBold(span);
    span = DoHardBreaks(span);

    return span;
}
||
425 | |||
// Newlines at the very start or very end of the text.
private static Regex _newlinesLeadingTrailing = new Regex(@"^\n+|\n+\z", RegexOptions.Compiled);
// Paragraph separator: a run of two or more newlines.
private static Regex _newlinesMultiple = new Regex(@"\n{2,}", RegexOptions.Compiled);
// Leading spaces of a paragraph (always matches, possibly empty).
private static Regex _leadingWhitespace = new Regex(@"^[ ]*", RegexOptions.Compiled);

// Placeholder key of a hashed HTML block, in the shape GetHashKey(..., isHtmlBlock: true) produces.
private static Regex _htmlBlockHash = new Regex("\x1AH\\d+H", RegexOptions.Compiled);

/// <summary>
/// Splits on two or more newlines, to form "paragraphs";
/// each paragraph is then unhashed (if it is a hash and unhashing isn't turned off)
/// or span-processed and wrapped in an HTML p tag.
/// </summary>
/// <param name="text">Block-level text, possibly containing HTML-block placeholders.</param>
/// <param name="unhash">When false, paragraphs that start with a placeholder are left untouched.</param>
/// <returns>The paragraphs joined by blank lines.</returns>
private string FormParagraphs(string text, bool unhash = true)
{
    // split on two or more newlines
    string[] grafs = _newlinesMultiple.Split(_newlinesLeadingTrailing.Replace(text, ""));

    for (int i = 0; i < grafs.Length; i++)
    {
        // Ordinal on purpose: a culture-sensitive StartsWith may treat the \x1A control
        // character as ignorable, which would make ordinary text beginning with "H"
        // look like a placeholder and skip the <p> wrapping.
        if (grafs[i].StartsWith("\x1AH", StringComparison.Ordinal))
        {
            // unhashify HTML blocks
            if (unhash)
            {
                int sanityCheck = 50;  // just for safety, guard against an infinite loop
                bool keepGoing = true; // as long as replacements were made, keep going
                while (keepGoing && sanityCheck > 0)
                {
                    keepGoing = false;
                    grafs[i] = _htmlBlockHash.Replace(grafs[i], match =>
                    {
                        string block;
                        if (!_htmlBlocks.TryGetValue(match.Value, out block))
                        {
                            // Looks like a placeholder but was never registered:
                            // leave it alone rather than throwing KeyNotFoundException.
                            return match.Value;
                        }

                        keepGoing = true;
                        return block;
                    });
                    sanityCheck--;
                }
                // If keepGoing is still true here the sanity limit was hit; the upstream
                // project asked for such inputs to be reported at
                // http://code.google.com/p/markdownsharp/
            }
        }
        else
        {
            // do span level processing inside the block, then wrap result in <p> tags
            grafs[i] = _leadingWhitespace.Replace(RunSpanGamut(grafs[i]), "<p>") + "</p>";
        }
    }

    return string.Join("\n\n", grafs);
}
||
477 | |||
478 | |||
/// <summary>
/// Resets the per-transform state: the url, title and HTML-block tables and the list level.
/// </summary>
/// <remarks>
/// Without this reset, entries from one article would conflict with the next when a
/// page contains more than one article (e.g. an index page that shows the N most
/// recent articles).
/// </remarks>
private void Setup()
{
    _listLevel = 0;
    _htmlBlocks.Clear();
    _titles.Clear();
    _urls.Clear();
}
||
490 | |||
/// <summary>
/// Releases the per-transform state once a transform has finished; this is the
/// same reset that Setup() performs.
/// </summary>
private void Cleanup()
{
    Setup();
}
||
495 | |||
// Cache for GetNestedBracketsPattern(); built on first use.
private static string _nestedBracketsPattern;

/// <summary>
/// Reusable pattern to match balanced [brackets]. See Friedl's
/// "Mastering Regular Expressions", 2nd Ed., pp. 328-331.
/// </summary>
/// <remarks>
/// In other words [this] and [this[also]] and [this[also[too]]], up to _nestDepth levels.
/// The pattern contains whitespace and # comments, so it is only usable in a regex
/// built with RegexOptions.IgnorePatternWhitespace.
/// </remarks>
private static string GetNestedBracketsPattern()
{
    if (_nestedBracketsPattern != null)
    {
        return _nestedBracketsPattern;
    }

    string openingLevels = RepeatString(@"
                   (?>              # Atomic matching
                      [^\[\]]+      # Anything other than brackets
                    |
                      \[
                          ", _nestDepth);

    string closingLevels = RepeatString(
                   @" \]
                   )*"
                   , _nestDepth);

    _nestedBracketsPattern = openingLevels + closingLevels;
    return _nestedBracketsPattern;
}
||
519 | |||
// Cache for GetNestedParensPattern(); built on first use.
private static string _nestedParensPattern;

/// <summary>
/// Reusable pattern to match balanced (parens). See Friedl's
/// "Mastering Regular Expressions", 2nd Ed., pp. 328-331.
/// </summary>
/// <remarks>
/// In other words (this) and (this(also)) and (this(also(too))), up to _nestDepth levels.
/// The pattern contains whitespace and # comments, so it is only usable in a regex
/// built with RegexOptions.IgnorePatternWhitespace.
/// </remarks>
private static string GetNestedParensPattern()
{
    if (_nestedParensPattern != null)
    {
        return _nestedParensPattern;
    }

    string openingLevels = RepeatString(@"
                   (?>              # Atomic matching
                      [^()\s]+      # Anything other than parens or whitespace
                    |
                      \(
                          ", _nestDepth);

    string closingLevels = RepeatString(
                   @" \)
                   )*"
                   , _nestDepth);

    _nestedParensPattern = openingLevels + closingLevels;
    return _nestedParensPattern;
}
||
543 | |||
// Matches one link definition line: [id]: url "optional title"
// ({0} is filled with _tabWidth - 1, the maximum leading indent.)
private static Regex _linkDef = new Regex(string.Format(@"
                    ^[ ]{{0,{0}}}\[(.+)\]:  # id = $1
                      [ ]*
                      \n?                   # maybe *one* newline
                      [ ]*
                    <?(\S+?)>?              # url = $2
                      [ ]*
                      \n?                   # maybe one newline
                      [ ]*
                    (?:
                        (?<=\s)             # lookbehind for whitespace
                        [""(]
                        (.+?)               # title = $3
                        ["")]
                        [ ]*
                    )?                      # title is optional
                    (?:\n+|\Z)", _tabWidth - 1), RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);

/// <summary>
/// Removes link definitions from the text; LinkEvaluator records each definition's
/// URL and title as it is removed.
/// </summary>
/// <remarks>
/// ^[id]: url "optional title"
/// </remarks>
/// <param name="text">Text that may contain link definition lines.</param>
/// <returns>The text with every matched definition replaced by nothing.</returns>
private string StripLinkDefinitions(string text)
{
    MatchEvaluator recordAndRemove = LinkEvaluator;
    return _linkDef.Replace(text, recordAndRemove);
}
||
572 | |||
/// <summary>
/// Match callback for _linkDef: stores the definition's URL (and title, when one
/// was given) under the lower-cased link id.
/// </summary>
/// <param name="match">A _linkDef match; group 1 = id, group 2 = url, group 3 = optional title.</param>
/// <returns>Always an empty string, so the definition disappears from the text.</returns>
private string LinkEvaluator(Match match)
{
    string linkID = match.Groups[1].Value.ToLowerInvariant();
    _urls[linkID] = EncodeAmpsAndAngles(match.Groups[2].Value);

    // The Groups indexer never returns null; an unmatched optional group is
    // reported through Success/Length instead.
    Group title = match.Groups[3];
    if (title.Success && title.Length > 0)
    {
        // Double quotes are stored as the &quot; entity.
        _titles[linkID] = title.Value.Replace("\"", "&quot;");
    }

    return "";
}
||
583 | |||
// compiling this monster regex results in worse performance. trust me.
private static Regex _blocksHtml = new Regex(GetBlockPattern(), RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace);


/// <summary>
/// Builds the regex source that recognises block-level HTML in the input;
/// derived pretty much verbatim from PHP Markdown.
/// </summary>
/// <remarks>
/// The template below contains $-prefixed tokens that are substituted at the end.
/// "$content2" has to be substituted before "$content", because the latter is a
/// prefix of the former.
/// </remarks>
private static string GetBlockPattern()
{
    // Hashify HTML blocks:
    // We only want to do this for block-level HTML tags, such as headers,
    // lists, and tables. That's because we still want to wrap <p>s around
    // "paragraphs" that are wrapped in non-block-level tags, such as anchors,
    // phrase emphasis, and spans. The list of tags we're looking for is
    // hard-coded:
    //
    // *  List "a" is made of tags which can be both inline or block-level.
    //    These will be treated block-level when the start tag is alone on
    //    its line, otherwise they're not matched here and will be taken as
    //    inline later.
    // *  List "b" is made of tags which are always block-level;
    //
    string inlineOrBlockTags = "ins|del";
    string alwaysBlockTags = "p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|script|noscript|form|fieldset|iframe|math";

    // Regular expression for the attribute part of a block tag.
    string attributes = @"
            (?>				            # optional tag attributes
              \s			            # starts with whitespace
              (?>
                [^>""/]+	            # text outside quotes
              |
                /+(?!>)		            # slash not followed by >
              |
                ""[^""]*""		        # text inside double quotes (tolerate >)
              |
                '[^']*'	                # text inside single quotes (tolerate >)
              )*
            )?
            ";

    // Content of a block tag, allowing the same tag (\2) to nest up to _nestDepth levels.
    string nestedContent = RepeatString(@"
                (?>
                  [^<]+			        # content without tag
                |
                  <\2			        # nested opening tag
                    " + attributes + @"       # attributes
                  (?>
                      />
                  |
                      >", _nestDepth) +   // end of opening tag
                      ".*?" +             // last level nested tag content
    RepeatString(@"
                      </\2\s*>	        # closing nested tag
                  )
                  |
                  <(?!/\2\s*>           # other tags with a different name
                  )
                )*", _nestDepth);

    // Same thing for the "a" list, whose tag name lands in capture group 3.
    string nestedContentGroupA = nestedContent.Replace(@"\2", @"\3");

    // First, look for nested blocks, e.g.:
    // 	<div>
    // 		<div>
    // 		tags for inner block must be indented.
    // 		</div>
    // 	</div>
    //
    // The outermost tags must start at the left margin for this to match, and
    // the inner nested divs must be indented.
    // We need to do this before the next, more liberal match, because the next
    // match will start at the first `<div>` and stop at the first `</div>`.
    string template = @"
            (?>
                  (?>
                    (?<=\n)     # Starting at the beginning of a line
                    |           # or
                    \A\n?       # the beginning of the doc
                  )
                  (             # save in $1

                    # Match from `\n<tag>` to `</tag>\n`, handling nested tags
                    # in between.

                        <($block_tags_b_re)   # start tag = $2
                        $attr>                # attributes followed by > and \n
                        $content              # content, support nesting
                        </\2>                 # the matching end tag
                        [ ]*                  # trailing spaces
                        (?=\n+|\Z)            # followed by a newline or end of document

                  | # Special version for tags of group a.

                        <($block_tags_a_re)   # start tag = $3
                        $attr>[ ]*\n          # attributes followed by >
                        $content2             # content, support nesting
                        </\3>                 # the matching end tag
                        [ ]*                  # trailing spaces
                        (?=\n+|\Z)            # followed by a newline or end of document

                  | # Special case just for <hr />. It was easier to make a special
                    # case than to make the other regex more complicated.

                        [ ]{0,$less_than_tab}
                        <hr
                        $attr                 # attributes
                        /?>                   # the matching end tag
                        [ ]*
                        (?=\n{2,}|\Z)         # followed by a blank line or end of document

                  | # Special case for standalone HTML comments:

                      (?<=\n\n|\A)            # preceded by a blank line or start of document
                      [ ]{0,$less_than_tab}
                      (?s:
                        <!--(?:|(?:[^>-]|-[^>])(?:[^-]|-[^-])*)-->
                      )
                      [ ]*
                      (?=\n{2,}|\Z)            # followed by a blank line or end of document

                  | # PHP and ASP-style processor instructions (<? and <%)

                      [ ]{0,$less_than_tab}
                      (?s:
                        <([?%])                # $4
                        .*?
                        \4>
                      )
                      [ ]*
                      (?=\n{2,}|\Z)            # followed by a blank line or end of document

                  )
            )";

    // Substitution order matters: "$content2" before "$content".
    return template
        .Replace("$less_than_tab", (_tabWidth - 1).ToString())
        .Replace("$block_tags_b_re", alwaysBlockTags)
        .Replace("$block_tags_a_re", inlineOrBlockTags)
        .Replace("$attr", attributes)
        .Replace("$content2", nestedContentGroupA)
        .Replace("$content", nestedContent);
}
||
729 | |||
/// <summary>
/// Replaces every block-level HTML block found by _blocksHtml with a placeholder
/// key; HtmlEvaluator stores the original markup under that key.
/// </summary>
/// <param name="text">Text that may contain block-level HTML.</param>
/// <returns>The text with each matched block swapped for its placeholder.</returns>
private string HashHTMLBlocks(string text)
{
    MatchEvaluator hashBlock = HtmlEvaluator;
    return _blocksHtml.Replace(text, hashBlock);
}
||
737 | |||
/// <summary>
/// Match callback for _blocksHtml: remembers the matched HTML (capture group 1) in
/// _htmlBlocks and returns its placeholder key surrounded by blank lines.
/// </summary>
private string HtmlEvaluator(Match match)
{
    string html = match.Groups[1].Value;
    string placeholder = GetHashKey(html, isHtmlBlock: true);

    _htmlBlocks[placeholder] = html;

    // Blank lines on both sides so the placeholder becomes a paragraph of its own.
    return "\n\n" + placeholder + "\n\n";
}
||
746 | |||
/// <summary>
/// Builds the placeholder key for a piece of text: the \x1A control character,
/// a delimiter letter, the absolute value of the text's hash code, and the
/// delimiter letter again.
/// </summary>
/// <param name="s">Text to derive the key from; must not be null.</param>
/// <param name="isHtmlBlock">True selects the 'H' delimiter (HTML blocks), false the 'E' delimiter (escapes).</param>
/// <returns>A key such as "\x1AH12345H".</returns>
private static string GetHashKey(string s, bool isHtmlBlock)
{
    char delim = isHtmlBlock ? 'H' : 'E';

    // Widen before taking the absolute value: Math.Abs(int) throws OverflowException
    // for int.MinValue, which is a legal hash code.
    long magnitude = Math.Abs((long)s.GetHashCode());

    // Keep "\x1A" as its own literal: written as "\x1AE" the E would be read as a hex digit.
    // NOTE(review): distinct texts with equal hash codes still share a key.
    return "\x1A" + delim + magnitude.ToString() + delim;
}
||
752 | |||
// Matches an HTML comment, a <? ... ?> instruction, or a tag whose attribute text
// may itself contain tags nested up to _nestDepth levels.
private static Regex _htmlTokens = new Regex(@"
            (<!--(?:|(?:[^>-]|-[^>])(?:[^-]|-[^-])*)-->)|        # match <!-- foo -->
            (<\?.*?\?>)|                 # match <?foo?> " +
            RepeatString(@"
            (<[A-Za-z\/!$](?:[^<>]|", _nestDepth) + RepeatString(@")*>)", _nestDepth) +
                                       " # match <tag> and </tag>",
    RegexOptions.Multiline | RegexOptions.Singleline | RegexOptions.ExplicitCapture | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);

/// <summary>
/// Splits the input into a list of tokens. Each token is either a tag (possibly
/// with nested tags contained therein, such as &lt;a href="&lt;MTFoo&gt;"&gt;) or a
/// run of text between tags; the token's Type says which, and its Value holds the
/// exact text.
/// </summary>
/// <param name="text">Text to tokenize.</param>
/// <returns>The tokens in input order; together their values cover the whole input.</returns>
private List<Token> TokenizeHTML(string text)
{
    var result = new List<Token>();
    int cursor = 0; // index of the first character not yet emitted

    // this regex is derived from the _tokenize() subroutine in Brad Choate's MTRegex plugin.
    // http://www.bradchoate.com/past/mtregex.php
    foreach (Match tag in _htmlTokens.Matches(text))
    {
        // Text sitting between the previous tag and this one.
        if (tag.Index > cursor)
        {
            result.Add(new Token(TokenType.Text, text.Substring(cursor, tag.Index - cursor)));
        }

        result.Add(new Token(TokenType.Tag, tag.Value));
        cursor = tag.Index + tag.Length;
    }

    // Whatever follows the last tag.
    if (cursor < text.Length)
    {
        result.Add(new Token(TokenType.Text, text.Substring(cursor)));
    }

    return result;
}
||
792 | |||
793 | |||
// Reference-style link: [link text] [id]
// ({0} is the balanced-brackets pattern.)
private static Regex _anchorRef = new Regex(string.Format(@"
            (                               # wrap whole match in $1
                \[
                    ({0})                   # link text = $2
                \]

                [ ]?                        # one optional space
                (?:\n[ ]*)?                 # one optional newline followed by spaces

                \[
                    (.*?)                   # id = $3
                \]
            )", GetNestedBracketsPattern()),
    RegexOptions.Singleline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);

// Inline-style link: [link text](url "optional title")
// ({0} is the balanced-brackets pattern, {1} the balanced-parens pattern.)
private static Regex _anchorInline = new Regex(string.Format(@"
                (                           # wrap whole match in $1
                    \[
                        ({0})               # link text = $2
                    \]
                    \(                      # literal paren
                        [ ]*
                        ({1})               # href = $3
                        [ ]*
                        (                   # $4
                        (['""])           # quote char = $5
                        (.*?)               # title = $6
                        \5                  # matching quote
                        [ ]*                # ignore any spaces between closing quote and )
                        )?                  # title is optional
                    \)
                )", GetNestedBracketsPattern(), GetNestedParensPattern()),
    RegexOptions.Singleline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);

// Shortcut reference link: [link text]
private static Regex _anchorRefShortcut = new Regex(@"
            (                               # wrap whole match in $1
              \[
                 ([^\[\]]+)                 # link text = $2; can't contain [ or ]
              \]
            )", RegexOptions.Singleline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);
||
833 | |||
/// <summary>
/// Turn Markdown link shortcuts into HTML anchor tags
/// </summary>
/// <remarks>
/// Handled forms, in the order they are processed:
/// [link text][id]
/// [link text](url "title")
/// [id]
/// </remarks>
private string DoAnchors(string text)
{
    // Reference-style links go first: [link text] [id]
    string withReferences = _anchorRef.Replace(text, AnchorRefEvaluator);

    // Then inline-style links: [link text](url "optional title")
    string withInline = _anchorInline.Replace(withReferences, AnchorInlineEvaluator);

    // Shortcut references ([link text]) run last, in case the text also
    // contains [link text][1] or [link text](/foo).
    return _anchorRefShortcut.Replace(withInline, AnchorRefShortcutEvaluator);
}
||
856 | |||
// Replaces every "://" in s with AutoLinkPreventionMarker and returns the result.
private string SaveFromAutoLinking(string s)
{
    const string schemeSeparator = "://";
    return s.Replace(schemeSeparator, AutoLinkPreventionMarker);
}
||
861 | |||
/// <summary>
/// Match evaluator for reference-style links, [link text][id] and [link text][].
/// Group 1 is the whole match, group 2 the link text and group 3 the id.
/// </summary>
/// <returns>
/// An anchor tag when the (lower-cased) id is present in <c>_urls</c>, with a
/// title attribute when <c>_titles</c> has one; otherwise the original text.
/// </returns>
private string AnchorRefEvaluator(Match match)
{
    string rawText = match.Groups[2].Value;
    string linkID = match.Groups[3].Value.ToLowerInvariant();

    // for shortcut links like [this][] the id is the link text itself. Derive it
    // from the raw text: the auto-link-protected text has every "://" replaced by
    // a marker, which would make the id impossible to find for text containing a URL.
    if (linkID.Length == 0)
        linkID = rawText.ToLowerInvariant();

    string url;
    if (!_urls.TryGetValue(linkID, out url))
        return match.Groups[1].Value; // unknown id: leave the text intact

    url = EscapeBoldItalic(EncodeProblemUrlChars(url));
    string result = "<a href=\"" + url + "\"";

    string title;
    if (_titles.TryGetValue(linkID, out title))
    {
        // encode first, then hide * and _ so they are not read as emphasis later
        title = EscapeBoldItalic(AttributeEncode(title));
        result += " title=\"" + title + "\"";
    }

    return result + ">" + SaveFromAutoLinking(rawText) + "</a>";
}
||
896 | |||
/// <summary>
/// Match evaluator for shortcut reference links of the form [link text].
/// Group 1 is the whole match and group 2 the link text, which doubles as the id.
/// </summary>
/// <returns>
/// An anchor tag when the normalized id is present in <c>_urls</c>, with a
/// title attribute when <c>_titles</c> has one; otherwise the original text.
/// </returns>
private string AnchorRefShortcutEvaluator(Match match)
{
    string rawText = match.Groups[2].Value;

    // lower case and collapse newlines / surrounding spaces to a single space.
    // The id comes from the raw text, not the auto-link-protected text, so that
    // link text containing "://" can still be matched against its definition.
    string linkID = Regex.Replace(rawText.ToLowerInvariant(), @"[ ]*\n[ ]*", " ");

    string url;
    if (!_urls.TryGetValue(linkID, out url))
        return match.Groups[1].Value; // unknown id: leave the text intact

    url = EscapeBoldItalic(EncodeProblemUrlChars(url));
    string result = "<a href=\"" + url + "\"";

    string title;
    if (_titles.TryGetValue(linkID, out title))
    {
        title = EscapeBoldItalic(AttributeEncode(title));
        result += " title=\"" + title + "\"";
    }

    return result + ">" + SaveFromAutoLinking(rawText) + "</a>";
}
||
927 | |||
928 | |||
/// <summary>
/// Match evaluator for inline links, [link text](url "optional title").
/// Group 2 is the link text, group 3 the url and group 6 the title.
/// </summary>
private string AnchorInlineEvaluator(Match match)
{
    string anchorText = SaveFromAutoLinking(match.Groups[2].Value);
    string href = EscapeBoldItalic(EncodeProblemUrlChars(match.Groups[3].Value));
    string titleText = match.Groups[6].Value;

    // remove <>'s surrounding URL, if present
    bool angleWrapped = href.StartsWith("<") && href.EndsWith(">");
    if (angleWrapped)
        href = href.Substring(1, href.Length - 2);

    string html = "<a href=\"" + href + "\"";

    if (!String.IsNullOrEmpty(titleText))
        html += " title=\"" + EscapeBoldItalic(AttributeEncode(titleText)) + "\"";

    return html + ">" + anchorText + "</a>";
}
||
953 | |||
954 | private static Regex _imagesRef = new Regex(@" |
||
955 | ( # wrap whole match in $1 |
||
956 | !\[ |
||
957 | (.*?) # alt text = $2 |
||
958 | \] |
||
959 | |||
960 | [ ]? # one optional space |
||
961 | (?:\n[ ]*)? # one optional newline followed by spaces |
||
962 | |||
963 | \[ |
||
964 | (.*?) # id = $3 |
||
965 | \] |
||
966 | |||
967 | )", RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline | RegexOptions.Compiled); |
||
968 | |||
969 | private static Regex _imagesInline = new Regex(String.Format(@" |
||
970 | ( # wrap whole match in $1 |
||
971 | !\[ |
||
972 | (.*?) # alt text = $2 |
||
973 | \] |
||
974 | \s? # one optional whitespace character |
||
975 | \( # literal paren |
||
976 | [ ]* |
||
977 | ({0}) # href = $3 |
||
978 | [ ]* |
||
979 | ( # $4 |
||
980 | (['""]) # quote char = $5 |
||
981 | (.*?) # title = $6 |
||
982 | \5 # matching quote |
||
983 | [ ]* |
||
984 | )? # title is optional |
||
985 | \) |
||
986 | )", GetNestedParensPattern()), |
||
987 | RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline | RegexOptions.Compiled); |
||
988 | |||
/// <summary>
/// Turn Markdown image shortcuts into HTML img tags.
/// </summary>
/// <remarks>
/// ![alt text][id]
/// ![alt text](url "optional title")
/// </remarks>
private string DoImages(string text)
{
    // Reference-style labeled images come first: ![alt text][id]
    string withReferences = _imagesRef.Replace(text, ImageReferenceEvaluator);

    // Then inline images: ![alt text](url "optional title")
    // (the evaluator takes care of encoding * and _)
    return _imagesInline.Replace(withReferences, ImageInlineEvaluator);
}
||
1007 | |||
// Hides *, _ and the bracket/paren characters of an image alt text behind their
// _escapeTable placeholders. This keeps colliding syntax ambiguities from producing
// horribly broken HTML; the result may still not be what the user meant, but at
// least it is not garbage.
private string EscapeImageAltText(string s)
{
    string escaped = EscapeBoldItalic(s);
    return Regex.Replace(escaped, @"[\[\]()]", hit => _escapeTable[hit.Value]);
}
||
1017 | |||
/// <summary>
/// Match evaluator for reference-style images, ![alt text][id] and ![alt text][].
/// Group 1 is the whole match, group 2 the alt text and group 3 the id.
/// </summary>
private string ImageReferenceEvaluator(Match match)
{
    string alt = match.Groups[2].Value;
    string id = match.Groups[3].Value.ToLowerInvariant();

    // for shortcut links like ![this][] the alt text is the id
    if (id == "")
        id = alt.ToLowerInvariant();

    // If there's no such link ID, leave intact:
    if (!_urls.ContainsKey(id))
        return match.Groups[1].Value;

    string source = _urls[id];
    string caption = _titles.ContainsKey(id) ? _titles[id] : null;

    return ImageTag(source, alt, caption);
}
||
1044 | |||
/// <summary>
/// Match evaluator for inline images, ![alt text](url "optional title").
/// Group 2 is the alt text, group 3 the url and group 6 the title.
/// </summary>
private string ImageInlineEvaluator(Match match)
{
    GroupCollection parts = match.Groups;
    string source = parts[3].Value;

    // Remove <>'s surrounding URL, if present
    bool angleWrapped = source.StartsWith("<") && source.EndsWith(">");
    if (angleWrapped)
        source = source.Substring(1, source.Length - 2);

    return ImageTag(source, parts[2].Value, parts[6].Value);
}
||
1056 | |||
/// <summary>
/// Builds an img element from a url, alt text and optional title.
/// The title attribute is omitted when <paramref name="title"/> is null or empty;
/// the tag is closed with <c>_emptyElementSuffix</c>.
/// </summary>
private string ImageTag(string url, string altText, string title)
{
    string altAttr = EscapeImageAltText(AttributeEncode(altText));
    string srcAttr = EscapeBoldItalic(EncodeProblemUrlChars(url));

    string tag = "<img src=\"" + srcAttr + "\" alt=\"" + altAttr + "\"";

    if (!String.IsNullOrEmpty(title))
        tag += " title=\"" + AttributeEncode(EscapeBoldItalic(title)) + "\"";

    return tag + _emptyElementSuffix;
}
||
1071 | |||
1072 | private static Regex _headerSetext = new Regex(@" |
||
1073 | ^(.+?) |
||
1074 | [ ]* |
||
1075 | \n |
||
1076 | (=+|-+) # $1 = string of ='s or -'s |
||
1077 | [ ]* |
||
1078 | \n+", |
||
1079 | RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled); |
||
1080 | |||
1081 | private static Regex _headerAtx = new Regex(@" |
||
1082 | ^(\#{1,6}) # $1 = string of #'s |
||
1083 | [ ]* |
||
1084 | (.+?) # $2 = Header text |
||
1085 | [ ]* |
||
1086 | \#* # optional closing #'s (not counted) |
||
1087 | \n+", |
||
1088 | RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled); |
||
1089 | |||
/// <summary>
/// Turn Markdown headers into HTML header tags
/// </summary>
/// <remarks>
/// Setext style (underlined):
///
/// Header 1
/// ========
///
/// Header 2
/// --------
///
/// Atx style (leading hashes, closing hashes optional):
///
/// # Header 1
/// ## Header 2
/// ## Header 2 with closing hashes ##
/// ...
/// ###### Header 6
/// </remarks>
private string DoHeaders(string text)
{
    // underlined headers are converted before hash-prefixed ones
    string withSetext = _headerSetext.Replace(text, SetextHeaderEvaluator);
    return _headerAtx.Replace(withSetext, AtxHeaderEvaluator);
}
||
1112 | |||
/// <summary>
/// Match evaluator for underlined headers: group 1 is the header text and
/// group 2 the underline. An underline starting with '=' gives h1, anything else h2.
/// </summary>
private string SetextHeaderEvaluator(Match match)
{
    string title = match.Groups[1].Value;
    string underline = match.Groups[2].Value;

    int depth;
    if (underline.StartsWith("="))
        depth = 1;
    else
        depth = 2;

    return string.Format("<h{1}>{0}</h{1}>\n\n", RunSpanGamut(title), depth);
}
||
1119 | |||
/// <summary>
/// Match evaluator for hash-prefixed headers: group 1 is the run of #'s, whose
/// length is the header level, and group 2 is the header text.
/// </summary>
private string AtxHeaderEvaluator(Match match)
{
    int depth = match.Groups[1].Value.Length;
    string body = RunSpanGamut(match.Groups[2].Value);
    return string.Format("<h{1}>{0}</h{1}>\n\n", body, depth);
}
||
1126 | |||
1127 | |||
1128 | private static Regex _horizontalRules = new Regex(@" |
||
1129 | ^[ ]{0,3} # Leading space |
||
1130 | ([-*_]) # $1: First marker |
||
1131 | (?> # Repeated marker group |
||
1132 | [ ]{0,2} # Zero, one, or two spaces. |
||
1133 | \1 # Marker character |
||
1134 | ){2,} # Group repeated at least twice |
||
1135 | [ ]* # Trailing spaces |
||
1136 | $ # End of line. |
||
1137 | ", RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled); |
||
1138 | |||
/// <summary>
/// Turn Markdown horizontal rules into HTML hr tags
/// </summary>
/// <remarks>
/// ***
/// * * *
/// ---
/// - - -
/// </remarks>
private string DoHorizontalRules(string text)
{
    string rule = string.Concat("<hr", _emptyElementSuffix, "\n");
    return _horizontalRules.Replace(text, rule);
}
||
1152 | |||
1153 | private static string _wholeList = string.Format(@" |
||
1154 | ( # $1 = whole list |
||
1155 | ( # $2 |
||
1156 | [ ]{{0,{1}}} |
||
1157 | ({0}) # $3 = first list item marker |
||
1158 | [ ]+ |
||
1159 | ) |
||
1160 | (?s:.+?) |
||
1161 | ( # $4 |
||
1162 | \z |
||
1163 | | |
||
1164 | \n{{2,}} |
||
1165 | (?=\S) |
||
1166 | (?! # Negative lookahead for another list item marker |
||
1167 | [ ]* |
||
1168 | {0}[ ]+ |
||
1169 | ) |
||
1170 | ) |
||
1171 | )", string.Format("(?:{0}|{1})", _markerUL, _markerOL), _tabWidth - 1); |
||
1172 | |||
1173 | private static Regex _listNested = new Regex(@"^" + _wholeList, |
||
1174 | RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled); |
||
1175 | |||
1176 | private static Regex _listTopLevel = new Regex(@"(?:(?<=\n\n)|\A\n?)" + _wholeList, |
||
1177 | RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled); |
||
1178 | |||
/// <summary>
/// Turn Markdown lists into HTML ul and ol and li tags
/// </summary>
private string DoLists(string text)
{
    // Nested lists are recognised with a different prefix than top-level lists;
    // see the extended comment in ProcessListItems().
    Regex listPattern = _listLevel > 0 ? _listNested : _listTopLevel;
    return listPattern.Replace(text, ListEvaluator);
}
||
1193 | |||
/// <summary>
/// Match evaluator for a whole list: group 1 is the list text and group 3 its first
/// item marker, which decides between ul and ol. The items are rendered by
/// ProcessListItems and wrapped in the list element.
/// </summary>
private string ListEvaluator(Match match)
{
    bool unordered = Regex.IsMatch(match.Groups[3].Value, _markerUL);
    string tag = unordered ? "ul" : "ol";
    string marker = unordered ? _markerUL : _markerOL;

    string items = ProcessListItems(match.Groups[1].Value, marker);

    return string.Format("<{0}>\n{1}</{0}>\n", tag, items);
}
||
1205 | |||
/// <summary>
/// Process the contents of a single ordered or unordered list, splitting it
/// into individual list items.
/// </summary>
/// <param name="list">Text of the whole list, item markers included.</param>
/// <param name="marker">Regex fragment matching this list's item marker.</param>
/// <returns>The items rendered as li elements, one per line.</returns>
private string ProcessListItems(string list, string marker)
{
    // _listLevel keeps track of when we're inside a list: it is incremented on
    // entry and decremented on exit, and DoLists() switches to the nested-list
    // pattern while it is above zero.
    //
    // We do this because when we're not inside a list, we want to treat
    // something like this:
    //
    //     I recommend upgrading to version
    //     8. Oops, now this line is treated
    //     as a sub-list.
    //
    // as a single paragraph, despite the fact that the second line starts
    // with a digit-period-space sequence. Inside a list (or sub-list) that same
    // line is treated as the start of a sub-list. It is a kludge, but this is an
    // aspect of Markdown's syntax that's hard to parse perfectly.
    _listLevel++;
    try
    {
        // Trim trailing blank lines:
        list = Regex.Replace(list, @"\n{2,}\z", "\n");

        string pattern = string.Format(
            @"(^[ ]*)                # leading whitespace = $1
            ({0}) [ ]+               # list marker = $2
            ((?s:.+?)                # list item text = $3
            (\n+))
            (?= (\z | \1 ({0}) [ ]+))", marker);

        bool lastItemHadADoubleNewline = false;

        // has to be a closure, so subsequent invocations can share the bool
        MatchEvaluator listItemEvaluator = (Match match) =>
        {
            string item = match.Groups[3].Value;

            bool endsWithDoubleNewline = item.EndsWith("\n\n");
            bool containsDoubleNewline = endsWithDoubleNewline || item.Contains("\n\n");

            if (containsDoubleNewline || lastItemHadADoubleNewline)
            {
                // blank lines in or before the item: render it as block content
                // (we could correct any bad indentation here..)
                item = RunBlockGamut(Outdent(item) + "\n", unhash: false);
            }
            else
            {
                // recursion for sub-lists
                item = DoLists(Outdent(item));
                item = item.TrimEnd('\n');
                item = RunSpanGamut(item);
            }

            lastItemHadADoubleNewline = endsWithDoubleNewline;
            return string.Format("<li>{0}</li>\n", item);
        };

        return Regex.Replace(list, pattern, listItemEvaluator,
            RegexOptions.IgnorePatternWhitespace | RegexOptions.Multiline);
    }
    finally
    {
        // restore the nesting level even when item processing throws, so a failed
        // call does not leave later DoLists() calls stuck in nested-list mode
        _listLevel--;
    }
}
||
1274 | |||
1275 | private static Regex _codeBlock = new Regex(string.Format(@" |
||
1276 | (?:\n\n|\A\n?) |
||
1277 | ( # $1 = the code block -- one or more lines, starting with a space |
||
1278 | (?: |
||
1279 | (?:[ ]{{{0}}}) # Lines must start with a tab-width of spaces |
||
1280 | .*\n+ |
||
1281 | )+ |
||
1282 | ) |
||
1283 | ((?=^[ ]{{0,{0}}}[^ \t\n])|\Z) # Lookahead for non-space at line-start, or end of doc", |
||
1284 | _tabWidth), RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled); |
||
1285 | |||
/// <summary>
/// Turn Markdown 4-space indented code into HTML pre code blocks
/// </summary>
private string DoCodeBlocks(string text)
{
    return _codeBlock.Replace(text, CodeBlockEvaluator);
}
||
1294 | |||
/// <summary>
/// Match evaluator for an indented code block (group 1): outdents and encodes the
/// code, applies <c>_newlinesLeadingTrailing</c> to it and wraps it in pre/code tags
/// surrounded by blank lines.
/// </summary>
private string CodeBlockEvaluator(Match match)
{
    string code = Outdent(match.Groups[1].Value);
    code = EncodeCode(code);
    code = _newlinesLeadingTrailing.Replace(code, "");

    return "\n\n<pre><code>" + code + "\n</code></pre>\n\n";
}
||
1304 | |||
1305 | private static Regex _codeSpan = new Regex(@" |
||
1306 | (?<!\\) # Character before opening ` can't be a backslash |
||
1307 | (`+) # $1 = Opening run of ` |
||
1308 | (.+?) # $2 = The code block |
||
1309 | (?<!`) |
||
1310 | \1 |
||
1311 | (?!`)", RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline | RegexOptions.Compiled); |
||
1312 | |||
/// <summary>
/// Turn Markdown `code spans` into HTML code tags
/// </summary>
private string DoCodeSpans(string text)
{
    // * Multiple backticks may be used as delimiters so that the span itself
    //   can contain literal backticks. This input:
    //
    //       Just type ``foo `bar` baz`` at the prompt.
    //
    //   becomes:
    //
    //       <p>Just type <code>foo `bar` baz</code> at the prompt.</p>
    //
    //   There's no arbitrary limit to the number of backticks used as
    //   delimiters: to show three consecutive backticks, delimit with four, etc.
    //
    // * Spaces give literal backticks at the edges:
    //
    //       ... type `` `bar` `` ...
    //
    //   becomes:
    //
    //       ... type <code>`bar`</code> ...

    return _codeSpan.Replace(text, CodeSpanEvaluator);
}
||
1342 | |||
/// <summary>
/// Match evaluator for a code span (group 2 is the content between the backticks):
/// strips edge spaces, encodes the code and wraps it in a code element.
/// </summary>
private string CodeSpanEvaluator(Match match)
{
    string code = match.Groups[2].Value;

    string withoutLeading = Regex.Replace(code, @"^[ ]*", "");            // leading whitespace
    string trimmed = Regex.Replace(withoutLeading, @"[ ]*$", "");         // trailing whitespace

    // auto-link protection is needed in code spans (not in code *blocks*)
    string safe = SaveFromAutoLinking(EncodeCode(trimmed));

    return "<code>" + safe + "</code>";
}
||
1353 | |||
1354 | |||
1355 | private static Regex _bold = new Regex(@"(\*\*|__) (?=\S) (.+?[*_]*) (?<=\S) \1", |
||
1356 | RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline | RegexOptions.Compiled); |
||
1357 | private static Regex _strictBold = new Regex(@"([\W_]|^) (\*\*|__) (?=\S) ([^\r]*?\S[\*_]*) \2 ([\W_]|$)", |
||
1358 | RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline | RegexOptions.Compiled); |
||
1359 | |||
1360 | private static Regex _italic = new Regex(@"(\*|_) (?=\S) (.+?) (?<=\S) \1", |
||
1361 | RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline | RegexOptions.Compiled); |
||
1362 | private static Regex _strictItalic = new Regex(@"([\W_]|^) (\*|_) (?=\S) ([^\r\*_]*?\S) \2 ([\W_]|$)", |
||
1363 | RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline | RegexOptions.Compiled); |
||
1364 | |||
/// <summary>
/// Turn Markdown *italics* and **bold** into HTML strong and em tags
/// </summary>
private string DoItalicsAndBold(string text)
{
    // In both modes <strong> must be handled first, then <em>.
    if (!_strictBoldItalic)
    {
        string bolded = _bold.Replace(text, "<strong>$2</strong>");
        return _italic.Replace(bolded, "<em>$2</em>");
    }

    // strict patterns also capture the delimiting characters around the
    // emphasis ($1 and $4), which are put back unchanged
    string strictBolded = _strictBold.Replace(text, "$1<strong>$3</strong>$4");
    return _strictItalic.Replace(strictBolded, "$1<em>$3</em>$4");
}
||
1384 | |||
/// <summary>
/// Turn markdown line breaks (two space at end of line) into HTML break tags.
/// When <c>_autoNewlines</c> is set, every newline becomes a break instead.
/// </summary>
private string DoHardBreaks(string text)
{
    string lineBreak = string.Format("<br{0}\n", _emptyElementSuffix);
    string pattern = _autoNewlines ? @"\n" : @" {2,}\n";
    return Regex.Replace(text, pattern, lineBreak);
}
||
1396 | |||
1397 | private static Regex _blockquote = new Regex(@" |
||
1398 | ( # Wrap whole match in $1 |
||
1399 | ( |
||
1400 | ^[ ]*>[ ]? # '>' at the start of a line |
||
1401 | .+\n # rest of the first line |
||
1402 | (.+\n)* # subsequent consecutive lines |
||
1403 | \n* # blanks |
||
1404 | )+ |
||
1405 | )", RegexOptions.IgnorePatternWhitespace | RegexOptions.Multiline | RegexOptions.Compiled); |
||
1406 | |||
/// <summary>
/// Turn Markdown &gt; quoted blocks into HTML blockquote blocks
/// </summary>
private string DoBlockQuotes(string text)
{
    MatchEvaluator quoteHandler = BlockQuoteEvaluator;
    return _blockquote.Replace(text, quoteHandler);
}
||
1414 | |||
/// <summary>
/// Match evaluator for a quoted block (group 1). Removes one level of quoting,
/// runs the block gamut on the content, wraps it in a blockquote element and
/// stores that HTML in <c>_htmlBlocks</c> under a hash key.
/// </summary>
/// <returns>The hash key, surrounded by blank lines.</returns>
private string BlockQuoteEvaluator(Match match)
{
    const RegexOptions perLine = RegexOptions.Multiline;
    string quoted = match.Groups[1].Value;

    quoted = Regex.Replace(quoted, @"^[ ]*>[ ]?", "", perLine);   // trim one level of quoting
    quoted = Regex.Replace(quoted, @"^[ ]+$", "", perLine);       // trim whitespace-only lines
    quoted = RunBlockGamut(quoted);                               // recurse

    // indent every line of the rendered content
    quoted = Regex.Replace(quoted, @"^", " ", perLine);

    // These leading spaces screw with <pre> content, so we need to fix that:
    quoted = Regex.Replace(quoted, @"(\s*<pre>.+?</pre>)", BlockQuoteEvaluator2,
        RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline);

    string html = "<blockquote>\n" + quoted + "\n</blockquote>";
    string key = GetHashKey(html, isHtmlBlock: true);
    _htmlBlocks[key] = html;

    return "\n\n" + key + "\n\n";
}
||
1434 | |||
// Removes the single leading space from every line of the matched
// pre block (group 1).
private string BlockQuoteEvaluator2(Match match)
{
    string preBlock = match.Groups[1].Value;
    var leadingSpace = new Regex(@"^ ", RegexOptions.Multiline);
    return leadingSpace.Replace(preBlock, "");
}
||
1439 | |||
1440 | private static Regex _autolinkBare = new Regex(@"(<|="")?\b(https?|ftp)(://[-A-Z0-9+&@#/%?=~_|\[\]\(\)!:,\.;]*[-A-Z0-9+&@#/%=~_|\[\])])(?=$|\W)", |
||
1441 | RegexOptions.IgnoreCase | RegexOptions.Compiled); |
||
1442 | |||
/// <summary>
/// Match evaluator for bare URLs: wraps the URL in angle brackets so that it is
/// picked up as an auto-link, moving surplus closing parentheses at the end of the
/// URL outside the brackets.
/// </summary>
private static string handleTrailingParens(Match match)
{
    // Group 1 plays the role of a negative lookbehind: a URL preceded by < or ="
    // is returned untouched. A real lookbehind is not used because of links
    // within links, such as
    // <a href="http://web.archive.org/web/20121130000728/http://www.google.com/">:
    // with a real lookbehind the outer link would never match, so the inner
    // http://www.google.com *would*. With the simulated one the outer link is
    // matched (and skipped here), so the inner one is never matched again.
    if (match.Groups[1].Success)
        return match.Value;

    string scheme = match.Groups[2].Value;
    string rest = match.Groups[3].Value;
    string closers = "";

    if (rest.EndsWith(")"))
    {
        // Track the parenthesis balance; an opening paren resets a negative
        // balance to 1, so only the closers after the last opener count.
        int depth = 0;
        foreach (char ch in rest)
        {
            if (ch == '(')
                depth = depth <= 0 ? 1 : depth + 1;
            else if (ch == ')')
                depth--;
        }

        if (depth < 0)
        {
            // cut off at most -depth closing parens from the end of the URL
            Match surplus = Regex.Match(rest, @"\){1," + (-depth) + "}$");
            if (surplus.Success)
            {
                closers = surplus.Value;
                rest = rest.Substring(0, surplus.Index);
            }
        }
    }

    return "<" + scheme + rest + ">" + closers;
}
||
1479 | |||
/// <summary>
/// Turn angle-delimited URLs into HTML anchor tags
/// </summary>
/// <remarks>
/// &lt;http://www.example.com&gt;
/// </remarks>
private string DoAutoLinks(string text)
{
    string result = text;

    if (_autoHyperlink)
    {
        // Wrap arbitrary URLs in Markdown < > so they get linked as well.
        // At this point every other URL in the text is already hyperlinked as
        // <a href=""></a>, *except* for the <http://www.foo.com> case.
        result = _autolinkBare.Replace(result, new MatchEvaluator(handleTrailingParens));
    }

    // Hyperlinks: <http://foo.com>
    result = Regex.Replace(result, "<((https?|ftp):[^'\">\\s]+)>", HyperlinkEvaluator);

    if (!_linkEmails)
        return result;

    // Email addresses: <address@domain.foo>
    string emailPattern =
        @"<
        (?:mailto:)?
        (
            [-.\w]+
            \@
            [-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+
        )
        >";

    return Regex.Replace(result, emailPattern, EmailEvaluator,
        RegexOptions.IgnoreCase | RegexOptions.IgnorePatternWhitespace);
}
||
1517 | |||
// Renders the URL captured in group 1 as an anchor whose href and visible
// text are both that URL.
private string HyperlinkEvaluator(Match match)
{
    string target = match.Groups[1].Value;
    return string.Concat("<a href=\"", target, "\">", target, "</a>");
}
||
1523 | |||
/// <summary>
/// Match evaluator for angle-delimited email addresses (group 1 is the address).
/// </summary>
/// <returns>
/// A mailto link in which the address has been passed through EncodeEmailAddress,
/// in the hopes of foiling most address harvesting spam bots. The visible text is
/// the encoded address without the "mailto:" prefix.
/// </returns>
private string EmailEvaluator(Match match)
{
    // Based on a filter by Matthew Wickline, posted to the BBEdit-Talk
    // mailing list: <http://tinyurl.com/yu7ue>
    string address = Unescape(match.Groups[1].Value);

    // the encoder leaves ':' alone, which lets us spot "mailto:" again below
    string encoded = EncodeEmailAddress("mailto:" + address);

    string anchor = string.Format("<a href=\"{0}\">{0}</a>", encoded);

    // strip the mailto: from the visible part
    return Regex.Replace(anchor, "\">.+?:", "\">");
}
||
1553 | |||
1554 | |||
1555 | private static Regex _outDent = new Regex(@"^[ ]{1," + _tabWidth + @"}", RegexOptions.Multiline | RegexOptions.Compiled); |
||
1556 | |||
/// <summary>
/// Remove one level of line-leading spaces
/// </summary>
private string Outdent(string block)
{
    string outdented = _outDent.Replace(block, string.Empty);
    return outdented;
}
||
1564 | |||
1565 | |||
1566 | #region Encoding and Normalization |
||
1567 | |||
1568 | |||
/// <summary>
/// encodes email address randomly
/// roughly 10% raw, 45% hex, 45% dec
/// note that @ is always encoded and : never is
/// </summary>
private string EncodeEmailAddress(string addr)
{
    var encoded = new StringBuilder(addr.Length * 5);
    var rng = new Random();

    for (int i = 0; i < addr.Length; i++)
    {
        char ch = addr[i];
        int roll = rng.Next(1, 100);

        bool keepRaw = ch != '@' && (roll > 90 || ch == ':');
        if (keepRaw)
            encoded.Append(ch);                          // literal character
        else if (roll < 45)
            encoded.AppendFormat("&#x{0:x};", (int)ch);  // hexadecimal entity
        else
            encoded.AppendFormat("&#{0};", (int)ch);     // decimal entity
    }

    return encoded.ToString();
}
||
1591 | |||
1592 | private static Regex _codeEncoder = new Regex(@"&|<|>|\\|\*|_|\{|\}|\[|\]", RegexOptions.Compiled); |
||
1593 | |||
/// <summary>
/// Encode/escape certain Markdown characters inside code blocks and spans where they are literals
/// </summary>
private string EncodeCode(string code)
{
    var perCharacter = new MatchEvaluator(EncodeCodeEvaluator);
    return _codeEncoder.Replace(code, perCharacter);
}
||
/// <summary>
/// Match evaluator used by EncodeCode: maps one special character found in code
/// to its replacement.
/// </summary>
/// <returns>
/// An HTML entity for &amp;, &lt; and &gt;; the <c>_escapeTable</c> entry for any
/// other matched character.
/// </returns>
private string EncodeCodeEvaluator(Match match)
{
    switch (match.Value)
    {
        // Encode all ampersands; HTML entities are not
        // entities within a Markdown code span.
        case "&":
            return "&amp;";
        // Do the angle bracket song and dance
        case "<":
            return "&lt;";
        case ">":
            return "&gt;";
        // escape characters that are magic in Markdown
        default:
            return _escapeTable[match.Value];
    }
}
||
1619 | |||
1620 | |||
private static Regex _amps = new Regex(@"&(?!((#[0-9]+)|(#[xX][a-fA-F0-9]+)|([a-zA-Z][a-zA-Z0-9]*));)", RegexOptions.ExplicitCapture | RegexOptions.Compiled);
private static Regex _angles = new Regex(@"<(?![A-Za-z/?\$!])", RegexOptions.ExplicitCapture | RegexOptions.Compiled);

/// <summary>
/// Encode any ampersands that aren't part of an HTML entity, and any left angle
/// brackets that are not followed by a letter, '/', '?', '$' or '!'.
/// </summary>
/// <returns>
/// The input with those ampersands replaced by &amp;amp; and those angle brackets
/// replaced by &amp;lt;.
/// </returns>
private string EncodeAmpsAndAngles(string s)
{
    // ampersands first, so the entities introduced for '<' are not re-encoded
    s = _amps.Replace(s, "&amp;");
    s = _angles.Replace(s, "&lt;");
    return s;
}
||
1633 | |||
// Not initialized here; assigned elsewhere in the class before first use.
private static Regex _backslashEscapes;

/// <summary>
/// Replaces backslash-escaped characters (such as \`, \* or \[) with the
/// values returned by EscapeBackslashesEvaluator.
/// </summary>
/// <param name="s">Text that may contain backslash escapes.</param>
/// <returns>The text with every match of the backslash-escape regex substituted.</returns>
private string EscapeBackslashes(string s)
{
    return _backslashEscapes.Replace(s, EscapeBackslashesEvaluator);
}
||
/// <summary>
/// Match evaluator for EscapeBackslashes: looks the matched text up in the
/// backslash escape table.
/// </summary>
/// <param name="match">A match of the backslash-escape regex.</param>
/// <returns>The table entry stored under the matched text.</returns>
private string EscapeBackslashesEvaluator(Match match)
{
    string key = match.Value;
    return _backslashEscapeTable[key];
}
||
1647 | |||
// The literal is split after "\x1A" on purpose: C# hex escapes consume up to four
// hex digits, so "\x1AE" would be read as the single character U+01AE.
private static Regex _unescapes = new Regex("\x1A" + "E\\d+E", RegexOptions.Compiled);

/// <summary>
/// Swaps back in all the special characters that were hidden behind
/// placeholder tokens (\x1A, 'E', digits, 'E').
/// </summary>
/// <param name="s">Text containing placeholder tokens.</param>
/// <returns>The text with each token replaced through UnescapeEvaluator.</returns>
private string Unescape(string s)
{
    return _unescapes.Replace(s, UnescapeEvaluator);
}
||
/// <summary>
/// Match evaluator for Unescape: looks the matched placeholder token up in
/// the inverted escape table.
/// </summary>
/// <param name="match">A match of the placeholder-token regex.</param>
/// <returns>The table entry stored under the matched token.</returns>
private string UnescapeEvaluator(Match match)
{
    string token = match.Value;
    return _invertedEscapeTable[token];
}
||
1661 | |||
1662 | |||
/// <summary>
/// Escapes the bold [ * ] and italic [ _ ] marker characters by replacing
/// each one with its entry in the escape table.
/// </summary>
/// <param name="s">Text in which the markers should be neutralized.</param>
/// <returns>The text with every '*' and '_' replaced.</returns>
private string EscapeBoldItalic(string s)
{
    // Asterisks first, then underscores -- same order as always.
    return s.Replace("*", _escapeTable["*"])
            .Replace("_", _escapeTable["_"]);
}
||
1672 | |||
/// <summary>
/// Makes a string safe for use inside a double-quoted HTML attribute value by
/// replacing angle brackets and double quotes with their HTML entities.
/// Ampersands are left untouched.
/// </summary>
/// <param name="s">Raw attribute text; must not be null.</param>
/// <returns>The text with '&gt;', '&lt;' and '"' replaced by entities.</returns>
private static string AttributeEncode(string s)
{
    return s.Replace(">", "&gt;").Replace("<", "&lt;").Replace("\"", "&quot;");
}
||
1677 | |||
private static char[] _problemUrlChars = @"""'*()[]$:".ToCharArray();

/// <summary>
/// Hex-encodes some unusual "problem" characters in URLs to avoid URL
/// detection problems. Does nothing when the feature flag is switched off.
/// </summary>
/// <param name="url">URL text to process.</param>
/// <returns>
/// The URL with each problem character written as '%' followed by its
/// lowercase hex code, or the unchanged input when encoding is disabled.
/// </returns>
private string EncodeProblemUrlChars(string url)
{
    if (!_encodeProblemUrlCharacters)
        return url;

    var encoded = new StringBuilder(url.Length);
    int lastIndex = url.Length - 1;

    for (int pos = 0; pos <= lastIndex; pos++)
    {
        char current = url[pos];
        bool mustEncode = Array.IndexOf(_problemUrlChars, current) >= 0;

        // A colon is kept as-is when the next character is '/' or a digit
        // (presumably a scheme separator or a port number -- verify against callers).
        // A colon in the very last position has no follower and is still encoded.
        if (mustEncode && current == ':' && pos < lastIndex)
        {
            char next = url[pos + 1];
            bool followedBySlashOrDigit = next == '/' || (next >= '0' && next <= '9');
            mustEncode = !followedBySlashOrDigit;
        }

        if (mustEncode)
            encoded.Append("%").Append(String.Format("{0:x}", (byte)current));
        else
            encoded.Append(current);
    }

    return encoded.ToString();
}
||
1706 | |||
1707 | |||
/// <summary>
/// Within tags -- meaning between &lt; and &gt; -- encode [\ ` * _] so they
/// don't conflict with their use in Markdown for code, italics and strong.
/// Each such character is replaced with its escape-table value, which should
/// prevent accidental collisions with the escape values themselves.
/// </summary>
/// <param name="text">Text to tokenize and rebuild.</param>
/// <returns>The text with the special characters inside tag tokens escaped; other tokens are copied unchanged.</returns>
private string EscapeSpecialCharsWithinTagAttributes(string text)
{
    var tokens = TokenizeHTML(text);

    // Rebuild the text token by token.
    var rebuilt = new StringBuilder(text.Length);

    foreach (var token in tokens)
    {
        if (token.Type != TokenType.Tag)
        {
            // Anything that is not a tag passes through untouched.
            rebuilt.Append(token.Value);
            continue;
        }

        string tag = token.Value.Replace(@"\", _escapeTable[@"\"]);

        // Escape slashes in comments to prevent autolinking there --
        // http://meta.stackoverflow.com/questions/95987/html-comment-containing-url-breaks-if-followed-by-another-html-comment
        bool escapeSlashes = _autoHyperlink && tag.StartsWith("<!");
        if (escapeSlashes)
            tag = tag.Replace("/", _escapeTable["/"]);

        // A <code> or </code> found inside the tag (with at least one character on
        // either side) is swapped for the backtick's escape value.
        tag = Regex.Replace(tag, "(?<=.)</?code>(?=.)", _escapeTable[@"`"]);

        rebuilt.Append(EscapeBoldItalic(tag));
    }

    return rebuilt.ToString();
}
||
1742 | |||
/// <summary>
/// Normalizes raw input text:
/// expands each tab to spaces up to the next multiple of _tabWidth;
/// converts DOS (CR LF) and Mac (CR) line endings to UNIX (LF);
/// drops \x1A characters;
/// empties lines that contain nothing but spaces/tabs (their line break is kept);
/// terminates the final line with a newline and appends two more newlines.
/// </summary>
/// <param name="text">Raw input text; must not be null.</param>
/// <returns>The normalized text.</returns>
private string Normalize(string text)
{
    var result = new StringBuilder(text.Length);
    var current = new StringBuilder();   // the line being assembled
    bool hasContent = false;             // true once the line holds a non-space character
    int lastIndex = text.Length - 1;

    for (int pos = 0; pos <= lastIndex; pos++)
    {
        char ch = text[pos];

        // A CR counts as a line break only when it is neither the last character
        // nor followed by LF; in those two cases it is simply dropped.
        bool loneCarriageReturn = ch == '\r' && pos < lastIndex && text[pos + 1] != '\n';

        if (ch == '\n' || loneCarriageReturn)
        {
            // Emit the line only if it had real content; whitespace-only lines collapse.
            if (hasContent)
                result.Append(current);
            result.Append('\n');
            current.Length = 0;
            hasContent = false;
        }
        else if (ch == '\r' || ch == '\x1A')
        {
            // Swallowed: a CR that is last or precedes LF, or a \x1A character.
        }
        else if (ch == '\t')
        {
            // Pad with spaces to the next tab stop; tabs alone never mark the line as non-blank.
            int padding = _tabWidth - current.Length % _tabWidth;
            while (padding > 0)
            {
                current.Append(' ');
                padding--;
            }
        }
        else
        {
            if (ch != ' ')
                hasContent = true;
            current.Append(ch);
        }
    }

    // Flush whatever is left of the final (unterminated) line.
    if (hasContent)
        result.Append(current);
    result.Append('\n');

    // Add two newlines to the end before returning.
    result.Append("\n\n");
    return result.ToString();
}
||
1792 | |||
1793 | #endregion |
||
1794 | |||
/// <summary>
/// Concatenates <paramref name="text"/> with itself <paramref name="count"/>
/// times; this emulates what is available in PHP.
/// </summary>
/// <param name="text">The string to repeat.</param>
/// <param name="count">How many copies to concatenate.</param>
/// <returns>The repeated string; empty when <paramref name="count"/> is zero.</returns>
private static string RepeatString(string text, int count)
{
    // Pre-size the buffer for the full result.
    var repeated = new StringBuilder(text.Length * count);

    int remaining = count;
    while (remaining > 0)
    {
        repeated.Append(text);
        remaining--;
    }

    return repeated.ToString();
}
||
1805 | |||
1806 | } |
||
1807 | } |