markus / MarkusAutoUpdate / src / NetSparkle / Libraries / MarkdownSharp.cs @ 38d69491
이력 | 보기 | 이력해설 | 다운로드 (69.7 KB)
1 |
/* |
---|---|
2 |
* MarkdownSharp |
3 |
* ------------- |
4 |
* a C# Markdown processor |
5 |
* |
6 |
* Markdown is a text-to-HTML conversion tool for web writers |
7 |
* Copyright (c) 2004 John Gruber |
8 |
* http://daringfireball.net/projects/markdown/ |
9 |
* |
10 |
* Markdown.NET |
11 |
* Copyright (c) 2004-2009 Milan Negovan |
12 |
* http://www.aspnetresources.com |
13 |
* http://aspnetresources.com/blog/markdown_announced.aspx |
14 |
* |
15 |
* MarkdownSharp |
16 |
* Copyright (c) 2009-2011 Jeff Atwood |
17 |
* http://stackoverflow.com |
18 |
* http://www.codinghorror.com/blog/ |
19 |
* http://code.google.com/p/markdownsharp/ |
20 |
* |
21 |
* History: Milan ported the Markdown processor to C#. He granted license to me so I can open source it |
22 |
* and let the community contribute to and improve MarkdownSharp. |
23 |
* |
24 |
*/ |
25 |
|
26 |
#region Copyright and license |
27 |
|
28 |
/* |
29 |
|
30 |
Copyright (c) 2009 - 2010 Jeff Atwood |
31 |
|
32 |
http://www.opensource.org/licenses/mit-license.php |
33 |
|
34 |
Permission is hereby granted, free of charge, to any person obtaining a copy |
35 |
of this software and associated documentation files (the "Software"), to deal |
36 |
in the Software without restriction, including without limitation the rights |
37 |
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
38 |
copies of the Software, and to permit persons to whom the Software is |
39 |
furnished to do so, subject to the following conditions: |
40 |
|
41 |
The above copyright notice and this permission notice shall be included in |
42 |
all copies or substantial portions of the Software. |
43 |
|
44 |
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
45 |
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
46 |
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
47 |
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
48 |
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
49 |
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
50 |
THE SOFTWARE. |
51 |
|
52 |
Copyright (c) 2003-2004 John Gruber |
53 |
<http://daringfireball.net/> |
54 |
All rights reserved. |
55 |
|
56 |
Redistribution and use in source and binary forms, with or without |
57 |
modification, are permitted provided that the following conditions are |
58 |
met: |
59 |
|
60 |
* Redistributions of source code must retain the above copyright notice, |
61 |
this list of conditions and the following disclaimer. |
62 |
|
63 |
* Redistributions in binary form must reproduce the above copyright |
64 |
notice, this list of conditions and the following disclaimer in the |
65 |
documentation and/or other materials provided with the distribution. |
66 |
|
67 |
* Neither the name "Markdown" nor the names of its contributors may |
68 |
be used to endorse or promote products derived from this software |
69 |
without specific prior written permission. |
70 |
|
71 |
This software is provided by the copyright holders and contributors "as |
72 |
is" and any express or implied warranties, including, but not limited |
73 |
to, the implied warranties of merchantability and fitness for a |
74 |
particular purpose are disclaimed. In no event shall the copyright owner |
75 |
or contributors be liable for any direct, indirect, incidental, special, |
76 |
exemplary, or consequential damages (including, but not limited to, |
77 |
procurement of substitute goods or services; loss of use, data, or |
78 |
profits; or business interruption) however caused and on any theory of |
79 |
liability, whether in contract, strict liability, or tort (including |
80 |
negligence or otherwise) arising in any way out of the use of this |
81 |
software, even if advised of the possibility of such damage. |
82 |
*/ |
83 |
|
84 |
#endregion |
85 |
|
86 |
using System; |
87 |
using System.Collections.Generic; |
88 |
using System.Configuration; |
89 |
using System.Text; |
90 |
using System.Text.RegularExpressions; |
91 |
|
92 |
namespace MarkdownSharp |
93 |
{ |
94 |
|
95 |
/// <summary>
/// Bag of settings that can be passed to the <see cref="Markdown"/> constructor.
/// All members are plain auto-properties, so a freshly created instance has every
/// flag set to false and <see cref="EmptyElementSuffix"/> set to null.
/// </summary>
public class MarkdownOptions
{
    /// <summary>
    /// When true, (most) bare plain URLs are turned into hyperlinks automatically.
    /// WARNING: this is a significant deviation from the markdown spec.
    /// </summary>
    public bool AutoHyperlink { get; set; }

    /// <summary>
    /// When true, RETURN becomes a literal newline.
    /// WARNING: this is a significant deviation from the markdown spec.
    /// </summary>
    public bool AutoNewlines { get; set; }

    /// <summary>
    /// Suffix for empty elements: use ">" for HTML output, or " />" for XHTML output.
    /// </summary>
    public string EmptyElementSuffix { get; set; }

    /// <summary>
    /// When true, problematic URL characters like [, ], (, and so forth will be encoded.
    /// WARNING: this is a significant deviation from the markdown spec.
    /// </summary>
    public bool EncodeProblemUrlCharacters { get; set; }

    /// <summary>
    /// When false, email addresses will never be auto-linked.
    /// WARNING: this is a significant deviation from the markdown spec.
    /// </summary>
    public bool LinkEmails { get; set; }

    /// <summary>
    /// When true, bold and italic require non-word characters on either side.
    /// WARNING: this is a significant deviation from the markdown spec.
    /// </summary>
    public bool StrictBoldItalic { get; set; }
}
130 |
|
131 |
|
132 |
/// <summary> |
133 |
/// Markdown is a text-to-HTML conversion tool for web writers. |
134 |
/// Markdown allows you to write using an easy-to-read, easy-to-write plain text format, |
135 |
/// then convert it to structurally valid XHTML (or HTML). |
136 |
/// </summary> |
137 |
public class Markdown |
138 |
{ |
139 |
/// <summary>
/// Version string exposed through the Version property.
/// </summary>
private const string _version = "1.13";

#region Constructors and Options

/// <summary>
/// Create a new Markdown instance using default options.
/// Delegates to the bool overload with false.
/// </summary>
public Markdown() : this(false)
{
}
150 |
|
151 |
/// <summary>
/// Create a new Markdown instance with the default option values.
/// </summary>
/// <remarks>
/// This overload was written to read options from the appSettings section of a
/// configuration file, using these keys:
///
///     Markdown.StrictBoldItalic (true/false)
///     Markdown.EmptyElementSuffix (">" or " />" without the quotes)
///     Markdown.LinkEmails (true/false)
///     Markdown.AutoNewLines (true/false)
///     Markdown.AutoHyperlink (true/false)
///     Markdown.EncodeProblemUrlCharacters (true/false)
///
/// The code that read ConfigurationManager.AppSettings is disabled in this file, so no
/// settings are loaded whichever value is passed.
/// </remarks>
/// <param name="loadOptionsFromConfigFile">Currently has no effect.</param>
public Markdown(bool loadOptionsFromConfigFile)
{
    // Intentionally empty: loading from the configuration file is disabled,
    // so every option keeps the default from its field initializer.
}
193 |
|
194 |
/// <summary>
/// Create a new Markdown instance and set the options from the MarkdownOptions object.
/// </summary>
/// <param name="options">Option values to copy into this instance; must not be null.</param>
/// <exception cref="ArgumentNullException"><paramref name="options"/> is null.</exception>
public Markdown(MarkdownOptions options)
{
    if (options == null)
    {
        throw new ArgumentNullException("options");
    }

    _autoHyperlink = options.AutoHyperlink;
    _autoNewlines = options.AutoNewlines;

    // MarkdownOptions.EmptyElementSuffix is an auto-property and is null unless the caller
    // sets it. Copying that null would wipe out the " />" default from the field initializer,
    // so the default is kept in that case.
    _emptyElementSuffix = options.EmptyElementSuffix ?? _emptyElementSuffix;

    _encodeProblemUrlCharacters = options.EncodeProblemUrlCharacters;
    _linkEmails = options.LinkEmails;
    _strictBoldItalic = options.StrictBoldItalic;
}
206 |
|
207 |
|
208 |
private string _emptyElementSuffix = " />";

/// <summary>
/// Suffix for empty elements: use ">" for HTML output, or " />" for XHTML output.
/// Defaults to " />".
/// </summary>
public string EmptyElementSuffix
{
    get { return _emptyElementSuffix; }
    set { _emptyElementSuffix = value; }
}

private bool _linkEmails = true;

/// <summary>
/// When false, email addresses will never be auto-linked. Defaults to true.
/// WARNING: this is a significant deviation from the markdown spec.
/// </summary>
public bool LinkEmails
{
    get { return _linkEmails; }
    set { _linkEmails = value; }
}

private bool _strictBoldItalic = false;

/// <summary>
/// When true, bold and italic require non-word characters on either side. Defaults to false.
/// WARNING: this is a significant deviation from the markdown spec.
/// </summary>
public bool StrictBoldItalic
{
    get { return _strictBoldItalic; }
    set { _strictBoldItalic = value; }
}

private bool _autoNewlines = false;

/// <summary>
/// When true, RETURN becomes a literal newline. Defaults to false.
/// WARNING: this is a significant deviation from the markdown spec.
/// </summary>
/// <remarks>
/// Spelled AutoNewLines here, while the matching MarkdownOptions member is AutoNewlines.
/// </remarks>
public bool AutoNewLines
{
    get { return _autoNewlines; }
    set { _autoNewlines = value; }
}

private bool _autoHyperlink = false;

/// <summary>
/// When true, (most) bare plain URLs are auto-hyperlinked. Defaults to false.
/// WARNING: this is a significant deviation from the markdown spec.
/// </summary>
public bool AutoHyperlink
{
    get { return _autoHyperlink; }
    set { _autoHyperlink = value; }
}

private bool _encodeProblemUrlCharacters = false;

/// <summary>
/// When true, problematic URL characters like [, ], (, and so forth will be encoded.
/// Defaults to false.
/// WARNING: this is a significant deviation from the markdown spec.
/// </summary>
public bool EncodeProblemUrlCharacters
{
    get { return _encodeProblemUrlCharacters; }
    set { _encodeProblemUrlCharacters = value; }
}
272 |
|
273 |
#endregion |
274 |
|
275 |
/// <summary>
/// Kind of chunk produced by the HTML tokenizer: plain text or a tag.
/// </summary>
private enum TokenType
{
    Text,
    Tag
}

/// <summary>
/// A single tokenizer result: the chunk kind together with its raw text.
/// </summary>
private struct Token
{
    public TokenType Type;
    public string Value;

    public Token(TokenType type, string value)
    {
        Type = type;
        Value = value;
    }
}
287 |
|
288 |
/// <summary>
/// Maximum nesting depth of [] and () supported by the transform; implementation detail.
/// </summary>
private const int _nestDepth = 6;

/// <summary>
/// Tabs are automatically converted to spaces as part of the transform;
/// this constant determines how "wide" those tabs become in spaces.
/// </summary>
private const int _tabWidth = 4;

// Regex fragments matching an unordered and an ordered list marker.
private const string _markerUL = @"[*+-]";
private const string _markerOL = @"\d+[.]";

// Shared lookup tables, filled once by the static constructor.
private static readonly Dictionary<string, string> _escapeTable;
private static readonly Dictionary<string, string> _invertedEscapeTable;
private static readonly Dictionary<string, string> _backslashEscapeTable;

// Per-instance state; cleared by Setup() at the start of every transform.
private readonly Dictionary<string, string> _urls = new Dictionary<string, string>();
private readonly Dictionary<string, string> _titles = new Dictionary<string, string>();
private readonly Dictionary<string, string> _htmlBlocks = new Dictionary<string, string>();

private int _listLevel;

// Temporarily replaces "://" where auto-linking shouldn't happen.
private static readonly string AutoLinkPreventionMarker = "\x1AP";
312 |
|
313 |
/// <summary>
/// The static constructor initializes what stays the same across all transforms:
/// the escape tables and the regex that recognizes backslash escapes.
/// </summary>
static Markdown()
{
    // character -> hash placeholder, and the reverse mapping
    _escapeTable = new Dictionary<string, string>();
    _invertedEscapeTable = new Dictionary<string, string>();
    // backslash-escaped character -> hash placeholder
    _backslashEscapeTable = new Dictionary<string, string>();

    var alternatives = new List<string>();

    foreach (char special in @"\`*_{}[]()>#+-.!/")
    {
        string literal = special.ToString();
        string placeholder = GetHashKey(literal, isHtmlBlock: false);
        string escaped = @"\" + literal;

        _escapeTable.Add(literal, placeholder);
        _invertedEscapeTable.Add(placeholder, literal);
        _backslashEscapeTable.Add(escaped, placeholder);
        alternatives.Add(Regex.Escape(escaped));
    }

    // One alternation of every escaped sequence, e.g. \\\\|\\`|\\*|...
    _backslashEscapes = new Regex(string.Join("|", alternatives.ToArray()), RegexOptions.Compiled);
}
338 |
|
339 |
/// <summary>
/// Current version of MarkdownSharp;
/// see http://code.google.com/p/markdownsharp/ for the latest code or to contribute.
/// </summary>
public string Version
{
    get
    {
        return _version;
    }
}
347 |
|
348 |
/// <summary>
/// Transforms the provided Markdown-formatted text to HTML;
/// see http://en.wikipedia.org/wiki/Markdown
/// </summary>
/// <remarks>
/// The order of the steps is essential: raw HTML blocks are hashed first, then link
/// definitions are stripped, then the block-level pass runs, and finally the escape
/// placeholders are turned back into characters.
/// </remarks>
/// <param name="text">Markdown source; null or empty input yields an empty string.</param>
/// <returns>The generated HTML followed by a single newline, or "" for null/empty input.</returns>
public string Transform(string text)
{
    if (String.IsNullOrEmpty(text))
    {
        return "";
    }

    // Start from a clean slate; the lookup tables are per-instance state.
    Setup();

    string html = Normalize(text);
    html = HashHTMLBlocks(html);
    html = StripLinkDefinitions(html);
    html = RunBlockGamut(html);
    html = Unescape(html);

    Cleanup();

    return html + "\n";
}
375 |
|
376 |
|
377 |
/// <summary>
/// Perform transformations that form block-level tags like paragraphs, headers, and list items.
/// </summary>
/// <param name="text">Text to process.</param>
/// <param name="unhash">Passed through to FormParagraphs; when false, HTML block placeholders are left in place.</param>
/// <returns>The text with block-level markup applied.</returns>
private string RunBlockGamut(string text, bool unhash = true)
{
    string blocks = DoHeaders(text);
    blocks = DoHorizontalRules(blocks);
    blocks = DoLists(blocks);
    blocks = DoCodeBlocks(blocks);
    blocks = DoBlockQuotes(blocks);

    // HashHTMLBlocks() already ran once in Transform(), to protect raw HTML found in the
    // original Markdown source. This second run protects the markup generated just above,
    // so that <p> tags are not wrapped around block-level tags.
    blocks = HashHTMLBlocks(blocks);

    return FormParagraphs(blocks, unhash: unhash);
}
398 |
|
399 |
|
400 |
/// <summary>
/// Perform transformations that occur *within* block-level tags like paragraphs, headers, and list items.
/// </summary>
/// <param name="text">Content of a single block.</param>
/// <returns>The text with span-level markup applied.</returns>
private string RunSpanGamut(string text)
{
    string span = DoCodeSpans(text);
    span = EscapeSpecialCharsWithinTagAttributes(span);
    span = EscapeBackslashes(span);

    // Images must come first, because ![foo][f] looks like an anchor.
    span = DoImages(span);
    span = DoAnchors(span);

    // Must come after DoAnchors(), because you can use < and >
    // delimiters in inline links like [this](<url>).
    span = DoAutoLinks(span);

    // Put back the "://" sequences that were masked to keep them from being auto-linked.
    span = span.Replace(AutoLinkPreventionMarker, "://");

    span = EncodeAmpsAndAngles(span);
    span = DoItalicsAndBold(span);
    span = DoHardBreaks(span);

    return span;
}
425 |
|
426 |
// Newlines at the very start, or at the very end, of the text.
private static readonly Regex _newlinesLeadingTrailing = new Regex(@"^\n+|\n+\z", RegexOptions.Compiled);
// Two or more consecutive newlines, i.e. a paragraph break.
private static readonly Regex _newlinesMultiple = new Regex(@"\n{2,}", RegexOptions.Compiled);
// Run of spaces at the start of the text.
private static readonly Regex _leadingWhitespace = new Regex(@"^[ ]*", RegexOptions.Compiled);

// Placeholder that GetHashKey produces for an HTML block.
private static readonly Regex _htmlBlockHash = new Regex("\x1AH\\d+H", RegexOptions.Compiled);
431 |
|
432 |
/// <summary>
/// Splits on two or more newlines, to form "paragraphs";
/// each paragraph is then unhashed (if it is a hash and unhashing isn't turned off)
/// or wrapped in an HTML p tag.
/// </summary>
/// <param name="text">Block-level text, possibly containing HTML block placeholders.</param>
/// <param name="unhash">When false, placeholder paragraphs are left untouched.</param>
/// <returns>The paragraphs joined by blank lines.</returns>
private string FormParagraphs(string text, bool unhash = true)
{
    // split on two or more newlines
    string[] grafs = _newlinesMultiple.Split(_newlinesLeadingTrailing.Replace(text, ""));

    for (int i = 0; i < grafs.Length; i++)
    {
        // Ordinal comparison is required here: a culture-sensitive StartsWith may treat the
        // U+001A control character as ignorable, in which case every paragraph that merely
        // begins with "H" would be taken for a placeholder and lose its <p> wrapper.
        if (grafs[i].StartsWith("\x1AH", StringComparison.Ordinal))
        {
            // unhashify HTML blocks
            if (unhash)
            {
                int sanityCheck = 50; // just for safety, guard against an infinite loop
                bool keepGoing = true; // as long as replacements were made, keep going
                while (keepGoing && sanityCheck > 0)
                {
                    keepGoing = false;
                    grafs[i] = _htmlBlockHash.Replace(grafs[i], match =>
                    {
                        string block;
                        if (!_htmlBlocks.TryGetValue(match.Value, out block))
                        {
                            // Looks like a placeholder but was never registered (for example
                            // it came from the input itself): leave it as it is instead of
                            // failing with a KeyNotFoundException.
                            return match.Value;
                        }

                        keepGoing = true;
                        return block;
                    });
                    sanityCheck--;
                }
            }
        }
        else
        {
            // do span level processing inside the block, then wrap result in <p> tags
            grafs[i] = _leadingWhitespace.Replace(RunSpanGamut(grafs[i]), "<p>") + "</p>";
        }
    }

    return string.Join("\n\n", grafs);
}
477 |
|
478 |
|
479 |
/// <summary>
/// Resets the per-transform state. Without this, link definitions and HTML blocks from
/// a previous document would leak into the next one when the same instance renders
/// several articles (e.g. an index page showing the N most recent articles).
/// </summary>
private void Setup()
{
    _listLevel = 0;
    _htmlBlocks.Clear();
    _titles.Clear();
    _urls.Clear();
}
490 |
|
491 |
/// <summary>
/// Releases the per-transform state once a transform has finished; same work as Setup().
/// </summary>
private void Cleanup()
{
    Setup();
}
495 |
|
496 |
// Cached result of GetNestedBracketsPattern(); built on first use.
private static string _nestedBracketsPattern;

/// <summary>
/// Reusable pattern to match balanced [brackets]. See Friedl's
/// "Mastering Regular Expressions", 2nd Ed., pp. 328-331.
/// </summary>
/// <returns>
/// A pattern fragment (written for RegexOptions.IgnorePatternWhitespace) that accepts
/// [this] and [this[also]] and [this[also[too]]], up to _nestDepth levels.
/// </returns>
private static string GetNestedBracketsPattern()
{
    if (_nestedBracketsPattern != null)
    {
        return _nestedBracketsPattern;
    }

    // _nestDepth openings followed by the same number of closings.
    string opening = RepeatString(@"
        (?>              # Atomic matching
           [^\[\]]+      # Anything other than brackets
         |
           \[
               ", _nestDepth);
    string closing = RepeatString(
        @" \]
        )*"
        , _nestDepth);

    _nestedBracketsPattern = opening + closing;
    return _nestedBracketsPattern;
}
519 |
|
520 |
// Cached result of GetNestedParensPattern(); built on first use.
private static string _nestedParensPattern;

/// <summary>
/// Reusable pattern to match balanced (parens). See Friedl's
/// "Mastering Regular Expressions", 2nd Ed., pp. 328-331.
/// </summary>
/// <returns>
/// A pattern fragment (written for RegexOptions.IgnorePatternWhitespace) that accepts
/// (this) and (this(also)) and (this(also(too))), up to _nestDepth levels.
/// </returns>
private static string GetNestedParensPattern()
{
    if (_nestedParensPattern != null)
    {
        return _nestedParensPattern;
    }

    // _nestDepth openings followed by the same number of closings.
    string opening = RepeatString(@"
        (?>              # Atomic matching
           [^()\s]+      # Anything other than parens or whitespace
         |
           \(
               ", _nestDepth);
    string closing = RepeatString(
        @" \)
        )*"
        , _nestDepth);

    _nestedParensPattern = opening + closing;
    return _nestedParensPattern;
}
543 |
|
544 |
// Matches one link definition line of the form:  [id]: url "optional title"
private static readonly Regex _linkDef = new Regex(string.Format(@"
        ^[ ]{{0,{0}}}\[(.+)\]:  # id = $1
          [ ]*
          \n?                   # maybe *one* newline
          [ ]*
        <?(\S+?)>?              # url = $2
          [ ]*
          \n?                   # maybe one newline
          [ ]*
        (?:
            (?<=\s)             # lookbehind for whitespace
            [""(]
            (.+?)               # title = $3
            ["")]
            [ ]*
        )?                      # title is optional
        (?:\n+|\Z)", _tabWidth - 1),
    RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);
561 |
|
562 |
/// <summary>
/// Strips link definitions from text, stores the URLs and titles in hash references.
/// </summary>
/// <remarks>
/// ^[id]: url "optional title"
/// </remarks>
/// <param name="text">Text that may contain link definition lines.</param>
/// <returns>The text with every matched definition removed.</returns>
private string StripLinkDefinitions(string text)
{
    MatchEvaluator recordDefinition = LinkEvaluator;
    return _linkDef.Replace(text, recordDefinition);
}
572 |
|
573 |
/// <summary>
/// Match callback for link definitions: records the URL (and the title, when present)
/// under the lower-cased link id and removes the definition from the text.
/// </summary>
/// <param name="match">A match of the link definition regex; groups 1-3 are id, url, title.</param>
/// <returns>Always the empty string, so the definition disappears from the output.</returns>
private string LinkEvaluator(Match match)
{
    string linkID = match.Groups[1].Value.ToLowerInvariant();
    _urls[linkID] = EncodeAmpsAndAngles(match.Groups[2].Value);

    // Groups[n] never returns null; an unmatched optional group simply has Length 0.
    Group title = match.Groups[3];
    if (title.Length > 0)
    {
        // Double quotes must become entities because the title ends up inside title="...".
        _titles[linkID] = title.Value.Replace("\"", "&quot;");
    }

    return "";
}
583 |
|
584 |
// Deliberately not RegexOptions.Compiled: compiling this monster regex results in worse performance. trust me.
private static readonly Regex _blocksHtml = new Regex(
    GetBlockPattern(),
    RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace);
586 |
|
587 |
|
588 |
/// <summary>
/// Builds the regex source that recognizes block-level HTML in the input;
/// derived pretty much verbatim from PHP Markdown.
/// </summary>
/// <returns>A pattern written for RegexOptions.IgnorePatternWhitespace; the block is captured in group 1.</returns>
private static string GetBlockPattern()
{
    // Hashify HTML blocks:
    // We only want to do this for block-level HTML tags, such as headers,
    // lists, and tables. That's because we still want to wrap <p>s around
    // "paragraphs" that are wrapped in non-block-level tags, such as anchors,
    // phrase emphasis, and spans. The list of tags we're looking for is
    // hard-coded:
    //
    // *  List "a" is made of tags which can be both inline or block-level.
    //    These will be treated block-level when the start tag is alone on
    //    its line, otherwise they're not matched here and will be taken as
    //    inline later.
    // *  List "b" is made of tags which are always block-level;
    //
    const string blockTagsA = "ins|del";
    const string blockTagsB = "p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|script|noscript|form|fieldset|iframe|math";

    // Regular expression for the attributes of a block tag.
    // It must end with a line break so that whatever follows it in the
    // final pattern is not swallowed by a trailing comment.
    string attr = @"
        (?>                         # optional tag attributes
          \s                        # starts with whitespace
          (?>
            [^>""/]+                # text outside quotes
          |
            /+(?!>)                 # slash not followed by >
          |
            ""[^""]*""              # text inside double quotes (tolerate >)
          |
            '[^']*'                 # text inside single quotes (tolerate >)
          )*
        )?
        ";

    // Regular expression for the content of a block tag: _nestDepth nested
    // openings, the innermost content, then the matching closings.
    string nestedOpenings = RepeatString(@"
            (?>
              [^<]+                 # content without tag
            |
              <\2                   # nested opening tag
                " + attr + @"       # attributes
              (?>
                  />
              |
                  >", _nestDepth);  // end of opening tag
    string nestedClosings = RepeatString(@"
                  </\2\s*>          # closing nested tag
              )
              |
              <(?!/\2\s*>           # other tags with a different name
              )
            )*", _nestDepth);

    string content = nestedOpenings + ".*?" + nestedClosings; // ".*?" = last level nested tag content
    string content2 = content.Replace(@"\2", @"\3");

    // First, look for nested blocks, e.g.:
    // 	<div>
    // 		<div>
    // 		tags for inner block must be indented.
    // 		</div>
    // 	</div>
    //
    // The outermost tags must start at the left margin for this to match, and
    // the inner nested divs must be indented.
    // We need to do this before the next, more liberal match, because the next
    // match will start at the first `<div>` and stop at the first `</div>`.
    string template = @"
        (?>
              (?>
                (?<=\n)     # Starting at the beginning of a line
                |           # or
                \A\n?       # the beginning of the doc
              )
              (             # save in $1

                # Match from `\n<tag>` to `</tag>\n`, handling nested tags
                # in between.

                    <($block_tags_b_re)   # start tag = $2
                    $attr>                # attributes followed by > and \n
                    $content              # content, support nesting
                    </\2>                 # the matching end tag
                    [ ]*                  # trailing spaces
                    (?=\n+|\Z)            # followed by a newline or end of document

              | # Special version for tags of group a.

                    <($block_tags_a_re)   # start tag = $3
                    $attr>[ ]*\n          # attributes followed by >
                    $content2             # content, support nesting
                    </\3>                 # the matching end tag
                    [ ]*                  # trailing spaces
                    (?=\n+|\Z)            # followed by a newline or end of document

              | # Special case just for <hr />. It was easier to make a special
                # case than to make the other regex more complicated.

                    [ ]{0,$less_than_tab}
                    <hr
                    $attr                 # attributes
                    /?>                   # the matching end tag
                    [ ]*
                    (?=\n{2,}|\Z)         # followed by a blank line or end of document

              | # Special case for standalone HTML comments:

                  (?<=\n\n|\A)            # preceded by a blank line or start of document
                  [ ]{0,$less_than_tab}
                  (?s:
                    <!--(?:|(?:[^>-]|-[^>])(?:[^-]|-[^-])*)-->
                  )
                  [ ]*
                  (?=\n{2,}|\Z)           # followed by a blank line or end of document

              | # PHP and ASP-style processor instructions (<? and <%)

                  [ ]{0,$less_than_tab}
                  (?s:
                    <([?%])               # $4
                    .*?
                    \4>
                  )
                  [ ]*
                  (?=\n{2,}|\Z)           # followed by a blank line or end of document

              )
        )";

    // "$content2" has to be substituted before "$content", which is a prefix of it.
    return template
        .Replace("$less_than_tab", (_tabWidth - 1).ToString())
        .Replace("$block_tags_b_re", blockTagsB)
        .Replace("$block_tags_a_re", blockTagsA)
        .Replace("$attr", attr)
        .Replace("$content2", content2)
        .Replace("$content", content);
}
729 |
|
730 |
/// <summary>
/// Replaces any block-level HTML blocks with hash entries.
/// </summary>
/// <param name="text">Text that may contain block-level HTML.</param>
/// <returns>The text with each matched block swapped for its placeholder.</returns>
private string HashHTMLBlocks(string text)
{
    MatchEvaluator stashBlock = HtmlEvaluator;
    return _blocksHtml.Replace(text, stashBlock);
}
737 |
|
738 |
/// <summary>
/// Match callback for HTML blocks: stores the block (group 1) under its hash key and
/// returns the key surrounded by blank lines.
/// </summary>
/// <param name="match">A match of the HTML block regex.</param>
/// <returns>The placeholder key with two newlines before and after it.</returns>
private string HtmlEvaluator(Match match)
{
    string block = match.Groups[1].Value;
    string placeholder = GetHashKey(block, isHtmlBlock: true);

    _htmlBlocks[placeholder] = block;

    return "\n\n" + placeholder + "\n\n";
}
746 |
|
747 |
/// <summary>
/// Builds the placeholder token for a string: the control character U+001A, a delimiter
/// letter, the absolute value of the string's hash code, and the delimiter letter again.
/// </summary>
/// <param name="s">String to derive the key from; must not be null.</param>
/// <param name="isHtmlBlock">True selects the 'H' delimiter (HTML block), false selects 'E' (escape).</param>
/// <returns>The placeholder, e.g. U+001A followed by "H12345H".</returns>
private static string GetHashKey(string s, bool isHtmlBlock)
{
    char delim = isHtmlBlock ? 'H' : 'E';

    // Widen before taking the absolute value: Math.Abs(int) throws an OverflowException
    // when the hash code happens to be int.MinValue. All other values give the same
    // digits as before.
    long magnitude = Math.Abs((long)s.GetHashCode());

    return "\x1A" + delim + magnitude.ToString() + delim;
}
752 |
|
753 |
// Splits text into HTML comments, <? ?> processing instructions and tags (nested up to _nestDepth).
private static readonly Regex _htmlTokens = new Regex(@"
        (<!--(?:|(?:[^>-]|-[^>])(?:[^-]|-[^-])*)-->)|        # match <!-- foo -->
        (<\?.*?\?>)|                 # match <?foo?> " +
    RepeatString(@"
        (<[A-Za-z\/!$](?:[^<>]|", _nestDepth) + RepeatString(@")*>)", _nestDepth) +
    " # match <tag> and </tag>",
    RegexOptions.Multiline | RegexOptions.Singleline | RegexOptions.ExplicitCapture | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);
760 |
|
761 |
/// <summary>
/// Returns the list of HTML tokens comprising the input string. Each token is
/// either a tag (possibly with nested tags contained therein, such
/// as &lt;a href="&lt;MTFoo&gt;"&gt;), or a run of text between tags.
/// </summary>
/// <param name="text">Text to split.</param>
/// <returns>The tokens in input order; together their values cover the whole input.</returns>
private List<Token> TokenizeHTML(string text)
{
    var tokens = new List<Token>();
    int cursor = 0; // index of the first character not yet emitted

    // this regex is derived from the _tokenize() subroutine in Brad Choate's MTRegex plugin.
    // http://www.bradchoate.com/past/mtregex.php
    foreach (Match tag in _htmlTokens.Matches(text))
    {
        // plain text sitting between the previous tag and this one
        if (cursor < tag.Index)
        {
            tokens.Add(new Token(TokenType.Text, text.Substring(cursor, tag.Index - cursor)));
        }

        tokens.Add(new Token(TokenType.Tag, tag.Value));
        cursor = tag.Index + tag.Length;
    }

    // trailing text after the last tag
    if (cursor < text.Length)
    {
        tokens.Add(new Token(TokenType.Text, text.Substring(cursor)));
    }

    return tokens;
}
792 |
|
793 |
|
794 |
// Reference-style link: [link text] [id]
private static readonly Regex _anchorRef = new Regex(string.Format(@"
        (                               # wrap whole match in $1
            \[
                ({0})                   # link text = $2
            \]

            [ ]?                        # one optional space
            (?:\n[ ]*)?                 # one optional newline followed by spaces

            \[
                (.*?)                   # id = $3
            \]
        )", GetNestedBracketsPattern()),
    RegexOptions.Singleline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);

// Inline-style link: [link text](url "optional title")
private static readonly Regex _anchorInline = new Regex(string.Format(@"
        (                           # wrap whole match in $1
            \[
                ({0})               # link text = $2
            \]
            \(                      # literal paren
                [ ]*
                ({1})               # href = $3
                [ ]*
                (                   # $4
                (['""])           # quote char = $5
                (.*?)               # title = $6
                \5                  # matching quote
                [ ]*                # ignore any spaces between closing quote and )
                )?                  # title is optional
            \)
        )", GetNestedBracketsPattern(), GetNestedParensPattern()),
    RegexOptions.Singleline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);

// Reference-style shortcut: [link text]
private static readonly Regex _anchorRefShortcut = new Regex(@"
        (                               # wrap whole match in $1
          \[
             ([^\[\]]+)                 # link text = $2; can't contain [ or ]
          \]
        )",
    RegexOptions.Singleline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);
833 |
|
834 |
/// <summary>
/// Turn Markdown link shortcuts into HTML anchor tags.
/// </summary>
/// <remarks>
/// [link text](url "title")
/// [link text][id]
/// [id]
/// </remarks>
/// <param name="text">Span-level text.</param>
/// <returns>The text with recognized links replaced by anchor tags.</returns>
private string DoAnchors(string text)
{
    // First, handle reference-style links: [link text] [id]
    string linked = _anchorRef.Replace(text, AnchorRefEvaluator);

    // Next, inline-style links: [link text](url "optional title")
    linked = _anchorInline.Replace(linked, AnchorInlineEvaluator);

    // Last, handle reference-style shortcuts: [link text]
    // These must come last in case you've also got [link test][1]
    // or [link test](/foo)
    return _anchorRefShortcut.Replace(linked, AnchorRefShortcutEvaluator);
}
856 |
|
857 |
/// <summary>
/// Masks every "://" in the string with AutoLinkPreventionMarker so that the text is
/// not picked up by auto-linking; RunSpanGamut puts the "://" back afterwards.
/// </summary>
/// <param name="s">Text to protect.</param>
/// <returns>The text with "://" replaced by the marker.</returns>
private string SaveFromAutoLinking(string s)
{
    string masked = s.Replace("://", AutoLinkPreventionMarker);
    return masked;
}
861 |
|
862 |
/// <summary>
/// Match callback for reference-style links ([link text][id]): builds the anchor tag
/// when the id has a stored URL, otherwise leaves the matched text unchanged.
/// </summary>
/// <param name="match">A match of the reference link regex; groups 1-3 are whole match, link text, id.</param>
/// <returns>The anchor tag, or the original matched text when the id is unknown.</returns>
private string AnchorRefEvaluator(Match match)
{
    string linkText = SaveFromAutoLinking(match.Groups[2].Value);
    string linkID = match.Groups[3].Value.ToLowerInvariant();

    // for shortcut links like [this][].
    if (linkID.Length == 0)
    {
        linkID = linkText.ToLowerInvariant();
    }

    string url;
    if (!_urls.TryGetValue(linkID, out url))
    {
        // unknown reference: put the text back exactly as it was written
        return match.Groups[1].Value;
    }

    url = EscapeBoldItalic(EncodeProblemUrlChars(url));
    string anchor = "<a href=\"" + url + "\"";

    string storedTitle;
    if (_titles.TryGetValue(linkID, out storedTitle))
    {
        string title = AttributeEncode(storedTitle);
        title = AttributeEncode(EscapeBoldItalic(title));
        anchor += " title=\"" + title + "\"";
    }

    return anchor + ">" + linkText + "</a>";
}
896 |
|
897 |
/// <summary>
/// Match evaluator for bare reference shortcuts: [link text].
/// </summary>
/// <param name="match">A match whose groups are: $1 = whole match, $2 = link text.</param>
/// <returns>
/// An HTML anchor when the normalized link text is present in _urls (with a title
/// attribute when it is also present in _titles); otherwise the original matched text.
/// </returns>
private string AnchorRefShortcutEvaluator(Match match)
{
    string wholeMatch = match.Groups[1].Value;
    string linkText = SaveFromAutoLinking(match.Groups[2].Value);

    // The id is the link text, lower-cased, with each newline (and the spaces
    // around it) collapsed to a single space.
    string linkID = Regex.Replace(linkText.ToLowerInvariant(), @"[ ]*\n[ ]*", " ");

    // Single lookup instead of ContainsKey followed by the indexer.
    string url;
    if (!_urls.TryGetValue(linkID, out url))
        return wholeMatch;

    url = EncodeProblemUrlChars(url);
    url = EscapeBoldItalic(url);
    string result = "<a href=\"" + url + "\"";

    string title;
    if (_titles.TryGetValue(linkID, out title))
    {
        title = EscapeBoldItalic(AttributeEncode(title));
        result += " title=\"" + title + "\"";
    }

    return result + ">" + linkText + "</a>";
}
927 |
|
928 |
|
929 |
/// <summary>
/// Match evaluator for inline links: [link text](url "optional title").
/// </summary>
/// <param name="match">
/// A match whose groups are: $2 = link text, $3 = url, $6 = title (may be empty).
/// </param>
/// <returns>The HTML anchor built from the link text, url and optional title.</returns>
private string AnchorInlineEvaluator(Match match)
{
    string linkText = SaveFromAutoLinking(match.Groups[2].Value);
    string url = match.Groups[3].Value;
    string title = match.Groups[6].Value;

    url = EncodeProblemUrlChars(url);
    url = EscapeBoldItalic(url);

    // Remove <>'s surrounding the URL, if present. Ordinal comparison is required:
    // a culture-sensitive StartsWith/EndsWith may skip ignorable characters and
    // report a bracket that is not actually the first/last char, in which case
    // Substring would cut off the wrong characters.
    if (url.StartsWith("<", StringComparison.Ordinal) && url.EndsWith(">", StringComparison.Ordinal))
        url = url.Substring(1, url.Length - 2);

    string result = "<a href=\"" + url + "\"";

    if (!String.IsNullOrEmpty(title))
    {
        title = AttributeEncode(title);
        title = EscapeBoldItalic(title);
        result += " title=\"" + title + "\"";
    }

    return result + ">" + linkText + "</a>";
}
953 |
|
954 |
// Matches a reference-style image: ![alt text][id]
//   $1 = whole match, $2 = alt text, $3 = id (may be empty).
// One optional space and one optional newline (plus spaces) may separate the two
// bracket pairs. The field is never reassigned in the code visible here, hence readonly.
private static readonly Regex _imagesRef = new Regex(@"
    (               # wrap whole match in $1
    !\[
        (.*?)       # alt text = $2
    \]

    [ ]?            # one optional space
    (?:\n[ ]*)?     # one optional newline followed by spaces

    \[
        (.*?)       # id = $3
    \]

    )", RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline | RegexOptions.Compiled);
968 |
|
969 |
// Matches an inline image: ![alt text](url "optional title")
//   $1 = whole match, $2 = alt text, $3 = href, $5 = quote character, $6 = title.
// The href sub-pattern is supplied by GetNestedParensPattern().
// The field is never reassigned in the code visible here, hence readonly.
private static readonly Regex _imagesInline = new Regex(String.Format(@"
    (                     # wrap whole match in $1
    !\[
        (.*?)             # alt text = $2
    \]
    \s?                   # one optional whitespace character
    \(                    # literal paren
        [ ]*
        ({0})             # href = $3
        [ ]*
        (                 # $4
        (['""])           # quote char = $5
        (.*?)             # title = $6
        \5                # matching quote
        [ ]*
        )?                # title is optional
    \)
    )", GetNestedParensPattern()),
    RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline | RegexOptions.Compiled);
988 |
|
989 |
/// <summary>
/// Converts Markdown image syntax found in <paramref name="text"/> into HTML img tags.
/// </summary>
/// <remarks>
///     ![alt text][id]
///     ![alt text](url "optional title")
/// </remarks>
private string DoImages(string text)
{
    // Reference-style labeled images (![alt text][id]) are resolved first,
    // inline images (![alt text](url "optional title")) second.
    string afterReferences = _imagesRef.Replace(text, new MatchEvaluator(ImageReferenceEvaluator));
    return _imagesInline.Replace(afterReferences, new MatchEvaluator(ImageInlineEvaluator));
}
1007 |
|
1008 |
// Hides characters in image alt text that could otherwise be picked up by later
// Markdown passes: * and _ (via EscapeBoldItalic) and the bracket/paren characters
// [ ] ( ), each swapped for its _escapeTable entry. When syntax ambiguities collide
// this may still not produce what the author meant, but it avoids emitting
// badly broken HTML.
private string EscapeImageAltText(string s)
{
    string withoutEmphasisChars = EscapeBoldItalic(s);
    MatchEvaluator toEscapeToken = delegate(Match m) { return _escapeTable[m.ToString()]; };
    return Regex.Replace(withoutEmphasisChars, @"[\[\]()]", toEscapeToken);
}
1017 |
|
1018 |
/// <summary>
/// Match evaluator for reference-style images: ![alt text][id] or ![alt text][].
/// </summary>
/// <param name="match">
/// A match whose groups are: $1 = whole match, $2 = alt text, $3 = link id.
/// </param>
/// <returns>
/// The img tag produced by ImageTag when the id is present in _urls;
/// otherwise the original matched text, unchanged.
/// </returns>
private string ImageReferenceEvaluator(Match match)
{
    string wholeMatch = match.Groups[1].Value;
    string altText = match.Groups[2].Value;
    string linkID = match.Groups[3].Value.ToLowerInvariant();

    // for shortcut links like ![this][]: the alt text doubles as the id
    if (linkID.Length == 0)
        linkID = altText.ToLowerInvariant();

    // Single lookup instead of ContainsKey followed by the indexer.
    string url;
    if (!_urls.TryGetValue(linkID, out url))
    {
        // If there's no such link ID, leave intact:
        return wholeMatch;
    }

    // A missing title is passed on as null; ImageTag then omits the attribute.
    string title;
    if (!_titles.TryGetValue(linkID, out title))
        title = null;

    return ImageTag(url, altText, title);
}
1044 |
|
1045 |
/// <summary>
/// Match evaluator for inline images: ![alt text](url "optional title").
/// </summary>
/// <param name="match">
/// A match whose groups are: $2 = alt text, $3 = url, $6 = title (may be empty).
/// </param>
/// <returns>The img tag produced by ImageTag.</returns>
private string ImageInlineEvaluator(Match match)
{
    string alt = match.Groups[2].Value;
    string url = match.Groups[3].Value;
    string title = match.Groups[6].Value;

    // Remove <>'s surrounding the URL, if present. Ordinal comparison is required:
    // a culture-sensitive StartsWith/EndsWith may skip ignorable characters and
    // report a bracket that is not actually the first/last char, in which case
    // Substring would cut off the wrong characters.
    if (url.StartsWith("<", StringComparison.Ordinal) && url.EndsWith(">", StringComparison.Ordinal))
        url = url.Substring(1, url.Length - 2);

    return ImageTag(url, alt, title);
}
1056 |
|
1057 |
/// <summary>
/// Builds an HTML img element.
/// </summary>
/// <param name="url">Image source; problem characters and * / _ are escaped before use.</param>
/// <param name="altText">Alternate text; attribute-encoded and escaped before use.</param>
/// <param name="title">Optional title; the attribute is omitted when null or empty.</param>
/// <returns>The img tag, terminated with _emptyElementSuffix.</returns>
private string ImageTag(string url, string altText, string title)
{
    string safeAlt = EscapeImageAltText(AttributeEncode(altText));
    string safeUrl = EscapeBoldItalic(EncodeProblemUrlChars(url));

    string tag = string.Format("<img src=\"{0}\" alt=\"{1}\"", safeUrl, safeAlt);

    bool hasTitle = !String.IsNullOrEmpty(title);
    if (hasTitle)
    {
        string safeTitle = AttributeEncode(EscapeBoldItalic(title));
        tag += string.Format(" title=\"{0}\"", safeTitle);
    }

    return tag + _emptyElementSuffix;
}
1071 |
|
1072 |
// Setext-style header: a text line followed by a line of ='s or -'s.
//   $1 = header text, $2 = the underline run (SetextHeaderEvaluator reads it as group 2).
// The field is never reassigned in the code visible here, hence readonly.
private static readonly Regex _headerSetext = new Regex(@"
    ^(.+?)          # $1 = Header text
    [ ]*
    \n
    (=+|-+)         # $2 = string of ='s or -'s
    [ ]*
    \n+",
    RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);

// Atx-style header: one to six leading #'s, the text, optional closing #'s.
//   $1 = the leading run of #'s, $2 = header text.
private static readonly Regex _headerAtx = new Regex(@"
    ^(\#{1,6})      # $1 = string of #'s
    [ ]*
    (.+?)           # $2 = Header text
    [ ]*
    \#*             # optional closing #'s (not counted)
    \n+",
    RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);
1089 |
|
1090 |
/// <summary>
/// Converts Markdown headers found in <paramref name="text"/> into HTML header tags.
/// </summary>
/// <remarks>
/// Setext style:
///     Header 1
///     ========
///
///     Header 2
///     --------
///
/// Atx style:
///     # Header 1
///     ## Header 2
///     ## Header 2 with closing hashes ##
///     ...
///     ###### Header 6
/// </remarks>
private string DoHeaders(string text)
{
    // Setext headers are replaced first, atx headers second.
    var setextEvaluator = new MatchEvaluator(SetextHeaderEvaluator);
    var atxEvaluator = new MatchEvaluator(AtxHeaderEvaluator);

    string afterSetext = _headerSetext.Replace(text, setextEvaluator);
    return _headerAtx.Replace(afterSetext, atxEvaluator);
}
1112 |
|
1113 |
/// <summary>
/// Match evaluator for setext headers. Group 1 is the header text and group 2 the
/// underline; an underline starting with "=" yields h1, anything else yields h2.
/// </summary>
private string SetextHeaderEvaluator(Match match)
{
    string underline = match.Groups[2].Value;
    int level = 2;
    if (underline.StartsWith("="))
        level = 1;

    string content = RunSpanGamut(match.Groups[1].Value);
    return string.Format("<h{1}>{0}</h{1}>\n\n", content, level);
}
1119 |
|
1120 |
/// <summary>
/// Match evaluator for atx headers. Group 1 is the leading run of #'s, whose
/// length is the header level; group 2 is the header text.
/// </summary>
private string AtxHeaderEvaluator(Match match)
{
    int level = match.Groups[1].Value.Length;
    string content = RunSpanGamut(match.Groups[2].Value);
    return string.Format("<h{1}>{0}</h{1}>\n\n", content, level);
}
1126 |
|
1127 |
|
1128 |
// Matches a horizontal-rule line: up to three leading spaces, then the same marker
// character (-, * or _) at least three times, each repeat optionally preceded by
// up to two spaces, then optional trailing spaces.
// The field is never reassigned in the code visible here, hence readonly.
private static readonly Regex _horizontalRules = new Regex(@"
    ^[ ]{0,3}         # Leading space
    ([-*_])           # $1: First marker
    (?>               # Repeated marker group
        [ ]{0,2}      # Zero, one, or two spaces.
        \1            # Marker character
    ){2,}             # Group repeated at least twice
    [ ]*              # Trailing spaces
    $                 # End of line.
    ", RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);
1138 |
|
1139 |
/// <summary>
/// Replaces every Markdown horizontal-rule line in <paramref name="text"/> with an hr tag.
/// </summary>
/// <remarks>
///     ***
///     * * *
///     ---
///     - - -
/// </remarks>
private string DoHorizontalRules(string text)
{
    string hrTag = "<hr" + _emptyElementSuffix + "\n";
    return _horizontalRules.Replace(text, hrTag);
}
1152 |
|
1153 |
// Pattern text shared by the two list regexes below.
//   $1 = whole list, $3 = first list item marker.
// {0} is filled with "(?:_markerUL|_markerOL)" and {1} with _tabWidth - 1.
// The list ends at end-of-input, or at two or more newlines followed by a
// non-space character that does not start another list item.
// None of these fields is reassigned in the code visible here, hence readonly.
// Declaration order matters: _wholeList must be initialized before the regexes that use it.
private static readonly string _wholeList = string.Format(@"
    (                               # $1 = whole list
      (                             # $2
        [ ]{{0,{1}}}
        ({0})                       # $3 = first list item marker
        [ ]+
      )
      (?s:.+?)
      (                             # $4
          \z
        |
          \n{{2,}}
          (?=\S)
          (?!                       # Negative lookahead for another list item marker
            [ ]*
            {0}[ ]+
          )
      )
    )", string.Format("(?:{0}|{1})", _markerUL, _markerOL), _tabWidth - 1);

// Used by DoLists while inside a list (_listLevel > 0): a list may start at any line start.
private static readonly Regex _listNested = new Regex(@"^" + _wholeList,
    RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);

// Used by DoLists at top level: a list must follow a blank line or start the input.
private static readonly Regex _listTopLevel = new Regex(@"(?:(?<=\n\n)|\A\n?)" + _wholeList,
    RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);
1178 |
|
1179 |
/// <summary>
/// Converts Markdown lists found in <paramref name="text"/> into HTML ul, ol and li tags.
/// </summary>
private string DoLists(string text)
{
    // Nested lists are recognised with a different prefix than top-level lists;
    // the extended comment in ProcessListItems() explains why.
    Regex listPattern = (_listLevel > 0) ? _listNested : _listTopLevel;
    return listPattern.Replace(text, new MatchEvaluator(ListEvaluator));
}
1193 |
|
1194 |
/// <summary>
/// Match evaluator for a whole list. Group 1 is the list text and group 3 the first
/// item marker; the marker decides between ul and ol. The items are rendered by
/// ProcessListItems and wrapped in the chosen list tag.
/// </summary>
private string ListEvaluator(Match match)
{
    string listText = match.Groups[1].Value;
    bool isUnordered = Regex.IsMatch(match.Groups[3].Value, _markerUL);

    string tagName = isUnordered ? "ul" : "ol";
    string itemMarker = isUnordered ? _markerUL : _markerOL;

    string items = ProcessListItems(listText, itemMarker);
    return string.Format("<{0}>\n{1}</{0}>\n", tagName, items);
}
1205 |
|
1206 |
/// <summary>
/// Process the contents of a single ordered or unordered list, splitting it
/// into individual list items and wrapping each one in li tags.
/// </summary>
/// <param name="list">The text of the whole list.</param>
/// <param name="marker">Regex pattern text that matches this list's item marker.</param>
/// <returns>The list text with every item replaced by an li element.</returns>
private string ProcessListItems(string list, string marker)
{
    // _listLevel keeps track of when we're inside a list. Each time we enter a
    // list we increment it; when we leave a list we decrement. If it's zero,
    // we're not in a list anymore.
    //
    // We do this because when we're not inside a list, we want to treat
    // something like this:
    //
    //    I recommend upgrading to version
    //    8. Oops, now this line is treated
    //    as a sub-list.
    //
    // as a single paragraph, despite the fact that the second line starts with a
    // digit-period-space sequence. Whereas when we're inside a list (or sub-list),
    // that line will be treated as the start of a sub-list. This is an aspect of
    // Markdown's syntax that's hard to parse perfectly.
    _listLevel++;
    try
    {
        // Trim trailing blank lines:
        list = Regex.Replace(list, @"\n{2,}\z", "\n");

        string pattern = string.Format(
          @"(^[ ]*)                    # leading whitespace = $1
            ({0}) [ ]+                 # list marker = $2
            ((?s:.+?)                  # list item text = $3
            (\n+))
            (?= (\z | \1 ({0}) [ ]+))", marker);

        bool lastItemHadADoubleNewline = false;

        // has to be a closure, so subsequent invocations can share the bool
        MatchEvaluator listItemEvaluator = (Match match) =>
        {
            string item = match.Groups[3].Value;

            bool endsWithDoubleNewline = item.EndsWith("\n\n");
            bool containsDoubleNewline = endsWithDoubleNewline || item.Contains("\n\n");

            if (containsDoubleNewline || lastItemHadADoubleNewline)
            {
                // An item containing a blank line, or following an item that ended
                // with one, goes through the block gamut.
                item = RunBlockGamut(Outdent(item) + "\n", unhash: false);
            }
            else
            {
                // recursion for sub-lists
                item = DoLists(Outdent(item));
                item = item.TrimEnd('\n');
                item = RunSpanGamut(item);
            }
            lastItemHadADoubleNewline = endsWithDoubleNewline;
            return string.Format("<li>{0}</li>\n", item);
        };

        return Regex.Replace(list, pattern, listItemEvaluator,
            RegexOptions.IgnorePatternWhitespace | RegexOptions.Multiline);
    }
    finally
    {
        // Restore the nesting depth even if an evaluator throws, so the instance
        // is not left believing it is still inside a list.
        _listLevel--;
    }
}
1274 |
|
1275 |
// Matches an indented code block: one or more lines that each start with
// _tabWidth spaces, preceded by a blank line (or the start of input) and followed
// by a line that starts with a non-space within the first _tabWidth columns, or by
// the end of the document. $1 = the code block.
// The field is never reassigned in the code visible here, hence readonly.
private static readonly Regex _codeBlock = new Regex(string.Format(@"
    (?:\n\n|\A\n?)
    (                        # $1 = the code block -- one or more lines, starting with a space
    (?:
        (?:[ ]{{{0}}})       # Lines must start with a tab-width of spaces
        .*\n+
    )+
    )
    ((?=^[ ]{{0,{0}}}[^ \t\n])|\Z) # Lookahead for non-space at line-start, or end of doc",
    _tabWidth), RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);
1285 |
|
1286 |
/// <summary>
/// Converts indented Markdown code blocks found in <paramref name="text"/> into
/// HTML pre/code blocks.
/// </summary>
private string DoCodeBlocks(string text)
{
    return _codeBlock.Replace(text, new MatchEvaluator(CodeBlockEvaluator));
}
1294 |
|
1295 |
/// <summary>
/// Match evaluator for code blocks. Group 1 (the block) is outdented, code-encoded,
/// passed through _newlinesLeadingTrailing with an empty replacement, and wrapped in
/// pre/code tags surrounded by blank lines.
/// </summary>
private string CodeBlockEvaluator(Match match)
{
    string encoded = EncodeCode(Outdent(match.Groups[1].Value));
    string trimmed = _newlinesLeadingTrailing.Replace(encoded, "");

    return "\n\n<pre><code>" + trimmed + "\n</code></pre>\n\n";
}
1304 |
|
1305 |
// Matches a code span delimited by equal-length runs of backticks.
//   $1 = the opening run of backticks, $2 = the span content.
// The opening backtick may not be preceded by a backslash, and the closing run
// may be neither preceded nor followed by another backtick.
// The field is never reassigned in the code visible here, hence readonly.
private static readonly Regex _codeSpan = new Regex(@"
    (?<!\\)   # Character before opening ` can't be a backslash
    (`+)      # $1 = Opening run of `
    (.+?)     # $2 = The code block
    (?<!`)
    \1
    (?!`)", RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline | RegexOptions.Compiled);
1312 |
|
1313 |
/// <summary>
/// Converts Markdown `code spans` found in <paramref name="text"/> into HTML code tags.
/// </summary>
private string DoCodeSpans(string text)
{
    // Delimiters may be any number of backticks, which allows literal backticks
    // inside the span. For example
    //
    //     Just type ``foo `bar` baz`` at the prompt.
    //
    // produces a code element containing: foo `bar` baz
    //
    // There is no arbitrary limit on the number of backticks used as delimiters:
    // to include three consecutive backticks in the code, delimit with four, etc.
    //
    // Spaces next to the delimiters give literal backticks at the edges:
    //
    //     ... type `` `bar` `` ...
    //
    // produces a code element containing: `bar`
    var evaluator = new MatchEvaluator(CodeSpanEvaluator);
    return _codeSpan.Replace(text, evaluator);
}
1342 |
|
1343 |
/// <summary>
/// Match evaluator for code spans. Group 2 (the content) has its leading and
/// trailing spaces removed, is code-encoded, and is wrapped in code tags.
/// </summary>
private string CodeSpanEvaluator(Match match)
{
    string content = match.Groups[2].Value;

    string withoutLeading = Regex.Replace(content, @"^[ ]*", "");          // leading whitespace
    string withoutTrailing = Regex.Replace(withoutLeading, @"[ ]*$", "");  // trailing whitespace

    // SaveFromAutoLinking prevents auto-linking of the span's content; this is
    // needed in code spans, but not in code *blocks*.
    string safe = SaveFromAutoLinking(EncodeCode(withoutTrailing));

    return "<code>" + safe + "</code>";
}
1353 |
|
1354 |
|
1355 |
// Emphasis patterns used by DoItalicsAndBold.
//   _bold / _italic:             $1 = delimiter, $2 = content.
//   _strictBold / _strictItalic: $1 = character before (or start), $2 = delimiter,
//                                $3 = content, $4 = character after (or end).
// None of these fields is reassigned in the code visible here, hence readonly.
private static readonly Regex _bold = new Regex(@"(\*\*|__) (?=\S) (.+?[*_]*) (?<=\S) \1",
    RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline | RegexOptions.Compiled);
private static readonly Regex _strictBold = new Regex(@"([\W_]|^) (\*\*|__) (?=\S) ([^\r]*?\S[\*_]*) \2 ([\W_]|$)",
    RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline | RegexOptions.Compiled);

private static readonly Regex _italic = new Regex(@"(\*|_) (?=\S) (.+?) (?<=\S) \1",
    RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline | RegexOptions.Compiled);
private static readonly Regex _strictItalic = new Regex(@"([\W_]|^) (\*|_) (?=\S) ([^\r\*_]*?\S) \2 ([\W_]|$)",
    RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline | RegexOptions.Compiled);
1364 |
|
1365 |
/// <summary>
/// Converts Markdown *italics* and **bold** found in <paramref name="text"/> into
/// HTML em and strong tags. The strict patterns are used when _strictBoldItalic is set.
/// </summary>
private string DoItalicsAndBold(string text)
{
    // In both modes <strong> must go first, then <em>.
    if (_strictBoldItalic)
    {
        string strictBold = _strictBold.Replace(text, "$1<strong>$3</strong>$4");
        return _strictItalic.Replace(strictBold, "$1<em>$3</em>$4");
    }

    string bold = _bold.Replace(text, "<strong>$2</strong>");
    return _italic.Replace(bold, "<em>$2</em>");
}
1384 |
|
1385 |
/// <summary>
/// Inserts HTML break tags for Markdown line breaks. When _autoNewlines is set every
/// newline gets a break; otherwise only newlines preceded by two or more spaces do
/// (those spaces are consumed).
/// </summary>
private string DoHardBreaks(string text)
{
    string lineBreakPattern = _autoNewlines ? @"\n" : @" {2,}\n";
    string breakTag = string.Format("<br{0}\n", _emptyElementSuffix);
    return Regex.Replace(text, lineBreakPattern, breakTag);
}
1396 |
|
1397 |
// Matches a run of blockquote paragraphs. Each paragraph starts with a line whose
// first non-space character is '>', continues over the following non-empty lines,
// and ends with any number of blank lines. $1 = the whole run.
// The field is never reassigned in the code visible here, hence readonly.
private static readonly Regex _blockquote = new Regex(@"
    (                           # Wrap whole match in $1
        (
        ^[ ]*>[ ]?              # '>' at the start of a line
            .+\n                # rest of the first line
        (.+\n)*                 # subsequent consecutive lines
        \n*                     # blanks
        )+
    )", RegexOptions.IgnorePatternWhitespace | RegexOptions.Multiline | RegexOptions.Compiled);
1406 |
|
1407 |
/// <summary>
/// Converts Markdown quoted blocks (lines starting with a greater-than sign) found
/// in <paramref name="text"/> into HTML blockquote blocks.
/// </summary>
private string DoBlockQuotes(string text)
{
    var evaluator = new MatchEvaluator(BlockQuoteEvaluator);
    return _blockquote.Replace(text, evaluator);
}
1414 |
|
1415 |
/// <summary>
/// Match evaluator for blockquotes. Strips one level of quoting from group 1,
/// runs the block gamut on the remainder, indents the result, wraps it in
/// blockquote tags and stores it in _htmlBlocks under a hash key.
/// </summary>
/// <returns>The hash key, surrounded by blank lines.</returns>
private string BlockQuoteEvaluator(Match match)
{
    string quoted = match.Groups[1].Value;

    string unquoted = Regex.Replace(quoted, @"^[ ]*>[ ]?", "", RegexOptions.Multiline);   // trim one level of quoting
    string tidied = Regex.Replace(unquoted, @"^[ ]+$", "", RegexOptions.Multiline);       // trim whitespace-only lines
    string rendered = RunBlockGamut(tidied);                                              // recurse

    // Indent every line of the rendered content.
    string indented = Regex.Replace(rendered, @"^", " ", RegexOptions.Multiline);

    // These leading spaces screw with <pre> content, so we need to fix that:
    string fixedPre = Regex.Replace(indented, @"(\s*<pre>.+?</pre>)", new MatchEvaluator(BlockQuoteEvaluator2),
        RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline);

    string html = string.Format("<blockquote>\n{0}\n</blockquote>", fixedPre);
    string key = GetHashKey(html, isHtmlBlock: true);
    _htmlBlocks[key] = html;

    return "\n\n" + key + "\n\n";
}
1434 |
|
1435 |
/// <summary>
/// Match evaluator used on pre blocks inside a blockquote: removes the single
/// leading space from every line of group 1.
/// </summary>
private string BlockQuoteEvaluator2(Match match)
{
    string preBlock = match.Groups[1].Value;
    return Regex.Replace(preBlock, @"^ ", "", RegexOptions.Multiline);
}
1439 |
|
1440 |
// Matches a bare http/https/ftp URL.
//   $1 = an optional preceding '<' or '="' (handleTrailingParens leaves such matches untouched),
//   $2 = the protocol, $3 = "://" and the rest of the URL.
// The field is never reassigned in the code visible here, hence readonly.
private static readonly Regex _autolinkBare = new Regex(@"(<|="")?\b(https?|ftp)(://[-A-Z0-9+&@#/%?=~_|\[\]\(\)!:,\.;]*[-A-Z0-9+&@#/%=~_|\[\])])(?=$|\W)",
    RegexOptions.IgnoreCase | RegexOptions.Compiled);
1442 |
|
1443 |
/// <summary>
/// Match evaluator for _autolinkBare: wraps a bare URL in angle brackets so that it
/// is linked later, moving unbalanced trailing closing parentheses outside the brackets.
/// </summary>
/// <param name="match">
/// A match whose groups are: $1 = optional preceding '&lt;' or '="', $2 = protocol,
/// $3 = the rest of the URL.
/// </param>
/// <returns>
/// The matched text unchanged when group 1 participated; otherwise the URL in angle
/// brackets, followed by any closing parentheses that were split off.
/// </returns>
private static string handleTrailingParens(Match match)
{
    // The first group is essentially a negative lookbehind -- if there's a < or a =",
    // we don't touch this. A *real* lookbehind is not used because of links within
    // links, such as an href pointing at an archive URL that embeds a second URL:
    // with a real lookbehind the full link would never be matched, and the embedded
    // URL *would* be. With the simulated lookbehind the full link *is* matched (just
    // not handled, because of this early return), so the embedded one is not matched again.
    if (match.Groups[1].Success)
        return match.Value;

    string scheme = match.Groups[2].Value;
    string remainder = match.Groups[3].Value;

    if (!remainder.EndsWith(")"))
        return "<" + scheme + remainder + ">";

    // Track paren balance; an opening paren resets a non-positive balance to 1.
    int balance = 0;
    foreach (char ch in remainder)
    {
        if (ch == '(')
            balance = (balance <= 0) ? 1 : balance + 1;
        else if (ch == ')')
            balance--;
    }

    string trailing = "";
    if (balance < 0)
    {
        // Cut up to -balance closing parens off the end and remember them.
        string surplusPattern = @"\){1," + (-balance) + "}$";
        remainder = Regex.Replace(remainder, surplusPattern,
            delegate(Match m) { trailing = m.Value; return ""; });
    }

    return "<" + scheme + remainder + ">" + trailing;
}
1479 |
|
1480 |
/// <summary>
/// Converts angle-delimited URLs (and, when _linkEmails is set, angle-delimited
/// email addresses) found in <paramref name="text"/> into HTML anchor tags.
/// </summary>
/// <remarks>
///     &lt;http://www.example.com&gt;
/// </remarks>
private string DoAutoLinks(string text)
{
    string result = text;

    if (_autoHyperlink)
    {
        // Fix up arbitrary URLs by adding Markdown < > so they get linked as well.
        // At this point all other URLs in the text are already hyperlinked as
        // <a href=""></a>, *except* for the <http://www.foo.com> case.
        result = _autolinkBare.Replace(result, handleTrailingParens);
    }

    // Hyperlinks: <http://foo.com>
    result = Regex.Replace(result, "<((https?|ftp):[^'\">\\s]+)>", new MatchEvaluator(HyperlinkEvaluator));

    if (!_linkEmails)
        return result;

    // Email addresses: <address@domain.foo>
    string emailPattern =
        @"<
          (?:mailto:)?
          (
            [-.\w]+
            \@
            [-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+
          )
          >";
    return Regex.Replace(result, emailPattern, new MatchEvaluator(EmailEvaluator),
        RegexOptions.IgnoreCase | RegexOptions.IgnorePatternWhitespace);
}
1517 |
|
1518 |
/// <summary>
/// Match evaluator for angle-delimited URLs: returns an anchor whose href and
/// visible text are both group 1 of <paramref name="match"/>.
/// </summary>
private string HyperlinkEvaluator(Match match)
{
    string target = match.Groups[1].Value;
    return "<a href=\"" + target + "\">" + target + "</a>";
}
1523 |
|
1524 |
/// <summary>
/// Match evaluator for angle-delimited email addresses. Group 1 is the address.
/// </summary>
/// <returns>
/// A mailto anchor in which the characters are encoded by EncodeEmailAddress,
/// with the "mailto:" part removed from the visible text.
/// </returns>
private string EmailEvaluator(Match match)
{
    //
    //    Input: an email address, e.g. "foo@example.com"
    //
    //    Output: the email address as a mailto link, with the characters of
    //            "mailto:" + address passed through EncodeEmailAddress (which emits
    //            a mix of raw characters, decimal entities and hex entities), in
    //            the hopes of foiling most address harvesting spam bots.
    //
    //    Based on a filter by Matthew Wickline, posted to the BBEdit-Talk
    //    mailing list: <http://tinyurl.com/yu7ue>
    //
    string address = Unescape(match.Groups[1].Value);

    // ':' is left alone by the encoder (to spot mailto: later)
    string encoded = EncodeEmailAddress("mailto:" + address);

    string anchor = string.Format("<a href=\"{0}\">{0}</a>", encoded);

    // strip the mailto: from the visible part
    return Regex.Replace(anchor, "\">.+?:", "\">");
}
1553 |
|
1554 |
|
1555 |
// Matches one to _tabWidth spaces at the start of any line.
// The field is never reassigned in the code visible here, hence readonly.
private static readonly Regex _outDent = new Regex(@"^[ ]{1," + _tabWidth + @"}", RegexOptions.Multiline | RegexOptions.Compiled);

/// <summary>
/// Remove one level of line-leading spaces (at most _tabWidth per line)
/// from every line of <paramref name="block"/>.
/// </summary>
private string Outdent(string block)
{
    return _outDent.Replace(block, "");
}
1564 |
|
1565 |
|
1566 |
#region Encoding and Normalization |
1567 |
|
1568 |
|
1569 |
/// <summary>
/// Encodes an email address character by character, choosing at random between
/// the raw character, a hex entity and a decimal entity
/// (roughly 10% raw, 45% hex, 45% dec).
/// Note that @ is always encoded and : never is.
/// </summary>
private string EncodeEmailAddress(string addr)
{
    var encoded = new StringBuilder(addr.Length * 5);
    var rng = new Random();

    for (int i = 0; i < addr.Length; i++)
    {
        char ch = addr[i];
        int roll = rng.Next(1, 100);

        bool keepRaw = ch != '@' && (roll > 90 || ch == ':');
        if (keepRaw)
            encoded.Append(ch);                             // raw character
        else if (roll < 45)
            encoded.AppendFormat("&#x{0:x};", (int)ch);     // hex entity
        else
            encoded.AppendFormat("&#{0};", (int)ch);        // decimal entity
    }

    return encoded.ToString();
}
1591 |
|
1592 |
// Every character that needs special treatment inside code: & < > \ * _ { } [ ]
// The field is never reassigned in the code visible here, hence readonly.
private static readonly Regex _codeEncoder = new Regex(@"&|<|>|\\|\*|_|\{|\}|\[|\]", RegexOptions.Compiled);

/// <summary>
/// Encode/escape certain Markdown characters inside code blocks and spans where they are literals
/// </summary>
private string EncodeCode(string code)
{
    return _codeEncoder.Replace(code, EncodeCodeEvaluator);
}

/// <summary>
/// Match evaluator for _codeEncoder: maps &amp;, &lt; and &gt; to their HTML
/// entities and every other matched character to its _escapeTable entry.
/// </summary>
private string EncodeCodeEvaluator(Match match)
{
    switch (match.Value)
    {
        // Encode all ampersands; HTML entities are not
        // entities within a Markdown code span.
        case "&":
            return "&amp;";
        // Do the angle bracket song and dance
        case "<":
            return "&lt;";
        case ">":
            return "&gt;";
        // escape characters that are magic in Markdown
        default:
            return _escapeTable[match.Value];
    }
}
1619 |
|
1620 |
|
1621 |
// An ampersand that does not start a named, decimal or hex character reference.
private static readonly Regex _amps = new Regex(@"&(?!((#[0-9]+)|(#[xX][a-fA-F0-9]+)|([a-zA-Z][a-zA-Z0-9]*));)", RegexOptions.ExplicitCapture | RegexOptions.Compiled);
// A left angle bracket that is not followed by a letter, '/', '?', '$' or '!'.
private static readonly Regex _angles = new Regex(@"<(?![A-Za-z/?\$!])", RegexOptions.ExplicitCapture | RegexOptions.Compiled);

/// <summary>
/// Encode any ampersands (that aren't part of an HTML entity) and any left angle
/// brackets that do not look like the start of a tag.
/// </summary>
private string EncodeAmpsAndAngles(string s)
{
    // The replacements must be the entities themselves; replacing with the bare
    // character would leave the text unchanged.
    s = _amps.Replace(s, "&amp;");
    s = _angles.Replace(s, "&lt;");
    return s;
}
1633 |
|
1634 |
// Pattern for backslash-escaped characters. It is declared without an initializer
// here, so it must be assigned elsewhere in the class before first use.
private static Regex _backslashEscapes;

/// <summary>
/// Encodes any escaped characters such as \`, \*, \[ etc
/// </summary>
private string EscapeBackslashes(string s)
{
    var evaluator = new MatchEvaluator(EscapeBackslashesEvaluator);
    return _backslashEscapes.Replace(s, evaluator);
}

/// <summary>
/// Match evaluator for _backslashEscapes: looks the matched text up in _backslashEscapeTable.
/// </summary>
private string EscapeBackslashesEvaluator(Match match)
{
    string escapeSequence = match.Value;
    return _backslashEscapeTable[escapeSequence];
}
1647 |
|
1648 |
// Matches an escape token: the character 0x1A, 'E', one or more digits, 'E'.
// The literal is split in two so that the 'E' is not read as part of the \x hex escape.
// The field is never reassigned in the code visible here, hence readonly.
private static readonly Regex _unescapes = new Regex("\x1A" + "E\\d+E", RegexOptions.Compiled);

/// <summary>
/// swap back in all the special characters we've hidden
/// </summary>
private string Unescape(string s)
{
    return _unescapes.Replace(s, new MatchEvaluator(UnescapeEvaluator));
}

/// <summary>
/// Match evaluator for _unescapes: looks the matched token up in _invertedEscapeTable.
/// </summary>
private string UnescapeEvaluator(Match match)
{
    return _invertedEscapeTable[match.Value];
}
1661 |
|
1662 |
|
1663 |
/// <summary>
/// Replaces the emphasis characters * and _ in <paramref name="s"/> with their
/// _escapeTable entries.
/// </summary>
private string EscapeBoldItalic(string s)
{
    string withoutAsterisks = s.Replace("*", _escapeTable["*"]);
    return withoutAsterisks.Replace("_", _escapeTable["_"]);
}
1672 |
|
1673 |
/// <summary>
/// Escapes angle brackets and double quotes in <paramref name="s"/> with their
/// HTML character entities so the text can be placed inside a double-quoted
/// attribute value. Ampersands are left as they are.
/// </summary>
private static string AttributeEncode(string s)
{
    // The replacements must be the entities themselves: replacing a character
    // with itself is a no-op, and a bare quote inside the literal does not compile.
    return s.Replace(">", "&gt;").Replace("<", "&lt;").Replace("\"", "&quot;");
}
1677 |
|
1678 |
// The characters that get hex-encoded in URLs: " ' * ( ) [ ] $ :
// The array reference is never reassigned in the code visible here, hence readonly.
private static readonly char[] _problemUrlChars = @"""'*()[]$:".ToCharArray();

/// <summary>
/// hex-encodes some unusual "problem" chars in URLs to avoid URL detection problems
/// </summary>
/// <param name="url">The URL to encode.</param>
/// <returns>
/// <paramref name="url"/> unchanged when _encodeProblemUrlCharacters is off; otherwise
/// the URL with each problem character replaced by '%' and its lower-case hex code.
/// A colon directly followed by '/' or a digit is kept as is.
/// </returns>
private string EncodeProblemUrlChars(string url)
{
    if (!_encodeProblemUrlCharacters) return url;

    var sb = new StringBuilder(url.Length);

    for (int i = 0; i < url.Length; i++)
    {
        char c = url[i];
        bool encode = Array.IndexOf(_problemUrlChars, c) != -1;

        // A colon that is not the last character is only encoded when the next
        // character is neither '/' nor a digit.
        if (encode && c == ':' && i < url.Length - 1)
        {
            char next = url[i + 1];
            encode = next != '/' && !(next >= '0' && next <= '9');
        }

        if (encode)
            sb.AppendFormat("%{0:x}", (byte)c);
        else
            sb.Append(c);
    }

    return sb.ToString();
}
1706 |
|
1707 |
|
1708 |
/// <summary>
/// Within tags -- meaning between &lt; and &gt; -- encode [\ ` * _] so they
/// don't conflict with their use in Markdown for code, italics and strong.
/// Each such character is replaced with its _escapeTable entry; this is likely
/// overkill, but it should prevent accidental collisions with the escape values.
/// </summary>
private string EscapeSpecialCharsWithinTagAttributes(string text)
{
    // Rebuild the text token by token; only tag tokens are modified.
    var rebuilt = new StringBuilder(text.Length);

    foreach (var token in TokenizeHTML(text))
    {
        if (token.Type != TokenType.Tag)
        {
            rebuilt.Append(token.Value);
            continue;
        }

        string tag = token.Value.Replace(@"\", _escapeTable[@"\"]);

        // escape slashes in comments to prevent autolinking there --
        // http://meta.stackoverflow.com/questions/95987/html-comment-containing-url-breaks-if-followed-by-another-html-comment
        if (_autoHyperlink && tag.StartsWith("<!"))
            tag = tag.Replace("/", _escapeTable["/"]);

        // A <code> or </code> that has at least one character on each side within
        // the token is swapped for the backtick escape.
        tag = Regex.Replace(tag, "(?<=.)</?code>(?=.)", _escapeTable[@"`"]);
        tag = EscapeBoldItalic(tag);

        rebuilt.Append(tag);
    }

    return rebuilt.ToString();
}
1742 |
|
1743 |
/// <summary>
/// Expands each tab to spaces up to the next multiple of _tabWidth;
/// standardizes line endings from DOS (CR LF) or Mac (CR) to UNIX (LF);
/// drops every 0x1A character;
/// empties lines that consist only of spaces (their newline is kept);
/// terminates the last line and appends two further newlines.
/// </summary>
private string Normalize(string text)
{
    var result = new StringBuilder(text.Length);
    var currentLine = new StringBuilder();
    bool hasNonSpace = false;   // true once the current line holds anything but spaces

    for (int pos = 0; pos < text.Length; pos++)
    {
        char ch = text[pos];

        // A CR counts as a line end only when it is not the last character and is
        // not followed by LF; in a CR LF pair the LF ends the line instead.
        bool loneCarriageReturn = ch == '\r' && pos < text.Length - 1 && text[pos + 1] != '\n';

        if (ch == '\n' || loneCarriageReturn)
        {
            if (hasNonSpace) result.Append(currentLine);
            result.Append('\n');
            currentLine.Length = 0;
            hasNonSpace = false;
        }
        else if (ch == '\r' || ch == '\x1A')
        {
            // dropped: the CR of a CR LF pair, a CR at the very end, or a 0x1A character
        }
        else if (ch == '\t')
        {
            int padding = _tabWidth - currentLine.Length % _tabWidth;
            currentLine.Append(' ', padding);
        }
        else
        {
            if (ch != ' ') hasNonSpace = true;
            currentLine.Append(ch);
        }
    }

    // Flush the final line, which may lack a terminator of its own.
    if (hasNonSpace) result.Append(currentLine);
    result.Append('\n');

    // add two newlines to the end before return
    result.Append("\n\n");
    return result.ToString();
}
1792 |
|
1793 |
#endregion |
1794 |
|
1795 |
/// <summary>
/// Returns <paramref name="text"/> concatenated with itself <paramref name="count"/>
/// times (this emulates what's available in PHP).
/// </summary>
private static string RepeatString(string text, int count)
{
    var repeated = new StringBuilder(text.Length * count);
    int remaining = count;
    while (remaining > 0)
    {
        repeated.Append(text);
        remaining--;
    }
    return repeated.ToString();
}
1805 |
|
1806 |
} |
1807 |
} |