프로젝트

일반

사용자정보

통계
| 브랜치(Branch): | 개정판:

markus / MarkusAutoUpdate / src / NetSparkle / Libraries / MarkdownSharp.cs @ 8e3d58f3

이력 | 보기 | 이력해설 | 다운로드 (69.7 KB)

1
/*
2
 * MarkdownSharp
3
 * -------------
4
 * a C# Markdown processor
5
 *
6
 * Markdown is a text-to-HTML conversion tool for web writers
7
 * Copyright (c) 2004 John Gruber
8
 * http://daringfireball.net/projects/markdown/
9
 *
10
 * Markdown.NET
11
 * Copyright (c) 2004-2009 Milan Negovan
12
 * http://www.aspnetresources.com
13
 * http://aspnetresources.com/blog/markdown_announced.aspx
14
 *
15
 * MarkdownSharp
16
 * Copyright (c) 2009-2011 Jeff Atwood
17
 * http://stackoverflow.com
18
 * http://www.codinghorror.com/blog/
19
 * http://code.google.com/p/markdownsharp/
20
 *
21
 * History: Milan ported the Markdown processor to C#. He granted license to me so I can open source it
22
 * and let the community contribute to and improve MarkdownSharp.
23
 *
24
 */
25

    
26
#region Copyright and license
27

    
28
/*
29

    
30
Copyright (c) 2009 - 2010 Jeff Atwood
31

    
32
http://www.opensource.org/licenses/mit-license.php
33
  
34
Permission is hereby granted, free of charge, to any person obtaining a copy
35
of this software and associated documentation files (the "Software"), to deal
36
in the Software without restriction, including without limitation the rights
37
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
38
copies of the Software, and to permit persons to whom the Software is
39
furnished to do so, subject to the following conditions:
40

    
41
The above copyright notice and this permission notice shall be included in
42
all copies or substantial portions of the Software.
43

    
44
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
45
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
46
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
47
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
48
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
49
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
50
THE SOFTWARE.
51

    
52
Copyright (c) 2003-2004 John Gruber
53
<http://daringfireball.net/>   
54
All rights reserved.
55

    
56
Redistribution and use in source and binary forms, with or without
57
modification, are permitted provided that the following conditions are
58
met:
59

    
60
* Redistributions of source code must retain the above copyright notice,
61
  this list of conditions and the following disclaimer.
62

    
63
* Redistributions in binary form must reproduce the above copyright
64
  notice, this list of conditions and the following disclaimer in the
65
  documentation and/or other materials provided with the distribution.
66

    
67
* Neither the name "Markdown" nor the names of its contributors may
68
  be used to endorse or promote products derived from this software
69
  without specific prior written permission.
70

    
71
This software is provided by the copyright holders and contributors "as
72
is" and any express or implied warranties, including, but not limited
73
to, the implied warranties of merchantability and fitness for a
74
particular purpose are disclaimed. In no event shall the copyright owner
75
or contributors be liable for any direct, indirect, incidental, special,
76
exemplary, or consequential damages (including, but not limited to,
77
procurement of substitute goods or services; loss of use, data, or
78
profits; or business interruption) however caused and on any theory of
79
liability, whether in contract, strict liability, or tort (including
80
negligence or otherwise) arising in any way out of the use of this
81
software, even if advised of the possibility of such damage.
82
*/
83

    
84
#endregion
85

    
86
using System;
87
using System.Collections.Generic;
88
using System.Configuration;
89
using System.Text;
90
using System.Text.RegularExpressions;
91

    
92
namespace MarkdownSharp
93
{
94

    
95
    /// <summary>
    /// Settings bag that can be handed to the <c>Markdown(MarkdownOptions)</c> constructor.
    /// All members are plain auto-properties without initializers, so a fresh instance has
    /// every flag set to <c>false</c> and <see cref="EmptyElementSuffix"/> set to <c>null</c>.
    /// </summary>
    public class MarkdownOptions
    {
        /// <summary>
        /// when true, (most) bare plain URLs are auto-hyperlinked  
        /// WARNING: this is a significant deviation from the markdown spec
        /// </summary>
        public bool AutoHyperlink
        {
            get;
            set;
        }

        /// <summary>
        /// when true, RETURN becomes a literal newline  
        /// WARNING: this is a significant deviation from the markdown spec
        /// </summary>
        public bool AutoNewlines
        {
            get;
            set;
        }

        /// <summary>
        /// use ">" for HTML output, or " />" for XHTML output
        /// </summary>
        public string EmptyElementSuffix
        {
            get;
            set;
        }

        /// <summary>
        /// when true, problematic URL characters like [, ], (, and so forth will be encoded
        /// WARNING: this is a significant deviation from the markdown spec
        /// </summary>
        public bool EncodeProblemUrlCharacters
        {
            get;
            set;
        }

        /// <summary>
        /// when false, email addresses will never be auto-linked  
        /// WARNING: this is a significant deviation from the markdown spec
        /// </summary>
        public bool LinkEmails
        {
            get;
            set;
        }

        /// <summary>
        /// when true, bold and italic require non-word characters on either side  
        /// WARNING: this is a significant deviation from the markdown spec
        /// </summary>
        public bool StrictBoldItalic
        {
            get;
            set;
        }
    }
130

    
131

    
132
    /// <summary>
133
    /// Markdown is a text-to-HTML conversion tool for web writers.
134
    /// Markdown allows you to write using an easy-to-read, easy-to-write plain text format,
135
    /// then convert it to structurally valid XHTML (or HTML).
136
    /// </summary>
137
    public class Markdown
138
    {
139
        private const string _version = "1.13";
140

    
141
        #region Constructors and Options
142

    
143
        /// <summary>
        /// Creates a Markdown instance with the default option values; simply forwards
        /// to <c>Markdown(false)</c>.
        /// </summary>
        public Markdown() : this(false) { }
150

    
151
        /// <summary>
        /// Create a new Markdown instance. Loading options from a configuration file is
        /// currently disabled: the code that used to read them is commented out in the
        /// body, so <paramref name="loadOptionsFromConfigFile"/> has no effect and the
        /// instance always starts with the default option values.
        /// </summary>
        /// <remarks>
        /// The disabled code reads the following keys from the appSettings section:
        ///
        ///     Markdown.StrictBoldItalic (true/false)
        ///     Markdown.EmptyElementSuffix (">" or " />" without the quotes)
        ///     Markdown.LinkEmails (true/false)
        ///     Markdown.AutoNewlines (true/false)
        ///     Markdown.AutoHyperlink (true/false)
        ///     Markdown.EncodeProblemUrlCharacters (true/false)
        /// </remarks>
        /// <param name="loadOptionsFromConfigFile">
        /// Requests loading options from the configuration file; ignored while the
        /// loading code below stays commented out.
        /// </param>
        public Markdown(bool loadOptionsFromConfigFile)
        {
            if (!loadOptionsFromConfigFile) return;

            // Configuration loading is disabled. Kept for reference:
            //
            //            var settings = ConfigurationManager.AppSettings;
            //            foreach (string key in settings.Keys)
            //            {
            //                switch (key)
            //                {
            //                    case "Markdown.AutoHyperlink":
            //                        _autoHyperlink = Convert.ToBoolean(settings[key]);
            //                        break;
            //                    case "Markdown.AutoNewlines":
            //                        _autoNewlines = Convert.ToBoolean(settings[key]);
            //                        break;
            //                    case "Markdown.EmptyElementSuffix":
            //                        _emptyElementSuffix = settings[key];
            //                        break;
            //                    case "Markdown.EncodeProblemUrlCharacters":
            //                        _encodeProblemUrlCharacters = Convert.ToBoolean(settings[key]);
            //                        break;
            //                    case "Markdown.LinkEmails":
            //                        _linkEmails = Convert.ToBoolean(settings[key]);
            //                        break;
            //                    case "Markdown.StrictBoldItalic":
            //                        _strictBoldItalic = Convert.ToBoolean(settings[key]);
            //                        break;
            //                }
            //            }
        }
193

    
194
        /// <summary>
        /// Create a new Markdown instance and set the options from the MarkdownOptions object.
        /// </summary>
        /// <param name="options">
        /// Option values to copy. A null or empty <c>EmptyElementSuffix</c> (the value an
        /// untouched MarkdownOptions carries) is ignored so the built-in " />" default is kept.
        /// </param>
        /// <exception cref="ArgumentNullException"><paramref name="options"/> is null.</exception>
        public Markdown(MarkdownOptions options)
        {
            if (options == null)
            {
                throw new ArgumentNullException("options");
            }

            _autoHyperlink = options.AutoHyperlink;
            _autoNewlines = options.AutoNewlines;
            _encodeProblemUrlCharacters = options.EncodeProblemUrlCharacters;
            _linkEmails = options.LinkEmails;
            _strictBoldItalic = options.StrictBoldItalic;

            // Only override the suffix when one was actually supplied; copying null or ""
            // would replace the field's " />" default with no suffix at all.
            if (!String.IsNullOrEmpty(options.EmptyElementSuffix))
            {
                _emptyElementSuffix = options.EmptyElementSuffix;
            }
        }
206

    
207

    
208
        // Backing store for EmptyElementSuffix; starts out as the XHTML form.
        private string _emptyElementSuffix = " />";

        /// <summary>
        /// use ">" for HTML output, or " />" for XHTML output (the initial value is " />")
        /// </summary>
        public string EmptyElementSuffix
        {
            get
            {
                return _emptyElementSuffix;
            }
            set
            {
                _emptyElementSuffix = value;
            }
        }
217

    
218
        // Backing store for LinkEmails; enabled unless changed.
        private bool _linkEmails = true;

        /// <summary>
        /// when false, email addresses will never be auto-linked (initial value: true)  
        /// WARNING: this is a significant deviation from the markdown spec
        /// </summary>
        public bool LinkEmails
        {
            get
            {
                return _linkEmails;
            }
            set
            {
                _linkEmails = value;
            }
        }
228

    
229
        // Backing store for StrictBoldItalic; off unless changed.
        private bool _strictBoldItalic = false;

        /// <summary>
        /// when true, bold and italic require non-word characters on either side (initial value: false)  
        /// WARNING: this is a significant deviation from the markdown spec
        /// </summary>
        public bool StrictBoldItalic
        {
            get
            {
                return _strictBoldItalic;
            }
            set
            {
                _strictBoldItalic = value;
            }
        }
239

    
240
        // Backing store for AutoNewLines; off unless changed.
        private bool _autoNewlines = false;

        /// <summary>
        /// when true, RETURN becomes a literal newline (initial value: false)  
        /// WARNING: this is a significant deviation from the markdown spec
        /// </summary>
        public bool AutoNewLines
        {
            get
            {
                return _autoNewlines;
            }
            set
            {
                _autoNewlines = value;
            }
        }
250

    
251
        // Backing store for AutoHyperlink; off unless changed.
        private bool _autoHyperlink = false;

        /// <summary>
        /// when true, (most) bare plain URLs are auto-hyperlinked (initial value: false)  
        /// WARNING: this is a significant deviation from the markdown spec
        /// </summary>
        public bool AutoHyperlink
        {
            get
            {
                return _autoHyperlink;
            }
            set
            {
                _autoHyperlink = value;
            }
        }
261

    
262
        // Backing store for EncodeProblemUrlCharacters; off unless changed.
        private bool _encodeProblemUrlCharacters = false;

        /// <summary>
        /// when true, problematic URL characters like [, ], (, and so forth will be encoded (initial value: false)
        /// WARNING: this is a significant deviation from the markdown spec
        /// </summary>
        public bool EncodeProblemUrlCharacters
        {
            get
            {
                return _encodeProblemUrlCharacters;
            }
            set
            {
                _encodeProblemUrlCharacters = value;
            }
        }
272

    
273
        #endregion
274

    
275
        /// <summary>
        /// Classifies a <see cref="Token"/> as either plain text or a tag.
        /// </summary>
        private enum TokenType
        {
            Text,
            Tag
        }

        /// <summary>
        /// A piece of input paired with its <see cref="TokenType"/>.
        /// </summary>
        private struct Token
        {
            public TokenType Type;
            public string Value;

            /// <summary>
            /// Creates a token of the given kind holding the given text.
            /// </summary>
            public Token(TokenType type, string value)
            {
                Type = type;
                Value = value;
            }
        }
287

    
288
        /// <summary>
        /// maximum nested depth of [] and () supported by the transform; implementation detail.
        /// Also used as the repeat count when the nested bracket/paren/tag patterns are built.
        /// </summary>
        private const int _nestDepth = 6;

        /// <summary>
        /// Tabs are automatically converted to spaces as part of the transform;  
        /// this constant determines how "wide" those tabs become in spaces.  
        /// </summary>
        private const int _tabWidth = 4;

        // Regex fragments for list markers (unordered and ordered respectively, judging by the names).
        private const string _markerUL = @"[*+-]";
        private const string _markerOL = @"\d+[.]";

        // Process-wide lookup tables; all three are filled once by the static constructor.
        private static readonly Dictionary<string, string> _escapeTable;          // character -> hash key
        private static readonly Dictionary<string, string> _invertedEscapeTable;  // hash key -> character
        private static readonly Dictionary<string, string> _backslashEscapeTable; // "\" + character -> hash key

        // Per-instance state; emptied by Setup() at the start of a transform.
        private readonly Dictionary<string, string> _urls = new Dictionary<string, string>();       // link id -> url
        private readonly Dictionary<string, string> _titles = new Dictionary<string, string>();     // link id -> title
        private readonly Dictionary<string, string> _htmlBlocks = new Dictionary<string, string>(); // hash key -> html block

        private int _listLevel;

        // temporarily replaces "://" where auto-linking shouldn't happen
        private static string AutoLinkPreventionMarker = "\x1AP";
312

    
313
        /// <summary>
        /// Static constructor: builds everything that stays the same across all transforms,
        /// namely the three escape lookup tables and the regex matching backslash escapes.
        /// </summary>
        static Markdown()
        {
            // Table of hash values for escaped characters (and its inverse):
            _escapeTable = new Dictionary<string, string>();
            _invertedEscapeTable = new Dictionary<string, string>();
            // Table of hash values for backslash escaped characters:
            _backslashEscapeTable = new Dictionary<string, string>();

            const string escapableChars = @"\`*_{}[]()>#+-.!/";
            var alternatives = new List<string>(escapableChars.Length);

            foreach (char ch in escapableChars)
            {
                string literal = ch.ToString();
                string placeholder = GetHashKey(literal, isHtmlBlock: false);
                string escapedForm = @"\" + literal;

                _escapeTable.Add(literal, placeholder);
                _invertedEscapeTable.Add(placeholder, literal);
                _backslashEscapeTable.Add(escapedForm, placeholder);

                // each escaped form becomes one alternative of the regex
                alternatives.Add(Regex.Escape(escapedForm));
            }

            _backslashEscapes = new Regex(string.Join("|", alternatives.ToArray()), RegexOptions.Compiled);
        }
338

    
339
        /// <summary>
        /// current version of MarkdownSharp (the value of the private version constant);  
        /// see http://code.google.com/p/markdownsharp/ for the latest code or to contribute
        /// </summary>
        public string Version
        {
            get
            {
                return _version;
            }
        }
347

    
348
        /// <summary>
        /// Transforms the provided Markdown-formatted text to HTML;  
        /// see http://en.wikipedia.org/wiki/Markdown
        /// </summary>
        /// <remarks>
        /// Runs, in this order: Setup, Normalize, HashHTMLBlocks, StripLinkDefinitions,
        /// RunBlockGamut, Unescape, Cleanup. The original author notes that the order
        /// in which these steps are called is essential.
        /// </remarks>
        /// <param name="text">Markdown source; null or empty input yields an empty string.</param>
        /// <returns>The generated markup followed by a single "\n".</returns>
        public string Transform(string text)
        {
            if (String.IsNullOrEmpty(text))
            {
                return "";
            }

            Setup();

            string html = Normalize(text);
            html = HashHTMLBlocks(html);
            html = StripLinkDefinitions(html);
            html = RunBlockGamut(html);
            html = Unescape(html);

            Cleanup();

            return html + "\n";
        }
375

    
376

    
377
        /// <summary>
        /// Perform transformations that form block-level tags like paragraphs, headers, and list items.
        /// </summary>
        /// <param name="text">Text to process.</param>
        /// <param name="unhash">Forwarded unchanged to FormParagraphs.</param>
        /// <returns>The text after all block-level steps have run.</returns>
        private string RunBlockGamut(string text, bool unhash = true)
        {
            string blocks = DoHeaders(text);
            blocks = DoHorizontalRules(blocks);
            blocks = DoLists(blocks);
            blocks = DoCodeBlocks(blocks);
            blocks = DoBlockQuotes(blocks);

            // HashHTMLBlocks() already ran once on the raw source to escape HTML written
            // by the author. This second pass escapes the markup generated just above, so
            // that block-level tags do not get wrapped in <p> tags.
            blocks = HashHTMLBlocks(blocks);

            return FormParagraphs(blocks, unhash: unhash);
        }
398

    
399

    
400
        /// <summary>
        /// Perform transformations that occur *within* block-level tags like paragraphs, headers, and list items.
        /// </summary>
        /// <param name="text">Content of a single block.</param>
        /// <returns>The content after all span-level steps have run.</returns>
        private string RunSpanGamut(string text)
        {
            string span = DoCodeSpans(text);
            span = EscapeSpecialCharsWithinTagAttributes(span);
            span = EscapeBackslashes(span);

            // Images go before anchors, because ![foo][f] looks like an anchor.
            span = DoImages(span);
            span = DoAnchors(span);

            // Auto-links go after DoAnchors(), because < and > may be used as
            // delimiters in inline links like [this](<url>).
            span = DoAutoLinks(span);

            // put back every "://" that was swapped for the prevention marker
            span = span.Replace(AutoLinkPreventionMarker, "://");

            span = EncodeAmpsAndAngles(span);
            span = DoItalicsAndBold(span);
            span = DoHardBreaks(span);

            return span;
        }
425

    
426
        // newlines at the very start or the very end of the text
        private static Regex _newlinesLeadingTrailing =
            new Regex(@"^\n+|\n+\z", RegexOptions.Compiled);

        // a run of two or more newlines
        private static Regex _newlinesMultiple =
            new Regex(@"\n{2,}", RegexOptions.Compiled);

        // spaces (possibly none) at the start of the text
        private static Regex _leadingWhitespace =
            new Regex(@"^[ ]*", RegexOptions.Compiled);

        // an HTML-block hash key: \x1A, 'H', digits, 'H' (the shape GetHashKey produces for HTML blocks)
        private static Regex _htmlBlockHash =
            new Regex("\x1AH\\d+H", RegexOptions.Compiled);
431

    
432
        /// <summary>
        /// splits on two or more newlines, to form "paragraphs";    
        /// each paragraph is then unhashed (if it is a hash and unhashing isn't turned off) or wrapped in HTML p tag
        /// </summary>
        /// <param name="text">Text whose leading and trailing newlines are stripped before splitting.</param>
        /// <param name="unhash">
        /// When false, paragraphs that start with an HTML-block hash key are left as they are.
        /// </param>
        /// <returns>The processed paragraphs joined with a blank line ("\n\n").</returns>
        private string FormParagraphs(string text, bool unhash = true)
        {
            // split on two or more newlines
            string[] grafs = _newlinesMultiple.Split(_newlinesLeadingTrailing.Replace(text, ""));

            for (int i = 0; i < grafs.Length; i++)
            {
                // Ordinal comparison is required here: the single-argument StartsWith is
                // culture-sensitive, and such comparisons can treat the \x1A control
                // character as ignorable, which would misclassify ordinary text that
                // merely starts with "H" as a hashed block.
                if (!grafs[i].StartsWith("\x1AH", StringComparison.Ordinal))
                {
                    // do span level processing inside the block, then wrap result in <p> tags
                    grafs[i] = _leadingWhitespace.Replace(RunSpanGamut(grafs[i]), "<p>") + "</p>";
                    continue;
                }

                if (!unhash)
                {
                    continue;
                }

                // unhashify HTML blocks; restored blocks may contain further keys, so
                // repeat until a pass replaces nothing
                int sanityCheck = 50; // just for safety, guard against an infinite loop
                bool keepGoing = true; // as long as replacements were made, keep going
                while (keepGoing && sanityCheck > 0)
                {
                    keepGoing = false;
                    grafs[i] = _htmlBlockHash.Replace(grafs[i], match =>
                    {
                        string block;
                        if (!_htmlBlocks.TryGetValue(match.Value, out block))
                        {
                            // Text that only looks like a hash key (e.g. typed by the
                            // author) has no stored block: leave it untouched instead of
                            // failing with KeyNotFoundException.
                            return match.Value;
                        }

                        keepGoing = true;
                        return block;
                    });
                    sanityCheck--;
                }
                /* if (keepGoing)
                {
                    // Logging of an infinite loop goes here.
                    // If such a thing should happen, please open a new issue on http://code.google.com/p/markdownsharp/
                    // with the input that caused it.
                }*/
            }

            return string.Join("\n\n", grafs);
        }
477

    
478

    
479
        /// <summary>
        /// Resets the per-transform state: empties the url, title and HTML-block tables
        /// and sets the list level back to zero.
        /// </summary>
        private void Setup()
        {
            // Without this reset, a page that renders more than one article (e.g. an
            // index page showing the N most recent articles) would get conflicts
            // between entries coming from different articles.
            _listLevel = 0;
            _htmlBlocks.Clear();
            _titles.Clear();
            _urls.Clear();
        }
490

    
491
        /// <summary>
        /// Clears the per-transform state after a transform; identical to <see cref="Setup"/>.
        /// </summary>
        private void Cleanup()
        {
            Setup();
        }
495

    
496
        // Cached result of GetNestedBracketsPattern(); null until first requested.
        private static string _nestedBracketsPattern;

        /// <summary>
        /// Reusable pattern to match balanced [brackets]. See Friedl's
        /// "Mastering Regular Expressions", 2nd Ed., pp. 328-331.
        /// </summary>
        /// <returns>The pattern text, built on first use and cached afterwards.</returns>
        private static string GetNestedBracketsPattern()
        {
            if (_nestedBracketsPattern != null)
            {
                return _nestedBracketsPattern;
            }

            // in other words [this] and [this[also]] and [this[also[too]]]
            // up to _nestDepth
            string openings = RepeatString(@"
                    (?>              # Atomic matching
                       [^\[\]]+      # Anything other than brackets
                     |
                       \[
                           ", _nestDepth);
            string closings = RepeatString(
                    @" \]
                    )*"
                    , _nestDepth);

            _nestedBracketsPattern = openings + closings;
            return _nestedBracketsPattern;
        }
519

    
520
        // Cached result of GetNestedParensPattern(); null until first requested.
        private static string _nestedParensPattern;

        /// <summary>
        /// Reusable pattern to match balanced (parens). See Friedl's
        /// "Mastering Regular Expressions", 2nd Ed., pp. 328-331.
        /// </summary>
        /// <returns>The pattern text, built on first use and cached afterwards.</returns>
        private static string GetNestedParensPattern()
        {
            if (_nestedParensPattern != null)
            {
                return _nestedParensPattern;
            }

            // in other words (this) and (this(also)) and (this(also(too)))
            // up to _nestDepth
            string openings = RepeatString(@"
                    (?>              # Atomic matching
                       [^()\s]+      # Anything other than parens or whitespace
                     |
                       \(
                           ", _nestDepth);
            string closings = RepeatString(
                    @" \)
                    )*"
                    , _nestDepth);

            _nestedParensPattern = openings + closings;
            return _nestedParensPattern;
        }
543

    
544
        // Matches one link definition: up to (_tabWidth - 1) leading spaces, "[id]:",
        // the url (optionally in angle brackets) and an optional title in quotes or
        // parentheses. Groups: $1 = id, $2 = url, $3 = title.
        private static Regex _linkDef = new Regex(
            string.Format(@"
                        ^[ ]{{0,{0}}}\[(.+)\]:  # id = $1
                          [ ]*
                          \n?                   # maybe *one* newline
                          [ ]*
                        <?(\S+?)>?              # url = $2
                          [ ]*
                          \n?                   # maybe one newline
                          [ ]*
                        (?:
                            (?<=\s)             # lookbehind for whitespace
                            [""(]
                            (.+?)               # title = $3
                            ["")]
                            [ ]*
                        )?                      # title is optional
                        (?:\n+|\Z)", _tabWidth - 1),
            RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);
561

    
562
        /// <summary>
        /// Strips link definitions from text, stores the URLs and titles in hash references.
        /// </summary>
        /// <remarks>
        /// A definition has the form: ^[id]: url "optional title"
        /// </remarks>
        /// <param name="text">Text that may contain link definitions.</param>
        /// <returns>The text with every matched definition removed.</returns>
        private string StripLinkDefinitions(string text)
        {
            MatchEvaluator recordDefinition = LinkEvaluator;
            return _linkDef.Replace(text, recordDefinition);
        }
572

    
573
        /// <summary>
        /// Records one matched link definition: the url is stored under the lower-cased
        /// id and, when the title group matched something, the title is stored too with
        /// its double quotes replaced by &amp;quot;.
        /// </summary>
        /// <param name="match">A match of the link-definition regex (id = $1, url = $2, title = $3).</param>
        /// <returns>Always the empty string, which removes the definition from the text.</returns>
        private string LinkEvaluator(Match match)
        {
            GroupCollection parts = match.Groups;
            string linkID = parts[1].Value.ToLowerInvariant();

            _urls[linkID] = EncodeAmpsAndAngles(parts[2].Value);

            // the group indexer never returns null, so the length test alone decides
            Group title = parts[3];
            if (title.Length > 0)
            {
                _titles[linkID] = title.Value.Replace("\"", "&quot;");
            }

            return "";
        }
583

    
584
        // Deliberately NOT created with RegexOptions.Compiled: per the original author,
        // compiling this monster regex results in worse performance.
        private static Regex _blocksHtml = new Regex(
            GetBlockPattern(),
            RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace);
586

    
587

    
588
        /// <summary>
        /// Builds the regular-expression text used to find block-level HTML; 
        /// derived pretty much verbatim from PHP Markdown.
        /// </summary>
        /// <returns>
        /// The pattern with all of its $placeholders substituted. It contains whitespace and
        /// # comments, so it has to be used with RegexOptions.IgnorePatternWhitespace.
        /// </returns>
        private static string GetBlockPattern()
        {
            // Hashify HTML blocks:
            // Only block-level HTML tags, such as headers, lists, and tables, are wanted
            // here, because "paragraphs" wrapped in non-block-level tags (anchors, phrase
            // emphasis, spans) must still get <p>s around them. The tag lists are
            // hard-coded:
            //
            // *  List "a" holds tags which can be both inline or block-level. They are
            //    treated as block-level when the start tag is alone on its line;
            //    otherwise they are not matched here and are taken as inline later.
            // *  List "b" holds tags which are always block-level.
            //
            const string inlineOrBlockTags = "ins|del";
            const string alwaysBlockTags = "p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|script|noscript|form|fieldset|iframe|math";

            // Regular expression for the optional attributes of a tag.
            string attributes = @"
            (?>                    # optional tag attributes
              \s                  # starts with whitespace
              (?>
                [^>""/]+              # text outside quotes
              |
                /+(?!>)                # slash not followed by >
              |
                ""[^""]*""            # text inside double quotes (tolerate >)
              |
                '[^']*'                  # text inside single quotes (tolerate >)
              )*
            )?  
            ";

            // Regular expression for the content of a block tag named by group 2,
            // with same-named tags nested up to _nestDepth levels.
            string nestedContent =
                RepeatString(@"
                (?>
                  [^<]+              # content without tag
                |
                  <\2              # nested opening tag
                    " + attributes + @"       # attributes
                  (?>
                      />
                  |
                      >", _nestDepth) +   // end of opening tag
                ".*?" +                   // last level nested tag content
                RepeatString(@"
                      </\2\s*>          # closing nested tag
                  )
                  |        
                  <(?!/\2\s*>           # other tags with a different name
                  )
                )*", _nestDepth);

            // Same content expression, but keyed on group 3 (used for the list "a" tags).
            string nestedContentGroup3 = nestedContent.Replace(@"\2", @"\3");

            // First, look for nested blocks, e.g.:
            //   <div>
            //     <div>
            //     tags for inner block must be indented.
            //     </div>
            //   </div>
            //
            // The outermost tags must start at the left margin for this to match, and
            // the inner nested divs must be indented.
            // This has to happen before the next, more liberal match, because the next
            // match will start at the first `<div>` and stop at the first `</div>`.
            string template = @"
            (?>
                  (?>
                    (?<=\n)     # Starting at the beginning of a line
                    |           # or
                    \A\n?       # the beginning of the doc
                  )
                  (             # save in $1

                    # Match from `\n<tag>` to `</tag>\n`, handling nested tags
                    # in between.
                      
                        <($block_tags_b_re)   # start tag = $2
                        $attr>                # attributes followed by > and \n
                        $content              # content, support nesting
                        </\2>                 # the matching end tag
                        [ ]*                  # trailing spaces
                        (?=\n+|\Z)            # followed by a newline or end of document

                  | # Special version for tags of group a.

                        <($block_tags_a_re)   # start tag = $3
                        $attr>[ ]*\n          # attributes followed by >
                        $content2             # content, support nesting
                        </\3>                 # the matching end tag
                        [ ]*                  # trailing spaces
                        (?=\n+|\Z)            # followed by a newline or end of document
                      
                  | # Special case just for <hr />. It was easier to make a special
                    # case than to make the other regex more complicated.
                  
                        [ ]{0,$less_than_tab}
                        <hr
                        $attr                 # attributes
                        /?>                   # the matching end tag
                        [ ]*
                        (?=\n{2,}|\Z)         # followed by a blank line or end of document
                  
                  | # Special case for standalone HTML comments:
                  
                      (?<=\n\n|\A)            # preceded by a blank line or start of document
                      [ ]{0,$less_than_tab}
                      (?s:
                        <!--(?:|(?:[^>-]|-[^>])(?:[^-]|-[^-])*)-->
                      )
                      [ ]*
                      (?=\n{2,}|\Z)            # followed by a blank line or end of document
                  
                  | # PHP and ASP-style processor instructions (<? and <%)
                  
                      [ ]{0,$less_than_tab}
                      (?s:
                        <([?%])                # $4
                        .*?
                        \4>
                      )
                      [ ]*
                      (?=\n{2,}|\Z)            # followed by a blank line or end of document
                      
                  )
            )";

            // Substitution order matters: "$content2" has to be replaced before
            // "$content", which is a prefix of it.
            return template
                .Replace("$less_than_tab", (_tabWidth - 1).ToString())
                .Replace("$block_tags_b_re", alwaysBlockTags)
                .Replace("$block_tags_a_re", inlineOrBlockTags)
                .Replace("$attr", attributes)
                .Replace("$content2", nestedContentGroup3)
                .Replace("$content", nestedContent);
        }
729

    
730
        /// <summary>
        /// Swaps every block-level HTML fragment matched by <c>_blocksHtml</c> for a hash key;
        /// the fragment itself is stashed by <see cref="HtmlEvaluator"/>.
        /// </summary>
        private string HashHTMLBlocks(string text)
        {
            MatchEvaluator stashBlock = HtmlEvaluator;
            return _blocksHtml.Replace(text, stashBlock);
        }
737

    
738
        /// <summary>
        /// Match callback used by <see cref="HashHTMLBlocks"/>: records capture group 1 in
        /// <c>_htmlBlocks</c> under its hash key and returns that key surrounded by blank lines.
        /// </summary>
        private string HtmlEvaluator(Match match)
        {
            string html = match.Groups[1].Value;
            string hashKey = GetHashKey(html, isHtmlBlock: true);

            _htmlBlocks[hashKey] = html;

            return "\n\n" + hashKey + "\n\n";
        }
746

    
747
        /// <summary>
        /// Builds the placeholder key for <paramref name="s"/>: the control character 0x1A,
        /// then the absolute value of the string's hash code wrapped in a delimiter letter.
        /// </summary>
        /// <param name="s">Text whose hash code identifies the placeholder.</param>
        /// <param name="isHtmlBlock">Selects the delimiter: 'H' when true, 'E' when false.</param>
        /// <returns>A key of the form 0x1A + delimiter + digits + delimiter.</returns>
        private static string GetHashKey(string s, bool isHtmlBlock)
        {
            var delim = isHtmlBlock ? 'H' : 'E';

            // Widen to long before taking the absolute value: Math.Abs(int) throws
            // OverflowException when GetHashCode() happens to return int.MinValue.
            long hash = Math.Abs((long)s.GetHashCode());

            return "\x1A" + delim + hash.ToString() + delim;
        }
752

    
753
        // Tokenizer pattern used by TokenizeHTML: matches HTML comments, <? ... ?> processing
        // instructions, and tags. The tag alternative is assembled by repeating its opening
        // and closing fragments _nestDepth times via RepeatString.
        private static Regex _htmlTokens = new Regex(@"
            (<!--(?:|(?:[^>-]|-[^>])(?:[^-]|-[^-])*)-->)|        # match <!-- foo -->
            (<\?.*?\?>)|                 # match <?foo?> " +
            RepeatString(@"
            (<[A-Za-z\/!$](?:[^<>]|", _nestDepth) + RepeatString(@")*>)", _nestDepth) +
                                       " # match <tag> and </tag>",
            RegexOptions.Multiline | RegexOptions.Singleline | RegexOptions.ExplicitCapture | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);
760

    
761
        /// <summary>
        /// Splits the input into a list of tokens. Each match of <c>_htmlTokens</c> becomes a
        /// <c>TokenType.Tag</c> token (a tag may itself contain nested tags, such as
        /// &lt;a href="&lt;MTFoo&gt;"&gt;), and each non-empty run of characters between
        /// matches becomes a <c>TokenType.Text</c> token.
        /// </summary>
        private List<Token> TokenizeHTML(string text)
        {
            var result = new List<Token>();
            int cursor = 0;   // index just past the most recently emitted tag

            // this regex is derived from the _tokenize() subroutine in Brad Choate's MTRegex plugin.
            // http://www.bradchoate.com/past/mtregex.php
            foreach (Match tag in _htmlTokens.Matches(text))
            {
                int start = tag.Index;

                // Emit whatever plain text sits between the previous tag and this one.
                if (start > cursor)
                    result.Add(new Token(TokenType.Text, text.Substring(cursor, start - cursor)));

                result.Add(new Token(TokenType.Tag, tag.Value));
                cursor = start + tag.Length;
            }

            // Trailing text after the final tag (or the whole input when nothing matched).
            if (cursor < text.Length)
                result.Add(new Token(TokenType.Text, text.Substring(cursor)));

            return result;
        }
792

    
793

    
794
        // Reference-style link: [link text] [id]. The link-text sub-pattern is supplied by
        // GetNestedBracketsPattern().
        private static Regex _anchorRef = new Regex(string.Format(@"
            (                               # wrap whole match in $1
                \[
                    ({0})                   # link text = $2
                \]

                [ ]?                        # one optional space
                (?:\n[ ]*)?                 # one optional newline followed by spaces

                \[
                    (.*?)                   # id = $3
                \]
            )", GetNestedBracketsPattern()), RegexOptions.Singleline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);
807

    
808
        // Inline-style link: [link text](url "optional title"). Link text and href sub-patterns
        // are supplied by GetNestedBracketsPattern() and GetNestedParensPattern().
        private static Regex _anchorInline = new Regex(string.Format(@"
                (                           # wrap whole match in $1
                    \[
                        ({0})               # link text = $2
                    \]
                    \(                      # literal paren
                        [ ]*
                        ({1})               # href = $3
                        [ ]*
                        (                   # $4
                        (['""])           # quote char = $5
                        (.*?)               # title = $6
                        \5                  # matching quote
                        [ ]*                # ignore any spaces between closing quote and )
                        )?                  # title is optional
                    \)
                )", GetNestedBracketsPattern(), GetNestedParensPattern()),
                  RegexOptions.Singleline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);
826

    
827
        // Reference-style shortcut link: [link text], where the text doubles as the id.
        private static Regex _anchorRefShortcut = new Regex(@"
            (                               # wrap whole match in $1
              \[
                 ([^\[\]]+)                 # link text = $2; can't contain [ or ]
              \]
            )", RegexOptions.Singleline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);
833

    
834
        /// <summary>
        /// Converts Markdown link syntax into HTML anchor tags.
        /// </summary>
        /// <remarks>
        /// Handles, in this order:
        /// [link text][id]
        /// [link text](url "title")
        /// [id]
        /// </remarks>
        private string DoAnchors(string text)
        {
            // Reference-style links come first: [link text] [id]
            string afterRefs = _anchorRef.Replace(text, AnchorRefEvaluator);

            // Then inline-style links: [link text](url "optional title")
            string afterInline = _anchorInline.Replace(afterRefs, AnchorInlineEvaluator);

            // Shortcut references ([link text]) must run last, otherwise they would
            // swallow the first half of [link test][1] or [link test](/foo).
            return _anchorRefShortcut.Replace(afterInline, AnchorRefShortcutEvaluator);
        }
856

    
857
        /// <summary>
        /// Replaces every "://" in <paramref name="s"/> with <c>AutoLinkPreventionMarker</c>.
        /// </summary>
        private string SaveFromAutoLinking(string s)
        {
            const string schemeSeparator = "://";
            return s.Replace(schemeSeparator, AutoLinkPreventionMarker);
        }
861

    
862
        /// <summary>
        /// Match callback for reference-style links ([link text][id]). Produces an anchor tag
        /// when the id is present in <c>_urls</c>; otherwise returns the matched text unchanged.
        /// </summary>
        /// <param name="match">Match with $1 = whole link, $2 = link text, $3 = id.</param>
        private string AnchorRefEvaluator(Match match)
        {
            string linkText = SaveFromAutoLinking(match.Groups[2].Value);
            string linkID = match.Groups[3].Value.ToLowerInvariant();

            // for shortcut links like [this][].
            if (linkID == "")
                linkID = linkText.ToLowerInvariant();

            // Unknown id: leave the original markup untouched.
            if (!_urls.ContainsKey(linkID))
                return match.Groups[1].Value;

            string url = _urls[linkID];
            url = EncodeProblemUrlChars(url);
            url = EscapeBoldItalic(url);

            string result = "<a href=\"" + url + "\"";

            if (_titles.ContainsKey(linkID))
            {
                // Attribute-encode the title exactly once, the same way the shortcut and
                // inline evaluators do; the previous code ran AttributeEncode twice.
                string title = AttributeEncode(_titles[linkID]);
                title = EscapeBoldItalic(title);
                result += " title=\"" + title + "\"";
            }

            result += ">" + linkText + "</a>";
            return result;
        }
896

    
897
        /// <summary>
        /// Match callback for shortcut reference links ([link text]). The id is the link text,
        /// lower-cased, with each newline (and the spaces around it) collapsed to one space.
        /// Returns the matched text unchanged when the id is not in <c>_urls</c>.
        /// </summary>
        private string AnchorRefShortcutEvaluator(Match match)
        {
            string linkText = SaveFromAutoLinking(match.Groups[2].Value);

            // lower case and remove newlines / extra spaces
            string linkID = Regex.Replace(linkText.ToLowerInvariant(), @"[ ]*\n[ ]*", " ");

            if (!_urls.ContainsKey(linkID))
                return match.Groups[1].Value;

            string href = EscapeBoldItalic(EncodeProblemUrlChars(_urls[linkID]));
            string html = "<a href=\"" + href + "\"";

            if (_titles.ContainsKey(linkID))
            {
                string titleAttr = EscapeBoldItalic(AttributeEncode(_titles[linkID]));
                html += " title=\"" + titleAttr + "\"";
            }

            return html + ">" + linkText + "</a>";
        }
927

    
928

    
929
        /// <summary>
        /// Match callback for inline links ([link text](url "title")): $2 is the link text,
        /// $3 the url and $6 the optional title.
        /// </summary>
        private string AnchorInlineEvaluator(Match match)
        {
            string linkText = SaveFromAutoLinking(match.Groups[2].Value);
            string href = EscapeBoldItalic(EncodeProblemUrlChars(match.Groups[3].Value));
            string title = match.Groups[6].Value;

            // remove <>'s surrounding URL, if present
            bool angleWrapped = href.StartsWith("<") && href.EndsWith(">");
            if (angleWrapped)
                href = href.Substring(1, href.Length - 2);

            string html = "<a href=\"" + href + "\"";

            if (!String.IsNullOrEmpty(title))
            {
                string titleAttr = EscapeBoldItalic(AttributeEncode(title));
                html += " title=\"" + titleAttr + "\"";
            }

            return html + ">" + linkText + "</a>";
        }
953

    
954
        // Reference-style image: ![alt text][id]
        private static Regex _imagesRef = new Regex(@"
                    (               # wrap whole match in $1
                    !\[
                        (.*?)       # alt text = $2
                    \]

                    [ ]?            # one optional space
                    (?:\n[ ]*)?     # one optional newline followed by spaces

                    \[
                        (.*?)       # id = $3
                    \]

                    )", RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline | RegexOptions.Compiled);
968

    
969
        // Inline image: ![alt text](url "optional title"). The href sub-pattern is supplied by
        // GetNestedParensPattern().
        private static Regex _imagesInline = new Regex(String.Format(@"
              (                     # wrap whole match in $1
                !\[
                    (.*?)           # alt text = $2
                \]
                \s?                 # one optional whitespace character
                \(                  # literal paren
                    [ ]*
                    ({0})           # href = $3
                    [ ]*
                    (               # $4
                    (['""])       # quote char = $5
                    (.*?)           # title = $6
                    \5              # matching quote
                    [ ]*
                    )?              # title is optional
                \)
              )", GetNestedParensPattern()),
                  RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline | RegexOptions.Compiled);
988

    
989
        /// <summary>
        /// Converts Markdown image syntax into HTML img tags.
        /// </summary>
        /// <remarks>
        /// ![alt text][id]
        /// ![alt text](url "optional title")
        /// </remarks>
        private string DoImages(string text)
        {
            // Reference-style labeled images first: ![alt text][id]
            string afterRefs = _imagesRef.Replace(text, ImageReferenceEvaluator);

            // Then inline images: ![alt text](url "optional title")
            // Don't forget: encode * and _
            return _imagesInline.Replace(afterRefs, ImageInlineEvaluator);
        }
1007

    
1008
        // Keeps colliding syntax ambiguities from producing badly broken HTML: after
        // EscapeBoldItalic, every [, ], ( and ) in the alt text is replaced by its
        // _escapeTable entry. The result may still not be what the author meant, but it
        // is no longer garbage.
        private string EscapeImageAltText(string s)
        {
            MatchEvaluator toEscapeEntry = bracket => _escapeTable[bracket.Value];
            string escaped = EscapeBoldItalic(s);
            return Regex.Replace(escaped, @"[\[\]()]", toEscapeEntry);
        }
1017

    
1018
        /// <summary>
        /// Match callback for reference-style images (![alt text][id]). Builds an img tag when
        /// the id is present in <c>_urls</c>; otherwise returns the matched text unchanged.
        /// </summary>
        private string ImageReferenceEvaluator(Match match)
        {
            string altText = match.Groups[2].Value;
            string linkID = match.Groups[3].Value.ToLowerInvariant();

            // for shortcut links like ![this][].
            if (linkID == "")
                linkID = altText.ToLowerInvariant();

            // If there's no such link ID, leave intact:
            if (!_urls.ContainsKey(linkID))
                return match.Groups[1].Value;

            string title = _titles.ContainsKey(linkID) ? _titles[linkID] : null;
            return ImageTag(_urls[linkID], altText, title);
        }
1044

    
1045
        /// <summary>
        /// Match callback for inline images (![alt text](url "title")): $2 is the alt text,
        /// $3 the url and $6 the optional title.
        /// </summary>
        private string ImageInlineEvaluator(Match match)
        {
            string src = match.Groups[3].Value;

            // Remove <>'s surrounding URL, if present
            bool angleWrapped = src.StartsWith("<") && src.EndsWith(">");
            if (angleWrapped)
                src = src.Substring(1, src.Length - 2);

            return ImageTag(src, match.Groups[2].Value, match.Groups[6].Value);
        }
1056

    
1057
        /// <summary>
        /// Assembles an img element from the given url, alt text and optional title, closing
        /// it with <c>_emptyElementSuffix</c>. A null or empty title adds no title attribute.
        /// </summary>
        private string ImageTag(string url, string altText, string title)
        {
            string alt = EscapeImageAltText(AttributeEncode(altText));
            string src = EscapeBoldItalic(EncodeProblemUrlChars(url));

            string html = "<img src=\"" + src + "\" alt=\"" + alt + "\"";

            if (!String.IsNullOrEmpty(title))
            {
                string titleAttr = AttributeEncode(EscapeBoldItalic(title));
                html += " title=\"" + titleAttr + "\"";
            }

            return html + _emptyElementSuffix;
        }
1071

    
1072
        // Setext-style header: a line of text followed by a line of ='s or -'s.
        private static Regex _headerSetext = new Regex(@"
                ^(.+?)      # $1 = header text
                [ ]*
                \n
                (=+|-+)     # $2 = string of ='s or -'s
                [ ]*
                \n+",
            RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);
1080

    
1081
        // Atx-style header: one to six leading #'s, the header text, optional closing #'s.
        private static Regex _headerAtx = new Regex(@"
                ^(\#{1,6})  # $1 = string of #'s
                [ ]*
                (.+?)       # $2 = Header text
                [ ]*
                \#*         # optional closing #'s (not counted)
                \n+",
            RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);
1089

    
1090
        /// <summary>
        /// Converts Markdown headers into HTML header tags
        /// </summary>
        /// <remarks>
        /// Header 1
        /// ========
        ///
        /// Header 2
        /// --------
        ///
        /// # Header 1
        /// ## Header 2
        /// ## Header 2 with closing hashes ##
        /// ...
        /// ###### Header 6
        /// </remarks>
        private string DoHeaders(string text)
        {
            // Setext (underlined) headers are rewritten before atx (# prefixed) ones.
            string afterSetext = _headerSetext.Replace(text, SetextHeaderEvaluator);
            return _headerAtx.Replace(afterSetext, AtxHeaderEvaluator);
        }
1112

    
1113
        /// <summary>
        /// Match callback for setext headers: an underline starting with '=' yields h1,
        /// anything else yields h2. The header text ($1) goes through the span gamut.
        /// </summary>
        private string SetextHeaderEvaluator(Match match)
        {
            string underline = match.Groups[2].Value;
            int level = underline.StartsWith("=") ? 1 : 2;
            string body = RunSpanGamut(match.Groups[1].Value);

            return string.Format("<h{1}>{0}</h{1}>\n\n", body, level);
        }
1119

    
1120
        /// <summary>
        /// Match callback for atx headers: the header level is the number of leading #'s ($1)
        /// and the header text ($2) goes through the span gamut.
        /// </summary>
        private string AtxHeaderEvaluator(Match match)
        {
            int level = match.Groups[1].Value.Length;
            string body = RunSpanGamut(match.Groups[2].Value);

            return string.Format("<h{1}>{0}</h{1}>\n\n", body, level);
        }
1126

    
1127

    
1128
        // Horizontal rule: a line made of three or more identical markers (-, * or _),
        // each optionally separated by up to two spaces.
        private static Regex _horizontalRules = new Regex(@"
            ^[ ]{0,3}         # Leading space
                ([-*_])       # $1: First marker
                (?>           # Repeated marker group
                    [ ]{0,2}  # Zero, one, or two spaces.
                    \1        # Marker character
                ){2,}         # Group repeated at least twice
                [ ]*          # Trailing spaces
                $             # End of line.
            ", RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);
1138

    
1139
        /// <summary>
        /// Converts Markdown horizontal rules into HTML hr tags
        /// </summary>
        /// <remarks>
        /// ***
        /// * * *
        /// ---
        /// - - -
        /// </remarks>
        private string DoHorizontalRules(string text)
        {
            string hrTag = "<hr" + _emptyElementSuffix + "\n";
            return _horizontalRules.Replace(text, hrTag);
        }
1152

    
1153
        // Pattern text for one complete list. {0} is filled with an alternation of _markerUL
        // and _markerOL, {1} with _tabWidth - 1 (the maximum leading indent of the first item).
        // Shared by _listNested and _listTopLevel, which prepend their own anchors.
        private static string _wholeList = string.Format(@"
            (                               # $1 = whole list
              (                             # $2
                [ ]{{0,{1}}}
                ({0})                       # $3 = first list item marker
                [ ]+
              )
              (?s:.+?)
              (                             # $4
                  \z
                |
                  \n{{2,}}
                  (?=\S)
                  (?!                       # Negative lookahead for another list item marker
                    [ ]*
                    {0}[ ]+
                  )
              )
            )", string.Format("(?:{0}|{1})", _markerUL, _markerOL), _tabWidth - 1);
1172

    
1173
        // A nested list may start at the beginning of any line.
        private static Regex _listNested = new Regex(@"^" + _wholeList,
            RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);

        // A top-level list must follow a blank line or sit at the start of the document.
        private static Regex _listTopLevel = new Regex(@"(?:(?<=\n\n)|\A\n?)" + _wholeList,
            RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);
1178

    
1179
        /// <summary>
        /// Converts Markdown lists into HTML ul, ol and li tags
        /// </summary>
        private string DoLists(string text)
        {
            // We use a different prefix before nested lists than top-level lists.
            // See extended comment in _ProcessListItems().
            Regex listPattern = _listLevel > 0 ? _listNested : _listTopLevel;
            return listPattern.Replace(text, ListEvaluator);
        }
1193

    
1194
        /// <summary>
        /// Match callback for a whole list ($1). The first item marker ($3) decides between
        /// ul and ol; the items are rendered by <see cref="ProcessListItems"/>.
        /// </summary>
        private string ListEvaluator(Match match)
        {
            string wholeList = match.Groups[1].Value;
            bool unordered = Regex.IsMatch(match.Groups[3].Value, _markerUL);

            string tag = unordered ? "ul" : "ol";
            string items = ProcessListItems(wholeList, unordered ? _markerUL : _markerOL);

            return string.Format("<{0}>\n{1}</{0}>\n", tag, items);
        }
1205

    
1206
        /// <summary>
        /// Process the contents of a single ordered or unordered list, splitting it
        /// into individual list items.
        /// </summary>
        /// <param name="list">Text of the whole list.</param>
        /// <param name="marker">Regex fragment matching this list's item marker.</param>
        /// <returns>The list text with every item wrapped in li tags.</returns>
        private string ProcessListItems(string list, string marker)
        {
            // The listLevel global keeps track of when we're inside a list.
            // Each time we enter a list, we increment it; when we leave a list,
            // we decrement. If it's zero, we're not in a list anymore.

            // We do this because when we're not inside a list, we want to treat
            // something like this:

            //    I recommend upgrading to version
            //    8. Oops, now this line is treated
            //    as a sub-list.

            // As a single paragraph, despite the fact that the second line starts
            // with a digit-period-space sequence.

            // Whereas when we're inside a list (or sub-list), that line will be
            // treated as the start of a sub-list. What a kludge, huh? This is
            // an aspect of Markdown's syntax that's hard to parse perfectly
            // without resorting to mind-reading. Perhaps the solution is to
            // change the syntax rules such that sub-lists must start with a
            // starting cardinal number; e.g. "1." or "a.".

            _listLevel++;
            try
            {
                // Trim trailing blank lines:
                list = Regex.Replace(list, @"\n{2,}\z", "\n");

                string pattern = string.Format(
                  @"(^[ ]*)                    # leading whitespace = $1
                    ({0}) [ ]+                 # list marker = $2
                    ((?s:.+?)                  # list item text = $3
                    (\n+))
                    (?= (\z | \1 ({0}) [ ]+))", marker);

                bool lastItemHadADoubleNewline = false;

                // has to be a closure, so subsequent invocations can share the bool
                MatchEvaluator ListItemEvaluator = (Match match) =>
                {
                    string item = match.Groups[3].Value;

                    bool endsWithDoubleNewline = item.EndsWith("\n\n");
                    bool containsDoubleNewline = endsWithDoubleNewline || item.Contains("\n\n");

                    if (containsDoubleNewline || lastItemHadADoubleNewline)
                        // we could correct any bad indentation here..
                        item = RunBlockGamut(Outdent(item) + "\n", unhash: false);
                    else
                    {
                        // recursion for sub-lists
                        item = DoLists(Outdent(item));
                        item = item.TrimEnd('\n');
                        item = RunSpanGamut(item);
                    }
                    lastItemHadADoubleNewline = endsWithDoubleNewline;
                    return string.Format("<li>{0}</li>\n", item);
                };

                return Regex.Replace(list, pattern, ListItemEvaluator,
                                     RegexOptions.IgnorePatternWhitespace | RegexOptions.Multiline);
            }
            finally
            {
                // Restore the nesting depth even when item processing throws, so a failed
                // call cannot leave _listLevel stuck above zero.
                _listLevel--;
            }
        }
1274

    
1275
        // Indented code block: one or more lines that each start with _tabWidth spaces,
        // preceded by a blank line or the start of the document.
        private static Regex _codeBlock = new Regex(string.Format(@"
                    (?:\n\n|\A\n?)
                    (                        # $1 = the code block -- one or more lines, starting with a space
                    (?:
                        (?:[ ]{{{0}}})       # Lines must start with a tab-width of spaces
                        .*\n+
                    )+
                    )
                    ((?=^[ ]{{0,{0}}}[^ \t\n])|\Z) # Lookahead for non-space at line-start, or end of doc",
                    _tabWidth), RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);
1285

    
1286
        /// <summary>
        /// Converts Markdown indented code into HTML pre code blocks
        /// </summary>
        private string DoCodeBlocks(string text)
        {
            MatchEvaluator renderBlock = CodeBlockEvaluator;
            return _codeBlock.Replace(text, renderBlock);
        }
1294

    
1295
        /// <summary>
        /// Match callback for code blocks: outdents and encodes the block ($1), strips what
        /// <c>_newlinesLeadingTrailing</c> matches, and wraps the result in pre/code tags.
        /// </summary>
        private string CodeBlockEvaluator(Match match)
        {
            string encoded = EncodeCode(Outdent(match.Groups[1].Value));
            string trimmed = _newlinesLeadingTrailing.Replace(encoded, "");

            return "\n\n<pre><code>" + trimmed + "\n</code></pre>\n\n";
        }
1304

    
1305
        // Code span: text delimited by matching runs of backticks.
        private static Regex _codeSpan = new Regex(@"
                    (?<!\\)   # Character before opening ` can't be a backslash
                    (`+)      # $1 = Opening run of `
                    (.+?)     # $2 = The code block
                    (?<!`)
                    \1
                    (?!`)", RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline | RegexOptions.Compiled);
1312

    
1313
        /// <summary>
        /// Converts Markdown `code spans` into HTML code tags
        /// </summary>
        private string DoCodeSpans(string text)
        {
            //    * You can use multiple backticks as the delimiters if you want to
            //        include literal backticks in the code span. So, this input:
            //
            //        Just type ``foo `bar` baz`` at the prompt.
            //
            //        Will translate to:
            //
            //          <p>Just type <code>foo `bar` baz</code> at the prompt.</p>
            //
            //        There's no arbitrary limit to the number of backticks you
            //        can use as delimiters. If you need three consecutive backticks
            //        in your code, use four for delimiters, etc.
            //
            //    * You can use spaces to get literal backticks at the edges:
            //
            //          ... type `` `bar` `` ...
            //
            //        Turns to:
            //
            //          ... type <code>`bar`</code> ...
            //

            MatchEvaluator renderSpan = CodeSpanEvaluator;
            return _codeSpan.Replace(text, renderSpan);
        }
1342

    
1343
        /// <summary>
        /// Match callback for code spans: strips leading and trailing spaces from the span
        /// body ($2), encodes it, shields it from auto-linking and wraps it in code tags.
        /// </summary>
        private string CodeSpanEvaluator(Match match)
        {
            string body = match.Groups[2].Value;

            body = Regex.Replace(body, @"^[ ]*", ""); // leading spaces
            body = Regex.Replace(body, @"[ ]*$", ""); // trailing spaces

            // SaveFromAutoLinking prevents auto-linking. Not necessary in code *blocks*, but in code spans.
            string protectedCode = SaveFromAutoLinking(EncodeCode(body));

            return "<code>" + protectedCode + "</code>";
        }
1353

    
1354

    
1355
        // Bold: text wrapped in ** or __. $2 is the emphasized text.
        private static Regex _bold = new Regex(@"(\*\*|__) (?=\S) (.+?[*_]*) (?<=\S) \1",
            RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline | RegexOptions.Compiled);
        // Strict bold: as above, but the delimiters must be bounded by a non-word character,
        // an underscore, or the start/end anchors. $1 = preceding character, $3 = text,
        // $4 = following character.
        private static Regex _strictBold = new Regex(@"([\W_]|^) (\*\*|__) (?=\S) ([^\r]*?\S[\*_]*) \2 ([\W_]|$)",
            RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline | RegexOptions.Compiled);
1359

    
1360
        // Italic: text wrapped in * or _. $2 is the emphasized text.
        private static Regex _italic = new Regex(@"(\*|_) (?=\S) (.+?) (?<=\S) \1",
            RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline | RegexOptions.Compiled);
        // Strict italic: as above, but the delimiters must be bounded by a non-word character,
        // an underscore, or the start/end anchors, and the text may not contain * or _.
        // $1 = preceding character, $3 = text, $4 = following character.
        private static Regex _strictItalic = new Regex(@"([\W_]|^) (\*|_) (?=\S) ([^\r\*_]*?\S) \2 ([\W_]|$)",
            RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline | RegexOptions.Compiled);
1364

    
1365
        /// <summary>
        /// Converts Markdown *italics* and **bold** into HTML strong and em tags
        /// </summary>
        private string DoItalicsAndBold(string text)
        {
            // <strong> must go first, then <em>
            if (!_strictBoldItalic)
            {
                string bolded = _bold.Replace(text, "<strong>$2</strong>");
                return _italic.Replace(bolded, "<em>$2</em>");
            }

            // The strict patterns also capture the neighbouring characters ($1 and $4),
            // which have to be written back around the new tags.
            string strictBolded = _strictBold.Replace(text, "$1<strong>$3</strong>$4");
            return _strictItalic.Replace(strictBolded, "$1<em>$3</em>$4");
        }
1384

    
1385
        /// <summary>
        /// Converts Markdown line breaks into HTML break tags: every newline when
        /// <c>_autoNewlines</c> is set, otherwise only newlines preceded by two or more spaces.
        /// </summary>
        private string DoHardBreaks(string text)
        {
            string breakTag = string.Format("<br{0}\n", _emptyElementSuffix);
            string linePattern = _autoNewlines ? @"\n" : @" {2,}\n";

            return Regex.Replace(text, linePattern, breakTag);
        }
1396

    
1397
        // Blockquote: one or more groups of lines, each group opening with a '>' line.
        private static Regex _blockquote = new Regex(@"
            (                           # Wrap whole match in $1
                (
                ^[ ]*>[ ]?              # '>' at the start of a line
                    .+\n                # rest of the first line
                (.+\n)*                 # subsequent consecutive lines
                \n*                     # blanks
                )+
            )", RegexOptions.IgnorePatternWhitespace | RegexOptions.Multiline | RegexOptions.Compiled);
1406

    
1407
        /// <summary>
        /// Converts Markdown > quoted blocks into HTML blockquote blocks
        /// </summary>
        private string DoBlockQuotes(string text)
        {
            MatchEvaluator renderQuote = BlockQuoteEvaluator;
            return _blockquote.Replace(text, renderQuote);
        }
1414

    
1415
        /// <summary>
        /// Match callback for blockquotes: strips one level of quoting, runs the block gamut
        /// on the result, indents it, wraps it in blockquote tags and stores the HTML in
        /// <c>_htmlBlocks</c>, returning its hash key surrounded by blank lines.
        /// </summary>
        private string BlockQuoteEvaluator(Match match)
        {
            string quoted = match.Groups[1].Value;

            // trim one level of quoting
            quoted = Regex.Replace(quoted, @"^[ ]*>[ ]?", "", RegexOptions.Multiline);
            // trim whitespace-only lines
            quoted = Regex.Replace(quoted, @"^[ ]+$", "", RegexOptions.Multiline);
            // recurse
            quoted = RunBlockGamut(quoted);

            // indent every line by two spaces
            quoted = Regex.Replace(quoted, @"^", "  ", RegexOptions.Multiline);

            // These leading spaces screw with <pre> content, so we need to fix that:
            MatchEvaluator unindentPre = BlockQuoteEvaluator2;
            quoted = Regex.Replace(quoted, @"(\s*<pre>.+?</pre>)", unindentPre,
                RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline);

            string html = string.Format("<blockquote>\n{0}\n</blockquote>", quoted);
            string hashKey = GetHashKey(html, isHtmlBlock: true);
            _htmlBlocks[hashKey] = html;

            return string.Concat("\n\n", hashKey, "\n\n");
        }
1434

    
1435
        /// <summary>
        /// Returns the matched text (group 1) with two leading spaces removed from every line
        /// that starts with them
        /// </summary>
        private string BlockQuoteEvaluator2(Match match)
        {
            string preBlock = match.Groups[1].Value;
            return Regex.Replace(preBlock, @"^  ", "", RegexOptions.Multiline);
        }
1439

    
1440
        // Bare http/https/ftp URLs. Group 1 optionally captures a preceding < or =",
        // group 2 the protocol and group 3 everything from "://" onwards.
        private static Regex _autolinkBare = new Regex(
            @"(<|="")?\b(https?|ftp)(://[-A-Z0-9+&@#/%?=~_|\[\]\(\)!:,\.;]*[-A-Z0-9+&@#/%=~_|\[\])])(?=$|\W)",
            RegexOptions.Compiled | RegexOptions.IgnoreCase);

        /// <summary>
        /// Wraps a bare URL match in angle brackets. Closing parentheses at the end of the URL
        /// that have no opening partner are placed after the closing bracket instead of inside it.
        /// </summary>
        /// <param name="match">a match of _autolinkBare</param>
        /// <returns>the replacement text, or the untouched match when group 1 matched</returns>
        private static string handleTrailingParens(Match match)
        {
            // Group 1 acts as a negative lookbehind: when a < or =" precedes the URL the text is
            // returned as it is. A real lookbehind is not used because of links within links, like
            // <a href="http://web.archive.org/web/20121130000728/http://www.google.com/">. A real
            // lookbehind would never match the full link, so the inner http://www.google.com would
            // be matched on its own. Here the full link is matched (and merely left alone), so the
            // inner link is never seen as a separate match.
            if (match.Groups[1].Success)
                return match.Value;

            string scheme = match.Groups[2].Value;
            string rest = match.Groups[3].Value;
            string stripped = "";

            if (rest.EndsWith(")"))
            {
                // Running parenthesis balance. An opening paren seen while the balance is zero or
                // negative restarts it at 1, so earlier surplus closers do not cancel it out.
                int depth = 0;
                foreach (char ch in rest)
                {
                    if (ch == '(')
                        depth = depth <= 0 ? 1 : depth + 1;
                    else if (ch == ')')
                        depth--;
                }

                if (depth < 0)
                {
                    // move at most -depth closing parens from the very end of the URL to the tail
                    int surplus = -depth;
                    int cut = rest.Length;
                    while (cut > 0 && rest.Length - cut < surplus && rest[cut - 1] == ')')
                        cut--;

                    stripped = rest.Substring(cut);
                    rest = rest.Substring(0, cut);
                }
            }

            return "<" + scheme + rest + ">" + stripped;
        }
1479

    
1480
        /// <summary>
        /// Turn angle-delimited URLs into HTML anchor tags. Bare URLs are handled as well when
        /// _autoHyperlink is set, and angle-delimited e-mail addresses when _linkEmails is set.
        /// </summary>
        /// <remarks>
        /// &lt;http://www.example.com&gt;
        /// </remarks>
        private string DoAutoLinks(string text)
        {
            if (_autoHyperlink)
            {
                // Give arbitrary bare URLs Markdown < > delimiters so the next step links them too.
                // Note that at this point all other URLs in the text are already hyperlinked as
                // <a href=""></a>, *except* for the <http://www.foo.com> case.
                MatchEvaluator wrapBareUrl = handleTrailingParens;
                text = _autolinkBare.Replace(text, wrapBareUrl);
            }

            // Hyperlinks: <http://foo.com>
            MatchEvaluator toAnchor = HyperlinkEvaluator;
            text = Regex.Replace(text, "<((https?|ftp):[^'\">\\s]+)>", toAnchor);

            if (!_linkEmails)
                return text;

            // Email addresses: <address@domain.foo>
            string pattern =
                    @"<
                      (?:mailto:)?
                      (
                        [-.\w]+
                        \@
                        [-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+
                      )
                      >";
            MatchEvaluator toMailto = EmailEvaluator;
            return Regex.Replace(text, pattern, toMailto, RegexOptions.IgnoreCase | RegexOptions.IgnorePatternWhitespace);
        }
1517

    
1518
        /// <summary>
        /// Builds an anchor tag that uses the URL captured in group 1 both as its href and as
        /// its visible text
        /// </summary>
        private string HyperlinkEvaluator(Match match)
        {
            Group url = match.Groups[1];
            return string.Format("<a href=\"{0}\">{0}</a>", url.Value);
        }
1523

    
1524
        /// <summary>
        /// Turns a matched e-mail address (group 1) into an obfuscated mailto link
        /// </summary>
        private string EmailEvaluator(Match match)
        {
            //
            //    Input: an email address, e.g. "foo@example.com"
            //
            //    Output: the email address as a mailto link, with each character
            //            of the address encoded as either a decimal or hex entity, in
            //            the hopes of foiling most address harvesting spam bots. E.g.:
            //
            //      <a href="&#x6D;&#97;&#105;&#108;&#x74;&#111;:&#102;&#111;&#111;&#64;&#101;
            //        x&#x61;&#109;&#x70;&#108;&#x65;&#x2E;&#99;&#111;&#109;">&#102;&#111;&#111;
            //        &#64;&#101;x&#x61;&#109;&#x70;&#108;&#x65;&#x2E;&#99;&#111;&#109;</a>
            //
            //    Based on a filter by Matthew Wickline, posted to the BBEdit-Talk
            //    mailing list: <http://tinyurl.com/yu7ue>
            //
            string address = Unescape(match.Groups[1].Value);

            // the encoder leaves ':' alone, which lets the end of "mailto:" be spotted below
            string encoded = EncodeEmailAddress("mailto:" + address);

            string anchor = string.Format("<a href=\"{0}\">{0}</a>", encoded);

            // strip the mailto: from the visible part
            return Regex.Replace(anchor, "\">.+?:", "\">");
        }
1553

    
1554

    
1555
        // between one and _tabWidth spaces at the start of any line
        private static Regex _outDent = new Regex(@"^[ ]{1," + _tabWidth + @"}", RegexOptions.Compiled | RegexOptions.Multiline);

        /// <summary>
        /// Strips one indentation level, i.e. up to _tabWidth leading spaces, from every line
        /// of the block
        /// </summary>
        private string Outdent(string block)
        {
            string outdented = _outDent.Replace(block, string.Empty);
            return outdented;
        }
1564

    
1565

    
1566
        #region Encoding and Normalization
1567

    
1568

    
1569
        /// <summary>
        /// Obfuscates an address by randomly writing each character raw, as a hex entity or as
        /// a decimal entity. A number from 1 to 99 is drawn per character: above 90 the
        /// character stays raw, below 45 it becomes a hex entity, otherwise a decimal entity.
        /// '@' is never left raw and ':' is always left raw.
        /// </summary>
        private string EncodeEmailAddress(string addr)
        {
            var rng = new Random();
            var encoded = new StringBuilder(addr.Length * 5);

            for (int i = 0; i < addr.Length; i++)
            {
                char ch = addr[i];
                int roll = rng.Next(1, 100);    // upper bound is exclusive

                bool keepRaw = ch != '@' && (ch == ':' || roll > 90);

                if (keepRaw)
                    encoded.Append(ch);                             // m
                else if (roll < 45)
                    encoded.AppendFormat("&#x{0:x};", (int)ch);     // &#x6D
                else
                    encoded.AppendFormat("&#{0};", (int)ch);        // &#109
            }

            return encoded.ToString();
        }
1591

    
1592
        // every character that EncodeCode has to rewrite: & < > \ * _ { } [ ]
        private static Regex _codeEncoder = new Regex(@"&|<|>|\\|\*|_|\{|\}|\[|\]", RegexOptions.Compiled);

        /// <summary>
        /// Encode/escape the characters that must be taken literally inside code blocks and
        /// code spans
        /// </summary>
        private string EncodeCode(string code)
        {
            MatchEvaluator encodeChar = EncodeCodeEvaluator;
            return _codeEncoder.Replace(code, encodeChar);
        }

        /// <summary>
        /// Supplies the replacement for a single character matched by _codeEncoder
        /// </summary>
        private string EncodeCodeEvaluator(Match match)
        {
            string ch = match.Value;

            // Ampersands are always encoded: HTML entities are not entities within a
            // Markdown code span.
            if (ch == "&")
                return "&amp;";

            // angle brackets become entities as well
            if (ch == "<")
                return "&lt;";
            if (ch == ">")
                return "&gt;";

            // whatever is left is magic in Markdown and gets its entry from the escape table
            return _escapeTable[ch];
        }
1619

    
1620

    
1621
        // an ampersand that does not start a numeric, hex or named entity
        private static Regex _amps = new Regex(@"&(?!((#[0-9]+)|(#[xX][a-fA-F0-9]+)|([a-zA-Z][a-zA-Z0-9]*));)", RegexOptions.ExplicitCapture | RegexOptions.Compiled);
        // a left angle bracket that is not followed by a letter, '/', '?', '$' or '!'
        private static Regex _angles = new Regex(@"<(?![A-Za-z/?\$!])", RegexOptions.ExplicitCapture | RegexOptions.Compiled);

        /// <summary>
        /// Encode any ampersands that aren't part of an HTML entity, then any left angle
        /// brackets that aren't followed by a letter, '/', '?', '$' or '!'.
        /// Right angle brackets are not touched.
        /// </summary>
        private string EncodeAmpsAndAngles(string s)
        {
            string ampsEncoded = _amps.Replace(s, "&amp;");
            return _angles.Replace(ampsEncoded, "&lt;");
        }
1633

    
1634
        // Not initialized at its declaration; presumably assigned elsewhere in the class
        // before EscapeBackslashes is first used -- TODO confirm.
        private static Regex _backslashEscapes;

        /// <summary>
        /// Encodes any backslash-escaped characters such as \`, \*, \[ etc
        /// </summary>
        private string EscapeBackslashes(string s)
        {
            MatchEvaluator lookup = EscapeBackslashesEvaluator;
            return _backslashEscapes.Replace(s, lookup);
        }

        /// <summary>
        /// Looks the matched escape sequence up in _backslashEscapeTable
        /// </summary>
        private string EscapeBackslashesEvaluator(Match match)
        {
            string escapeSequence = match.Value;
            return _backslashEscapeTable[escapeSequence];
        }
1647

    
1648
        // Placeholder format: the \x1A character, 'E', digits, 'E'. The literal is split in two
        // because a C# \x escape takes up to four hex digits and would swallow the 'E'.
        private static Regex _unescapes = new Regex("\x1A" + "E\\d+E", RegexOptions.Compiled);

        /// <summary>
        /// swap back in all the special characters that were hidden behind placeholders
        /// </summary>
        private string Unescape(string s)
        {
            MatchEvaluator restore = UnescapeEvaluator;
            return _unescapes.Replace(s, restore);
        }

        /// <summary>
        /// Looks the matched placeholder up in _invertedEscapeTable
        /// </summary>
        private string UnescapeEvaluator(Match match)
        {
            string placeholder = match.Value;
            return _invertedEscapeTable[placeholder];
        }
1661

    
1662

    
1663
        /// <summary>
        /// Replaces the bold [ * ] and italic [ _ ] characters with their _escapeTable entries
        /// </summary>
        private string EscapeBoldItalic(string s)
        {
            string starsEscaped = s.Replace("*", _escapeTable["*"]);
            return starsEscaped.Replace("_", _escapeTable["_"]);
        }
1672

    
1673
        /// <summary>
        /// Replaces &gt;, &lt; and double quotes with their HTML entities; every other
        /// character (ampersands included) is copied unchanged
        /// </summary>
        private static string AttributeEncode(string s)
        {
            var encoded = new StringBuilder(s.Length);

            foreach (char ch in s)
            {
                switch (ch)
                {
                    case '>':
                        encoded.Append("&gt;");
                        break;
                    case '<':
                        encoded.Append("&lt;");
                        break;
                    case '"':
                        encoded.Append("&quot;");
                        break;
                    default:
                        encoded.Append(ch);
                        break;
                }
            }

            return encoded.ToString();
        }
1677

    
1678
        // the characters that get hex-encoded: " ' * ( ) [ ] $ :
        private static char[] _problemUrlChars = @"""'*()[]$:".ToCharArray();

        /// <summary>
        /// hex-encodes some unusual "problem" chars in URLs to avoid URL detection problems;
        /// returns the URL unchanged when _encodeProblemUrlCharacters is off
        /// </summary>
        private string EncodeProblemUrlChars(string url)
        {
            if (!_encodeProblemUrlCharacters) return url;

            var encoded = new StringBuilder(url.Length);
            int lastIndex = url.Length - 1;

            for (int pos = 0; pos <= lastIndex; pos++)
            {
                char current = url[pos];
                bool isProblem = Array.IndexOf(_problemUrlChars, current) >= 0;

                // a colon directly followed by '/' or a digit is left as it is
                if (isProblem && current == ':' && pos < lastIndex)
                {
                    char next = url[pos + 1];
                    isProblem = next != '/' && (next < '0' || next > '9');
                }

                if (!isProblem)
                {
                    encoded.Append(current);
                    continue;
                }

                encoded.Append("%" + String.Format("{0:x}", (byte)current));
            }

            return encoded.ToString();
        }
1706

    
1707

    
1708
        /// <summary>
        /// Within tags -- meaning between &lt; and &gt; -- encode [\ ` * _] so they
        /// don't conflict with their use in Markdown for code, italics and strong.
        /// Each such character is replaced with its entry from the escape table;
        /// this is likely overkill, but it should prevent accidental collisions
        /// with the escape values.
        /// </summary>
        private string EscapeSpecialCharsWithinTagAttributes(string text)
        {
            var tokens = TokenizeHTML(text);

            // the text is put back together token by token
            var rebuilt = new StringBuilder(text.Length);

            foreach (var token in tokens)
            {
                // anything that is not a tag is copied through untouched
                if (token.Type != TokenType.Tag)
                {
                    rebuilt.Append(token.Value);
                    continue;
                }

                string tag = token.Value.Replace(@"\", _escapeTable[@"\"]);

                // escape slashes in comments to prevent autolinking there -- http://meta.stackoverflow.com/questions/95987/html-comment-containing-url-breaks-if-followed-by-another-html-comment
                if (_autoHyperlink && tag.StartsWith("<!"))
                    tag = tag.Replace("/", _escapeTable["/"]);

                // a <code> or </code> with at least one character on either side of it
                // is swapped for the backtick escape
                tag = Regex.Replace(tag, "(?<=.)</?code>(?=.)", _escapeTable[@"`"]);

                rebuilt.Append(EscapeBoldItalic(tag));
            }

            return rebuilt.ToString();
        }
1742

    
1743
        /// <summary>
        /// expands tabs to the next multiple of _tabWidth columns using spaces;
        /// standardizes line endings from DOS (CR LF) or Mac (CR) to UNIX (LF);
        /// drops every \x1A character;
        /// empties lines that hold nothing but spaces (their newline is kept);
        /// terminates the last line and appends two further newlines
        /// </summary>
        private string Normalize(string text)
        {
            var result = new StringBuilder(text.Length);
            var current = new StringBuilder();
            bool hasContent = false;    // set once the current line holds a non-space character

            for (int pos = 0; pos < text.Length; pos++)
            {
                char ch = text[pos];

                if (ch == '\x1A')
                    continue;

                if (ch == '\t')
                {
                    // pad up to the next tab stop; padding alone does not make a line count as content
                    int padding = _tabWidth - current.Length % _tabWidth;
                    current.Append(' ', padding);
                    continue;
                }

                if (ch == '\r')
                {
                    // A CR directly before LF is dropped, the LF then ends the line.
                    // A CR that is the very last character is dropped too.
                    bool followedByOther = pos < text.Length - 1 && text[pos + 1] != '\n';
                    if (!followedByOther)
                        continue;
                }

                if (ch == '\n' || ch == '\r')
                {
                    // end of line: emit it only if it held something other than spaces
                    if (hasContent) result.Append(current);
                    result.Append('\n');
                    current.Length = 0;
                    hasContent = false;
                    continue;
                }

                if (ch != ' ') hasContent = true;
                current.Append(ch);
            }

            // flush whatever follows the last line break and terminate it
            if (hasContent) result.Append(current);
            result.Append('\n');

            // add two newlines to the end before return
            result.Append("\n\n");
            return result.ToString();
        }
1792

    
1793
        #endregion
1794

    
1795
        /// <summary>
        /// Returns the text repeated count times in a row; this emulates what is
        /// available in PHP
        /// </summary>
        private static string RepeatString(string text, int count)
        {
            var repeated = new StringBuilder(text.Length * count);

            int remaining = count;
            while (remaining-- > 0)
                repeated.Append(text);

            return repeated.ToString();
        }
1805

    
1806
    }
1807
}
클립보드 이미지 추가 (최대 크기: 500 MB)