/MarkusAutoUpdate/src/NetSparkle/Libraries/MarkdownSharp.cs - 이력해설 - MARKUS - 일정관리

d8f5045e

taeseongkim

/*

2

 * MarkdownSharp

3

 * -------------

4

 * a C# Markdown processor

5

6

 * Markdown is a text-to-HTML conversion tool for web writers

7

 * Copyright (c) 2004 John Gruber

8

 * http://daringfireball.net/projects/markdown/

9

10

 * Markdown.NET

11

 * Copyright (c) 2004-2009 Milan Negovan

12

 * http://www.aspnetresources.com

13

 * http://aspnetresources.com/blog/markdown_announced.aspx

14

15

 * MarkdownSharp

16

 * Copyright (c) 2009-2011 Jeff Atwood

17

 * http://stackoverflow.com

18

 * http://www.codinghorror.com/blog/

19

 * http://code.google.com/p/markdownsharp/

20

21

 * History: Milan ported the Markdown processor to C#. He granted license to me so I can open source it

22

 * and let the community contribute to and improve MarkdownSharp.

23

24

*/

25

26

#region Copyright and license

27

28

/*

29

30

Copyright (c) 2009 - 2010 Jeff Atwood

31

32

http://www.opensource.org/licenses/mit-license.php

33

34

Permission is hereby granted, free of charge, to any person obtaining a copy

35

of this software and associated documentation files (the "Software"), to deal

36

in the Software without restriction, including without limitation the rights

37

to use, copy, modify, merge, publish, distribute, sublicense, and/or sell

38

copies of the Software, and to permit persons to whom the Software is

39

furnished to do so, subject to the following conditions:

40

41

The above copyright notice and this permission notice shall be included in

42

all copies or substantial portions of the Software.

43

44

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR

45

IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,

46

FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE

47

AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER

48

LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,

49

OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN

50

THE SOFTWARE.

51

52

Copyright (c) 2003-2004 John Gruber

53

<http://daringfireball.net/>

54

All rights reserved.

55

56

Redistribution and use in source and binary forms, with or without

57

modification, are permitted provided that the following conditions are

58

met:

59

60

* Redistributions of source code must retain the above copyright notice,

61

  this list of conditions and the following disclaimer.

62

63

* Redistributions in binary form must reproduce the above copyright

64

  notice, this list of conditions and the following disclaimer in the

65

  documentation and/or other materials provided with the distribution.

66

67

* Neither the name "Markdown" nor the names of its contributors may

68

  be used to endorse or promote products derived from this software

69

  without specific prior written permission.

70

71

This software is provided by the copyright holders and contributors "as

72

is" and any express or implied warranties, including, but not limited

73

to, the implied warranties of merchantability and fitness for a

74

particular purpose are disclaimed. In no event shall the copyright owner

75

or contributors be liable for any direct, indirect, incidental, special,

76

exemplary, or consequential damages (including, but not limited to,

77

procurement of substitute goods or services; loss of use, data, or

78

profits; or business interruption) however caused and on any theory of

79

liability, whether in contract, strict liability, or tort (including

80

negligence or otherwise) arising in any way out of the use of this

81

software, even if advised of the possibility of such damage.

82

*/

83

84

#endregion

85

86

using System;

87

using System.Collections.Generic;

88

using System.Configuration;

89

using System.Text;

90

using System.Text.RegularExpressions;

91

92

namespace MarkdownSharp

93

94

95

    /// <summary>
    /// Bag of option values that can be handed to the Markdown constructor taking a
    /// MarkdownOptions argument. All properties are plain auto-properties, so an
    /// untouched instance carries the C# defaults (false for the flags, null for
    /// EmptyElementSuffix).
    /// </summary>
    public class MarkdownOptions
    {
        /// <summary>
        /// when true, (most) bare plain URLs are auto-hyperlinked
        /// WARNING: this is a significant deviation from the markdown spec
        /// </summary>
        public bool AutoHyperlink { get; set; }

        /// <summary>
        /// when true, RETURN becomes a literal newline
        /// WARNING: this is a significant deviation from the markdown spec
        /// </summary>
        public bool AutoNewlines { get; set; }

        /// <summary>
        /// suffix for empty elements: use ">" for HTML output, or " />" for XHTML output
        /// </summary>
        public string EmptyElementSuffix { get; set; }

        /// <summary>
        /// when true, problematic URL characters like [, ], (, and so forth will be encoded
        /// WARNING: this is a significant deviation from the markdown spec
        /// </summary>
        public bool EncodeProblemUrlCharacters { get; set; }

        /// <summary>
        /// when false, email addresses will never be auto-linked
        /// WARNING: this is a significant deviation from the markdown spec
        /// </summary>
        public bool LinkEmails { get; set; }

        /// <summary>
        /// when true, bold and italic require non-word characters on either side
        /// WARNING: this is a significant deviation from the markdown spec
        /// </summary>
        public bool StrictBoldItalic { get; set; }
    }

    /// <summary>

133

    /// Markdown is a text-to-HTML conversion tool for web writers.

134

    /// Markdown allows you to write using an easy-to-read, easy-to-write plain text format,

135

    /// then convert it to structurally valid XHTML (or HTML).

136

    /// </summary>

137

    public class Markdown

138

139

        private const string _version = "1.13";

140

141

        #region Constructors and Options

142

143

        /// <summary>
        /// Creates a Markdown instance that keeps the built-in default option values.
        /// Delegates to the bool constructor with <c>false</c>.
        /// </summary>
        public Markdown()
            : this(false)
        {
        }

        /// <summary>
        /// Create a new Markdown instance. The flag was meant to load options from the
        /// appSettings section of a configuration file, using these keys:
        ///
        ///     Markdown.StrictBoldItalic (true/false)
        ///     Markdown.EmptyElementSuffix (">" or " />" without the quotes)
        ///     Markdown.LinkEmails (true/false)
        ///     Markdown.AutoNewlines (true/false)
        ///     Markdown.AutoHyperlink (true/false)
        ///     Markdown.EncodeProblemUrlCharacters (true/false)
        ///
        /// NOTE: the code that reads those settings is currently commented out, so the
        /// flag has no effect and the instance always keeps its default option values.
        /// </summary>
        /// <param name="loadOptionsFromConfigFile">
        /// intended to request loading from the configuration file; currently ignored
        /// </param>
        public Markdown(bool loadOptionsFromConfigFile)
        {
            if (!loadOptionsFromConfigFile) return;

            // Configuration loading is disabled. The original implementation is kept
            // below for reference; nothing runs past this point.
            //
            //            var settings = ConfigurationManager.AppSettings;
            //            foreach (string key in settings.Keys)
            //            {
            //                switch (key)
            //                {
            //                    case "Markdown.AutoHyperlink":
            //                        _autoHyperlink = Convert.ToBoolean(settings[key]);
            //                        break;
            //                    case "Markdown.AutoNewlines":
            //                        _autoNewlines = Convert.ToBoolean(settings[key]);
            //                        break;
            //                    case "Markdown.EmptyElementSuffix":
            //                        _emptyElementSuffix = settings[key];
            //                        break;
            //                    case "Markdown.EncodeProblemUrlCharacters":
            //                        _encodeProblemUrlCharacters = Convert.ToBoolean(settings[key]);
            //                        break;
            //                    case "Markdown.LinkEmails":
            //                        _linkEmails = Convert.ToBoolean(settings[key]);
            //                        break;
            //                    case "Markdown.StrictBoldItalic":
            //                        _strictBoldItalic = Convert.ToBoolean(settings[key]);
            //                        break;
            //                }
            //            }
        }

192

193

194

        /// <summary>
        /// Create a new Markdown instance and set the options from the MarkdownOptions object.
        /// Every option value is copied as-is, including a null EmptyElementSuffix.
        /// </summary>
        /// <param name="options">option values to copy; must not be null</param>
        /// <exception cref="ArgumentNullException">options is null</exception>
        public Markdown(MarkdownOptions options)
        {
            // Fail with a descriptive exception instead of a NullReferenceException
            // on the first property access below.
            if (options == null)
            {
                throw new ArgumentNullException("options");
            }

            _autoHyperlink = options.AutoHyperlink;
            _autoNewlines = options.AutoNewlines;
            _emptyElementSuffix = options.EmptyElementSuffix;
            _encodeProblemUrlCharacters = options.EncodeProblemUrlCharacters;
            _linkEmails = options.LinkEmails;
            _strictBoldItalic = options.StrictBoldItalic;
        }

        // Backing store for EmptyElementSuffix; starts out as the XHTML form.
        private string _emptyElementSuffix = " />";

        /// <summary>
        /// suffix for empty elements: use ">" for HTML output, or " />" for XHTML output
        /// </summary>
        public string EmptyElementSuffix
        {
            get
            {
                return _emptyElementSuffix;
            }
            set
            {
                _emptyElementSuffix = value;
            }
        }

217

218

        // Backing store for LinkEmails; enabled unless turned off.
        private bool _linkEmails = true;

        /// <summary>
        /// when false, email addresses will never be auto-linked
        /// WARNING: this is a significant deviation from the markdown spec
        /// </summary>
        public bool LinkEmails
        {
            get
            {
                return _linkEmails;
            }
            set
            {
                _linkEmails = value;
            }
        }

228

229

        // Backing store for StrictBoldItalic; off by default (bool default).
        private bool _strictBoldItalic;

        /// <summary>
        /// when true, bold and italic require non-word characters on either side
        /// WARNING: this is a significant deviation from the markdown spec
        /// </summary>
        public bool StrictBoldItalic
        {
            get
            {
                return _strictBoldItalic;
            }
            set
            {
                _strictBoldItalic = value;
            }
        }

239

240

        // Backing store for AutoNewLines; off by default (bool default).
        private bool _autoNewlines;

        /// <summary>
        /// when true, RETURN becomes a literal newline
        /// WARNING: this is a significant deviation from the markdown spec
        /// </summary>
        public bool AutoNewLines
        {
            get
            {
                return _autoNewlines;
            }
            set
            {
                _autoNewlines = value;
            }
        }

250

251

        // Backing store for AutoHyperlink; off by default (bool default).
        private bool _autoHyperlink;

        /// <summary>
        /// when true, (most) bare plain URLs are auto-hyperlinked
        /// WARNING: this is a significant deviation from the markdown spec
        /// </summary>
        public bool AutoHyperlink
        {
            get
            {
                return _autoHyperlink;
            }
            set
            {
                _autoHyperlink = value;
            }
        }

261

262

        // Backing store for EncodeProblemUrlCharacters; off by default (bool default).
        private bool _encodeProblemUrlCharacters;

        /// <summary>
        /// when true, problematic URL characters like [, ], (, and so forth will be encoded
        /// WARNING: this is a significant deviation from the markdown spec
        /// </summary>
        public bool EncodeProblemUrlCharacters
        {
            get
            {
                return _encodeProblemUrlCharacters;
            }
            set
            {
                _encodeProblemUrlCharacters = value;
            }
        }

272

273

        #endregion

274

275

        // Kind of a token: plain text or a tag.
        private enum TokenType
        {
            Text,
            Tag
        }

        // Pairs a token kind with the raw string it covers.
        private struct Token
        {
            public TokenType Type;
            public string Value;

            public Token(TokenType type, string value)
            {
                Type = type;
                Value = value;
            }
        }

286

287

288

        /// <summary>
        /// maximum nested depth of [] and () supported by the transform; implementation detail
        /// </summary>
        private const int _nestDepth = 6;

        /// <summary>
        /// Tabs are automatically converted to spaces as part of the transform;
        /// this constant determines how "wide" those tabs become in spaces
        /// </summary>
        private const int _tabWidth = 4;

        // Regex fragments: a single *, + or - character, and one or more digits followed by a dot.
        // Presumably the unordered / ordered list markers, going by the names.
        private const string _markerUL = @"[*+-]";
        private const string _markerOL = @"\d+[.]";

        // Lookup tables shared by all instances; created and filled in the static constructor.
        private static readonly Dictionary<string, string> _escapeTable;
        private static readonly Dictionary<string, string> _invertedEscapeTable;
        private static readonly Dictionary<string, string> _backslashEscapeTable;

        // Per-instance working state; emptied by Setup().
        private readonly Dictionary<string, string> _urls = new Dictionary<string, string>();
        private readonly Dictionary<string, string> _titles = new Dictionary<string, string>();
        private readonly Dictionary<string, string> _htmlBlocks = new Dictionary<string, string>();

        // Reset to 0 by Setup().
        private int _listLevel;

        // Temporarily replaces "://" where auto-linking shouldn't happen.
        // Declared const (it was a mutable static) because the marker is never reassigned.
        private const string AutoLinkPreventionMarker = "\x1AP";

312

313

        /// <summary>
        /// In the static constructor we initialize what stays the same across all transforms:
        /// the escape lookup tables and the regex matching backslash-escaped characters.
        /// </summary>
        static Markdown()
        {
            // Tables of hash values for escaped and backslash-escaped characters.
            _escapeTable = new Dictionary<string, string>();
            _invertedEscapeTable = new Dictionary<string, string>();
            _backslashEscapeTable = new Dictionary<string, string>();

            const string escapableChars = @"\`*_{}[]()>#+-.!/";
            var alternatives = new List<string>(escapableChars.Length);

            foreach (char ch in escapableChars)
            {
                string literal = ch.ToString();
                string placeholder = GetHashKey(literal, isHtmlBlock: false);
                string backslashed = @"\" + literal;

                _escapeTable.Add(literal, placeholder);
                _invertedEscapeTable.Add(placeholder, literal);
                _backslashEscapeTable.Add(backslashed, placeholder);

                alternatives.Add(Regex.Escape(backslashed));
            }

            // One alternation branch per backslash-escaped character, in declaration order.
            _backslashEscapes = new Regex(string.Join("|", alternatives.ToArray()), RegexOptions.Compiled);
        }

337

338

339

        /// <summary>
        /// current version of MarkdownSharp (the value of the _version constant);
        /// see http://code.google.com/p/markdownsharp/ for the latest code or to contribute
        /// </summary>
        public string Version
        {
            get
            {
                return _version;
            }
        }

346

347

348

        /// <summary>
        /// Transforms the provided Markdown-formatted text to HTML;
        /// see http://en.wikipedia.org/wiki/Markdown
        /// </summary>
        /// <remarks>
        /// The order in which other subs are called here is
        /// essential. Link and image substitutions need to happen before
        /// EscapeSpecialChars(), so that any *'s or _'s in the a
        /// and img tags get encoded.
        /// </remarks>
        /// <param name="text">Markdown source; null or empty yields an empty string</param>
        /// <returns>the converted text followed by a single "\n", or "" for null/empty input</returns>
        public string Transform(string text)
        {
            if (string.IsNullOrEmpty(text))
            {
                return "";
            }

            // Start from empty per-transform state.
            Setup();

            string html = Normalize(text);
            html = HashHTMLBlocks(html);
            html = StripLinkDefinitions(html);
            html = RunBlockGamut(html);
            html = Unescape(html);

            Cleanup();

            return html + "\n";
        }

        /// <summary>
        /// Perform transformations that form block-level tags like paragraphs, headers, and list items.
        /// </summary>
        /// <param name="text">text to process</param>
        /// <param name="unhash">forwarded to FormParagraphs</param>
        /// <returns>the text after all block-level passes</returns>
        private string RunBlockGamut(string text, bool unhash = true)
        {
            string withHeaders = DoHeaders(text);
            string withRules = DoHorizontalRules(withHeaders);
            string withLists = DoLists(withRules);
            string withCode = DoCodeBlocks(withLists);
            string withQuotes = DoBlockQuotes(withCode);

            // We already ran HashHTMLBlocks() before, in Markdown(), but that
            // was to escape raw HTML in the original Markdown source. This time,
            // we're escaping the markup we've just created, so that we don't wrap
            // <p> tags around block-level tags.
            string hashed = HashHTMLBlocks(withQuotes);

            return FormParagraphs(hashed, unhash: unhash);
        }

        /// <summary>
        /// Perform transformations that occur *within* block-level tags like paragraphs, headers, and list items.
        /// </summary>
        /// <param name="text">text to process</param>
        /// <returns>the text after all span-level passes</returns>
        private string RunSpanGamut(string text)
        {
            string span = DoCodeSpans(text);
            span = EscapeSpecialCharsWithinTagAttributes(span);
            span = EscapeBackslashes(span);

            // Images must come first, because ![foo][f] looks like an anchor.
            span = DoImages(span);
            span = DoAnchors(span);

            // Must come after DoAnchors(), because you can use < and >
            // delimiters in inline links like [this](<url>).
            span = DoAutoLinks(span);

            // Put back the "://" sequences that were swapped for the prevention marker.
            span = span.Replace(AutoLinkPreventionMarker, "://");

            span = EncodeAmpsAndAngles(span);
            span = DoItalicsAndBold(span);

            return DoHardBreaks(span);
        }

424

425

426

        private static Regex _newlinesLeadingTrailing = new Regex(@"^\n+|\n+\z", RegexOptions.Compiled);
        private static Regex _newlinesMultiple = new Regex(@"\n{2,}", RegexOptions.Compiled);
        private static Regex _leadingWhitespace = new Regex(@"^[ ]*", RegexOptions.Compiled);

        private static Regex _htmlBlockHash = new Regex("\x1AH\\d+H", RegexOptions.Compiled);

        /// <summary>
        /// splits on two or more newlines, to form "paragraphs";
        /// each paragraph is then unhashed (if it is a hash and unhashing isn't turned off) or wrapped in HTML p tag
        /// </summary>
        /// <param name="text">block-processed text; leading and trailing newlines are dropped before splitting</param>
        /// <param name="unhash">when false, paragraphs that start with an HTML block hash are left untouched</param>
        /// <returns>the processed paragraphs joined with a blank line ("\n\n")</returns>
        private string FormParagraphs(string text, bool unhash = true)
        {
            // split on two or more newlines
            string[] grafs = _newlinesMultiple.Split(_newlinesLeadingTrailing.Replace(text, ""));

            for (int i = 0; i < grafs.Length; i++)
            {
                // Ordinal comparison is required here: the default culture-sensitive
                // StartsWith may treat the \x1A control character as ignorable, which
                // would make every paragraph beginning with "H" look like a hash.
                if (grafs[i].StartsWith("\x1AH", StringComparison.Ordinal))
                {
                    // unhashify HTML blocks
                    if (unhash)
                    {
                        int sanityCheck = 50; // just for safety, guard against an infinite loop
                        bool keepGoing = true; // as long as replacements were made, keep going
                        while (keepGoing && sanityCheck > 0)
                        {
                            keepGoing = false;
                            grafs[i] = _htmlBlockHash.Replace(grafs[i], match =>
                            {
                                keepGoing = true;
                                return _htmlBlocks[match.Value];
                            });
                            sanityCheck--;
                        }
                        /* if (keepGoing)
                        {
                            // Logging of an infinite loop goes here.
                            // If such a thing should happen, please open a new issue on http://code.google.com/p/markdownsharp/
                            // with the input that caused it.
                        }*/
                    }
                }
                else
                {
                    // do span level processing inside the block, then wrap result in <p> tags
                    grafs[i] = _leadingWhitespace.Replace(RunSpanGamut(grafs[i]), "<p>") + "</p>";
                }
            }

            return string.Join("\n\n", grafs);
        }

        /// <summary>
        /// Resets the per-transform state: empties the URL, title and HTML block tables
        /// and sets the list level back to zero.
        /// </summary>
        private void Setup()
        {
            _listLevel = 0;

            // Clear the global hashes. If we don't clear these, you get conflicts
            // from other articles when generating a page which contains more than
            // one article (e.g. an index page that shows the N most recent
            // articles):
            _htmlBlocks.Clear();
            _titles.Clear();
            _urls.Clear();
        }

489

490

491

        /// <summary>
        /// Releases the per-transform state; simply performs the same reset as Setup().
        /// </summary>
        private void Cleanup()
        {
            Setup();
        }

494

495

496

        // Cache for GetNestedBracketsPattern(); null until first use.
        private static string _nestedBracketsPattern;

        /// <summary>
        /// Reusable pattern to match balanced [brackets]. See Friedl's
        /// "Mastering Regular Expressions", 2nd Ed., pp. 328-331.
        /// </summary>
        /// <returns>the pattern text, built on the first call and cached afterwards</returns>
        private static string GetNestedBracketsPattern()
        {
            // in other words [this] and [this[also]] and [this[also[too]]]
            // up to _nestDepth
            if (_nestedBracketsPattern == null)
            {
                // Each level is "non-bracket text OR an opening bracket followed by the
                // next level". The "|" is essential: without it plain, unnested text
                // could not match. The embedded "#" comments assume the pattern is used
                // with RegexOptions.IgnorePatternWhitespace - confirm at the call sites.
                _nestedBracketsPattern =
                    RepeatString(@"
                    (?>              # Atomic matching
                       [^\[\]]+      # Anything other than brackets
                     |
                       \[
                           ", _nestDepth) + RepeatString(
                    @" \]
                    )*"
                    , _nestDepth);
            }

            return _nestedBracketsPattern;
        }

518

519

520

        // Cache for GetNestedParensPattern(); null until first use.
        private static string _nestedParensPattern;

        /// <summary>
        /// Reusable pattern to match balanced (parens). See Friedl's
        /// "Mastering Regular Expressions", 2nd Ed., pp. 328-331.
        /// </summary>
        /// <returns>the pattern text, built on the first call and cached afterwards</returns>
        private static string GetNestedParensPattern()
        {
            // in other words (this) and (this(also)) and (this(also(too)))
            // up to _nestDepth
            if (_nestedParensPattern == null)
            {
                // Each level is "text without parens/whitespace OR an opening paren
                // followed by the next level". The "|" is essential: without it plain,
                // unnested text could not match. The embedded "#" comments assume the
                // pattern is used with RegexOptions.IgnorePatternWhitespace - confirm
                // at the call sites.
                _nestedParensPattern =
                    RepeatString(@"
                    (?>              # Atomic matching
                       [^()\s]+      # Anything other than parens or whitespace
                     |
                       \(
                           ", _nestDepth) + RepeatString(
                    @" \)
                    )*"
                    , _nestDepth);
            }

            return _nestedParensPattern;
        }

542

543

544

        // Matches one link definition line: [id]: url "optional title".
        // The id may be indented by up to _tabWidth - 1 spaces.
        private static Regex _linkDef = new Regex(string.Format(@"
                        ^[ ]{{0,{0}}}\[(.+)\]:  # id = $1
                          [ ]*
                          \n?                   # maybe *one* newline
                          [ ]*
                        <?(\S+?)>?              # url = $2
                          [ ]*
                          \n?                   # maybe one newline
                          [ ]*
                        (?:
                            (?<=\s)             # lookbehind for whitespace
                            [""(]
                            (.+?)               # title = $3
                            ["")]
                            [ ]*
                        )?                      # title is optional
                        (?:\n+|\Z)", _tabWidth - 1), RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);

        /// <summary>
        /// Removes every link definition from the text; LinkEvaluator records the URL
        /// and title of each one as it is removed.
        /// </summary>
        /// <remarks>
        /// ^[id]: url "optional title"
        /// </remarks>
        /// <param name="text">text that may contain link definitions</param>
        /// <returns>the text with all matched definitions replaced by nothing</returns>
        private string StripLinkDefinitions(string text)
        {
            MatchEvaluator recordAndRemove = LinkEvaluator;
            return _linkDef.Replace(text, recordAndRemove);
        }

571

572

573

        /// <summary>
        /// Match callback for link definitions: stores the URL (and the title, when one
        /// was captured) under the lower-cased link id.
        /// </summary>
        /// <param name="match">a match whose groups 1, 2 and 3 hold id, url and title</param>
        /// <returns>always "", so the definition disappears from the text</returns>
        private string LinkEvaluator(Match match)
        {
            GroupCollection parts = match.Groups;
            string key = parts[1].Value.ToLowerInvariant();

            _urls[key] = EncodeAmpsAndAngles(parts[2].Value);

            Group title = parts[3];
            if (title != null && title.Length > 0)
            {
                // Double quotes would break the attribute the title ends up in.
                _titles[key] = title.Value.Replace("\"", "&quot;");
            }

            return "";
        }

582

583

584

        // compiling this monster regex results in worse performance. trust me.

585

        private static Regex _blocksHtml = new Regex(GetBlockPattern(), RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace);

586

587

588

        /// <summary>

589

        /// derived pretty much verbatim from PHP Markdown

590

        /// </summary>

591

        private static string GetBlockPattern()

592

593

594

            // Hashify HTML blocks:

595

            // We only want to do this for block-level HTML tags, such as headers,

596

            // lists, and tables. That's because we still want to wrap <p>s around

597

            // "paragraphs" that are wrapped in non-block-level tags, such as anchors,

598

            // phrase emphasis, and spans. The list of tags we're looking for is

599

            // hard-coded:

600

//

601

            // *  List "a" is made of tags which can be both inline or block-level.

602

            //    These will be treated block-level when the start tag is alone on

603

            //    its line, otherwise they're not matched here and will be taken as

604

            //    inline later.

605

            // *  List "b" is made of tags which are always block-level;

606

//

607

            string blockTagsA = "ins|del";

608

            string blockTagsB = "p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|script|noscript|form|fieldset|iframe|math";

609

610

            // Regular expression for the content of a block tag.

611

            string attr = @"

612

            (?>                    # optional tag attributes

613

              \s                  # starts with whitespace

614

(?>

615

                [^>""/]+              # text outside quotes

616

617

                /+(?!>)                # slash not followed by >

618

619

                ""[^""]*""            # text inside double quotes (tolerate >)

620

621

                '[^']*'                  # text inside single quotes (tolerate >)

622

)*

623

)?

624

";

625

626

            string content = RepeatString(@"

627

(?>

628

                  [^<]+              # content without tag

629

630

                  <\2              # nested opening tag

631

                    " + attr + @"       # attributes

632

(?>

633

/>

634

635

                      >", _nestDepth) +   // end of opening tag

636

                      ".*?" +             // last level nested tag content

637

            RepeatString(@"

638

                      </\2\s*>          # closing nested tag

639

640

641

                  <(?!/\2\s*>           # other tags with a different name

642

643

                )*", _nestDepth);

644

645

            string content2 = content.Replace(@"\2", @"\3");

646

647

            // First, look for nested blocks, e.g.:

648

            //   <div>

649

            //     <div>

650

            //     tags for inner block must be indented.

651

            //     </div>

652

            //   </div>

653

//

654

            // The outermost tags must start at the left margin for this to match, and

655

            // the inner nested divs must be indented.

656

            // We need to do this before the next, more liberal match, because the next

657

            // match will start at the first `<div>` and stop at the first `</div>`.

658

            string pattern = @"

659

(?>

660

(?>

661

                    (?<=\n)     # Starting at the beginning of a line

662

                    |           # or

663

                    \A\n?       # the beginning of the doc

664

665

                  (             # save in $1

666

667

                    # Match from `\n<tag>` to `</tag>\n`, handling nested tags

668

                    # in between.

669

670

                        <($block_tags_b_re)   # start tag = $2

671

                        $attr>                # attributes followed by > and \n

672

                        $content              # content, support nesting

673

                        </\2>                 # the matching end tag

674

                        [ ]*                  # trailing spaces

675

                        (?=\n+|\Z)            # followed by a newline or end of document

676

677

                  | # Special version for tags of group a.

678

679

                        <($block_tags_a_re)   # start tag = $3

680

                        $attr>[ ]*\n          # attributes followed by >

681

                        $content2             # content, support nesting

682

                        </\3>                 # the matching end tag

683

                        [ ]*                  # trailing spaces

684

                        (?=\n+|\Z)            # followed by a newline or end of document

685

686

                  | # Special case just for <hr />. It was easier to make a special

687

                    # case than to make the other regex more complicated.

688

689

                        [ ]{0,$less_than_tab}

690

<hr

691

                        $attr                 # attributes

692

                        /?>                   # the matching end tag

693

                        [ ]*

694

                        (?=\n{2,}|\Z)         # followed by a blank line or end of document

695

696

                  | # Special case for standalone HTML comments:

697

698

                      (?<=\n\n|\A)            # preceded by a blank line or start of document

699

                      [ ]{0,$less_than_tab}

700

                      (?s:

701

                        <!--(?:|(?:[^>-]|-[^>])(?:[^-]|-[^-])*)-->

702

703

                      [ ]*

704

                      (?=\n{2,}|\Z)            # followed by a blank line or end of document

705

706

                  | # PHP and ASP-style processor instructions (<? and <%)

707

708

                      [ ]{0,$less_than_tab}

709

                      (?s:

710

                        <([?%])                # $4

711

.*?

712

\4>

713

714

                      [ ]*

715

                      (?=\n{2,}|\Z)            # followed by a blank line or end of document

716

717

718

)";

719

720

            pattern = pattern.Replace("$less_than_tab", (_tabWidth - 1).ToString());

721

            pattern = pattern.Replace("$block_tags_b_re", blockTagsB);

722

            pattern = pattern.Replace("$block_tags_a_re", blockTagsA);

723

            pattern = pattern.Replace("$attr", attr);

724

            pattern = pattern.Replace("$content2", content2);

725

            pattern = pattern.Replace("$content", content);

726

727

            return pattern;

728

729

730

        /// <summary>
        /// Runs <c>_blocksHtml</c> over the text and swaps every match for the
        /// placeholder produced by <see cref="HtmlEvaluator"/>.
        /// </summary>
        /// <param name="text">Text to scan for block-level HTML.</param>
        /// <returns>The text with each matched block replaced by its placeholder.</returns>
        private string HashHTMLBlocks(string text)
        {
            MatchEvaluator stashBlock = HtmlEvaluator;
            return _blocksHtml.Replace(text, stashBlock);
        }

736

737

738

        /// <summary>
        /// Match callback used by <see cref="HashHTMLBlocks"/>: stores capture group 1 in
        /// <c>_htmlBlocks</c> under a generated key and returns that key.
        /// </summary>
        /// <param name="match">Match whose group 1 holds the HTML block.</param>
        /// <returns>The key, with two newlines before it and two after it.</returns>
        private string HtmlEvaluator(Match match)
        {
            string html = match.Groups[1].Value;
            string placeholder = GetHashKey(html, isHtmlBlock: true);

            _htmlBlocks[placeholder] = html;

            return "\n\n" + placeholder + "\n\n";
        }

745

746

747

        /// <summary>
        /// Builds the placeholder key that stands in for a stashed piece of text.
        /// </summary>
        /// <param name="s">Text whose hash code supplies the numeric part of the key.</param>
        /// <param name="isHtmlBlock">true to delimit the key with 'H'; false to delimit it with 'E'.</param>
        /// <returns>
        /// The character U+001A, the delimiter, the absolute value of the hash code in decimal,
        /// and the delimiter again.
        /// </returns>
        private static string GetHashKey(string s, bool isHtmlBlock)
        {
            var delim = isHtmlBlock ? 'H' : 'E';

            // Widen before taking the absolute value: Math.Abs(int) throws OverflowException
            // for int.MinValue, and GetHashCode() is allowed to return that value.
            long hash = Math.Abs((long)s.GetHashCode());

            return "\x1A" + delim + hash.ToString() + delim;
        }

751

752

753

        private static Regex _htmlTokens = new Regex(@"

754

            (<!--(?:|(?:[^>-]|-[^>])(?:[^-]|-[^-])*)-->)|        # match <!-- foo -->

755

            (<\?.*?\?>)|                 # match <?foo?> " +

756

            RepeatString(@"

757

            (<[A-Za-z\/!$](?:[^<>]|", _nestDepth) + RepeatString(@")*>)", _nestDepth) +

758

                                       " # match <tag> and </tag>",

759

            RegexOptions.Multiline | RegexOptions.Singleline | RegexOptions.ExplicitCapture | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);

760

761

        /// <summary>
        /// Splits the input into a flat list of tokens. Every match of <c>_htmlTokens</c>
        /// becomes a <c>TokenType.Tag</c> token; every non-empty stretch of input between
        /// matches, and after the last match, becomes a <c>TokenType.Text</c> token.
        /// </summary>
        /// <param name="text">The text to tokenize.</param>
        /// <returns>The tokens, in the order they occur in <paramref name="text"/>.</returns>
        private List<Token> TokenizeHTML(string text)
        {
            var result = new List<Token>();
            int cursor = 0; // index just past the previously emitted tag

            // this regex is derived from the _tokenize() subroutine in Brad Choate's MTRegex plugin.
            // http://www.bradchoate.com/past/mtregex.php
            foreach (Match tag in _htmlTokens.Matches(text))
            {
                int gap = tag.Index - cursor;
                if (gap > 0)
                {
                    result.Add(new Token(TokenType.Text, text.Substring(cursor, gap)));
                }

                result.Add(new Token(TokenType.Tag, tag.Value));
                cursor = tag.Index + tag.Length;
            }

            int tail = text.Length - cursor;
            if (tail > 0)
            {
                result.Add(new Token(TokenType.Text, text.Substring(cursor, tail)));
            }

            return result;
        }

        private static Regex _anchorRef = new Regex(string.Format(@"

795

            (                               # wrap whole match in $1

796

\[

797

                    ({0})                   # link text = $2

798

\]

799

800

                [ ]?                        # one optional space

801

                (?:\n[ ]*)?                 # one optional newline followed by spaces

802

803

\[

804

                    (.*?)                   # id = $3

805

\]

806

            )", GetNestedBracketsPattern()), RegexOptions.Singleline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);

807

808

        private static Regex _anchorInline = new Regex(string.Format(@"

809

                (                           # wrap whole match in $1

810

\[

811

                        ({0})               # link text = $2

812

\]

813

                    \(                      # literal paren

814

                        [ ]*

815

                        ({1})               # href = $3

816

                        [ ]*

817

                        (                   # $4

818

                        (['""])           # quote char = $5

819

                        (.*?)               # title = $6

820

                        \5                  # matching quote

821

                        [ ]*                # ignore any spaces between closing quote and )

822

                        )?                  # title is optional

823

\)

824

                )", GetNestedBracketsPattern(), GetNestedParensPattern()),

825

                  RegexOptions.Singleline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);

826

827

        private static Regex _anchorRefShortcut = new Regex(@"

828

            (                               # wrap whole match in $1

829

\[

830

                 ([^\[\]]+)                 # link text = $2; can't contain [ or ]

831

\]

832

            )", RegexOptions.Singleline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);

833

834

        /// <summary>
        /// Converts Markdown link syntax into HTML anchor tags.
        /// </summary>
        /// <remarks>
        /// Three passes run in this order:
        /// [link text][id]            (reference style)
        /// [link text](url "title")   (inline style)
        /// [id]                       (reference shortcut)
        /// </remarks>
        /// <param name="text">Text that may contain Markdown links.</param>
        /// <returns>The text after all three replacement passes.</returns>
        private string DoAnchors(string text)
        {
            // Reference-style links first: [link text] [id]
            string linked = _anchorRef.Replace(text, AnchorRefEvaluator);

            // Then inline-style links: [link text](url "optional title")
            linked = _anchorInline.Replace(linked, AnchorInlineEvaluator);

            // Reference shortcuts ([link text]) must run last, in case the text also
            // contains [link text][1] or [link text](/foo).
            linked = _anchorRefShortcut.Replace(linked, AnchorRefShortcutEvaluator);

            return linked;
        }

855

856

857

        /// <summary>
        /// Returns <paramref name="s"/> with every occurrence of "://" replaced by
        /// <c>AutoLinkPreventionMarker</c>.
        /// </summary>
        private string SaveFromAutoLinking(string s)
        {
            string guarded = s.Replace("://", AutoLinkPreventionMarker);
            return guarded;
        }

860

861

862

        /// <summary>
        /// Match callback for reference-style links: [link text][id].
        /// </summary>
        /// <param name="match">
        /// Match with the whole construct in group 1, the link text in group 2 and the id in group 3.
        /// </param>
        /// <returns>
        /// An anchor tag when the (lower-cased) id is a key of <c>_urls</c>; otherwise the
        /// matched text, unchanged.
        /// </returns>
        private string AnchorRefEvaluator(Match match)
        {
            string wholeMatch = match.Groups[1].Value;
            string rawLinkText = match.Groups[2].Value;
            string linkText = SaveFromAutoLinking(rawLinkText);
            string linkID = match.Groups[3].Value.ToLowerInvariant();

            // for shortcut links like [this][].
            // The id is taken from the raw link text: SaveFromAutoLinking rewrites "://",
            // so an id built from its output would no longer equal the text as written.
            // NOTE(review): assumes the keys of _urls are stored without that rewrite - confirm.
            if (linkID == "")
            {
                linkID = rawLinkText.ToLowerInvariant();
            }

            if (!_urls.ContainsKey(linkID))
            {
                // No definition for this id: leave the Markdown as it was.
                return wholeMatch;
            }

            string url = _urls[linkID];
            url = EncodeProblemUrlChars(url);
            url = EscapeBoldItalic(url);

            string result = "<a href=\"" + url + "\"";

            if (_titles.ContainsKey(linkID))
            {
                // Encode once and then escape, the same sequence AnchorInlineEvaluator and
                // AnchorRefShortcutEvaluator apply; the title used to be encoded twice here.
                string title = AttributeEncode(_titles[linkID]);
                title = EscapeBoldItalic(title);
                result += " title=\"" + title + "\"";
            }

            result += ">" + linkText + "</a>";
            return result;
        }

895

896

897

        /// <summary>
        /// Match callback for reference shortcuts: [link text], where the text doubles as the id.
        /// </summary>
        /// <param name="match">
        /// Match with the whole construct in group 1 and the bracketed text in group 2.
        /// </param>
        /// <returns>
        /// An anchor tag when the normalized text is a key of <c>_urls</c>; otherwise the
        /// matched text, unchanged.
        /// </returns>
        private string AnchorRefShortcutEvaluator(Match match)
        {
            string wholeMatch = match.Groups[1].Value;
            string rawLinkText = match.Groups[2].Value;
            string linkText = SaveFromAutoLinking(rawLinkText);

            // lower case and remove newlines / extra spaces.
            // Built from the raw text: SaveFromAutoLinking rewrites "://", so an id built
            // from its output would no longer equal the text as written.
            // NOTE(review): assumes the keys of _urls are stored without that rewrite - confirm.
            string linkID = Regex.Replace(rawLinkText.ToLowerInvariant(), @"[ ]*\n[ ]*", " ");

            if (!_urls.ContainsKey(linkID))
            {
                // No definition for this id: leave the Markdown as it was.
                return wholeMatch;
            }

            string url = _urls[linkID];
            url = EncodeProblemUrlChars(url);
            url = EscapeBoldItalic(url);

            string result = "<a href=\"" + url + "\"";

            if (_titles.ContainsKey(linkID))
            {
                string title = AttributeEncode(_titles[linkID]);
                title = EscapeBoldItalic(title);
                result += " title=\"" + title + "\"";
            }

            result += ">" + linkText + "</a>";
            return result;
        }

        /// <summary>
        /// Match callback for inline links: [link text](url "optional title").
        /// </summary>
        /// <param name="match">
        /// Match with the link text in group 2, the href in group 3 and the title in group 6.
        /// </param>
        /// <returns>The anchor tag built from those three groups.</returns>
        private string AnchorInlineEvaluator(Match match)
        {
            string linkText = SaveFromAutoLinking(match.Groups[2].Value);
            string href = match.Groups[3].Value;
            string title = match.Groups[6].Value;

            href = EscapeBoldItalic(EncodeProblemUrlChars(href));

            // remove <>'s surrounding URL, if present
            bool angleWrapped = href.StartsWith("<") && href.EndsWith(">");
            if (angleWrapped)
            {
                href = href.Substring(1, href.Length - 2);
            }

            string anchor = "<a href=\"" + href + "\"";

            if (!String.IsNullOrEmpty(title))
            {
                string safeTitle = EscapeBoldItalic(AttributeEncode(title));
                anchor += " title=\"" + safeTitle + "\"";
            }

            return anchor + ">" + linkText + "</a>";
        }

952

953

954

        private static Regex _imagesRef = new Regex(@"

955

                    (               # wrap whole match in $1

956

!\[

957

                        (.*?)       # alt text = $2

958

\]

959

960

                    [ ]?            # one optional space

961

                    (?:\n[ ]*)?     # one optional newline followed by spaces

962

963

\[

964

                        (.*?)       # id = $3

965

\]

966

967

                    )", RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline | RegexOptions.Compiled);

968

969

        private static Regex _imagesInline = new Regex(String.Format(@"

970

              (                     # wrap whole match in $1

971

!\[

972

                    (.*?)           # alt text = $2

973

\]

974

                \s?                 # one optional whitespace character

975

                \(                  # literal paren

976

                    [ ]*

977

                    ({0})           # href = $3

978

                    [ ]*

979

                    (               # $4

980

                    (['""])       # quote char = $5

981

                    (.*?)           # title = $6

982

                    \5              # matching quote

983

                    [ ]*

984

                    )?              # title is optional

985

\)

986

              )", GetNestedParensPattern()),

987

                  RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline | RegexOptions.Compiled);

988

989

        /// <summary>
        /// Converts Markdown image syntax into HTML img tags.
        /// </summary>
        /// <remarks>
        /// ![alt text][id]                     (reference style, handled first)
        /// ![alt text](url "optional title")   (inline style, handled second)
        /// </remarks>
        /// <param name="text">Text that may contain Markdown images.</param>
        /// <returns>The text after both replacement passes.</returns>
        private string DoImages(string text)
        {
            // Reference-style labeled images: ![alt text][id]
            string withReferences = _imagesRef.Replace(text, ImageReferenceEvaluator);

            // Inline images: ![alt text](url "optional title")
            // Don't forget: encode * and _
            string withInline = _imagesInline.Replace(withReferences, ImageInlineEvaluator);

            return withInline;
        }

1006

1007

1008

        /// <summary>
        /// Escapes image alt text: applies <c>EscapeBoldItalic</c>, then replaces each of
        /// the characters [ ] ( ) with its <c>_escapeTable</c> entry.
        /// </summary>
        /// <remarks>
        /// Guards against badly broken HTML when syntax ambiguities collide. The output may
        /// still not be what the author intended, but it is no longer garbage.
        /// </remarks>
        private string EscapeImageAltText(string s)
        {
            string escaped = EscapeBoldItalic(s);
            return Regex.Replace(escaped, @"[\[\]()]", bracket => _escapeTable[bracket.Value]);
        }

1016

1017

1018

        /// <summary>
        /// Match callback for reference-style images: ![alt text][id].
        /// </summary>
        /// <param name="match">
        /// Match with the whole construct in group 1, the alt text in group 2 and the id in group 3.
        /// </param>
        /// <returns>
        /// The img tag from <see cref="ImageTag"/> when the (lower-cased) id is a key of
        /// <c>_urls</c>; otherwise the matched text, unchanged.
        /// </returns>
        private string ImageReferenceEvaluator(Match match)
        {
            string untouched = match.Groups[1].Value;
            string alt = match.Groups[2].Value;
            string id = match.Groups[3].Value.ToLowerInvariant();

            // for shortcut links like ![this][].
            if (id == "")
            {
                id = alt.ToLowerInvariant();
            }

            // If there's no such link ID, leave intact:
            if (!_urls.ContainsKey(id))
            {
                return untouched;
            }

            string title = _titles.ContainsKey(id) ? _titles[id] : null;
            return ImageTag(_urls[id], alt, title);
        }

        /// <summary>
        /// Match callback for inline images: ![alt text](url "optional title").
        /// </summary>
        /// <param name="match">
        /// Match with the alt text in group 2, the url in group 3 and the title in group 6.
        /// </param>
        /// <returns>The img tag produced by <see cref="ImageTag"/>.</returns>
        private string ImageInlineEvaluator(Match match)
        {
            string src = match.Groups[3].Value;

            // Remove <>'s surrounding URL, if present
            bool angleWrapped = src.StartsWith("<") && src.EndsWith(">");
            if (angleWrapped)
            {
                src = src.Substring(1, src.Length - 2);
            }

            return ImageTag(src, match.Groups[2].Value, match.Groups[6].Value);
        }

1055

1056

1057

        /// <summary>
        /// Assembles an img tag from a url, alt text and optional title, ending it with
        /// <c>_emptyElementSuffix</c>.
        /// </summary>
        /// <param name="url">Image location; passed through EncodeProblemUrlChars and EscapeBoldItalic.</param>
        /// <param name="altText">Alternative text; passed through AttributeEncode and EscapeImageAltText.</param>
        /// <param name="title">Optional title; the attribute is omitted when this is null or empty.</param>
        /// <returns>The complete tag text.</returns>
        private string ImageTag(string url, string altText, string title)
        {
            string alt = EscapeImageAltText(AttributeEncode(altText));
            string src = EscapeBoldItalic(EncodeProblemUrlChars(url));

            string tag = "<img src=\"" + src + "\" alt=\"" + alt + "\"";

            if (!String.IsNullOrEmpty(title))
            {
                string safeTitle = AttributeEncode(EscapeBoldItalic(title));
                tag += " title=\"" + safeTitle + "\"";
            }

            return tag + _emptyElementSuffix;
        }

1070

1071

1072

        private static Regex _headerSetext = new Regex(@"

1073

                ^(.+?)

1074

                [ ]*

1075

\n

1076

                (=+|-+)     # $1 = string of ='s or -'s

1077

                [ ]*

1078

                \n+",

1079

            RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);

1080

1081

        private static Regex _headerAtx = new Regex(@"

1082

                ^(\#{1,6})  # $1 = string of #'s

1083

                [ ]*

1084

                (.+?)       # $2 = Header text

1085

                [ ]*

1086

                \#*         # optional closing #'s (not counted)

1087

                \n+",

1088

            RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);

1089

1090

        /// <summary>
        /// Converts Markdown headers into HTML header tags.
        /// </summary>
        /// <remarks>
        /// Setext style, handled first:
        ///
        /// Header 1
        /// ========
        ///
        /// Header 2
        /// --------
        ///
        /// Atx style, handled second:
        ///
        /// # Header 1
        /// ## Header 2
        /// ## Header 2 with closing hashes ##
        /// ...
        /// ###### Header 6
        /// </remarks>
        /// <param name="text">Text that may contain Markdown headers.</param>
        /// <returns>The text after both replacement passes.</returns>
        private string DoHeaders(string text)
        {
            string afterSetext = _headerSetext.Replace(text, SetextHeaderEvaluator);
            string afterAtx = _headerAtx.Replace(afterSetext, AtxHeaderEvaluator);
            return afterAtx;
        }

1111

1112

1113

        /// <summary>
        /// Match callback for underlined headers. Group 1 is the header text and group 2
        /// is the underline: an underline starting with "=" gives level 1, any other gives level 2.
        /// </summary>
        /// <returns>The header text, run through RunSpanGamut, inside the matching h tag, followed by two newlines.</returns>
        private string SetextHeaderEvaluator(Match match)
        {
            string title = match.Groups[1].Value;

            int level;
            if (match.Groups[2].Value.StartsWith("="))
            {
                level = 1;
            }
            else
            {
                level = 2;
            }

            string body = RunSpanGamut(title);
            return "<h" + level + ">" + body + "</h" + level + ">\n\n";
        }

1118

1119

1120

        /// <summary>
        /// Match callback for hash-prefixed headers. Group 1 is the run of hashes, whose
        /// length is the header level; group 2 is the header text.
        /// </summary>
        /// <returns>The header text, run through RunSpanGamut, inside the matching h tag, followed by two newlines.</returns>
        private string AtxHeaderEvaluator(Match match)
        {
            string title = match.Groups[2].Value;
            int level = match.Groups[1].Value.Length;

            string body = RunSpanGamut(title);
            return "<h" + level + ">" + body + "</h" + level + ">\n\n";
        }

        private static Regex _horizontalRules = new Regex(@"

1129

            ^[ ]{0,3}         # Leading space

1130

                ([-*_])       # $1: First marker

1131

                (?>           # Repeated marker group

1132

                    [ ]{0,2}  # Zero, one, or two spaces.

1133

                    \1        # Marker character

1134

                ){2,}         # Group repeated at least twice

1135

                [ ]*          # Trailing spaces

1136

                $             # End of line.

1137

            ", RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);

1138

1139

        /// <summary>
        /// Converts Markdown horizontal rules into HTML hr tags.
        /// </summary>
        /// <remarks>
        /// Examples of rule lines:
        /// ***
        /// * * *
        /// ---
        /// - - -
        /// </remarks>
        /// <param name="text">Text that may contain horizontal-rule lines.</param>
        /// <returns>The text with each match of <c>_horizontalRules</c> replaced by an hr tag and a newline.</returns>
        private string DoHorizontalRules(string text)
        {
            string rule = string.Concat("<hr", _emptyElementSuffix, "\n");
            return _horizontalRules.Replace(text, rule);
        }

1151

1152

1153

        private static string _wholeList = string.Format(@"

1154

            (                               # $1 = whole list

1155

              (                             # $2

1156

                [ ]{{0,{1}}}

1157

                ({0})                       # $3 = first list item marker

1158

                [ ]+

1159

1160

              (?s:.+?)

1161

              (                             # $4

1162

\z

1163

1164

                  \n{{2,}}

1165

                  (?=\S)

1166

                  (?!                       # Negative lookahead for another list item marker

1167

                    [ ]*

1168

                    {0}[ ]+

1169

1170

1171

            )", string.Format("(?:{0}|{1})", _markerUL, _markerOL), _tabWidth - 1);

1172

1173

        private static Regex _listNested = new Regex(@"^" + _wholeList,

1174

            RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);

1175

1176

        private static Regex _listTopLevel = new Regex(@"(?:(?<=\n\n)|\A\n?)" + _wholeList,

1177

            RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);

1178

1179

        /// <summary>
        /// Converts Markdown lists into HTML ul, ol and li tags.
        /// </summary>
        /// <param name="text">Text that may contain Markdown lists.</param>
        /// <returns>The text with each matched list rewritten by <see cref="ListEvaluator"/>.</returns>
        private string DoLists(string text)
        {
            // Nested lists are matched with a different prefix than top-level lists.
            // See the extended comment in ProcessListItems().
            Regex listPattern = (_listLevel > 0) ? _listNested : _listTopLevel;

            return listPattern.Replace(text, ListEvaluator);
        }

1192

1193

1194

        /// <summary>
        /// Match callback for a whole list. Group 1 is the list text and group 3 is its
        /// first item marker: a marker matching <c>_markerUL</c> yields a ul, any other an ol.
        /// </summary>
        /// <returns>The processed list items wrapped in the chosen list tag.</returns>
        private string ListEvaluator(Match match)
        {
            string list = match.Groups[1].Value;
            bool unordered = Regex.IsMatch(match.Groups[3].Value, _markerUL);

            string tag = unordered ? "ul" : "ol";
            string items = ProcessListItems(list, unordered ? _markerUL : _markerOL);

            return "<" + tag + ">\n" + items + "</" + tag + ">\n";
        }

1204

1205

1206

        /// <summary>
        /// Process the contents of a single ordered or unordered list, splitting it
        /// into individual list items.
        /// </summary>
        /// <param name="list">Text of one whole list.</param>
        /// <param name="marker">Regular expression fragment that matches this list's item marker.</param>
        /// <returns>The list text with each matched item rewritten as an li element followed by a newline.</returns>
        private string ProcessListItems(string list, string marker)
        {
            // The listLevel global keeps track of when we're inside a list.
            // Each time we enter a list, we increment it; when we leave a list,
            // we decrement. If it's zero, we're not in a list anymore.

            // We do this because when we're not inside a list, we want to treat
            // something like this:

            //    I recommend upgrading to version
            //    8. Oops, now this line is treated
            //    as a sub-list.

            // As a single paragraph, despite the fact that the second line starts
            // with a digit-period-space sequence.

            // Whereas when we're inside a list (or sub-list), that line will be
            // treated as the start of a sub-list. What a kludge, huh? This is
            // an aspect of Markdown's syntax that's hard to parse perfectly
            // without resorting to mind-reading. Perhaps the solution is to
            // change the syntax rules such that sub-lists must start with a
            // starting cardinal number; e.g. "1." or "a.".

            _listLevel++;
            try
            {
                // Trim trailing blank lines:
                list = Regex.Replace(list, @"\n{2,}\z", "\n");

                string pattern = string.Format(
                  @"(^[ ]*)                    # leading whitespace = $1
                ({0}) [ ]+                 # list marker = $2
                ((?s:.+?)                  # list item text = $3
                (\n+))
                (?= (\z | \1 ({0}) [ ]+))", marker);

                bool lastItemHadADoubleNewline = false;

                // has to be a closure, so subsequent invocations can share the bool
                MatchEvaluator ListItemEvaluator = (Match match) =>
                {
                    string item = match.Groups[3].Value;

                    bool endsWithDoubleNewline = item.EndsWith("\n\n");
                    bool containsDoubleNewline = endsWithDoubleNewline || item.Contains("\n\n");

                    if (containsDoubleNewline || lastItemHadADoubleNewline)
                    {
                        // we could correct any bad indentation here..
                        item = RunBlockGamut(Outdent(item) + "\n", unhash: false);
                    }
                    else
                    {
                        // recursion for sub-lists
                        item = DoLists(Outdent(item));
                        item = item.TrimEnd('\n');
                        item = RunSpanGamut(item);
                    }

                    lastItemHadADoubleNewline = endsWithDoubleNewline;
                    return string.Format("<li>{0}</li>\n", item);
                };

                return Regex.Replace(list, pattern, new MatchEvaluator(ListItemEvaluator),
                                     RegexOptions.IgnorePatternWhitespace | RegexOptions.Multiline);
            }
            finally
            {
                // Restore the level even when an item evaluator throws; DoLists picks its
                // regex from _listLevel, so a leaked increment would change later matching.
                _listLevel--;
            }
        }

1273

1274

1275

        private static Regex _codeBlock = new Regex(string.Format(@"

1276

                    (?:\n\n|\A\n?)

1277

                    (                        # $1 = the code block -- one or more lines, starting with a space

1278

(?:

1279

                        (?:[ ]{{{0}}})       # Lines must start with a tab-width of spaces

1280

                        .*\n+

1281

)+

1282

1283

                    ((?=^[ ]{{0,{0}}}[^ \t\n])|\Z) # Lookahead for non-space at line-start, or end of doc",

1284

                    _tabWidth), RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);

1285

1286

        /// <summary>
        /// Converts indented Markdown code blocks into HTML pre/code blocks.
        /// </summary>
        /// <param name="text">Text that may contain indented code blocks.</param>
        /// <returns>The text with each match of <c>_codeBlock</c> rewritten by <see cref="CodeBlockEvaluator"/>.</returns>
        private string DoCodeBlocks(string text)
        {
            MatchEvaluator toPreBlock = CodeBlockEvaluator;
            return _codeBlock.Replace(text, toPreBlock);
        }

1293

1294

1295

        /// <summary>
        /// Match callback for code blocks: outdents and encodes group 1, removes whatever
        /// <c>_newlinesLeadingTrailing</c> matches, and wraps the result in pre/code tags.
        /// </summary>
        /// <returns>The wrapped block with two newlines before it and two after it.</returns>
        private string CodeBlockEvaluator(Match match)
        {
            string code = Outdent(match.Groups[1].Value);
            code = EncodeCode(code);
            code = _newlinesLeadingTrailing.Replace(code, "");

            return "\n\n<pre><code>" + code + "\n</code></pre>\n\n";
        }

1303

1304

1305

        private static Regex _codeSpan = new Regex(@"

1306

                    (?<!\\)   # Character before opening ` can't be a backslash

1307

                    (`+)      # $1 = Opening run of `

1308

                    (.+?)     # $2 = The code block

1309

                    (?<!`)

1310

\1

1311

                    (?!`)", RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline | RegexOptions.Compiled);

1312

1313

        /// <summary>
        /// Converts Markdown `code spans` into HTML code tags.
        /// </summary>
        /// <param name="text">Text that may contain backtick-delimited spans.</param>
        /// <returns>The text with each match of <c>_codeSpan</c> rewritten by <see cref="CodeSpanEvaluator"/>.</returns>
        private string DoCodeSpans(string text)
        {
            // Delimiter rules:
            //
            //  * A run of several backticks may serve as the delimiter, which lets the
            //    span itself contain literal backticks. For example
            //
            //        Just type ``foo `bar` baz`` at the prompt.
            //
            //    becomes
            //
            //        <p>Just type <code>foo `bar` baz</code> at the prompt.</p>
            //
            //    There is no arbitrary limit on the delimiter length: to show three
            //    consecutive backticks, delimit with four, and so on.
            //
            //  * Padding the content with spaces allows a backtick at either edge:
            //
            //        ... type `` `bar` `` ...
            //
            //    becomes
            //
            //        ... type <code>`bar`</code> ...

            MatchEvaluator toCodeTag = CodeSpanEvaluator;
            string converted = _codeSpan.Replace(text, toCodeTag);
            return converted;
        }

1341

1342

1343

        /// <summary>
        /// Match evaluator for code spans: trims the spaces around the captured span (group 2),
        /// encodes it and wraps it in a code element
        /// </summary>
        /// <param name="match">match whose second group holds the text between the backtick runs</param>
        /// <returns>the HTML for the code span</returns>
        private string CodeSpanEvaluator(Match match)
        {
            string inner = match.Groups[2].Value;

            // Only space characters are stripped; other whitespace is kept as written.
            inner = Regex.Replace(inner, @"^[ ]*", "");
            inner = Regex.Replace(inner, @"[ ]*$", "");

            // SaveFromAutoLinking is there to prevent auto-linking. Not necessary in code *blocks*, but in code spans.
            string safe = SaveFromAutoLinking(EncodeCode(inner));

            return "<code>" + safe + "</code>";
        }

        // Loose emphasis patterns: $1 = the delimiter, $2 = the emphasized text.
        private static Regex _bold = new Regex(@"(\*\*|__) (?=\S) (.+?[*_]*) (?<=\S) \1",
            RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline | RegexOptions.Compiled);

        // Strict emphasis patterns: $1 = non-word character (or start) before the delimiter,
        // $2 = the delimiter, $3 = the emphasized text, $4 = non-word character (or end) after it.
        private static Regex _strictBold = new Regex(@"([\W_]|^) (\*\*|__) (?=\S) ([^\r]*?\S[\*_]*) \2 ([\W_]|$)",
            RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline | RegexOptions.Compiled);

        private static Regex _italic = new Regex(@"(\*|_) (?=\S) (.+?) (?<=\S) \1",
            RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline | RegexOptions.Compiled);

        private static Regex _strictItalic = new Regex(@"([\W_]|^) (\*|_) (?=\S) ([^\r\*_]*?\S) \2 ([\W_]|$)",
            RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline | RegexOptions.Compiled);

        /// <summary>
        /// Turn Markdown *italics* and **bold** into HTML strong and em tags
        /// </summary>
        /// <param name="text">the text to convert</param>
        /// <returns>the text with emphasis markers replaced by strong and em elements</returns>
        private string DoItalicsAndBold(string text)
        {
            // In both modes <strong> must go first, then <em>.
            if (!_strictBoldItalic)
            {
                string loose = _bold.Replace(text, "<strong>$2</strong>");
                return _italic.Replace(loose, "<em>$2</em>");
            }

            // The strict patterns also capture the characters around the delimiters,
            // so those are written back through $1 and $4.
            string strict = _strictBold.Replace(text, "$1<strong>$3</strong>$4");
            return _strictItalic.Replace(strict, "$1<em>$3</em>$4");
        }

1383

1384

1385

        /// <summary>
        /// Turn Markdown line breaks into HTML break tags: every newline when _autoNewlines is set,
        /// otherwise only newlines preceded by two or more spaces
        /// </summary>
        /// <param name="text">the text to convert</param>
        /// <returns>the text with the selected newlines turned into break tags followed by a newline</returns>
        private string DoHardBreaks(string text)
        {
            // Both modes emit the same tag; only the pattern that triggers it differs.
            string breakTag = string.Format("<br{0}\n", _emptyElementSuffix);
            string trigger = _autoNewlines ? @"\n" : @" {2,}\n";

            return Regex.Replace(text, trigger, breakTag);
        }

1395

1396

1397

        // Matches one or more consecutive '>'-quoted paragraphs, including the blank lines after them.
        // The pattern is compiled with IgnorePatternWhitespace, so the literal must contain nothing
        // but pattern text and '#' comments, and both groups must be explicitly opened and closed.
        private static Regex _blockquote = new Regex(@"
            (                           # Wrap whole match in $1
                (
                ^[ ]*>[ ]?              # '>' at the start of a line
                    .+\n                # rest of the first line
                (.+\n)*                 # subsequent consecutive lines
                \n*                     # blanks
                )+
            )", RegexOptions.IgnorePatternWhitespace | RegexOptions.Multiline | RegexOptions.Compiled);

        /// <summary>
        /// Turn Markdown > quoted blocks into HTML blockquote blocks
        /// </summary>
        /// <param name="text">the text to scan for quoted blocks</param>
        /// <returns>the text with every quoted block replaced by the output of BlockQuoteEvaluator</returns>
        private string DoBlockQuotes(string text)
        {
            return _blockquote.Replace(text, new MatchEvaluator(BlockQuoteEvaluator));
        }

1413

1414

1415

        /// <summary>
        /// Match evaluator for block quotes: removes one level of quoting from the captured block,
        /// runs the block-level transformations on the remainder, and stores the resulting
        /// blockquote element in _htmlBlocks under a hash key
        /// </summary>
        /// <param name="match">match whose first group holds the whole quoted block</param>
        /// <returns>the hash key standing in for the blockquote, surrounded by blank lines</returns>
        private string BlockQuoteEvaluator(Match match)
        {
            string quoted = match.Groups[1].Value;

            // trim one level of quoting
            string unquoted = Regex.Replace(quoted, @"^[ ]*>[ ]?", "", RegexOptions.Multiline);
            // trim whitespace-only lines
            unquoted = Regex.Replace(unquoted, @"^[ ]+$", "", RegexOptions.Multiline);

            // recurse
            string inner = RunBlockGamut(unquoted);

            // indent every line of the rendered content by two spaces
            inner = Regex.Replace(inner, @"^", "  ", RegexOptions.Multiline);

            // These leading spaces screw with <pre> content, so we need to fix that:
            inner = Regex.Replace(
                inner,
                @"(\s*<pre>.+?</pre>)",
                new MatchEvaluator(BlockQuoteEvaluator2),
                RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline);

            string html = string.Format("<blockquote>\n{0}\n</blockquote>", inner);

            // Park the finished HTML under its hash key; only the key is placed in the text.
            string key = GetHashKey(html, isHtmlBlock: true);
            _htmlBlocks[key] = html;

            return "\n\n" + key + "\n\n";
        }

1433

1434

1435

        /// <summary>
        /// Match evaluator for pre elements inside a block quote: removes the two leading spaces
        /// from the start of every line of the captured text
        /// </summary>
        /// <param name="match">match whose first group holds the pre element and the whitespace before it</param>
        /// <returns>the captured text with the two-space line prefix removed</returns>
        private string BlockQuoteEvaluator2(Match match)
        {
            string preBlock = match.Groups[1].Value;
            return Regex.Replace(preBlock, @"^  ", "", RegexOptions.Multiline);
        }

1438

1439

1440

        // Finds bare http/https/ftp URLs. $1 = an optional preceding < or =" (see handleTrailingParens),
        // $2 = the protocol, $3 = everything from "://" to the end of the URL.
        private static Regex _autolinkBare = new Regex(@"(<|="")?\b(https?|ftp)(://[-A-Z0-9+&@#/%?=~_|\[\]\(\)!:,\.;]*[-A-Z0-9+&@#/%=~_|\[\])])(?=$|\W)",
            RegexOptions.IgnoreCase | RegexOptions.Compiled);

        /// <summary>
        /// Match evaluator for _autolinkBare: wraps a bare URL in angle brackets, keeping closing
        /// parentheses that have no matching opener outside of the link
        /// </summary>
        /// <param name="match">a match produced by _autolinkBare</param>
        /// <returns>
        /// the match unchanged when it was preceded by &lt; or =", otherwise the URL wrapped in
        /// angle brackets, followed by any unbalanced trailing parentheses that were split off
        /// </returns>
        private static string handleTrailingParens(Match match)
        {
            // The first group is essentially a negative lookbehind -- if there's a < or a =", we don't touch this.
            // We're not using a *real* lookbehind, because of links with in links, like <a href="http://web.archive.org/web/20121130000728/http://www.google.com/">
            // With a real lookbehind, the full link would never be matched, and thus the http://www.google.com *would* be matched.
            // With the simulated lookbehind, the full link *is* matched (just not handled, because of this early return), causing
            // the google link to not be matched again.
            if (match.Groups[1].Success)
            {
                return match.Value;
            }

            string protocol = match.Groups[2].Value;
            string link = match.Groups[3].Value;

            // Plain character comparison: no culture rules are involved in spotting a trailing ')'.
            if (link.Length == 0 || link[link.Length - 1] != ')')
            {
                return "<" + protocol + link + ">";
            }

            // Track the nesting depth across the link. An opener seen while the depth is zero or
            // negative restarts the count at 1, so earlier stray closers do not cancel later pairs.
            int level = 0;
            foreach (char c in link)
            {
                if (c == '(')
                {
                    level = level <= 0 ? 1 : level + 1;
                }
                else if (c == ')')
                {
                    level--;
                }
            }

            string tail = "";
            if (level < 0)
            {
                // Move at most -level closing parentheses from the end of the link into the tail.
                int strip = 0;
                while (strip < -level && strip < link.Length && link[link.Length - 1 - strip] == ')')
                {
                    strip++;
                }

                tail = link.Substring(link.Length - strip);
                link = link.Substring(0, link.Length - strip);
            }

            return "<" + protocol + link + ">" + tail;
        }

1478

1479

1480

        /// <summary>
        /// Turn angle-delimited URLs into HTML anchor tags
        /// </summary>
        /// <remarks>
        /// &lt;http://www.example.com&gt;
        /// </remarks>
        /// <param name="text">the text to scan for links</param>
        /// <returns>
        /// the text with angle-delimited URLs replaced by the output of HyperlinkEvaluator and,
        /// when _linkEmails is set, angle-delimited e-mail addresses replaced by the output of EmailEvaluator
        /// </returns>
        private string DoAutoLinks(string text)
        {
            if (_autoHyperlink)
            {
                // fixup arbitrary URLs by adding Markdown < > so they get linked as well
                // note that at this point, all other URL in the text are already hyperlinked as <a href=""></a>
                // *except* for the <http://www.foo.com> case
                text = _autolinkBare.Replace(text, handleTrailingParens);
            }

            // Hyperlinks: <http://foo.com>
            text = Regex.Replace(text, "<((https?|ftp):[^'\">\\s]+)>", new MatchEvaluator(HyperlinkEvaluator));

            if (_linkEmails)
            {
                // Email addresses: <address@domain.foo>
                // The address must be captured as group 1, because EmailEvaluator reads match.Groups[1].
                // The pattern is used with IgnorePatternWhitespace, so the layout below is not significant.
                string pattern =
                    @"<
                      (?:mailto:)?
                      (
                        [-.\w]+
                        \@
                        [-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+
                      )
                      >";
                text = Regex.Replace(text, pattern, new MatchEvaluator(EmailEvaluator), RegexOptions.IgnoreCase | RegexOptions.IgnorePatternWhitespace);
            }

            return text;
        }

1516

1517

1518

        /// <summary>
        /// Match evaluator for angle-delimited URLs: builds an anchor whose href and visible
        /// text are both the URL captured in group 1
        /// </summary>
        /// <param name="match">match whose first group holds the URL without the angle brackets</param>
        /// <returns>the anchor element for the URL</returns>
        private string HyperlinkEvaluator(Match match)
        {
            return string.Format("<a href=\"{0}\">{0}</a>", match.Groups[1].Value);
        }

1522

1523

1524

        /// <summary>
        /// Match evaluator for angle-delimited e-mail addresses: builds an obfuscated mailto anchor
        /// from the address captured in group 1
        /// </summary>
        /// <param name="match">match whose first group holds the e-mail address</param>
        /// <returns>the anchor element, with the mailto: prefix removed from its visible text</returns>
        private string EmailEvaluator(Match match)
        {
            string address = Unescape(match.Groups[1].Value);

            //
            //    Input: an email address, e.g. "foo@example.com"
            //
            //    Output: the email address as a mailto link, with each character
            //            of the address encoded as either a decimal or hex entity, in
            //            the hopes of foiling most address harvesting spam bots. E.g.:
            //
            //      <a href="&#x6D;&#97;&#105;&#108;&#x74;&#111;:&#102;&#111;&#111;&#64;&#101;
            //        x&#x61;&#109;&#x70;&#108;&#x65;&#x2E;&#99;&#111;&#109;">&#102;&#111;&#111;
            //        &#64;&#101;x&#x61;&#109;&#x70;&#108;&#x65;&#x2E;&#99;&#111;&#109;</a>
            //
            //    Based on a filter by Matthew Wickline, posted to the BBEdit-Talk
            //    mailing list: <http://tinyurl.com/yu7ue>
            //

            // The encoder leaves ':' alone, which is what lets the prefix be found again below.
            string encoded = EncodeEmailAddress("mailto:" + address);

            string anchor = string.Format("<a href=\"{0}\">{0}</a>", encoded);

            // strip the mailto: from the visible part
            return Regex.Replace(anchor, "\">.+?:", "\">");
        }

        // Matches between one and _tabWidth spaces at the start of any line.
        private static Regex _outDent = new Regex(@"^[ ]{1," + _tabWidth + @"}", RegexOptions.Multiline | RegexOptions.Compiled);

        /// <summary>
        /// Remove one level of line-leading spaces (at most _tabWidth of them) from every line
        /// </summary>
        /// <param name="block">the text to outdent</param>
        /// <returns>the text with the leading spaces matched by _outDent removed</returns>
        private string Outdent(string block)
        {
            string outdented = _outDent.Replace(block, "");
            return outdented;
        }

        #region Encoding and Normalization

1567

1568

1569

        /// <summary>
        /// encodes each character of an email address at random as itself, a hex entity
        /// or a decimal entity (roughly 10% raw, 45% hex, 45% dec);
        /// note that @ is always encoded and : never is
        /// </summary>
        /// <param name="addr">the address to encode, possibly including a "mailto:" prefix</param>
        /// <returns>the encoded address</returns>
        private string EncodeEmailAddress(string addr)
        {
            var encoded = new StringBuilder(addr.Length * 5);
            var rng = new Random();

            foreach (char ch in addr)
            {
                // One draw per character, in the range 1..99.
                int roll = rng.Next(1, 100);

                // '@' is never left raw; ':' is always left raw; anything else only on a high roll.
                bool keepRaw = ch != '@' && (roll > 90 || ch == ':');

                if (keepRaw)
                {
                    encoded.Append(ch);                          // m
                }
                else if (roll < 45)
                {
                    encoded.AppendFormat("&#x{0:x};", (int)ch);  // &#x6D
                }
                else
                {
                    encoded.AppendFormat("&#{0};", (int)ch);     // &#109
                }
            }

            return encoded.ToString();
        }

1590

1591

1592

        // Matches each single character that EncodeCodeEvaluator knows how to replace: & < > \ * _ { } [ ]
        private static Regex _codeEncoder = new Regex(@"&|<|>|\\|\*|_|\{|\}|\[|\]", RegexOptions.Compiled);

        /// <summary>
        /// Encode/escape certain Markdown characters inside code blocks and spans where they are literals
        /// </summary>
        /// <param name="code">the raw code text</param>
        /// <returns>the code with every character matched by _codeEncoder replaced</returns>
        private string EncodeCode(string code)
        {
            var evaluator = new MatchEvaluator(EncodeCodeEvaluator);
            return _codeEncoder.Replace(code, evaluator);
        }

1600

1601

        /// <summary>
        /// Match evaluator for _codeEncoder: maps one matched character to its replacement
        /// </summary>
        /// <param name="match">a single-character match</param>
        /// <returns>an HTML entity for &amp;, &lt; and &gt;; the _escapeTable entry for anything else</returns>
        private string EncodeCodeEvaluator(Match match)
        {
            string hit = match.Value;

            // Encode all ampersands; HTML entities are not
            // entities within a Markdown code span.
            if (hit == "&")
            {
                return "&amp;";
            }

            // Do the angle bracket song and dance
            if (hit == "<")
            {
                return "&lt;";
            }

            if (hit == ">")
            {
                return "&gt;";
            }

            // escape characters that are magic in Markdown
            return _escapeTable[hit];
        }

        // An ampersand that is not the start of a numeric, hex or named entity reference.
        private static Regex _amps = new Regex(@"&(?!((#[0-9]+)|(#[xX][a-fA-F0-9]+)|([a-zA-Z][a-zA-Z0-9]*));)", RegexOptions.ExplicitCapture | RegexOptions.Compiled);

        // A left angle bracket that is not followed by a letter, '/', '?', '$' or '!'.
        private static Regex _angles = new Regex(@"<(?![A-Za-z/?\$!])", RegexOptions.ExplicitCapture | RegexOptions.Compiled);

        /// <summary>
        /// Encode any ampersands (that aren't part of an HTML entity) and any left angle brackets
        /// that are not followed by a character that could begin a tag
        /// </summary>
        /// <param name="s">the text to encode</param>
        /// <returns>the text with the matched characters replaced by &amp;amp; and &amp;lt;</returns>
        private string EncodeAmpsAndAngles(string s)
        {
            string ampsEncoded = _amps.Replace(s, "&amp;");
            return _angles.Replace(ampsEncoded, "&lt;");
        }

1632

1633

1634

        // Not initialized here; it has to be assigned elsewhere in the class before first use.
        private static Regex _backslashEscapes;

        /// <summary>
        /// Encodes any escaped characters such as \`, \*, \[ etc
        /// </summary>
        /// <param name="s">the text to process</param>
        /// <returns>the text with every _backslashEscapes match replaced by EscapeBackslashesEvaluator</returns>
        private string EscapeBackslashes(string s)
        {
            MatchEvaluator evaluator = EscapeBackslashesEvaluator;
            return _backslashEscapes.Replace(s, evaluator);
        }

1642

1643

        /// <summary>
        /// Match evaluator for _backslashEscapes: looks the matched text up in _backslashEscapeTable
        /// </summary>
        /// <param name="match">a match produced by _backslashEscapes</param>
        /// <returns>the table entry stored for the matched text</returns>
        private string EscapeBackslashesEvaluator(Match match)
        {
            string escapeSequence = match.Value;
            return _backslashEscapeTable[escapeSequence];
        }

1646

1647

1648

        // Matches an escape token: the \x1A character, then 'E', one or more digits, and 'E'.
        // The literal is split in two so that the 'E' is not read as part of the \x hex escape.
        private static Regex _unescapes = new Regex("\x1A" + "E\\d+E", RegexOptions.Compiled);

        /// <summary>
        /// swap back in all the special characters we've hidden
        /// </summary>
        /// <param name="s">text that may contain escape tokens</param>
        /// <returns>the text with every token replaced by the output of UnescapeEvaluator</returns>
        private string Unescape(string s)
        {
            MatchEvaluator evaluator = UnescapeEvaluator;
            return _unescapes.Replace(s, evaluator);
        }

1656

1657

        /// <summary>
        /// Match evaluator for _unescapes: looks the matched token up in _invertedEscapeTable
        /// </summary>
        /// <param name="match">a match produced by _unescapes</param>
        /// <returns>the table entry stored for the matched token</returns>
        private string UnescapeEvaluator(Match match)
        {
            string token = match.Value;
            return _invertedEscapeTable[token];
        }

        /// <summary>
        /// escapes Bold [ * ] and Italic [ _ ] characters
        /// </summary>
        /// <param name="s">the text to escape</param>
        /// <returns>the text with every * and _ replaced by its _escapeTable entry</returns>
        private string EscapeBoldItalic(string s)
        {
            // Asterisks are replaced first, underscores second.
            return s.Replace("*", _escapeTable["*"])
                    .Replace("_", _escapeTable["_"]);
        }

1671

1672

1673

        /// <summary>
        /// Encodes the characters &gt;, &lt; and " as HTML entities; every other character,
        /// including the ampersand, is left as it is
        /// </summary>
        /// <param name="s">the text to encode</param>
        /// <returns>the encoded text</returns>
        private static string AttributeEncode(string s)
        {
            string encoded = s.Replace(">", "&gt;");
            encoded = encoded.Replace("<", "&lt;");
            encoded = encoded.Replace("\"", "&quot;");
            return encoded;
        }

1676

1677

1678

        // The characters that get percent-encoded: " ' * ( ) [ ] $ :
        private static char[] _problemUrlChars = @"""'*()[]$:".ToCharArray();

        /// <summary>
        /// hex-encodes some unusual "problem" chars in URLs to avoid URL detection problems
        /// </summary>
        /// <param name="url">the URL to encode</param>
        /// <returns>
        /// the URL unchanged when _encodeProblemUrlCharacters is off, otherwise the URL with each
        /// problem character replaced by '%' and its lower-case hex code
        /// </returns>
        private string EncodeProblemUrlChars(string url)
        {
            if (!_encodeProblemUrlCharacters)
            {
                return url;
            }

            var escaped = new StringBuilder(url.Length);
            int last = url.Length - 1;

            for (int pos = 0; pos <= last; pos++)
            {
                char ch = url[pos];
                bool isProblem = Array.IndexOf(_problemUrlChars, ch) != -1;

                // A colon is left alone when the next character is '/' (as in "http://") or a digit
                // (presumably a port number); a colon at the very end of the URL is still encoded.
                if (isProblem && ch == ':' && pos < last)
                {
                    char next = url[pos + 1];
                    bool followedBySlash = next == '/';
                    bool followedByDigit = next >= '0' && next <= '9';
                    isProblem = !followedBySlash && !followedByDigit;
                }

                if (isProblem)
                {
                    escaped.Append("%" + String.Format("{0:x}", (byte)ch));
                }
                else
                {
                    escaped.Append(ch);
                }
            }

            return escaped.ToString();
        }

        /// <summary>
        /// Within tags -- meaning between &lt; and &gt; -- encode [\ ` * _] so they
        /// don't conflict with their use in Markdown for code, italics and strong.
        /// We're replacing each such character with its corresponding hash
        /// value; this is likely overkill, but it should prevent us from colliding
        /// with the escape values by accident.
        /// </summary>
        /// <param name="text">the text to process</param>
        /// <returns>the text rebuilt from its tokens, with tag tokens escaped and all other tokens unchanged</returns>
        private string EscapeSpecialCharsWithinTagAttributes(string text)
        {
            var pieces = TokenizeHTML(text);

            // now, rebuild text from the tokens
            var rebuilt = new StringBuilder(text.Length);

            foreach (var piece in pieces)
            {
                // Anything that is not a tag is copied through untouched.
                if (piece.Type != TokenType.Tag)
                {
                    rebuilt.Append(piece.Value);
                    continue;
                }

                string tag = piece.Value.Replace(@"\", _escapeTable[@"\"]);

                // escape slashes in comments to prevent autolinking there -- http://meta.stackoverflow.com/questions/95987/html-comment-containing-url-breaks-if-followed-by-another-html-comment
                if (_autoHyperlink && tag.StartsWith("<!"))
                {
                    tag = tag.Replace("/", _escapeTable["/"]);
                }

                // A code tag with at least one character on each side of it inside the token
                // is replaced by the escape for a backtick.
                tag = Regex.Replace(tag, "(?<=.)</?code>(?=.)", _escapeTable[@"`"]);

                rebuilt.Append(EscapeBoldItalic(tag));
            }

            return rebuilt.ToString();
        }

1741

1742

1743

        /// <summary>
        /// expands each tab to spaces up to the next multiple of _tabWidth columns;
        /// standardizes line endings from DOS (CR LF) or Mac (CR) to UNIX (LF);
        /// drops any \x1A characters;
        /// empties lines that contain nothing but spaces (their line break is kept);
        /// makes sure text ends with a couple of newlines
        /// </summary>
        /// <param name="text">the raw input text</param>
        /// <returns>the normalized text, always ending in three newline characters</returns>
        private string Normalize(string text)
        {
            var result = new StringBuilder(text.Length);
            var current = new StringBuilder();   // the line being assembled
            bool hasContent = false;             // true once the line holds something other than spaces

            for (int pos = 0; pos < text.Length; pos++)
            {
                char ch = text[pos];

                // A CR ends a line only when another character follows and that character is not LF;
                // the CR of a CR LF pair, and a CR at the very end of the text, are simply dropped.
                bool bareCr = ch == '\r' && pos < text.Length - 1 && text[pos + 1] != '\n';

                if (ch == '\n' || bareCr)
                {
                    // Lines without content contribute only their line break.
                    if (hasContent)
                    {
                        result.Append(current);
                    }
                    result.Append('\n');
                    current.Length = 0;
                    hasContent = false;
                }
                else if (ch == '\r' || ch == '\x1A')
                {
                    // dropped from the output
                }
                else if (ch == '\t')
                {
                    // Pad to the next tab stop; a tab does not count as content.
                    int pad = _tabWidth - current.Length % _tabWidth;
                    current.Append(' ', pad);
                }
                else
                {
                    if (ch != ' ')
                    {
                        hasContent = true;
                    }
                    current.Append(ch);
                }
            }

            // Flush the last line, which may not have been terminated.
            if (hasContent)
            {
                result.Append(current);
            }
            result.Append('\n');

            // add two newlines to the end before return
            result.Append("\n\n");
            return result.ToString();
        }

1791

1792

1793

        #endregion

1794

1795

        /// <summary>
        /// this is to emulate what's available in PHP: returns the given text repeated count times
        /// </summary>
        /// <param name="text">the text to repeat</param>
        /// <param name="count">how many copies to concatenate</param>
        /// <returns>the concatenated copies; an empty string when count is zero</returns>
        private static string RepeatString(string text, int count)
        {
            // The buffer is sized up front for the full result.
            var buffer = new StringBuilder(text.Length * count);

            int remaining = count;
            while (remaining > 0)
            {
                buffer.Append(text);
                remaining--;
            }

            return buffer.ToString();
        }

프로젝트

일반

사용자정보

MARKUS

markus / MarkusAutoUpdate / src / NetSparkle / Libraries / MarkdownSharp.cs @ f2b4c204