/TEST/Program.cs - 이력해설 - MARKUS - 일정관리

787a4489

KangIngu

using iTextSharp.text.pdf;

2

using iTextSharp.text.pdf.parser;

3

using System;

4

using System.Collections.Generic;

5

using System.IO;

6

using System.IO.Compression;

7

using System.Linq;

8

using System.Text;

9

10

namespace TEST

11

12

    /// <summary>
    /// Simple holder that pairs a rectangle with a text string.
    /// </summary>
    public class RectAndText
    {
        /// <summary>The rectangle passed to the constructor.</summary>
        public iTextSharp.text.Rectangle Rect;

        /// <summary>The text passed to the constructor.</summary>
        public String Text;

        /// <summary>
        /// Stores the given rectangle and text without copying or validating them.
        /// </summary>
        /// <param name="rect">Rectangle to store in <see cref="Rect"/>.</param>
        /// <param name="text">Text to store in <see cref="Text"/>.</param>
        public RectAndText(iTextSharp.text.Rectangle rect, String text)
        {
            Rect = rect;
            Text = text;
        }
    }

22

    //public class MyLocationTextExtractionStrategy : LocationTextExtractionStrategy

23

//{

24

    //    //Hold each coordinate

25

    //    public List<RectAndText> myPoints = new List<RectAndText>();

26

27

    //    //Automatically called for each chunk of text in the PDF

28

    //    public override void RenderText(TextRenderInfo renderInfo)

29

    //    {

30

    //        base.RenderText(renderInfo);

31

32

    //        //Get the bounding box for the chunk of text

33

    //        var bottomLeft = renderInfo.GetDescentLine().GetStartPoint();

34

    //        var topRight = renderInfo.GetAscentLine().GetEndPoint();

35

36

    //        //Create a rectangle from it

37

    //        var rect = new iTextSharp.text.Rectangle(

38

    //                                                bottomLeft[Vector.I1],

39

    //                                                bottomLeft[Vector.I2],

40

    //                                                topRight[Vector.I1],

41

    //                                                topRight[Vector.I2]

42

    //                                                );

43

44

    //        //Add this to our main collection

45

    //        this.myPoints.Add(new RectAndText(rect, renderInfo.GetText()));

46

    //    }

47

//}

48

49

    /// <summary>
    /// Text extraction strategy that collects rendered text chunks into lines and
    /// returns them ordered from the top of the page to the bottom.
    /// Lines are keyed by the negated, integer-truncated Y coordinate of the chunk's
    /// baseline start point, so the sorted dictionary enumerates them top-down.
    /// </summary>
    public class TopToBottomTextExtractionStrategy : ITextExtractionStrategy
    {
        // Baseline start and end points of the chunk handled by the previous RenderText call.
        private Vector lastStart;
        private Vector lastEnd;

        /// <summary>
        /// One buffer per detected line. The dictionary keeps its entries ordered by key.
        /// </summary>
        public SortedDictionary<int, StringBuilder> results = new SortedDictionary<int, StringBuilder>();

        /// <summary>Creates an empty strategy.</summary>
        public TopToBottomTextExtractionStrategy() { }

        /// <summary>Not used by this strategy.</summary>
        public virtual void BeginTextBlock() { }

        /// <summary>Not used by this strategy.</summary>
        public virtual void EndTextBlock() { }

        /// <summary>Not used by this strategy; images are ignored.</summary>
        public virtual void RenderImage(ImageRenderInfo renderInfo) { }

        /// <summary>
        /// Joins all collected lines, in key order, into one string.
        /// </summary>
        /// <returns>Every line buffer followed by a line terminator.</returns>
        public virtual String GetResultantText()
        {
            StringBuilder buf = new StringBuilder();

            // results is already sorted by key, i.e. top of the page first.
            foreach (var s in results)
            {
                buf.AppendLine(s.Value.ToString());
            }

            return buf.ToString();
        }

        /// <summary>
        /// Appends one rendered text chunk to the line it belongs to, inserting a
        /// separating space when the gap to the previous chunk looks like a word break.
        /// </summary>
        /// <param name="renderInfo">The chunk being rendered.</param>
        public virtual void RenderText(TextRenderInfo renderInfo)
        {
            bool firstRender = results.Count == 0;

            LineSegment segment = renderInfo.GetBaseline();
            Vector start = segment.GetStartPoint();
            Vector end = segment.GetEndPoint();

            // Fetch the text once instead of once per check below.
            string text = renderInfo.GetText();

            // Use the Y value of the baseline start point as the line key.
            int currentLineKey = (int)start[Vector.I2];

            if (!firstRender)
            {
                Vector x0 = start;
                Vector x1 = lastStart;
                Vector x2 = lastEnd;

                // Squared perpendicular distance from the current start point to the
                // line through the previous chunk's baseline. If the previous baseline
                // has zero length the division yields NaN/Infinity, the comparison
                // below is false, and the chunk keeps its own Y as key.
                float dist = (x2.Subtract(x1)).Cross((x1.Subtract(x0))).LengthSquared / x2.Subtract(x1).LengthSquared;

                float sameLineThreshold = 1f;

                // Still on the same line as the previous chunk: reuse its Y coordinate
                // so both chunks end up in the same buffer.
                if (dist <= sameLineThreshold)
                {
                    currentLineKey = (int)lastStart[Vector.I2];
                }
            }

            // PDF coordinates start with zero at the bottom of the page, so negate the
            // key to make ascending key order mean top-to-bottom.
            currentLineKey = currentLineKey * -1;

            // Single lookup; create the line buffer the first time this key is seen.
            StringBuilder line;
            if (!results.TryGetValue(currentLineKey, out line))
            {
                line = new StringBuilder();
                results.Add(currentLineKey, line);
            }

            // Insert a space between chunks if it appears there should be one.
            // Characters are compared directly (ordinal) rather than materialising the
            // whole buffer with ToString() and doing a culture-sensitive EndsWith on
            // every chunk.
            if (!firstRender &&                      // first chunk never needs a leading space
                line.Length != 0 &&                  // don't start a line with a space
                line[line.Length - 1] != ' ' &&      // buffer doesn't already end in a space
                text.Length > 0 &&                   // new chunk is not empty
                text[0] != ' ')                      // new chunk doesn't start with a space
            {
                // Distance between the end of the previous chunk and the start of this one.
                float spacing = lastEnd.Subtract(start).Length;

                // More than half a space wide: treat it as a word break.
                if (spacing > renderInfo.GetSingleSpaceWidth() / 2f)
                {
                    line.Append(' ');
                }
            }

            line.Append(text);

            lastStart = start;
            lastEnd = end;
        }
    }

    /// <summary>
    /// Location-based extraction strategy that additionally records where a search
    /// string occurs. For every rendered chunk whose text contains
    /// <see cref="TextToSearchFor"/>, a rectangle covering the first occurrence in that
    /// chunk is added to <see cref="myPoints"/>.
    /// </summary>
    public class MyLocationTextExtractionStrategy : LocationTextExtractionStrategy
    {
        /// <summary>One entry per chunk in which the search text was found.</summary>
        public List<RectAndText> myPoints = new List<RectAndText>();

        /// <summary>The string that we're searching for.</summary>
        public String TextToSearchFor { get; set; }

        /// <summary>Options passed to the culture-aware IndexOf used for matching.</summary>
        public System.Globalization.CompareOptions CompareOptions { get; set; }

        /// <summary>
        /// Creates a strategy that looks for <paramref name="textToSearchFor"/>.
        /// </summary>
        /// <param name="textToSearchFor">Text to locate in each rendered chunk.</param>
        /// <param name="compareOptions">Comparison options; defaults to <c>None</c>.</param>
        public MyLocationTextExtractionStrategy(String textToSearchFor, System.Globalization.CompareOptions compareOptions = System.Globalization.CompareOptions.None)
        {
            this.TextToSearchFor = textToSearchFor;
            this.CompareOptions = compareOptions;
        }

        /// <summary>
        /// Called for each chunk of text in the PDF. Forwards the chunk to the base
        /// strategy, then records a rectangle if the chunk contains the search text.
        /// </summary>
        /// <remarks>
        /// Only the first occurrence inside a chunk is recorded, and a match that spans
        /// two chunks is not detected. A null or empty search text records nothing
        /// (previously an empty search text made <c>First()</c> throw on an empty list).
        /// </remarks>
        /// <param name="renderInfo">The chunk being rendered.</param>
        public override void RenderText(TextRenderInfo renderInfo)
        {
            base.RenderText(renderInfo);

            // Nothing to look for: skip matching entirely.
            if (string.IsNullOrEmpty(this.TextToSearchFor))
            {
                return;
            }

            // See if the current chunk contains the text.
            var startPosition = System.Globalization.CultureInfo.CurrentCulture.CompareInfo.IndexOf(renderInfo.GetText(), this.TextToSearchFor, this.CompareOptions);

            // If not found bail.
            if (startPosition < 0)
            {
                return;
            }

            // Grab the individual characters that make up the match.
            var chars = renderInfo.GetCharacterRenderInfos().Skip(startPosition).Take(this.TextToSearchFor.Length).ToList();

            // The per-character list can be shorter than the string offset suggests;
            // without this guard First()/Last() would throw.
            if (chars.Count == 0)
            {
                return;
            }

            var firstChar = chars[0];
            var lastChar = chars[chars.Count - 1];

            // Bounding box: start of the first character's descent line to the end of
            // the last character's ascent line.
            var bottomLeft = firstChar.GetDescentLine().GetStartPoint();
            var topRight = lastChar.GetAscentLine().GetEndPoint();

            var rect = new iTextSharp.text.Rectangle(
                bottomLeft[Vector.I1],
                bottomLeft[Vector.I2],
                topRight[Vector.I1],
                topRight[Vector.I2]);

            // Add this to our main collection.
            this.myPoints.Add(new RectAndText(rect, this.TextToSearchFor));
        }
    }

    class Program

201

202

        /// <summary>
        /// GZip-compresses <paramref name="value"/> and returns the compressed bytes as a
        /// string holding one character per compressed byte.
        /// </summary>
        /// <remarks>
        /// Each input character is narrowed to a single byte with a <c>(byte)</c> cast, so
        /// only the low 8 bits of every character are compressed. Characters above U+00FF
        /// (for example Hangul) do not survive a Zip/UnZip round trip.
        /// </remarks>
        /// <param name="value">Text to compress; must not be null.</param>
        /// <returns>The GZip stream, one char per byte.</returns>
        public static string Zip(string value)
        {
            // Transform string into byte[] (one byte per char, high bits dropped).
            byte[] raw = new byte[value.Length];
            for (int i = 0; i < value.Length; i++)
            {
                raw[i] = (byte)value[i];
            }

            byte[] compressed;
            using (MemoryStream ms = new MemoryStream())
            {
                // The GZip stream has to be closed, not just flushed, before the buffer
                // is read, otherwise the trailing bytes are missing.
                using (GZipStream gz = new GZipStream(ms, CompressionMode.Compress))
                {
                    gz.Write(raw, 0, raw.Length);
                }

                // ToArray is still valid after the MemoryStream has been closed.
                compressed = ms.ToArray();
            }

            // Transform byte[] zip data to string (one char per byte).
            StringBuilder sb = new StringBuilder(compressed.Length);
            foreach (byte b in compressed)
            {
                sb.Append((char)b);
            }

            return sb.ToString();
        }

233

234

        /// <summary>
        /// Reverses <c>Zip</c>: interprets each character of <paramref name="value"/> as
        /// one byte of a GZip stream, decompresses it, and returns the result as a string
        /// with one character per decompressed byte.
        /// </summary>
        /// <remarks>
        /// The whole decompressed stream is read. The previous implementation read once
        /// into a buffer sized to the compressed input, which truncated any payload
        /// that was larger than its compressed form.
        /// </remarks>
        /// <param name="value">Compressed data, one char per byte; must not be null.</param>
        /// <returns>The decompressed data, one char per byte; empty for empty input.</returns>
        public static string UnZip(string value)
        {
            // Empty input produced an empty result before; keep that without touching
            // the decompressor.
            if (value.Length == 0)
            {
                return string.Empty;
            }

            // Transform string into byte[] (one byte per char).
            byte[] packed = new byte[value.Length];
            for (int i = 0; i < value.Length; i++)
            {
                packed[i] = (byte)value[i];
            }

            byte[] unpacked;
            using (MemoryStream input = new MemoryStream(packed))
            using (GZipStream gz = new GZipStream(input, CompressionMode.Decompress))
            using (MemoryStream output = new MemoryStream())
            {
                // CopyTo keeps reading until the stream ends, so the result is complete
                // regardless of the compression ratio or how many bytes one Read returns.
                gz.CopyTo(output);
                unpacked = output.ToArray();
            }

            // Transform byte[] unzip data to string (one char per byte).
            StringBuilder sb = new StringBuilder(unpacked.Length);
            foreach (byte b in unpacked)
            {
                sb.Append((char)b);
            }

            return sb.ToString();
        }

269

270

        /// <summary>
        /// Demo entry point: writes an A4 PDF (50-unit margins) containing a single
        /// underlined blue hyperlink to <c>E:\test.pdf</c>, prints "Finished..." and waits
        /// for a key press.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            // Earlier scratch experiments that used to live here as commented-out code
            // have been removed (see version control if they are needed again):
            //   * a Zip/UnZip round trip of a base64-encoded GIF data URI;
            //   * an interactive loop that read search terms from the console and called
            //     ReadPdfFile(@"E:\sample2.pdf", term) until an exit word was entered.

            iTextSharp.text.Document doc = new iTextSharp.text.Document(iTextSharp.text.PageSize.A4, 50, 50, 50, 50);

            // FileMode.Create truncates an existing file. OpenOrCreate would leave the
            // tail of a previous, larger file behind the newly written PDF.
            // The using block also releases the file handle if PDF generation throws.
            using (FileStream output = new FileStream(@"E:\test.pdf", FileMode.Create))
            {
                // The writer attaches itself to the document; the instance is not needed here.
                PdfWriter.GetInstance(doc, output);
                doc.Open();

                iTextSharp.text.Font link = iTextSharp.text.FontFactory.GetFont("Arial", 12, iTextSharp.text.Font.UNDERLINE, new iTextSharp.text.BaseColor(0, 0, 255));
                iTextSharp.text.Anchor anchor = new iTextSharp.text.Anchor("www.mikesdotnetting.com", link);
                anchor.Reference = "http://www.mikesdotnetting.com";
                doc.Add(anchor);

                doc.Close();
            }

            Console.WriteLine("Finished...");

            Console.ReadKey();
        }

        /// <summary>
        /// Scans every page of a PDF for <paramref name="searchText"/>.
        /// </summary>
        /// <remarks>
        /// Matching is done per rendered text chunk by
        /// <c>MyLocationTextExtractionStrategy</c> with <c>CompareOptions.None</c>.
        /// The previous implementation never added anything to the result list and
        /// never closed the reader.
        /// </remarks>
        /// <param name="fileName">Path of the PDF file to read.</param>
        /// <param name="searchText">Text to look for.</param>
        /// <returns>
        /// The 1-based numbers of the pages on which the text was found, in ascending
        /// order. Empty when the file does not exist or nothing matched.
        /// </returns>
        public static List<int> ReadPdfFile(string fileName, string searchText)
        {
            List<int> pages = new List<int>();

            if (!File.Exists(fileName))
            {
                return pages;
            }

            PdfReader pdfReader = new PdfReader(fileName);
            try
            {
                for (int page = 1; page <= pdfReader.NumberOfPages; page++)
                {
                    // A fresh strategy per page so its match list only reflects this page.
                    var strategy = new MyLocationTextExtractionStrategy(searchText, System.Globalization.CompareOptions.None);

                    // The returned page text is not needed; the call is made so that the
                    // strategy's RenderText runs for every chunk on the page.
                    PdfTextExtractor.GetTextFromPage(pdfReader, page, strategy);

                    if (strategy.myPoints.Count > 0)
                    {
                        pages.Add(page);
                    }
                }
            }
            finally
            {
                // Release the underlying file even if extraction throws.
                pdfReader.Close();
            }

            return pages;
        }

344

345

프로젝트

일반

사용자정보

MARKUS

markus / TEST / Program.cs @ eb2b9248