[iText-questions] Problem reading the content from the PDF created by ITextSharp

ericvaleyev Sat, 13 Mar 2010 17:43:32 -0800

Hi All,
I've created an output PDF file using PdfWriter. The output PDF comes out no
problem. However, when I try to read the newly created PDF using
ExtractTextPDFBytes it returns an empty string.
Does anybody know why it happens? 
The function ExtractTextString will read any PDF, but not the one produced
by ITextSharp.


Below is the code I wrote. Please help me figure out where's the problem.
Txs. Eric

using System;

using System.Collections.Generic;

using System.ComponentModel;

using System.Data;

using System.Drawing;

using System.Linq;

using System.Text;

using System.Windows.Forms;

using iTextSharp;

using iTextSharp.text;

using iTextSharp.text.pdf;

using iTextSharp.text.xml;

using System.IO;

 

namespace PDFStamping

{

    public partial class Form1 : Form

    {

        public Form1()

        {

            InitializeComponent();

        }

        //Sample form with the button btnStamp

        private void btnStamp_Click(object sender, EventArgs e)

        {
            string inputFilePath = "C:\\Temp\\Test_PDF.pdf";
            string outputFilePath = "C:\\Temp\\Test_PDF_output.pdf";

            stampPDF(inputFilePath, outputFilePath);

     

        }

        //Main routine which creates an output PDF and show its content 

        private void stampPDF(string inputFile, string outputFile)

        {

            PdfImportedPage page = null;

            Int32 pageCount = 0; //The total of pages PDF

            Int32 pageNumber = 0; //The indicator of the current page

 

            //Read PDF

            PdfReader reader = new PdfReader(inputFile);

            Int32 intNoOfPages = reader.NumberOfPages;

 

            //Set Stamper                          

            Document pdfDoc = new
Document(reader.GetPageSizeWithRotation(1));

            PdfWriter writer = PdfWriter.GetInstance(pdfDoc, new
FileStream(outputFile, FileMode.Create));

            writer.SetPdfVersion(PdfWriter.PDF_VERSION_1_2);

            //Start Stamp

            pdfDoc.Open();

 

            PdfContentByte cb = writer.DirectContent;

 

            pageCount = reader.NumberOfPages;

            pageNumber = 1;

            for (int i = 0; i < pageCount; i++)

            {

               
pdfDoc.SetPageSize(reader.GetPageSizeWithRotation(pageNumber));

                pdfDoc.NewPage();

                page = writer.GetImportedPage(reader, pageNumber);

                cb.AddTemplate(page, 1.04F, 0, 0, 1.04F, 0, 0);

                pageNumber++;

            }

            string strRet = ExtractTextString(outputFile, 1);

            MessageBox.Show("Start PDF content" + "\n" + strRet + "\n" +
"End PDF content");

            pdfDoc.Close();

        }

 

        #region ExtractTextString

        /// <summary>

        /// Extracts the text from a PDF file.

        /// </summary>

        /// the full path to the pdf file.

        /// <returns>the extracted text</returns>

        public string ExtractTextString(string inFileName, Int32 intPage)

        {

            string outFile = null;

            try

            {

                // Create a reader for the given PDF file

                PdfReader reader = new PdfReader(inFileName);

                outFile +=
(ExtractTextPDFBytes(reader.GetPageContent(intPage)) + " ");

                return outFile;

            }

            catch

            {

                return "";

            }

        }

        #endregion

 

        #region ExtractTextPDFBytes

        /// <summary>

        /// This method processes an uncompressed Adobe (text) object

        /// and extracts text.

        /// </summary>

        /// uncompressed

        /// <returns></returns>

        private string ExtractTextPDFBytes(byte[] input)

        {

            bool blnStartBracket = false;

            bool blnStartWrite = false;

 

            if (input == null || input.Length == 0) return "";

 

            try

            {

                string resultString = "";

 

                for (int i = 0; i < input.Length; i++)

                {

                    char c = (char)input[i];

 

                    if (((c >= ' ') && (c <= '~')) || ((c >= 128) && (c <
255)))

                    {

                        if (blnStartBracket == true)

                            blnStartWrite = true;

 

                        if (c.ToString() == "(")

                        {

                            blnStartBracket = true;

                        }

                        else if (c.ToString() == ")")

                        {

                            blnStartBracket = false;

                            blnStartWrite = false;

                        }

 

                        if (blnStartWrite == true)

                        {

                            resultString += c.ToString();

                        }

                    }

                }

 

                return resultString;

            }

            catch

            {

                return "";

            }

        }

        #endregion

    }

}
-- 
View this message in context: 
http://old.nabble.com/Problem-reading-the-content-from-the-PDF-created-by-ITextSharp-tp27892267p27892267.html
Sent from the iText - General mailing list archive at Nabble.com.


------------------------------------------------------------------------------
Download Intel&#174; Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev
_______________________________________________
iText-questions mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/itext-questions

Buy the iText book: http://www.1t3xt.com/docs/book.php
Check the site with examples before you ask questions: 
http://www.1t3xt.info/examples/
You can also search the keywords list: http://1t3xt.info/tutorials/keywords/

[iText-questions] Problem reading the content from the PDF created by ITextSharp

Reply via email to