Re: [iText-questions] Problem reading the content from the PDF created by ITextSharp

Mark Storer Mon, 15 Mar 2010 12:18:42 -0700

Can you read the PDF from disk?  It's possible you're running across a
situation where you generate a 0-byte PDF (throwing an exception for
example).


Ah!  Or, you could be trying to check the contents of the file before
it has been completely written.  Call pdfDoc.Close() BEFORE you call
ExtractTextString() -- in the code below, Close() only happens after the
extraction and the MessageBox.

--Mark Storer
  Senior Software Engineer
  Cardiff.com
 
#include <disclaimer>
typedef std::disclaimer<Cardiff> Discard;

> -----Original Message-----
> From: ericvaleyev [mailto:[email protected]]
> Sent: Saturday, March 13, 2010 5:43 PM
> To: [email protected]
> Subject: [iText-questions] Problem reading the content from the PDF
> created by ITextSharp
> 
> 
> Hi All,
> I've created an output PDF file using PdfWriter. The output PDF comes out
> no problem. However, when I try to read the newly created PDF using
> ExtractTextPDFBytes it returns an empty string.
> Does anybody know why it happens?
> The function ExtractTextString will read any PDF, but not the one produced
> by ITextSharp.
> 
> Below is the code I wrote. Please help me figure out where the problem is.
> Txs. Eric
> 
> using System;
> 
> using System.Collections.Generic;
> 
> using System.ComponentModel;
> 
> using System.Data;
> 
> using System.Drawing;
> 
> using System.Linq;
> 
> using System.Text;
> 
> using System.Windows.Forms;
> 
> using iTextSharp;
> 
> using iTextSharp.text;
> 
> using iTextSharp.text.pdf;
> 
> using iTextSharp.text.xml;
> 
> using System.IO;
> 
> 
> 
> namespace PDFStamping
> 
> {
> 
>     public partial class Form1 : Form
> 
>     {
> 
>         public Form1()
> 
>         {
> 
>             InitializeComponent();
> 
>         }
> 
>         //Sample form with the button btnStamp
> 
>         private void btnStamp_Click(object sender, EventArgs e)
> 
>         {
>             string inputFilePath = "C:\\Temp\\Test_PDF.pdf";
>             string outputFilePath = "C:\\Temp\\Test_PDF_output.pdf";
> 
>             stampPDF(inputFilePath, outputFilePath);
> 
> 
> 
>         }
> 
>         //Main routine which creates an output PDF and show its
content
> 
>         private void stampPDF(string inputFile, string outputFile)
> 
>         {
> 
>             PdfImportedPage page = null;
> 
>             Int32 pageCount = 0; //The total of pages PDF
> 
>             Int32 pageNumber = 0; //The indicator of the current page
> 
> 
> 
>             //Read PDF
> 
>             PdfReader reader = new PdfReader(inputFile);
> 
>             Int32 intNoOfPages = reader.NumberOfPages;
> 
> 
> 
>             //Set Stamper
> 
>             Document pdfDoc = new
> Document(reader.GetPageSizeWithRotation(1));
> 
>             PdfWriter writer = PdfWriter.GetInstance(pdfDoc, new
> FileStream(outputFile, FileMode.Create));
> 
>             writer.SetPdfVersion(PdfWriter.PDF_VERSION_1_2);
> 
>             //Start Stamp
> 
>             pdfDoc.Open();
> 
> 
> 
>             PdfContentByte cb = writer.DirectContent;
> 
> 
> 
>             pageCount = reader.NumberOfPages;
> 
>             pageNumber = 1;
> 
>             for (int i = 0; i < pageCount; i++)
> 
>             {
> 
> 
> pdfDoc.SetPageSize(reader.GetPageSizeWithRotation(pageNumber));
> 
>                 pdfDoc.NewPage();
> 
>                 page = writer.GetImportedPage(reader, pageNumber);
> 
>                 cb.AddTemplate(page, 1.04F, 0, 0, 1.04F, 0, 0);
> 
>                 pageNumber++;
> 
>             }
> 
>             string strRet = ExtractTextString(outputFile, 1);
> 
>             MessageBox.Show("Start PDF content" + "\n" + strRet + "\n"
+
> "End PDF content");
> 
>             pdfDoc.Close();
> 
>         }
> 
> 
> 
>         #region ExtractTextString
> 
>         /// <summary>
> 
>         /// Extracts the text from a PDF file.
> 
>         /// </summary>
> 
>         /// the full path to the pdf file.
> 
>         /// <returns>the extracted text</returns>
> 
>         public string ExtractTextString(string inFileName, Int32
intPage)
> 
>         {
> 
>             string outFile = null;
> 
>             try
> 
>             {
> 
>                 // Create a reader for the given PDF file
> 
>                 PdfReader reader = new PdfReader(inFileName);
> 
>                 outFile +=
> (ExtractTextPDFBytes(reader.GetPageContent(intPage)) + " ");
> 
>                 return outFile;
> 
>             }
> 
>             catch
> 
>             {
> 
>                 return "";
> 
>             }
> 
>         }
> 
>         #endregion
> 
> 
> 
>         #region ExtractTextPDFBytes
> 
>         /// <summary>
> 
>         /// This method processes an uncompressed Adobe (text) object
> 
>         /// and extracts text.
> 
>         /// </summary>
> 
>         /// uncompressed
> 
>         /// <returns></returns>
> 
>         private string ExtractTextPDFBytes(byte[] input)
> 
>         {
> 
>             bool blnStartBracket = false;
> 
>             bool blnStartWrite = false;
> 
> 
> 
>             if (input == null || input.Length == 0) return "";
> 
> 
> 
>             try
> 
>             {
> 
>                 string resultString = "";
> 
> 
> 
>                 for (int i = 0; i < input.Length; i++)
> 
>                 {
> 
>                     char c = (char)input[i];
> 
> 
> 
>                     if (((c >= ' ') && (c <= '~')) || ((c >= 128) &&
(c <
> 255)))
> 
>                     {
> 
>                         if (blnStartBracket == true)
> 
>                             blnStartWrite = true;
> 
> 
> 
>                         if (c.ToString() == "(")
> 
>                         {
> 
>                             blnStartBracket = true;
> 
>                         }
> 
>                         else if (c.ToString() == ")")
> 
>                         {
> 
>                             blnStartBracket = false;
> 
>                             blnStartWrite = false;
> 
>                         }
> 
> 
> 
>                         if (blnStartWrite == true)
> 
>                         {
> 
>                             resultString += c.ToString();
> 
>                         }
> 
>                     }
> 
>                 }
> 
> 
> 
>                 return resultString;
> 
>             }
> 
>             catch
> 
>             {
> 
>                 return "";
> 
>             }
> 
>         }
> 
>         #endregion
> 
>     }
> 
> }
> --
> View this message in context:
> http://old.nabble.com/Problem-reading-the-content-from-the-PDF-created-by-ITextSharp-tp27892267p27892267.html
> Sent from the iText - General mailing list archive at Nabble.com.
> 
> 
> ------------------------------------------------------------------------------
> Download Intel® Parallel Studio Eval
> Try the new software tools for yourself. Speed compiling, find bugs
> proactively, and fine-tune applications for parallel performance.
> See why Intel Parallel Studio got high marks during beta.
> http://p.sf.net/sfu/intel-sw-dev
> _______________________________________________
> iText-questions mailing list
> [email protected]
> https://lists.sourceforge.net/lists/listinfo/itext-questions
> 
> Buy the iText book: http://www.1t3xt.com/docs/book.php
> Check the site with examples before you ask questions:
> http://www.1t3xt.info/examples/
> You can also search the keywords list:
> http://1t3xt.info/tutorials/keywords/
> 
> 
> No virus found in this incoming message.
> Checked by AVG - www.avg.com
> Version: 9.0.733 / Virus Database: 271.1.1/2732 - Release Date: 03/15/10 00:33:00

------------------------------------------------------------------------------
Download Intel® Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev
_______________________________________________
iText-questions mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/itext-questions

Buy the iText book: http://www.1t3xt.com/docs/book.php
Check the site with examples before you ask questions: 
http://www.1t3xt.info/examples/
You can also search the keywords list: http://1t3xt.info/tutorials/keywords/

Re: [iText-questions] Problem reading the content from the PDF created by ITextSharp

Reply via email to