Natarajan.T wrote:
FYI,
I am using PDFBox.jar to Convert PDF to Text.
The problem is that at runtime it prints a lot of object messages.
How can I avoid this? How should I proceed?
import java.io.InputStream; import java.io.BufferedWriter; import java.io.IOException;
import org.pdfbox.util.PDFTextStripper; import org.pdfbox.pdfparser.PDFParser; import org.pdfbox.pdmodel.PDDocument; import org.pdfbox.pdmodel.PDDocumentInformation;
/** * @author natarajant * * TODO To change the template for this generated type comment go to * Window - Preferences - Java - Code Generation - Code and Comments */ public class PDFConverter extends DocumentConverter{
public PDFConverter() { }
/** * This method will construct the Lucene document object from the * given information by extracting the text from PDF file. * * @param reader and writer - InputStream and BufferedWriter * @return true or false i.e. extract the text or not */ public boolean extractText(InputStream reader, BufferedWriter writer) throws IOException{
PDFParser parser = null; PDDocument pdDoc = null; PDFTextStripper stripper = null; String pdftext = ""; String pdftitle = ""; try { parser = new PDFParser(reader); parser.parse(); pdDoc = parser.getPDDocument();
stripper = new PDFTextStripper(); pdftext = stripper.getText(pdDoc);
writer.write(pdftext +" ");
PDDocumentInformation info = pdDoc.getDocumentInformation(); pdftitle = info.getTitle();
} catch(Exception err) {
System.out.println(err.getMessage());
change this to return false;
}
writer.close();
return true;
}
finally { // close all open resources }
}
--------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]
