shuber 2004/09/28 15:23:03 CEST
Modified files: (Branch: JAHIA-4-0-BRANCH)
metadata/jbproject/jahia Jahia Libs.library
src/java/org/jahia/utils/fileparsers PDFExtractor.java
Log:
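Cleanup of the PDF extraction code and a small optimization: the input stream is now buffered, the parser's PDDocument is used instead of the COSDocument, and the redundant writeText() pass into an unused ByteArrayOutputStream is removed.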
Revision Changes Path
No
http://jahia.mine.nu:8080/cgi-bin/cvsweb.cgi/.diff?r1=.-1&r2=No&f=h
No
http://jahia.mine.nu:8080/cgi-bin/cvsweb.cgi/.diff?r1=.-1&r2=No&f=h
1.3.2.2 +4 -5 jahia/src/java/org/jahia/utils/fileparsers/PDFExtractor.java
http://jahia.mine.nu:8080/cgi-bin/cvsweb.cgi/jahia/src/java/org/jahia/utils/fileparsers/PDFExtractor.java.diff?r1=1.3.2.1&r2=1.3.2.2&f=h
Index: PDFExtractor.java
===================================================================
RCS file: /home/cvs/repository/jahia/src/java/org/jahia/utils/fileparsers/Attic/PDFExtractor.java,v
retrieving revision 1.3.2.1
retrieving revision 1.3.2.2
diff -u -r1.3.2.1 -r1.3.2.2
--- PDFExtractor.java 23 Sep 2004 16:28:07 -0000 1.3.2.1
+++ PDFExtractor.java 28 Sep 2004 13:23:03 -0000 1.3.2.2
@@ -10,6 +10,7 @@
import org.pdfbox.pdfparser.PDFParser;
import org.pdfbox.util.PDFTextStripper;
import org.pdfbox.util.*;
+import org.pdfbox.pdmodel.PDDocument;
/**
* Created by IntelliJ IDEA.
@@ -102,18 +103,16 @@
public Reader getPDFReader(InputStream fileStream,
String charSet) throws IOException {
try {
- PDFParser pdfParser = new PDFParser(fileStream);
+ BufferedInputStream bufFileStream = new BufferedInputStream(fileStream);
+ PDFParser pdfParser = new PDFParser(bufFileStream);
pdfParser.parse();
- COSDocument pdfDocument = pdfParser.getDocument();
+ PDDocument pdfDocument = pdfParser.getPDDocument();
if(pdfDocument.isEncrypted()) {
DecryptDocument decryptor = new DecryptDocument(pdfDocument);
decryptor.decryptDocument("");
}
- ByteArrayOutputStream out = new ByteArrayOutputStream();
PDFTextStripper stripper = new PDFTextStripper();
- stripper.writeText(pdfDocument, new OutputStreamWriter(out));
String docText = stripper.getText(pdfDocument);
- //byte contents[] = out.toByteArray();
byte contents[] = docText.getBytes(charSet);
try {