Author: jukka
Date: Thu Sep 24 08:48:03 2009
New Revision: 818405
URL: http://svn.apache.org/viewvc?rev=818405&view=rev
Log:
TIKA-158: Upgrade to Apache PDFBox
Modified:
lucene/tika/trunk/CHANGES.txt
lucene/tika/trunk/tika-parsers/pom.xml
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java
Modified: lucene/tika/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/CHANGES.txt?rev=818405&r1=818404&r2=818405&view=diff
==============================================================================
--- lucene/tika/trunk/CHANGES.txt (original)
+++ lucene/tika/trunk/CHANGES.txt Thu Sep 24 08:48:03 2009
@@ -4,7 +4,10 @@
Release 0.5 - Current Development
------------------------
+The most notable changes in Tika 0.5 over the previous release are:
+ * Tika now uses the Apache PDFBox version 0.8.0-incubating for parsing PDF
+ documents. This version is much improved over the 0.7.3 release used earlier. (TIKA-158)
Release 0.4 - 07/14/2009
------------------------
@@ -122,7 +125,7 @@
Andrzej Rusin
Chris A. Mattmann
Dave Meikle
- Georger Araújo
+ Georger Araújo
Guillermo Arribas
Jonathan Koren
Jukka Zitting
@@ -130,7 +133,7 @@
Kumar Raja Jana
Paul Borgermans
Peter Becker
- Sébastien Michel
+ Sébastien Michel
Uwe Schindler
See http://tinyurl.com/tika-0-3-contributions for more details on
Modified: lucene/tika/trunk/tika-parsers/pom.xml
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/pom.xml?rev=818405&r1=818404&r2=818405&view=diff
==============================================================================
--- lucene/tika/trunk/tika-parsers/pom.xml (original)
+++ lucene/tika/trunk/tika-parsers/pom.xml Thu Sep 24 08:48:03 2009
@@ -53,9 +53,9 @@
<version>1.0</version>
</dependency>
<dependency>
- <groupId>pdfbox</groupId>
+ <groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
- <version>0.7.3</version>
+ <version>0.8.0-incubating</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
Modified:
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java?rev=818405&r1=818404&r2=818405&view=diff
==============================================================================
---
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java
(original)
+++
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java
Thu Sep 24 08:48:03 2009
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -18,14 +18,14 @@
import java.io.IOException;
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.pdmodel.PDPage;
+import org.apache.pdfbox.util.PDFTextStripper;
+import org.apache.pdfbox.util.TextPosition;
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.IOExceptionWithCause;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.sax.XHTMLContentHandler;
-import org.pdfbox.pdmodel.PDDocument;
-import org.pdfbox.pdmodel.PDPage;
-import org.pdfbox.util.PDFTextStripper;
-import org.pdfbox.util.TextPosition;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
Modified:
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java?rev=818405&r1=818404&r2=818405&view=diff
==============================================================================
---
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java
(original)
+++
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java
Thu Sep 24 08:48:03 2009
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -22,12 +22,11 @@
import java.util.Collections;
import java.util.Map;
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.pdmodel.PDDocumentInformation;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.Parser;
-
-import org.pdfbox.pdmodel.PDDocument;
-import org.pdfbox.pdmodel.PDDocumentInformation;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;