Author: jukka
Date: Thu Sep 24 08:48:03 2009
New Revision: 818405

URL: http://svn.apache.org/viewvc?rev=818405&view=rev
Log:
TIKA-158: Upgrade to Apache PDFBox

Modified:
    lucene/tika/trunk/CHANGES.txt
    lucene/tika/trunk/tika-parsers/pom.xml
    
    lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java
    lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java

Modified: lucene/tika/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/lucene/tika/trunk/CHANGES.txt?rev=818405&r1=818404&r2=818405&view=diff
==============================================================================
--- lucene/tika/trunk/CHANGES.txt (original)
+++ lucene/tika/trunk/CHANGES.txt Thu Sep 24 08:48:03 2009
@@ -4,7 +4,10 @@
 Release 0.5 - Current Development
 ------------------------
 
+The most notable changes in Tika 0.5 over the previous release are:
 
+ * Tika now uses the Apache PDFBox version 0.8.0-incubating for parsing PDF
+   documents. This version is much improved over the 0.7.3 release used earlier. (TIKA-158)
 
 Release 0.4 - 07/14/2009
 ------------------------
@@ -122,7 +125,7 @@
     Andrzej Rusin
     Chris A. Mattmann
     Dave Meikle
-    Georger Araújo
+    Georger Araújo
     Guillermo Arribas
     Jonathan Koren
     Jukka Zitting
@@ -130,7 +133,7 @@
     Kumar Raja Jana
     Paul Borgermans
     Peter Becker
-    Sébastien Michel
+    Sébastien Michel
     Uwe Schindler
 
 See http://tinyurl.com/tika-0-3-contributions for more details on

Modified: lucene/tika/trunk/tika-parsers/pom.xml
URL: 
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/pom.xml?rev=818405&r1=818404&r2=818405&view=diff
==============================================================================
--- lucene/tika/trunk/tika-parsers/pom.xml (original)
+++ lucene/tika/trunk/tika-parsers/pom.xml Thu Sep 24 08:48:03 2009
@@ -53,9 +53,9 @@
       <version>1.0</version>
     </dependency>
     <dependency>
-      <groupId>pdfbox</groupId>
+      <groupId>org.apache.pdfbox</groupId>
       <artifactId>pdfbox</artifactId>
-      <version>0.7.3</version>
+      <version>0.8.0-incubating</version>
     </dependency>
     <dependency>
       <groupId>org.apache.poi</groupId>

Modified: 
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java
URL: 
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java?rev=818405&r1=818404&r2=818405&view=diff
==============================================================================
--- 
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java
 (original)
+++ 
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java
 Thu Sep 24 08:48:03 2009
@@ -1,4 +1,4 @@
-/**
+/*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
@@ -18,14 +18,14 @@
 
 import java.io.IOException;
 
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.pdmodel.PDPage;
+import org.apache.pdfbox.util.PDFTextStripper;
+import org.apache.pdfbox.util.TextPosition;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.io.IOExceptionWithCause;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.sax.XHTMLContentHandler;
-import org.pdfbox.pdmodel.PDDocument;
-import org.pdfbox.pdmodel.PDPage;
-import org.pdfbox.util.PDFTextStripper;
-import org.pdfbox.util.TextPosition;
 import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
 

Modified: 
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java
URL: 
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java?rev=818405&r1=818404&r2=818405&view=diff
==============================================================================
--- 
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java
 (original)
+++ 
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java
 Thu Sep 24 08:48:03 2009
@@ -1,4 +1,4 @@
-/**
+/*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
@@ -22,12 +22,11 @@
 import java.util.Collections;
 import java.util.Map;
 
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.pdmodel.PDDocumentInformation;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.parser.Parser;
-
-import org.pdfbox.pdmodel.PDDocument;
-import org.pdfbox.pdmodel.PDDocumentInformation;
 import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
 


Reply via email to