Author: tpalsulich
Date: Tue Oct 14 19:56:52 2014
New Revision: 1631859
URL: http://svn.apache.org/r1631859
Log:
OODT-630 et al. Upgrade Tika to version 1.6.
Added:
oodt/trunk/metadata/src/test/resources/
oodt/trunk/metadata/src/test/resources/copyandrewrite.test.conf
oodt/trunk/metadata/src/test/resources/extern-config.xml
oodt/trunk/metadata/src/test/resources/met_extr_preconditions.xml
oodt/trunk/metadata/src/test/resources/product-type-patterns-2.xml
oodt/trunk/metadata/src/test/resources/product-type-patterns.xml
oodt/trunk/metadata/src/test/resources/samplemet.xml
oodt/trunk/metadata/src/test/resources/testExtractor (with props)
oodt/trunk/metadata/src/test/resources/testfile.txt
oodt/trunk/metadata/src/test/resources/testfile2.txt
oodt/trunk/metadata/src/test/resources/testfile2.txt.met
oodt/trunk/metadata/src/test/resources/tika-mimetypes.xml
Removed:
oodt/trunk/metadata/src/testdata/
Modified:
oodt/trunk/CHANGES.txt
oodt/trunk/core/pom.xml
oodt/trunk/curator/pom.xml
oodt/trunk/filemgr/src/main/java/org/apache/oodt/cas/filemgr/structs/Reference.java
oodt/trunk/grid/web-grid.iml
oodt/trunk/metadata/pom.xml
oodt/trunk/metadata/src/main/java/org/apache/oodt/cas/metadata/util/MimeTypeUtils.java
oodt/trunk/metadata/src/main/resources/tika-mimetypes.xml
oodt/trunk/metadata/src/test/org/apache/oodt/cas/metadata/MetadataTestCase.java
oodt/trunk/metadata/src/test/org/apache/oodt/cas/metadata/extractors/TestAbstractSAXConfigReader.java
oodt/trunk/metadata/src/test/org/apache/oodt/cas/metadata/extractors/TestCopyAndRewriteExtractor.java
oodt/trunk/metadata/src/test/org/apache/oodt/cas/metadata/extractors/TestExternMetExtractor.java
oodt/trunk/metadata/src/test/org/apache/oodt/cas/metadata/extractors/TestExternMetExtractorConfigReader.java
oodt/trunk/metadata/src/test/org/apache/oodt/cas/metadata/extractors/TestMetReader.java
oodt/trunk/metadata/src/test/org/apache/oodt/cas/metadata/extractors/TestProdTypePatternMetExtractor.java
oodt/trunk/metadata/src/test/org/apache/oodt/cas/metadata/preconditions/TestPreCondEvalUtils.java
oodt/trunk/mvn/plugins/cas-install/maven-cas-install-plugin.iml
oodt/trunk/product/pom.xml
oodt/trunk/product/src/main/java/org/apache/oodt/product/handlers/ofsn/OFSNFileHandler.java
oodt/trunk/protocol/http/pom.xml
oodt/trunk/protocol/imaps/pom.xml
oodt/trunk/webapp/fmprod/pom.xml
oodt/trunk/xmlquery/oodt-xmlquery.iml
Modified: oodt/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/oodt/trunk/CHANGES.txt?rev=1631859&r1=1631858&r2=1631859&view=diff
==============================================================================
--- oodt/trunk/CHANGES.txt (original)
+++ oodt/trunk/CHANGES.txt Tue Oct 14 19:56:52 2014
@@ -3,6 +3,8 @@ Apache OODT Change Log
Release 0.8 - Current Development
+* OODT-385, OODT-630, OODT-631, OODT-632. Upgraded Tika to version 1.6.
+
* OODT-757 Fixed PGETaskInstance bug that prevented instantiation of
AutoDetectProductCrawler (luca)
* OODT-756 HttpClient NoClassDefFoundError For the url-downloader Script
(Mengying Wang via mattmann)
Modified: oodt/trunk/core/pom.xml
URL:
http://svn.apache.org/viewvc/oodt/trunk/core/pom.xml?rev=1631859&r1=1631858&r2=1631859&view=diff
==============================================================================
--- oodt/trunk/core/pom.xml (original)
+++ oodt/trunk/core/pom.xml Tue Oct 14 19:56:52 2014
@@ -323,6 +323,20 @@ the License.
</plugin>
</plugins>
</build>
+ <dependencyManagement>
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.tika</groupId>
+ <artifactId>tika-core</artifactId>
+ <version>1.6</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.tika</groupId>
+ <artifactId>tika-parsers</artifactId>
+ <version>1.6</version>
+ </dependency>
+ </dependencies>
+ </dependencyManagement>
<profiles>
<profile>
<id>audit</id>
Modified: oodt/trunk/curator/pom.xml
URL:
http://svn.apache.org/viewvc/oodt/trunk/curator/pom.xml?rev=1631859&r1=1631858&r2=1631859&view=diff
==============================================================================
--- oodt/trunk/curator/pom.xml (original)
+++ oodt/trunk/curator/pom.xml Tue Oct 14 19:56:52 2014
@@ -23,7 +23,6 @@ the License.
<version>0.8-SNAPSHOT</version>
<relativePath>../core/pom.xml</relativePath>
</parent>
- <groupId>org.apache.oodt</groupId>
<artifactId>cas-curator</artifactId>
<packaging>war</packaging>
<name>CAS Curation Interface</name>
Modified:
oodt/trunk/filemgr/src/main/java/org/apache/oodt/cas/filemgr/structs/Reference.java
URL:
http://svn.apache.org/viewvc/oodt/trunk/filemgr/src/main/java/org/apache/oodt/cas/filemgr/structs/Reference.java?rev=1631859&r1=1631858&r2=1631859&view=diff
==============================================================================
---
oodt/trunk/filemgr/src/main/java/org/apache/oodt/cas/filemgr/structs/Reference.java
(original)
+++
oodt/trunk/filemgr/src/main/java/org/apache/oodt/cas/filemgr/structs/Reference.java
Tue Oct 14 19:56:52 2014
@@ -20,11 +20,13 @@ package org.apache.oodt.cas.filemgr.stru
//JDK imports
import java.io.File;
import java.io.FileInputStream;
+import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
//OODT imports
import org.apache.oodt.cas.metadata.util.PathUtils;
+import org.apache.tika.Tika;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.mime.MimeType;
import org.apache.tika.mime.MimeTypeException;
@@ -112,9 +114,8 @@ public class Reference {
// ourselves to determine the which MimeType class to associate
// with this reference.
try {
- this.mimeType = mimeTypeRepository
- .getMimeType(new URL(origRef));
- } catch (MalformedURLException e) {
+ this.mimeType = mimeTypeRepository.forName(new
Tika().detect(origRef));
+ } catch (MimeTypeException e) {
e.printStackTrace();
}
Modified: oodt/trunk/grid/web-grid.iml
URL:
http://svn.apache.org/viewvc/oodt/trunk/grid/web-grid.iml?rev=1631859&r1=1631858&r2=1631859&view=diff
==============================================================================
--- oodt/trunk/grid/web-grid.iml (original)
+++ oodt/trunk/grid/web-grid.iml Tue Oct 14 19:56:52 2014
@@ -25,6 +25,8 @@
</content>
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
+ <orderEntry type="library" name="Maven: org.apache.tika:tika-core:0.8"
level="project" />
+ <orderEntry type="library" name="Maven: joda-time:joda-time:2.3"
level="project" />
<orderEntry type="module" module-name="oodt-commons" />
<orderEntry type="library" name="Maven: commons-dbcp:commons-dbcp:1.2.1"
level="project" />
<orderEntry type="library" name="Maven:
commons-collections:commons-collections:2.1" level="project" />
@@ -56,10 +58,10 @@
<orderEntry type="module" module-name="pcs-input" />
<orderEntry type="library" name="Maven: commons-io:commons-io:1.4"
level="project" />
<orderEntry type="library" name="Maven: commons-codec:commons-codec:1.3"
level="project" />
- <orderEntry type="library" name="Maven: org.apache.tika:tika-core:0.8"
level="project" />
- <orderEntry type="module" module-name="cas-cli" />
+ <orderEntry type="library" name="Maven: org.apache.tika:tika-core:1.6"
level="project" />
<orderEntry type="library" name="Maven: com.google.guava:guava:10.0.1"
level="project" />
<orderEntry type="library" name="Maven:
com.google.code.findbugs:jsr305:1.3.9" level="project" />
+ <orderEntry type="module" module-name="cas-cli" />
<orderEntry type="library" name="Maven:
org.springframework:spring-expression:3.0.5.RELEASE" level="project" />
<orderEntry type="library" name="Maven:
commons-httpclient:commons-httpclient:3.0-alpha1" level="project" />
<orderEntry type="library" name="Maven: commons-cli:commons-cli:1.2"
level="project" />
@@ -87,7 +89,7 @@
<orderEntry type="library" name="Maven:
com.fasterxml.jackson.core:jackson-core:2.1.1" level="project" />
<orderEntry type="library" name="Maven:
com.fasterxml.jackson.core:jackson-databind:2.1.1" level="project" />
<orderEntry type="library" name="Maven:
com.fasterxml.jackson.core:jackson-annotations:2.1.1" level="project" />
- <orderEntry type="library" name="Maven: joda-time:joda-time:2.3"
level="project" />
+ <orderEntry type="library" name="Maven: joda-time:joda-time:2.5"
level="project" />
<orderEntry type="library" name="Maven: com.hp.hpl.jena:jena:2.6.3"
level="project" />
<orderEntry type="library" name="Maven: com.hp.hpl.jena:iri:0.8"
level="project" />
<orderEntry type="library" name="Maven: com.ibm.icu:icu4j:3.4.4"
level="project" />
Modified: oodt/trunk/metadata/pom.xml
URL:
http://svn.apache.org/viewvc/oodt/trunk/metadata/pom.xml?rev=1631859&r1=1631858&r2=1631859&view=diff
==============================================================================
--- oodt/trunk/metadata/pom.xml (original)
+++ oodt/trunk/metadata/pom.xml Tue Oct 14 19:56:52 2014
@@ -51,25 +51,6 @@ the License.
</includes>
</resource>
</resources>
- <testResources>
- <testResource>
- <targetPath>org/apache/oodt/cas/metadata</targetPath>
- <directory>${basedir}/src/testdata</directory>
- <includes>
- <include>copyandrewrite.test.conf</include>
- <include>extern-config.xml</include>
- <include>met_extr_preconditions.xml</include>
- <include>product-type-patterns.xml</include>
- <include>product-type-patterns-2.xml</include>
- <include>samplemet.xml</include>
- <include>testExtractor</include>
- <include>testfile.txt</include>
- <include>testfile2.txt.met</include>
- <include>testfile2.txt</include>
- <include>tika-mimetypes.xml</include>
- </includes>
- </testResource>
- </testResources>
<plugins>
<plugin>
<artifactId>maven-surefire-plugin</artifactId>
@@ -162,7 +143,6 @@ the License.
<dependency>
<groupId>org.apache.tika</groupId>
<artifactId>tika-core</artifactId>
- <version>0.8</version>
</dependency>
<dependency>
<groupId>org.springframework</groupId>
Modified:
oodt/trunk/metadata/src/main/java/org/apache/oodt/cas/metadata/util/MimeTypeUtils.java
URL:
http://svn.apache.org/viewvc/oodt/trunk/metadata/src/main/java/org/apache/oodt/cas/metadata/util/MimeTypeUtils.java?rev=1631859&r1=1631858&r2=1631859&view=diff
==============================================================================
---
oodt/trunk/metadata/src/main/java/org/apache/oodt/cas/metadata/util/MimeTypeUtils.java
(original)
+++
oodt/trunk/metadata/src/main/java/org/apache/oodt/cas/metadata/util/MimeTypeUtils.java
Tue Oct 14 19:56:52 2014
@@ -29,6 +29,7 @@ import java.util.logging.Level;
import java.util.logging.Logger;
//APACHE imports
+import org.apache.tika.Tika;
import org.apache.tika.mime.MediaType;
import org.apache.tika.mime.MimeType;
import org.apache.tika.mime.MimeTypeException;
@@ -53,6 +54,8 @@ public final class MimeTypeUtils {
/* our Tika mime type registry */
private MimeTypes mimeTypes;
+ private Tika tika = new Tika();
+
/* whether or not magic should be employed or not */
private boolean mimeMagic;
@@ -171,11 +174,14 @@ public final class MimeTypeUtils {
// if returned null, or if it's the default type then try url
resolution
if (type == null
- || (type != null &&
type.getName().equals(MimeTypes.OCTET_STREAM))) {
+ || (type.getName().equals(MimeTypes.OCTET_STREAM))) {
// If no mime-type header, or cannot find a corresponding
registered
// mime-type, then guess a mime-type from the url pattern
- type = this.mimeTypes.getMimeType(url) != null ? this.mimeTypes
- .getMimeType(url) : type;
+ try {
+ type = mimeTypes.forName(tika.detect(url)) != null ?
mimeTypes.forName(tika.detect(url)) : type;
+ } catch (Exception e) {
+ // MimeTypeException or IOException from tika.detect. Ignore.
+ }
}
// if magic is enabled use mime magic to guess if the mime type
returned
@@ -185,7 +191,12 @@ public final class MimeTypeUtils {
// type
// returned by the magic
if (this.mimeMagic) {
- MimeType magicType = this.mimeTypes.getMimeType(data);
+ MimeType magicType;
+ try {
+ magicType = mimeTypes.forName(tika.detect(data));
+ } catch (Exception e) {
+ magicType = null;
+ }
if (magicType != null
&& !magicType.getName().equals(MimeTypes.OCTET_STREAM)
&& type != null
@@ -212,8 +223,8 @@ public final class MimeTypeUtils {
/**
* Facade interface to Tika's underlying
- * {@link MimeTypes#getMimeType(String)} method.
- *
+ * {@link org.apache.tika.Tika#detect(URL)} method.
+ *
* @param url
* A string representation of the document {@link URL} to sense
* the {@link MimeType} for.
@@ -221,17 +232,17 @@ public final class MimeTypeUtils {
* Document url in string form.
*/
public String getMimeType(URL url) {
- MimeType mimeType = this.mimeTypes.getMimeType(url);
- if (mimeType != null)
- return mimeType.getName();
- else
- return null;
+ try {
+ return tika.detect(url);
+ } catch (Exception e) {
+ return null;
+ }
}
/**
- * A facade interface to Tika's underlying {@link
MimeTypes#forName(String)}
+ * A facade interface to Tika's underlying {@link
org.apache.tika.Tika#detect(String)}
* method.
- *
+ *
* @param name
* The name of a valid {@link MimeType} in the Tika mime
* registry.
@@ -239,46 +250,49 @@ public final class MimeTypeUtils {
* or null otherwise.
*/
public String getMimeType(String name) {
- MimeType mimeType = this.mimeTypes.getMimeType(name);
- if (mimeType != null)
- return mimeType.getName();
- else
- return null;
+ try {
+ return tika.detect(name);
+ } catch (Exception e) {
+ e.printStackTrace();
+ return null;
+ }
}
/**
- * Facade interface to Tika's underlying {@link
MimeTypes#getMimeType(File)}
+ * Facade interface to Tika's underlying {@link
org.apache.tika.Tika#detect(File)}
* method.
- *
+ *
* @param f
* The {@link File} to sense the {@link MimeType} for.
* @return The {@link MimeType} of the given {@link File}, or null if it
* cannot be determined.
*/
public String getMimeType(File f) {
- MimeType mimeType = this.mimeTypes.getMimeType(f);
- if (mimeType != null)
- return mimeType.getName();
- else
- return null;
+ try {
+ return tika.detect(f);
+ } catch (Exception e) {
+ System.err.println("\n\n\n");
+ e.printStackTrace();
+ System.err.println("\n\n\n");
+ return null;
+ }
}
/**
* Utility method to act as a facade to
* {@link MimeTypes#getMimeType(byte[])}.
- *
+ *
* @param data
* The byte data to get the {@link MimeType} for.
* @return The String representation of the resolved {@link MimeType}, or
* null if a suitable {@link MimeType} is not found.
*/
public String getMimeTypeByMagic(byte[] data) {
- MimeType type = this.mimeTypes.getMimeType(data);
- if (type != null) {
- return type.getName();
- } else
+ try {
+ return tika.detect(data);
+ } catch (Exception e) {
return null;
-
+ }
}
public String getDescriptionForMimeType(String mimeType) {