Author: tpalsulich
Date: Tue Oct 14 19:56:52 2014
New Revision: 1631859

URL: http://svn.apache.org/r1631859
Log:
OODT-630 et al. Upgrade Tika to version 1.6.

Added:
    oodt/trunk/metadata/src/test/resources/
    oodt/trunk/metadata/src/test/resources/copyandrewrite.test.conf
    oodt/trunk/metadata/src/test/resources/extern-config.xml
    oodt/trunk/metadata/src/test/resources/met_extr_preconditions.xml
    oodt/trunk/metadata/src/test/resources/product-type-patterns-2.xml
    oodt/trunk/metadata/src/test/resources/product-type-patterns.xml
    oodt/trunk/metadata/src/test/resources/samplemet.xml
    oodt/trunk/metadata/src/test/resources/testExtractor   (with props)
    oodt/trunk/metadata/src/test/resources/testfile.txt
    oodt/trunk/metadata/src/test/resources/testfile2.txt
    oodt/trunk/metadata/src/test/resources/testfile2.txt.met
    oodt/trunk/metadata/src/test/resources/tika-mimetypes.xml
Removed:
    oodt/trunk/metadata/src/testdata/
Modified:
    oodt/trunk/CHANGES.txt
    oodt/trunk/core/pom.xml
    oodt/trunk/curator/pom.xml
    oodt/trunk/filemgr/src/main/java/org/apache/oodt/cas/filemgr/structs/Reference.java
    oodt/trunk/grid/web-grid.iml
    oodt/trunk/metadata/pom.xml
    oodt/trunk/metadata/src/main/java/org/apache/oodt/cas/metadata/util/MimeTypeUtils.java
    oodt/trunk/metadata/src/main/resources/tika-mimetypes.xml
    oodt/trunk/metadata/src/test/org/apache/oodt/cas/metadata/MetadataTestCase.java
    oodt/trunk/metadata/src/test/org/apache/oodt/cas/metadata/extractors/TestAbstractSAXConfigReader.java
    oodt/trunk/metadata/src/test/org/apache/oodt/cas/metadata/extractors/TestCopyAndRewriteExtractor.java
    oodt/trunk/metadata/src/test/org/apache/oodt/cas/metadata/extractors/TestExternMetExtractor.java
    oodt/trunk/metadata/src/test/org/apache/oodt/cas/metadata/extractors/TestExternMetExtractorConfigReader.java
    oodt/trunk/metadata/src/test/org/apache/oodt/cas/metadata/extractors/TestMetReader.java
    oodt/trunk/metadata/src/test/org/apache/oodt/cas/metadata/extractors/TestProdTypePatternMetExtractor.java
    oodt/trunk/metadata/src/test/org/apache/oodt/cas/metadata/preconditions/TestPreCondEvalUtils.java
    oodt/trunk/mvn/plugins/cas-install/maven-cas-install-plugin.iml
    oodt/trunk/product/pom.xml
    oodt/trunk/product/src/main/java/org/apache/oodt/product/handlers/ofsn/OFSNFileHandler.java
    oodt/trunk/protocol/http/pom.xml
    oodt/trunk/protocol/imaps/pom.xml
    oodt/trunk/webapp/fmprod/pom.xml
    oodt/trunk/xmlquery/oodt-xmlquery.iml

Modified: oodt/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/oodt/trunk/CHANGES.txt?rev=1631859&r1=1631858&r2=1631859&view=diff
==============================================================================
--- oodt/trunk/CHANGES.txt (original)
+++ oodt/trunk/CHANGES.txt Tue Oct 14 19:56:52 2014
@@ -3,6 +3,8 @@ Apache OODT Change Log
 
 Release 0.8 - Current Development
 
+* OODT-385, OODT-630, OODT-631, OODT-632. Upgraded Tika to version 1.6.
+
 * OODT-757 Fixed PGETaskInstance bug that prevented instantiation of AutoDetectProductCrawler (luca)
 
 * OODT-756 HttpClient NoClassDefFoundError For the url-downloader Script (Mengying Wang via mattmann)

Modified: oodt/trunk/core/pom.xml
URL: http://svn.apache.org/viewvc/oodt/trunk/core/pom.xml?rev=1631859&r1=1631858&r2=1631859&view=diff
==============================================================================
--- oodt/trunk/core/pom.xml (original)
+++ oodt/trunk/core/pom.xml Tue Oct 14 19:56:52 2014
@@ -323,6 +323,20 @@ the License.
             </plugin>
         </plugins>
     </build>
+    <dependencyManagement>
+        <dependencies>
+            <dependency>
+                <groupId>org.apache.tika</groupId>
+                <artifactId>tika-core</artifactId>
+                <version>1.6</version>
+            </dependency>
+            <dependency>
+                <groupId>org.apache.tika</groupId>
+                <artifactId>tika-parsers</artifactId>
+                <version>1.6</version>
+            </dependency>
+        </dependencies>
+    </dependencyManagement>
     <profiles>
         <profile>
             <id>audit</id>

Modified: oodt/trunk/curator/pom.xml
URL: http://svn.apache.org/viewvc/oodt/trunk/curator/pom.xml?rev=1631859&r1=1631858&r2=1631859&view=diff
==============================================================================
--- oodt/trunk/curator/pom.xml (original)
+++ oodt/trunk/curator/pom.xml Tue Oct 14 19:56:52 2014
@@ -23,7 +23,6 @@ the License.
     <version>0.8-SNAPSHOT</version>
     <relativePath>../core/pom.xml</relativePath>
   </parent>
-  <groupId>org.apache.oodt</groupId>
   <artifactId>cas-curator</artifactId>
   <packaging>war</packaging>
   <name>CAS Curation Interface</name>

Modified: oodt/trunk/filemgr/src/main/java/org/apache/oodt/cas/filemgr/structs/Reference.java
URL: http://svn.apache.org/viewvc/oodt/trunk/filemgr/src/main/java/org/apache/oodt/cas/filemgr/structs/Reference.java?rev=1631859&r1=1631858&r2=1631859&view=diff
==============================================================================
--- oodt/trunk/filemgr/src/main/java/org/apache/oodt/cas/filemgr/structs/Reference.java (original)
+++ oodt/trunk/filemgr/src/main/java/org/apache/oodt/cas/filemgr/structs/Reference.java Tue Oct 14 19:56:52 2014
@@ -20,11 +20,13 @@ package org.apache.oodt.cas.filemgr.stru
 //JDK imports
 import java.io.File;
 import java.io.FileInputStream;
+import java.io.IOException;
 import java.net.MalformedURLException;
 import java.net.URL;
 
 //OODT imports
 import org.apache.oodt.cas.metadata.util.PathUtils;
+import org.apache.tika.Tika;
 import org.apache.tika.config.TikaConfig;
 import org.apache.tika.mime.MimeType;
 import org.apache.tika.mime.MimeTypeException;
@@ -112,9 +114,8 @@ public class Reference {
         // ourselves to determine the which MimeType class to associate
         // with this reference.
         try {
-            this.mimeType = mimeTypeRepository
-                    .getMimeType(new URL(origRef));
-        } catch (MalformedURLException e) {
+            this.mimeType = mimeTypeRepository.forName(new Tika().detect(origRef));
+        } catch (MimeTypeException e) {
             e.printStackTrace();
         }
 

Modified: oodt/trunk/grid/web-grid.iml
URL: http://svn.apache.org/viewvc/oodt/trunk/grid/web-grid.iml?rev=1631859&r1=1631858&r2=1631859&view=diff
==============================================================================
--- oodt/trunk/grid/web-grid.iml (original)
+++ oodt/trunk/grid/web-grid.iml Tue Oct 14 19:56:52 2014
@@ -25,6 +25,8 @@
     </content>
     <orderEntry type="inheritedJdk" />
     <orderEntry type="sourceFolder" forTests="false" />
+    <orderEntry type="library" name="Maven: org.apache.tika:tika-core:0.8" level="project" />
+    <orderEntry type="library" name="Maven: joda-time:joda-time:2.3" level="project" />
     <orderEntry type="module" module-name="oodt-commons" />
     <orderEntry type="library" name="Maven: commons-dbcp:commons-dbcp:1.2.1" level="project" />
     <orderEntry type="library" name="Maven: commons-collections:commons-collections:2.1" level="project" />
@@ -56,10 +58,10 @@
     <orderEntry type="module" module-name="pcs-input" />
     <orderEntry type="library" name="Maven: commons-io:commons-io:1.4" level="project" />
     <orderEntry type="library" name="Maven: commons-codec:commons-codec:1.3" level="project" />
-    <orderEntry type="library" name="Maven: org.apache.tika:tika-core:0.8" level="project" />
-    <orderEntry type="module" module-name="cas-cli" />
+    <orderEntry type="library" name="Maven: org.apache.tika:tika-core:1.6" level="project" />
     <orderEntry type="library" name="Maven: com.google.guava:guava:10.0.1" level="project" />
     <orderEntry type="library" name="Maven: com.google.code.findbugs:jsr305:1.3.9" level="project" />
+    <orderEntry type="module" module-name="cas-cli" />
     <orderEntry type="library" name="Maven: org.springframework:spring-expression:3.0.5.RELEASE" level="project" />
     <orderEntry type="library" name="Maven: commons-httpclient:commons-httpclient:3.0-alpha1" level="project" />
     <orderEntry type="library" name="Maven: commons-cli:commons-cli:1.2" level="project" />
@@ -87,7 +89,7 @@
     <orderEntry type="library" name="Maven: com.fasterxml.jackson.core:jackson-core:2.1.1" level="project" />
     <orderEntry type="library" name="Maven: com.fasterxml.jackson.core:jackson-databind:2.1.1" level="project" />
     <orderEntry type="library" name="Maven: com.fasterxml.jackson.core:jackson-annotations:2.1.1" level="project" />
-    <orderEntry type="library" name="Maven: joda-time:joda-time:2.3" level="project" />
+    <orderEntry type="library" name="Maven: joda-time:joda-time:2.5" level="project" />
     <orderEntry type="library" name="Maven: com.hp.hpl.jena:jena:2.6.3" level="project" />
     <orderEntry type="library" name="Maven: com.hp.hpl.jena:iri:0.8" level="project" />
     <orderEntry type="library" name="Maven: com.ibm.icu:icu4j:3.4.4" level="project" />

Modified: oodt/trunk/metadata/pom.xml
URL: http://svn.apache.org/viewvc/oodt/trunk/metadata/pom.xml?rev=1631859&r1=1631858&r2=1631859&view=diff
==============================================================================
--- oodt/trunk/metadata/pom.xml (original)
+++ oodt/trunk/metadata/pom.xml Tue Oct 14 19:56:52 2014
@@ -51,25 +51,6 @@ the License.
         </includes>
       </resource>
     </resources>
-    <testResources>
-      <testResource>
-        <targetPath>org/apache/oodt/cas/metadata</targetPath>
-        <directory>${basedir}/src/testdata</directory>
-        <includes>
-             <include>copyandrewrite.test.conf</include>
-             <include>extern-config.xml</include>
-             <include>met_extr_preconditions.xml</include>
-             <include>product-type-patterns.xml</include>
-             <include>product-type-patterns-2.xml</include>
-             <include>samplemet.xml</include>
-             <include>testExtractor</include>
-             <include>testfile.txt</include>
-             <include>testfile2.txt.met</include>
-             <include>testfile2.txt</include>
-             <include>tika-mimetypes.xml</include>
-        </includes>
-      </testResource>
-    </testResources>
     <plugins>
       <plugin>
         <artifactId>maven-surefire-plugin</artifactId>
@@ -162,7 +143,6 @@ the License.
     <dependency>
       <groupId>org.apache.tika</groupId>
       <artifactId>tika-core</artifactId>
-      <version>0.8</version>
     </dependency>
     <dependency>
       <groupId>org.springframework</groupId>

Modified: oodt/trunk/metadata/src/main/java/org/apache/oodt/cas/metadata/util/MimeTypeUtils.java
URL: http://svn.apache.org/viewvc/oodt/trunk/metadata/src/main/java/org/apache/oodt/cas/metadata/util/MimeTypeUtils.java?rev=1631859&r1=1631858&r2=1631859&view=diff
==============================================================================
--- oodt/trunk/metadata/src/main/java/org/apache/oodt/cas/metadata/util/MimeTypeUtils.java (original)
+++ oodt/trunk/metadata/src/main/java/org/apache/oodt/cas/metadata/util/MimeTypeUtils.java Tue Oct 14 19:56:52 2014
@@ -29,6 +29,7 @@ import java.util.logging.Level;
 import java.util.logging.Logger;
 
 //APACHE imports
+import org.apache.tika.Tika;
 import org.apache.tika.mime.MediaType;
 import org.apache.tika.mime.MimeType;
 import org.apache.tika.mime.MimeTypeException;
@@ -53,6 +54,8 @@ public final class MimeTypeUtils {
     /* our Tika mime type registry */
     private MimeTypes mimeTypes;
 
+    private Tika tika = new Tika();
+
     /* whether or not magic should be employed or not */
     private boolean mimeMagic;
 
@@ -171,11 +174,14 @@ public final class MimeTypeUtils {
 
         // if returned null, or if it's the default type then try url resolution
         if (type == null
-                || (type != null && type.getName().equals(MimeTypes.OCTET_STREAM))) {
+                || (type.getName().equals(MimeTypes.OCTET_STREAM))) {
             // If no mime-type header, or cannot find a corresponding registered
             // mime-type, then guess a mime-type from the url pattern
-            type = this.mimeTypes.getMimeType(url) != null ? this.mimeTypes
-                    .getMimeType(url) : type;
+            try {
+                type = mimeTypes.forName(tika.detect(url)) != null ? mimeTypes.forName(tika.detect(url)) : type;
+            } catch (Exception e) {
+                // MimeTypeException or IOException from tika.detect. Ignore.
+            }
         }
 
         // if magic is enabled use mime magic to guess if the mime type returned
@@ -185,7 +191,12 @@ public final class MimeTypeUtils {
         // type
         // returned by the magic
         if (this.mimeMagic) {
-            MimeType magicType = this.mimeTypes.getMimeType(data);
+            MimeType magicType;
+            try {
+                magicType =  mimeTypes.forName(tika.detect(data));
+            } catch (Exception e) {
+                magicType = null;
+            }
             if (magicType != null
                     && !magicType.getName().equals(MimeTypes.OCTET_STREAM)
                     && type != null
@@ -212,8 +223,8 @@ public final class MimeTypeUtils {
 
     /**
      * Facade interface to Tika's underlying
-     * {@link MimeTypes#getMimeType(String)} method.
-     * 
+     * {@link tika.detect(String)} method.
+     *
      * @param url
      *            A string representation of the document {@link URL} to sense
      *            the {@link MimeType} for.
@@ -221,17 +232,17 @@ public final class MimeTypeUtils {
      *         Document url in string form.
      */
     public String getMimeType(URL url) {
-       MimeType mimeType = this.mimeTypes.getMimeType(url);
-       if (mimeType != null)
-               return mimeType.getName();
-       else
-               return null;
+        try {
+           return tika.detect(url);
+        } catch (Exception e) {
+            return null;
+        }
     }
 
     /**
-     * A facade interface to Tika's underlying {@link MimeTypes#forName(String)}
+     * A facade interface to Tika's underlying {@link org.apache.tika.tika.detect(String)}
      * method.
-     * 
+     *
      * @param name
      *            The name of a valid {@link MimeType} in the Tika mime
      *            registry.
@@ -239,46 +250,49 @@ public final class MimeTypeUtils {
      *         or null otherwise.
      */
     public String getMimeType(String name) {
-       MimeType mimeType = this.mimeTypes.getMimeType(name);
-       if (mimeType != null)
-               return mimeType.getName();
-       else
-               return null;
+        try {
+            return tika.detect(name);
+        } catch (Exception e) {
+            e.printStackTrace();
+            return null;
+        }
     }
 
     /**
-     * Facade interface to Tika's underlying {@link MimeTypes#getMimeType(File)}
+     * Facade interface to Tika's underlying {@link org.apache.tika.Tika#detect(File)}
      * method.
-     * 
+     *
      * @param f
      *            The {@link File} to sense the {@link MimeType} for.
      * @return The {@link MimeType} of the given {@link File}, or null if it
      *         cannot be determined.
      */
     public String getMimeType(File f) {
-       MimeType mimeType = this.mimeTypes.getMimeType(f);
-       if (mimeType != null)
-               return mimeType.getName();
-       else
-               return null;
+        try {
+            return tika.detect(f);
+        } catch (Exception e) {
+            System.err.println("\n\n\n");
+            e.printStackTrace();
+            System.err.println("\n\n\n");
+            return null;
+        }
     }
 
     /**
      * Utility method to act as a facade to
      * {@link MimeTypes#getMimeType(byte[])}.
-     * 
+     *
      * @param data
      *            The byte data to get the {@link MimeType} for.
      * @return The String representation of the resolved {@link MimeType}, or
      *         null if a suitable {@link MimeType} is not found.
      */
     public String getMimeTypeByMagic(byte[] data) {
-        MimeType type = this.mimeTypes.getMimeType(data);
-        if (type != null) {
-            return type.getName();
-        } else
+        try {
+            return tika.detect(data);
+        } catch (Exception e) {
             return null;
-
+        }
     }
     
     public String getDescriptionForMimeType(String mimeType) {


Reply via email to