Author: mattmann
Date: Fri Jul 16 22:55:48 2010
New Revision: 964970
URL: http://svn.apache.org/viewvc?rev=964970&view=rev
Log:
- OODT-15 WIP: fix problems with extractors in metadata; start to fix and
enable unit tests that were disabled per kelly
Added:
incubator/oodt/trunk/metadata/src/testdata/
incubator/oodt/trunk/metadata/src/testdata/met_extr_preconditions.xml
incubator/oodt/trunk/metadata/src/testdata/tika-mimetypes.xml
Modified:
incubator/oodt/trunk/metadata/pom.xml
incubator/oodt/trunk/metadata/src/main/java/org/apache/oodt/cas/metadata/extractors/CmdLineMetExtractor.java
incubator/oodt/trunk/metadata/src/main/java/org/apache/oodt/cas/metadata/extractors/CopyAndRewriteExtractor.java
incubator/oodt/trunk/metadata/src/main/java/org/apache/oodt/cas/metadata/util/MimeTypeUtils.java
incubator/oodt/trunk/metadata/src/test/org/apache/oodt/cas/metadata/preconditions/TestPreCondEvalUtils.java
Modified: incubator/oodt/trunk/metadata/pom.xml
URL:
http://svn.apache.org/viewvc/incubator/oodt/trunk/metadata/pom.xml?rev=964970&r1=964969&r2=964970&view=diff
==============================================================================
--- incubator/oodt/trunk/metadata/pom.xml (original)
+++ incubator/oodt/trunk/metadata/pom.xml Fri Jul 16 22:55:48 2010
@@ -57,6 +57,16 @@ the License.
</includes>
</resource>
</resources>
+ <testResources>
+ <testResource>
+ <targetPath>org/apache/oodt/cas/metadata/preconditions</targetPath>
+ <directory>${basedir}/src/testdata</directory>
+ <includes>
+ <include>met_extr_preconditions.xml</include>
+ <include>tika-mimetypes.xml</include>
+ </includes>
+ </testResource>
+ </testResources>
<plugins>
<plugin>
<artifactId>maven-surefire-plugin</artifactId>
@@ -73,7 +83,6 @@ the License.
</includes>
<excludes>
<!-- FIXME: These all assume $CWD, but should use
getResourceAsStream instead. Later. -->
-
<exclude>org/apache/oodt/cas/metadata/preconditions/TestPreCondEvalUtils.java</exclude>
<exclude>org/apache/oodt/cas/metadata/extractors/TestMetReader.java</exclude>
<exclude>org/apache/oodt/cas/metadata/extractors/TestExternMetExtractor.java</exclude>
<exclude>org/apache/oodt/cas/metadata/extractors/TestCopyAndRewriteExtractor.java</exclude>
@@ -109,9 +118,9 @@ the License.
<version>1.3</version>
</dependency>
<dependency>
- <groupId>apache</groupId>
+ <groupId>org.apache.tika</groupId>
<artifactId>tika</artifactId>
- <version>0.2-fork</version>
+ <version>0.3</version>
</dependency>
<dependency>
<groupId>org.springframework</groupId>
Modified:
incubator/oodt/trunk/metadata/src/main/java/org/apache/oodt/cas/metadata/extractors/CmdLineMetExtractor.java
URL:
http://svn.apache.org/viewvc/incubator/oodt/trunk/metadata/src/main/java/org/apache/oodt/cas/metadata/extractors/CmdLineMetExtractor.java?rev=964970&r1=964969&r2=964970&view=diff
==============================================================================
---
incubator/oodt/trunk/metadata/src/main/java/org/apache/oodt/cas/metadata/extractors/CmdLineMetExtractor.java
(original)
+++
incubator/oodt/trunk/metadata/src/main/java/org/apache/oodt/cas/metadata/extractors/CmdLineMetExtractor.java
Fri Jul 16 22:55:48 2010
@@ -29,6 +29,7 @@ import java.util.logging.Level;
//OODT imports
import org.apache.oodt.cas.metadata.AbstractMetExtractor;
import org.apache.oodt.cas.metadata.MetExtractorConfigReader;
+import org.apache.oodt.cas.metadata.SerializableMetadata;
import org.apache.oodt.cas.metadata.Metadata;
import org.apache.oodt.commons.exec.EnvUtilities;
import org.apache.oodt.commons.xml.XMLUtils;
@@ -46,81 +47,82 @@ import org.apache.oodt.commons.xml.XMLUt
*/
public abstract class CmdLineMetExtractor extends AbstractMetExtractor {
- public CmdLineMetExtractor(MetExtractorConfigReader reader) {
- super(reader);
- }
-
- protected static void processMain(String[] args,
- CmdLineMetExtractor extractor) throws Exception {
- String usage = "Usage: " + extractor.getClass().getName()
- + " <file> <configfile>";
- String extractFilePath = null, configFilePath = null;
-
- if (args.length < 2) {
- System.err.println(usage);
- System.exit(1);
- }
-
- extractFilePath = args[0].replaceAll("\\\\", "");
- configFilePath = args[1];
-
- Metadata met = extractor.extractMetadata(new File(extractFilePath),
- configFilePath);
- XMLUtils.writeXmlToStream(met.toXML(),
- getMetFileOutputStream(extractFilePath));
- }
-
- protected static void processMain(String[] args,
- CmdLineMetExtractor extractor, OutputStream os) throws Exception {
- String usage = "Usage: " + extractor.getClass().getName()
- + " <file> <configfile>";
- String extractFilePath = null, configFilePath = null;
-
- if (args.length < 2) {
- System.err.println(usage);
- System.exit(1);
- }
-
- extractFilePath = args[0].replaceAll("\\\\", "");
- configFilePath = args[1];
-
- Metadata met = extractor.extractMetadata(new File(extractFilePath),
- configFilePath);
- XMLUtils.writeXmlToStream(met.toXML(), os);
-
- }
-
- private static FileOutputStream getMetFileOutputStream(String filePath) {
- Properties envVars = EnvUtilities.getEnv();
- String cwd = envVars.getProperty("PWD");
- if (cwd == null) {
- throw new RuntimeException(
- "Unable to get current working directory: failing!");
- }
-
- if (!cwd.endsWith("/")) {
- cwd += "/";
- }
-
- String metFilePath = cwd
- + new File(filePath).getName().replaceAll("\\\\", "") + ".met";
-
- // try and remove the met file, if it already exists
- // for some reason below, the writeXmlFile method in
- // XMLUtils doesn't overwrite, and throws an Exception
- File metFile = new File(metFilePath);
- if (!metFile.delete()) {
- LOG.log(Level.WARNING, "Attempt to overwrite met file: ["
- + metFilePath + "] unsuccessful!");
- }
-
- try {
- return new FileOutputStream(metFile);
- } catch (FileNotFoundException e) {
- LOG.log(Level.WARNING, "Could not create met file: [" + metFile
- + "]: Reason " + e.getMessage(), e);
- return null;
- }
- }
+ public CmdLineMetExtractor(MetExtractorConfigReader reader) {
+ super(reader);
+ }
+
+ protected static void processMain(String[] args,
+ CmdLineMetExtractor extractor) throws Exception {
+ String usage = "Usage: " + extractor.getClass().getName()
+ + " <file> <configfile>";
+ String extractFilePath = null, configFilePath = null;
+
+ if (args.length < 2) {
+ System.err.println(usage);
+ System.exit(1);
+ }
+
+ extractFilePath = args[0].replaceAll("\\\\", "");
+ configFilePath = args[1];
+
+ Metadata met = extractor.extractMetadata(new File(extractFilePath),
+ configFilePath);
+ XMLUtils.writeXmlToStream(new SerializableMetadata(met).toXML(),
+ getMetFileOutputStream(extractFilePath));
+ }
+
+ protected static void processMain(String[] args,
+ CmdLineMetExtractor extractor, OutputStream os) throws Exception {
+ String usage = "Usage: " + extractor.getClass().getName()
+ + " <file> <configfile>";
+ String extractFilePath = null, configFilePath = null;
+
+ if (args.length < 2) {
+ System.err.println(usage);
+ System.exit(1);
+ }
+
+ extractFilePath = args[0].replaceAll("\\\\", "");
+ configFilePath = args[1];
+
+ Metadata met = extractor.extractMetadata(new File(extractFilePath),
+ configFilePath);
+ XMLUtils.writeXmlToStream(new SerializableMetadata(met).toXML(), os);
+
+ }
+
+ private static FileOutputStream getMetFileOutputStream(String filePath) {
+ Properties envVars = EnvUtilities.getEnv();
+ String cwd = envVars.getProperty("PWD");
+ if (cwd == null) {
+ throw new RuntimeException(
+ "Unable to get current working directory: failing!");
+ }
+
+ if (!cwd.endsWith("/")) {
+ cwd += "/";
+ }
+
+ String metFilePath = cwd
+ + new File(filePath).getName().replaceAll("\\\\", "") + ".met";
+
+ // try and remove the met file, if it already exists
+ // for some reason below, the writeXmlFile method in
+ // XMLUtils doesn't overwrite, and throws an Exception
+ File metFile = new File(metFilePath);
+ if (!metFile.delete()) {
+ LOG.log(Level.WARNING, "Attempt to overwrite met file: ["
+ + metFilePath + "] unsuccessful!");
+ }
+
+ try {
+ return new FileOutputStream(metFile);
+ } catch (FileNotFoundException e) {
+ LOG.log(Level.WARNING, "Could not create met file: [" + metFile
+ + "]: Reason " + e.getMessage(), e);
+ return null;
+ }
+ }
}
+
Modified:
incubator/oodt/trunk/metadata/src/main/java/org/apache/oodt/cas/metadata/extractors/CopyAndRewriteExtractor.java
URL:
http://svn.apache.org/viewvc/incubator/oodt/trunk/metadata/src/main/java/org/apache/oodt/cas/metadata/extractors/CopyAndRewriteExtractor.java?rev=964970&r1=964969&r2=964970&view=diff
==============================================================================
---
incubator/oodt/trunk/metadata/src/main/java/org/apache/oodt/cas/metadata/extractors/CopyAndRewriteExtractor.java
(original)
+++
incubator/oodt/trunk/metadata/src/main/java/org/apache/oodt/cas/metadata/extractors/CopyAndRewriteExtractor.java
Fri Jul 16 22:55:48 2010
@@ -24,6 +24,7 @@ import java.util.logging.Level;
//OODT imports
import org.apache.oodt.cas.metadata.Metadata;
+import org.apache.oodt.cas.metadata.SerializableMetadata;
import org.apache.oodt.cas.metadata.exceptions.MetExtractionException;
import org.apache.oodt.cas.metadata.extractors.CmdLineMetExtractor;
import org.apache.oodt.cas.metadata.util.PathUtils;
@@ -52,80 +53,81 @@ import org.apache.oodt.cas.metadata.util
*/
public class CopyAndRewriteExtractor extends CmdLineMetExtractor {
- private final static String FILENAME = "Filename";
+ private final static String FILENAME = "Filename";
- private final static String FILE_LOCATION = "FileLocation";
+ private final static String FILE_LOCATION = "FileLocation";
- private static CopyAndRewriteConfigReader reader = new CopyAndRewriteConfigReader();
+ private static CopyAndRewriteConfigReader reader = new CopyAndRewriteConfigReader();
- /**
- * Default Constructor.
- *
- */
- public CopyAndRewriteExtractor() {
- super(reader);
- }
-
- /*
- * (non-Javadoc)
- *
- * @see org.apache.oodt.cas.metadata.AbstractMetExtractor#extractMetadata(java.io.File)
- */
- public Metadata extrMetadata(File file) throws MetExtractionException {
- if (this.config == null) {
- throw new MetExtractionException(
- "No config file defined: unable to copy and rewrite metadata!");
- }
-
- Metadata met = null;
-
- try {
- met = new Metadata(new File(PathUtils
- .replaceEnvVariables(((CopyAndRewriteConfig) this.config)
- .getProperty("orig.met.file.path"))).toURL()
- .openStream());
- } catch (Exception e) {
- e.printStackTrace();
- throw new MetExtractionException(
- "error parsing original met file: ["
- + ((CopyAndRewriteConfig) this.config)
- .getProperty("orig.met.file.path")
- + "]: Message: " + e.getMessage());
- }
-
- addDefaultFields(file, met);
-
- // now override
- int numOverrideFields = Integer
- .parseInt(((CopyAndRewriteConfig) this.config)
- .getProperty("numRewriteFields"));
-
- LOG.log(Level.FINE, "Extracting metadata: num rewrite fields: ["
- + numOverrideFields + "]");
-
- for (int i = 0; i < numOverrideFields; i++) {
- String rewriteFieldName = ((CopyAndRewriteConfig) this.config)
- .getProperty("rewriteField" + (i + 1));
- String rewriteFieldStr = ((CopyAndRewriteConfig) this.config)
- .getProperty(rewriteFieldName + ".pattern");
- LOG.log(Level.FINE, "Rewrite string: [" + rewriteFieldStr + "]");
- rewriteFieldStr = PathUtils.replaceEnvVariables(rewriteFieldStr,
- met);
- met.replaceMetadata(rewriteFieldName, rewriteFieldStr);
- }
-
- return met;
-
- }
-
- public static void main(String[] args) throws Exception {
- processMain(args, new CopyAndRewriteExtractor());
- }
-
- private void addDefaultFields(File file, Metadata met) {
- met.replaceMetadata(FILENAME, file.getName());
- met.replaceMetadata(FILE_LOCATION, file.getParentFile()
- .getAbsolutePath());
- }
+ /**
+ * Default Constructor.
+ *
+ */
+ public CopyAndRewriteExtractor() {
+ super(reader);
+ }
+
+ /*
+ * (non-Javadoc)
+ *
+ * @see gov.nasa.jpl.oodt.cas.metadata.AbstractMetExtractor#extractMetadata(java.io.File)
+ */
+ public Metadata extrMetadata(File file) throws MetExtractionException {
+ if (this.config == null) {
+ throw new MetExtractionException(
+ "No config file defined: unable to copy and rewrite metadata!");
+ }
+
+ Metadata met = null;
+
+ try {
+ met = new SerializableMetadata(new File(PathUtils
+ .replaceEnvVariables(((CopyAndRewriteConfig) this.config)
+ .getProperty("orig.met.file.path"))).toURL()
+ .openStream());
+ } catch (Exception e) {
+ e.printStackTrace();
+ throw new MetExtractionException(
+ "error parsing original met file: ["
+ + ((CopyAndRewriteConfig) this.config)
+ .getProperty("orig.met.file.path")
+ + "]: Message: " + e.getMessage());
+ }
+
+ addDefaultFields(file, met);
+
+ // now override
+ int numOverrideFields = Integer
+ .parseInt(((CopyAndRewriteConfig) this.config)
+ .getProperty("numRewriteFields"));
+
+ LOG.log(Level.FINE, "Extracting metadata: num rewrite fields: ["
+ + numOverrideFields + "]");
+
+ for (int i = 0; i < numOverrideFields; i++) {
+ String rewriteFieldName = ((CopyAndRewriteConfig) this.config)
+ .getProperty("rewriteField" + (i + 1));
+ String rewriteFieldStr = ((CopyAndRewriteConfig) this.config)
+ .getProperty(rewriteFieldName + ".pattern");
+ LOG.log(Level.FINE, "Rewrite string: [" + rewriteFieldStr + "]");
+ rewriteFieldStr = PathUtils.replaceEnvVariables(rewriteFieldStr,
+ met);
+ met.replaceMetadata(rewriteFieldName, rewriteFieldStr);
+ }
+
+ return met;
+
+ }
+
+ public static void main(String[] args) throws Exception {
+ processMain(args, new CopyAndRewriteExtractor());
+ }
+
+ private void addDefaultFields(File file, Metadata met) {
+ met.replaceMetadata(FILENAME, file.getName());
+ met.replaceMetadata(FILE_LOCATION, file.getParentFile()
+ .getAbsolutePath());
+ }
}
+
Modified:
incubator/oodt/trunk/metadata/src/main/java/org/apache/oodt/cas/metadata/util/MimeTypeUtils.java
URL:
http://svn.apache.org/viewvc/incubator/oodt/trunk/metadata/src/main/java/org/apache/oodt/cas/metadata/util/MimeTypeUtils.java?rev=964970&r1=964969&r2=964970&view=diff
==============================================================================
---
incubator/oodt/trunk/metadata/src/main/java/org/apache/oodt/cas/metadata/util/MimeTypeUtils.java
(original)
+++
incubator/oodt/trunk/metadata/src/main/java/org/apache/oodt/cas/metadata/util/MimeTypeUtils.java
Fri Jul 16 22:55:48 2010
@@ -166,7 +166,7 @@ public final class MimeTypeUtils {
// if returned null, or if it's the default type then try url resolution
if (type == null
- || (type != null && type.getName().equals(MimeTypes.DEFAULT))) {
+ || (type != null && type.getName().equals(MimeTypes.OCTET_STREAM))) {
// If no mime-type header, or cannot find a corresponding registered
// mime-type, then guess a mime-type from the url pattern
type = this.mimeTypes.getMimeType(url) != null ? this.mimeTypes
@@ -182,7 +182,7 @@ public final class MimeTypeUtils {
if (this.mimeMagic) {
MimeType magicType = this.mimeTypes.getMimeType(data);
if (magicType != null
- && !magicType.getName().equals(MimeTypes.DEFAULT)
+ && !magicType.getName().equals(MimeTypes.OCTET_STREAM)
&& type != null
&& !type.getName().equals(magicType.getName())) {
// If magic enabled and the current mime type differs from that
@@ -196,7 +196,7 @@ public final class MimeTypeUtils {
// default type
if (type == null) {
try {
- type = this.mimeTypes.forName(MimeTypes.DEFAULT);
+ type = this.mimeTypes.forName(MimeTypes.OCTET_STREAM);
} catch (Exception ignore) {
}
}
Modified:
incubator/oodt/trunk/metadata/src/test/org/apache/oodt/cas/metadata/preconditions/TestPreCondEvalUtils.java
URL:
http://svn.apache.org/viewvc/incubator/oodt/trunk/metadata/src/test/org/apache/oodt/cas/metadata/preconditions/TestPreCondEvalUtils.java?rev=964970&r1=964969&r2=964970&view=diff
==============================================================================
---
incubator/oodt/trunk/metadata/src/test/org/apache/oodt/cas/metadata/preconditions/TestPreCondEvalUtils.java
(original)
+++
incubator/oodt/trunk/metadata/src/test/org/apache/oodt/cas/metadata/preconditions/TestPreCondEvalUtils.java
Fri Jul 16 22:55:48 2010
@@ -20,9 +20,11 @@ package org.apache.oodt.cas.metadata.pre
//JDK imports
import java.io.File;
-import java.io.FileNotFoundException;
+import java.io.IOException;
import java.util.LinkedList;
+//Spring imports
+import org.springframework.beans.BeansException;
import org.springframework.context.support.FileSystemXmlApplicationContext;
//Junit imports
@@ -44,21 +46,22 @@ public class TestPreCondEvalUtils extend
private PreCondEvalUtils evalUtils;
- private static final String MET_EXTR_TEST_FILE = "./src/main/resources/examples/met_extr_preconditions.xml";
- public TestPreCondEvalUtils() throws FileNotFoundException,
- ClassNotFoundException, InstantiationException,
- IllegalAccessException {
+ public TestPreCondEvalUtils() throws ClassNotFoundException, InstantiationException,
+ IllegalAccessException, BeansException, IOException {
this.preconditions = new LinkedList<String>();
this.preconditions.add("CheckThatDataFileSizeIsGreaterThanZero");
this.preconditions.add("CheckThatDataFileExists");
this.preconditions.add("CheckDataFileMimeType");
- this.evalUtils = new PreCondEvalUtils(new FileSystemXmlApplicationContext(MET_EXTR_TEST_FILE));
+ File preCondFile = new File(getClass().getResource("met_extr_preconditions.xml").getFile());
+ assertNotNull(preCondFile);
+ this.evalUtils = new PreCondEvalUtils(new FileSystemXmlApplicationContext(preCondFile.toURL().toExternalForm()));
}
public void testEval(){
- File prodFile = new File(MET_EXTR_TEST_FILE);
+ File prodFile = null;
try{
+ prodFile = new File(getClass().getResource("met_extr_preconditions.xml").getFile());
assertTrue(this.evalUtils.eval(this.preconditions, prodFile));
}
catch(Throwable e){
Added: incubator/oodt/trunk/metadata/src/testdata/met_extr_preconditions.xml
URL:
http://svn.apache.org/viewvc/incubator/oodt/trunk/metadata/src/testdata/met_extr_preconditions.xml?rev=964970&view=auto
==============================================================================
--- incubator/oodt/trunk/metadata/src/testdata/met_extr_preconditions.xml (added)
+++ incubator/oodt/trunk/metadata/src/testdata/met_extr_preconditions.xml Fri Jul 16 22:55:48 2010
@@ -0,0 +1,48 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Copyright (c) 2008, California Institute of Technology.
+ ALL RIGHTS RESERVED. U.S. Government sponsorship acknowledged.
+
+ $Id$
+
+ Author: bfoster, mattmann
+ Description: Describes pre-conditions that should be evaluated before
+ running a particular MetExtractor.
+-->
+
+<beans xmlns="http://www.springframework.org/schema/beans"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://www.springframework.org/schema/beans
http://www.springframework.org/schema/beans/spring-beans-2.5.xsd">
+
+
+ <!-- Precondition Comparators -->
+ <bean id="PreconditionComparator" lazy-init="true" abstract="true"
class="org.apache.oodt.cas.metadata.preconditions.PreConditionComparator"/>
+
+
+ <bean id="CheckThatDataFileSizeIsGreaterThanZero" lazy-init="true"
parent="PreconditionComparator"
class="org.apache.oodt.cas.metadata.preconditions.FileSizeComparator">
+ <property name="description" value="Check if the current data file
size is greater than zero"/>
+ <property name="compareItem">
+ <value type="java.lang.Long">0</value>
+ </property>
+ <property name="type" value="greater_than"/>
+ </bean>
+
+ <bean id="CheckThatDataFileExists" lazy-init="true"
parent="PreconditionComparator"
class="org.apache.oodt.cas.metadata.preconditions.ExistanceCheckComparator">
+ <property name="description" value="Check if the current data file
exists"/>
+ <property name="compareItem">
+ <value type="java.lang.Boolean">true</value>
+ </property>
+ <property name="type" value="equal_to"/>
+ </bean>
+
+ <bean id="CheckDataFileMimeType" lazy-init="true"
parent="PreconditionComparator"
class="org.apache.oodt.cas.metadata.preconditions.MimeTypeComparator">
+ <property name="description" value="Check that data file mime type
matches the specified mime type"/>
+ <property name="compareItem">
+ <value type="java.lang.String">application/xml</value>
+ </property>
+ <property name="type" value="equal_to"/>
+ <property name="mimeTypeRepo"
value="./src/testdata/tika-mimetypes.xml"/>
+ <property name="useMagic" value="true"/>
+ </bean>
+
+</beans>
\ No newline at end of file
Added: incubator/oodt/trunk/metadata/src/testdata/tika-mimetypes.xml
URL:
http://svn.apache.org/viewvc/incubator/oodt/trunk/metadata/src/testdata/tika-mimetypes.xml?rev=964970&view=auto
==============================================================================
--- incubator/oodt/trunk/metadata/src/testdata/tika-mimetypes.xml (added)
+++ incubator/oodt/trunk/metadata/src/testdata/tika-mimetypes.xml Fri Jul 16 22:55:48 2010
@@ -0,0 +1,654 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Copyright (c) 2008, California Institute of Technology.
+ ALL RIGHTS RESERVED. U.S. Government sponsorship acknowledged.
+
+ $Id$
+ -->
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
+ Description: This xml file defines the valid mime types used by Tika.
+ The mime types within this file are based on the types in the
mime-types.xml
+ file available in Apache Nutch.
+-->
+
+<mime-info>
+
+ <mime-type type="text/plain">
+ <magic priority="50">
+ <match value="This is TeX," type="string" offset="0" />
+ <match value="This is METAFONT," type="string"
offset="0" />
+ </magic>
+ <glob pattern="*.txt" />
+ <glob pattern="*.asc" />
+
+ <!-- TIKA-85: http://www.apache.org/dev/svn-eol-style.txt -->
+ <glob pattern="INSTALL" />
+ <glob pattern="KEYS" />
+ <glob pattern="Makefile" />
+ <glob pattern="README" />
+ <glob pattern="abs-linkmap" />
+ <glob pattern="abs-menulinks" />
+ <glob pattern="*.aart" />
+ <glob pattern="*.ac" />
+ <glob pattern="*.am" />
+ <glob pattern="*.bat" />
+ <glob pattern="*.c" />
+ <glob pattern="*.cat" />
+ <glob pattern="*.cgi" />
+ <glob pattern="*.classpath" />
+ <glob pattern="*.cmd" />
+ <glob pattern="*.conf" />
+ <glob pattern="*.config" />
+ <glob pattern="*.cpp" />
+ <glob pattern="*.css" />
+ <glob pattern="*.cwiki" />
+ <glob pattern="*.data" />
+ <glob pattern="*.dcl" />
+ <glob pattern="*.dtd" />
+ <glob pattern="*.egrm" />
+ <glob pattern="*.ent" />
+ <glob pattern="*.ft" />
+ <glob pattern="*.fn" />
+ <glob pattern="*.fv" />
+ <glob pattern="*.grm" />
+ <glob pattern="*.g" />
+ <glob pattern="*.h" />
+ <glob pattern=".htaccess" />
+ <glob pattern="*.ihtml" />
+ <glob pattern="*.in" />
+ <glob pattern="*.java" />
+ <glob pattern="*.jmx" />
+ <glob pattern="*.jsp" />
+ <glob pattern="*.js" />
+ <glob pattern="*.junit" />
+ <glob pattern="*.jx" />
+ <glob pattern="*.manifest" />
+ <glob pattern="*.m4" />
+ <glob pattern="*.mf" />
+ <glob pattern="*.MF" />
+ <glob pattern="*.meta" />
+ <glob pattern="*.mod" />
+ <glob pattern="*.n3" />
+ <glob pattern="*.pen" />
+ <glob pattern="*.pl" />
+ <glob pattern="*.pm" />
+ <glob pattern="*.pod" />
+ <glob pattern="*.pom" />
+ <glob pattern="*.project" />
+ <glob pattern="*.properties" />
+ <glob pattern="*.py" />
+ <glob pattern="*.rb" />
+ <glob pattern="*.rdf" />
+ <glob pattern="*.rnc" />
+ <glob pattern="*.rng" />
+ <glob pattern="*.rnx" />
+ <glob pattern="*.roles" />
+ <glob pattern="*.sh" />
+ <glob pattern="*.sql" />
+ <glob pattern="*.svg" />
+ <glob pattern="*.tld" />
+ <glob pattern="*.types" />
+ <glob pattern="*.vm" />
+ <glob pattern="*.vsl" />
+ <glob pattern="*.wsdd" />
+ <glob pattern="*.wsdl" />
+ <glob pattern="*.xargs" />
+ <glob pattern="*.xcat" />
+ <glob pattern="*.xconf" />
+ <glob pattern="*.xegrm" />
+ <glob pattern="*.xgrm" />
+ <glob pattern="*.xlex" />
+ <glob pattern="*.xlog" />
+ <glob pattern="*.xmap" />
+ <glob pattern="*.xroles" />
+ <glob pattern="*.xsamples" />
+ <glob pattern="*.xsd" />
+ <glob pattern="*.xsl" />
+ <glob pattern="*.xslt" />
+ <glob pattern="*.xsp" />
+ <glob pattern="*.xul" />
+ <glob pattern="*.xweb" />
+ <glob pattern="*.xwelcome" />
+ </mime-type>
+
+ <mime-type type="text/html">
+ <magic priority="50">
+ <match value="&lt;!DOCTYPE HTML" type="string"
+ offset="0:64" />
+ <match value="&lt;!doctype html" type="string"
+ offset="0:64" />
+ <match value="&lt;HEAD" type="string" offset="0:64" />
+ <match value="&lt;head" type="string" offset="0:64" />
+ <match value="&lt;TITLE" type="string" offset="0:64" />
+ <match value="&lt;title" type="string" offset="0:64" />
+ <match value="&lt;html" type="string" offset="0:64" />
+ <match value="&lt;HTML" type="string" offset="0:64" />
+ <match value="&lt;BODY" type="string" offset="0" />
+ <match value="&lt;body" type="string" offset="0" />
+ <match value="&lt;TITLE" type="string" offset="0" />
+ <match value="&lt;title" type="string" offset="0" />
+ <match value="&lt;!--" type="string" offset="0" />
+ <match value="&lt;h1" type="string" offset="0" />
+ <match value="&lt;H1" type="string" offset="0" />
+ <match value="&lt;!doctype HTML" type="string" offset="0" />
+ <match value="&lt;!DOCTYPE html" type="string" offset="0" />
+ </magic>
+ <glob pattern="*.html" />
+ <glob pattern="*.htm" />
+ </mime-type>
+
+ <mime-type type="application/xhtml+xml">
+ <sub-class-of type="application/xml" />
+ <glob pattern="*.xhtml" />
+ <root-XML namespaceURI='http://www.w3.org/1999/xhtml'
+ localName='html' />
+ </mime-type>
+
+ <mime-type type="application/vnd.ms-powerpoint">
+ <glob pattern="*.ppz" />
+ <glob pattern="*.ppt" />
+ <glob pattern="*.pps" />
+ <glob pattern="*.pot" />
+ <magic priority="50">
+ <match value="0xcfd0e011" type="little32" offset="0" />
+ </magic>
+ </mime-type>
+
+ <mime-type type="application/vnd.ms-excel">
+ <magic priority="50">
+ <match value="Microsoft Excel 5.0 Worksheet"
type="string"
+ offset="2080" />
+ </magic>
+ <glob pattern="*.xls" />
+ <glob pattern="*.xlc" />
+ <glob pattern="*.xll" />
+ <glob pattern="*.xlm" />
+ <glob pattern="*.xlw" />
+ <glob pattern="*.xla" />
+ <glob pattern="*.xlt" />
+ <glob pattern="*.xld" />
+ <alias type="application/msexcel" />
+ </mime-type>
+
+<!-- ===================================================================== -->
+<!-- Open Document Format for Office Applications (OpenDocument) v1.0 -->
+<!-- http://www.oasis-open.org/specs/index.php#opendocumentv1.0 -->
+<!-- ===================================================================== -->
+
+ <mime-type type="application/vnd.oasis.opendocument.text">
+ <comment>OpenDocument v1.0: Text document</comment>
+ <alias type="application/x-vnd.oasis.opendocument.text" />
+ <glob pattern="*.odt" />
+ <magic>
+ <match type="string" offset="0" value="PK">
+ <match type="string" offset="30"
+
value="mimetypeapplication/vnd.oasis.opendocument.text" />
+ </match>
+ </magic>
+ </mime-type>
+
+ <mime-type type="application/vnd.oasis.opendocument.text-template">
+ <comment>OpenDocument v1.0: Text document used as
template</comment>
+ <alias
type="application/x-vnd.oasis.opendocument.text-template" />
+ <glob pattern="*.ott" />
+ <magic>
+ <match type="string" offset="0" value="PK">
+ <match type="string" offset="30"
+
value="mimetypeapplication/vnd.oasis.opendocument.text-template" />
+ </match>
+ </magic>
+ </mime-type>
+
+ <mime-type type="application/vnd.oasis.opendocument.graphics">
+ <comment>OpenDocument v1.0: Graphics document
(Drawing)</comment>
+ <alias type="application/x-vnd.oasis.opendocument.graphics" />
+ <glob pattern="*.odg" />
+ <magic>
+ <match type="string" offset="0" value="PK">
+ <match type="string" offset="30"
+
value="mimetypeapplication/vnd.oasis.opendocument.graphics" />
+ </match>
+ </magic>
+ </mime-type>
+
+ <mime-type type="application/vnd.oasis.opendocument.graphics-template">
+ <comment>OpenDocument v1.0: Graphics document used as
template</comment>
+ <alias
type="application/x-vnd.oasis.opendocument.graphics-template" />
+ <glob pattern="*.otg" />
+ <magic>
+ <match type="string" offset="0" value="PK">
+ <match type="string" offset="30"
+
value="mimetypeapplication/vnd.oasis.opendocument.graphics-template" />
+ </match>
+ </magic>
+ </mime-type>
+
+ <mime-type type="application/vnd.oasis.opendocument.presentation">
+ <comment>OpenDocument v1.0: Presentation document</comment>
+ <alias type="application/x-vnd.oasis.opendocument.presentation"
/>
+ <glob pattern="*.odp" />
+ <magic>
+ <match type="string" offset="0" value="PK">
+ <match type="string" offset="30"
+
value="mimetypeapplication/vnd.oasis.opendocument.presentation" />
+ </match>
+ </magic>
+ </mime-type>
+
+ <mime-type
type="application/vnd.oasis.opendocument.presentation-template">
+ <comment>OpenDocument v1.0: Presentation document used as
template</comment>
+ <alias
type="application/x-vnd.oasis.opendocument.presentation-template" />
+ <glob pattern="*.otp" />
+ <magic>
+ <match type="string" offset="0" value="PK">
+ <match type="string" offset="30"
+
value="mimetypeapplication/vnd.oasis.opendocument.presentation-template" />
+ </match>
+ </magic>
+ </mime-type>
+
+ <mime-type type="application/vnd.oasis.opendocument.spreadsheet">
+ <comment>OpenDocument v1.0: Spreadsheet document</comment>
+ <alias type="application/x-vnd.oasis.opendocument.spreadsheet"
/>
+ <glob pattern="*.ods" />
+ <magic>
+ <match type="string" offset="0" value="PK">
+ <match type="string" offset="30"
+
value="mimetypeapplication/vnd.oasis.opendocument.spreadsheet" />
+ </match>
+ </magic>
+ </mime-type>
+
+ <mime-type
type="application/vnd.oasis.opendocument.spreadsheet-template">
+ <comment>OpenDocument v1.0: Spreadsheet document used as
template</comment>
+ <alias
type="application/x-vnd.oasis.opendocument.spreadsheet-template" />
+ <glob pattern="*.ots" />
+ <magic>
+ <match type="string" offset="0" value="PK">
+ <match type="string" offset="30"
+
value="mimetypeapplication/vnd.oasis.opendocument.spreadsheet-template" />
+ </match>
+ </magic>
+ </mime-type>
+
+ <mime-type type="application/vnd.oasis.opendocument.chart">
+ <comment>OpenDocument v1.0: Chart document</comment>
+ <alias type="application/x-vnd.oasis.opendocument.chart" />
+ <glob pattern="*.odc" />
+ <magic>
+ <match type="string" offset="0" value="PK">
+ <match type="string" offset="30"
+
value="mimetypeapplication/vnd.oasis.opendocument.chart" />
+ </match>
+ </magic>
+ </mime-type>
+
+ <mime-type type="application/vnd.oasis.opendocument.chart-template">
+ <comment>OpenDocument v1.0: Chart document used as
template</comment>
+ <alias
type="application/x-vnd.oasis.opendocument.chart-template" />
+ <glob pattern="*.otc" />
+ <magic>
+ <match type="string" offset="0" value="PK">
+ <match type="string" offset="30"
+
value="mimetypeapplication/vnd.oasis.opendocument.chart-template" />
+ </match>
+ </magic>
+ </mime-type>
+
+ <mime-type type="application/vnd.oasis.opendocument.image">
+ <comment>OpenDocument v1.0: Image document</comment>
+ <alias type="application/x-vnd.oasis.opendocument.image" />
+ <glob pattern="*.odi" />
+ <magic>
+ <match type="string" offset="0" value="PK">
+ <match type="string" offset="30"
+
value="mimetypeapplication/vnd.oasis.opendocument.image" />
+ </match>
+ </magic>
+ </mime-type>
+
+ <mime-type type="application/vnd.oasis.opendocument.image-template"> <!-- matched by *.oti, or by "PK" at offset 0 with the nested "mimetype..." string at offset 30 -->
+ <comment>OpenDocument v1.0: Image document used as template</comment>
+ <alias type="application/x-vnd.oasis.opendocument.image-template" />
+ <glob pattern="*.oti" />
+ <magic>
+ <match type="string" offset="0" value="PK">
+ <match type="string" offset="30"
+ value="mimetypeapplication/vnd.oasis.opendocument.image-template" />
+ </match>
+ </magic>
+ </mime-type>
+
+ <mime-type type="application/vnd.oasis.opendocument.formula"> <!-- matched by *.odf, or by "PK" at offset 0 with the nested "mimetype..." string at offset 30 -->
+ <comment>OpenDocument v1.0: Formula document</comment>
+ <alias type="application/x-vnd.oasis.opendocument.formula" />
+ <glob pattern="*.odf" />
+ <magic>
+ <match type="string" offset="0" value="PK">
+ <match type="string" offset="30"
+ value="mimetypeapplication/vnd.oasis.opendocument.formula" />
+ </match>
+ </magic>
+ </mime-type>
+
+ <mime-type type="application/vnd.oasis.opendocument.formula-template"> <!-- matched by *.otf, or by "PK" at offset 0 with the nested "mimetype..." string at offset 30 -->
+ <comment>OpenDocument v1.0: Formula document used as template</comment>
+ <alias type="application/x-vnd.oasis.opendocument.formula-template" />
+ <glob pattern="*.otf" />
+ <magic>
+ <match type="string" offset="0" value="PK">
+ <match type="string" offset="30"
+ value="mimetypeapplication/vnd.oasis.opendocument.formula-template" />
+ </match>
+ </magic>
+ </mime-type>
+
+ <mime-type type="application/vnd.oasis.opendocument.text-master"> <!-- matched by *.odm, or by "PK" at offset 0 with the nested "mimetype..." string at offset 30 -->
+ <comment>OpenDocument v1.0: Global Text document</comment>
+ <alias type="application/x-vnd.oasis.opendocument.text-master" />
+ <glob pattern="*.odm" />
+ <magic>
+ <match type="string" offset="0" value="PK">
+ <match type="string" offset="30"
+ value="mimetypeapplication/vnd.oasis.opendocument.text-master" />
+ </match>
+ </magic>
+ </mime-type>
+
+ <mime-type type="application/vnd.oasis.opendocument.text-web"> <!-- matched by *.oth, or by "PK" at offset 0 with the nested "mimetype..." string at offset 30 -->
+ <comment>OpenDocument v1.0: Text document used as template for HTML documents</comment>
+ <alias type="application/x-vnd.oasis.opendocument.text-web" />
+ <glob pattern="*.oth" />
+ <magic>
+ <match type="string" offset="0" value="PK">
+ <match type="string" offset="30"
+ value="mimetypeapplication/vnd.oasis.opendocument.text-web" />
+ </match>
+ </magic>
+ </mime-type>
+
+ <mime-type type="application/zip"> <!-- matched by the "PK\003\004" magic at offset 0 (priority 40) or by the *.zip extension -->
+ <alias type="application/x-zip-compressed" />
+ <magic priority="40">
+ <match value="PK\003\004" type="string" offset="0" />
+ </magic>
+ <glob pattern="*.zip" />
+ </mime-type>
+
+ <mime-type type="application/msword"> <!-- matched by *.doc or by the priority-50 magic rules below; NOTE(review): sibling match rules are presumably alternatives, confirm against the reader -->
+ <magic priority="50">
+ <match value="\x31\xbe\x00\x00" type="string" offset="0" />
+ <match value="PO^Q`" type="string" offset="0" />
+ <match value="\376\067\0\043" type="string" offset="0" />
+ <match value="\333\245-\0\0\0" type="string" offset="0" />
+ <match value="Microsoft Word 6.0 Document" type="string"
+ offset="2080" />
+ <match value="Microsoft Word document data" type="string"
+ offset="2112" />
+ </magic>
+ <glob pattern="*.doc" />
+ <alias type="application/vnd.ms-word" />
+ </mime-type>
+
+ <mime-type type="application/octet-stream"> <!-- matched by *.bin or by the priority-50 magic rules at offset 0; NOTE(review): 017437 and 0145405 look like octal host16 literals, confirm the reader parses them that way -->
+ <magic priority="50">
+ <match value="\037\036" type="string" offset="0" />
+ <match value="017437" type="host16" offset="0" />
+ <match value="0x1fff" type="host16" offset="0" />
+ <match value="\377\037" type="string" offset="0" />
+ <match value="0145405" type="host16" offset="0" />
+ </magic>
+ <glob pattern="*.bin" />
+ </mime-type>
+
+ <mime-type type="application/pdf"> <!-- matched by the "%PDF-" string at offset 0 (priority 50) or by the *.pdf extension -->
+ <magic priority="50">
+ <match value="%PDF-" type="string" offset="0" />
+ </magic>
+ <glob pattern="*.pdf" />
+ <alias type="application/x-pdf" />
+ </mime-type>
+
+ <mime-type type="application/atom+xml"> <!-- matched by a root "feed" element in the Atom 0.3 namespace or in the Atom 1.0 (RFC 4287) namespace -->
+ <root-XML localName="feed"
+ namespaceURI="http://purl.org/atom/ns#" />
+ <root-XML localName="feed"
+ namespaceURI="http://www.w3.org/2005/Atom" />
+ </mime-type>
+
+ <mime-type type="application/mac-binhex40"> <!-- glob-only entry: *.hqx -->
+ <glob pattern="*.hqx" />
+ </mime-type>
+
+ <mime-type type="application/mac-compactpro"> <!-- glob-only entry: *.cpt -->
+ <glob pattern="*.cpt" />
+ </mime-type>
+
+ <mime-type type="application/rtf"> <!-- matched by *.rtf; text/rtf is declared as an alias -->
+ <glob pattern="*.rtf"/>
+ <alias type="text/rtf" />
+ </mime-type>
+
+ <mime-type type="application/rss+xml"> <!-- matched by a root element named "rss", by a root element in the RSS 1.0 namespace, or by *.rss -->
+ <alias type="text/rss" />
+ <root-XML localName="rss" />
+ <root-XML namespaceURI="http://purl.org/rss/1.0/" />
+ <glob pattern="*.rss" />
+ </mime-type>
+
+ <!-- added in by mattmann -->
+ <mime-type type="application/xml"> <!-- matched by *.xml; text/xml is declared as an alias; no magic or root-XML rule in this entry -->
+ <alias type="text/xml" />
+ <glob pattern="*.xml" />
+ </mime-type>
+
+ <mime-type type="application/x-mif"> <!-- alias-only entry: no glob or magic rule is declared here -->
+ <alias type="application/vnd.mif" />
+ </mime-type>
+
+ <mime-type type="application/vnd.wap.wbxml"> <!-- glob-only entry: *.wbxml -->
+ <glob pattern="*.wbxml" />
+ </mime-type>
+
+ <mime-type type="application/vnd.wap.wmlc"> <!-- glob-only entry: *.wmlc -->
+ <_comment>Compiled WML Document</_comment>
+ <glob pattern="*.wmlc" />
+ </mime-type>
+
+ <mime-type type="application/vnd.wap.wmlscriptc"> <!-- glob-only entry: *.wmlsc -->
+ <_comment>Compiled WML Script</_comment>
+ <glob pattern="*.wmlsc" />
+ </mime-type>
+
+ <mime-type type="text/vnd.wap.wmlscript"> <!-- glob-only entry: *.wmls -->
+ <_comment>WML Script</_comment>
+ <glob pattern="*.wmls" />
+ </mime-type>
+
+ <mime-type type="application/x-bzip"> <!-- alias-only entry: no glob or magic rule is declared here -->
+ <alias type="application/x-bzip2" />
+ </mime-type>
+
+ <mime-type type="application/x-bzip-compressed-tar"> <!-- glob-only entry: *.tbz, *.tbz2 -->
+ <glob pattern="*.tbz" />
+ <glob pattern="*.tbz2" />
+ </mime-type>
+
+ <mime-type type="application/x-cdlink"> <!-- glob-only entry: *.vcd -->
+ <_comment>Virtual CD-ROM CD Image File</_comment>
+ <glob pattern="*.vcd" />
+ </mime-type>
+
+ <mime-type type="application/x-director"> <!-- glob-only entry: *.dcr, *.dir, *.dxr -->
+ <_comment>Shockwave Movie</_comment>
+ <glob pattern="*.dcr" />
+ <glob pattern="*.dir" />
+ <glob pattern="*.dxr" />
+ </mime-type>
+
+ <mime-type type="application/x-futuresplash"> <!-- glob-only entry: *.spl -->
+ <_comment>Macromedia FutureSplash File</_comment>
+ <glob pattern="*.spl" />
+ </mime-type>
+
+ <mime-type type="application/x-java"> <!-- alias-only entry: no glob or magic rule is declared here -->
+ <alias type="application/java" />
+ </mime-type>
+
+ <mime-type type="application/x-koan"> <!-- glob-only entry: *.skp, *.skd, *.skt, *.skm -->
+ <_comment>SSEYO Koan File</_comment>
+ <glob pattern="*.skp" />
+ <glob pattern="*.skd" />
+ <glob pattern="*.skt" />
+ <glob pattern="*.skm" />
+ </mime-type>
+
+ <mime-type type="application/x-latex"> <!-- glob-only entry: *.latex -->
+ <_comment>LaTeX Source Document</_comment>
+ <glob pattern="*.latex" />
+ </mime-type>
+
+ <!-- JC CHANGED
+ <mime-type type="application/x-mif">
+ <_comment>FrameMaker MIF document</_comment>
+ <glob pattern="*.mif"/>
+ </mime-type> -->
+
+ <mime-type type="application/x-ms-dos-executable"> <!-- alias-only entry: no glob or magic rule is declared here -->
+ <alias type="application/x-dosexec" />
+ </mime-type>
+
+ <mime-type type="application/ogg"> <!-- alias-only entry: no glob or magic rule is declared here -->
+ <alias type="application/x-ogg" />
+ </mime-type>
+
+ <mime-type type="application/x-rar"> <!-- alias-only entry: no glob or magic rule is declared here -->
+ <alias type="application/x-rar-compressed" />
+ </mime-type>
+
+ <mime-type type="application/x-shellscript"> <!-- alias-only entry: no glob or magic rule is declared here -->
+ <alias type="application/x-sh" />
+ </mime-type>
+
+ <mime-type type="application/xhtml+xml"> <!-- glob-only entry: *.xht -->
+ <glob pattern="*.xht" />
+ </mime-type>
+
+ <mime-type type="audio/midi"> <!-- glob-only entry: *.kar -->
+ <glob pattern="*.kar" />
+ </mime-type>
+
+ <mime-type type="audio/x-pn-realaudio"> <!-- alias-only entry: no glob or magic rule is declared here -->
+ <alias type="audio/x-realaudio" />
+ </mime-type>
+
+ <mime-type type="image/tiff"> <!-- TIFF header: byte-order mark "MM" (big-endian) or "II" (little-endian), then the 16-bit value 42 stored in that byte order -->
+ <magic priority="50">
+ <match value="0x4d4d002a" type="string" offset="0" />
+ <match value="0x49492a00" type="string" offset="0" />
+ </magic>
+ </mime-type>
+
+ <mime-type type="message/rfc822"> <!-- magic-only entry: priority-50 string rules, all anchored at offset 0; no glob is declared here -->
+ <magic priority="50">
+ <match type="string" value="Relay-Version:" offset="0" />
+ <match type="string" value="#! rnews" offset="0" />
+ <match type="string" value="N#! rnews" offset="0" />
+ <match type="string" value="Forward to" offset="0" />
+ <match type="string" value="Pipe to" offset="0" />
+ <match type="string" value="Return-Path:" offset="0" />
+ <match type="string" value="From:" offset="0" />
+ <match type="string" value="Message-ID:" offset="0" />
+ <match type="string" value="Date:" offset="0" />
+ </magic>
+ </mime-type>
+
+ <mime-type type="image/vnd.wap.wbmp"> <!-- glob-only entry: *.wbmp -->
+ <_comment>Wireless Bitmap File Format</_comment>
+ <glob pattern="*.wbmp" />
+ </mime-type>
+
+ <mime-type type="image/x-psd"> <!-- alias-only entry: no glob or magic rule is declared here -->
+ <alias type="image/photoshop" />
+ </mime-type>
+
+ <mime-type type="image/x-xcf"> <!-- matched by the "gimp xcf " string (trailing space included) at offset 0, priority 50; no glob is declared here -->
+ <alias type="image/xcf" />
+ <magic priority="50">
+ <match type="string" value="gimp xcf " offset="0" />
+ </magic>
+ </mime-type>
+
+ <mime-type type="model/iges"> <!-- glob-only entry: *.igs, *.iges -->
+ <_comment>
+ Initial Graphics Exchange Specification Format
+ </_comment>
+ <glob pattern="*.igs" />
+ <glob pattern="*.iges" />
+ </mime-type>
+
+ <mime-type type="model/mesh"> <!-- glob-only entry: *.msh, *.mesh, *.silo -->
+ <glob pattern="*.msh" />
+ <glob pattern="*.mesh" />
+ <glob pattern="*.silo" />
+ </mime-type>
+
+ <mime-type type="model/vrml"> <!-- glob-only entry: *.vrml -->
+ <glob pattern="*.vrml" />
+ </mime-type>
+
+ <mime-type type="text/x-tcl"> <!-- alias-only entry: no glob or magic rule is declared here -->
+ <alias type="application/x-tcl" />
+ </mime-type>
+
+ <mime-type type="text/x-tex"> <!-- alias-only entry: no glob or magic rule is declared here -->
+ <alias type="application/x-tex" />
+ </mime-type>
+
+ <mime-type type="text/x-texinfo"> <!-- alias-only entry: no glob or magic rule is declared here -->
+ <alias type="application/x-texinfo" />
+ </mime-type>
+
+ <mime-type type="text/x-troff-me"> <!-- alias-only entry: no glob or magic rule is declared here -->
+ <alias type="application/x-troff-me" />
+ </mime-type>
+
+ <mime-type type="video/vnd.mpegurl"> <!-- glob-only entry: *.mxu -->
+ <glob pattern="*.mxu" />
+ </mime-type>
+
+ <mime-type type="x-conference/x-cooltalk"> <!-- glob-only entry: *.ice -->
+ <_comment>Cooltalk Audio</_comment>
+ <glob pattern="*.ice" />
+ </mime-type>
+
+<!-- ===================================================================== -->
+<!-- TIKA-85: http://www.apache.org/dev/svn-eol-style.txt -->
+<!-- ===================================================================== -->
+
+ <mime-type type="image/x-icon"> <!-- glob-only entry: *.ico -->
+ <glob pattern="*.ico" />
+ </mime-type>
+
+ <mime-type type="image/jpeg"> <!-- glob-only entry: *.jpg (no magic rule and no other extension is declared here) -->
+ <glob pattern="*.jpg" />
+ </mime-type>
+
+ <mime-type type="image/png"> <!-- glob-only entry: *.png (no magic rule is declared here) -->
+ <glob pattern="*.png" />
+ </mime-type>
+
+</mime-info>