Author: jukka
Date: Fri Aug 17 12:11:25 2007
New Revision: 567112
URL: http://svn.apache.org/viewvc?view=rev&rev=567112
Log:
TIKA-8: Replaced the jmimeinfo dependency with a trivial mime type detector.
Modified:
incubator/tika/trunk/src/main/java/org/apache/tika/parser/ParserFactory.java
incubator/tika/trunk/src/main/java/org/apache/tika/utils/MimeTypesUtils.java
Modified:
incubator/tika/trunk/src/main/java/org/apache/tika/parser/ParserFactory.java
URL:
http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/parser/ParserFactory.java?view=diff&rev=567112&r1=567111&r2=567112
==============================================================================
---
incubator/tika/trunk/src/main/java/org/apache/tika/parser/ParserFactory.java
(original)
+++
incubator/tika/trunk/src/main/java/org/apache/tika/parser/ParserFactory.java
Fri Aug 17 12:11:25 2007
@@ -24,7 +24,6 @@
import org.apache.tika.config.ParserConfig;
import org.apache.tika.exception.LiusException;
import org.apache.tika.utils.MimeTypesUtils;
-import net.hedges.mimeinfo.MimeInfoException;
import org.apache.log4j.Logger;
@@ -41,7 +40,7 @@
* Build parser from file and Lius config object
*/
public static Parser getParser(File file, LiusConfig tc)
- throws MimeInfoException, IOException, LiusException {
+ throws IOException, LiusException {
String mimeType = MimeTypesUtils.getMimeType(file);
ParserConfig pc = tc.getParserConfig(mimeType);
String className = pc.getParserClass();
@@ -76,7 +75,7 @@
* Build parser from string file path and Lius config object
*/
public static Parser getParser(String str, LiusConfig tc)
- throws MimeInfoException, IOException, LiusException {
+ throws IOException, LiusException {
return getParser(new File(str), tc);
}
@@ -84,7 +83,7 @@
* Build parser from string file path and Lius config file path
*/
public static Parser getParser(String str, String tcPath)
- throws MimeInfoException, IOException, LiusException {
+ throws IOException, LiusException {
LiusConfig tc = LiusConfig.getInstance(tcPath);
return getParser(new File(str), tc);
}
@@ -93,7 +92,7 @@
* Build parser from file and Lius config file path
*/
public static Parser getParser(File file, String tcPath)
- throws MimeInfoException, IOException, LiusException {
+ throws IOException, LiusException {
LiusConfig tc = LiusConfig.getInstance(tcPath);
return getParser(file, tc);
}
Modified:
incubator/tika/trunk/src/main/java/org/apache/tika/utils/MimeTypesUtils.java
URL:
http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/utils/MimeTypesUtils.java?view=diff&rev=567112&r1=567111&r2=567112
==============================================================================
---
incubator/tika/trunk/src/main/java/org/apache/tika/utils/MimeTypesUtils.java
(original)
+++
incubator/tika/trunk/src/main/java/org/apache/tika/utils/MimeTypesUtils.java
Fri Aug 17 12:11:25 2007
@@ -17,10 +17,6 @@
package org.apache.tika.utils;
import java.io.File;
-import java.io.IOException;
-
-import net.hedges.mimeinfo.MimeInfo;
-import net.hedges.mimeinfo.MimeInfoException;
/**
* Detect mime type from file
@@ -29,10 +25,32 @@
*/
public class MimeTypesUtils {
- public static String getMimeType(File file) throws MimeInfoException,
- IOException {
- MimeInfo mimeInfo = new MimeInfo();
- return mimeInfo.getMimeType(file);
+ public static String getMimeType(File file) {
+ // FIXME: See TIKA-8
+ String name = file.getName().toLowerCase();
+ if (name.endsWith(".txt")) {
+ return "text/plain";
+ } else if (name.endsWith(".pdf")) {
+ return "application/pdf";
+ } else if (name.endsWith(".htm")) {
+ return "text/html";
+ } else if (name.endsWith(".html")) {
+ return "text/html";
+ } else if (name.endsWith(".xhtml")) {
+ return "application/xhtml+xml";
+ } else if (name.endsWith(".xml")) {
+ return "application/xml";
+ } else if (name.endsWith(".doc")) {
+ return "application/msword";
+ } else if (name.endsWith(".ppt")) {
+ return "application/vnd.ms-powerpoint";
+ } else if (name.endsWith(".xls")) {
+ return "application/vnd.ms-excel";
+ } else if (name.endsWith(".zip")) {
+ return "application/zip";
+ } else {
+ return "application/octet-stream";
+ }
}
}