Author: jukka
Date: Wed Sep 30 15:04:53 2009
New Revision: 820291
URL: http://svn.apache.org/viewvc?rev=820291&view=rev
Log:
TIKA-296: Automatically set the supertype for "+xml" mimetypes
Patch by Ken Krugler.
Modified:
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypes.java
lucene/tika/trunk/tika-core/src/test/java/org/apache/tika/mime/MimeDetectionTest.java
lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/MimeTypesTest.java
Modified:
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypes.java
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypes.java?rev=820291&r1=820290&r2=820291&view=diff
==============================================================================
---
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypes.java
(original)
+++
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypes.java
Wed Sep 30 15:04:53 2009
@@ -53,15 +53,22 @@
public final class MimeTypes implements Detector {
/**
- * Name of the {@link #root root} type, application/octet-stream.
+ * Name of the {@link #rootMimeType root} type, application/octet-stream.
*/
public final static String OCTET_STREAM = "application/octet-stream";
/**
- * Name of the {@link #text text} type, text/plain.
+ * Name of the {@link #textMimeType text} type, text/plain.
*/
public final static String PLAIN_TEXT = "text/plain";
+
+ /**
+ * Name of the {@link #xmlMimeType xml} type, application/xml.
+ */
+ public final static String XML = "application/xml";
+
+
/**
* Lookup table for all the ASCII/ISO-Latin/UTF-8/etc. control bytes
* in the range below 0x20 (the space character). If an entry in this
@@ -98,13 +105,18 @@
/**
* Root type, application/octet-stream.
*/
- private final MimeType root;
+ private final MimeType rootMimeType;
/**
* Text type, text/plain.
*/
- private final MimeType text;
+ private final MimeType textMimeType;
+ /**
+ * XML type, application/xml.
+ */
+ private final MimeType xmlMimeType;
+
/** All the registered MimeTypes indexed on their name */
private final Map<String, MimeType> types = new HashMap<String,
MimeType>();
@@ -120,16 +132,20 @@
private final XmlRootExtractor xmlRootExtractor;
public MimeTypes() {
- root = new MimeType(this, OCTET_STREAM);
- text = new MimeType(this, PLAIN_TEXT);
+ rootMimeType = new MimeType(this, OCTET_STREAM);
+ textMimeType = new MimeType(this, PLAIN_TEXT);
+ xmlMimeType = new MimeType(this, XML);
+
try {
- text.setSuperType(root);
+ textMimeType.setSuperType(rootMimeType);
+ xmlMimeType.setSuperType(rootMimeType);
} catch (MimeTypeException e) {
throw new IllegalStateException("Error in MimeType logic", e);
}
- types.put(root.getName(), root);
- types.put(text.getName(), text);
+ types.put(rootMimeType.getName(), rootMimeType);
+ types.put(textMimeType.getName(), textMimeType);
+ types.put(xmlMimeType.getName(), xmlMimeType);
try {
xmlRootExtractor = new XmlRootExtractor();
@@ -179,7 +195,7 @@
if (type != null) {
return type;
} else {
- return root;
+ return rootMimeType;
}
}
@@ -238,10 +254,10 @@
for (int i = 0; i < data.length; i++) {
int b = data[i] & 0xFF; // prevent sign extension
if (b < IS_CONTROL_BYTE.length && IS_CONTROL_BYTE[b]) {
- return root;
+ return rootMimeType;
}
}
- return text;
+ return textMimeType;
}
/**
@@ -391,9 +407,11 @@
if (type == null) {
type = new MimeType(this, name);
if (name.startsWith("text/")) {
- type.setSuperType(text);
+ type.setSuperType(textMimeType);
+ } else if (name.endsWith("+xml")) {
+ type.setSuperType(xmlMimeType);
} else {
- type.setSuperType(root);
+ type.setSuperType(rootMimeType);
}
types.put(name, type);
}
@@ -506,7 +524,7 @@
*/
public MediaType detect(InputStream input, Metadata metadata)
throws IOException {
- MimeType type = root;
+ MimeType type = rootMimeType;
// Get type based on magic prefix
if (input != null) {
Modified:
lucene/tika/trunk/tika-core/src/test/java/org/apache/tika/mime/MimeDetectionTest.java
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/test/java/org/apache/tika/mime/MimeDetectionTest.java?rev=820291&r1=820290&r2=820291&view=diff
==============================================================================
---
lucene/tika/trunk/tika-core/src/test/java/org/apache/tika/mime/MimeDetectionTest.java
(original)
+++
lucene/tika/trunk/tika-core/src/test/java/org/apache/tika/mime/MimeDetectionTest.java
Wed Sep 30 15:04:53 2009
@@ -48,6 +48,15 @@
testFile("application/xml", "test-long-comment.xml");
testFile("application/xslt+xml", "stylesheet.xsl");
}
+
+ public void testAutosetSupertype() throws MimeTypeException {
+ MimeTypes types = new MimeTypes();
+ MimeType type = types.forName("application/something+xml");
+ assertEquals("application/xml", type.getSuperType().getName());
+
+ type = types.forName("text/something");
+ assertEquals("text/plain", type.getSuperType().getName());
+ }
private void testFile(String expected, String filename) throws IOException
{
InputStream in = getClass().getResourceAsStream(filename);
Modified:
lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/MimeTypesTest.java
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/MimeTypesTest.java?rev=820291&r1=820290&r2=820291&view=diff
==============================================================================
---
lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/MimeTypesTest.java
(original)
+++
lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/MimeTypesTest.java
Wed Sep 30 15:04:53 2009
@@ -18,6 +18,9 @@
import java.io.IOException;
import java.io.InputStream;
+import java.util.Arrays;
+import java.util.Iterator;
+
import junit.framework.TestCase;
public class MimeTypesTest extends TestCase {
@@ -68,14 +71,20 @@
}
public void testSubTypes() {
- assertEquals(1, binary.getSubTypes().size());
- assertEquals(
- "text/plain",
- binary.getSubTypes().iterator().next().getName());
+ assertEquals(2, binary.getSubTypes().size());
+ Iterator<MimeType> iter = binary.getSubTypes().iterator();
+ String[] typeNames = new String[2];
+ typeNames[0] = iter.next().getName();
+ typeNames[1] = iter.next().getName();
+ Arrays.sort(typeNames);
+ assertEquals("application/xml", typeNames[0]);
+ assertEquals("text/plain", typeNames[1]);
+
assertEquals(1, text.getSubTypes().size());
assertEquals(
"text/html",
text.getSubTypes().iterator().next().getName());
+
assertEquals(0, html.getSubTypes().size());
}