Author: jukka
Date: Fri Oct 19 15:27:58 2007
New Revision: 586632
URL: http://svn.apache.org/viewvc?rev=586632&view=rev
Log:
TIKA-84 - Add MimeTypes.getMimeType(InputStream)
Modified:
incubator/tika/trunk/CHANGES.txt
incubator/tika/trunk/src/main/java/org/apache/tika/mime/MimeTypes.java
Modified: incubator/tika/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/incubator/tika/trunk/CHANGES.txt?rev=586632&r1=586631&r2=586632&view=diff
==============================================================================
--- incubator/tika/trunk/CHANGES.txt (original)
+++ incubator/tika/trunk/CHANGES.txt Fri Oct 19 15:27:58 2007
@@ -121,3 +121,5 @@
54. TIKA-71 - Remove ParserConfig and ParserFactory (jukka)
55. TIKA-83 - Create a org.apache.tika.sax package for SAX utilities (jukka)
+
+56. TIKA-84 - Add MimeTypes.getMimeType(InputStream) (jukka)
Modified: incubator/tika/trunk/src/main/java/org/apache/tika/mime/MimeTypes.java
URL:
http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/mime/MimeTypes.java?rev=586632&r1=586631&r2=586632&view=diff
==============================================================================
--- incubator/tika/trunk/src/main/java/org/apache/tika/mime/MimeTypes.java
(original)
+++ incubator/tika/trunk/src/main/java/org/apache/tika/mime/MimeTypes.java Fri
Oct 19 15:27:58 2007
@@ -18,6 +18,8 @@
// JDK imports
import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
import java.net.URL;
import java.util.Arrays;
import java.util.Map;
@@ -175,26 +177,18 @@
}
/**
- * Find the Mime Content Type of a stream from its content.
- *
- * @param data
- * are the first bytes of data of the content to analyze.
- * Depending on the length of provided data, all known MimeTypes
- * are checked. If the length of provided data is greater or
- * egals to the value returned by {@link #getMinLength()}, then
- * all known MimeTypes are checked, otherwise only the MimeTypes
- * that could be analyzed with the length of provided data are
- * analyzed.
- *
- * @return The Mime Content Type found for the specified data, or
- * <code>null</code> if none is found.
- * @see #getMinLength()
+ * Returns the MIME type that best matches the given first few bytes
+ * of a document stream. Returns <code>null</code> if no matching type
+ * is found.
+ * <p>
+ * The given byte array is expected to be at least {@link #getMinLength()}
+ * long, or shorter only if the document stream itself is shorter.
+ *
+ * @param data first few bytes of a document stream
+ * @return matching MIME type, or <code>null</code>
*/
public MimeType getMimeType(byte[] data) {
- // Preliminary checks
- if ((data == null) || (data.length < 1)) {
- return null;
- }
+ assert data != null;
// First, check for XML descriptions (level by level)
for (MimeInfo info : xmls) {
@@ -212,6 +206,41 @@
}
return null;
+ }
+
+ /**
+ * Returns the MIME type that best matches the first few bytes of the
+ * given document stream.
+ * <p>
+ * If the given stream supports the mark feature (and doesn't throw an
+ * exception during this method call), then it is safe to use
+ * <code>stream.mark({@link #getMinLength()})</code> before and
+ * <code>stream.reset()</code> after this method call to restore the
+ * stream to the state it was in before this method call.
+ *
+ * @see #getMimeType(byte[])
+ * @param stream document stream
+ * @return matching MIME type
+ * @throws IOException if the stream cannot be read
+ */
+ public MimeType getMimeType(InputStream stream) throws IOException {
+ assert stream != null;
+
+ byte[] bytes = new byte[getMinLength()];
+ int totalRead = 0;
+
+ int lastRead = stream.read(bytes);
+ while (lastRead != -1) {
+ totalRead += lastRead;
+ if (totalRead == bytes.length) {
+ return getMimeType(bytes);
+ }
+ lastRead = stream.read(bytes, totalRead, bytes.length - totalRead);
+ }
+
+ byte[] shorter = new byte[totalRead];
+ System.arraycopy(bytes, 0, shorter, 0, totalRead);
+ return getMimeType(shorter);
}
/**