Author: jukka
Date: Mon Oct 22 12:48:34 2007
New Revision: 587217
URL: http://svn.apache.org/viewvc?rev=587217&view=rev
Log:
TIKA-84 - Add MimeTypes.getMimeType(InputStream)
- Added also getMimeType(String, InputStream)
- Extracted common code to readMagicHeader(InputStream)
- Javadoc improvements
Modified:
incubator/tika/trunk/src/main/java/org/apache/tika/mime/MimeTypes.java
Modified: incubator/tika/trunk/src/main/java/org/apache/tika/mime/MimeTypes.java
URL:
http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/mime/MimeTypes.java?rev=587217&r1=587216&r2=587217&view=diff
==============================================================================
--- incubator/tika/trunk/src/main/java/org/apache/tika/mime/MimeTypes.java (original)
+++ incubator/tika/trunk/src/main/java/org/apache/tika/mime/MimeTypes.java Mon Oct 22 12:48:34 2007
@@ -37,8 +37,15 @@
* This class is a MimeType repository. It gathers a set of MimeTypes and
* enables to retrieves a content-type from its name, from a file name, or from
* a magic character sequence.
- *
- *
+ * <p>
+ * The MIME type detection methods that take an {@link InputStream} as
+ * an argument will never read more than {@link #getMinLength()} bytes
+ * from the stream. Also the given stream is never
+ * {@link InputStream#close() closed}, {@link InputStream#mark(int) marked},
+ * or {@link InputStream#reset() reset} by the methods. Thus a client can
+ * use the {@link InputStream#markSupported() mark feature} of the stream
+ * (if available) to restore the stream back to the state it was before type
+ * detection if it wants to process the stream based on the detected type.
*/
public final class MimeTypes {
@@ -178,14 +185,13 @@
/**
* Returns the MIME type that best matches the given first few bytes
- * of a document stream. Returns <code>null</code> if no matching type
- * is found.
+ * of a document stream.
* <p>
* The given byte array is expected to be at least {@link #getMinLength()}
* long, or shorter only if the document stream itself is shorter.
*
* @param data first few bytes of a document stream
- * @return matching MIME type, or <code>null</code>
+ * @return matching MIME type, or <code>null</code> if no match is found
*/
public MimeType getMimeType(byte[] data) {
assert data != null;
@@ -211,19 +217,30 @@
/**
* Returns the MIME type that best matches the first few bytes of the
* given document stream.
- * <p>
- * If the given stream supports the mark feature (and doesn't throw an
- * exception during this method call), then it is safe to use
- * <code>stream.mark({@link #getMinLength()})</code> before and
- * <code>stream.reset()</code> after this method call to restore the
- * stream to the state it was in before this method call.
*
* @see #getMimeType(byte[])
* @param stream document stream
- * @return matching MIME type
+ * @return matching MIME type, or <code>null</code> if no match is found
* @throws IOException if the stream can not be read
*/
public MimeType getMimeType(InputStream stream) throws IOException {
+ return getMimeType(readMagicHeader(stream));
+ }
+
+ /**
+ * Reads the first {@link #getMinLength()} bytes from the given stream.
+ * If the stream is shorter, then the entire content of the stream is
+ * returned.
+ * <p>
+ * The given stream is never {@link InputStream#close() closed},
+ * {@link InputStream#mark(int) marked}, or
+ * {@link InputStream#reset() reset} by this method.
+ *
+ * @param stream stream to be read
+ * @return first {@link #getMinLength()} (or fewer) bytes of the stream
+ * @throws IOException if the stream can not be read
+ */
+ private byte[] readMagicHeader(InputStream stream) throws IOException {
assert stream != null;
byte[] bytes = new byte[getMinLength()];
@@ -233,14 +250,14 @@
while (lastRead != -1) {
totalRead += lastRead;
if (totalRead == bytes.length) {
- return getMimeType(bytes);
+ return bytes;
}
lastRead = stream.read(bytes, totalRead, bytes.length - totalRead);
}
byte[] shorter = new byte[totalRead];
System.arraycopy(bytes, 0, shorter, 0, totalRead);
- return getMimeType(shorter);
+ return shorter;
}
/**
@@ -271,6 +288,21 @@
}
return mimeType;
+ }
+
+ /**
+ * Returns the MIME type that best matches the given document name and
+ * the first few bytes of the given document stream.
+ *
+ * @see #getMimeType(String, byte[])
+ * @param name document name
+ * @param stream document stream
+ * @return matching MIME type, or <code>null</code> if no match is found
+ * @throws IOException if the stream can not be read
+ */
+ public MimeType getMimeType(String name, InputStream stream)
+ throws IOException {
+ return getMimeType(name, readMagicHeader(stream));
}
/**