This is an automated email from the ASF dual-hosted git repository. tallison pushed a commit to branch TIKA-4518 in repository https://gitbox.apache.org/repos/asf/tika.git
commit ffda563bd8b4a849a85f9bfd44ce77818a1f4ecb Author: tallison <[email protected]> AuthorDate: Wed Oct 15 12:19:36 2025 -0400 TIKA-4518 -- improve recursive file extraction, with focus on logging PST email issues. --- CHANGES.txt | 14 ++---- .../java/org/apache/tika/cli/TikaCLIAsyncTest.java | 2 +- .../test/java/org/apache/tika/cli/TikaCLITest.java | 29 ++++++++++- tika-app/src/test/resources/test-data/testPST.pst | Bin 0 -> 2302976 bytes .../extractor/DefaultEmbeddedStreamTranslator.java | 7 ++- .../tika/extractor/EmbeddedStreamTranslator.java | 1 - .../apache/tika/extractor/RUnpackExtractor.java | 35 ++++++++----- .../java/org/apache/tika/io/FilenameUtils.java | 29 ++++++++++- .../java/org/apache/tika/io/FilenameUtilsTest.java | 8 ++- .../microsoft/MSEmbeddedStreamTranslator.java | 2 - .../microsoft/PSTEmailStreamTranslator.java | 55 +++++++++++++++++++++ ....apache.tika.extractor.EmbeddedStreamTranslator | 3 +- .../AbstractEmbeddedDocumentBytesHandler.java | 37 ++------------ 13 files changed, 154 insertions(+), 68 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 6de20938a..2d91e48c2 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -7,17 +7,11 @@ Release 4.0.0-BETA1 - ??? * Headers are no longer injected into the body/content of MSG files (TIKA-4345). Please open a ticket if you need this behavior across email formats. - * Remove tika-batch (TIKA-4333). + * Removed several modules, including: tika-batch (TIKA-4333), snaps deployment (TIKA-4502), + dotnet (TIKA-4332), advanced media module (TIKA-4500), tika-dl module (TIKA-4499), + tika-fuzzing module (TIKA-4506). - * Remove snaps deployment (TIKA-4502). - - * Removed the dotnet module (TIKA-4332). - - * Removed the advanced media module (TIKA-4500). - - * Removed the tika-dl module (TIKA-4499). - - * Removed the tika-fuzzing module (TIKA-4506). + * API changes in the EmbeddedStreamTranslator (TIKA-4518). OTHER CHANGES diff --git a/tika-app/src/test/java/org/apache/tika/cli/TikaCLIAsyncTest.java b/tika-app/src/test/java/org/apache/tika/cli/TikaCLIAsyncTest.java index 096e3ce73..faacd49a2 100644 --- a/tika-app/src/test/java/org/apache/tika/cli/TikaCLIAsyncTest.java +++ b/tika-app/src/test/java/org/apache/tika/cli/TikaCLIAsyncTest.java @@ -121,7 +121,7 @@ public class TikaCLIAsyncTest { json++; } } - assertEquals(20, json); + assertEquals(21, json); } private void checkForPrettyPrint(File f) throws IOException { diff --git a/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java b/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java index 93de4e409..94ccfd96c 100644 --- a/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java +++ b/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java @@ -26,6 +26,7 @@ import java.io.ByteArrayOutputStream; import java.io.File; import java.io.IOException; import java.io.PrintStream; +import java.io.Reader; import java.net.URI; import java.nio.file.FileVisitResult; import java.nio.file.FileVisitor; @@ -34,6 +35,7 @@ import java.nio.file.Path; import java.nio.file.Paths; import java.nio.file.attribute.BasicFileAttributes; import java.util.HashSet; +import java.util.List; import java.util.Set; import org.jetbrains.annotations.NotNull; @@ -44,7 +46,11 @@ import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; import org.apache.tika.exception.TikaException; +import org.apache.tika.metadata.Metadata; +import org.apache.tika.metadata.TikaCoreProperties; +import org.apache.tika.serialization.JsonMetadataList; import org.apache.tika.utils.ProcessUtils; +import org.apache.tika.utils.StringUtils; /** * Tests the Tika's cli @@ -285,6 +291,28 @@ public class TikaCLITest { testRecursiveUnpack("testPDFPackage.pdf", expectedChildren, 2); } + @Test + public void testPSTRUnpack() throws Exception { + String[] expectedChildren = new String[]{"testPST.pst.json", + "testPST.pst-embed/00000007-First email.msg", + "testPST.pst-embed/00000001-Feature Generators.msg", + "testPST.pst-embed/00000008-First email.msg", + "testPST.pst-embed/00000004-[jira] [Resolved] (TIKA-1249) Vcard files detection.msg", + "testPST.pst-embed/00000003-Feature Generators.msg", + "testPST.pst-embed/00000002-putstatic\".msg", + "testPST.pst-embed/00000005-[jira] [Commented] (TIKA-1250) Process loops infintely processing a CHM file.msg", + "testPST.pst-embed/00000009-attachment.docx", + "testPST.pst-embed/00000006-[WEBINAR] - \"Introducing Couchbase Server 2.5\".msg"}; + testRecursiveUnpack("testPST.pst", expectedChildren, 2); + try (Reader reader = Files.newBufferedReader(extractDir.resolve("testPST.pst.json"))) { + List<Metadata> metadataList = JsonMetadataList.fromJson(reader); + for (Metadata m : metadataList) { + String content = m.get(TikaCoreProperties.TIKA_CONTENT); + assertFalse(StringUtils.isBlank(content)); + } + } + } + /** * Tests -l option of the cli @@ -378,7 +406,6 @@ public class TikaCLITest { .list(); assertNotNull(jsonFile); assertEquals(expectedLength, jsonFile.length); - //assertEquals(fileNames.size(), expectedChildrenFileNames.length); for (String expectedChildName : expectedChildrenFileNames) { assertTrue(fileNames.contains(expectedChildName)); diff --git a/tika-app/src/test/resources/test-data/testPST.pst b/tika-app/src/test/resources/test-data/testPST.pst new file mode 100644 index 000000000..8ccc69547 Binary files /dev/null and b/tika-app/src/test/resources/test-data/testPST.pst differ diff --git a/tika-core/src/main/java/org/apache/tika/extractor/DefaultEmbeddedStreamTranslator.java b/tika-core/src/main/java/org/apache/tika/extractor/DefaultEmbeddedStreamTranslator.java index bf2321481..b9d6985cc 100644 --- a/tika-core/src/main/java/org/apache/tika/extractor/DefaultEmbeddedStreamTranslator.java +++ b/tika-core/src/main/java/org/apache/tika/extractor/DefaultEmbeddedStreamTranslator.java @@ -27,9 +27,7 @@ import org.apache.tika.utils.ServiceLoaderUtils; /** * Loads EmbeddedStreamTranslators via service loading. Tries to run each - * in turn and returns the first non-null value. If no translation has occurred, - * this returns the original InputStream. If a translation has occurred, the - * translator will consume the InputStream but not close it. + * in turn. If a translator accepts the stream, it will do the translation but not close the stream. */ public class DefaultEmbeddedStreamTranslator implements EmbeddedStreamTranslator { @@ -69,9 +67,10 @@ public class DefaultEmbeddedStreamTranslator implements EmbeddedStreamTranslator } /** - * This will consume the InputStream and return a new stream of translated bytes. + * This will consume the InputStream and write the stream to the output stream * @param inputStream * @param metadata + * @param outputStream to write to * @return * @throws IOException */ diff --git a/tika-core/src/main/java/org/apache/tika/extractor/EmbeddedStreamTranslator.java b/tika-core/src/main/java/org/apache/tika/extractor/EmbeddedStreamTranslator.java index 2391f0be5..4a582506f 100644 --- a/tika-core/src/main/java/org/apache/tika/extractor/EmbeddedStreamTranslator.java +++ b/tika-core/src/main/java/org/apache/tika/extractor/EmbeddedStreamTranslator.java @@ -17,7 +17,6 @@ package org.apache.tika.extractor; import java.io.IOException; -import java.io.InputStream; import java.io.OutputStream; import org.apache.tika.io.TikaInputStream; diff --git a/tika-core/src/main/java/org/apache/tika/extractor/RUnpackExtractor.java b/tika-core/src/main/java/org/apache/tika/extractor/RUnpackExtractor.java index c5d8185b2..70c21ffb4 100644 --- a/tika-core/src/main/java/org/apache/tika/extractor/RUnpackExtractor.java +++ b/tika-core/src/main/java/org/apache/tika/extractor/RUnpackExtractor.java @@ -111,20 +111,27 @@ public class RUnpackExtractor extends ParsingEmbeddedDocumentExtractor { } } - private void parseWithBytes(TikaInputStream tis, ContentHandler handler, Metadata metadata) - throws TikaException, IOException, SAXException { - //TODO -- improve the efficiency of this so that we're not - //literally writing out a file per request + private void parseWithBytes(TikaInputStream tis, ContentHandler handler, Metadata metadata) throws TikaException, IOException, SAXException { + Path tmp = Files.createTempFile("tika-tmp-", ".bin"); - if (embeddedStreamTranslator.shouldTranslate(tis, metadata)) { - try (OutputStream os = Files.newOutputStream(tmp)) { - embeddedStreamTranslator.translate(tis, metadata, os); + try { + //translate the stream or not + if (embeddedStreamTranslator.shouldTranslate(tis, metadata)) { + try (OutputStream os = Files.newOutputStream(tmp)) { + embeddedStreamTranslator.translate(tis, metadata, os); + } + } else { + Files.copy(tis, tmp, StandardCopyOption.REPLACE_EXISTING); + } + + //now do the parse + if (tis.getOpenContainer() != null) { + parse(tis, handler, metadata); + } else { + try (TikaInputStream tisTmp = TikaInputStream.get(tmp)) { + parse(tisTmp, handler, metadata); + } } - } else { - Files.copy(tis, tmp, StandardCopyOption.REPLACE_EXISTING); - } - try (TikaInputStream tmpTis = TikaInputStream.get(tmp)) { - parse(tis, handler, metadata); } finally { try { storeEmbeddedBytes(tmp, metadata); @@ -142,6 +149,10 @@ public class RUnpackExtractor extends ParsingEmbeddedDocumentExtractor { } private void storeEmbeddedBytes(Path p, Metadata metadata) { + if (p == null) { + return; + } + if (! embeddedBytesSelector.select(metadata)) { if (LOGGER.isDebugEnabled()) { LOGGER.debug("skipping embedded bytes {} <-> {}", diff --git a/tika-core/src/main/java/org/apache/tika/io/FilenameUtils.java b/tika-core/src/main/java/org/apache/tika/io/FilenameUtils.java index de57eda72..cf250e934 100644 --- a/tika-core/src/main/java/org/apache/tika/io/FilenameUtils.java +++ b/tika-core/src/main/java/org/apache/tika/io/FilenameUtils.java @@ -140,12 +140,16 @@ public class FilenameUtils { public static String getSanitizedEmbeddedFileName(Metadata metadata, String defaultExtension, int maxLength) { - String path = getEmbeddedPath(metadata); + String path = getEmbeddedName(metadata); //fName could be a full path or null if (StringUtils.isBlank(path)) { return null; } path = path.replaceAll("\u0000", " "); + if (path.startsWith("\"") && path.endsWith("\"")) { + path = path.substring(1, path.length() - 1); + } + int prefixLength = getPrefixLength(path); if (prefixLength > 0) { path = path.substring(prefixLength); @@ -173,6 +177,7 @@ public class FilenameUtils { namePart = namePart.replaceAll("(\\.\\.)+", "_"); namePart = namePart.replaceAll("[/\\\\]+", "_"); namePart = namePart.replaceAll(":+", "_"); + namePart = namePart.trim(); if (StringUtils.isBlank(namePart)) { return null; @@ -286,6 +291,7 @@ public class FilenameUtils { return path; } + //may return null private static String getEmbeddedPath(Metadata metadata) { //potentially look for other values in embedded path or original file name, etc... //maybe different fallback order? @@ -304,6 +310,27 @@ public class FilenameUtils { return metadata.get(TikaCoreProperties.ORIGINAL_RESOURCE_NAME); } + //this tries for resource name first, and then backs off to path + private static String getEmbeddedName(Metadata metadata) { + //potentially look for other values in embedded path or original file name, etc... + //maybe different fallback order? + String path = metadata.get(TikaCoreProperties.RESOURCE_NAME_KEY); + if (! StringUtils.isBlank(path)) { + return path; + } + path = metadata.get(TikaCoreProperties.EMBEDDED_RELATIONSHIP_ID); + if (! StringUtils.isBlank(path)) { + return path; + } + + path = metadata.get(TikaCoreProperties.EMBEDDED_RESOURCE_PATH); + if (! StringUtils.isBlank(path)) { + return path; + } + + return metadata.get(TikaCoreProperties.ORIGINAL_RESOURCE_NAME); + } + /** * Calculate the extension based on the {@link Metadata#CONTENT_TYPE} value. * On parse exception or null value, return the default value. diff --git a/tika-core/src/test/java/org/apache/tika/io/FilenameUtilsTest.java b/tika-core/src/test/java/org/apache/tika/io/FilenameUtilsTest.java index c3abd4134..c670bac83 100644 --- a/tika-core/src/test/java/org/apache/tika/io/FilenameUtilsTest.java +++ b/tika-core/src/test/java/org/apache/tika/io/FilenameUtilsTest.java @@ -151,6 +151,9 @@ public class FilenameUtilsTest { assertEquals("brown fox.xlsx", sanitizeFilename("a:/the quick:brown fox.xlsx")); assertEquals("_the quick brown fox.xlsx", sanitizeFilename("C:\\a/b/c/..the quick brown fox.xlsx")); assertEquals("_the quick brown fox.xlsx", sanitizeFilename("~/a/b/c/.the quick brown fox.xlsx")); + assertEquals("the quick%3Ebrown fox.xlsx", sanitizeFilename("the quick>brown fox.xlsx")); + assertEquals("the quick\"brown fox.xlsx", sanitizeFilename("the quick\"brown fox.xlsx")); + assertEquals("the quick brown fox.xlsx", sanitizeFilename("\"the quick brown fox.xlsx\"")); assertEquals("_.docx", sanitizeFilename("..................docx")); assertEquals("_.docx", sanitizeFilename("..docx")); @@ -168,7 +171,7 @@ public class FilenameUtilsTest { @Test public void testEmbeddedFilePaths() throws Exception { String n = "the quick brown fox.docx"; - /*assertEquals(n, sanitizePath(n)); + assertEquals(n, sanitizePath(n)); assertEquals(n, sanitizePath(n.substring(0, n.length() - 5), "application/vnd.openxmlformats-officedocument.wordprocessingml.document")); assertEquals(n, sanitizeFilename("the quick\u0000brown fox.docx")); @@ -204,7 +207,7 @@ public class FilenameUtilsTest { assertNull(sanitizePath("")); assertNull(sanitizePath(null)); assertNull(sanitizePath("/")); - assertNull(sanitizePath("~/"));*/ + assertNull(sanitizePath("~/")); assertNull(sanitizePath("C:")); assertNull(sanitizePath("C:/")); assertNull(sanitizePath("C:\\")); @@ -235,6 +238,7 @@ public class FilenameUtilsTest { private Metadata getMetadata(String name) { Metadata metadata = new Metadata(); + metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, name); metadata.set(TikaCoreProperties.EMBEDDED_RESOURCE_PATH, name); return metadata; } diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/extractor/microsoft/MSEmbeddedStreamTranslator.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/extractor/microsoft/MSEmbeddedStreamTranslator.java index 3833b91da..433d34a00 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/extractor/microsoft/MSEmbeddedStreamTranslator.java +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/extractor/microsoft/MSEmbeddedStreamTranslator.java @@ -21,8 +21,6 @@ import java.io.InputStream; import java.io.OutputStream; import org.apache.commons.io.IOUtils; -import org.apache.commons.io.input.CloseShieldInputStream; -import org.apache.commons.io.input.UnsynchronizedByteArrayInputStream; import org.apache.commons.io.output.CloseShieldOutputStream; import org.apache.commons.io.output.UnsynchronizedByteArrayOutputStream; import org.apache.poi.poifs.filesystem.DirectoryEntry; diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/extractor/microsoft/PSTEmailStreamTranslator.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/extractor/microsoft/PSTEmailStreamTranslator.java new file mode 100644 index 000000000..055072481 --- /dev/null +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/extractor/microsoft/PSTEmailStreamTranslator.java @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.extractor.microsoft; + +import java.io.IOException; +import java.io.OutputStream; +import java.util.concurrent.atomic.AtomicLong; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.tika.extractor.EmbeddedStreamTranslator; +import org.apache.tika.io.TikaInputStream; +import org.apache.tika.metadata.Metadata; +import org.apache.tika.metadata.TikaCoreProperties; +import org.apache.tika.mime.MediaType; + +public class PSTEmailStreamTranslator implements EmbeddedStreamTranslator { + private static final String MIME_TYPE = MediaType.application("x-tika-pst-mail-item").toString(); + + private static final Logger LOG = LoggerFactory.getLogger(PSTEmailStreamTranslator.class); + private static final AtomicLong EMAIL_ITEMS = new AtomicLong(0); + private static final long LOG_EVERY_X_ITEMS = 100; + + @Override + public boolean shouldTranslate(TikaInputStream tis, Metadata metadata) throws IOException { + return MIME_TYPE.equals(metadata.get(TikaCoreProperties.CONTENT_TYPE_PARSER_OVERRIDE)) + || MIME_TYPE.equals(metadata.get(Metadata.CONTENT_TYPE)); + } + + @Override + public void translate(TikaInputStream tis, Metadata metadata, OutputStream os) throws IOException { + if (!shouldTranslate(tis, metadata)) { + return; + } + if (EMAIL_ITEMS.getAndIncrement() % LOG_EVERY_X_ITEMS == 0) { + LOG.warn("Translating pst email objects to .eml or .msg is not yet supported. " + + "Please open a ticket on our JIRA or a pull request on Github."); + } + } +} diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/resources/META-INF/services/org.apache.tika.extractor.EmbeddedStreamTranslator b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/resources/META-INF/services/org.apache.tika.extractor.EmbeddedStreamTranslator index e59cba80e..509de7d95 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/resources/META-INF/services/org.apache.tika.extractor.EmbeddedStreamTranslator +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/resources/META-INF/services/org.apache.tika.extractor.EmbeddedStreamTranslator @@ -12,4 +12,5 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -org.apache.tika.extractor.microsoft.MSEmbeddedStreamTranslator \ No newline at end of file +org.apache.tika.extractor.microsoft.MSEmbeddedStreamTranslator +org.apache.tika.extractor.microsoft.PSTEmailStreamTranslator \ No newline at end of file diff --git a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/extractor/AbstractEmbeddedDocumentBytesHandler.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/extractor/AbstractEmbeddedDocumentBytesHandler.java index 80ff66984..5dd27e419 100644 --- a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/extractor/AbstractEmbeddedDocumentBytesHandler.java +++ b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/extractor/AbstractEmbeddedDocumentBytesHandler.java @@ -18,26 +18,18 @@ package org.apache.tika.pipes.core.extractor; import java.io.IOException; import java.io.InputStream; -import java.nio.file.Path; -import java.nio.file.Paths; import java.util.ArrayList; import java.util.List; import java.util.Locale; -import org.apache.tika.config.TikaConfig; import org.apache.tika.extractor.EmbeddedDocumentBytesHandler; import org.apache.tika.io.FilenameUtils; import org.apache.tika.metadata.Metadata; import org.apache.tika.metadata.TikaCoreProperties; -import org.apache.tika.mime.MimeTypeException; -import org.apache.tika.mime.MimeTypes; -import org.apache.tika.pipes.core.extractor.EmbeddedDocumentBytesConfig; import org.apache.tika.utils.StringUtils; public abstract class AbstractEmbeddedDocumentBytesHandler implements EmbeddedDocumentBytesHandler { - private static final MimeTypes MIME_TYPES = TikaConfig.getDefaultConfig().getMimeRepository(); - List<Integer> ids = new ArrayList<>(); public String getEmitKey(String containerEmitKey, int embeddedId, @@ -56,13 +48,10 @@ public abstract class AbstractEmbeddedDocumentBytesHandler implements EmbeddedDo emitKey.append("-embed"); emitKey.append("/"); emitKey.append(embeddedIdString).append(embeddedDocumentBytesConfig.getEmbeddedIdPrefix()); - Path p = Paths.get(metadata.get(TikaCoreProperties.EMBEDDED_RESOURCE_PATH)); - String fName = p.getFileName().toString(); - emitKey.append(fName); - if (! fName.contains(".")) { - appendSuffix(emitKey, metadata, embeddedDocumentBytesConfig); + String fName = FilenameUtils.getSanitizedEmbeddedFileName(metadata, ".bin", 100); + if (! StringUtils.isBlank(fName)) { + emitKey.append(fName); } - return emitKey.toString(); } else if (embeddedDocumentBytesConfig.getKeyBaseStrategy() == EmbeddedDocumentBytesConfig.KEY_BASE_STRATEGY.CONTAINER_NAME_NUMBERED) { @@ -101,25 +90,7 @@ public abstract class AbstractEmbeddedDocumentBytesHandler implements EmbeddedDo emitKey.append(suffix); } else if (embeddedDocumentBytesConfig.getSuffixStrategy() .equals(EmbeddedDocumentBytesConfig.SUFFIX_STRATEGY.DETECTED)) { - emitKey.append(getExtension(metadata)); + emitKey.append(FilenameUtils.calculateExtension(metadata, ".bin")); } } - - private String getExtension(Metadata metadata) { - String mime = metadata.get(Metadata.CONTENT_TYPE); - try { - String ext = MIME_TYPES - .forName(mime) - .getExtension(); - if (ext == null) { - return ".bin"; - } else { - return ext; - } - } catch (MimeTypeException e) { - //swallow - } - return ".bin"; - - } }
