This is an automated email from the ASF dual-hosted git repository. nick pushed a commit to branch multiple-parsers in repository https://gitbox.apache.org/repos/asf/tika.git
commit 4e7bdaf5b1cb6964075007a1f437b3d6fd08d139 Author: Nick Burch <n...@gagravarr.org> AuthorDate: Wed Apr 4 08:11:13 2018 +0100 Replace the old experimental Fallback ParserDecorator code with a call to the new FallbackParser --- .../org/apache/tika/parser/ParserDecorator.java | 60 +++------------------- 1 file changed, 8 insertions(+), 52 deletions(-) diff --git a/tika-core/src/main/java/org/apache/tika/parser/ParserDecorator.java b/tika-core/src/main/java/org/apache/tika/parser/ParserDecorator.java index 55c33e9..e58bf9f 100644 --- a/tika-core/src/main/java/org/apache/tika/parser/ParserDecorator.java +++ b/tika-core/src/main/java/org/apache/tika/parser/ParserDecorator.java @@ -23,10 +23,11 @@ import java.util.HashSet; import java.util.Set; import org.apache.tika.exception.TikaException; -import org.apache.tika.io.TemporaryResources; -import org.apache.tika.io.TikaInputStream; import org.apache.tika.metadata.Metadata; import org.apache.tika.mime.MediaType; +import org.apache.tika.mime.MediaTypeRegistry; +import org.apache.tika.parser.multiple.AbstractMultipleParser.MetadataPolicy; +import org.apache.tika.parser.multiple.FallbackParser; import org.xml.sax.ContentHandler; import org.xml.sax.SAXException; @@ -98,59 +99,14 @@ public class ParserDecorator extends AbstractParser { /** * Decorates the given parsers into a virtual parser, where they'll * be tried in preference order until one works without error. - * TODO Is this the right name? - * TODO Is this the right place to put this? Should it be in CompositeParser? Elsewhere? - * TODO Should we reset the Metadata if we try another parser? - * TODO Should we reset the ContentHandler if we try another parser? - * TODO Should we log/report failures anywhere? - * @deprecated Do not use until the TODOs are resolved, see TIKA-1509 + * @deprecated This has been replaced by {@link FallbackParser} */ public static final Parser withFallbacks( final Collection<? 
extends Parser> parsers, final Set<MediaType> types) { - Parser parser = EmptyParser.INSTANCE; - if (!parsers.isEmpty()) parser = parsers.iterator().next(); - - return new ParserDecorator(parser) { - private static final long serialVersionUID = 1625187131782069683L; - @Override - public Set<MediaType> getSupportedTypes(ParseContext context) { - return types; - } - @Override - public void parse(InputStream stream, ContentHandler handler, - Metadata metadata, ParseContext context) - throws IOException, SAXException, TikaException { - // Must have a TikaInputStream, so we can re-use it if parsing fails - // Need to close internally created tstream to release resources - TemporaryResources tmp = (TikaInputStream.isTikaInputStream(stream)) ? null - : new TemporaryResources(); - try { - TikaInputStream tstream = - TikaInputStream.get(stream, tmp); - tstream.getFile(); - // Try each parser in turn - for (Parser p : parsers) { - tstream.mark(-1); - try { - p.parse(tstream, handler, metadata, context); - return; - } catch (Exception e) { - // TODO How to log / record this failure? - } - // Prepare for the next parser, if present - tstream.reset(); - } - } finally { - if (tmp != null) { - tmp.dispose(); - } - } - } - @Override - public String getDecorationName() { - return "With Fallback"; - } - }; + // Delegate to the new FallbackParser for now, until people upgrade + // Keep old behaviour on metadata, which was to preserve all + MediaTypeRegistry registry = MediaTypeRegistry.getDefaultRegistry(); + return new FallbackParser(registry, MetadataPolicy.KEEP_ALL, parsers); } /** -- To stop receiving notification emails like this one, please contact n...@apache.org.