This is an automated email from the ASF dual-hosted git repository. tallison pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/tika.git
commit b329820804bb74cc290337041eee595395e77435 Author: tallison <[email protected]> AuthorDate: Fri Apr 3 09:58:19 2020 -0400 TIKA-2572 -- review overly broad catches --- .../src/main/java/org/apache/tika/detect/AutoDetectReader.java | 2 +- .../src/main/java/org/apache/tika/detect/XmlRootExtractor.java | 2 ++ .../src/main/java/org/apache/tika/parser/CompositeParser.java | 9 ++++++--- .../src/main/java/org/apache/tika/utils/CharsetUtils.java | 9 +++++---- .../src/main/java/org/apache/tika/parser/crypto/TSDParser.java | 6 +++++- .../java/org/apache/tika/parser/html/HtmlEncodingDetector.java | 3 ++- .../java/org/apache/tika/parser/mbox/OutlookPSTParser.java | 10 ++++------ .../java/org/apache/tika/parser/microsoft/OfficeParser.java | 5 ++--- .../tika/parser/microsoft/ooxml/AbstractOOXMLExtractor.java | 9 +++++---- .../parser/microsoft/ooxml/SXWPFWordExtractorDecorator.java | 2 ++ .../apache/tika/parser/ner/corenlp/CoreNLPNERecogniser.java | 2 +- .../java/org/apache/tika/parser/ocr/TesseractOCRParser.java | 4 +++- .../apache/tika/parser/pkg/StreamingZipContainerDetector.java | 4 ++-- .../main/java/org/apache/tika/parser/rtf/TextExtractor.java | 2 +- .../java/org/apache/tika/parser/txt/Icu4jEncodingDetector.java | 2 +- .../org/apache/tika/parser/txt/UniversalEncodingListener.java | 2 +- 16 files changed, 43 insertions(+), 30 deletions(-) diff --git a/tika-core/src/main/java/org/apache/tika/detect/AutoDetectReader.java b/tika-core/src/main/java/org/apache/tika/detect/AutoDetectReader.java index 44dce8e..ca23a17 100644 --- a/tika-core/src/main/java/org/apache/tika/detect/AutoDetectReader.java +++ b/tika-core/src/main/java/org/apache/tika/detect/AutoDetectReader.java @@ -76,7 +76,7 @@ public class AutoDetectReader extends BufferedReader { if (charset != null) { try { return CharsetUtils.forName(charset); - } catch (Exception e) { + } catch (IllegalArgumentException e) { // ignore } } diff --git a/tika-core/src/main/java/org/apache/tika/detect/XmlRootExtractor.java b/tika-core/src/main/java/org/apache/tika/detect/XmlRootExtractor.java index 20a0be5..38148a5 100644 --- a/tika-core/src/main/java/org/apache/tika/detect/XmlRootExtractor.java +++ b/tika-core/src/main/java/org/apache/tika/detect/XmlRootExtractor.java @@ -53,6 +53,8 @@ public class XmlRootExtractor { XMLReaderUtils.parseSAX( new CloseShieldInputStream(stream), new OfflineContentHandler(handler), EMPTY_CONTEXT); + } catch (SecurityException e) { + throw e; } catch (Exception ignore) { } return handler.rootElement; diff --git a/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java b/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java index c5c95a6..bf10736 100644 --- a/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java +++ b/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java @@ -275,9 +275,9 @@ public class CompositeParser extends AbstractParser { ParserUtils.recordParserDetails(parser, metadata); try { parser.parse(taggedStream, taggedHandler, metadata, context); - } catch (RuntimeException e) { - throw new TikaException( - "Unexpected RuntimeException from " + parser, e); + } catch (SecurityException e) { + //rethrow security exceptions + throw e; } catch (IOException e) { taggedStream.throwIfCauseOf(e); throw new TikaException( @@ -286,6 +286,9 @@ public class CompositeParser extends AbstractParser { if (taggedHandler != null) taggedHandler.throwIfCauseOf(e); throw new TikaException( "TIKA-237: Illegal SAXException from " + parser, e); + } catch (RuntimeException e) { + throw new TikaException( + "Unexpected RuntimeException from " + parser, e); } } finally { tmp.dispose(); diff --git a/tika-core/src/main/java/org/apache/tika/utils/CharsetUtils.java b/tika-core/src/main/java/org/apache/tika/utils/CharsetUtils.java index 29e8782..a2931cf 100644 --- a/tika-core/src/main/java/org/apache/tika/utils/CharsetUtils.java +++ b/tika-core/src/main/java/org/apache/tika/utils/CharsetUtils.java @@ -18,6 +18,7 @@ package org.apache.tika.utils; import static java.util.Locale.ENGLISH; +import java.lang.reflect.InvocationTargetException; import java.lang.reflect.Method; import java.nio.charset.Charset; import java.nio.charset.IllegalCharsetNameException; @@ -56,7 +57,7 @@ public class CharsetUtils { for (String alias : charset.aliases()) { COMMON_CHARSETS.put(alias.toLowerCase(ENGLISH), charset); } - } catch (Exception e) { + } catch (IllegalArgumentException e) { // ignore } } @@ -139,7 +140,7 @@ public class CharsetUtils { public static String clean(String charsetName) { try { return forName(charsetName).name(); - } catch (Exception e) { + } catch (IllegalArgumentException e) { return null; } } @@ -194,8 +195,8 @@ public class CharsetUtils { if (cs != null) { return cs; } - } catch (Exception e) { - // ignore + } catch (IllegalArgumentException|IllegalAccessException|InvocationTargetException e) { + //ignore } } diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/crypto/TSDParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/crypto/TSDParser.java index 1107d7c..f841066 100644 --- a/tika-parsers/src/main/java/org/apache/tika/parser/crypto/TSDParser.java +++ b/tika-parsers/src/main/java/org/apache/tika/parser/crypto/TSDParser.java @@ -128,6 +128,8 @@ public class TSDParser extends AbstractParser { tsdMetasList.add(tsdMetas); } + } catch (SecurityException e) { + throw e; } catch (Exception ex) { LOG.error("Error in TSDParser.buildMetas {}", ex.getMessage()); tsdMetasList.clear(); @@ -167,6 +169,8 @@ public class TSDParser extends AbstractParser { edx.parseEmbedded(is, handler, metadata, false); } + } catch (SecurityException e) { + throw e; } catch (Exception ex) { LOG.error("Error in TSDParser.parseTSDContent {}", ex.getMessage()); } finally { @@ -179,7 +183,7 @@ public class TSDParser extends AbstractParser { if (cmsTimeStampedDataParser != null) { try { cmsTimeStampedDataParser.close(); - } catch (Exception ex) { + } catch (IOException ex) { LOG.error("Error in TSDParser.closeCMSParser {}", ex.getMessage()); } } diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/html/HtmlEncodingDetector.java b/tika-parsers/src/main/java/org/apache/tika/parser/html/HtmlEncodingDetector.java index c4c5188..c86ba7e 100644 --- a/tika-parsers/src/main/java/org/apache/tika/parser/html/HtmlEncodingDetector.java +++ b/tika-parsers/src/main/java/org/apache/tika/parser/html/HtmlEncodingDetector.java @@ -22,6 +22,7 @@ import java.io.InputStream; import java.io.InputStreamReader; import java.nio.ByteBuffer; import java.nio.charset.Charset; +import java.nio.charset.IllegalCharsetNameException; import java.nio.charset.StandardCharsets; import java.util.Collections; import java.util.HashSet; @@ -161,7 +162,7 @@ public class HtmlEncodingDetector implements EncodingDetector { if (CharsetUtils.isSupported(candCharset)) { try { return CharsetUtils.forName(candCharset); - } catch (Exception e) { + } catch (IllegalArgumentException e) { //ignore } } diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/mbox/OutlookPSTParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/mbox/OutlookPSTParser.java index 3f9ce98..d810265 100644 --- a/tika-parsers/src/main/java/org/apache/tika/parser/mbox/OutlookPSTParser.java +++ b/tika-parsers/src/main/java/org/apache/tika/parser/mbox/OutlookPSTParser.java @@ -93,12 +93,10 @@ public class OutlookPSTParser extends AbstractParser { if (isValid) { parseFolder(xhtml, pstFile.getRootFolder(), embeddedExtractor); } + } catch (TikaException e) { + throw e; } catch (Exception e) { - if(e instanceof TikaException) { - throw (TikaException) e; - }else { - throw new TikaException(e.getMessage(), e); - } + throw new TikaException(e.getMessage(), e); } finally { if (pstFile != null && pstFile.getFileHandle() != null) { try { @@ -267,7 +265,7 @@ public class OutlookPSTParser extends AbstractParser { xhtml.endElement("div"); } catch (Exception e) { - throw new TikaException("Unable to unpack document stream", e); + EmbeddedDocumentUtil.recordEmbeddedStreamException(e, mailMetadata); } } } diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java index d38bcd4..391c912 100644 --- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java +++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java @@ -317,10 +317,9 @@ public class OfficeParser extends AbstractOfficeParser { try { reader = new VBAMacroReader(fs); macros = reader.readMacros(); + } catch (SecurityException e) { + throw e; } catch (Exception e) { - if (e instanceof SecurityException) { - throw e; - } Metadata m = new Metadata(); m.set(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE, TikaCoreProperties.EmbeddedResourceType.MACRO.toString()); m.set(Metadata.CONTENT_TYPE, "text/x-vbasic"); diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/AbstractOOXMLExtractor.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/AbstractOOXMLExtractor.java index 1865518..4ecebfb 100644 --- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/AbstractOOXMLExtractor.java +++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/AbstractOOXMLExtractor.java @@ -183,8 +183,10 @@ public abstract class AbstractOOXMLExtractor implements OOXMLExtractor { tStream.close(); } + } catch (SecurityException e) { + throw e; } catch (Exception ex) { - + //swallow } } @@ -204,10 +206,9 @@ public abstract class AbstractOOXMLExtractor implements OOXMLExtractor { for (PackageRelationship rel : source.getRelationships()) { try { handleEmbeddedPart(source, rel, handler, metadata, handledTarget); + } catch (SAXException|SecurityException e) { + throw e; } catch (Exception e) { - if (e instanceof SAXException) { - throw e; - } EmbeddedDocumentUtil.recordEmbeddedStreamException(e, metadata); } } diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/SXWPFWordExtractorDecorator.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/SXWPFWordExtractorDecorator.java index 0641a81..57bbfeb 100644 --- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/SXWPFWordExtractorDecorator.java +++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/SXWPFWordExtractorDecorator.java @@ -129,6 +129,8 @@ public class SXWPFWordExtractorDecorator extends AbstractOOXMLExtractor { XWPFStylesShim styles = null; try { styles = loadStyles(documentPart); + } catch (SecurityException e) { + throw e; } catch (Exception e) { metadata.add(TikaCoreProperties.TIKA_META_EXCEPTION_WARNING, ExceptionUtils.getStackTrace(e)); diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/ner/corenlp/CoreNLPNERecogniser.java b/tika-parsers/src/main/java/org/apache/tika/parser/ner/corenlp/CoreNLPNERecogniser.java index 18b3d1a..c446e0a 100644 --- a/tika-parsers/src/main/java/org/apache/tika/parser/ner/corenlp/CoreNLPNERecogniser.java +++ b/tika-parsers/src/main/java/org/apache/tika/parser/ner/corenlp/CoreNLPNERecogniser.java @@ -146,7 +146,7 @@ public class CoreNLPNERecogniser implements NERecogniser { } } catch (Exception e) { - LOG.debug(e.getMessage(), e); + LOG.warn(e.getMessage(), e); } return names; } diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java index 52cbfb1..b2c4496 100644 --- a/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java +++ b/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java @@ -225,8 +225,10 @@ public class TesseractOCRParser extends AbstractParser implements Initializable } + } catch (SecurityException e) { + throw e; } catch (Exception e) { - + //swallow } finally { IOUtils.closeQuietly(tmp); } diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/pkg/StreamingZipContainerDetector.java b/tika-parsers/src/main/java/org/apache/tika/parser/pkg/StreamingZipContainerDetector.java index abfd29e..67eaea8 100644 --- a/tika-parsers/src/main/java/org/apache/tika/parser/pkg/StreamingZipContainerDetector.java +++ b/tika-parsers/src/main/java/org/apache/tika/parser/pkg/StreamingZipContainerDetector.java @@ -218,7 +218,7 @@ public class StreamingZipContainerDetector extends ZipContainerDetectorBase impl } catch (SecurityException e) { throw e; } catch (Exception e) { - + //swallow } return relsHandler.rels; } @@ -251,7 +251,7 @@ public class StreamingZipContainerDetector extends ZipContainerDetectorBase impl } catch (SecurityException e) { throw e; } catch (Exception e) { - + //swallow } return contentTypeHandler.mediaType; } diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/rtf/TextExtractor.java b/tika-parsers/src/main/java/org/apache/tika/parser/rtf/TextExtractor.java index dfc0956..1c6e4eb 100644 --- a/tika-parsers/src/main/java/org/apache/tika/parser/rtf/TextExtractor.java +++ b/tika-parsers/src/main/java/org/apache/tika/parser/rtf/TextExtractor.java @@ -341,7 +341,7 @@ final class TextExtractor { private static Charset getCharset(String name) { try { return CharsetUtils.forName(name); - } catch (Exception e) { + } catch (IllegalArgumentException e) { return ASCII; } } diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/txt/Icu4jEncodingDetector.java b/tika-parsers/src/main/java/org/apache/tika/parser/txt/Icu4jEncodingDetector.java index 4a2c56b..9777d2e 100644 --- a/tika-parsers/src/main/java/org/apache/tika/parser/txt/Icu4jEncodingDetector.java +++ b/tika-parsers/src/main/java/org/apache/tika/parser/txt/Icu4jEncodingDetector.java @@ -70,7 +70,7 @@ public class Icu4jEncodingDetector implements EncodingDetector { for (CharsetMatch match : detector.detectAll()) { try { return CharsetUtils.forName(match.getName()); - } catch (Exception e) { + } catch (IllegalArgumentException e) { // ignore } } diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/txt/UniversalEncodingListener.java b/tika-parsers/src/main/java/org/apache/tika/parser/txt/UniversalEncodingListener.java index 5e215a9..179466d 100644 --- a/tika-parsers/src/main/java/org/apache/tika/parser/txt/UniversalEncodingListener.java +++ b/tika-parsers/src/main/java/org/apache/tika/parser/txt/UniversalEncodingListener.java @@ -75,7 +75,7 @@ class UniversalEncodingListener implements CharsetListener { } try { this.charset = CharsetUtils.forName(name); - } catch (Exception e) { + } catch (IllegalArgumentException e) { // ignore } }
