This is an automated email from the ASF dual-hosted git repository. tallison pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/main by this push:
     new 441cef8f3 TIKA-3873 -- add an encrypted metadata value when docs are encrypted.
441cef8f3 is described below

commit 441cef8f3946f4804bf7f472f07cfae36f7e29b5
Author: tballison <talli...@apache.org>
AuthorDate: Fri Nov 4 18:07:20 2022 -0400

    TIKA-3873 -- add an encrypted metadata value when docs are encrypted.
---
 .../src/main/java/org/apache/tika/metadata/TikaCoreProperties.java | 3 +++
 .../main/java/org/apache/tika/parser/RecursiveParserWrapper.java | 7 +++++++
 .../src/test/java/org/apache/tika/parser/odf/ODFParserTest.java | 3 +++
 3 files changed, 13 insertions(+)

diff --git a/tika-core/src/main/java/org/apache/tika/metadata/TikaCoreProperties.java b/tika-core/src/main/java/org/apache/tika/metadata/TikaCoreProperties.java
index 03a0e2cd2..8ba1834f4 100644
--- a/tika-core/src/main/java/org/apache/tika/metadata/TikaCoreProperties.java
+++ b/tika-core/src/main/java/org/apache/tika/metadata/TikaCoreProperties.java
@@ -291,6 +291,9 @@ public interface TikaCoreProperties {
     Property SIGNATURE_FILTER = Property.internalTextBag("signature:filter");
     Property SIGNATURE_CONTACT_INFO = Property.internalTextBag("signature:contact-info");
 
+    //is the file encrypted
+    Property IS_ENCRYPTED = Property.internalBoolean(TIKA_META_PREFIX + "encrypted");
+
     /**
      * A file might contain different types of embedded documents.
      * The most common is the ATTACHMENT.
diff --git a/tika-core/src/main/java/org/apache/tika/parser/RecursiveParserWrapper.java b/tika-core/src/main/java/org/apache/tika/parser/RecursiveParserWrapper.java
index e9e9457bb..79ff4c379 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/RecursiveParserWrapper.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/RecursiveParserWrapper.java
@@ -25,6 +25,7 @@ import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
 
 import org.apache.tika.exception.CorruptedFileException;
+import org.apache.tika.exception.EncryptedDocumentException;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.exception.WriteLimitReachedException;
 import org.apache.tika.exception.ZeroByteFileException;
@@ -162,6 +163,9 @@ public class RecursiveParserWrapper extends ParserDecorator {
             context.set(RecursivelySecureContentHandler.class, secureContentHandler);
             getWrappedParser().parse(tis, secureContentHandler, metadata, context);
         } catch (Throwable e) {
+            if (e instanceof EncryptedDocumentException) {
+                metadata.set(TikaCoreProperties.IS_ENCRYPTED, "true");
+            }
             if (WriteLimitReachedException.isWriteLimitReached(e)) {
                 metadata.set(TikaCoreProperties.WRITE_LIMIT_REACHED, "true");
             } else {
@@ -255,6 +259,9 @@ public class RecursiveParserWrapper extends ParserDecorator {
             } catch (CorruptedFileException e) {
                 throw e;
             } catch (TikaException e) {
+                if (e instanceof EncryptedDocumentException) {
+                    metadata.set(TikaCoreProperties.IS_ENCRYPTED, true);
+                }
                 if (context.get(ZeroByteFileException.IgnoreZeroByteFileException.class) != null &&
                         e instanceof ZeroByteFileException) {
                     //do nothing
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-miscoffice-module/src/test/java/org/apache/tika/parser/odf/ODFParserTest.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-miscoffice-module/src/test/java/org/apache/tika/parser/odf/ODFParserTest.java
index b9ee45bc2..7feab3a38 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-miscoffice-module/src/test/java/org/apache/tika/parser/odf/ODFParserTest.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-miscoffice-module/src/test/java/org/apache/tika/parser/odf/ODFParserTest.java
@@ -414,6 +414,9 @@ public class ODFParserTest extends TikaTest {
         assertThrows(EncryptedDocumentException.class, () -> {
             getRecursiveMetadata(p, false);
         });
+
+        List<Metadata> metadataList = getRecursiveMetadata(p, true);
+        assertEquals("true", metadataList.get(0).get(TikaCoreProperties.IS_ENCRYPTED));
     }
 
     //this, of course, should throw an EncryptedDocumentException