This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch branch_3x
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/branch_3x by this push:
new 78d8e0c44 improve documentation for DefaultZipContainerDetector
78d8e0c44 is described below
commit 78d8e0c44c99ca158877a072859343cb4ae93c65
Author: tallison <[email protected]>
AuthorDate: Wed Aug 20 08:51:04 2025 -0400
improve documentation for DefaultZipContainerDetector
---
.../tika/detect/zip/DefaultZipContainerDetector.java | 14 ++++++++++++++
1 file changed, 14 insertions(+)
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-zip-commons/src/main/java/org/apache/tika/detect/zip/DefaultZipContainerDetector.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-zip-commons/src/main/java/org/apache/tika/detect/zip/DefaultZipContainerDetector.java
index 1ea61d23b..af6f0e68c 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-zip-commons/src/main/java/org/apache/tika/detect/zip/DefaultZipContainerDetector.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-zip-commons/src/main/java/org/apache/tika/detect/zip/DefaultZipContainerDetector.java
@@ -45,6 +45,20 @@ import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
+/**
+ * This class is designed to detect subtypes of zip-based file formats.
+ * For the sake of efficiency, it also detects archive and compressor formats
+ * via commons-compress.
+ * <p>
+ * As a first step, it uses commons-compress to detect any archive format
+ * supported by commons-compress. If a "zip" file is detected, then the
+ * ZipContainerDetectors are run to try to identify a subtype.
+ * <p>
+ * If an archive format that is not a zip is detected, that mime type is returned.
+ * <p>
+ * Finally, if the file is not detected as an archive format, this runs
+ * commons-compress' compressor format detector.
+ */
public class DefaultZipContainerDetector implements Detector {
//Regrettably, some tiff files can be incorrectly identified