This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/main by this push:
new 616c35fdb TIKA-4511 -- detected compressed bmp (#2361)
616c35fdb is described below
commit 616c35fdbcb0303852237c5f2dbf493d0f34f287
Author: Tim Allison <[email protected]>
AuthorDate: Thu Oct 9 10:11:42 2025 -0400
TIKA-4511 -- detected compressed bmp (#2361)
---
CHANGES.txt | 2 +
.../org/apache/tika/mime/tika-mimetypes.xml | 44 ++++++++++++++++++----
2 files changed, 38 insertions(+), 8 deletions(-)
diff --git a/CHANGES.txt b/CHANGES.txt
index 861b8207c..6de20938a 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -25,6 +25,8 @@ Release 4.0.0-BETA1 - ???
Release 3.3.0 - ???
+ * Add detection of compressed bmp (TIKA-4511).
+
* Allow per file timeouts in tika-pipes (TIKA-4497).
* Add matroska detector (TIKA-1180).
diff --git
a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
index 0978ef575..5c5523f47 100644
--- a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
+++ b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
@@ -6380,20 +6380,48 @@
<magic priority="50">
<match value="BM" type="string" offset="0">
<match value="0x0100" type="string" offset="26">
- <match value="0x0000" type="string" offset="28"/>
- <match value="0x0100" type="string" offset="28"/>
- <match value="0x0400" type="string" offset="28"/>
- <match value="0x0800" type="string" offset="28"/>
- <match value="0x1000" type="string" offset="28"/>
- <match value="0x1800" type="string" offset="28"/>
- <match value="0x2000" type="string" offset="28"/>
+ <match minShouldMatch="2">
+ <match minShouldMatch="1">
+ <match value="0x0000" type="string" offset="28"/>
+ <match value="0x0100" type="string" offset="28"/>
+ <match value="0x0400" type="string" offset="28"/>
+ <match value="0x0800" type="string" offset="28"/>
+ <match value="0x1000" type="string" offset="28"/>
+ <match value="0x1800" type="string" offset="28"/>
+ <match value="0x2000" type="string" offset="28"/>
+ </match>
+ <match value="0x00000000" type="string" offset="30"/>
+ </match>
</match>
</match>
</magic>
<glob pattern="*.bmp"/>
<glob pattern="*.dib"/>
</mime-type>
-
+ <mime-type type="image/bmp;format=compressed">
+ <acronym>BMP</acronym>
+ <_comment>Windows bitmap compressed</_comment>
+ <tika:link>http://en.wikipedia.org/wiki/BMP_file_format</tika:link>
+ <tika:uti>com.microsoft.bmp</tika:uti>
+ <!-- Detection could be based on a non-zero value at offset=30.
The current strategy instead lets image/bmp (priority 50) claim files whose
value at offset=30 is zero (uncompressed); this lower-priority (45) match takes the rest.
-->
+ <magic priority="45">
+ <match value="BM" type="string" offset="0">
+ <match value="0x0100" type="string" offset="26">
+ <match value="0x0000" type="string" offset="28"/>
+ <match value="0x0100" type="string" offset="28"/>
+ <match value="0x0400" type="string" offset="28"/>
+ <match value="0x0800" type="string" offset="28"/>
+ <match value="0x1000" type="string" offset="28"/>
+ <match value="0x1800" type="string" offset="28"/>
+ <match value="0x2000" type="string" offset="28"/>
+ </match>
+ </match>
+ </magic>
+ <sub-class-of type="image/bmp"/>
+ </mime-type>
<mime-type type="image/x-bpg">
<acronym>BPG</acronym>
<_comment>Better Portable Graphics</_comment>