This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/main by this push:
     new 616c35fdb TIKA-4511 -- detected compressed bmp (#2361)
616c35fdb is described below

commit 616c35fdbcb0303852237c5f2dbf493d0f34f287
Author: Tim Allison <[email protected]>
AuthorDate: Thu Oct 9 10:11:42 2025 -0400

    TIKA-4511 -- detected compressed bmp (#2361)
---
 CHANGES.txt                                        |  2 +
 .../org/apache/tika/mime/tika-mimetypes.xml        | 44 ++++++++++++++++++----
 2 files changed, 38 insertions(+), 8 deletions(-)

diff --git a/CHANGES.txt b/CHANGES.txt
index 861b8207c..6de20938a 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -25,6 +25,8 @@ Release 4.0.0-BETA1 - ???
 
 Release 3.3.0 - ???
 
+  * Add detection of compressed bmp (TIKA-4511).
+
   * Allow per file timeouts in tika-pipes (TIKA-4497).
 
   * Add matroska detector (TIKA-1180).
diff --git 
a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml 
b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
index 0978ef575..5c5523f47 100644
--- a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
+++ b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
@@ -6380,20 +6380,48 @@
     <magic priority="50">
       <match value="BM" type="string" offset="0">
         <match value="0x0100" type="string" offset="26">
-         <match value="0x0000" type="string" offset="28"/>
-         <match value="0x0100" type="string" offset="28"/>
-         <match value="0x0400" type="string" offset="28"/>
-         <match value="0x0800" type="string" offset="28"/>
-         <match value="0x1000" type="string" offset="28"/>
-         <match value="0x1800" type="string" offset="28"/>
-         <match value="0x2000" type="string" offset="28"/>
+          <match minShouldMatch="2">
+            <match minShouldMatch="1">
+              <match value="0x0000" type="string" offset="28"/>
+              <match value="0x0100" type="string" offset="28"/>
+              <match value="0x0400" type="string" offset="28"/>
+              <match value="0x0800" type="string" offset="28"/>
+              <match value="0x1000" type="string" offset="28"/>
+              <match value="0x1800" type="string" offset="28"/>
+              <match value="0x2000" type="string" offset="28"/>
+            </match>
+            <match value="0x00000000" type="string" offset="30"/>
+          </match>
         </match>
       </match>
     </magic>
     <glob pattern="*.bmp"/>
     <glob pattern="*.dib"/>
   </mime-type>
-
+  <mime-type type="image/bmp;format=compressed">
+    <acronym>BMP</acronym>
+    <_comment>Windows bitmap compressed</_comment>
+    <tika:link>http://en.wikipedia.org/wiki/BMP_file_format</tika:link>
+    <tika:uti>com.microsoft.bmp</tika:uti>
+    <!-- detection could be based on a non-zero value at offset=30.
+         current strategy: image/bmp (priority 50) matches when that value
+         is zero; this rule, at lower priority (45), matches otherwise.
+    -->
+    <magic priority="45">
+      <match value="BM" type="string" offset="0">
+        <match value="0x0100" type="string" offset="26">
+          <match value="0x0000" type="string" offset="28"/>
+          <match value="0x0100" type="string" offset="28"/>
+          <match value="0x0400" type="string" offset="28"/>
+          <match value="0x0800" type="string" offset="28"/>
+          <match value="0x1000" type="string" offset="28"/>
+          <match value="0x1800" type="string" offset="28"/>
+          <match value="0x2000" type="string" offset="28"/>
+        </match>
+      </match>
+    </magic>
+    <sub-class-of type="image/bmp"/>
+  </mime-type>
   <mime-type type="image/x-bpg">
     <acronym>BPG</acronym>
     <_comment>Better Portable Graphics</_comment>

Reply via email to