This is an automated email from the ASF dual-hosted git repository. tallison pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/main by this push:
     new 6ebf9cdb7 improve detection of audio/mpeg TIKA-3994 (#1052)
6ebf9cdb7 is described below

commit 6ebf9cdb7445d5731f4471a1ce903376e904ec13
Author: Tim Allison <talli...@apache.org>
AuthorDate: Fri Mar 31 13:38:54 2023 -0400

    improve detection of audio/mpeg TIKA-3994 (#1052)
---
 tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
index a877a2e11..fdb855f1c 100644
--- a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
+++ b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
@@ -5243,10 +5243,14 @@
       <match value="0xfffb" type="string" offset="0"/> <!-- V1, L3 -->
       <match value="0xfffc" type="string" offset="0"/> <!-- V1, L2, CRC -->
       <match value="0xfffd" type="string" offset="0"/> <!-- V1, L2 -->
+      <match value="0xffe3" type="string" offset="0"/> <!-- MP3 2.5 from pronom -->
       <!-- TIKA-417: This is the UTF-16 LE byte order mark! -->
       <!-- match value="0xfffe" type="string" offset="0"/ --> <!-- V1, L1, CRC -->
       <match value="0xffff" type="string" offset="0"/> <!-- V1, L1 -->
       <match value="ID3" type="string" offset="0"/>
+      <!-- in the wild, 0D0A or quite a few \x00 may precede the magic -->
+      <match value="(?:\\x0D\\x0A|\\x00{1,1024})(?:\\xff[\\xe3\\xf2\\xf3\\xf4\\xf5\\xf6\\xf7\\xf8\\xf9\\xfa\\xfb\\xfc\\xfd\\xfe\\xff]|ID3)"
+             type="regex" offset="0"/>
     </magic>
     <glob pattern="*.mpga"/>
     <glob pattern="*.mp2"/>