This is an automated email from the ASF dual-hosted git repository. tallison pushed a commit to branch TIKA-4054 in repository https://gitbox.apache.org/repos/asf/tika.git
commit f6a7b8a7c954dc5a881b28ec32ddeaba3194a037 Author: tballison <[email protected]> AuthorDate: Fri May 26 13:56:44 2023 -0400 TIKA-4054 -- add a bunch of mimes via Greg Lepore --- .../org/apache/tika/mime/tika-mimetypes.xml | 116 ++++++++++++++++++++- .../java/org/apache/tika/mime/OneOffMimeTest.java | 17 +++ 2 files changed, 128 insertions(+), 5 deletions(-) diff --git a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml index 5b0a479fe..db90614b3 100644 --- a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml +++ b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml @@ -3328,7 +3328,19 @@ </magic> <glob pattern="*.arj"/> </mime-type> - + <mime-type type="application/x-asprs"> + <_comment>ASPRS Lidar Data Exchange Format</_comment> + <magic priority="50"> + <match value="LASF" type="string" offset="0"> + <!-- version 1.1 --> + <match value="\x01\x01" type="string" offset="24"/> + <!-- version 1.2 --> + <match value="\x01\x02" type="string" offset="24"/> + </match> + </magic> + <glob pattern="*.las"/> + <glob pattern="*.laz"/> + </mime-type> <mime-type type="application/x-authorware-bin"> <glob pattern="*.aab"/> <glob pattern="*.x32"/> @@ -3586,6 +3598,23 @@ <sub-class-of type="application/x-tar"/> </mime-type> + <mime-type type="application/x-amiga-disk-format"> + <_comment>Amiga Disk File</_comment> + <glob pattern="*.adf"/> + <magic priority="50"> + <match value="DOS" offset="0" type="string"> + <match value="\x00" offset="4" type="string"/> + <match value="\x01" offset="4" type="string"/> + <match value="\x02" offset="4" type="string"/> + <match value="\x03" offset="4" type="string"/> + <match value="\x04" offset="4" type="string"/> + <match value="\x05" offset="4" type="string"/> + <match value="\x06" offset="4" type="string"/> + <match value="\x07" offset="4" type="string"/> + </match> + </magic> + </mime-type> + <mime-type type="application/x-brotli"> <glob pattern="*.br" /> <glob pattern="*.brotli" /> @@ -3852,10 +3881,12 @@ <mime-type type="application/x-font-dos"/> <mime-type type="application/x-font-framemaker"/> <mime-type type="application/x-font-ghostscript"> + <!-- conflict with portable sound format --> <glob pattern="*.gsf"/> </mime-type> <mime-type type="application/x-font-libgrx"/> <mime-type type="application/x-font-linux-psf"> + <!-- conflict with portable sound format --> <glob pattern="*.psf"/> </mime-type> @@ -4086,7 +4117,16 @@ <mime-type type="application/x-java-pack200"> <glob pattern="*.pack"/> </mime-type> - + <mime-type type="application/x-jeol-jdf"> + <_comment>JDF NMR Spectroscopy</_comment> + <glob pattern="*.jdf"/> + <magic priority="50"> + <!-- big endian --> + <match value="JEOL.NMR" offset="0" type="string"/> + <!-- little endian --> + <match value="RMN.LOEJ" offset="0" type="string"/> + </magic> + </mime-type> <mime-type type="application/x-kdelnk"> <magic priority="50"> <match value="[KDE\ Desktop\ Entry]" type="string" offset="0"/> @@ -4617,7 +4657,13 @@ <match value="HEADER RECORD*******LIBRARY HEADER RECORD!!!!!!!" offset="0" /> </magic> </mime-type> - + <mime-type type="application/x-spss-sav"> + <_comment>SPSS Data File</_comment> + <glob pattern="*.sav"/> + <magic priority="50"> + <match value="$FL2@(#)" offset="0" /> + </magic> + </mime-type> <mime-type type="application/x-sc"> <magic priority="50"> <match value="Spreadsheet" type="string" offset="38"/> @@ -4659,7 +4705,13 @@ </magic> <glob pattern="*.swf"/> </mime-type> - + <mime-type type="application/x-sibelius"> + <_comment>Sibelius</_comment> + <magic priority="50"> + <match value="\x0FSIBELIUS" type="string" offset="0"/> + </magic> + <glob pattern="*.sib"/> + </mime-type> <mime-type type="application/x-silverlight-app"> <glob pattern="*.xap"/> </mime-type> @@ -4678,6 +4730,14 @@ <glob pattern="*.sfdu"/> </mime-type> + <mime-type type="application/x-spectrum-tzx"> + <_comment>TAP (ZX Spectrum)</_comment> + <magic priority="50"> + <match value="ZXTape!\x1a" type="string" offset="0"/> + </magic> + <glob pattern="*.tzx"/> + </mime-type> + <mime-type type="application/x-sqlite3"> <magic priority="50"> <match value="SQLite format 3\x00" type="string" offset="0"/> @@ -5362,6 +5422,42 @@ <glob pattern="*.ogg"/> <sub-class-of type="audio/ogg"/> </mime-type> + <mime-type type="audio/x-psf"> + <_comment>Portable Sound Format</_comment> + <tika:link>http://web.archive.org/web/20140125155137/http://wiki.neillcorlett.com/PSFFormat</tika:link> + <magic priority="50"> + <match value="PSF" type="string" offset="0"> + <!-- Playstation (PSF1) --> + <match value="\x01" type="string" offset="3"/> + <!-- Playstation 2 (PSF2) --> + <match value="\x02" type="string" offset="3"/> + <!-- Sega Saturn --> + <match value="\x11" type="string" offset="3"/> + <!-- Sega Dreamcast --> + <match value="\x12" type="string" offset="3"/> + <!-- Sega Genesis --> + <match value="\x13" type="string" offset="3"/> + <!-- Nintendo 64--> + <match value="\x21" type="string" offset="3"/> + <!-- GameBoy Advance --> + <match value="\x22" type="string" offset="3"/> + <!-- Super NES --> + <match value="\x23" type="string" offset="3"/> + <!-- Capcom QSound --> + <match value="\x41" type="string" offset="3"/> + </match> + </magic> + <!-- conflict with application/x-font-linux-psf + <glob pattern="*.psf"/>--> + <glob pattern="*.psf1"/> + <glob pattern="*.psflib"/> + <glob pattern="*.minipsf"/> + <glob pattern="*.minipsf1"/> + <!-- conflict with application/x-font-ghostscript + <glob pattern="*.gsf"/> --> + <glob pattern="*.gslib"/> + <glob pattern="*.minigsf"/> + </mime-type> <mime-type type="audio/x-sap"> <_comment>Slight Atari Player</_comment> <tika:link>https://asap.sourceforge.net/sap-format.html</tika:link> @@ -5670,7 +5766,17 @@ <mime-type type="chemical/x-xyz"> <glob pattern="*.xyz"/> </mime-type> - + <mime-type type="image/x-3ds"> + <_comment>3D Studio (V1)</_comment> + <magic priority="50"> + <match value="MM" type="string" offset="0"> + <match value="\x02\x00\x0A\x00\x00\x00" type="string" offset="6"> + <match value="==" type="string" offset="16"/> + </match> + </match> + </magic> + <glob pattern="*.3ds"/> + </mime-type> <mime-type type="image/aces"> <_comment>ACES Image Container File</_comment> <magic priority="50"> diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/mime/OneOffMimeTest.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/mime/OneOffMimeTest.java index 45c491639..fb0ad4b54 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/mime/OneOffMimeTest.java +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/mime/OneOffMimeTest.java @@ -18,6 +18,7 @@ package org.apache.tika.mime; import static org.junit.jupiter.api.Assertions.assertEquals; +import java.io.File; import java.io.InputStream; import java.nio.file.Files; import java.nio.file.Path; @@ -28,6 +29,7 @@ import org.apache.commons.io.input.UnsynchronizedByteArrayInputStream; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; +import org.apache.tika.Tika; import org.apache.tika.TikaTest; import org.apache.tika.metadata.Metadata; import org.apache.tika.metadata.TikaCoreProperties; @@ -46,6 +48,21 @@ public class OneOffMimeTest extends TikaTest { assertByName(mime, p); } + @Test + @Disabled("again for development purposes with files that aren't suitable for the repo") + public void testDir() throws Exception { + Path root = Paths.get(""); + Tika tika = new Tika(); + for (File f : root.toFile().listFiles()) { + String fileMime = tika.detect(f); + String streamMime = ""; + try (InputStream is = Files.newInputStream(f.toPath())) { + streamMime = tika.detect(is); + } + System.out.println(f.getName() + " fileMime=" + fileMime + " stream=" + streamMime); + } + } + private void assertByName(String expected, Path p) throws Exception { Metadata metadata = new Metadata(); metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, p.getFileName().toString());
