This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/main by this push:
new 063cc8e8b TIKA-4553 -- rm TikaConfig from tika-detectors and tika-eval-core (#2434)
063cc8e8b is described below
commit 063cc8e8b813e68d5f726a957d3793a19a28e1f2
Author: Tim Allison <[email protected]>
AuthorDate: Tue Dec 9 14:40:40 2025 -0500
TIKA-4553 -- rm TikaConfig from tika-detectors and tika-eval-core (#2434)
---
tika-detectors/tika-detector-magika/pom.xml | 27 +++++++++
.../apache/tika/detect/magika/MagikaDetector.java | 62 +++++++++++++++------
.../tika/detect/magika/TestMagikaIntegration.java | 7 +--
.../src/test/resources/configs/tika-config.json | 12 ++++
.../src/test/resources/configs/tika-config.xml | 28 ----------
tika-detectors/tika-detector-siegfried/pom.xml | 21 +++++++
.../tika/detect/siegfried/SiegfriedDetector.java | 62 +++++++++++++++------
.../detect/siegfried/TestSiegfriedIntegration.java | 7 +--
.../src/test/resources/configs/tika-config.json | 12 ++++
.../src/test/resources/configs/tika-config.xml | 27 ---------
.../apache/tika/eval/core/util/MimeUtilTest.java | 64 ----------------------
11 files changed, 168 insertions(+), 161 deletions(-)
diff --git a/tika-detectors/tika-detector-magika/pom.xml
b/tika-detectors/tika-detector-magika/pom.xml
index 789a06d91..69fc4e1a6 100644
--- a/tika-detectors/tika-detector-magika/pom.xml
+++ b/tika-detectors/tika-detector-magika/pom.xml
@@ -39,6 +39,20 @@
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
</dependency>
+ <!-- Annotation processor - contains @TikaComponent and ensures build order.
+ "provided" because it is only used at compile time -->
+ <dependency>
+ <groupId>org.apache.tika</groupId>
+ <artifactId>tika-annotation-processor</artifactId>
+ <version>${project.version}</version>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>tika-serialization</artifactId>
+ <version>${project.version}</version>
+ <scope>test</scope>
+ </dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-core</artifactId>
@@ -68,6 +82,19 @@
</excludes>
</configuration>
</plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-compiler-plugin</artifactId>
+ <configuration>
+ <annotationProcessorPaths>
+ <path>
+ <groupId>org.apache.tika</groupId>
+ <artifactId>tika-annotation-processor</artifactId>
+ <version>${project.version}</version>
+ </path>
+ </annotationProcessorPaths>
+ </configuration>
+ </plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-jar-plugin</artifactId>
diff --git
a/tika-detectors/tika-detector-magika/src/main/java/org/apache/tika/detect/magika/MagikaDetector.java
b/tika-detectors/tika-detector-magika/src/main/java/org/apache/tika/detect/magika/MagikaDetector.java
index a96ad20a4..4ef38ce73 100644
---
a/tika-detectors/tika-detector-magika/src/main/java/org/apache/tika/detect/magika/MagikaDetector.java
+++
b/tika-detectors/tika-detector-magika/src/main/java/org/apache/tika/detect/magika/MagikaDetector.java
@@ -31,7 +31,10 @@ import com.fasterxml.jackson.databind.ObjectMapper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import org.apache.tika.config.ConfigDeserializer;
import org.apache.tika.config.Field;
+import org.apache.tika.config.JsonConfig;
+import org.apache.tika.config.TikaComponent;
import org.apache.tika.detect.Detector;
import org.apache.tika.io.BoundedInputStream;
import org.apache.tika.io.TemporaryResources;
@@ -50,6 +53,7 @@ import org.apache.tika.utils.StringUtils;
* The default behavior is to run detection, report the results in the
* metadata and then return null so that other detectors will be used.
*/
+@TikaComponent
public class MagikaDetector implements Detector {
enum STATUS {
@@ -90,11 +94,35 @@ public class MagikaDetector implements Detector {
private static ObjectMapper OBJECT_MAPPER = new ObjectMapper();
private static boolean HAS_WARNED = false;
private Boolean hasMagika = null;
- private String magikaPath = DEFAULT_MAGIKA_PATH;
- private int maxBytes = 1_000_000;
- private long timeoutMs = DEFAULT_TIMEOUT_MS;
- private boolean useMime = false;
+ /**
+ * Configuration class for JSON deserialization.
+ */
+ public static class Config {
+ public String magikaPath = DEFAULT_MAGIKA_PATH;
+ public int maxBytes = 1_000_000;
+ public long timeoutMs = DEFAULT_TIMEOUT_MS;
+ public boolean useMime = false;
+ }
+
+ private final Config config;
+
+ /**
+ * Default constructor.
+ */
+ public MagikaDetector() {
+ this.config = new Config();
+ }
+
+ /**
+ * Constructor for JSON configuration.
+ * Requires tika-serialization on the classpath.
+ *
+ * @param jsonConfig JSON configuration
+ */
+ public MagikaDetector(JsonConfig jsonConfig) {
+ this.config = ConfigDeserializer.buildConfig(jsonConfig, Config.class);
+ }
public static boolean checkHasMagika(String magikaCommandPath) {
String[] commandline = new String[]{magikaCommandPath, "--version"};
@@ -136,11 +164,11 @@ public class MagikaDetector implements Detector {
@Override
public MediaType detect(InputStream input, Metadata metadata) throws
IOException {
if (hasMagika == null) {
- hasMagika = checkHasMagika(this.magikaPath);
+ hasMagika = checkHasMagika(this.config.magikaPath);
}
if (!hasMagika) {
if (!HAS_WARNED) {
- LOGGER.warn("'magika' command isn't working: '" + magikaPath +
"'");
+ LOGGER.warn("'magika' command isn't working: '" +
config.magikaPath + "'");
HAS_WARNED = true;
}
return MediaType.OCTET_STREAM;
@@ -152,10 +180,10 @@ public class MagikaDetector implements Detector {
return detectOnPath(tis.getPath(), metadata);
}
- input.mark(maxBytes);
+ input.mark(config.maxBytes);
try (TemporaryResources tmp = new TemporaryResources()) {
Path tmpFile = tmp.createTempFile();
- Files.copy(new BoundedInputStream(maxBytes, input), tmpFile,
REPLACE_EXISTING);
+ Files.copy(new BoundedInputStream(config.maxBytes, input),
tmpFile, REPLACE_EXISTING);
return detectOnPath(tmpFile, metadata);
} finally {
input.reset();
@@ -174,23 +202,23 @@ public class MagikaDetector implements Detector {
*/
@Field
public void setUseMime(boolean useMime) {
- this.useMime = useMime;
+ this.config.useMime = useMime;
}
public boolean isUseMime() {
- return useMime;
+ return config.useMime;
}
private MediaType detectOnPath(Path path, Metadata metadata) throws
IOException {
String[] args = new String[]{
- ProcessUtils.escapeCommandLine(magikaPath),
+ ProcessUtils.escapeCommandLine(config.magikaPath),
ProcessUtils.escapeCommandLine(path.toAbsolutePath().toString()),
"--json"
};
ProcessBuilder builder = new ProcessBuilder(args);
- FileProcessResult result = ProcessUtils.execute(builder, timeoutMs,
10000000, 1000);
- return processResult(result, metadata, useMime);
+ FileProcessResult result = ProcessUtils.execute(builder,
config.timeoutMs, 10000000, 1000);
+ return processResult(result, metadata, config.useMime);
}
protected static MediaType processResult(FileProcessResult result,
Metadata metadata,
@@ -331,8 +359,8 @@ public class MagikaDetector implements Detector {
public void setMagikaPath(String fileCommandPath) {
//this opens up a potential command vulnerability.
//Don't ever let an untrusted user set this.
- this.magikaPath = fileCommandPath;
- checkHasMagika(this.magikaPath);
+ this.config.magikaPath = fileCommandPath;
+ checkHasMagika(this.config.magikaPath);
}
/**
@@ -344,11 +372,11 @@ public class MagikaDetector implements Detector {
*/
@Field
public void setMaxBytes(int maxBytes) {
- this.maxBytes = maxBytes;
+ this.config.maxBytes = maxBytes;
}
@Field
public void setTimeoutMs(long timeoutMs) {
- this.timeoutMs = timeoutMs;
+ this.config.timeoutMs = timeoutMs;
}
}
diff --git
a/tika-detectors/tika-detector-magika/src/test/java/org/apache/tika/detect/magika/TestMagikaIntegration.java
b/tika-detectors/tika-detector-magika/src/test/java/org/apache/tika/detect/magika/TestMagikaIntegration.java
index 9b190d77c..7ead58d91 100644
---
a/tika-detectors/tika-detector-magika/src/test/java/org/apache/tika/detect/magika/TestMagikaIntegration.java
+++
b/tika-detectors/tika-detector-magika/src/test/java/org/apache/tika/detect/magika/TestMagikaIntegration.java
@@ -27,9 +27,8 @@ import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import org.apache.tika.TikaTest;
-import org.apache.tika.config.TikaConfig;
+import org.apache.tika.config.loader.TikaLoader;
import org.apache.tika.metadata.Metadata;
-import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.Parser;
@Disabled("need to have magika on the path")
@@ -37,8 +36,8 @@ public class TestMagikaIntegration extends TikaTest {
@Test
public void testIntegration() throws Exception {
- TikaConfig tikaConfig = new TikaConfig(getConfig("tika-config.xml"));
- Parser p = new AutoDetectParser(tikaConfig);
+ TikaLoader tikaLoader = TikaLoader.load(getConfig("tika-config.json"));
+ Parser p = tikaLoader.loadAutoDetectParser();
List<Metadata> metadataList = getRecursiveMetadata("testPDF.pdf", p);
debug(getRecursiveMetadata("testPDF.pdf", p));
Metadata m = metadataList.get(0);
diff --git
a/tika-detectors/tika-detector-magika/src/test/resources/configs/tika-config.json
b/tika-detectors/tika-detector-magika/src/test/resources/configs/tika-config.json
new file mode 100644
index 000000000..a97581cd3
--- /dev/null
+++
b/tika-detectors/tika-detector-magika/src/test/resources/configs/tika-config.json
@@ -0,0 +1,12 @@
+{
+ "detectors": [
+ {
+ "default-detector": {}
+ },
+ {
+ "magika-detector": {
+ "useMime": true
+ }
+ }
+ ]
+}
diff --git
a/tika-detectors/tika-detector-magika/src/test/resources/configs/tika-config.xml
b/tika-detectors/tika-detector-magika/src/test/resources/configs/tika-config.xml
deleted file mode 100644
index 294f25290..000000000
---
a/tika-detectors/tika-detector-magika/src/test/resources/configs/tika-config.xml
+++ /dev/null
@@ -1,28 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-<properties>
- <detectors>
- <detector class="org.apache.tika.detect.DefaultDetector"/>
- <detector class="org.apache.tika.detect.magika.MagikaDetector">
- <params>
- <param name="magikaPath"
type="string">/home/tallison/.cargo/bin/magika</param> <!-- or replace with
full path to the commandline -->
- <param name="useMime" type="bool">true</param>
- </params>
- </detector>
- </detectors>
-</properties>
diff --git a/tika-detectors/tika-detector-siegfried/pom.xml
b/tika-detectors/tika-detector-siegfried/pom.xml
index 0f15f86c2..75a723ce2 100644
--- a/tika-detectors/tika-detector-siegfried/pom.xml
+++ b/tika-detectors/tika-detector-siegfried/pom.xml
@@ -39,6 +39,14 @@
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
</dependency>
+ <!-- Annotation processor - contains @TikaComponent and ensures build order.
+ "provided" because it is only used at compile time -->
+ <dependency>
+ <groupId>org.apache.tika</groupId>
+ <artifactId>tika-annotation-processor</artifactId>
+ <version>${project.version}</version>
+ <scope>provided</scope>
+ </dependency>
<!-- test dependencies -->
<dependency>
<groupId>${project.groupId}</groupId>
@@ -75,6 +83,19 @@
</excludes>
</configuration>
</plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-compiler-plugin</artifactId>
+ <configuration>
+ <annotationProcessorPaths>
+ <path>
+ <groupId>org.apache.tika</groupId>
+ <artifactId>tika-annotation-processor</artifactId>
+ <version>${project.version}</version>
+ </path>
+ </annotationProcessorPaths>
+ </configuration>
+ </plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-jar-plugin</artifactId>
diff --git
a/tika-detectors/tika-detector-siegfried/src/main/java/org/apache/tika/detect/siegfried/SiegfriedDetector.java
b/tika-detectors/tika-detector-siegfried/src/main/java/org/apache/tika/detect/siegfried/SiegfriedDetector.java
index 7629a066a..e1b94ef27 100644
---
a/tika-detectors/tika-detector-siegfried/src/main/java/org/apache/tika/detect/siegfried/SiegfriedDetector.java
+++
b/tika-detectors/tika-detector-siegfried/src/main/java/org/apache/tika/detect/siegfried/SiegfriedDetector.java
@@ -29,7 +29,10 @@ import com.fasterxml.jackson.databind.ObjectMapper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import org.apache.tika.config.ConfigDeserializer;
import org.apache.tika.config.Field;
+import org.apache.tika.config.JsonConfig;
+import org.apache.tika.config.TikaComponent;
import org.apache.tika.detect.Detector;
import org.apache.tika.io.BoundedInputStream;
import org.apache.tika.io.TemporaryResources;
@@ -48,6 +51,7 @@ import org.apache.tika.utils.StringUtils;
* The default behavior is to run detection, report the results in the
* metadata and then return null so that other detectors will be used.
*/
+@TikaComponent
public class SiegfriedDetector implements Detector {
enum STATUS {
@@ -90,11 +94,35 @@ public class SiegfriedDetector implements Detector {
private static ObjectMapper OBJECT_MAPPER = new ObjectMapper();
private static boolean HAS_WARNED = false;
private Boolean hasSiegfriedCommand = null;
- private String siegfriedPath = DEFAULT_SIEGFRIED_PATH;
- private int maxBytes = 1_000_000;
- private long timeoutMs = DEFAULT_TIMEOUT_MS;
- private boolean useMime = false;
+ /**
+ * Configuration class for JSON deserialization.
+ */
+ public static class Config {
+ public String siegfriedPath = DEFAULT_SIEGFRIED_PATH;
+ public int maxBytes = 1_000_000;
+ public long timeoutMs = DEFAULT_TIMEOUT_MS;
+ public boolean useMime = false;
+ }
+
+ private final Config config;
+
+ /**
+ * Default constructor.
+ */
+ public SiegfriedDetector() {
+ this.config = new Config();
+ }
+
+ /**
+ * Constructor for JSON configuration.
+ * Requires tika-serialization on the classpath.
+ *
+ * @param jsonConfig JSON configuration
+ */
+ public SiegfriedDetector(JsonConfig jsonConfig) {
+ this.config = ConfigDeserializer.buildConfig(jsonConfig, Config.class);
+ }
public static boolean checkHasSiegfried(String siegfriedCommandPath) {
String[] commandline = new String[]{siegfriedCommandPath, "-version"};
@@ -110,11 +138,11 @@ public class SiegfriedDetector implements Detector {
@Override
public MediaType detect(InputStream input, Metadata metadata) throws
IOException {
if (hasSiegfriedCommand == null) {
- hasSiegfriedCommand = checkHasSiegfried(this.siegfriedPath);
+ hasSiegfriedCommand = checkHasSiegfried(this.config.siegfriedPath);
}
if (!hasSiegfriedCommand) {
if (!HAS_WARNED) {
- LOGGER.warn("'siegfried' command isn't working: '" +
siegfriedPath + "'");
+ LOGGER.warn("'siegfried' command isn't working: '" +
config.siegfriedPath + "'");
HAS_WARNED = true;
}
return MediaType.OCTET_STREAM;
@@ -126,10 +154,10 @@ public class SiegfriedDetector implements Detector {
return detectOnPath(tis.getPath(), metadata);
}
- input.mark(maxBytes);
+ input.mark(config.maxBytes);
try (TemporaryResources tmp = new TemporaryResources()) {
Path tmpFile = tmp.createTempFile();
- Files.copy(new BoundedInputStream(maxBytes, input), tmpFile,
REPLACE_EXISTING);
+ Files.copy(new BoundedInputStream(config.maxBytes, input),
tmpFile, REPLACE_EXISTING);
return detectOnPath(tmpFile, metadata);
} finally {
input.reset();
@@ -148,20 +176,20 @@ public class SiegfriedDetector implements Detector {
*/
@Field
public void setUseMime(boolean useMime) {
- this.useMime = useMime;
+ this.config.useMime = useMime;
}
public boolean isUseMime() {
- return useMime;
+ return config.useMime;
}
private MediaType detectOnPath(Path path, Metadata metadata) throws
IOException {
- String[] args = new
String[]{ProcessUtils.escapeCommandLine(siegfriedPath), "-json",
+ String[] args = new
String[]{ProcessUtils.escapeCommandLine(config.siegfriedPath), "-json",
ProcessUtils.escapeCommandLine(path.toAbsolutePath().toString())};
ProcessBuilder builder = new ProcessBuilder(args);
- FileProcessResult result = ProcessUtils.execute(builder, timeoutMs,
1000000, 1000);
- return processResult(result, metadata, useMime);
+ FileProcessResult result = ProcessUtils.execute(builder,
config.timeoutMs, 1000000, 1000);
+ return processResult(result, metadata, config.useMime);
}
protected static MediaType processResult(FileProcessResult result,
Metadata metadata,
@@ -261,8 +289,8 @@ public class SiegfriedDetector implements Detector {
public void setSiegfriedPath(String fileCommandPath) {
//this opens up a potential command vulnerability.
//Don't ever let an untrusted user set this.
- this.siegfriedPath = fileCommandPath;
- checkHasSiegfried(this.siegfriedPath);
+ this.config.siegfriedPath = fileCommandPath;
+ checkHasSiegfried(this.config.siegfriedPath);
}
/**
@@ -274,11 +302,11 @@ public class SiegfriedDetector implements Detector {
*/
@Field
public void setMaxBytes(int maxBytes) {
- this.maxBytes = maxBytes;
+ this.config.maxBytes = maxBytes;
}
@Field
public void setTimeoutMs(long timeoutMs) {
- this.timeoutMs = timeoutMs;
+ this.config.timeoutMs = timeoutMs;
}
}
diff --git
a/tika-detectors/tika-detector-siegfried/src/test/java/org/apache/tika/detect/siegfried/TestSiegfriedIntegration.java
b/tika-detectors/tika-detector-siegfried/src/test/java/org/apache/tika/detect/siegfried/TestSiegfriedIntegration.java
index e96eff0fa..cbdaabfc6 100644
---
a/tika-detectors/tika-detector-siegfried/src/test/java/org/apache/tika/detect/siegfried/TestSiegfriedIntegration.java
+++
b/tika-detectors/tika-detector-siegfried/src/test/java/org/apache/tika/detect/siegfried/TestSiegfriedIntegration.java
@@ -24,8 +24,7 @@ import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import org.apache.tika.TikaTest;
-import org.apache.tika.config.TikaConfig;
-import org.apache.tika.parser.AutoDetectParser;
+import org.apache.tika.config.loader.TikaLoader;
import org.apache.tika.parser.Parser;
@Disabled("need to have siegfried on the path")
@@ -33,8 +32,8 @@ public class TestSiegfriedIntegration extends TikaTest {
@Test
public void testIntegration() throws Exception {
- TikaConfig tikaConfig = new TikaConfig(getConfig("tika-config.xml"));
- Parser p = new AutoDetectParser(tikaConfig);
+ TikaLoader tikaLoader = TikaLoader.load(getConfig("tika-config.json"));
+ Parser p = tikaLoader.loadAutoDetectParser();
debug(getRecursiveMetadata("testPDF.pdf", p));
}
diff --git
a/tika-detectors/tika-detector-siegfried/src/test/resources/configs/tika-config.json
b/tika-detectors/tika-detector-siegfried/src/test/resources/configs/tika-config.json
new file mode 100644
index 000000000..27d934cb8
--- /dev/null
+++
b/tika-detectors/tika-detector-siegfried/src/test/resources/configs/tika-config.json
@@ -0,0 +1,12 @@
+{
+ "detectors": [
+ {
+ "default-detector": {}
+ },
+ {
+ "siegfried-detector": {
+ "siegfriedPath": ""
+ }
+ }
+ ]
+}
diff --git
a/tika-detectors/tika-detector-siegfried/src/test/resources/configs/tika-config.xml
b/tika-detectors/tika-detector-siegfried/src/test/resources/configs/tika-config.xml
deleted file mode 100644
index 0f0b6bc4d..000000000
---
a/tika-detectors/tika-detector-siegfried/src/test/resources/configs/tika-config.xml
+++ /dev/null
@@ -1,27 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-<properties>
- <detectors>
- <detector class="org.apache.tika.detect.DefaultDetector"/>
- <detector class="org.apache.tika.detect.siegfried.SiegfriedDetector">
- <params>
- <param name="siegfriedPath" type="string"><!-- put path here
--></param>
- </params>
- </detector>
- </detectors>
-</properties>
diff --git
a/tika-eval/tika-eval-core/src/test/java/org/apache/tika/eval/core/util/MimeUtilTest.java
b/tika-eval/tika-eval-core/src/test/java/org/apache/tika/eval/core/util/MimeUtilTest.java
deleted file mode 100644
index 7726eb6e8..000000000
---
a/tika-eval/tika-eval-core/src/test/java/org/apache/tika/eval/core/util/MimeUtilTest.java
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.eval.core.util;
-
-
-import static org.junit.jupiter.api.Assertions.assertEquals;
-import static org.junit.jupiter.api.Assertions.assertTrue;
-
-import org.junit.jupiter.api.Disabled;
-import org.junit.jupiter.api.Test;
-
-import org.apache.tika.config.TikaConfig;
-import org.apache.tika.mime.MimeType;
-import org.apache.tika.mime.MimeTypeException;
-import org.apache.tika.mime.MimeTypes;
-
-@Disabled("Fix mimetype.getExtension to work with these and then we can get
rid of MimeUtil")
-public class MimeUtilTest {
-
- private final TikaConfig tikaConfig = TikaConfig.getDefaultConfig();
-
- @Test
- public void testBasic() throws Exception {
- assertResult("application/pdf", ".pdf");
- assertResult("APPLICATION/PDF", ".pdf");
- assertResult("text/plain; charset=ISO-8859-1", ".txt");
- assertResult("application/xhtml+xml; charset=UTF-8\n", ".html");
- assertResult("application/xml; charset=UTF-8\n", ".xml");
-
- assertException("bogosity", "xml");
- }
-
- private void assertException(String contentType, String expected) {
- boolean ex = false;
- try {
- assertResult(contentType, expected);
- } catch (MimeTypeException e) {
- ex = true;
- }
- assertTrue(ex, "Should have had exception for: " + contentType);
- }
-
- private void assertResult(String contentType, String expected) throws
MimeTypeException {
- MimeTypes r = tikaConfig.getMimeRepository();
- MimeType mt = r.forName(contentType);
-
-// String ext = MimeUtil.getExtension(contentType, config);
- assertEquals(expected, mt.getExtension());
- }
-}