This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/main by this push:
new 8a723436a TIKA-4553 -- rm TikaConfig from tika-example (#2435)
8a723436a is described below
commit 8a723436a12e66c401ced4ccb9f26dbfb9fabb0b
Author: Tim Allison <[email protected]>
AuthorDate: Tue Dec 9 17:08:06 2025 -0500
TIKA-4553 -- rm TikaConfig from tika-example (#2435)
---
...mple.java => DumpTikaConfigExample.java.disabled} | 0
.../apache/tika/example/ExtractEmbeddedFiles.java | 18 +++++++++++-------
.../java/org/apache/tika/example/MyFirstTika.java | 20 ++++++++++----------
.../tika/example/TranscribeTranslateExample.java | 7 ++++---
.../tika/example/DumpTikaConfigExampleTest.java | 4 +++-
5 files changed, 28 insertions(+), 21 deletions(-)
diff --git a/tika-example/src/main/java/org/apache/tika/example/DumpTikaConfigExample.java b/tika-example/src/main/java/org/apache/tika/example/DumpTikaConfigExample.java.disabled
similarity index 100%
rename from tika-example/src/main/java/org/apache/tika/example/DumpTikaConfigExample.java
rename to tika-example/src/main/java/org/apache/tika/example/DumpTikaConfigExample.java.disabled
diff --git a/tika-example/src/main/java/org/apache/tika/example/ExtractEmbeddedFiles.java b/tika-example/src/main/java/org/apache/tika/example/ExtractEmbeddedFiles.java
index 171e8ec49..46966337f 100644
--- a/tika-example/src/main/java/org/apache/tika/example/ExtractEmbeddedFiles.java
+++ b/tika-example/src/main/java/org/apache/tika/example/ExtractEmbeddedFiles.java
@@ -26,8 +26,9 @@ import org.apache.commons.io.FilenameUtils;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
-import org.apache.tika.config.TikaConfig;
+import org.apache.tika.config.loader.TikaLoader;
import org.apache.tika.detect.Detector;
+import org.apache.tika.exception.TikaConfigException;
import org.apache.tika.exception.TikaException;
import org.apache.tika.extractor.EmbeddedDocumentExtractor;
import org.apache.tika.extractor.ParsingEmbeddedDocumentExtractor;
@@ -36,15 +37,18 @@ import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.mime.MediaType;
import org.apache.tika.mime.MimeTypeException;
-import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.sax.BodyContentHandler;
public class ExtractEmbeddedFiles {
- private Parser parser = new AutoDetectParser();
- private Detector detector = ((AutoDetectParser) parser).getDetector();
- private TikaConfig config = TikaConfig.getDefaultConfig();
+
+ private TikaLoader tikaLoader = TikaLoader.loadDefault();
+ private Parser parser = tikaLoader.loadAutoDetectParser();
+ private Detector detector = tikaLoader.loadDetectors();
+
+ public ExtractEmbeddedFiles() throws TikaConfigException, IOException {
+ }
public void extract(InputStream is, Path outputDir) throws SAXException,
TikaException, IOException {
Metadata m = new Metadata();
@@ -97,8 +101,8 @@ public class ExtractEmbeddedFiles {
if (name.indexOf('.') == -1 && contentType != null) {
try {
- name += config
- .getMimeRepository()
+ name += tikaLoader
+ .getMimeTypes()
.forName(contentType.toString())
.getExtension();
} catch (MimeTypeException e) {
diff --git a/tika-example/src/main/java/org/apache/tika/example/MyFirstTika.java b/tika-example/src/main/java/org/apache/tika/example/MyFirstTika.java
index 99336dc1f..05c9fa2ce 100755
--- a/tika-example/src/main/java/org/apache/tika/example/MyFirstTika.java
+++ b/tika-example/src/main/java/org/apache/tika/example/MyFirstTika.java
@@ -25,7 +25,7 @@ import java.nio.file.Paths;
import org.apache.commons.io.FileUtils;
import org.xml.sax.ContentHandler;
-import org.apache.tika.config.TikaConfig;
+import org.apache.tika.config.loader.TikaLoader;
import org.apache.tika.detect.Detector;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.langdetect.optimaize.OptimaizeLangDetector;
@@ -50,10 +50,10 @@ import org.apache.tika.sax.BodyContentHandler;
public class MyFirstTika {
public static void main(String[] args) throws Exception {
String filename = args[0];
- TikaConfig tikaConfig = TikaConfig.getDefaultConfig();
+ TikaLoader tikaLoader = TikaLoader.loadDefault();
Metadata metadata = new Metadata();
- String text = parseUsingComponents(filename, tikaConfig, metadata);
+ String text = parseUsingComponents(filename, tikaLoader, metadata);
System.out.println("Parsed Metadata: ");
System.out.println(metadata);
System.out.println("Parsed Text: ");
@@ -62,25 +62,25 @@ public class MyFirstTika {
System.out.println("-------------------------");
metadata = new Metadata();
- text = parseUsingAutoDetect(filename, tikaConfig, metadata);
+ text = parseUsingAutoDetect(filename, tikaLoader, metadata);
System.out.println("Parsed Metadata: ");
System.out.println(metadata);
System.out.println("Parsed Text: ");
System.out.println(text);
}
- public static String parseUsingAutoDetect(String filename, TikaConfig tikaConfig, Metadata metadata) throws Exception {
+ public static String parseUsingAutoDetect(String filename, TikaLoader tikaLoader, Metadata metadata) throws Exception {
System.out.println("Handling using AutoDetectParser: [" + filename + "]");
- AutoDetectParser parser = new AutoDetectParser(tikaConfig);
+ Parser parser = tikaLoader.loadAutoDetectParser();
ContentHandler handler = new BodyContentHandler();
TikaInputStream stream = TikaInputStream.get(Paths.get(filename), metadata);
parser.parse(stream, handler, metadata, new ParseContext());
return handler.toString();
}
- public static String parseUsingComponents(String filename, TikaConfig tikaConfig, Metadata metadata) throws Exception {
- MimeTypes mimeRegistry = tikaConfig.getMimeRepository();
+ public static String parseUsingComponents(String filename, TikaLoader tikaLoader, Metadata metadata) throws Exception {
+ MimeTypes mimeRegistry = tikaLoader.getMimeTypes();
System.out.println("Examining: [" + filename + "]");
@@ -91,7 +91,7 @@ public class MyFirstTika {
System.out.println("The MIME type (based on MAGIC) is: [" + mimeRegistry.detect(stream, metadata) + "]");
stream = TikaInputStream.get(Paths.get(filename));
- Detector detector = tikaConfig.getDetector();
+ Detector detector = tikaLoader.loadDetectors();
System.out.println("The MIME type (based on the Detector interface) is: [" + detector.detect(stream, metadata) + "]");
LanguageDetector langDetector = new OptimaizeLangDetector().loadModels();
@@ -100,7 +100,7 @@ public class MyFirstTika {
System.out.println("The language of this content is: [" + lang.getLanguage() + "]");
// Get a non-detecting parser that handles all the types it can
- Parser parser = tikaConfig.getParser();
+ Parser parser = tikaLoader.loadParsers();
// Tell it what we think the content is
MediaType type = detector.detect(stream, metadata);
metadata.set(Metadata.CONTENT_TYPE, type.toString());
diff --git a/tika-example/src/main/java/org/apache/tika/example/TranscribeTranslateExample.java b/tika-example/src/main/java/org/apache/tika/example/TranscribeTranslateExample.java
index 24f24ac8b..4a6233fa0 100644
--- a/tika-example/src/main/java/org/apache/tika/example/TranscribeTranslateExample.java
+++ b/tika-example/src/main/java/org/apache/tika/example/TranscribeTranslateExample.java
@@ -20,7 +20,7 @@ import java.nio.file.Path;
import java.nio.file.Paths;
import org.apache.tika.Tika;
-import org.apache.tika.config.TikaConfig;
+import org.apache.tika.config.loader.TikaLoader;
import org.apache.tika.language.translate.Translator;
import org.apache.tika.language.translate.impl.GoogleTranslator;
@@ -69,8 +69,9 @@ public class TranscribeTranslateExample {
* @param file the name of the file (which needs to be on the Java Classpath) to transcribe.
* @return transcribed text.
*/
- public static String amazonTranscribe(Path tikaConfig, Path file) throws Exception {
- return new Tika(new TikaConfig(tikaConfig)).parseToString(file);
+ public static String amazonTranscribe(Path tikaConfigPath, Path file) throws Exception {
+ TikaLoader tikaLoader = TikaLoader.load(tikaConfigPath);
+ return new Tika(tikaLoader.loadDetectors(), tikaLoader.loadAutoDetectParser()).parseToString(file);
}
/**
diff --git a/tika-example/src/test/java/org/apache/tika/example/DumpTikaConfigExampleTest.java b/tika-example/src/test/java/org/apache/tika/example/DumpTikaConfigExampleTest.java
index 8e41ae49f..329a0a260 100644
--- a/tika-example/src/test/java/org/apache/tika/example/DumpTikaConfigExampleTest.java
+++ b/tika-example/src/test/java/org/apache/tika/example/DumpTikaConfigExampleTest.java
@@ -31,6 +31,7 @@ import java.nio.file.Files;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import org.apache.tika.config.TikaConfig;
@@ -40,6 +41,7 @@ import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.CompositeParser;
import org.apache.tika.parser.Parser;
+@Disabled
public class DumpTikaConfigExampleTest {
private File configFile;
@@ -66,7 +68,7 @@ public class DumpTikaConfigExampleTest {
@Test
public void testDump() throws Exception {
- DumpTikaConfigExample ex = new DumpTikaConfigExample();
+ //DumpTikaConfigExample ex = new DumpTikaConfigExample();
for (Charset charset : new Charset[]{UTF_8, UTF_16LE}) {
for (TikaConfigSerializer.Mode mode : TikaConfigSerializer.Mode.values()) {
Writer writer = new OutputStreamWriter(new FileOutputStream(configFile), charset);