This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/main by this push:
new 488867f54 TIKA-4545 -- add translators and refactor loaders (#2420)
488867f54 is described below
commit 488867f5412e6b3c4d92bb1361752550edcfda14
Author: Tim Allison <[email protected]>
AuthorDate: Wed Dec 3 16:32:32 2025 -0500
TIKA-4545 -- add translators and refactor loaders (#2420)
* TIKA-4545 -- add translators and refactor loaders
---
.../tika/language/translate/EmptyTranslator.java | 3 +
.../tika/pipes/core/emitter/EmitterManager.java | 3 +-
.../tika/pipes/core/fetcher/FetcherManager.java | 3 +-
.../core/pipesiterator/PipesIteratorManager.java | 3 +-
.../tika/pipes/core/reporter/ReporterManager.java | 3 +-
.../java/org/apache/tika/plugins/TikaConfigs.java | 16 ---
.../org/apache/tika/plugins/TikaPluginManager.java | 14 ++-
.../org/apache/tika/plugins/TikaConfigsTest.java | 4 +-
.../tika/config/loader/ComponentInstantiator.java | 116 +++++++++++++++++++++
.../config/loader/CompositeComponentLoader.java | 55 +---------
.../apache/tika/config/loader/DetectorLoader.java | 60 +----------
.../tika/config/loader/EncodingDetectorLoader.java | 60 +----------
.../apache/tika/config/loader/ParserLoader.java | 30 +-----
.../org/apache/tika/config/loader/TikaLoader.java | 19 ++++
.../tika/config/loader/TranslatorLoader.java | 108 +++++++++++++++++++
.../apache/tika/config/loader/TikaLoaderTest.java | 43 ++++++++
.../resources/configs/test-translator-config.json | 5 +
.../tika/server/core/TikaResourceFetcherTest.java | 1 +
.../tika/server/core/TranslateResourceTest.java | 2 -
tika-translate/pom.xml | 24 +++++
.../language/translate/impl/CachedTranslator.java | 2 +
.../language/translate/impl/GoogleTranslator.java | 2 +
.../translate/impl/JoshuaNetworkTranslator.java | 2 +
.../language/translate/impl/Lingo24Translator.java | 2 +
.../language/translate/impl/MarianTranslator.java | 2 +
.../translate/impl/MicrosoftTranslator.java | 2 +
.../language/translate/impl/MosesTranslator.java | 2 +
.../language/translate/impl/RTGTranslator.java | 2 +
.../language/translate/impl/YandexTranslator.java | 2 +
.../org.apache.tika.language.translate.Translator | 22 ----
30 files changed, 366 insertions(+), 246 deletions(-)
diff --git
a/tika-core/src/main/java/org/apache/tika/language/translate/EmptyTranslator.java
b/tika-core/src/main/java/org/apache/tika/language/translate/EmptyTranslator.java
index 9324af224..35c75b57e 100644
---
a/tika-core/src/main/java/org/apache/tika/language/translate/EmptyTranslator.java
+++
b/tika-core/src/main/java/org/apache/tika/language/translate/EmptyTranslator.java
@@ -16,11 +16,14 @@
*/
package org.apache.tika.language.translate;
+import org.apache.tika.config.TikaComponent;
+
/**
* Dummy translator that always declines to give any text. Useful as a
* sentinel translator for when none others are available.
* for unknown document types.
*/
+@TikaComponent
public class EmptyTranslator implements Translator {
public String translate(String text, String sourceLanguage, String
targetLanguage) {
return null;
diff --git
a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/emitter/EmitterManager.java
b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/emitter/EmitterManager.java
index af0abce6d..a99cde81c 100644
---
a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/emitter/EmitterManager.java
+++
b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/emitter/EmitterManager.java
@@ -46,7 +46,8 @@ public class EmitterManager {
private final Map<String, Emitter> emitterMap = new ConcurrentHashMap<>();
public static EmitterManager load(PluginManager pluginManager, TikaConfigs
tikaConfigs) throws IOException, TikaConfigException {
- JsonNode fetchersNode = tikaConfigs.getRoot().get(CONFIG_KEY);
+ JsonNode fetchersNode = tikaConfigs.getTikaJsonConfig()
+ .getRootNode().get(CONFIG_KEY);
Map<String, Emitter> fetchers =
PluginComponentLoader.loadInstances(pluginManager,
EmitterFactory.class, fetchersNode);
return new EmitterManager(fetchers);
diff --git
a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/fetcher/FetcherManager.java
b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/fetcher/FetcherManager.java
index 8293f5830..a0c926d40 100644
---
a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/fetcher/FetcherManager.java
+++
b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/fetcher/FetcherManager.java
@@ -45,7 +45,8 @@ public class FetcherManager {
public static FetcherManager load(PluginManager pluginManager, TikaConfigs
tikaConfigs) throws TikaConfigException, IOException {
- JsonNode fetchersNode = tikaConfigs.getRoot().get(CONFIG_KEY);
+ JsonNode fetchersNode = tikaConfigs.getTikaJsonConfig()
+ .getRootNode().get(CONFIG_KEY);
Map<String, Fetcher> fetchers =
PluginComponentLoader.loadInstances(pluginManager,
FetcherFactory.class, fetchersNode);
return new FetcherManager(fetchers);
diff --git
a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/pipesiterator/PipesIteratorManager.java
b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/pipesiterator/PipesIteratorManager.java
index 2ea0e41e9..b80f24baf 100644
---
a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/pipesiterator/PipesIteratorManager.java
+++
b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/pipesiterator/PipesIteratorManager.java
@@ -39,7 +39,8 @@ public class PipesIteratorManager {
public static Optional<PipesIterator> load(PluginManager pluginManager,
TikaConfigs tikaConfigs) throws IOException, TikaConfigException {
- JsonNode node = tikaConfigs.getRoot().get(CONFIG_KEY);
+ JsonNode node = tikaConfigs.getTikaJsonConfig()
+ .getRootNode().get(CONFIG_KEY);
return PluginComponentLoader.loadSingleton(pluginManager,
PipesIteratorFactory.class, node);
}
diff --git
a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/reporter/ReporterManager.java
b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/reporter/ReporterManager.java
index e62b6842f..a539103c5 100644
---
a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/reporter/ReporterManager.java
+++
b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/reporter/ReporterManager.java
@@ -39,7 +39,8 @@ public class ReporterManager {
public static PipesReporter load(PluginManager pluginManager, TikaConfigs
tikaConfigs) throws IOException, TikaConfigException {
- JsonNode node = tikaConfigs.getRoot().get(CONFIG_KEY);
+ JsonNode node = tikaConfigs.getTikaJsonConfig()
+ .getRootNode().get(CONFIG_KEY);
List<PipesReporter> reporters =
PluginComponentLoader.loadUnnamedInstances(pluginManager,
PipesReporterFactory.class, node);
if (reporters.isEmpty()) {
diff --git
a/tika-plugins-core/src/main/java/org/apache/tika/plugins/TikaConfigs.java
b/tika-plugins-core/src/main/java/org/apache/tika/plugins/TikaConfigs.java
index acefb3640..d2a780b64 100644
--- a/tika-plugins-core/src/main/java/org/apache/tika/plugins/TikaConfigs.java
+++ b/tika-plugins-core/src/main/java/org/apache/tika/plugins/TikaConfigs.java
@@ -21,9 +21,7 @@ import java.nio.file.Path;
import java.util.Iterator;
import java.util.Set;
-import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.JsonNode;
-import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.tika.config.loader.TikaJsonConfig;
import org.apache.tika.exception.TikaConfigException;
@@ -66,9 +64,6 @@ public class TikaConfigs {
"server"
);
- static final ObjectMapper OBJECT_MAPPER = new ObjectMapper()
- .configure(DeserializationFeature.ACCEPT_SINGLE_VALUE_AS_ARRAY,
true);
-
private final TikaJsonConfig tikaJsonConfig;
/**
@@ -113,17 +108,6 @@ public class TikaConfigs {
return tikaJsonConfig;
}
- /**
- * Gets the root JSON node.
- * Deprecated - use {@link #getTikaJsonConfig()} instead.
- *
- * @return the root JSON node
- */
- @Deprecated
- public JsonNode getRoot() {
- return tikaJsonConfig.getRootNode();
- }
-
/**
* Deserializes a configuration value for the given key.
*
diff --git
a/tika-plugins-core/src/main/java/org/apache/tika/plugins/TikaPluginManager.java
b/tika-plugins-core/src/main/java/org/apache/tika/plugins/TikaPluginManager.java
index df23e078c..ad560558b 100644
---
a/tika-plugins-core/src/main/java/org/apache/tika/plugins/TikaPluginManager.java
+++
b/tika-plugins-core/src/main/java/org/apache/tika/plugins/TikaPluginManager.java
@@ -23,7 +23,9 @@ import java.nio.file.Path;
import java.util.List;
import com.fasterxml.jackson.core.type.TypeReference;
+import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
import org.pf4j.DefaultExtensionFinder;
import org.pf4j.DefaultPluginManager;
import org.pf4j.ExtensionFinder;
@@ -43,6 +45,14 @@ public class TikaPluginManager extends DefaultPluginManager {
private static final Logger LOG =
LoggerFactory.getLogger(TikaPluginManager.class);
+ //we're only using this to convert a single path or a list of paths to a list
+ //we don't need all the functionality of the polymorphic objectmapper in tika-serialization
+ private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+
+ static {
+ OBJECT_MAPPER.configure(DeserializationFeature.ACCEPT_SINGLE_VALUE_AS_ARRAY, true);
+ }
+
/**
* Loads plugin manager from a pre-parsed TikaJsonConfig.
* This is the preferred method when sharing configuration across
@@ -83,12 +93,12 @@ public class TikaPluginManager extends DefaultPluginManager {
*/
public static TikaPluginManager load(TikaConfigs tikaConfigs)
throws TikaConfigException, IOException {
- JsonNode root = tikaConfigs.getRoot();
+ JsonNode root = tikaConfigs.getTikaJsonConfig().getRootNode();
JsonNode pluginRoots = root.get("plugin-roots");
if (pluginRoots == null) {
throw new TikaConfigException("plugin-roots must be specified");
}
- List<Path> roots = TikaConfigs.OBJECT_MAPPER.convertValue(pluginRoots,
+ List<Path> roots = OBJECT_MAPPER.convertValue(pluginRoots,
new TypeReference<List<Path>>() {});
if (roots.isEmpty()) {
throw new TikaConfigException("plugin-roots must not be empty");
diff --git
a/tika-plugins-core/src/test/java/org/apache/tika/plugins/TikaConfigsTest.java
b/tika-plugins-core/src/test/java/org/apache/tika/plugins/TikaConfigsTest.java
index 3ecafc018..207765d26 100644
---
a/tika-plugins-core/src/test/java/org/apache/tika/plugins/TikaConfigsTest.java
+++
b/tika-plugins-core/src/test/java/org/apache/tika/plugins/TikaConfigsTest.java
@@ -137,8 +137,8 @@ public class TikaConfigsTest {
""";
TikaConfigs configs = loadFromString(json);
- assertNotNull(configs.getRoot());
- assertNotNull(configs.getRoot().get("fetchers"));
+ assertNotNull(configs.getTikaJsonConfig().getRootNode());
+ assertNotNull(configs.getTikaJsonConfig().getRootNode().get("fetchers"));
}
private TikaConfigs loadFromString(String json) throws Exception {
diff --git
a/tika-serialization/src/main/java/org/apache/tika/config/loader/ComponentInstantiator.java
b/tika-serialization/src/main/java/org/apache/tika/config/loader/ComponentInstantiator.java
new file mode 100644
index 000000000..2f9a66e4c
--- /dev/null
+++
b/tika-serialization/src/main/java/org/apache/tika/config/loader/ComponentInstantiator.java
@@ -0,0 +1,116 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.config.loader;
+
+import java.lang.reflect.Constructor;
+import java.lang.reflect.InvocationTargetException;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import org.apache.tika.config.JsonConfig;
+import org.apache.tika.exception.TikaConfigException;
+import org.apache.tika.utils.ServiceLoaderUtils;
+
+/**
+ * Utility class for instantiating Tika components from JSON configuration.
+ * Provides common logic for all component loaders to avoid code duplication.
+ */
+public class ComponentInstantiator {
+
+ /**
+ * Instantiates a component with JsonConfig constructor or falls back to zero-arg constructor.
+ * <p>
+ * Instantiation strategy:
+ * <ol>
+ * <li>Try constructor with JsonConfig parameter</li>
+ * <li>If not found and JSON config has actual configuration, throw
error</li>
+ * <li>Otherwise fall back to zero-arg constructor via ServiceLoader</li>
+ * </ol>
+ *
+ * @param componentClass the component class to instantiate
+ * @param jsonConfig the JSON configuration for the component
+ * @param classLoader the class loader to use
+ * @param componentTypeName the component type name (e.g., "Detector",
"Parser") for error messages
+ * @param objectMapper the Jackson ObjectMapper for parsing JSON
+ * @param <T> the component type
+ * @return the instantiated component
+ * @throws TikaConfigException if instantiation fails
+ */
+ @SuppressWarnings("unchecked")
+ public static <T> T instantiate(Class<?> componentClass,
+ JsonConfig jsonConfig,
+ ClassLoader classLoader,
+ String componentTypeName,
+ ObjectMapper objectMapper)
+ throws TikaConfigException {
+ try {
+ T component;
+
+ // Try constructor with JsonConfig parameter
+ try {
+ Constructor<?> constructor =
componentClass.getConstructor(JsonConfig.class);
+ component = (T) constructor.newInstance(jsonConfig);
+ } catch (NoSuchMethodException e) {
+ // Check if JSON config has actual configuration
+ if (hasConfiguration(jsonConfig, objectMapper)) {
+ throw new TikaConfigException(
+ componentTypeName + " '" +
componentClass.getName() + "' has configuration in JSON, " +
+ "but does not have a constructor that accepts
JsonConfig. " +
+ "Please add a constructor: public " +
componentClass.getSimpleName() + "(JsonConfig jsonConfig)");
+ }
+ // Fall back to zero-arg constructor if no configuration
provided
+ component = (T) ServiceLoaderUtils.newInstance(componentClass,
+ new org.apache.tika.config.ServiceLoader(classLoader));
+ }
+
+ return component;
+ } catch (InstantiationException | IllegalAccessException |
InvocationTargetException e) {
+ throw new TikaConfigException("Failed to instantiate " +
componentTypeName + ": " +
+ componentClass.getName(), e);
+ }
+ }
+
+ /**
+ * Checks if the JsonConfig contains actual configuration (non-empty JSON
object with fields).
+ *
+ * @param jsonConfig the JSON configuration
+ * @param objectMapper the Jackson ObjectMapper for parsing JSON
+ * @return true if there's meaningful configuration, false if empty or
just "{}"
+ */
+ public static boolean hasConfiguration(JsonConfig jsonConfig, ObjectMapper
objectMapper) {
+ if (jsonConfig == null) {
+ return false;
+ }
+ String json = jsonConfig.json();
+ if (json == null || json.trim().isEmpty()) {
+ return false;
+ }
+ // Parse to check if it's an empty object or has actual fields
+ try {
+ JsonNode node = objectMapper.readTree(json);
+ // Check if it's an object and has at least one field
+ if (node.isObject() && node.size() > 0) {
+ return true;
+ }
+ return false;
+ } catch (Exception e) {
+ // If we can't parse it, assume it has configuration to be safe
+ return true;
+ }
+ }
+}
diff --git
a/tika-serialization/src/main/java/org/apache/tika/config/loader/CompositeComponentLoader.java
b/tika-serialization/src/main/java/org/apache/tika/config/loader/CompositeComponentLoader.java
index 7548f67a1..da5d5f59e 100644
---
a/tika-serialization/src/main/java/org/apache/tika/config/loader/CompositeComponentLoader.java
+++
b/tika-serialization/src/main/java/org/apache/tika/config/loader/CompositeComponentLoader.java
@@ -16,8 +16,6 @@
*/
package org.apache.tika.config.loader;
-import java.lang.reflect.Constructor;
-import java.lang.reflect.InvocationTargetException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
@@ -32,7 +30,6 @@ import org.slf4j.LoggerFactory;
import org.apache.tika.config.JsonConfig;
import org.apache.tika.exception.TikaConfigException;
-import org.apache.tika.utils.ServiceLoaderUtils;
/**
* Generic loader for Tika components (detectors, encoding detectors, filters,
etc.).
@@ -172,58 +169,10 @@ public class CompositeComponentLoader<T> {
}
}
- @SuppressWarnings("unchecked")
private T instantiateComponent(Class<?> componentClass, JsonConfig
configJson)
throws TikaConfigException {
- try {
- // Try constructor with JsonConfig parameter
- try {
- Constructor<?> constructor =
componentClass.getConstructor(JsonConfig.class);
- return (T) constructor.newInstance(configJson);
- } catch (NoSuchMethodException e) {
- // Check if JSON config has actual configuration
- if (hasConfiguration(configJson)) {
- throw new TikaConfigException(
- "Component '" + componentClass.getName() + "' has
configuration in JSON, " +
- "but does not have a constructor that accepts
JsonConfig. " +
- "Please add a constructor: public " +
componentClass.getSimpleName() + "(JsonConfig jsonConfig)");
- }
- // Fall back to zero-arg constructor if no configuration
provided
- return (T) ServiceLoaderUtils.newInstance(componentClass,
- new org.apache.tika.config.ServiceLoader(classLoader));
- }
- } catch (InstantiationException | IllegalAccessException |
InvocationTargetException e) {
- throw new TikaConfigException("Failed to instantiate component: " +
- componentClass.getName(), e);
- }
- }
-
- /**
- * Checks if the JsonConfig contains actual configuration (non-empty JSON
object with fields).
- *
- * @param jsonConfig the JSON configuration
- * @return true if there's meaningful configuration, false if empty or
just "{}"
- */
- private boolean hasConfiguration(JsonConfig jsonConfig) {
- if (jsonConfig == null) {
- return false;
- }
- String json = jsonConfig.json();
- if (json == null || json.trim().isEmpty()) {
- return false;
- }
- // Parse to check if it's an empty object or has actual fields
- try {
- JsonNode node = objectMapper.readTree(json);
- // Check if it's an object and has at least one field
- if (node.isObject() && node.size() > 0) {
- return true;
- }
- return false;
- } catch (Exception e) {
- // If we can't parse it, assume it has configuration to be safe
- return true;
- }
+ return ComponentInstantiator.instantiate(componentClass, configJson,
classLoader,
+ componentTypeName, objectMapper);
}
private List<T> loadSpiComponents() {
diff --git
a/tika-serialization/src/main/java/org/apache/tika/config/loader/DetectorLoader.java
b/tika-serialization/src/main/java/org/apache/tika/config/loader/DetectorLoader.java
index 8c63a8867..f2146cfc1 100644
---
a/tika-serialization/src/main/java/org/apache/tika/config/loader/DetectorLoader.java
+++
b/tika-serialization/src/main/java/org/apache/tika/config/loader/DetectorLoader.java
@@ -16,8 +16,6 @@
*/
package org.apache.tika.config.loader;
-import java.lang.reflect.Constructor;
-import java.lang.reflect.InvocationTargetException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
@@ -36,7 +34,6 @@ import org.apache.tika.detect.CompositeDetector;
import org.apache.tika.detect.DefaultDetector;
import org.apache.tika.detect.Detector;
import org.apache.tika.exception.TikaConfigException;
-import org.apache.tika.utils.ServiceLoaderUtils;
/**
* Loader for detectors with support for SPI fallback via "default-detector"
marker.
@@ -190,63 +187,10 @@ public class DetectorLoader {
}
}
- @SuppressWarnings("unchecked")
private Detector instantiateDetector(Class<?> detectorClass, JsonConfig
jsonConfig)
throws TikaConfigException {
-
- try {
- Detector detector;
-
- // Try constructor with JsonConfig parameter
- try {
- Constructor<?> constructor =
detectorClass.getConstructor(JsonConfig.class);
- detector = (Detector) constructor.newInstance(jsonConfig);
- } catch (NoSuchMethodException e) {
- // Check if JSON config has actual configuration
- if (hasConfiguration(jsonConfig)) {
- throw new TikaConfigException(
- "Detector '" + detectorClass.getName() + "' has
configuration in JSON, " +
- "but does not have a constructor that accepts
JsonConfig. " +
- "Please add a constructor: public " +
detectorClass.getSimpleName() + "(JsonConfig jsonConfig)");
- }
- // Fall back to zero-arg constructor if no configuration
provided
- detector = (Detector)
ServiceLoaderUtils.newInstance(detectorClass,
- new org.apache.tika.config.ServiceLoader(classLoader));
- }
-
- return detector;
- } catch (InstantiationException | IllegalAccessException |
InvocationTargetException e) {
- throw new TikaConfigException("Failed to instantiate detector: " +
- detectorClass.getName(), e);
- }
- }
-
- /**
- * Checks if the JsonConfig contains actual configuration (non-empty JSON
object with fields).
- *
- * @param jsonConfig the JSON configuration
- * @return true if there's meaningful configuration, false if empty or
just "{}"
- */
- private boolean hasConfiguration(JsonConfig jsonConfig) {
- if (jsonConfig == null) {
- return false;
- }
- String json = jsonConfig.json();
- if (json == null || json.trim().isEmpty()) {
- return false;
- }
- // Parse to check if it's an empty object or has actual fields
- try {
- JsonNode node = objectMapper.readTree(json);
- // Check if it's an object and has at least one field
- if (node.isObject() && node.size() > 0) {
- return true;
- }
- return false;
- } catch (Exception e) {
- // If we can't parse it, assume it has configuration to be safe
- return true;
- }
+ return ComponentInstantiator.instantiate(detectorClass, jsonConfig,
classLoader,
+ "Detector", objectMapper);
}
/**
diff --git
a/tika-serialization/src/main/java/org/apache/tika/config/loader/EncodingDetectorLoader.java
b/tika-serialization/src/main/java/org/apache/tika/config/loader/EncodingDetectorLoader.java
index 4a46f595d..35079dfeb 100644
---
a/tika-serialization/src/main/java/org/apache/tika/config/loader/EncodingDetectorLoader.java
+++
b/tika-serialization/src/main/java/org/apache/tika/config/loader/EncodingDetectorLoader.java
@@ -16,8 +16,6 @@
*/
package org.apache.tika.config.loader;
-import java.lang.reflect.Constructor;
-import java.lang.reflect.InvocationTargetException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
@@ -36,7 +34,6 @@ import org.apache.tika.detect.CompositeEncodingDetector;
import org.apache.tika.detect.DefaultEncodingDetector;
import org.apache.tika.detect.EncodingDetector;
import org.apache.tika.exception.TikaConfigException;
-import org.apache.tika.utils.ServiceLoaderUtils;
/**
* Loader for encoding detectors with support for SPI fallback via
"default-encoding-detector" marker.
@@ -179,63 +176,10 @@ public class EncodingDetectorLoader {
}
}
- @SuppressWarnings("unchecked")
private EncodingDetector instantiateEncodingDetector(Class<?>
detectorClass, JsonConfig jsonConfig)
throws TikaConfigException {
-
- try {
- EncodingDetector detector;
-
- // Try constructor with JsonConfig parameter
- try {
- Constructor<?> constructor =
detectorClass.getConstructor(JsonConfig.class);
- detector = (EncodingDetector)
constructor.newInstance(jsonConfig);
- } catch (NoSuchMethodException e) {
- // Check if JSON config has actual configuration
- if (hasConfiguration(jsonConfig)) {
- throw new TikaConfigException(
- "Encoding detector '" + detectorClass.getName() +
"' has configuration in JSON, " +
- "but does not have a constructor that accepts
JsonConfig. " +
- "Please add a constructor: public " +
detectorClass.getSimpleName() + "(JsonConfig jsonConfig)");
- }
- // Fall back to zero-arg constructor if no configuration
provided
- detector = (EncodingDetector)
ServiceLoaderUtils.newInstance(detectorClass,
- new org.apache.tika.config.ServiceLoader(classLoader));
- }
-
- return detector;
- } catch (InstantiationException | IllegalAccessException |
InvocationTargetException e) {
- throw new TikaConfigException("Failed to instantiate encoding
detector: " +
- detectorClass.getName(), e);
- }
- }
-
- /**
- * Checks if the JsonConfig contains actual configuration (non-empty JSON
object with fields).
- *
- * @param jsonConfig the JSON configuration
- * @return true if there's meaningful configuration, false if empty or
just "{}"
- */
- private boolean hasConfiguration(JsonConfig jsonConfig) {
- if (jsonConfig == null) {
- return false;
- }
- String json = jsonConfig.json();
- if (json == null || json.trim().isEmpty()) {
- return false;
- }
- // Parse to check if it's an empty object or has actual fields
- try {
- JsonNode node = objectMapper.readTree(json);
- // Check if it's an object and has at least one field
- if (node.isObject() && node.size() > 0) {
- return true;
- }
- return false;
- } catch (Exception e) {
- // If we can't parse it, assume it has configuration to be safe
- return true;
- }
+ return ComponentInstantiator.instantiate(detectorClass, jsonConfig,
classLoader,
+ "EncodingDetector", objectMapper);
}
/**
diff --git
a/tika-serialization/src/main/java/org/apache/tika/config/loader/ParserLoader.java
b/tika-serialization/src/main/java/org/apache/tika/config/loader/ParserLoader.java
index 177cacce7..786d2e9bb 100644
---
a/tika-serialization/src/main/java/org/apache/tika/config/loader/ParserLoader.java
+++
b/tika-serialization/src/main/java/org/apache/tika/config/loader/ParserLoader.java
@@ -224,7 +224,7 @@ public class ParserLoader {
parser = (Parser) constructor.newInstance(jsonConfig);
} catch (NoSuchMethodException e) {
// Check if JSON config has actual configuration
- if (hasConfiguration(jsonConfig)) {
+ if (ComponentInstantiator.hasConfiguration(jsonConfig,
objectMapper)) {
throw new TikaConfigException(
"Parser '" + parserClass.getName() + "' has
configuration in JSON, " +
"but does not have a constructor that accepts
JsonConfig. " +
@@ -260,34 +260,6 @@ public class ParserLoader {
}
}
- /**
- * Checks if the JsonConfig contains actual configuration (non-empty JSON
object with fields).
- *
- * @param jsonConfig the JSON configuration
- * @return true if there's meaningful configuration, false if empty or
just "{}"
- */
- private boolean hasConfiguration(JsonConfig jsonConfig) {
- if (jsonConfig == null) {
- return false;
- }
- String json = jsonConfig.json();
- if (json == null || json.trim().isEmpty()) {
- return false;
- }
- // Parse to check if it's an empty object or has actual fields
- try {
- JsonNode node = objectMapper.readTree(json);
- // Check if it's an object and has at least one field
- if (node.isObject() && node.size() > 0) {
- return true;
- }
- return false;
- } catch (Exception e) {
- // If we can't parse it, assume it has configuration to be safe
- return true;
- }
- }
-
private Parser applyMimeFiltering(Parser parser,
FrameworkConfig.ParserDecoration decoration) {
List<String> includes = decoration.getMimeInclude();
List<String> excludes = decoration.getMimeExclude();
diff --git
a/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaLoader.java
b/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaLoader.java
index 1f81802e3..67b57cc69 100644
---
a/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaLoader.java
+++
b/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaLoader.java
@@ -27,6 +27,7 @@ import org.apache.tika.config.GlobalSettings;
import org.apache.tika.detect.Detector;
import org.apache.tika.detect.EncodingDetector;
import org.apache.tika.exception.TikaConfigException;
+import org.apache.tika.language.translate.Translator;
import org.apache.tika.metadata.filter.CompositeMetadataFilter;
import org.apache.tika.metadata.filter.MetadataFilter;
import org.apache.tika.metadata.filter.NoOpFilter;
@@ -86,6 +87,7 @@ public class TikaLoader {
private EncodingDetector encodingDetectors;
private MetadataFilter metadataFilter;
private Renderer renderers;
+ private Translator translator;
private ConfigLoader configLoader;
private GlobalSettings globalSettings;
@@ -285,6 +287,23 @@ public class TikaLoader {
return renderers;
}
+ /**
+ * Loads and returns the translator.
+ * If "translator" section exists in config, uses that translator.
+ * If section missing, uses SPI to discover translator.
+ * Results are cached - subsequent calls return the same instance.
+ *
+ * @return the translator
+ * @throws TikaConfigException if loading fails
+ */
+ public synchronized Translator loadTranslator() throws TikaConfigException {
+ if (translator == null) {
+ TranslatorLoader loader = new TranslatorLoader(classLoader,
objectMapper);
+ translator = loader.load(config);
+ }
+ return translator;
+ }
+
/**
* Loads and returns an AutoDetectParser configured with this loader's
parsers and detectors.
* Results are cached - subsequent calls return the same instance.
diff --git
a/tika-serialization/src/main/java/org/apache/tika/config/loader/TranslatorLoader.java
b/tika-serialization/src/main/java/org/apache/tika/config/loader/TranslatorLoader.java
new file mode 100644
index 000000000..b84905e2c
--- /dev/null
+++
b/tika-serialization/src/main/java/org/apache/tika/config/loader/TranslatorLoader.java
@@ -0,0 +1,108 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.config.loader;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.node.ObjectNode;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.tika.config.JsonConfig;
+import org.apache.tika.exception.TikaConfigException;
+import org.apache.tika.language.translate.DefaultTranslator;
+import org.apache.tika.language.translate.Translator;
+
+/**
+ * Loader for translators.
+ * <p>
+ * Only one translator is supported at a time: either the one named in the
+ * "translator" section of the JSON config or, when that section is absent, a
+ * {@link DefaultTranslator} that discovers a translator via SPI.
+ */
+public class TranslatorLoader {
+
+    private static final Logger LOG = LoggerFactory.getLogger(TranslatorLoader.class);
+
+    private final ClassLoader classLoader;
+    private final ObjectMapper objectMapper;
+
+    /**
+     * @param classLoader  class loader handed to the component registry, the
+     *                     instantiator and the SPI service loader
+     * @param objectMapper mapper handed to the framework-config extraction and
+     *                     to the instantiator
+     */
+    public TranslatorLoader(ClassLoader classLoader, ObjectMapper objectMapper) {
+        this.classLoader = classLoader;
+        this.objectMapper = objectMapper;
+    }
+
+    /**
+     * Loads a translator from JSON config.
+     * <p>
+     * If "translator" section exists in config, uses that translator.
+     * If section missing, uses DefaultTranslator to discover translator via SPI.
+     *
+     * @param config the Tika JSON configuration
+     * @return the translator
+     * @throws TikaConfigException if loading fails
+     */
+    public Translator load(TikaJsonConfig config) throws TikaConfigException {
+        if (config.hasComponentSection("translator")) {
+            return loadConfiguredTranslator(config.getRootNode().get("translator"));
+        }
+        // No configured translator - use DefaultTranslator to load from SPI
+        LOG.debug("No 'translator' section in config; using DefaultTranslator (SPI)");
+        return createDefaultTranslator();
+    }
+
+    /**
+     * Builds the translator described by the "translator" config node.
+     *
+     * @param translatorNode the JSON node found under "translator"
+     * @return the instantiated translator
+     * @throws TikaConfigException if the node is not an object with a "class"
+     *                             field, or if resolving/instantiating the
+     *                             translator fails
+     */
+    private Translator loadConfiguredTranslator(JsonNode translatorNode)
+            throws TikaConfigException {
+        // Validate up front, outside the try, so this specific message reaches the
+        // caller as-is instead of being wrapped in a generic failure.
+        // The isObject() check also makes the ObjectNode cast below safe.
+        if (translatorNode == null || !translatorNode.isObject()
+                || !translatorNode.has("class")) {
+            throw new TikaConfigException(
+                    "Translator configuration must have a 'class' field");
+        }
+
+        String className = translatorNode.get("class").asText();
+        try {
+            ComponentRegistry registry = new ComponentRegistry("translators", classLoader);
+            Class<?> translatorClass = registry.getComponentClass(className);
+
+            // Work on a copy so the caller's config tree is left untouched, and
+            // drop "class" so it is not treated as a translator setting
+            ObjectNode configCopy = ((ObjectNode) translatorNode).deepCopy();
+            configCopy.remove("class");
+
+            // Extract framework config (e.g., _decorate if present)
+            FrameworkConfig frameworkConfig =
+                    FrameworkConfig.extract(configCopy, objectMapper);
+
+            return instantiateTranslator(translatorClass,
+                    frameworkConfig.getComponentConfigJson());
+        } catch (TikaConfigException e) {
+            // Already a configuration error with its own message - do not re-wrap
+            throw e;
+        } catch (Exception e) {
+            throw new TikaConfigException("Failed to load translator: " + className, e);
+        }
+    }
+
+    /**
+     * Instantiates the translator class with its component config.
+     *
+     * @param translatorClass the resolved translator class
+     * @param jsonConfig      the translator's own configuration
+     * @return the new translator
+     * @throws TikaConfigException if instantiation fails
+     */
+    private Translator instantiateTranslator(Class<?> translatorClass, JsonConfig jsonConfig)
+            throws TikaConfigException {
+        return ComponentInstantiator.instantiate(translatorClass, jsonConfig, classLoader,
+                "Translator", objectMapper);
+    }
+
+    /**
+     * Creates a DefaultTranslator that loads a translator from SPI.
+     *
+     * @return the DefaultTranslator with SPI-loaded translator
+     */
+    private DefaultTranslator createDefaultTranslator() {
+        // Fully qualified: this is Tika's ServiceLoader, not java.util.ServiceLoader
+        return new DefaultTranslator(new org.apache.tika.config.ServiceLoader(classLoader));
+    }
+}
diff --git
a/tika-serialization/src/test/java/org/apache/tika/config/loader/TikaLoaderTest.java
b/tika-serialization/src/test/java/org/apache/tika/config/loader/TikaLoaderTest.java
index 168e673dd..44c145418 100644
---
a/tika-serialization/src/test/java/org/apache/tika/config/loader/TikaLoaderTest.java
+++
b/tika-serialization/src/test/java/org/apache/tika/config/loader/TikaLoaderTest.java
@@ -29,6 +29,8 @@ import java.nio.file.Path;
import org.junit.jupiter.api.Test;
import org.xml.sax.helpers.DefaultHandler;
+import org.apache.tika.language.translate.EmptyTranslator;
+import org.apache.tika.language.translate.Translator;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.ParseContext;
@@ -315,4 +317,45 @@ public class TikaLoaderTest {
.contains(MediaType.parse("application/test+optin")),
"Should NOT support application/test+optin (opt-in only, not
in SPI)");
}
+
+    /**
+     * A config whose "translator" section names "empty-translator" must yield
+     * an available {@link EmptyTranslator}.
+     */
+    @Test
+    public void testTranslatorLoading() throws Exception {
+        URL resource = getClass().getResource("/configs/test-translator-config.json");
+        TikaLoader tikaLoader = TikaLoader.load(Path.of(resource.toURI()));
+
+        Translator loaded = tikaLoader.loadTranslator();
+
+        assertNotNull(loaded, "Translator should not be null");
+        assertTrue(loaded instanceof EmptyTranslator, "Should be EmptyTranslator");
+        assertTrue(loaded.isAvailable(), "Translator should be available");
+    }
+
+    /**
+     * Two consecutive calls to {@code loadTranslator()} on the same loader must
+     * return the very same instance.
+     */
+    @Test
+    public void testTranslatorLazyLoading() throws Exception {
+        URL resource = getClass().getResource("/configs/test-translator-config.json");
+        TikaLoader tikaLoader = TikaLoader.load(Path.of(resource.toURI()));
+
+        // First call creates the translator
+        Translator first = tikaLoader.loadTranslator();
+        assertNotNull(first, "First load should return translator");
+
+        // Second call must hand back the cached object (identity, not equality)
+        Translator second = tikaLoader.loadTranslator();
+        boolean sameInstance = first == second;
+        assertTrue(sameInstance, "Should return same cached instance");
+    }
+
+    /**
+     * When the config has no "translator" section, the loader must fall back to
+     * a DefaultTranslator.
+     */
+    @Test
+    public void testDefaultTranslatorWhenNotConfigured() throws Exception {
+        URL configUrl = getClass().getResource("/configs/test-loader-config.json");
+        Path configPath = Path.of(configUrl.toURI());
+
+        TikaLoader loader = TikaLoader.load(configPath);
+        Translator translator = loader.loadTranslator();
+
+        assertNotNull(translator, "Translator should not be null");
+        // No translator is configured in test-loader-config.json, so the fallback
+        // must be used. Actually assert the type rather than only stating it.
+        assertTrue(translator instanceof org.apache.tika.language.translate.DefaultTranslator,
+                "Should be DefaultTranslator when no translator is configured");
+    }
}
diff --git
a/tika-serialization/src/test/resources/configs/test-translator-config.json
b/tika-serialization/src/test/resources/configs/test-translator-config.json
new file mode 100644
index 000000000..4e4b88fcc
--- /dev/null
+++ b/tika-serialization/src/test/resources/configs/test-translator-config.json
@@ -0,0 +1,5 @@
+{
+ "translator": {
+ "class": "empty-translator"
+ }
+}
diff --git
a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaResourceFetcherTest.java
b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaResourceFetcherTest.java
index 8d3373570..84f2b6198 100644
---
a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaResourceFetcherTest.java
+++
b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaResourceFetcherTest.java
@@ -94,6 +94,7 @@ public class TikaResourceFetcherTest extends CXFTestBase {
protected InputStreamFactory getInputStreamFactory(InputStream is) {
try (TikaInputStream tis = TikaInputStream.get(is)) {
TikaConfigs tikaConfigs = TikaConfigs.load(tis.getPath());
+
System.out.println(tikaConfigs.getTikaJsonConfig().getRootNode().toPrettyString());
TikaPluginManager pluginManager =
TikaPluginManager.load(tikaConfigs);
FetcherManager fetcherManager = FetcherManager.load(pluginManager,
tikaConfigs);
return new FetcherStreamFactory(fetcherManager);
diff --git
a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TranslateResourceTest.java
b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TranslateResourceTest.java
index 249ac85f2..e156fdf93 100644
---
a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TranslateResourceTest.java
+++
b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TranslateResourceTest.java
@@ -27,14 +27,12 @@ import jakarta.ws.rs.core.Response;
import org.apache.cxf.jaxrs.JAXRSServerFactoryBean;
import org.apache.cxf.jaxrs.client.WebClient;
import org.apache.cxf.jaxrs.lifecycle.SingletonResourceProvider;
-import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import org.apache.tika.server.core.resource.TranslateResource;
import org.apache.tika.server.core.writer.TarWriter;
import org.apache.tika.server.core.writer.ZipWriter;
-@Disabled("until we get translators working")
public class TranslateResourceTest extends CXFTestBase {
private static final String TRANSLATE_PATH = "/translate";
diff --git a/tika-translate/pom.xml b/tika-translate/pom.xml
index 3ab74ad8c..014ad2ca6 100644
--- a/tika-translate/pom.xml
+++ b/tika-translate/pom.xml
@@ -41,6 +41,16 @@
<version>${project.version}</version>
<scope>provided</scope>
</dependency>
+
+ <!-- Annotation processor - contains @TikaComponent and ensures build
order.
+ "provided" because it is only used at compile time -->
+ <dependency>
+ <groupId>org.apache.tika</groupId>
+ <artifactId>tika-annotation-processor</artifactId>
+ <version>${project.version}</version>
+ <scope>provided</scope>
+ </dependency>
+
<dependency>
<groupId>org.apache.tika</groupId>
<artifactId>tika-langdetect-optimaize</artifactId>
@@ -133,6 +143,7 @@
org.apache.tika.*,
*;resolution:=optional
</Import-Package>
+
<Include-Resource>{maven-resources},META-INF=target/classes/META-INF</Include-Resource>
</instructions>
</configuration>
</plugin>
@@ -162,6 +173,19 @@
<artifactId>maven-site-plugin</artifactId>
<version>3.21.0</version>
</plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-compiler-plugin</artifactId>
+ <configuration>
+ <annotationProcessorPaths>
+ <path>
+ <groupId>org.apache.tika</groupId>
+ <artifactId>tika-annotation-processor</artifactId>
+ <version>${project.version}</version>
+ </path>
+ </annotationProcessorPaths>
+ </configuration>
+ </plugin>
</plugins>
<pluginManagement>
diff --git
a/tika-translate/src/main/java/org/apache/tika/language/translate/impl/CachedTranslator.java
b/tika-translate/src/main/java/org/apache/tika/language/translate/impl/CachedTranslator.java
index 88eef1544..a55b169b5 100644
---
a/tika-translate/src/main/java/org/apache/tika/language/translate/impl/CachedTranslator.java
+++
b/tika-translate/src/main/java/org/apache/tika/language/translate/impl/CachedTranslator.java
@@ -21,6 +21,7 @@ import java.util.HashMap;
import com.fasterxml.jackson.databind.util.LRUMap;
+import org.apache.tika.config.TikaComponent;
import org.apache.tika.exception.TikaException;
import org.apache.tika.language.detect.LanguageResult;
import org.apache.tika.language.translate.Translator;
@@ -28,6 +29,7 @@ import org.apache.tika.language.translate.Translator;
/**
* CachedTranslator. Saves a map of previous translations in order to prevent
repetitive translation requests.
*/
+@TikaComponent
public class CachedTranslator extends AbstractTranslator {
private static final int INITIAL_ENTRIES = 100;
private static final int MAX_ENTRIES = 1000;
diff --git
a/tika-translate/src/main/java/org/apache/tika/language/translate/impl/GoogleTranslator.java
b/tika-translate/src/main/java/org/apache/tika/language/translate/impl/GoogleTranslator.java
index 607a48d93..45f5ebb14 100644
---
a/tika-translate/src/main/java/org/apache/tika/language/translate/impl/GoogleTranslator.java
+++
b/tika-translate/src/main/java/org/apache/tika/language/translate/impl/GoogleTranslator.java
@@ -32,6 +32,7 @@ import org.apache.cxf.jaxrs.client.WebClient;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import org.apache.tika.config.TikaComponent;
import org.apache.tika.exception.TikaException;
/**
@@ -42,6 +43,7 @@ import org.apache.tika.exception.TikaException;
* from <a href="http://hayageek.com">hayageek.com</a>. Set your API key in
* translator.google.properties.
*/
+@TikaComponent
public class GoogleTranslator extends AbstractTranslator {
private static final Logger LOG =
LoggerFactory.getLogger(GoogleTranslator.class);
diff --git
a/tika-translate/src/main/java/org/apache/tika/language/translate/impl/JoshuaNetworkTranslator.java
b/tika-translate/src/main/java/org/apache/tika/language/translate/impl/JoshuaNetworkTranslator.java
index 4676954f9..e2af6ecb0 100644
---
a/tika-translate/src/main/java/org/apache/tika/language/translate/impl/JoshuaNetworkTranslator.java
+++
b/tika-translate/src/main/java/org/apache/tika/language/translate/impl/JoshuaNetworkTranslator.java
@@ -40,6 +40,7 @@ import org.apache.cxf.jaxrs.client.WebClient;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import org.apache.tika.config.TikaComponent;
import org.apache.tika.exception.TikaException;
/**
@@ -62,6 +63,7 @@ import org.apache.tika.exception.TikaException;
* Joshua requires input to be pre-formatted into sentences, one per line,
* so this translation implementation takes care of that.
*/
+@TikaComponent
public class JoshuaNetworkTranslator extends AbstractTranslator {
private static final Logger LOG =
LoggerFactory.getLogger(JoshuaNetworkTranslator.class);
diff --git
a/tika-translate/src/main/java/org/apache/tika/language/translate/impl/Lingo24Translator.java
b/tika-translate/src/main/java/org/apache/tika/language/translate/impl/Lingo24Translator.java
index e31a07f32..152976d19 100644
---
a/tika-translate/src/main/java/org/apache/tika/language/translate/impl/Lingo24Translator.java
+++
b/tika-translate/src/main/java/org/apache/tika/language/translate/impl/Lingo24Translator.java
@@ -32,6 +32,7 @@ import org.apache.cxf.jaxrs.client.WebClient;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import org.apache.tika.config.TikaComponent;
import org.apache.tika.exception.TikaException;
/**
@@ -40,6 +41,7 @@ import org.apache.tika.exception.TikaException;
* You can sign up for an access plan online on the <a
href="https://developer.lingo24.com/plans">Lingo24 Developer Portal</a>
* and set your Application's User Key in the
<code>translator.lingo24.properties</code> file.
*/
+@TikaComponent
public class Lingo24Translator extends AbstractTranslator {
private static final Logger LOG =
LoggerFactory.getLogger(Lingo24Translator.class);
diff --git
a/tika-translate/src/main/java/org/apache/tika/language/translate/impl/MarianTranslator.java
b/tika-translate/src/main/java/org/apache/tika/language/translate/impl/MarianTranslator.java
index 824e28b3f..3790efdbc 100644
---
a/tika-translate/src/main/java/org/apache/tika/language/translate/impl/MarianTranslator.java
+++
b/tika-translate/src/main/java/org/apache/tika/language/translate/impl/MarianTranslator.java
@@ -46,6 +46,7 @@ import org.apache.commons.io.FileUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import org.apache.tika.config.TikaComponent;
import org.apache.tika.exception.TikaException;
import org.apache.tika.utils.StringUtils;
@@ -56,6 +57,7 @@ import org.apache.tika.utils.StringUtils;
*
* @link https://marian-nmt.github.io/.
*/
+@TikaComponent
public class MarianTranslator extends AbstractTranslator {
private static final Logger LOG =
LoggerFactory.getLogger(MarianTranslator.class);
diff --git
a/tika-translate/src/main/java/org/apache/tika/language/translate/impl/MicrosoftTranslator.java
b/tika-translate/src/main/java/org/apache/tika/language/translate/impl/MicrosoftTranslator.java
index b87c6701a..c0bd3cc0d 100644
---
a/tika-translate/src/main/java/org/apache/tika/language/translate/impl/MicrosoftTranslator.java
+++
b/tika-translate/src/main/java/org/apache/tika/language/translate/impl/MicrosoftTranslator.java
@@ -25,6 +25,7 @@ import com.memetix.mst.translate.Translate;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import org.apache.tika.config.TikaComponent;
import org.apache.tika.exception.TikaException;
import org.apache.tika.language.translate.Translator;
@@ -34,6 +35,7 @@ import org.apache.tika.language.translate.Translator;
*
* @since Tika 1.6
*/
+@TikaComponent
public class MicrosoftTranslator implements Translator {
public static final String PROPERTIES_FILE =
"translator.microsoft.properties";
diff --git
a/tika-translate/src/main/java/org/apache/tika/language/translate/impl/MosesTranslator.java
b/tika-translate/src/main/java/org/apache/tika/language/translate/impl/MosesTranslator.java
index 77c00c595..26083e7d8 100644
---
a/tika-translate/src/main/java/org/apache/tika/language/translate/impl/MosesTranslator.java
+++
b/tika-translate/src/main/java/org/apache/tika/language/translate/impl/MosesTranslator.java
@@ -26,12 +26,14 @@ import java.io.OutputStreamWriter;
import java.nio.charset.Charset;
import java.util.Properties;
+import org.apache.tika.config.TikaComponent;
import org.apache.tika.exception.TikaException;
/**
* Translator that uses the Moses decoder for translation.
* Users must install the Moses system before using this Translator. @link
http://www.statmt.org/moses/.
*/
+@TikaComponent
public class MosesTranslator extends ExternalTranslator {
private static final String DEFAULT_PATH = "dummy-path";
diff --git
a/tika-translate/src/main/java/org/apache/tika/language/translate/impl/RTGTranslator.java
b/tika-translate/src/main/java/org/apache/tika/language/translate/impl/RTGTranslator.java
index 8275aca8f..b47cd4282 100644
---
a/tika-translate/src/main/java/org/apache/tika/language/translate/impl/RTGTranslator.java
+++
b/tika-translate/src/main/java/org/apache/tika/language/translate/impl/RTGTranslator.java
@@ -37,6 +37,7 @@ import org.json.simple.parser.ParseException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import org.apache.tika.config.TikaComponent;
import org.apache.tika.exception.TikaException;
@@ -70,6 +71,7 @@ import org.apache.tika.exception.TikaException;
* RTG requires input to be pre-formatted into sentences, one per line,
* so this translation implementation takes care of that.
*/
+@TikaComponent
public class RTGTranslator extends AbstractTranslator {
public static final String RTG_TRANSLATE_URL_BASE =
"http://localhost:6060";
diff --git
a/tika-translate/src/main/java/org/apache/tika/language/translate/impl/YandexTranslator.java
b/tika-translate/src/main/java/org/apache/tika/language/translate/impl/YandexTranslator.java
index c2e6f6bcc..16aa6ede8 100644
---
a/tika-translate/src/main/java/org/apache/tika/language/translate/impl/YandexTranslator.java
+++
b/tika-translate/src/main/java/org/apache/tika/language/translate/impl/YandexTranslator.java
@@ -33,6 +33,7 @@ import org.apache.cxf.jaxrs.client.WebClient;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import org.apache.tika.config.TikaComponent;
import org.apache.tika.exception.TikaException;
import org.apache.tika.language.translate.Translator;
@@ -41,6 +42,7 @@ import org.apache.tika.language.translate.Translator;
* You can sign up for free access online on the <a
href="https://tech.yandex.com/key/form.xml?service=trnsl">API Key form</a>
* and set your Application's User Key in the
<code>translator.yandex.properties</code> file.
*/
+@TikaComponent
public class YandexTranslator implements Translator {
private static final Logger LOG =
LoggerFactory.getLogger(YandexTranslator.class);
diff --git
a/tika-translate/src/main/resources/META-INF/services/org.apache.tika.language.translate.Translator
b/tika-translate/src/main/resources/META-INF/services/org.apache.tika.language.translate.Translator
deleted file mode 100644
index 71cc28df9..000000000
---
a/tika-translate/src/main/resources/META-INF/services/org.apache.tika.language.translate.Translator
+++ /dev/null
@@ -1,22 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-org.apache.tika.language.translate.impl.MicrosoftTranslator
-org.apache.tika.language.translate.impl.GoogleTranslator
-org.apache.tika.language.translate.impl.Lingo24Translator
-org.apache.tika.language.translate.impl.CachedTranslator
-org.apache.tika.language.translate.impl.JoshuaNetworkTranslator
-org.apache.tika.language.translate.impl.RTGTranslator
-org.apache.tika.language.translate.impl.MarianTranslator