This is an automated email from the ASF dual-hosted git repository.
tballison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/main by this push:
new de9ea3d177 add tika-eval into tika-app (#2816)
de9ea3d177 is described below
commit de9ea3d1776f4e2ed18d9522a11d163af5c56d45
Author: Tim Allison <[email protected]>
AuthorDate: Thu May 14 12:38:17 2026 -0400
add tika-eval into tika-app (#2816)
---
tika-app/pom.xml | 6 ++++++
tika-eval/tika-eval-core/pom.xml | 6 ++++++
.../tika/eval/core/metadata/TikaEvalMetadataFilter.java | 2 ++
tika-pipes/tika-pipes-fork-parser/pom.xml | 11 +++++++++++
.../org/apache/tika/config/loader/ComponentInstantiator.java | 4 +++-
5 files changed, 28 insertions(+), 1 deletion(-)
diff --git a/tika-app/pom.xml b/tika-app/pom.xml
index 7e33098259..93f1394c92 100644
--- a/tika-app/pom.xml
+++ b/tika-app/pom.xml
@@ -68,6 +68,12 @@
<artifactId>tika-ml-junkdetect</artifactId>
<version>${project.version}</version>
</dependency>
+ <!-- tika-eval: TikaEvalMetadataFilter (tokens, OOV, lang, languageness) -->
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>tika-eval-core</artifactId>
+ <version>${project.version}</version>
+ </dependency>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>tika-xmp</artifactId>
diff --git a/tika-eval/tika-eval-core/pom.xml b/tika-eval/tika-eval-core/pom.xml
index 28bf95a442..5bf5da8a53 100644
--- a/tika-eval/tika-eval-core/pom.xml
+++ b/tika-eval/tika-eval-core/pom.xml
@@ -29,6 +29,12 @@
<dependencies>
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>tika-annotation-processor</artifactId>
+ <version>${revision}</version>
+ <scope>provided</scope>
+ </dependency>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>tika-core</artifactId>
diff --git
a/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/metadata/TikaEvalMetadataFilter.java
b/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/metadata/TikaEvalMetadataFilter.java
index 945ba84ee8..7c015b79a4 100644
---
a/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/metadata/TikaEvalMetadataFilter.java
+++
b/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/metadata/TikaEvalMetadataFilter.java
@@ -22,6 +22,7 @@ import java.util.Map;
import org.apache.commons.lang3.StringUtils;
+import org.apache.tika.config.TikaComponent;
import org.apache.tika.eval.core.langid.LanguageIDWrapper;
import org.apache.tika.eval.core.textstats.BasicTokenCountStatsCalculator;
import org.apache.tika.eval.core.textstats.CommonTokens;
@@ -37,6 +38,7 @@ import org.apache.tika.metadata.filter.MetadataFilterBase;
import org.apache.tika.ml.junkdetect.JunkDetector;
import org.apache.tika.quality.TextQualityScore;
+@TikaComponent
public class TikaEvalMetadataFilter extends MetadataFilterBase {
public static String TIKA_EVAL_NS = "tika-eval" +
TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER;
diff --git a/tika-pipes/tika-pipes-fork-parser/pom.xml
b/tika-pipes/tika-pipes-fork-parser/pom.xml
index fd5856aa6a..8d381e9ed3 100644
--- a/tika-pipes/tika-pipes-fork-parser/pom.xml
+++ b/tika-pipes/tika-pipes-fork-parser/pom.xml
@@ -59,6 +59,17 @@
<version>${project.version}</version>
<type>pom</type>
</dependency>
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>tika-langdetect-charsoup</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ <!-- tika-eval: TikaEvalMetadataFilter (tokens, OOV, lang, languageness) -->
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>tika-eval-core</artifactId>
+ <version>${project.version}</version>
+ </dependency>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>tika-core</artifactId>
diff --git
a/tika-serialization/src/main/java/org/apache/tika/config/loader/ComponentInstantiator.java
b/tika-serialization/src/main/java/org/apache/tika/config/loader/ComponentInstantiator.java
index f82daa581c..4b87ba78ca 100644
---
a/tika-serialization/src/main/java/org/apache/tika/config/loader/ComponentInstantiator.java
+++
b/tika-serialization/src/main/java/org/apache/tika/config/loader/ComponentInstantiator.java
@@ -127,7 +127,9 @@ public class ComponentInstantiator {
Constructor<?> constructor = componentClass.getConstructor(JsonConfig.class);
String jsonString = configNode != null ? configNode.toString() : "{}";
JsonConfig jsonConfig = () -> jsonString;
- return (T) constructor.newInstance(jsonConfig);
+ T component = (T) constructor.newInstance(jsonConfig);
+ initializeIfNeeded(component);
+ return component;
} catch (NoSuchMethodException e) {
// No JsonConfig constructor, fall back to other methods
}