This is an automated email from the ASF dual-hosted git repository.

tballison pushed a commit to branch haystack-tika-eval-integration
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 1777a9bbc6e9ca530f4192879371029d53e62a1f
Author: tballison <[email protected]>
AuthorDate: Wed May 13 12:42:03 2026 -0400

    add tika-eval into tika-app
---
 tika-app/pom.xml                                                    | 6 ++++++
 tika-eval/tika-eval-core/pom.xml                                    | 6 ++++++
 .../org/apache/tika/eval/core/metadata/TikaEvalMetadataFilter.java  | 2 ++
 .../java/org/apache/tika/config/loader/ComponentInstantiator.java   | 4 +++-
 4 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/tika-app/pom.xml b/tika-app/pom.xml
index 7e33098259..93f1394c92 100644
--- a/tika-app/pom.xml
+++ b/tika-app/pom.xml
@@ -68,6 +68,12 @@
       <artifactId>tika-ml-junkdetect</artifactId>
       <version>${project.version}</version>
     </dependency>
+    <!-- tika-eval: TikaEvalMetadataFilter (tokens, OOV, lang, languageness) -->
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>tika-eval-core</artifactId>
+      <version>${project.version}</version>
+    </dependency>
     <dependency>
       <groupId>${project.groupId}</groupId>
       <artifactId>tika-xmp</artifactId>
diff --git a/tika-eval/tika-eval-core/pom.xml b/tika-eval/tika-eval-core/pom.xml
index 28bf95a442..5bf5da8a53 100644
--- a/tika-eval/tika-eval-core/pom.xml
+++ b/tika-eval/tika-eval-core/pom.xml
@@ -29,6 +29,12 @@
 
 
   <dependencies>
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>tika-annotation-processor</artifactId>
+      <version>${revision}</version>
+      <scope>provided</scope>
+    </dependency>
     <dependency>
       <groupId>${project.groupId}</groupId>
       <artifactId>tika-core</artifactId>
diff --git a/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/metadata/TikaEvalMetadataFilter.java b/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/metadata/TikaEvalMetadataFilter.java
index 945ba84ee8..7c015b79a4 100644
--- a/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/metadata/TikaEvalMetadataFilter.java
+++ b/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/metadata/TikaEvalMetadataFilter.java
@@ -22,6 +22,7 @@ import java.util.Map;
 
 import org.apache.commons.lang3.StringUtils;
 
+import org.apache.tika.config.TikaComponent;
 import org.apache.tika.eval.core.langid.LanguageIDWrapper;
 import org.apache.tika.eval.core.textstats.BasicTokenCountStatsCalculator;
 import org.apache.tika.eval.core.textstats.CommonTokens;
@@ -37,6 +38,7 @@ import org.apache.tika.metadata.filter.MetadataFilterBase;
 import org.apache.tika.ml.junkdetect.JunkDetector;
 import org.apache.tika.quality.TextQualityScore;
 
+@TikaComponent
 public class TikaEvalMetadataFilter extends MetadataFilterBase {
 
     public static String TIKA_EVAL_NS = "tika-eval" + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER;
diff --git a/tika-serialization/src/main/java/org/apache/tika/config/loader/ComponentInstantiator.java b/tika-serialization/src/main/java/org/apache/tika/config/loader/ComponentInstantiator.java
index f82daa581c..4b87ba78ca 100644
--- a/tika-serialization/src/main/java/org/apache/tika/config/loader/ComponentInstantiator.java
+++ b/tika-serialization/src/main/java/org/apache/tika/config/loader/ComponentInstantiator.java
@@ -127,7 +127,9 @@ public class ComponentInstantiator {
                 Constructor<?> constructor = componentClass.getConstructor(JsonConfig.class);
                 String jsonString = configNode != null ? configNode.toString() : "{}";
                 JsonConfig jsonConfig = () -> jsonString;
-                return (T) constructor.newInstance(jsonConfig);
+                T component = (T) constructor.newInstance(jsonConfig);
+                initializeIfNeeded(component);
+                return component;
             } catch (NoSuchMethodException e) {
                 // No JsonConfig constructor, fall back to other methods
             }

Reply via email to