This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/main by this push:
     new 344a7ea6a TIKA-4555 -- rm tika-server-eval (#2456)
344a7ea6a is described below

commit 344a7ea6a756bdc38f9606aa01fc60376c4839ba
Author: Tim Allison <[email protected]>
AuthorDate: Tue Dec 16 11:08:38 2025 -0500

    TIKA-4555 -- rm tika-server-eval (#2456)
---
 tika-server/pom.xml                                |   1 -
 tika-server/tika-server-eval/pom.xml               |  59 -------
 .../apache/tika/server/eval/TikaEvalResource.java  | 190 ---------------------
 ...he.tika.server.core.resource.TikaServerResource |  15 --
 .../tika/server/eval/TikaEvalResourceTest.java     | 169 ------------------
 5 files changed, 434 deletions(-)

diff --git a/tika-server/pom.xml b/tika-server/pom.xml
index e001dd9e1..7a76de956 100644
--- a/tika-server/pom.xml
+++ b/tika-server/pom.xml
@@ -20,7 +20,6 @@
     <module>tika-server-core</module>
     <module>tika-server-standard</module>
     <module>tika-server-client</module>
-    <module>tika-server-eval</module>
   </modules>
 
   <parent>
diff --git a/tika-server/tika-server-eval/pom.xml b/tika-server/tika-server-eval/pom.xml
deleted file mode 100644
index ddc6505c9..000000000
--- a/tika-server/tika-server-eval/pom.xml
+++ /dev/null
@@ -1,59 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!--
-  Licensed to the Apache Software Foundation (ASF) under one or more
-  contributor license agreements.  See the NOTICE file distributed with
-  this work for additional information regarding copyright ownership.
-  The ASF licenses this file to You under the Apache License, Version 2.0
-  (the "License"); you may not use this file except in compliance with
-  the License.  You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing, software
-  distributed under the License is distributed on an "AS IS" BASIS,
-  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-  See the License for the specific language governing permissions and
-  limitations under the License.
--->
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
-  <parent>
-    <artifactId>tika-server</artifactId>
-    <groupId>org.apache.tika</groupId>
-    <version>4.0.0-SNAPSHOT</version>
-  </parent>
-  <modelVersion>4.0.0</modelVersion>
-
-  <artifactId>tika-server-eval</artifactId>
-  <name>Apache Tika server tika-eval handler</name>
-
-  <dependencies>
-    <dependency>
-      <groupId>${project.groupId}</groupId>
-      <artifactId>tika-eval-core</artifactId>
-      <version>${project.version}</version>
-    </dependency>
-    <dependency>
-      <groupId>${project.groupId}</groupId>
-      <artifactId>tika-server-core</artifactId>
-      <version>${project.version}</version>
-      <scope>provided</scope>
-    </dependency>
-  </dependencies>
-
-  <build>
-    <plugins>
-      <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-jar-plugin</artifactId>
-        <configuration>
-          <archive>
-            <manifestEntries>
-              <Automatic-Module-Name>org.apache.tika.server.eval</Automatic-Module-Name>
-            </manifestEntries>
-          </archive>
-        </configuration>
-      </plugin>
-
-    </plugins>
-  </build>
-</project>
\ No newline at end of file
diff --git a/tika-server/tika-server-eval/src/main/java/org/apache/tika/server/eval/TikaEvalResource.java b/tika-server/tika-server-eval/src/main/java/org/apache/tika/server/eval/TikaEvalResource.java
deleted file mode 100644
index 73430f292..000000000
--- a/tika-server/tika-server-eval/src/main/java/org/apache/tika/server/eval/TikaEvalResource.java
+++ /dev/null
@@ -1,190 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.server.eval;
-
-import static 
org.apache.tika.eval.core.metadata.TikaEvalMetadataFilter.LANGUAGE;
-import static 
org.apache.tika.eval.core.metadata.TikaEvalMetadataFilter.LANGUAGE_CONFIDENCE;
-import static 
org.apache.tika.eval.core.metadata.TikaEvalMetadataFilter.NUM_ALPHA_TOKENS;
-import static 
org.apache.tika.eval.core.metadata.TikaEvalMetadataFilter.NUM_TOKENS;
-import static 
org.apache.tika.eval.core.metadata.TikaEvalMetadataFilter.NUM_UNIQUE_ALPHA_TOKENS;
-import static 
org.apache.tika.eval.core.metadata.TikaEvalMetadataFilter.NUM_UNIQUE_TOKENS;
-import static 
org.apache.tika.eval.core.metadata.TikaEvalMetadataFilter.OUT_OF_VOCABULARY;
-
-import java.io.BufferedReader;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.nio.charset.StandardCharsets;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-import com.fasterxml.jackson.databind.JsonNode;
-import com.fasterxml.jackson.databind.ObjectMapper;
-import jakarta.ws.rs.Consumes;
-import jakarta.ws.rs.PUT;
-import jakarta.ws.rs.Path;
-import jakarta.ws.rs.Produces;
-
-import org.apache.tika.eval.core.langid.LanguageIDWrapper;
-import org.apache.tika.eval.core.metadata.TikaEvalMetadataFilter;
-import org.apache.tika.eval.core.textstats.BasicTokenCountStatsCalculator;
-import org.apache.tika.eval.core.textstats.CommonTokens;
-import org.apache.tika.eval.core.textstats.CompositeTextStatsCalculator;
-import org.apache.tika.eval.core.textstats.TextStatsCalculator;
-import org.apache.tika.eval.core.tokens.CommonTokenResult;
-import org.apache.tika.eval.core.tokens.ContrastStatistics;
-import org.apache.tika.eval.core.tokens.TokenContraster;
-import org.apache.tika.eval.core.tokens.TokenCounts;
-import org.apache.tika.language.detect.LanguageResult;
-import org.apache.tika.metadata.Property;
-import org.apache.tika.server.core.ServerStatus;
-import org.apache.tika.server.core.ServerStatusResource;
-import org.apache.tika.server.core.resource.TikaServerResource;
-import org.apache.tika.utils.StringUtils;
-
-@Path("/eval")
-public class TikaEvalResource implements TikaServerResource, 
ServerStatusResource {
-
-    public static final String TEXT = "text";
-    public static final String TEXT_A = "textA";
-    public static final String TEXT_B = "textB";
-    public static final String ID = "id";
-
-    public static final Property DICE = Property.externalReal(
-            TikaEvalMetadataFilter.TIKA_EVAL_NS + "dice");
-
-    public static final Property OVERLAP = Property.externalReal(
-            TikaEvalMetadataFilter.TIKA_EVAL_NS + "overlap");
-
-    private ServerStatus serverStatus;
-    public static final long DEFAULT_TIMEOUT_MILLIS = 60000;
-
-    static CompositeTextStatsCalculator TEXT_STATS_CALCULATOR;
-
-    static {
-        List<TextStatsCalculator> calcs = new ArrayList<>();
-        calcs.add(new BasicTokenCountStatsCalculator());
-        calcs.add(new CommonTokens());
-        TEXT_STATS_CALCULATOR = new CompositeTextStatsCalculator(calcs);
-    }
-
-    @PUT
-    @Consumes("application/json")
-    @Produces("application/json")
-    @Path("compare")
-    public Map<String, Object> compare(InputStream is) throws Exception {
-        JsonNode node = null;
-        try (BufferedReader reader = new BufferedReader(
-                new InputStreamReader(is, StandardCharsets.UTF_8))) {
-            node = new ObjectMapper().readTree(reader);
-        }
-        String id = node.get(ID).asText();
-        String textA = node.get(TEXT_A).asText();
-        String textB = node.get(TEXT_B).asText();
-        long timeoutMillis = node.has("timeoutMillis") ? 
node.get("timeoutMillis").asLong() :
-                DEFAULT_TIMEOUT_MILLIS;
-        return compareText(id, textA, textB, timeoutMillis);
-    }
-
-    @PUT
-    @Consumes("application/json")
-    @Produces("application/json")
-    @Path("profile")
-    public Map<String, Object> profile(InputStream is) throws Exception {
-        JsonNode node = null;
-        try (BufferedReader reader = new BufferedReader(
-                new InputStreamReader(is, StandardCharsets.UTF_8))) {
-            node = new ObjectMapper().readTree(reader);
-        }
-        String id = node.get(ID).asText();
-        String text = node.get(TEXT).asText();
-        long timeoutMillis = node.has("timeoutMillis") ? 
node.get("timeoutMillis").asLong() :
-                DEFAULT_TIMEOUT_MILLIS;
-        return profile(id, text, timeoutMillis);
-    }
-
-    private Map<String, Object> profile(String id, String text, long 
timeoutMillis) {
-
-        Map<String, Object> stats = new HashMap<>();
-        long taskId = serverStatus.start(ServerStatus.TASK.PARSE, id, 
timeoutMillis);
-        try {
-            profile(StringUtils.EMPTY, text, stats);
-        } finally {
-            serverStatus.complete(taskId);
-        }
-        return stats;
-    }
-
-
-    private Map<String, Object> compareText(String id, String textA, String 
textB, long timeoutMillis) {
-
-        Map<String, Object> stats = new HashMap<>();
-        long taskId = serverStatus.start(ServerStatus.TASK.PARSE, id, 
timeoutMillis);
-        try {
-            TokenCounts tokensA = profile("A", textA, stats);
-            TokenCounts tokensB = profile("B", textB, stats);
-            TokenContraster tokenContraster = new TokenContraster();
-            ContrastStatistics contrastStatistics =
-                    tokenContraster.calculateContrastStatistics(tokensA, 
tokensB);
-            reportContrastStats(contrastStatistics, stats);
-        } finally {
-            serverStatus.complete(taskId);
-        }
-        return stats;
-    }
-
-    private void reportContrastStats(ContrastStatistics contrastStatistics,
-                                     Map<String, Object> stats) {
-        stats.put(DICE.getName(), contrastStatistics.getDiceCoefficient());
-        stats.put(OVERLAP.getName(), contrastStatistics.getOverlap());
-        //TODO, add topNMore, topNUnique
-    }
-
-    private TokenCounts profile(String suffix, String content, Map<String, 
Object> stats) {
-        Map<Class, Object> results = TEXT_STATS_CALCULATOR.calculate(content);
-
-        TokenCounts tokenCounts = (TokenCounts) 
results.get(BasicTokenCountStatsCalculator.class);
-        stats.put(NUM_TOKENS.getName() + suffix, tokenCounts.getTotalTokens());
-        stats.put(NUM_UNIQUE_TOKENS.getName() + suffix, 
tokenCounts.getTotalUniqueTokens());
-
-
-        //common token results
-        CommonTokenResult commonTokenResult = (CommonTokenResult) 
results.get(CommonTokens.class);
-        stats.put(NUM_ALPHA_TOKENS.getName() + suffix, 
commonTokenResult.getAlphabeticTokens());
-        stats.put(NUM_UNIQUE_ALPHA_TOKENS.getName() + suffix, 
commonTokenResult.getUniqueAlphabeticTokens());
-        if (commonTokenResult.getAlphabeticTokens() > 0) {
-            stats.put(OUT_OF_VOCABULARY.getName() + suffix, 
commonTokenResult.getOOV());
-        } else {
-            stats.put(OUT_OF_VOCABULARY.getName() + suffix, -1.0f);
-        }
-
-        //languages
-        List<LanguageResult> probabilities =
-                (List<LanguageResult>) results.get(LanguageIDWrapper.class);
-        if (probabilities.size() > 0) {
-            stats.put(LANGUAGE.getName() + suffix, 
probabilities.get(0).getLanguage());
-            stats.put(LANGUAGE_CONFIDENCE.getName() + suffix, 
probabilities.get(0).getRawScore());
-        }
-        return tokenCounts;
-    }
-
-    @Override
-    public void setServerStatus(ServerStatus serverStatus) {
-        this.serverStatus = serverStatus;
-    }
-}
diff --git a/tika-server/tika-server-eval/src/main/resources/META-INF/services/org.apache.tika.server.core.resource.TikaServerResource b/tika-server/tika-server-eval/src/main/resources/META-INF/services/org.apache.tika.server.core.resource.TikaServerResource
deleted file mode 100644
index def7e1af5..000000000
--- a/tika-server/tika-server-eval/src/main/resources/META-INF/services/org.apache.tika.server.core.resource.TikaServerResource
+++ /dev/null
@@ -1,15 +0,0 @@
-#  Licensed to the Apache Software Foundation (ASF) under one or more
-#  contributor license agreements.  See the NOTICE file distributed with
-#  this work for additional information regarding copyright ownership.
-#  The ASF licenses this file to You under the Apache License, Version 2.0
-#  (the "License"); you may not use this file except in compliance with
-#  the License.  You may obtain a copy of the License at
-#
-#       http://www.apache.org/licenses/LICENSE-2.0
-#
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-#  limitations under the License.
-org.apache.tika.server.eval.TikaEvalResource
\ No newline at end of file
diff --git a/tika-server/tika-server-eval/src/test/java/org/apache/tika/server/eval/TikaEvalResourceTest.java b/tika-server/tika-server-eval/src/test/java/org/apache/tika/server/eval/TikaEvalResourceTest.java
deleted file mode 100644
index 98ce13c02..000000000
--- a/tika-server/tika-server-eval/src/test/java/org/apache/tika/server/eval/TikaEvalResourceTest.java
+++ /dev/null
@@ -1,169 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.server.eval;
-
-import static org.junit.jupiter.api.Assertions.assertEquals;
-
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.nio.charset.StandardCharsets;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-import com.fasterxml.jackson.core.JsonProcessingException;
-import com.fasterxml.jackson.core.type.TypeReference;
-import com.fasterxml.jackson.databind.ObjectMapper;
-import jakarta.ws.rs.core.Response;
-import org.apache.cxf.binding.BindingFactoryManager;
-import org.apache.cxf.endpoint.Server;
-import org.apache.cxf.jaxrs.JAXRSBindingFactory;
-import org.apache.cxf.jaxrs.JAXRSServerFactoryBean;
-import org.apache.cxf.jaxrs.client.WebClient;
-import org.apache.cxf.jaxrs.lifecycle.SingletonResourceProvider;
-import org.apache.cxf.transport.common.gzip.GZIPInInterceptor;
-import org.apache.cxf.transport.common.gzip.GZIPOutInterceptor;
-import org.junit.jupiter.api.AfterAll;
-import org.junit.jupiter.api.BeforeAll;
-import org.junit.jupiter.api.Test;
-
-import org.apache.tika.eval.core.metadata.TikaEvalMetadataFilter;
-import org.apache.tika.server.core.ProduceTypeResourceComparator;
-import org.apache.tika.server.core.ServerStatus;
-import org.apache.tika.server.core.TikaServerConfig;
-import org.apache.tika.server.core.writer.JSONObjWriter;
-
-public class TikaEvalResourceTest {
-
-    protected static final String END_POINT =
-            "http://localhost:" + TikaServerConfig.DEFAULT_PORT;
-
-    protected static final String COMPARE_END_POINT = END_POINT + 
"/eval/compare";
-    protected static final String PROFILE_END_POINT = END_POINT + 
"/eval/profile";
-    protected static Server SERVER;
-
-    ObjectMapper objectMapper = new ObjectMapper();
-
-    @BeforeAll
-    public static void setUp() throws Exception {
-        ServerStatus serverStatus = new ServerStatus();
-        JAXRSServerFactoryBean sf = new JAXRSServerFactoryBean();
-        //set compression interceptors
-        sf.setOutInterceptors(Collections.singletonList(new 
GZIPOutInterceptor()));
-        sf.setInInterceptors(Collections.singletonList(new 
GZIPInInterceptor()));
-
-        setUpResources(sf, serverStatus);
-        setUpProviders(sf);
-        sf.setAddress(END_POINT + "/");
-        sf.setResourceComparator(new ProduceTypeResourceComparator());
-
-        BindingFactoryManager manager = 
sf.getBus().getExtension(BindingFactoryManager.class);
-
-        JAXRSBindingFactory factory = new JAXRSBindingFactory();
-        factory.setBus(sf.getBus());
-
-        manager.registerBindingFactory(JAXRSBindingFactory.JAXRS_BINDING_ID, 
factory);
-        SERVER = sf.create();
-    }
-
-    @AfterAll
-    public static void tearDown() throws Exception {
-        SERVER.stop();
-        SERVER.destroy();
-    }
-
-    protected static void setUpResources(JAXRSServerFactoryBean sf, 
ServerStatus serverStatus) {
-        sf.setResourceClasses(TikaEvalResource.class);
-        TikaEvalResource tikaEvalResource = new TikaEvalResource();
-        tikaEvalResource.setServerStatus(serverStatus);
-        sf.setResourceProvider(TikaEvalResource.class,
-                new SingletonResourceProvider(tikaEvalResource));
-    }
-
-    protected static void setUpProviders(JAXRSServerFactoryBean sf) {
-        List<Object> providers = new ArrayList<>();
-        providers.add(new JSONObjWriter());
-        sf.setProviders(providers);
-    }
-
-    @Test
-    public void testBasicProfile() throws Exception {
-        Map<String, String> request = new HashMap<>();
-        request.put(TikaEvalResource.ID, "1");
-        request.put(TikaEvalResource.TEXT, "the quick brown fox jumped 
qwertyuiop");
-        Response response = profile(request);
-        Map<String, Object> results = deserialize(response);
-        assertEquals(6, 
(int)results.get(TikaEvalMetadataFilter.NUM_TOKENS.getName()));
-        assertEquals(0.166, 
(double)results.get(TikaEvalMetadataFilter.OUT_OF_VOCABULARY.getName()),
-                0.01);
-        assertEquals("eng", 
(String)results.get(TikaEvalMetadataFilter.LANGUAGE.getName()));
-    }
-
-    @Test
-    public void testBasicCompare() throws Exception {
-        Map<String, String> request = new HashMap<>();
-        request.put(TikaEvalResource.ID, "1");
-        request.put(TikaEvalResource.TEXT_A, "the quick brown fox jumped 
qwertyuiop");
-        request.put(TikaEvalResource.TEXT_B, "the the the fast brown dog 
jumped qwertyuiop");
-        Response response = compare(request);
-        Map<String, Object> results = deserialize(response);
-        assertEquals(6,
-                (int)results.get(TikaEvalMetadataFilter.NUM_TOKENS.getName() + 
"A"));
-        assertEquals(0.166,
-                
(double)results.get(TikaEvalMetadataFilter.OUT_OF_VOCABULARY.getName() + "A"),
-                0.01);
-        assertEquals("eng", 
results.get(TikaEvalMetadataFilter.LANGUAGE.getName() + "A"));
-
-        assertEquals(0.666, 
(double)results.get(TikaEvalResource.DICE.getName()), 0.01);
-        assertEquals(0.571, 
(double)results.get(TikaEvalResource.OVERLAP.getName()), 0.01);
-    }
-
-    private Map<String, Object> deserialize(Response response) throws 
IOException {
-        TypeReference<HashMap<String, Object>> typeRef
-                = new TypeReference<HashMap<String, Object>>() {};
-        try (BufferedReader reader =
-                     new BufferedReader(
-                             new 
InputStreamReader((InputStream)response.getEntity(),
-                             StandardCharsets.UTF_8))) {
-            return objectMapper.readValue(reader, typeRef);
-        }
-    }
-
-    private Response profile(Map<String, String> request) throws 
JsonProcessingException {
-
-        String jsonRequest = objectMapper//.writerWithDefaultPrettyPrinter()
-                .writeValueAsString(request);
-        return  WebClient.create(PROFILE_END_POINT)
-                .type("application/json")
-                .accept("application/json")
-                .put(jsonRequest.getBytes(StandardCharsets.UTF_8));
-    }
-
-    private Response compare(Map<String, String> request) throws 
JsonProcessingException {
-
-        String jsonRequest = objectMapper//.writerWithDefaultPrettyPrinter()
-                .writeValueAsString(request);
-        return  WebClient.create(COMPARE_END_POINT)
-                .type("application/json")
-                .accept("application/json")
-                .put(jsonRequest.getBytes(StandardCharsets.UTF_8));
-    }
-}

Reply via email to