This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/main by this push:
     new 20e782969 TIKA-4343 -- remove agepredictor (#2346)
20e782969 is described below

commit 20e782969c7117ecdebfab1d66310949e1c407c6
Author: Tim Allison <[email protected]>
AuthorDate: Fri Oct 3 12:39:40 2025 -0400

    TIKA-4343 -- remove agepredictor (#2346)
---
 .../tika-parsers-ml/tika-age-recogniser/pom.xml    | 374 ---------------------
 .../tika/parser/recognition/AgeRecogniser.java     | 142 --------
 .../parser/recognition/AgeRecogniserConfig.java    |  66 ----
 .../tika/parser/recognition/AgeRecogniserTest.java |  85 -----
 .../tika/parser/recognition/tika-config-age.xml    |  31 --
 .../tika/parser/ner/opennlp/ModelGetter.groovy     |   7 +-
 6 files changed, 1 insertion(+), 704 deletions(-)

diff --git a/tika-parsers/tika-parsers-ml/tika-age-recogniser/pom.xml 
b/tika-parsers/tika-parsers-ml/tika-age-recogniser/pom.xml
deleted file mode 100644
index a2ac44bbd..000000000
--- a/tika-parsers/tika-parsers-ml/tika-age-recogniser/pom.xml
+++ /dev/null
@@ -1,374 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!--
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing,
-  software distributed under the License is distributed on an
-  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  KIND, either express or implied.  See the License for the
-  specific language governing permissions and limitations
-  under the License.
--->
-<project xmlns="http://maven.apache.org/POM/4.0.0" 
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 
https://maven.apache.org/xsd/maven-4.0.0.xsd">
-  <modelVersion>4.0.0</modelVersion>
-  <parent>
-    <groupId>org.apache.tika</groupId>
-    <artifactId>tika-parsers-ml</artifactId>
-    <version>4.0.0-SNAPSHOT</version>
-  </parent>
-
-  <artifactId>tika-age-recogniser</artifactId>
-  <packaging>jar</packaging>
-
-  <name>Apache Tika age recogniser</name>
-  <url>http://maven.apache.org</url>
-
-  <properties>
-      <curator.version>5.9.0</curator.version>
-      <hadoop.version>3.4.2</hadoop.version>
-  </properties>
-
-  <!-- we're not maintaining this module.
-  Keep this here instead of cluttering the parent pom -->
-  <dependencyManagement>
-    <dependencies>
-      <dependency>
-        <groupId>com.google.code.findbugs</groupId>
-        <artifactId>jsr305</artifactId>
-        <version>3.0.2</version>
-      </dependency>
-      <dependency>
-        <groupId>org.scalamacros</groupId>
-        <artifactId>quasiquotes_2.10</artifactId>
-        <version>2.1.1</version>
-      </dependency>
-      <!-- can't use 2.13.*, NoClassDefFoundError: scala/Serializable -->
-      <dependency>
-        <groupId>org.scala-lang</groupId>
-        <artifactId>scala-library</artifactId>
-        <version>2.12.19</version>
-      </dependency>
-      <dependency>
-        <groupId>net.bytebuddy</groupId>
-        <artifactId>byte-buddy</artifactId>
-        <version>1.17.7</version>
-      </dependency>
-      <dependency>
-        <groupId>org.scala-lang</groupId>
-        <artifactId>scala-reflect</artifactId>
-        <version>2.13.17</version>
-      </dependency>
-      <dependency>
-        <groupId>org.scala-lang</groupId>
-        <artifactId>scala-compiler</artifactId>
-        <version>2.13.17</version>
-      </dependency>
-      <dependency>
-        <groupId>commons-net</groupId>
-        <artifactId>commons-net</artifactId>
-        <version>${commons.net.version}</version>
-      </dependency>
-      <dependency>
-        <groupId>com.thoughtworks.paranamer</groupId>
-        <artifactId>paranamer</artifactId>
-        <version>2.8.3</version>
-      </dependency>
-      <dependency>
-        <groupId>org.apache.avro</groupId>
-        <artifactId>avro</artifactId>
-        <version>1.12.0</version>
-      </dependency>
-      <dependency>
-        <groupId>commons-lang</groupId>
-        <artifactId>commons-lang</artifactId>
-        <version>2.6</version>
-      </dependency>
-      <dependency>
-        <groupId>org.codehaus.jackson</groupId>
-        <artifactId>jackson-core-asl</artifactId>
-        <version>1.9.13</version>
-      </dependency>
-      <dependency>
-        <groupId>org.codehaus.jackson</groupId>
-        <artifactId>jackson-mapper-asl</artifactId>
-        <version>1.9.13</version>
-      </dependency>
-      <!-- avoid convergence error with hadoop-yarn-common vs 
jackson-module-jaxb-annotations -->
-      <dependency>
-        <groupId>javax.xml.bind</groupId>
-        <artifactId>jaxb-api</artifactId>
-        <version>2.3.1</version>
-      </dependency>
-
-      <!-- netty dependency removed, either it isn't needed,
-           or our test coverage isn't good enough -->
-      <dependency>
-          <groupId>org.apache.commons</groupId>
-          <artifactId>commons-math3</artifactId>
-          <version>3.6.1</version>
-      </dependency>
-      <dependency>
-          <groupId>org.apache.commons</groupId>
-          <artifactId>commons-collections4</artifactId>
-          <version>${commons.collections4.version}</version>
-      </dependency>
-      <dependency>
-          <groupId>org.apache.curator</groupId>
-          <artifactId>curator-recipes</artifactId>
-          <version>${curator.version}</version>
-      </dependency>
-
-      <!-- avoid many security and convergence problems (still not perfect) -->
-      <!-- TODO spark-core_2.10 / spark-network-shuffle_2.10 (used by 
age-predictor-api) use log4j1,
-           and are also insecure themselves -->
-      <dependency>
-          <groupId>org.apache.curator</groupId>
-          <artifactId>curator-framework</artifactId>
-          <version>${curator.version}</version>
-      </dependency>
-      <dependency>
-          <groupId>org.apache.curator</groupId>
-          <artifactId>curator-client</artifactId>
-          <version>${curator.version}</version>
-      </dependency>
-      <dependency>
-          <groupId>org.apache.ivy</groupId>
-          <artifactId>ivy</artifactId>
-          <version>2.5.3</version>
-      </dependency>
-      <dependency>
-          <groupId>org.apache.hadoop</groupId>
-          <artifactId>hadoop-common</artifactId>
-          <version>${hadoop.version}</version>
-          <exclusions>
-              <exclusion>
-                  <groupId>org.bouncycastle</groupId>
-                  <artifactId>bcprov-jdk15on</artifactId>
-              </exclusion>
-          </exclusions>
-      </dependency>
-      <dependency>
-          <groupId>org.apache.hadoop</groupId>
-          <artifactId>hadoop-annotations</artifactId>
-          <version>${hadoop.version}</version>
-      </dependency>
-      <dependency>
-          <groupId>org.apache.hadoop</groupId>
-          <artifactId>hadoop-mapreduce-client-core</artifactId>
-          <version>${hadoop.version}</version>
-      </dependency>
-      <dependency>
-          <groupId>org.apache.hadoop</groupId>
-          <artifactId>hadoop-client</artifactId>
-          <version>${hadoop.version}</version>
-      </dependency>
-      <dependency>
-          <groupId>io.dropwizard.metrics</groupId>
-          <artifactId>metrics-core</artifactId>
-          <version>4.2.37</version>
-      </dependency>
-      <dependency>
-          <groupId>org.apache.commons</groupId>
-          <artifactId>commons-text</artifactId>
-          <version>1.14.0</version>
-      </dependency>
-      <dependency>
-          <groupId>org.codehaus.woodstox</groupId>
-          <artifactId>stax2-api</artifactId>
-          <version>4.2.2</version>
-      </dependency>
-      <dependency>
-          <groupId>org.apache.commons</groupId>
-          <artifactId>commons-configuration2</artifactId>
-          <version>2.12.0</version>
-      </dependency>
-      <dependency>
-          <groupId>org.apache.spark</groupId>
-          <artifactId>spark-mllib_2.10</artifactId>
-          <version>2.2.3</version>
-      </dependency>
-      <dependency>
-          <groupId>org.apache.spark</groupId>
-          <artifactId>spark-core_2.10</artifactId>
-          <version>2.2.3</version>
-      </dependency>
-      <dependency>
-          <groupId>org.apache.spark</groupId>
-          <artifactId>spark-network-shuffle_2.10</artifactId>
-          <version>2.2.3</version>
-      </dependency>
-      <dependency>
-          <groupId>org.jline</groupId>
-          <artifactId>jline</artifactId>
-          <version>3.30.6</version>
-      </dependency>
-      <dependency>
-          <groupId>javax.activation</groupId>
-          <artifactId>activation</artifactId>
-          <version>1.1.1</version>
-      </dependency>
-      <dependency>
-          <groupId>org.codehaus.janino</groupId>
-          <artifactId>janino</artifactId>
-          <version>3.1.12</version>
-      </dependency>
-      <dependency>
-          <groupId>org.codehaus.janino</groupId>
-          <artifactId>commons-compiler</artifactId>
-          <version>3.1.12</version>
-      </dependency>
-      <dependency>
-          <groupId>org.glassfish.jersey.core</groupId>
-          <artifactId>jersey-common</artifactId>
-          <version>3.1.11</version>
-      </dependency>
-      <dependency>
-          <groupId>org.glassfish.hk2</groupId>
-          <artifactId>osgi-resource-locator</artifactId>
-          <version>3.0.0</version>
-      </dependency>
-      <dependency>
-          <groupId>dnsjava</groupId>
-          <artifactId>dnsjava</artifactId>
-          <version>3.6.3</version>
-      </dependency>
-      <dependency>
-          <groupId>com.jamesmurty.utils</groupId>
-          <artifactId>java-xmlbuilder</artifactId>
-          <version>1.3</version>
-      </dependency>
-    </dependencies>
-  </dependencyManagement>
-  <dependencies>
-    <!-- AgePredictor client for Tika -->
-    <dependency>
-      <groupId>edu.usc.ir</groupId>
-      <artifactId>age-predictor-api</artifactId>
-      <version>1.0</version>
-      <exclusions>
-        <exclusion>
-          <groupId>io.netty</groupId>
-          <artifactId>netty</artifactId>
-        </exclusion>
-        <exclusion>
-          <groupId>io.netty</groupId>
-          <artifactId>netty-transport</artifactId>
-        </exclusion>
-        <exclusion>
-          <groupId>io.netty</groupId>
-          <artifactId>netty-buffer</artifactId>
-        </exclusion>
-        <exclusion>
-          <groupId>commons-collections</groupId>
-          <artifactId>commons-collections</artifactId>
-        </exclusion>
-        <exclusion>
-          <groupId>org.slf4j</groupId>
-          <artifactId>slf4j-log4j12</artifactId>
-        </exclusion>
-        <exclusion>
-          <groupId>org.bouncycastle</groupId>
-          <artifactId>bcprov-jdk15on</artifactId>
-        </exclusion>
-        <exclusion>
-          <groupId>org.jline</groupId>
-          <artifactId>jline</artifactId>
-        </exclusion>
-      </exclusions>
-    </dependency>
-    <!-- used by hadoop-common -->
-    <dependency>
-        <groupId>org.bouncycastle</groupId>
-        <artifactId>bcprov-jdk18on</artifactId>
-    </dependency>
-    <!-- Test dependencies -->
-    <dependency>
-      <groupId>org.mockito</groupId>
-      <artifactId>mockito-core</artifactId>
-      <scope>test</scope>
-    </dependency>
-    <dependency>
-      <groupId>org.apache.logging.log4j</groupId>
-      <artifactId>log4j-slf4j2-impl</artifactId>
-      <scope>test</scope>
-    </dependency>
-  </dependencies>
-
-  <build>
-    <plugins>
-      <plugin>
-        <artifactId>maven-assembly-plugin</artifactId>
-        <configuration>
-          <descriptorRefs>
-            <descriptorRef>jar-with-dependencies</descriptorRef>
-          </descriptorRefs>
-        </configuration>
-        <executions>
-          <execution>
-            <id>make-assembly</id>
-            <phase>package</phase>
-            <goals>
-              <goal>single</goal>
-            </goals>
-          </execution>
-        </executions>
-      </plugin>
-      <!-- dependencies in this module need to be cleaned up.
-          Until TIKA-2368 is resolved, report but ignore
-          vulnerabilities.
-        -->
-      <plugin>
-        <groupId>org.sonatype.ossindex.maven</groupId>
-        <artifactId>ossindex-maven-plugin</artifactId>
-        <configuration>
-          <fail>false</fail>
-        </configuration>
-      </plugin>
-      <plugin>
-        <groupId>org.apache.rat</groupId>
-        <artifactId>apache-rat-plugin</artifactId>
-        <configuration>
-          <excludes>
-            <exclude>model/opennlp/*.bin</exclude>
-            <exclude>model/org/apache/tika/parser/recognition/**</exclude>
-          </excludes>
-        </configuration>
-      </plugin>
-      <!-- to get the OpenNLP models in the right place for AgeRecogniser: 
TODO: fix AgeRecogniser in next version to load from classpath -->
-      <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-antrun-plugin</artifactId>
-        <executions>
-          <execution>
-            <phase>generate-test-resources</phase>
-            <goals>
-              <goal>run</goal>
-            </goals>
-            <configuration>
-              <target>
-                <copy failonerror="false" 
file="${basedir}/../tika-parser-nlp-module/src/test/resources/org/apache/tika/parser/ner/opennlp/en-pos-maxent.bin"
 todir="${basedir}/model/opennlp/" />
-                <copy failonerror="false" 
file="${basedir}/../tika-parser-nlp-module/src/test/resources/org/apache/tika/parser/ner/opennlp/en-sent.bin"
 todir="${basedir}/model/opennlp/" />
-                <copy failonerror="false" 
file="${basedir}/../tika-parser-nlp-module/src/test/resources/org/apache/tika/parser/ner/opennlp/en-token.bin"
 todir="${basedir}/model/opennlp/" />
-                <copy failonerror="false" 
file="${basedir}/../tika-age-recogniser/src/test/resources/org/apache/tika/parser/recognition/classify-bigram.bin"
 todir="${basedir}/model/org/apache/tika/parser/recognition/" />
-                <copy failonerror="false" 
file="${basedir}/../tika-age-recogniser/src/test/resources/org/apache/tika/parser/recognition/regression-global.bin"
 todir="${basedir}/model/org/apache/tika/parser/recognition/" />
-              </target>
-            </configuration>
-          </execution>
-        </executions>
-      </plugin>
-
-    </plugins>
-  </build>
-
-  <scm>
-    <tag>3.0.0-rc1</tag>
-  </scm>
-</project>
diff --git 
a/tika-parsers/tika-parsers-ml/tika-age-recogniser/src/main/java/org/apache/tika/parser/recognition/AgeRecogniser.java
 
b/tika-parsers/tika-parsers-ml/tika-age-recogniser/src/main/java/org/apache/tika/parser/recognition/AgeRecogniser.java
deleted file mode 100644
index 509e4e7c0..000000000
--- 
a/tika-parsers/tika-parsers-ml/tika-age-recogniser/src/main/java/org/apache/tika/parser/recognition/AgeRecogniser.java
+++ /dev/null
@@ -1,142 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright 
ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.tika.parser.recognition;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.io.Reader;
-import java.nio.charset.StandardCharsets;
-import java.util.Collections;
-import java.util.Map;
-import java.util.Set;
-
-import edu.usc.irds.agepredictor.authorage.AgePredicterLocal;
-import opennlp.tools.util.InvalidFormatException;
-import org.apache.commons.io.IOUtils;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import org.xml.sax.ContentHandler;
-
-import org.apache.tika.Tika;
-import org.apache.tika.config.Initializable;
-import org.apache.tika.config.InitializableProblemHandler;
-import org.apache.tika.config.Param;
-import org.apache.tika.config.TikaConfig;
-import org.apache.tika.exception.TikaConfigException;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.mime.MediaType;
-import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
-
-
-/**
- * Parser for extracting features from text. Below features are extracted
- *
- * <ul>
- * <li>Author Age</li>
- * </ul>
- */
-public class AgeRecogniser implements Parser, Initializable {
-
-    public static final String MD_KEY_ESTIMATED_AGE_RANGE = 
"Estimated-Author-Age-Range";
-    public static final String MD_KEY_ESTIMATED_AGE = "Estimated-Author-Age";
-    private static final long serialVersionUID = 1108439049093046832L;
-    private static final Logger LOG = 
LoggerFactory.getLogger(AgeRecogniser.class);
-    private static final MediaType MEDIA_TYPE = MediaType.TEXT_PLAIN;
-    private static final Set<MediaType> SUPPORTED_TYPES = 
Collections.singleton(MEDIA_TYPE);
-    private static AgePredicterLocal agePredictor;
-    private static volatile boolean available = false;
-    public Tika secondaryParser;
-    private AgeRecogniserConfig config;
-
-    public AgeRecogniser() {
-        try {
-            secondaryParser = new Tika(new TikaConfig());
-            available = true;
-        } catch (Exception e) {
-            available = false;
-            LOG.error("Unable to initialize secondary parser", e);
-        }
-    }
-
-    @Override
-    public void checkInitialization(InitializableProblemHandler problemHandler)
-            throws TikaConfigException {
-        //TODO: what do we want to check here?
-    }
-
-    @Override
-    public Set<MediaType> getSupportedTypes(ParseContext parseContext) {
-        return SUPPORTED_TYPES;
-    }
-
-    @Override
-    public void initialize(Map<String, Param> params) throws 
TikaConfigException {
-        config = new AgeRecogniserConfig(params);
-
-    }
-
-    public AgePredicterLocal getAgePredictorClient() throws 
InvalidFormatException, IOException {
-        if (agePredictor == null) {
-            agePredictor = new AgePredicterLocal(config.getPathClassifyModel(),
-                    config.getPathClassifyRegression());
-        }
-        return agePredictor;
-    }
-
-    /**
-     * USED in test cases to mock response of AgeClassifier
-     */
-    protected static void setAgePredictorClient(AgePredicterLocal 
agePredicter) {
-        if (AgeRecogniser.agePredictor == null) {
-            AgeRecogniser.agePredictor = agePredicter;
-        }
-    }
-
-    @Override
-    public void parse(InputStream inputStream, ContentHandler handler, 
Metadata metadata,
-                      ParseContext context) throws IOException {
-
-        this.config = context.get(AgeRecogniserConfig.class, config);
-        if (!available) {
-            LOG.error("Parser Unavailable, check your configuration");
-            return;
-        }
-
-        // If content is not plain text use Tika to extract text out of 
content.
-        Reader reader;
-        if 
(MediaType.TEXT_PLAIN.toString().equals(metadata.get(Metadata.CONTENT_TYPE))) {
-            reader = new InputStreamReader(inputStream, 
StandardCharsets.UTF_8);
-        } else {
-            reader = secondaryParser.parse(inputStream);
-        }
-
-        // Use Spark AgePredictor to get predicted Age
-        try {
-            double predictAuthorAge = 
getAgePredictorClient().predictAge(IOUtils.toString(reader));
-
-            metadata.add(MD_KEY_ESTIMATED_AGE, 
Double.toString(predictAuthorAge));
-
-        } catch (Exception e) {
-            LOG.error("Age Predictor is not available. Please check wiki for 
detailed instructions",
-                    e);
-            return;
-        }
-    }
-}
diff --git 
a/tika-parsers/tika-parsers-ml/tika-age-recogniser/src/main/java/org/apache/tika/parser/recognition/AgeRecogniserConfig.java
 
b/tika-parsers/tika-parsers-ml/tika-age-recogniser/src/main/java/org/apache/tika/parser/recognition/AgeRecogniserConfig.java
deleted file mode 100644
index 243484955..000000000
--- 
a/tika-parsers/tika-parsers-ml/tika-age-recogniser/src/main/java/org/apache/tika/parser/recognition/AgeRecogniserConfig.java
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright 
ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.tika.parser.recognition;
-
-import java.net.URL;
-import java.util.Map;
-
-import org.apache.tika.config.Param;
-
-
-/**
- * Stores URL for AgePredictor
- */
-public class AgeRecogniserConfig {
-
-    private String pathClassifyModel = null;
-    private String pathClassifyRegression = null;
-
-    public AgeRecogniserConfig(Map<String, Param> params) {
-
-        URL classifyUrl = AgeRecogniserConfig.class
-                
.getResource(params.get("age.path.classify").getValue().toString());
-
-        if (classifyUrl != null) {
-            setPathClassifyModel(classifyUrl.getFile());
-        }
-
-        URL regressionUrl = AgeRecogniserConfig.class
-                
.getResource(params.get("age.path.regression").getValue().toString());
-
-        if (regressionUrl != null) {
-            setPathClassifyRegression(regressionUrl.getFile());
-        }
-    }
-
-    public String getPathClassifyModel() {
-        return pathClassifyModel;
-    }
-
-    public void setPathClassifyModel(String pathClassifyModel) {
-        this.pathClassifyModel = pathClassifyModel;
-    }
-
-    public String getPathClassifyRegression() {
-        return pathClassifyRegression;
-    }
-
-    public void setPathClassifyRegression(String pathClassifyRegression) {
-        this.pathClassifyRegression = pathClassifyRegression;
-    }
-}
diff --git 
a/tika-parsers/tika-parsers-ml/tika-age-recogniser/src/test/java/org/apache/tika/parser/recognition/AgeRecogniserTest.java
 
b/tika-parsers/tika-parsers-ml/tika-age-recogniser/src/test/java/org/apache/tika/parser/recognition/AgeRecogniserTest.java
deleted file mode 100644
index f672e78f0..000000000
--- 
a/tika-parsers/tika-parsers-ml/tika-age-recogniser/src/test/java/org/apache/tika/parser/recognition/AgeRecogniserTest.java
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.recognition;
-
-import static org.junit.jupiter.api.Assertions.assertArrayEquals;
-import static org.mockito.Mockito.mock;
-import static org.mockito.Mockito.when;
-
-import java.io.ByteArrayInputStream;
-import java.io.InputStream;
-import java.nio.charset.StandardCharsets;
-
-import edu.usc.irds.agepredictor.authorage.AgePredicterLocal;
-import org.junit.jupiter.api.Test;
-
-import org.apache.tika.Tika;
-import org.apache.tika.TikaTest;
-import org.apache.tika.config.TikaConfig;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.metadata.TikaCoreProperties;
-import org.apache.tika.parser.CompositeParser;
-
-
-public class AgeRecogniserTest extends TikaTest {
-
-    private static final String CONFIG_FILE = "tika-config-age.xml";
-    private static final String TEST_TEXT =
-            "I am student at University of Southern California (USC)," +
-                    " located in Los Angeles . USC's football team is called 
by name Trojans." +
-                    " Mr. John McKay was a head coach of the team from 1960 - 
1975";
-    private static final double TEST_AGE = 26.4;
-
-    static {
-        /**
-         * Injecting mock AgeClassifer into AgeParser to generate test response
-         */
-        AgePredicterLocal mockAgeClassifier = mock(AgePredicterLocal.class);
-        AgeRecogniser.setAgePredictorClient(mockAgeClassifier);
-
-        try {
-            when(mockAgeClassifier.predictAge(TEST_TEXT)).thenReturn(TEST_AGE);
-        } catch (Exception e) {
-            e.printStackTrace();
-        }
-    }
-
-    @Test
-    public void testAgeRecogniser() throws Exception {
-
-        //test config is added to resources directory
-        try (InputStream is = getResourceAsStream(CONFIG_FILE);
-                InputStream bis = new ByteArrayInputStream(
-                        TEST_TEXT.getBytes(StandardCharsets.UTF_8))) {
-            TikaConfig config = new TikaConfig(is);
-            Tika tika = new Tika(config);
-
-            Metadata md = new Metadata();
-            tika.parse(bis, md);
-
-            assertArrayEquals(new 
String[]{CompositeParser.class.getCanonicalName(),
-                            AgeRecogniser.class.getCanonicalName()},
-                    md.getValues(TikaCoreProperties.TIKA_PARSED_BY),
-                    "Age Parser not invoked.");
-            assertArrayEquals(
-                    new String[]{Double.toString(TEST_AGE)},
-                    md.getValues(AgeRecogniser.MD_KEY_ESTIMATED_AGE),
-                    "Wrong age predicted.");
-        }
-    }
-
-}
diff --git 
a/tika-parsers/tika-parsers-ml/tika-age-recogniser/src/test/resources/org/apache/tika/parser/recognition/tika-config-age.xml
 
b/tika-parsers/tika-parsers-ml/tika-age-recogniser/src/test/resources/org/apache/tika/parser/recognition/tika-config-age.xml
deleted file mode 100644
index 7f214dd5e..000000000
--- 
a/tika-parsers/tika-parsers-ml/tika-age-recogniser/src/test/resources/org/apache/tika/parser/recognition/tika-config-age.xml
+++ /dev/null
@@ -1,31 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!--
-  Licensed to the Apache Software Foundation (ASF) under one or more
-  contributor license agreements.  See the NOTICE file distributed with
-  this work for additional information regarding copyright ownership.
-  The ASF licenses this file to You under the Apache License, Version 2.0
-  (the "License"); you may not use this file except in compliance with
-  the License.  You may obtain a copy of the License at
-
-  http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing, software
-  distributed under the License is distributed on an "AS IS" BASIS,
-  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-  See the License for the specific language governing permissions and
-  limitations under the License.
--->
-<properties>
-    <parsers>
-        <parser class="org.apache.tika.parser.recognition.AgeRecogniser">
-            <mime>text/plain</mime>
-            <mime>text/html</mime>
-            <mime>application/xhtml+xml</mime>
-            <params>
-                <param name="age.path.classify" 
type="string">classify-bigram.bin</param>
-                <param name="age.path.regression" 
type="string">regression-global.bin</param>
-            </params>
-        </parser>
-    </parsers>
-
-</properties>
diff --git 
a/tika-parsers/tika-parsers-ml/tika-parser-nlp-module/src/test/resources/org/apache/tika/parser/ner/opennlp/ModelGetter.groovy
 
b/tika-parsers/tika-parsers-ml/tika-parser-nlp-module/src/test/resources/org/apache/tika/parser/ner/opennlp/ModelGetter.groovy
index 88e29ddbd..cf054f2ec 100644
--- 
a/tika-parsers/tika-parsers-ml/tika-parser-nlp-module/src/test/resources/org/apache/tika/parser/ner/opennlp/ModelGetter.groovy
+++ 
b/tika-parsers/tika-parsers-ml/tika-parser-nlp-module/src/test/resources/org/apache/tika/parser/ner/opennlp/ModelGetter.groovy
@@ -95,14 +95,11 @@ if (mvnProxies && mvnProxies.size() > 0) {
 
 def urlPrefix = "http://opennlp.sourceforge.net/models-1.5"
 def prefixPath = "src/test/resources/org/apache/tika/parser/ner/opennlp/"
-def ageUrlPrefix = 
"https://raw.githubusercontent.com/USCDataScience/AgePredictor/master/model"
-def agePrefixPath = "src/test/resources/org/apache/tika/parser/recognition/"
 
 // detecting proper path for test resources
 if (new File("tika-parsers").exists() && new File("tika-app").exists()  ) {
     // running from parent maven project, but resources should go to sub-module
     prefixPath = "tika-parsers/tika-parsers-ml/tika-parser-nlp-module/" + 
prefixPath
-    agePrefixPath = "tika-parsers/tika-parsers-ml/tika-age-recogniser/" + 
agePrefixPath
 }
 
 def modelFiles = //filePath : url
@@ -112,9 +109,7 @@ def modelFiles = //filePath : url
          (prefixPath + "en-pos-maxent.bin"): (urlPrefix + 
"/en-pos-maxent.bin"),
          (prefixPath + "en-sent.bin"): (urlPrefix + "/en-sent.bin"),
          (prefixPath + "en-token.bin"): (urlPrefix + "/en-token.bin"),
-         (prefixPath + "ner-date.bin"): (urlPrefix + "/en-ner-date.bin"),
-         (agePrefixPath + "classify-bigram.bin"): (ageUrlPrefix + 
"/classify-bigram.bin"),
-         (agePrefixPath + "regression-global.bin"): (ageUrlPrefix + 
"/regression-global.bin")]
+         (prefixPath + "ner-date.bin"): (urlPrefix + "/en-ner-date.bin")]
 
 for (def entry : modelFiles) {
     File file = new File(entry.key)

Reply via email to