This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/main by this push:
     new b822e8eb3 TIKA-4574 -- rm SentimentAnalysisParser (#2455)
b822e8eb3 is described below

commit b822e8eb372ec61ce2a1d96a28166a120d7ee68d
Author: Tim Allison <[email protected]>
AuthorDate: Tue Dec 16 11:08:55 2025 -0500

    TIKA-4574 -- rm SentimentAnalysisParser (#2455)
---
 .../tika-parsers-ml/tika-parser-nlp-module/pom.xml |  49 --------
 .../parser/sentiment/SentimentAnalysisParser.java  | 137 ---------------------
 .../sentiment/SentimentAnalysisParserTest.java     |  84 -------------
 .../configs/tika-config-sentiment-opennlp-cat.json |  10 --
 .../configs/tika-config-sentiment-opennlp.json     |  10 --
 5 files changed, 290 deletions(-)

diff --git a/tika-parsers/tika-parsers-ml/tika-parser-nlp-module/pom.xml 
b/tika-parsers/tika-parsers-ml/tika-parser-nlp-module/pom.xml
index 997162225..1424f537f 100644
--- a/tika-parsers/tika-parsers-ml/tika-parser-nlp-module/pom.xml
+++ b/tika-parsers/tika-parsers-ml/tika-parser-nlp-module/pom.xml
@@ -101,55 +101,6 @@
             <groupId>com.google.code.gson</groupId>
             <artifactId>gson</artifactId>
         </dependency>
-        <!-- sentiment parser -->
-        <dependency>
-            <groupId>edu.usc.ir</groupId>
-            <artifactId>sentiment-analysis-parser</artifactId>
-            <version>0.1</version>
-            <exclusions>
-                <exclusion>
-                    <groupId>org.apache.tika</groupId>
-                    <artifactId>tika-parsers</artifactId>
-                </exclusion>
-                <exclusion>
-                    <groupId>org.apache.tika</groupId>
-                    <artifactId>tika-translate</artifactId>
-                </exclusion>
-                <exclusion>
-                    <groupId>org.apache.tika</groupId>
-                    <artifactId>tika-langdetect</artifactId>
-                </exclusion>
-                <exclusion>
-                    <groupId>org.apache.tika</groupId>
-                    <artifactId>tika-core</artifactId>
-                </exclusion>
-                <exclusion>
-                    <groupId>org.apache.tika</groupId>
-                    <artifactId>tika-serialization</artifactId>
-                </exclusion>
-                <exclusion>
-                    <groupId>org.apache.tika</groupId>
-                    <artifactId>tika-batch</artifactId>
-                </exclusion>
-                <exclusion>
-                    <groupId>org.slf4j</groupId>
-                    <artifactId>slf4j-log4j12</artifactId>
-                </exclusion>
-                <exclusion>
-                    <groupId>org.slf4j</groupId>
-                    <artifactId>jul-to-slf4j</artifactId>
-                </exclusion>
-                <exclusion>
-                    <groupId>org.slf4j</groupId>
-                    <artifactId>jcl-over-slf4j</artifactId>
-                </exclusion>
-                <exclusion>
-                    <groupId>log4j</groupId>
-                    <artifactId>log4j</artifactId>
-                </exclusion>
-            </exclusions>
-        </dependency>
-
         <dependency>
             <groupId>org.slf4j</groupId>
             <artifactId>log4j-over-slf4j</artifactId>
diff --git 
a/tika-parsers/tika-parsers-ml/tika-parser-nlp-module/src/main/java/org/apache/tika/parser/sentiment/SentimentAnalysisParser.java
 
b/tika-parsers/tika-parsers-ml/tika-parser-nlp-module/src/main/java/org/apache/tika/parser/sentiment/SentimentAnalysisParser.java
deleted file mode 100644
index 16a4b81ab..000000000
--- 
a/tika-parsers/tika-parsers-ml/tika-parser-nlp-module/src/main/java/org/apache/tika/parser/sentiment/SentimentAnalysisParser.java
+++ /dev/null
@@ -1,137 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.sentiment;
-
-import java.io.File;
-import java.io.IOException;
-import java.net.URL;
-import java.util.Collections;
-import java.util.Set;
-
-import opennlp.tools.sentiment.SentimentME;
-import opennlp.tools.sentiment.SentimentModel;
-import org.apache.commons.io.IOUtils;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import org.xml.sax.ContentHandler;
-import org.xml.sax.SAXException;
-
-import org.apache.tika.config.Initializable;
-import org.apache.tika.config.TikaComponent;
-import org.apache.tika.exception.TikaConfigException;
-import org.apache.tika.exception.TikaException;
-import org.apache.tika.io.TikaInputStream;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.mime.MediaType;
-import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
-
-/**
- * This parser classifies documents based on the sentiment of document.
- * The classifier is powered by Apache OpenNLP's Maximum Entropy Classifier
- */
-@TikaComponent(spi = false)
-public class SentimentAnalysisParser implements Parser, Initializable {
-
-    public static final String DEF_MODEL =
-            
"https://raw.githubusercontent.com/USCDataScience/SentimentAnalysisParser/master/sentiment-models/src/main/resources/edu/usc/irds/sentiment/en-netflix-sentiment.bin";
-    private static final Set<MediaType> SUPPORTED_TYPES =
-            Collections.singleton(MediaType.application("sentiment"));
-    private static final Logger LOG = 
LoggerFactory.getLogger(SentimentAnalysisParser.class);
-    private SentimentME classifier;
-
-    /**
-     * Path to model path. Default is {@value DEF_MODEL}
-     * <p>
-     * <br/>
-     * The path could be one of the following:
-     * <ul>
-     * <li>a HTTP or HTTPS URL (Not recommended for production use since no 
caching is
-     * implemented) </li>
-     * <li>an absolute or relative path on local file system (recommended for 
production use in
-     * standalone mode)</li>
-     * <li>a relative path known to class loader (Especially useful in 
distributed environments,
-     * recommended for advanced users</li>
-     * </ul>
-     * Note: on conflict: the model from local file system gets the priority
-     * over classpath
-     */
-    private String modelPath = DEF_MODEL;
-
-    @Override
-    public void initialize() throws TikaConfigException {
-        LOG.debug("Initializing...");
-        if (modelPath == null) {
-            throw new TikaConfigException("Parameter 'modelPath' is required 
but it is not set");
-        }
-        try {
-            URL resolvedUrl = null;
-            if (modelPath.startsWith("http://") || 
modelPath.startsWith("https://")) {
-                resolvedUrl = new URL(modelPath);
-            } else {
-                resolvedUrl = 
getClass().getClassLoader().getResource(modelPath);
-                File file = new File(modelPath);
-                if (file.exists()) { // file on filesystem gets higher priority
-                    resolvedUrl = file.toURI().toURL();
-                }
-            }
-            if (resolvedUrl == null) {
-                throw new TikaConfigException("Model doesn't exists :" + 
modelPath);
-            }
-            LOG.info("Sentiment Model is at {}", resolvedUrl);
-            long st = System.currentTimeMillis();
-            SentimentModel model = new SentimentModel(resolvedUrl);
-            long time = System.currentTimeMillis() - st;
-            LOG.debug("time taken to load model {}", time);
-            classifier = new SentimentME(model);
-        } catch (Exception e) {
-            LOG.warn("Failed to load sentiment model from {}" + modelPath);
-            throw new TikaConfigException(e.getMessage(), e);
-        }
-    }
-
-    /**
-     * Returns the types supported
-     *
-     * @param context the parse context
-     * @return the set of types supported
-     */
-    @Override
-    public Set<MediaType> getSupportedTypes(ParseContext context) {
-        return SUPPORTED_TYPES;
-    }
-
-    /**
-     * Performs the parse
-     *
-     * @param stream   the input
-     * @param handler  the content handler
-     * @param metadata the metadata passed
-     * @param context  the context for the parser
-     */
-    @Override
-    public void parse(TikaInputStream tis, ContentHandler handler, Metadata 
metadata,
-                      ParseContext context) throws IOException, SAXException, 
TikaException {
-        if (classifier == null) {
-            LOG.warn(getClass().getSimpleName() + " is not configured 
properly.");
-            return;
-        }
-        String inputString = IOUtils.toString(tis, "UTF-8");
-        String sentiment = classifier.predict(inputString);
-        metadata.add("Sentiment", sentiment);
-    }
-}
diff --git 
a/tika-parsers/tika-parsers-ml/tika-parser-nlp-module/src/test/java/org/apache/tika/parser/sentiment/SentimentAnalysisParserTest.java
 
b/tika-parsers/tika-parsers-ml/tika-parser-nlp-module/src/test/java/org/apache/tika/parser/sentiment/SentimentAnalysisParserTest.java
deleted file mode 100644
index 8380bcc81..000000000
--- 
a/tika-parsers/tika-parsers-ml/tika-parser-nlp-module/src/test/java/org/apache/tika/parser/sentiment/SentimentAnalysisParserTest.java
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.sentiment;
-
-import static org.junit.jupiter.api.Assertions.assertEquals;
-import static org.junit.jupiter.api.Assertions.assertNotNull;
-
-import java.io.IOException;
-import java.net.URISyntaxException;
-import java.nio.charset.StandardCharsets;
-
-import org.junit.jupiter.api.Test;
-
-import org.apache.tika.TikaTest;
-import org.apache.tika.config.loader.TikaLoader;
-import org.apache.tika.exception.TikaConfigException;
-import org.apache.tika.exception.TikaException;
-import org.apache.tika.io.TikaInputStream;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.parser.Parser;
-
-/**
- * Test case for {@link SentimentAnalysisParser}
- */
-public class SentimentAnalysisParserTest extends TikaTest {
-
-    @Test
-    public void endToEndTest() throws Exception {
-        Parser parser = getParser("tika-config-sentiment-opennlp.json");
-        if (parser == null) {
-            return;
-        }
-
-        String text = "What a wonderful thought it is that" +
-                " some of the best days of our lives haven't happened yet.";
-        Metadata md = 
getXML(TikaInputStream.get(text.getBytes(StandardCharsets.UTF_8)),
-                parser, new Metadata()).metadata;
-        String sentiment = md.get("Sentiment");
-        assertNotNull(sentiment);
-        assertEquals("positive", sentiment);
-    }
-
-    @Test
-    public void testCategorical() throws Exception {
-        Parser parser = getParser("tika-config-sentiment-opennlp-cat.json");
-        if (parser == null) {
-            return;
-        }
-        String text = "Whatever, I need some cooling off time!";
-        Metadata md = 
getXML(TikaInputStream.get(text.getBytes(StandardCharsets.UTF_8)),
-                parser, new Metadata()).metadata;
-        String sentiment = md.get("Sentiment");
-        assertNotNull(sentiment);
-        assertEquals("angry", sentiment);
-    }
-
-    private Parser getParser(String configJson) throws TikaException, 
IOException, URISyntaxException {
-        try {
-            return TikaLoader.load(
-                            getConfigPath(SentimentAnalysisParserTest.class, 
configJson))
-                    .loadAutoDetectParser();
-        } catch (TikaConfigException e) {
-            //if can't connect to pull sentiment model...ignore test
-            if (e.getCause() instanceof IOException) {
-                return null;
-            }
-            throw e;
-        }
-    }
-}
diff --git 
a/tika-parsers/tika-parsers-ml/tika-parser-nlp-module/src/test/resources/configs/tika-config-sentiment-opennlp-cat.json
 
b/tika-parsers/tika-parsers-ml/tika-parser-nlp-module/src/test/resources/configs/tika-config-sentiment-opennlp-cat.json
deleted file mode 100644
index 5e362efc0..000000000
--- 
a/tika-parsers/tika-parsers-ml/tika-parser-nlp-module/src/test/resources/configs/tika-config-sentiment-opennlp-cat.json
+++ /dev/null
@@ -1,10 +0,0 @@
-{
-  "parsers": [
-    {
-      "sentiment-analysis-parser": {
-        "_mime-include": ["text/plain", "application/sentiment"],
-        "modelPath": 
"https://raw.githubusercontent.com/USCDataScience/SentimentAnalysisParser/master/sentiment-models/src/main/resources/edu/usc/irds/sentiment/ht-sentiment-categ.bin"
-      }
-    }
-  ]
-}
diff --git 
a/tika-parsers/tika-parsers-ml/tika-parser-nlp-module/src/test/resources/configs/tika-config-sentiment-opennlp.json
 
b/tika-parsers/tika-parsers-ml/tika-parser-nlp-module/src/test/resources/configs/tika-config-sentiment-opennlp.json
deleted file mode 100644
index eefb16d6f..000000000
--- 
a/tika-parsers/tika-parsers-ml/tika-parser-nlp-module/src/test/resources/configs/tika-config-sentiment-opennlp.json
+++ /dev/null
@@ -1,10 +0,0 @@
-{
-  "parsers": [
-    {
-      "sentiment-analysis-parser": {
-        "_mime-include": ["text/plain", "application/sentiment"],
-        "modelPath": 
"https://raw.githubusercontent.com/USCDataScience/SentimentAnalysisParser/master/sentiment-models/src/main/resources/edu/usc/irds/sentiment/en-netflix-sentiment.bin"
-      }
-    }
-  ]
-}

Reply via email to