This is an automated email from the ASF dual-hosted git repository. mawiesne pushed a commit to branch migrate-mahout-addon-to-opennlp-tools-2_1_0 in repository https://gitbox.apache.org/repos/asf/opennlp-sandbox.git
commit 81ac83965a8fa42d89aeb90ea5946fa396f5578e Author: Martin Wiesner <[email protected]> AuthorDate: Fri Jan 20 10:44:37 2023 +0100 updates sandbox component 'mahout-addon' to be compatible with latest opennlp-tools release - adjusts opennlp-tools to 2.1.0 - adjusts parent project (org.apache.apache) to version 18 - adjusts Java language level to 11 - updates to mahout-core in version 0.9 to mitigate several CVEs - removes unused imports --- mahout-addon/pom.xml | 32 +++++++++++++++------- .../mahout/AbstractOnlineLearnerTrainer.java | 15 ++++------ .../addons/mahout/LogisticRegressionTrainer.java | 5 ---- .../mahout/OnlineLogisticRegressionTrainer.java | 1 - .../addons/mahout/VectorClassifierModel.java | 3 ++ 5 files changed, 31 insertions(+), 25 deletions(-) diff --git a/mahout-addon/pom.xml b/mahout-addon/pom.xml index d2c2d0a..81a92e3 100644 --- a/mahout-addon/pom.xml +++ b/mahout-addon/pom.xml @@ -21,15 +21,17 @@ <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> <modelVersion>4.0.0</modelVersion> - + <parent> - <groupId>org.apache.opennlp</groupId> - <artifactId>opennlp</artifactId> - <version>1.6.0-SNAPSHOT</version> - <relativePath>../opennlp/pom.xml</relativePath> - </parent> - + <groupId>org.apache</groupId> + <artifactId>apache</artifactId> + <!-- TODO OPENNLP-1452 once this is resolved, move to 29 as well. --> + <version>18</version> + <relativePath /> + </parent> + <artifactId>mahout-addon</artifactId> + <version>2.1.1-SNAPSHOT</version> <packaging>jar</packaging> <name>Apache OpenNLP Mahout Addon</name> @@ -37,24 +39,34 @@ <dependency> <groupId>org.apache.opennlp</groupId> <artifactId>opennlp-tools</artifactId> - <version>1.6.0-SNAPSHOT</version> + <version>2.1.0</version> </dependency> - + <dependency> <groupId>org.apache.mahout</groupId> <artifactId>mahout-core</artifactId> - <version>0.8</version> + <version>0.9</version> </dependency> <dependency> <groupId>junit</groupId> <artifactId>junit</artifactId> + <version>4.13.1</version> <scope>test</scope> </dependency> </dependencies> <build> <plugins> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-compiler-plugin</artifactId> + <configuration> + <source>11</source> + <target>11</target> + <compilerArgument>-Xlint</compilerArgument> + </configuration> + </plugin> <plugin> <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-dependency-plugin</artifactId> diff --git a/mahout-addon/src/main/java/opennlp/addons/mahout/AbstractOnlineLearnerTrainer.java b/mahout-addon/src/main/java/opennlp/addons/mahout/AbstractOnlineLearnerTrainer.java index 9b8d85e..cfe5a73 100644 --- a/mahout-addon/src/main/java/opennlp/addons/mahout/AbstractOnlineLearnerTrainer.java +++ b/mahout-addon/src/main/java/opennlp/addons/mahout/AbstractOnlineLearnerTrainer.java @@ -19,16 +19,12 @@ package opennlp.addons.mahout; -import java.io.IOException; import java.util.HashMap; import java.util.Map; import opennlp.tools.ml.AbstractEventTrainer; import opennlp.tools.ml.model.DataIndexer; -import opennlp.tools.ml.model.MaxentModel; -import org.apache.mahout.classifier.sgd.AdaptiveLogisticRegression; -import org.apache.mahout.classifier.sgd.L1; import org.apache.mahout.math.RandomAccessSparseVector; import org.apache.mahout.math.Vector; @@ -39,6 +35,7 @@ abstract class AbstractOnlineLearnerTrainer extends AbstractEventTrainer { public AbstractOnlineLearnerTrainer() { } + @Override public void init(Map<String, String> trainParams, Map<String, String> reportMap) { String iterationsValue = trainParams.get("Iterations"); @@ -53,16 +50,16 @@ abstract class AbstractOnlineLearnerTrainer extends AbstractEventTrainer { protected void trainOnlineLearner(DataIndexer indexer, org.apache.mahout.classifier.OnlineLearner pa) { int cardinality = indexer.getPredLabels().length; - int outcomes[] = indexer.getOutcomeList(); + int[] outcomes = indexer.getOutcomeList(); for (int i = 0; i < indexer.getContexts().length; i++) { Vector vector = new RandomAccessSparseVector(cardinality); - int features[] = indexer.getContexts()[i]; - - for (int fi = 0; fi < features.length; fi++) { - vector.set(features[fi], indexer.getNumTimesEventsSeen()[i]); + int[] features = indexer.getContexts()[i]; + + for (int feature : features) { + vector.set(feature, indexer.getNumTimesEventsSeen()[i]); } pa.train(outcomes[i], vector); diff --git a/mahout-addon/src/main/java/opennlp/addons/mahout/LogisticRegressionTrainer.java b/mahout-addon/src/main/java/opennlp/addons/mahout/LogisticRegressionTrainer.java index 85442bb..b2a29ae 100644 --- a/mahout-addon/src/main/java/opennlp/addons/mahout/LogisticRegressionTrainer.java +++ b/mahout-addon/src/main/java/opennlp/addons/mahout/LogisticRegressionTrainer.java @@ -23,16 +23,11 @@ import java.io.IOException; import java.util.HashMap; import java.util.Map; -import opennlp.tools.ml.AbstractEventTrainer; import opennlp.tools.ml.model.DataIndexer; import opennlp.tools.ml.model.MaxentModel; import org.apache.mahout.classifier.sgd.AdaptiveLogisticRegression; import org.apache.mahout.classifier.sgd.L1; -import org.apache.mahout.classifier.sgd.OnlineLogisticRegression; -import org.apache.mahout.classifier.sgd.PassiveAggressive; -import org.apache.mahout.math.RandomAccessSparseVector; -import org.apache.mahout.math.Vector; public class LogisticRegressionTrainer extends AbstractOnlineLearnerTrainer { diff --git a/mahout-addon/src/main/java/opennlp/addons/mahout/OnlineLogisticRegressionTrainer.java b/mahout-addon/src/main/java/opennlp/addons/mahout/OnlineLogisticRegressionTrainer.java index 5b09870..ee23b9c 100644 --- a/mahout-addon/src/main/java/opennlp/addons/mahout/OnlineLogisticRegressionTrainer.java +++ b/mahout-addon/src/main/java/opennlp/addons/mahout/OnlineLogisticRegressionTrainer.java @@ -25,7 +25,6 @@ import java.util.Map; import opennlp.tools.ml.model.DataIndexer; import opennlp.tools.ml.model.MaxentModel; -import org.apache.mahout.classifier.sgd.AdaptiveLogisticRegression; import org.apache.mahout.classifier.sgd.L1; import org.apache.mahout.classifier.sgd.OnlineLogisticRegression; diff --git a/mahout-addon/src/main/java/opennlp/addons/mahout/VectorClassifierModel.java b/mahout-addon/src/main/java/opennlp/addons/mahout/VectorClassifierModel.java index 046ed2a..fabe3e2 100644 --- a/mahout-addon/src/main/java/opennlp/addons/mahout/VectorClassifierModel.java +++ b/mahout-addon/src/main/java/opennlp/addons/mahout/VectorClassifierModel.java @@ -42,6 +42,7 @@ public class VectorClassifierModel implements MaxentModel { this.predMap = predMap; } + @Override public double[] eval(String[] features) { Vector vector = new RandomAccessSparseVector(predMap.size()); @@ -64,10 +65,12 @@ public class VectorClassifierModel implements MaxentModel { return outcomes; } + @Override public double[] eval(String[] context, double[] probs) { return eval(context); } + @Override public double[] eval(String[] context, float[] values) { return eval(context); }
