mawiesne commented on code in PR #758: URL: https://github.com/apache/opennlp/pull/758#discussion_r2052106714
########## opennlp-tools/src/main/java/opennlp/tools/ml/AbstractTrainer.java: ########## @@ -55,6 +57,20 @@ public void init(TrainingParameters trainParams, Map<String,String> reportMap) { this.reportMap = reportMap; } + /** + * Initializes a {@link AbstractTrainer} using following parameters. + * + * @param trainParams The {@link TrainingParameters} to use. + * @param reportMap The {@link Map} instance used as report map. + * @param config The {@link TrainingConfiguration} to use. Review Comment: See comment for `Trainer` interface. ########## opennlp-tools/src/main/java/opennlp/tools/commons/Trainer.java: ########## @@ -35,4 +36,14 @@ public interface Trainer { */ void init(TrainingParameters trainParams, Map<String, String> reportMap); + /** + * Conducts the initialization of a {@link Trainer} via + * {@link TrainingParameters}, {@link Map report map} and {@link TrainingConfiguration} + * + * @param trainParams The {@link TrainingParameters} to use. + * @param reportMap The {@link Map} instance used as report map. + * @param config The {@link TrainingConfiguration} to use. Review Comment: The training configuration should not be null at runtime, I guess? Pls therefore add: "Must not be {@code null}." to the parameter JavaDoc. Same applies for different scenarios, pls re-iterate JavaDoc with that idea in mind. ########## opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/PerceptronTrainer.java: ########## @@ -432,4 +450,25 @@ private static boolean isPerfectSquare(int n) { return root * root == n; } + /** + * Get the {@link StopCriteria} associated with this Trainer. + * @param trainingConfig - If {@link TrainingConfiguration} is null or + * {@link TrainingConfiguration#stopCriteria()} is null then return a default {@link StopCriteria}. + */ + private StopCriteria getStopCriteria(TrainingConfiguration trainingConfig) { + return trainingConfig != null && trainingConfig.stopCriteria() != null + ? 
trainingConfig.stopCriteria() : new IterDeltaAccuracyUnderTolerance(trainingParameters); + } + + /** + * Get the {@link TrainingProgressMonitor} associated with this Trainer. + * @param trainingConfig - If {@link TrainingConfiguration} is null or + * {@link TrainingConfiguration#progMon()}is null then + * return the default {@link TrainingProgressMonitor}. + */ Review Comment: Pls link with `Trainer` interface via `{@link ..}` Pls add specific return value JavaDoc. Pls format `null` as `{@code null}`, twice here. ########## opennlp-tools/src/main/java/opennlp/tools/monitoring/DefaultTrainingProgressMonitor.java: ########## @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package opennlp.tools.monitoring; + + +import java.util.LinkedList; +import java.util.List; +import java.util.Objects; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import static opennlp.tools.monitoring.StopCriteria.FINISHED; + +/** + * An implementation of {@link TrainingProgressMonitor} which publishes model training progress to the chosen + * logging destination. 
+ */ +public class DefaultTrainingProgressMonitor implements TrainingProgressMonitor { + + private static final Logger logger = LoggerFactory.getLogger(DefaultTrainingProgressMonitor.class); + + /** + * Keeps a track whether training was already finished because StopCriteria was met. + */ + private volatile boolean isTrainingFinished; + + /** + * An underlying list to capture training progress events. + */ + private final List<String> progress; + + public DefaultTrainingProgressMonitor() { + this.progress = new LinkedList<>(); + } + + /** + * {@inheritDoc} + */ + @Override + public synchronized void finishedIteration(int iteration, int numberCorrectEvents, int totalEvents, + TrainingMeasure measure, double measureValue) { + progress.add(String.format("%s: (%s/%s) %s : %s", iteration, numberCorrectEvents, totalEvents, + measure.getMeasureName(), measureValue)); + } + + @Override + public synchronized void finishedTraining(int iterations, StopCriteria stopCriteria) { Review Comment: Pls add, `{@inheritDoc}` as above, consistently. ########## opennlp-tools/src/main/java/opennlp/tools/monitoring/TrainingProgressMonitor.java: ########## @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package opennlp.tools.monitoring; + +/** + * An interface to capture Training Progress of a {@link opennlp.tools.ml.model.AbstractModel}. + */ + +public interface TrainingProgressMonitor { + + /** + * Captures the Iteration progress. + * + * @param iteration The completed iteration number. + * @param numberCorrectEvents Number of correctly predicted events in this iteration. + * @param totalEvents Total count of events processed in this iteration. + * @param measure Measure used to quantify training success. Review Comment: Pls {@link ...} to TrainingMeasure here. ########## opennlp-tools/src/main/java/opennlp/tools/monitoring/TrainingProgressMonitor.java: ########## @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package opennlp.tools.monitoring; + +/** + * An interface to capture Training Progress of a {@link opennlp.tools.ml.model.AbstractModel}. + */ + +public interface TrainingProgressMonitor { + + /** + * Captures the Iteration progress. + * + * @param iteration The completed iteration number. + * @param numberCorrectEvents Number of correctly predicted events in this iteration. + * @param totalEvents Total count of events processed in this iteration. 
+ * @param measure Measure used to quantify training success. + * @param measureValue measure value corresponding to the applicable {@link TrainingMeasure}. + */ + void finishedIteration(int iteration, int numberCorrectEvents, int totalEvents, + TrainingMeasure measure, double measureValue); + + /** + * Captures the Training completion progress. + * + * @param iterations Total number of iterations configured for training. + * @param stopCriteria Exit criteria for training. + */ + void finishedTraining(int iterations, StopCriteria stopCriteria); + + /** + * Checks whether the training has finished. * + * @return A boolean value to identify whether the training has finished. Review Comment: Is boolean, obviously. Better: `{@code true} if the training has finished, {@code false} if the training is not yet completed.` ########## opennlp-tools/src/test/java/opennlp/tools/monitoring/DefaultTrainingProgressMonitorTest.java: ########## @@ -0,0 +1,113 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package opennlp.tools.monitoring; + +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +import ch.qos.logback.classic.Level; +import ch.qos.logback.classic.Logger; +import ch.qos.logback.classic.spi.ILoggingEvent; +import ch.qos.logback.core.read.ListAppender; +import org.assertj.core.api.Assertions; Review Comment: Try to eliminate dependence on `org.assertj.core...` here and elsewhere. ########## opennlp-tools/src/test/java/opennlp/tools/ml/TrainerFactoryTest.java: ########## @@ -78,4 +83,15 @@ void testIsSequenceTrainerFalse() { Assertions.assertNotEquals(TrainerType.EVENT_MODEL_SEQUENCE_TRAINER, trainerType); } + @Test + void testGetEventTrainerConfiguration() { + mlParams.put(TrainingParameters.ALGORITHM_PARAM, GISTrainer.MAXENT_VALUE); + + TrainingConfiguration config = new TrainingConfiguration(new DefaultTrainingProgressMonitor(), + new LogLikelihoodThresholdBreached(mlParams)); + + AbstractTrainer trainer = (AbstractTrainer)TrainerFactory.getEventTrainer(mlParams, null, config); + assertTrue(trainer.getTrainingConfiguration().progMon() instanceof DefaultTrainingProgressMonitor); Review Comment: Pls remove extra whitespace here and in line below after `instanceof` keyword. ########## opennlp-tools/src/main/java/opennlp/tools/ml/TrainerFactory.java: ########## @@ -189,11 +207,14 @@ public static <T> EventModelSequenceTrainer<T> getEventModelSequenceTrainer( * {@link GISTrainer#MAXENT_VALUE} will be used. * @param reportMap A {@link Map} that shall be used during initialization of * the {@link EventTrainer}. + * @param config The {@link TrainingConfiguration} to be used. This determines the type of + * {@link opennlp.tools.monitoring.TrainingProgressMonitor} Review Comment: Please add an import for TrainingProgressMonitor and StopCriteria in TrainerFactory. This way, we keep JavaDoc more compact here, avoiding fully qualified package names. 
########## opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GISTrainer.java: ########## @@ -709,4 +727,25 @@ synchronized double getLoglikelihood() { return loglikelihood; } } + + /** + * Get the {@link StopCriteria} associated with this Trainer. + * @param trainingConfig - If {@link TrainingConfiguration} is null or + * {@link TrainingConfiguration#stopCriteria()} is null then return a default {@link StopCriteria}. + */ Review Comment: Pls link with `Trainer` interface via `{@link ..}` Pls add specific return value JavaDoc. Pls set `null` as `{@code null}`, twice here. ########## opennlp-tools/pom.xml: ########## @@ -72,6 +72,13 @@ <scope>test</scope> </dependency> + <dependency> + <groupId>org.assertj</groupId> + <artifactId>assertj-core</artifactId> Review Comment: @NishantShri4 Can we avoid AssertJ as extra dependency? I wonder, why assertions of JUnit 5.x are not sufficient? ########## opennlp-tools/src/main/java/opennlp/tools/monitoring/LogLikelihoodThresholdBreached.java: ########## @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package opennlp.tools.monitoring; + +import opennlp.tools.util.TrainingParameters; + +import static opennlp.tools.ml.maxent.GISTrainer.LOG_LIKELIHOOD_THRESHOLD_DEFAULT; +import static opennlp.tools.ml.maxent.GISTrainer.LOG_LIKELIHOOD_THRESHOLD_PARAM; + +/** + * A {@link StopCriteria} implementation to identify whether the + * difference between the log likelihood of current and previous iteration is under the defined threshold. + */ +public class LogLikelihoodThresholdBreached implements StopCriteria { Review Comment: Could require a change towards the idea to make use of generic <T extends Number> in the separate comment. ########## opennlp-tools/src/main/java/opennlp/tools/ml/AbstractTrainer.java: ########## @@ -108,4 +124,12 @@ protected void addToReport(String key, String value) { reportMap.put(key, value); } + /** + * Retrieves the {@link TrainingConfiguration} associated with a {@link AbstractTrainer}. Review Comment: Should read "with an...", extra "n" ########## opennlp-tools/src/main/java/opennlp/tools/ml/TrainerFactory.java: ########## @@ -180,6 +182,22 @@ public static <T> EventModelSequenceTrainer<T> getEventModelSequenceTrainer( } } + /** + * Works like {@link TrainerFactory#getEventTrainer(TrainingParameters, Map, TrainingConfiguration)} + * except that the {@link TrainingConfiguration} is initialized with {@link DefaultTrainingProgressMonitor} + * and a null {@link opennlp.tools.monitoring.StopCriteria}. + * If not provided, the actual {@link opennlp.tools.monitoring.StopCriteria} + * will be decided by the {@link EventTrainer} implementation. + * Review Comment: Pls add specific parameter JavaDoc. Pls add specific return value JavaDoc. ########## opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GISTrainer.java: ########## @@ -709,4 +727,25 @@ synchronized double getLoglikelihood() { return loglikelihood; } } + + /** + * Get the {@link StopCriteria} associated with this Trainer. 
+ * @param trainingConfig - If {@link TrainingConfiguration} is null or + * {@link TrainingConfiguration#stopCriteria()} is null then return a default {@link StopCriteria}. + */ + private StopCriteria getStopCriteria(TrainingConfiguration trainingConfig) { + return trainingConfig != null && trainingConfig.stopCriteria() != null + ? trainingConfig.stopCriteria() : new LogLikelihoodThresholdBreached(trainingParameters); + } + + /** + * Get the {@link TrainingProgressMonitor} associated with this Trainer. + * @param trainingConfig If {@link TrainingConfiguration} is null or + * {@link TrainingConfiguration#progMon()} is null then return a default {@link TrainingProgressMonitor}. + */ Review Comment: Pls link with `Trainer` interface via `{@link ..}` Pls add specific return value JavaDoc. Pls set `null` as `{@code null}`, twice here. ########## opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/PerceptronTrainer.java: ########## @@ -432,4 +450,25 @@ private static boolean isPerfectSquare(int n) { return root * root == n; } + /** + * Get the {@link StopCriteria} associated with this Trainer. + * @param trainingConfig - If {@link TrainingConfiguration} is null or + * {@link TrainingConfiguration#stopCriteria()} is null then return a default {@link StopCriteria}. + */ Review Comment: Pls link with `Trainer` interface via `{@link ..}` Pls add specific return value JavaDoc. Pls format `null` as `{@code null}`, twice here. ########## opennlp-tools/src/main/java/opennlp/tools/monitoring/DefaultTrainingProgressMonitor.java: ########## @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package opennlp.tools.monitoring; + + +import java.util.LinkedList; +import java.util.List; +import java.util.Objects; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import static opennlp.tools.monitoring.StopCriteria.FINISHED; + +/** + * An implementation of {@link TrainingProgressMonitor} which publishes model training progress to the chosen Review Comment: Better: "The default implementation of ..." ########## opennlp-tools/src/main/java/opennlp/tools/monitoring/DefaultTrainingProgressMonitor.java: ########## @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package opennlp.tools.monitoring; + + +import java.util.LinkedList; +import java.util.List; +import java.util.Objects; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import static opennlp.tools.monitoring.StopCriteria.FINISHED; + +/** + * An implementation of {@link TrainingProgressMonitor} which publishes model training progress to the chosen + * logging destination. + */ +public class DefaultTrainingProgressMonitor implements TrainingProgressMonitor { + + private static final Logger logger = LoggerFactory.getLogger(DefaultTrainingProgressMonitor.class); + + /** + * Keeps a track whether training was already finished because StopCriteria was met. Review Comment: Pls, use `{@link ...}` for `StopCriteria` ########## opennlp-tools/src/main/java/opennlp/tools/monitoring/TrainingProgressMonitor.java: ########## @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package opennlp.tools.monitoring; + +/** + * An interface to capture Training Progress of a {@link opennlp.tools.ml.model.AbstractModel}. + */ + +public interface TrainingProgressMonitor { + + /** + * Captures the Iteration progress. + * + * @param iteration The completed iteration number. 
+ * @param numberCorrectEvents Number of correctly predicted events in this iteration. + * @param totalEvents Total count of events processed in this iteration. + * @param measure Measure used to quantify training success. + * @param measureValue measure value corresponding to the applicable {@link TrainingMeasure}. + */ + void finishedIteration(int iteration, int numberCorrectEvents, int totalEvents, + TrainingMeasure measure, double measureValue); + + /** + * Captures the Training completion progress. + * + * @param iterations Total number of iterations configured for training. + * @param stopCriteria Exit criteria for training. Review Comment: Pls {@link ...} here. ########## opennlp-tools/src/main/java/opennlp/tools/monitoring/StopCriteria.java: ########## @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package opennlp.tools.monitoring; + +import java.util.function.Predicate; + +/** + * Stop-criteria for a {@link opennlp.tools.ml.model.AbstractModel} training . Review Comment: Pls, clear extra whitespace at end of sentence. Better (?): ``` * Stop criteria for the training of models. If the predicate is met, the training is aborted. 
* * @see Predicate * @see opennlp.tools.ml.model.AbstractModel ``` ########## opennlp-tools/src/main/java/opennlp/tools/monitoring/IterDeltaAccuracyUnderTolerance.java: ########## @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package opennlp.tools.monitoring; + +import opennlp.tools.ml.perceptron.PerceptronTrainer; +import opennlp.tools.util.TrainingParameters; + +/** + * A {@link StopCriteria} implementation to identify whether the absolute + * difference between the training accuracy of current and previous iteration is under the defined tolerance. + */ +public class IterDeltaAccuracyUnderTolerance implements StopCriteria { Review Comment: Could require a change towards the idea to make use of generic `<T extends Number>` in the separate comment. ########## opennlp-tools/src/main/java/opennlp/tools/monitoring/TrainingProgressMonitor.java: ########## @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package opennlp.tools.monitoring; + +/** + * An interface to capture Training Progress of a {@link opennlp.tools.ml.model.AbstractModel}. Review Comment: lower-cased for Training + Progress, as those are simple nouns here. ########## opennlp-tools/src/main/java/opennlp/tools/monitoring/StopCriteria.java: ########## @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package opennlp.tools.monitoring; + +import java.util.function.Predicate; + +/** + * Stop-criteria for a {@link opennlp.tools.ml.model.AbstractModel} training . 
+ */ +public interface StopCriteria extends Predicate<Double> { + + String FINISHED = "Training Finished after completing %s Iterations successfully."; + + /** + * @return - A detailed message when the stop-criteria is satisfied during model training. Review Comment: Pls avoid using "-" as a start for the return value JavaDoc. Check it for other occurrences. ########## opennlp-tools/src/main/java/opennlp/tools/monitoring/TrainingProgressMonitor.java: ########## @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package opennlp.tools.monitoring; + +/** + * An interface to capture Training Progress of a {@link opennlp.tools.ml.model.AbstractModel}. + */ + +public interface TrainingProgressMonitor { + + /** + * Captures the Iteration progress. + * + * @param iteration The completed iteration number. + * @param numberCorrectEvents Number of correctly predicted events in this iteration. + * @param totalEvents Total count of events processed in this iteration. + * @param measure Measure used to quantify training success. + * @param measureValue measure value corresponding to the applicable {@link TrainingMeasure}. 
+ */ + void finishedIteration(int iteration, int numberCorrectEvents, int totalEvents, + TrainingMeasure measure, double measureValue); + + /** + * Captures the Training completion progress. Review Comment: lower-cased for "Training" here. ########## opennlp-tools/src/main/java/opennlp/tools/monitoring/TrainingProgressMonitor.java: ########## @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package opennlp.tools.monitoring; + +/** + * An interface to capture Training Progress of a {@link opennlp.tools.ml.model.AbstractModel}. + */ + +public interface TrainingProgressMonitor { + + /** + * Captures the Iteration progress. + * + * @param iteration The completed iteration number. + * @param numberCorrectEvents Number of correctly predicted events in this iteration. + * @param totalEvents Total count of events processed in this iteration. + * @param measure Measure used to quantify training success. + * @param measureValue measure value corresponding to the applicable {@link TrainingMeasure}. + */ + void finishedIteration(int iteration, int numberCorrectEvents, int totalEvents, + TrainingMeasure measure, double measureValue); + + /** + * Captures the Training completion progress. 
+ * + * @param iterations Total number of iterations configured for training. + * @param stopCriteria Exit criteria for training. + */ + void finishedTraining(int iterations, StopCriteria stopCriteria); + + /** + * Checks whether the training has finished. * Review Comment: Pls remove extra "*" at the end of this line. ########## opennlp-tools/src/test/java/opennlp/tools/ml/MockSequenceTrainer.java: ########## @@ -34,5 +35,10 @@ public AbstractModel train(SequenceStream<Event> events) { @Override public void init(TrainingParameters trainParams, Map<String, String> reportMap) { } - + + @Override + public void init(TrainingParameters trainParams, Map<String, String> reportMap + , TrainingConfiguration config) { Review Comment: pls move `,` to line above (40), looks odd. ########## opennlp-tools/src/test/java/opennlp/tools/monitoring/IterDeltaAccuracyUnderToleranceTest.java: ########## @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package opennlp.tools.monitoring; + +import java.util.Map; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.CsvSource; + +import opennlp.tools.util.TrainingParameters; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +class IterDeltaAccuracyUnderToleranceTest { + + private StopCriteria stopCriteria; + + @BeforeEach + public void setup() { + stopCriteria = new IterDeltaAccuracyUnderTolerance(new TrainingParameters(Map.of("Tolerance", + .00002))); + } + + @ParameterizedTest() + @CsvSource( {"0.01,false", "-0.01,false", "0.00001,true", "-0.00001,true"}) Review Comment: That's a nice approach! 👍 ########## opennlp-tools/src/main/java/opennlp/tools/util/TrainingConfiguration.java: ########## @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package opennlp.tools.util; + +import opennlp.tools.monitoring.StopCriteria; +import opennlp.tools.monitoring.TrainingProgressMonitor; + +/** + * Training Configuration used in {@link opennlp.tools.ml.model.AbstractModel} training. Review Comment: lower-cased for `Configuration` here. 
########## opennlp-tools/src/test/java/opennlp/tools/monitoring/DefaultTrainingProgressMonitorTest.java: ########## @@ -0,0 +1,113 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package opennlp.tools.monitoring; + +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +import ch.qos.logback.classic.Level; +import ch.qos.logback.classic.Logger; +import ch.qos.logback.classic.spi.ILoggingEvent; +import ch.qos.logback.core.read.ListAppender; +import org.assertj.core.api.Assertions; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.slf4j.LoggerFactory; + +import opennlp.tools.util.TrainingParameters; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +class DefaultTrainingProgressMonitorTest { + + private static final String LOGGER_NAME = "opennlp"; + private TrainingProgressMonitor progressMonitor; Review Comment: Pls reorganize fields and group static fields "en bloc" and non-static fields below. 
########## opennlp-tools/src/test/java/opennlp/tools/monitoring/IterDeltaAccuracyUnderToleranceTest.java: ########## @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package opennlp.tools.monitoring; + +import java.util.Map; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.CsvSource; + +import opennlp.tools.util.TrainingParameters; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +class IterDeltaAccuracyUnderToleranceTest { + + private StopCriteria stopCriteria; + + @BeforeEach + public void setup() { + stopCriteria = new IterDeltaAccuracyUnderTolerance(new TrainingParameters(Map.of("Tolerance", + .00002))); + } + + @ParameterizedTest() Review Comment: Remove the empty parentheses here; they are not required when no arguments are specified. ########## pom.xml: ########## @@ -180,6 +180,7 @@ <!-- Dependency versions --> <junit.version>5.12.1</junit.version> <junit5-system-exit.version>2.0.2</junit5-system-exit.version> + <assertj-core.version>3.27.3</assertj-core.version> Review Comment: Avoid adding this dependency (assertj-core); see comment(s) above. 
########## opennlp-tools/src/test/java/opennlp/tools/monitoring/LogLikelihoodThresholdBreachedTest.java: ########## @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package opennlp.tools.monitoring; + +import java.util.Map; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.CsvSource; + +import opennlp.tools.util.TrainingParameters; + +import static opennlp.tools.ml.maxent.GISTrainer.LOG_LIKELIHOOD_THRESHOLD_PARAM; +import static org.junit.jupiter.api.Assertions.assertEquals; + + +class LogLikelihoodThresholdBreachedTest { + + private StopCriteria stopCriteria; + + @BeforeEach + public void setup() { + stopCriteria = new LogLikelihoodThresholdBreached( + new TrainingParameters(Map.of(LOG_LIKELIHOOD_THRESHOLD_PARAM,5.))); + } + + @ParameterizedTest() Review Comment: Remove extra brackets here, not required when no arguments are specified. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: dev-unsubscribe@opennlp.apache.org For queries about this service, please contact Infrastructure at: users@infra.apache.org