mawiesne commented on code in PR #492: URL: https://github.com/apache/opennlp/pull/492#discussion_r1116010408
########## opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascDocumentStream.java: ########## @@ -140,8 +143,7 @@ public MascDocumentStream(File mascCorpusDirectory, documents.add(MascDocument.parseDocument(hdrFilePath, f_primary, f_seg, f_penn, f_s, f_ne)); } catch (IOException e) { - System.err.println("Failed to parse the file: " + hdrFilePath); - System.err.println('\t' + e.getMessage()); + logger.warn("Failed to parse the file: {}", hdrFilePath, e); Review Comment: This might be a `logger.error(...)` statement, even though it's inside a loop here. Reason: exception (handling) involved. Open for other thoughts. ########## opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascDocument.java: ########## @@ -353,7 +353,7 @@ private void addPennTags(Map<String, Map<Integer, ?>> tagMaps) throws IOExceptio //Check that all tokens have at least one quark. for (Map.Entry<Integer, int[]> token : tokenToQuarks.entrySet()) { if (token.getValue().length == 0) { - System.err.println("[ERROR] Token without quarks: " + token.getKey()); + logger.error("Token without quarks: {}", token.getKey()); Review Comment: This should better be a `logger.warn(...)` statement, I guess. Text has informative character and no exception has occurred. ########## opennlp-tools/src/main/java/opennlp/tools/log/LogPrintStream.java: ########## @@ -0,0 +1,87 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +package opennlp.tools.log; + +import java.io.PrintStream; +import java.util.Objects; + +import org.slf4j.Logger; +import org.slf4j.event.Level; + +import opennlp.tools.commons.Internal; + +/** + * This class serves as an adapter for a {@link Logger} used within a {@link PrintStream}. + */ +@Internal +public class LogPrintStream extends PrintStream { + + private final Logger logger; + private final Level level; + + /** + * Creates a {@link LogPrintStream} for the given {@link Logger} Review Comment: Please add a `.` at the end of the sentence. ########## opennlp-distr/pom.xml: ########## @@ -54,6 +54,17 @@ <groupId>org.apache.opennlp</groupId> <artifactId>opennlp-brat-annotator</artifactId> </dependency> + <!-- ship the dist with a logging impl for cli users --> + <dependency> + <groupId>org.slf4j</groupId> + <artifactId>slf4j-api</artifactId> + </dependency> + <dependency> + <groupId>org.apache.logging.log4j</groupId> + <artifactId>log4j-slf4j-impl</artifactId> + <version>2.19.0</version> Review Comment: Can we make this version string a Maven property in the parent pom? ########## opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/ParallelNegLogLikelihood.java: ########## @@ -149,7 +154,7 @@ private void computeInParallel(double[] x, Class<? extends ComputeTask> taskClas future.get(); } catch (Exception e) { - e.printStackTrace(); + logger.warn(e.getLocalizedMessage(), e); Review Comment: `logger.error(...)` here instead? ########## opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GISTrainer.java: ########## @@ -189,9 +191,9 @@ public void init(TrainingParameters trainingParameters, Map<String, String> repo // Just in case someone is using "llthreshold" instead of LLThreshold... // this warning can be removed in a future version of OpenNLP. if (trainingParameters.getDoubleParameter(OLD_LL_THRESHOLD_PARAM, -1.) > 0. ) { - display("WARNING: the training parameter: " + OLD_LL_THRESHOLD_PARAM + - " has been deprecated. Please use " + - LOG_LIKELIHOOD_THRESHOLD_DEFAULT + " instead"); + logger.warn("The training parameter: " + + "{} has been deprecated. Please use {} instead", Review Comment: Please add a `.` at the end of the sentence, that is after "instead". ########## opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GISTrainer.java: ########## @@ -407,9 +399,9 @@ public GISModel trainModel(int iterations, DataIndexer di, Prior modelPrior, int prior.setLabels(outcomeLabels, predLabels); numPreds = predLabels.length; - display("\tNumber of Event Tokens: " + numUniqueEvents + "\n"); - display("\t Number of Outcomes: " + numOutcomes + "\n"); - display("\t Number of Predicates: " + numPreds + "\n"); + logger.info("Number of Event Tokens: {}", numUniqueEvents); Review Comment: C/Should these three messages be considered a group? IMHO, it is an atomic block of information that makes sense as "en bloc", eg. for copying the results of a model training for purposes of documentation. I'm open of the multi-line approach, but would like to get @kinow's or @jzonthemtn's opinion here. ########## opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/PerceptronTrainer.java: ########## @@ -265,25 +270,25 @@ public AbstractModel trainModel(int iterations, DataIndexer di, int cutoff, bool numPreds = predLabels.length; numOutcomes = outcomeLabels.length; - display("done.\n"); + logger.info("done."); - display("\tNumber of Event Tokens: " + numUniqueEvents + "\n"); - display("\t Number of Outcomes: " + numOutcomes + "\n"); - display("\t Number of Predicates: " + numPreds + "\n"); + logger.info("Number of Event Tokens: {}", numUniqueEvents); Review Comment: See previous comment / seeking for feedback on "en bloc" infos. ########## opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/SimplePerceptronSequenceTrainer.java: ########## @@ -195,11 +199,12 @@ public AbstractModel trainModel(int iterations, SequenceStream<Event> sequenceSt updates = new int[numPreds][numOutcomes][3]; } - display("done.\n"); + logger.info("done."); + + logger.info("Number of Event Tokens: {}", numEvents); Review Comment: See previous comment / seeking for feedback on "en bloc" infos. ########## opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GISTrainer.java: ########## @@ -489,13 +481,13 @@ public GISModel trainModel(int iterations, DataIndexer di, Prior modelPrior, int } } - display("...done.\n"); + logger.info("...done."); /* Find the parameters *****/ if (threads == 1) { - display("Computing model parameters ...\n"); + logger.info("Computing model parameters ..."); } else { - display("Computing model parameters in " + threads + " threads...\n"); + logger.info("Computing model parameters in {}} threads...", threads); Review Comment: `{}}` is wrong and should read `{}`. => remove extra `}`. ########## opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/NaiveBayesTrainer.java: ########## @@ -228,7 +233,7 @@ private double trainingStats(EvalParameters evalParams) { } } double trainingAccuracy = (double) numCorrect / numEvents; - display("Stats: (" + numCorrect + "/" + numEvents + ") " + trainingAccuracy + "\n"); + logger.info("Stats: (" + numCorrect + "/" + numEvents + ") " + trainingAccuracy); Review Comment: Can we make use of `{}` logger value injection here? ########## opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/SimplePerceptronSequenceTrainer.java: ########## @@ -235,14 +240,8 @@ public AbstractModel trainModel(int iterations, SequenceStream<Event> sequenceSt } private void findParameters(int iterations) throws IOException { - display("Performing " + iterations + " iterations.\n"); + logger.info("Performing " + iterations + " iterations.\n"); Review Comment: Value injection via `{}` as well? ########## opennlp-tools/src/main/java/opennlp/tools/parser/Parse.java: ########## @@ -319,32 +323,25 @@ public void insert(final Parse constituent) { int pn = parts.size(); for (; pi < pn; pi++) { Parse subPart = parts.get(pi); - //System.err.println("Parse.insert:con="+constituent+" sp["+pi+"] "+subPart+" "+subPart.getType()); Span sp = subPart.span; if (sp.getStart() >= ic.getEnd()) { break; } // constituent contains subPart else if (ic.contains(sp)) { - //System.err.println("Parse.insert:con contains subPart"); parts.remove(pi); pi--; constituent.parts.add(subPart); subPart.setParent(constituent); - //System.err.println("Parse.insert: "+subPart.hashCode()+" -> "+subPart.getParent().hashCode()); Review Comment: This commented sysout could/might be valuable for debugging infos ########## opennlp-tools/src/main/java/opennlp/tools/parser/Parse.java: ########## @@ -319,32 +323,25 @@ public void insert(final Parse constituent) { int pn = parts.size(); for (; pi < pn; pi++) { Parse subPart = parts.get(pi); - //System.err.println("Parse.insert:con="+constituent+" sp["+pi+"] "+subPart+" "+subPart.getType()); Span sp = subPart.span; if (sp.getStart() >= ic.getEnd()) { break; } // constituent contains subPart else if (ic.contains(sp)) { - //System.err.println("Parse.insert:con contains subPart"); Review Comment: This commented sysout could/might be valuable for debugging infos ########## opennlp-tools/src/main/java/opennlp/tools/parser/Parse.java: ########## @@ -319,32 +323,25 @@ public void insert(final Parse constituent) { int pn = parts.size(); for (; pi < pn; pi++) { Parse subPart = parts.get(pi); - //System.err.println("Parse.insert:con="+constituent+" sp["+pi+"] "+subPart+" "+subPart.getType()); Span sp = subPart.span; if (sp.getStart() >= ic.getEnd()) { break; } // constituent contains subPart else if (ic.contains(sp)) { - //System.err.println("Parse.insert:con contains subPart"); parts.remove(pi); pi--; constituent.parts.add(subPart); subPart.setParent(constituent); - //System.err.println("Parse.insert: "+subPart.hashCode()+" -> "+subPart.getParent().hashCode()); pn = parts.size(); } else if (sp.contains(ic)) { - //System.err.println("Parse.insert:subPart contains con"); Review Comment: This commented sysout could/might be valuable for debugging infos ########## opennlp-tools/src/main/java/opennlp/tools/parser/Parse.java: ########## @@ -362,14 +359,10 @@ public void show(StringBuffer sb) { if (!type.equals(AbstractBottomUpParser.TOK_NODE)) { sb.append("("); sb.append(type).append(" "); - //System.out.print(label+" "); Review Comment: These commented sysouts could/might be valuable for debugging infos ########## opennlp-tools/src/main/java/opennlp/tools/parser/chunking/Parser.java: ########## @@ -146,11 +150,9 @@ private Parser(MaxentModel buildModel, MaxentModel checkModel, POSTagger tagger, for (int boi = 0, bon = buildModel.getNumOutcomes(); boi < bon; boi++) { String outcome = buildModel.getOutcome(boi); if (outcome.startsWith(START)) { - //System.err.println("startMap "+outcome+"->"+outcome.substring(START.length())); Review Comment: This commented sysout could/might be valuable for debugging infos ########## opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/Parser.java: ########## @@ -444,33 +450,33 @@ else if (1 - cprobs[completeIndex] > probMass) { //just incomplete advances if (cprobs[completeIndex] > probMass) { setComplete(updatedNode); newParse2.addProb(StrictMath.log(cprobs[completeIndex])); - if (debugOn) System.out.println("Only advancing complete node"); + if (logger.isDebugEnabled()) logger.debug("Only advancing complete node"); } else if (1 - cprobs[completeIndex] > probMass) { setIncomplete(updatedNode); newParse2.addProb(StrictMath.log(1 - cprobs[completeIndex])); - if (debugOn) System.out.println("Only advancing incomplete node"); + if (logger.isDebugEnabled()) logger.debug("Only advancing incomplete node"); } else { setComplete(updatedNode); Parse newParse3 = newParse2.cloneRoot(updatedNode, originalZeroIndex); newParse3.addProb(StrictMath.log(cprobs[completeIndex])); newParsesList.add(newParse3); setIncomplete(updatedNode); newParse2.addProb(StrictMath.log(1 - cprobs[completeIndex])); - if (debugOn) - System.out.println("Advancing both complete and incomplete nodes; c=" + if (logger.isDebugEnabled()) + logger.debug("Advancing both complete and incomplete nodes; c=" Review Comment: We can use {} injection for the `c` parameter here. ########## opennlp-tools/src/main/java/opennlp/tools/tokenize/TokSpanEventStream.java: ########## @@ -134,7 +138,7 @@ else if (tokens[ti].getEnd() < cSpan.getStart()) { //keep looking } else { - System.out.println("Bad training token: " + tokens[ti] + " cand: " + cSpan + + logger.info("Bad training token: " + tokens[ti] + " cand: " + cSpan + Review Comment: Should this be `logger.warn(...)` instead? The message as a slightly alerting start. ########## opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/ParserEventStream.java: ########## @@ -221,7 +224,7 @@ protected void addParseEvents(List<Event> parseEvents, Parse[] chunks) { /* Right frontier consisting of partially-built nodes based on current state of the parse.*/ List<Parse> currentRightFrontier = Parser.getRightFrontier(currentChunks[0],punctSet); if (currentRightFrontier.size() != rightFrontier.size()) { - System.err.println("fontiers mis-aligned: " + currentRightFrontier.size() + " != " + logger.error("fontiers mis-aligned: " + currentRightFrontier.size() + " != " Review Comment: Missing "r" in Logger message. `fontiers` -> `Frontiers`... ########## opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/NaiveBayesTrainer.java: ########## @@ -145,17 +150,17 @@ public AbstractModel trainModel(DataIndexer di) { numPreds = predLabels.length; numOutcomes = outcomeLabels.length; - display("done.\n"); + logger.info("done."); - display("\tNumber of Event Tokens: " + numUniqueEvents + "\n"); - display("\t Number of Outcomes: " + numOutcomes + "\n"); - display("\t Number of Predicates: " + numPreds + "\n"); + logger.info("Number of Event Tokens: {}", numUniqueEvents); Review Comment: See previous comment / seeking for feedback on "en bloc" infos. ########## opennlp-tools/src/main/java/opennlp/tools/util/DownloadUtil.java: ########## @@ -215,7 +220,7 @@ private String fetchPageIndex() { html.append(line); } } catch (IOException e) { - System.err.println("Could not read page index from " + indexUrl); + logger.error("Could not read page index from {}", indexUrl); Review Comment: please add `, e` so the logger has a chance to print the stack trace accordingly. ########## opennlp-tools/src/main/java/opennlp/tools/tokenize/lang/en/TokenSampleStream.java: ########## @@ -118,7 +121,7 @@ public void remove() { } private static void usage() { - System.err.println("TokenSampleStream [-spans] < in"); - System.err.println("Where in is a space delimited list of tokens."); + logger.error("TokenSampleStream [-spans] < in"); Review Comment: This should be an info message, as it informs on the usage. => `logger.info(...)` for this msg and the one below. ########## opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/PerceptronTrainer.java: ########## @@ -391,7 +394,7 @@ private MutableContext[] findParameters(int iterations, boolean useAverage) { if (StrictMath.abs(prevAccuracy1 - trainingAccuracy) < tolerance && StrictMath.abs(prevAccuracy2 - trainingAccuracy) < tolerance && StrictMath.abs(prevAccuracy3 - trainingAccuracy) < tolerance) { - display("Stopping: change in training set accuracy less than " + tolerance + "\n"); + logger.info("Stopping: change in training set accuracy less than {}", tolerance); Review Comment: This could be a warning => `logger.warn(...)` ########## opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/NaiveBayesModelReader.java: ########## @@ -94,8 +98,8 @@ public AbstractModel constructModel() throws IOException { public void checkModelType() throws IOException { String modelType = readUTF(); if (!modelType.equals("NaiveBayes")) - System.out.println("Error: attempting to load a " + modelType + + logger.error("Error: attempting to load a {}" + Review Comment: The pre-text of that logging message "Error:" can be removed now. ########## opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/PerceptronTrainer.java: ########## @@ -439,22 +442,10 @@ private double trainingStats(EvalParameters evalParams) { } } double trainingAccuracy = (double) numCorrect / numEvents; - display("Stats: (" + numCorrect + "/" + numEvents + ") " + trainingAccuracy + "\n"); + logger.info("Stats: (" + numCorrect + "/" + numEvents + ") " + trainingAccuracy); Review Comment: Can we make use of {} logger value injection here? ########## opennlp-tools/src/main/java/opennlp/tools/parser/Parse.java: ########## @@ -319,32 +323,25 @@ public void insert(final Parse constituent) { int pn = parts.size(); for (; pi < pn; pi++) { Parse subPart = parts.get(pi); - //System.err.println("Parse.insert:con="+constituent+" sp["+pi+"] "+subPart+" "+subPart.getType()); Review Comment: This commented sysout could/might be valuable for debugging infos - Should we keep it and bring it to debug level? @kinow / @jzonthemtn wdyt? ########## opennlp-tools/src/main/java/opennlp/tools/parser/Parse.java: ########## @@ -319,32 +323,25 @@ public void insert(final Parse constituent) { int pn = parts.size(); for (; pi < pn; pi++) { Parse subPart = parts.get(pi); - //System.err.println("Parse.insert:con="+constituent+" sp["+pi+"] "+subPart+" "+subPart.getType()); Span sp = subPart.span; if (sp.getStart() >= ic.getEnd()) { break; } // constituent contains subPart else if (ic.contains(sp)) { - //System.err.println("Parse.insert:con contains subPart"); parts.remove(pi); pi--; constituent.parts.add(subPart); subPart.setParent(constituent); - //System.err.println("Parse.insert: "+subPart.hashCode()+" -> "+subPart.getParent().hashCode()); pn = parts.size(); } else if (sp.contains(ic)) { - //System.err.println("Parse.insert:subPart contains con"); subPart.insert(constituent); return; } } - //System.err.println("Parse.insert:adding con="+constituent+" to "+this); Review Comment: This commented sysout could/might be valuable for debugging infos ########## opennlp-tools/src/main/java/opennlp/tools/parser/chunking/Parser.java: ########## @@ -146,11 +150,9 @@ private Parser(MaxentModel buildModel, MaxentModel checkModel, POSTagger tagger, for (int boi = 0, bon = buildModel.getNumOutcomes(); boi < bon; boi++) { String outcome = buildModel.getOutcome(boi); if (outcome.startsWith(START)) { - //System.err.println("startMap "+outcome+"->"+outcome.substring(START.length())); startTypeMap.put(outcome, outcome.substring(START.length())); } else if (outcome.startsWith(CONT)) { - //System.err.println("contMap "+outcome+"->"+outcome.substring(CONT.length())); Review Comment: This commented sysout could/might be valuable for debugging infos -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: dev-unsubscr...@opennlp.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org