Author: joern
Date: Sat May 21 11:19:54 2016
New Revision: 1744905
URL: http://svn.apache.org/viewvc?rev=1744905&view=rev
Log:
OPENNLP-848 Print training data summary at end of training.
Added:
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/NameSampleCountersStream.java
(with props)
Modified:
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java
Added:
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/NameSampleCountersStream.java
URL:
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/NameSampleCountersStream.java?rev=1744905&view=auto
==============================================================================
---
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/NameSampleCountersStream.java
(added)
+++
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/NameSampleCountersStream.java
Sat May 21 11:19:54 2016
@@ -0,0 +1,100 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.cmdline.namefind;
+
+import java.io.IOException;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+
+import opennlp.tools.namefind.NameSample;
+import opennlp.tools.util.FilterObjectStream;
+import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.Span;
+
+/**
+ * Pass-through stream that counts the sentences, tokens and names (per span
+ * type) of every {@link NameSample} read through it.
+ */
+public class NameSampleCountersStream
+    extends FilterObjectStream<NameSample, NameSample> {
+
+  private int sentenceCount;
+  private int tokenCount;
+
+  // Name span type -> number of spans of that type seen so far.
+  private Map<String, Integer> nameCounters = new HashMap<>();
+
+  /** @param samples the stream whose samples are read and counted */
+  protected NameSampleCountersStream(ObjectStream<NameSample> samples) {
+    super(samples);
+  }
+
+  /** Reads the next sample, counts it, and returns it unchanged; {@code null} passes through. */
+  @Override
+  public NameSample read() throws IOException {
+    NameSample sample = samples.read();
+
+    if (sample != null) {
+      sentenceCount++;
+      tokenCount += sample.getSentence().length;
+
+      for (Span nameSpan : sample.getNames()) {
+        Integer nameCounter = nameCounters.get(nameSpan.getType());
+        nameCounters.put(nameSpan.getType(), nameCounter == null ? 1 : nameCounter + 1);
+      }
+    }
+
+    return sample;
+  }
+
+  /** Delegates to {@code super.reset()} and sets all counters back to zero. */
+  @Override
+  public void reset() throws IOException, UnsupportedOperationException {
+    super.reset();
+    sentenceCount = 0;
+    tokenCount = 0;
+    nameCounters = new HashMap<>();
+  }
+
+  /** @return number of samples read since construction or the last reset */
+  public int getSentenceCount() {
+    return sentenceCount;
+  }
+
+  /** @return summed sentence lengths (tokens) of the samples read so far */
+  public int getTokenCount() {
+    return tokenCount;
+  }
+
+  /** @return read-only view of the per-type name counts */
+  public Map<String, Integer> getNameCounters() {
+    return Collections.unmodifiableMap(nameCounters);
+  }
+
+  /** Prints the sentence, token and per-type name counts to {@code System.out}. */
+  public void printSummary() {
+    System.out.println("Training data summary:");
+    System.out.println("#Sentences: " + getSentenceCount());
+    System.out.println("#Tokens: " + getTokenCount());
+
+    for (Map.Entry<String, Integer> counter : getNameCounters().entrySet()) {
+      System.out.println("#" + counter.getKey() + " entities: " + counter.getValue());
+    }
+  }
+}
Propchange:
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/NameSampleCountersStream.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Modified:
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java
URL:
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java?rev=1744905&r1=1744904&r2=1744905&view=diff
==============================================================================
---
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java
(original)
+++
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java
Sat May 21 11:19:54 2016
@@ -224,6 +224,9 @@ public final class TokenNameFinderTraine
throw new TerminateToolException(-1, e.getMessage(), e);
}
+ NameSampleCountersStream counters = new NameSampleCountersStream(sampleStream);
+ sampleStream = counters;
+
TokenNameFinderModel model;
try {
model = opennlp.tools.namefind.NameFinderME.train(
@@ -241,7 +244,12 @@ public final class TokenNameFinderTraine
// sorry that this can fail
}
}
-
+
+ System.out.println();
+ counters.printSummary();
+ System.out.println();
+
CmdLineUtil.writeModel("name finder", modelOutFile, model);
+
}
}