Author: joern
Date: Sat May 21 11:19:54 2016
New Revision: 1744905

URL: http://svn.apache.org/viewvc?rev=1744905&view=rev
Log:
OPENNLP-848 Print training data summary at end of training.

Added:
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/NameSampleCountersStream.java   (with props)
Modified:
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java

Added: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/NameSampleCountersStream.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/NameSampleCountersStream.java?rev=1744905&view=auto
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/NameSampleCountersStream.java (added)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/NameSampleCountersStream.java Sat May 21 11:19:54 2016
@@ -0,0 +1,100 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.cmdline.namefind;
+
+import java.io.IOException;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+
+import opennlp.tools.namefind.NameSample;
+import opennlp.tools.util.FilterObjectStream;
+import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.Span;
+
+/**
+ * Pass-through stream that counts sentences, tokens and names by span type
+ * for the samples read since construction or the last {@link #reset()}.
+ */
+public class NameSampleCountersStream extends FilterObjectStream<NameSample, NameSample> {
+
+  private int sentenceCount;
+  private int tokenCount;
+  /** Number of names read so far, keyed by span type. */
+  private Map<String, Integer> nameCounters = new HashMap<>();
+
+  /** @param samples the stream whose samples are counted and passed on */
+  protected NameSampleCountersStream(ObjectStream<NameSample> samples) {
+    super(samples);
+  }
+
+  /**
+   * Reads the next sample from the wrapped stream and adds it to the counters.
+   * A {@code null} sample is returned as-is and leaves the counters untouched.
+   */
+  @Override
+  public NameSample read() throws IOException {
+    NameSample sample = samples.read();
+    if (sample != null) {
+      sentenceCount++;
+      tokenCount += sample.getSentence().length;
+      for (Span nameSpan : sample.getNames()) {
+        Integer nameCounter = nameCounters.get(nameSpan.getType());
+        if (nameCounter == null) {
+          nameCounter = 0;
+        }
+        nameCounters.put(nameSpan.getType(), nameCounter + 1);
+      }
+    }
+    return sample;
+  }
+
+  /** Delegates to {@code super.reset()}, then clears all counters. */
+  @Override
+  public void reset() throws IOException, UnsupportedOperationException {
+    super.reset();
+    sentenceCount = 0;
+    tokenCount = 0;
+    nameCounters = new HashMap<>();
+  }
+
+  /** @return the number of non-null samples read */
+  public int getSentenceCount() {
+    return sentenceCount;
+  }
+
+  /** @return the summed {@code getSentence().length} of the samples read */
+  public int getTokenCount() {
+    return tokenCount;
+  }
+
+  /** @return a read-only view of the name counts keyed by span type */
+  public Map<String, Integer> getNameCounters() {
+    return Collections.unmodifiableMap(nameCounters);
+  }
+
+  /** Prints the sentence, token and per-type name counts to standard output. */
+  public void printSummary() {
+    System.out.println("Training data summary:");
+    System.out.println("#Sentences: " + getSentenceCount());
+    System.out.println("#Tokens: " + getTokenCount());
+    for (Map.Entry<String, Integer> counter : getNameCounters().entrySet()) {
+      System.out.println("#" + counter.getKey() + " entities: " + counter.getValue());
+    }
+  }
+}

Propchange: 
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/NameSampleCountersStream.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java?rev=1744905&r1=1744904&r2=1744905&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java Sat May 21 11:19:54 2016
@@ -224,6 +224,9 @@ public final class TokenNameFinderTraine
       throw new TerminateToolException(-1, e.getMessage(), e);
     }
 
+    NameSampleCountersStream counters = new NameSampleCountersStream(sampleStream);
+    sampleStream = counters;
+    
     TokenNameFinderModel model;
     try {
       model = opennlp.tools.namefind.NameFinderME.train(
@@ -241,7 +244,12 @@ public final class TokenNameFinderTraine
         // sorry that this can fail
       }
     }
-
+    
+    System.out.println();
+    counters.printSummary();
+    System.out.println();
+    
     CmdLineUtil.writeModel("name finder", modelOutFile, model);
+    
   }
 }


Reply via email to