Author: joern
Date: Fri Apr 17 14:09:08 2015
New Revision: 1674316

URL: http://svn.apache.org/r1674316
Log:
OPENNLP-769 First draft of evaluation tests using OntoNotes4

Added:
    
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4NameFinderEval.java
    
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4ParserEval.java
    
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4PosTaggerEval.java
Modified:
    
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesNameSampleStream.java
    
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesParseSampleStream.java
    
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/ParserCrossEvaluator.java

Modified: 
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesNameSampleStream.java
URL: 
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesNameSampleStream.java?rev=1674316&r1=1674315&r2=1674316&view=diff
==============================================================================
--- 
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesNameSampleStream.java
 (original)
+++ 
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesNameSampleStream.java
 Fri Apr 17 14:09:08 2015
@@ -44,7 +44,7 @@ public class OntoNotesNameSampleStream e
 
   private List<NameSample> nameSamples = new LinkedList<NameSample>();
 
-  protected OntoNotesNameSampleStream(ObjectStream<String> samples) {
+  public OntoNotesNameSampleStream(ObjectStream<String> samples) {
     super(samples);
 
     Map<String, String> tokenConversionMap = new HashMap<String, String>();

Modified: 
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesParseSampleStream.java
URL: 
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesParseSampleStream.java?rev=1674316&r1=1674315&r2=1674316&view=diff
==============================================================================
--- 
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesParseSampleStream.java
 (original)
+++ 
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesParseSampleStream.java
 Fri Apr 17 14:09:08 2015
@@ -27,7 +27,7 @@ import opennlp.tools.util.ObjectStream;
 // Should be possible with this one, to train the parser and pos tagger!
 public class OntoNotesParseSampleStream extends FilterObjectStream<String, 
Parse> {
 
-  protected OntoNotesParseSampleStream(ObjectStream<String> samples) {
+  public OntoNotesParseSampleStream(ObjectStream<String> samples) {
     super(samples);
   }
 

Modified: 
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/ParserCrossEvaluator.java
URL: 
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/ParserCrossEvaluator.java?rev=1674316&r1=1674315&r2=1674316&view=diff
==============================================================================
--- 
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/ParserCrossEvaluator.java
 (original)
+++ 
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/ParserCrossEvaluator.java
 Fri Apr 17 14:09:08 2015
@@ -38,7 +38,7 @@ public class ParserCrossEvaluator {
 
   private ParserEvaluationMonitor[] monitors;
 
-  ParserCrossEvaluator(String languageCode, TrainingParameters params, 
HeadRules rules, ParserType parserType,
+  public ParserCrossEvaluator(String languageCode, TrainingParameters params, 
HeadRules rules, ParserType parserType,
       ParserEvaluationMonitor... monitors) {
     this.languageCode = languageCode;
     this.params = params;

Added: 
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4NameFinderEval.java
URL: 
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4NameFinderEval.java?rev=1674316&view=auto
==============================================================================
--- 
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4NameFinderEval.java
 (added)
+++ 
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4NameFinderEval.java
 Fri Apr 17 14:09:08 2015
@@ -0,0 +1,123 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.eval;
+
+import java.io.File;
+import java.io.FileFilter;
+import java.io.IOException;
+import java.nio.charset.Charset;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import opennlp.tools.formats.DirectorySampleStream;
+import opennlp.tools.formats.convert.FileToStringSampleStream;
+import opennlp.tools.formats.ontonotes.OntoNotesNameSampleStream;
+import opennlp.tools.namefind.NameSample;
+import opennlp.tools.namefind.NameSampleTypeFilter;
+import opennlp.tools.namefind.TokenNameFinderCrossValidator;
+import opennlp.tools.namefind.TokenNameFinderFactory;
+import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.TrainingParameters;
+import opennlp.tools.util.model.ModelUtil;
+
+/**
+ * Cross-validation evaluation of the name finder on the English part of the
+ * OntoNotes 4 corpus.
+ */
+public class OntoNotes4NameFinderEval {
+
+  /**
+   * Runs a 10-fold cross validation of the name finder over the
+   * {@code .name} files of the corpus directory and asserts that the
+   * resulting F-measure matches the expected score within 0.001.
+   *
+   * @param params the training parameters handed to the cross validator
+   * @param type the name type to restrict the samples to, or {@code null}
+   *     to leave the samples unfiltered
+   * @param expectedScore the expected F-measure
+   * @throws IOException if thrown while evaluating or closing the stream
+   */
+  private static void crossEval(TrainingParameters params, String type,
+      double expectedScore) throws IOException {
+
+    File corpusDir = new File(EvalUtil.getOpennlpDataDir(),
+        "ontonotes4/data/files/data/english");
+
+    ObjectStream<File> documentStream = new DirectorySampleStream(
+        corpusDir, new FileFilter() {
+
+      public boolean accept(File file) {
+        if (file.isFile()) {
+          return file.getName().endsWith(".name");
+        }
+
+        // NOTE(review): directories are accepted as well, presumably so
+        // that the stream can descend into them - confirm.
+        return file.isDirectory();
+      }
+    }, true);
+
+    ObjectStream<NameSample> samples = new OntoNotesNameSampleStream(
+        new FileToStringSampleStream(documentStream,
+            Charset.forName("UTF-8")));
+
+    TokenNameFinderCrossValidator cv = new TokenNameFinderCrossValidator(
+        "en", null, params, new TokenNameFinderFactory());
+
+    if (type != null) {
+      samples = new NameSampleTypeFilter(new String[]{type}, samples);
+    }
+
+    try {
+      cv.evaluate(samples, 10);
+    } finally {
+      // Close the sample stream even when the evaluation fails.
+      samples.close();
+    }
+
+    Assert.assertEquals(expectedScore, cv.getFMeasure().getFMeasure(),
+        0.001d);
+  }
+
+  /**
+   * Evaluates the name finder on the {@code person} type using the default
+   * training parameters.
+   */
+  @Test
+  public void evalEnglishPersonNameFinder() throws IOException {
+    TrainingParameters params = ModelUtil.createDefaultTrainingParameters();
+    crossEval(params, "person", 0.8269650989441869d);
+  }
+
+  // TODO(review): no evaluation exists yet for these types:
+  // organization, location, date, duration
+
+  /**
+   * Evaluates the name finder on all name types using the default training
+   * parameters.
+   */
+  @Test
+  public void evalAllTypesNameFinder() throws IOException {
+    TrainingParameters params = ModelUtil.createDefaultTrainingParameters();
+    // NOTE(review): identical to the expected score of the person-only
+    // evaluation; presumably a copied placeholder - confirm the value
+    // against a reference run over all types.
+    crossEval(params, null, 0.8269650989441869d);
+  }
+}

Added: 
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4ParserEval.java
URL: 
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4ParserEval.java?rev=1674316&view=auto
==============================================================================
--- 
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4ParserEval.java
 (added)
+++ 
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4ParserEval.java
 Fri Apr 17 14:09:08 2015
@@ -0,0 +1,119 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.eval;
+
+import java.io.File;
+import java.io.FileFilter;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.nio.charset.Charset;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import opennlp.tools.formats.DirectorySampleStream;
+import opennlp.tools.formats.convert.FileToStringSampleStream;
+import opennlp.tools.formats.ontonotes.DocumentToLineStream;
+import opennlp.tools.formats.ontonotes.OntoNotesParseSampleStream;
+import opennlp.tools.parser.HeadRules;
+import opennlp.tools.parser.ParserCrossEvaluator;
+import opennlp.tools.parser.ParserType;
+import opennlp.tools.parser.lang.en.HeadRulesTest;
+import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.TrainingParameters;
+import opennlp.tools.util.model.ModelUtil;
+
+/**
+ * Cross-validation evaluation of the parser on the English part of the
+ * OntoNotes 4 corpus.
+ */
+public class OntoNotes4ParserEval {
+
+  /**
+   * Runs a 10-fold cross validation of the chunking parser over the
+   * {@code .parse} files of the corpus directory and asserts that the
+   * resulting F-measure matches the expected score within 0.001.
+   *
+   * @param params the training parameters handed to the cross evaluator
+   * @param rules the head rules handed to the cross evaluator
+   * @param expectedScore the expected F-measure
+   * @throws IOException if thrown while evaluating or closing the stream
+   */
+  private static void crossEval(TrainingParameters params, HeadRules rules,
+      double expectedScore) throws IOException {
+
+    File corpusDir = new File(EvalUtil.getOpennlpDataDir(),
+        "ontonotes4/data/files/data/english");
+
+    ObjectStream<File> documentStream = new DirectorySampleStream(
+        corpusDir, new FileFilter() {
+
+      public boolean accept(File file) {
+        if (file.isFile()) {
+          return file.getName().endsWith(".parse");
+        }
+
+        // NOTE(review): directories are accepted as well, presumably so
+        // that the stream can descend into them - confirm.
+        return file.isDirectory();
+      }
+    }, true);
+
+    OntoNotesParseSampleStream samples = new OntoNotesParseSampleStream(
+        new DocumentToLineStream(new FileToStringSampleStream(
+        documentStream, Charset.forName("UTF-8"))));
+
+    ParserCrossEvaluator cv = new ParserCrossEvaluator("en", params, rules,
+        ParserType.CHUNKING);
+
+    try {
+      cv.evaluate(samples, 10);
+    } finally {
+      // Close the sample stream even when the evaluation fails.
+      samples.close();
+    }
+
+    // Expected value first, tolerance last: the score must not be passed
+    // as the delta argument.
+    Assert.assertEquals(expectedScore, cv.getFMeasure().getFMeasure(),
+        0.001d);
+  }
+
+  /**
+   * Evaluates the parser with the default training parameters and the
+   * English head rules loaded from the class path.
+   */
+  @Test
+  public void evalEnglishMaxent() throws IOException {
+
+    HeadRules headRules;
+    try (InputStream headRulesIn = HeadRulesTest.class.getResourceAsStream(
+        "/opennlp/tools/parser/en_head_rules")) {
+      // Fail with a readable message instead of a NullPointerException
+      // when the resource is missing.
+      Assert.assertNotNull("en_head_rules resource not found", headRulesIn);
+      headRules = new opennlp.tools.parser.lang.en.HeadRules(
+          new InputStreamReader(headRulesIn, "UTF-8"));
+    }
+
+    // NOTE(review): -0.0d looks like a placeholder, not a measured score;
+    // replace it with the F-measure of a reference run.
+    crossEval(ModelUtil.createDefaultTrainingParameters(), headRules, -0.0d);
+  }
+}

Added: 
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4PosTaggerEval.java
URL: 
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4PosTaggerEval.java?rev=1674316&view=auto
==============================================================================
--- 
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4PosTaggerEval.java
 (added)
+++ 
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4PosTaggerEval.java
 Fri Apr 17 14:09:08 2015
@@ -0,0 +1,100 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.eval;
+
+import java.io.File;
+import java.io.FileFilter;
+import java.io.IOException;
+import java.nio.charset.Charset;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import opennlp.tools.formats.DirectorySampleStream;
+import opennlp.tools.formats.convert.FileToStringSampleStream;
+import opennlp.tools.formats.convert.ParseToPOSSampleStream;
+import opennlp.tools.formats.ontonotes.DocumentToLineStream;
+import opennlp.tools.formats.ontonotes.OntoNotesParseSampleStream;
+import opennlp.tools.postag.POSTaggerCrossValidator;
+import opennlp.tools.postag.POSTaggerFactory;
+import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.TrainingParameters;
+import opennlp.tools.util.model.ModelUtil;
+
+/**
+ * Cross-validation evaluation of the POS tagger on the English part of the
+ * OntoNotes 4 corpus.
+ */
+public class OntoNotes4PosTaggerEval {
+
+  /**
+   * Runs a 10-fold cross validation of the POS tagger over the
+   * {@code .parse} files of the corpus directory and asserts that the
+   * resulting word accuracy matches the expected score within 0.0001.
+   *
+   * @param params the training parameters handed to the cross validator
+   * @param expectedScore the expected word accuracy
+   * @throws IOException if thrown while evaluating or closing the stream
+   */
+  private static void crossEval(TrainingParameters params,
+      double expectedScore) throws IOException {
+
+    File corpusDir = new File(EvalUtil.getOpennlpDataDir(),
+        "ontonotes4/data/files/data/english");
+
+    ObjectStream<File> documentStream = new DirectorySampleStream(
+        corpusDir, new FileFilter() {
+
+      public boolean accept(File file) {
+        if (file.isFile()) {
+          return file.getName().endsWith(".parse");
+        }
+
+        // NOTE(review): directories are accepted as well, presumably so
+        // that the stream can descend into them - confirm.
+        return file.isDirectory();
+      }
+    }, true);
+
+    ParseToPOSSampleStream samples = new ParseToPOSSampleStream(
+        new OntoNotesParseSampleStream(new DocumentToLineStream(
+        new FileToStringSampleStream(documentStream,
+        Charset.forName("UTF-8")))));
+
+    POSTaggerCrossValidator cv = new POSTaggerCrossValidator("en", params,
+        new POSTaggerFactory());
+
+    try {
+      cv.evaluate(samples, 10);
+    } finally {
+      // Close the sample stream even when the evaluation fails.
+      samples.close();
+    }
+
+    Assert.assertEquals(expectedScore, cv.getWordAccuracy(), 0.0001d);
+  }
+
+  /**
+   * Evaluates the POS tagger with the default training parameters.
+   */
+  @Test
+  public void evalEnglishMaxentTagger() throws IOException {
+    crossEval(ModelUtil.createDefaultTrainingParameters(),
+        0.9707977252663043d);
+  }
+}


Reply via email to