Author: joern
Date: Fri Apr 17 14:09:08 2015
New Revision: 1674316
URL: http://svn.apache.org/r1674316
Log:
OPENNLP-769 First draft of evaluation tests using OntoNotes4
Added:
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4NameFinderEval.java
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4ParserEval.java
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4PosTaggerEval.java
Modified:
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesNameSampleStream.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesParseSampleStream.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/ParserCrossEvaluator.java
Modified:
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesNameSampleStream.java
URL:
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesNameSampleStream.java?rev=1674316&r1=1674315&r2=1674316&view=diff
==============================================================================
---
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesNameSampleStream.java
(original)
+++
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesNameSampleStream.java
Fri Apr 17 14:09:08 2015
@@ -44,7 +44,7 @@ public class OntoNotesNameSampleStream e
private List<NameSample> nameSamples = new LinkedList<NameSample>();
- protected OntoNotesNameSampleStream(ObjectStream<String> samples) {
+ public OntoNotesNameSampleStream(ObjectStream<String> samples) {
super(samples);
Map<String, String> tokenConversionMap = new HashMap<String, String>();
Modified:
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesParseSampleStream.java
URL:
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesParseSampleStream.java?rev=1674316&r1=1674315&r2=1674316&view=diff
==============================================================================
---
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesParseSampleStream.java
(original)
+++
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesParseSampleStream.java
Fri Apr 17 14:09:08 2015
@@ -27,7 +27,7 @@ import opennlp.tools.util.ObjectStream;
// Should be possible with this one, to train the parser and pos tagger!
public class OntoNotesParseSampleStream extends FilterObjectStream<String,
Parse> {
- protected OntoNotesParseSampleStream(ObjectStream<String> samples) {
+ public OntoNotesParseSampleStream(ObjectStream<String> samples) {
super(samples);
}
Modified:
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/ParserCrossEvaluator.java
URL:
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/ParserCrossEvaluator.java?rev=1674316&r1=1674315&r2=1674316&view=diff
==============================================================================
---
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/ParserCrossEvaluator.java
(original)
+++
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/ParserCrossEvaluator.java
Fri Apr 17 14:09:08 2015
@@ -38,7 +38,7 @@ public class ParserCrossEvaluator {
private ParserEvaluationMonitor[] monitors;
- ParserCrossEvaluator(String languageCode, TrainingParameters params,
HeadRules rules, ParserType parserType,
+ public ParserCrossEvaluator(String languageCode, TrainingParameters params,
HeadRules rules, ParserType parserType,
ParserEvaluationMonitor... monitors) {
this.languageCode = languageCode;
this.params = params;
Added:
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4NameFinderEval.java
URL:
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4NameFinderEval.java?rev=1674316&view=auto
==============================================================================
---
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4NameFinderEval.java
(added)
+++
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4NameFinderEval.java
Fri Apr 17 14:09:08 2015
@@ -0,0 +1,88 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.eval;
+
+import java.io.File;
+import java.io.FileFilter;
+import java.io.IOException;
+import java.nio.charset.Charset;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import opennlp.tools.formats.DirectorySampleStream;
+import opennlp.tools.formats.convert.FileToStringSampleStream;
+import opennlp.tools.formats.ontonotes.OntoNotesNameSampleStream;
+import opennlp.tools.namefind.NameSample;
+import opennlp.tools.namefind.NameSampleTypeFilter;
+import opennlp.tools.namefind.TokenNameFinderCrossValidator;
+import opennlp.tools.namefind.TokenNameFinderFactory;
+import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.TrainingParameters;
+import opennlp.tools.util.model.ModelUtil;
+
+/**
+ * Cross-validation regression test for the name finder on the English part of
+ * the OntoNotes 4 corpus.
+ * <p>
+ * The corpus is looked up at {@code ontonotes4/data/files/data/english} below
+ * the directory returned by {@code EvalUtil.getOpennlpDataDir()}.
+ */
+public class OntoNotes4NameFinderEval {
+
+  /**
+   * Cross validates the name finder with 10 folds on every {@code .name} file
+   * of the corpus and asserts that the resulting F-measure matches
+   * {@code expectedScore} within a tolerance of 0.001.
+   *
+   * @param params the training parameters handed to the cross validator
+   * @param type the only name type to keep in the samples, or {@code null}
+   *     to leave the samples unfiltered
+   * @param expectedScore the F-measure the cross validation has to reproduce
+   * @throws IOException if reading the corpus or closing the sample stream fails
+   */
+  private static void crossEval(TrainingParameters params, String type, double expectedScore)
+      throws IOException {
+
+    ObjectStream<File> documentStream = new DirectorySampleStream(new File(
+        EvalUtil.getOpennlpDataDir(), "ontonotes4/data/files/data/english"), new FileFilter() {
+
+      @Override
+      public boolean accept(File file) {
+        if (file.isFile()) {
+          return file.getName().endsWith(".name");
+        }
+
+        // Directories are accepted so that nested corpus folders are not filtered out.
+        return file.isDirectory();
+      }
+    }, true);
+
+    ObjectStream<NameSample> samples = new OntoNotesNameSampleStream(
+        new FileToStringSampleStream(documentStream, Charset.forName("UTF-8")));
+
+    // Close the sample stream even when the evaluation or the assertion fails.
+    try {
+      TokenNameFinderCrossValidator cv = new TokenNameFinderCrossValidator("en", null,
+          params, new TokenNameFinderFactory());
+
+      if (type != null) {
+        samples = new NameSampleTypeFilter(new String[] {type}, samples);
+      }
+
+      cv.evaluate(samples, 10);
+
+      Assert.assertEquals(expectedScore, cv.getFMeasure().getFMeasure(), 0.001d);
+    } finally {
+      samples.close();
+    }
+  }
+
+  /** Evaluates a model that is trained on the {@code person} type only. */
+  @Test
+  public void evalEnglishPersonNameFinder() throws IOException {
+    TrainingParameters params = ModelUtil.createDefaultTrainingParameters();
+    crossEval(params, "person", 0.8269650989441869d);
+  }
+
+  // Evaluations still to be added:
+  // organization
+  // location
+  // date
+  // duration
+  // all types
+
+  /** Evaluates a model that is trained on the unfiltered samples (all name types). */
+  @Test
+  public void evalAllTypesNameFinder() throws IOException {
+    TrainingParameters params = ModelUtil.createDefaultTrainingParameters();
+    // NOTE(review): this is the same expected score as the person-only test above;
+    // it looks like a placeholder - confirm against an actual run.
+    crossEval(params, null, 0.8269650989441869d);
+  }
+}
Added:
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4ParserEval.java
URL:
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4ParserEval.java?rev=1674316&view=auto
==============================================================================
---
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4ParserEval.java
(added)
+++
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4ParserEval.java
Fri Apr 17 14:09:08 2015
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.eval;
+
+import java.io.File;
+import java.io.FileFilter;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.nio.charset.Charset;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import opennlp.tools.formats.DirectorySampleStream;
+import opennlp.tools.formats.convert.FileToStringSampleStream;
+import opennlp.tools.formats.ontonotes.DocumentToLineStream;
+import opennlp.tools.formats.ontonotes.OntoNotesParseSampleStream;
+import opennlp.tools.parser.HeadRules;
+import opennlp.tools.parser.ParserCrossEvaluator;
+import opennlp.tools.parser.ParserType;
+import opennlp.tools.parser.lang.en.HeadRulesTest;
+import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.TrainingParameters;
+import opennlp.tools.util.model.ModelUtil;
+
+/**
+ * Cross-validation regression test for the chunking parser on the English part
+ * of the OntoNotes 4 corpus.
+ * <p>
+ * The corpus is looked up at {@code ontonotes4/data/files/data/english} below
+ * the directory returned by {@code EvalUtil.getOpennlpDataDir()}.
+ */
+public class OntoNotes4ParserEval {
+
+  /**
+   * Cross validates the parser with 10 folds on every {@code .parse} file of
+   * the corpus and asserts that the resulting F-measure matches
+   * {@code expectedScore} within a tolerance of 0.001.
+   *
+   * @param params the training parameters handed to the cross evaluator
+   * @param rules the head rules handed to the cross evaluator
+   * @param expectedScore the F-measure the cross validation has to reproduce
+   * @throws IOException if reading the corpus or closing the sample stream fails
+   */
+  private static void crossEval(TrainingParameters params, HeadRules rules, double expectedScore)
+      throws IOException {
+
+    ObjectStream<File> documentStream = new DirectorySampleStream(new File(
+        EvalUtil.getOpennlpDataDir(), "ontonotes4/data/files/data/english"), new FileFilter() {
+
+      @Override
+      public boolean accept(File file) {
+        if (file.isFile()) {
+          return file.getName().endsWith(".parse");
+        }
+
+        // Directories are accepted so that nested corpus folders are not filtered out.
+        return file.isDirectory();
+      }
+    }, true);
+
+    OntoNotesParseSampleStream samples = new OntoNotesParseSampleStream(
+        new DocumentToLineStream(new FileToStringSampleStream(
+            documentStream, Charset.forName("UTF-8"))));
+
+    // Close the sample stream even when the evaluation or the assertion fails.
+    try {
+      ParserCrossEvaluator cv = new ParserCrossEvaluator("en", params, rules,
+          ParserType.CHUNKING);
+
+      cv.evaluate(samples, 10);
+
+      // Argument order is (expected, actual, delta): the expected score comes from the
+      // caller and the tolerance is a fixed, positive value.
+      Assert.assertEquals(expectedScore, cv.getFMeasure().getFMeasure(), 0.001d);
+    } finally {
+      samples.close();
+    }
+  }
+
+  /** Evaluates a parser trained with the default training parameters. */
+  @Test
+  public void evalEnglishMaxent() throws IOException {
+
+    HeadRules headRules;
+    try (InputStream headRulesIn =
+        HeadRulesTest.class.getResourceAsStream("/opennlp/tools/parser/en_head_rules")) {
+      // getResourceAsStream returns null for a missing resource; fail with a clear message.
+      Assert.assertNotNull("head rules resource /opennlp/tools/parser/en_head_rules not found",
+          headRulesIn);
+      headRules = new opennlp.tools.parser.lang.en.HeadRules(
+          new InputStreamReader(headRulesIn, "UTF-8"));
+    }
+
+    // NOTE(review): 0.8 is the value that used to be hard-coded as the expected score
+    // inside crossEval; replace it with the F-measure of an actual run.
+    crossEval(ModelUtil.createDefaultTrainingParameters(), headRules, 0.8d);
+  }
+}
Added:
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4PosTaggerEval.java
URL:
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4PosTaggerEval.java?rev=1674316&view=auto
==============================================================================
---
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4PosTaggerEval.java
(added)
+++
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4PosTaggerEval.java
Fri Apr 17 14:09:08 2015
@@ -0,0 +1,70 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.eval;
+
+import java.io.File;
+import java.io.FileFilter;
+import java.io.IOException;
+import java.nio.charset.Charset;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import opennlp.tools.formats.DirectorySampleStream;
+import opennlp.tools.formats.convert.FileToStringSampleStream;
+import opennlp.tools.formats.convert.ParseToPOSSampleStream;
+import opennlp.tools.formats.ontonotes.DocumentToLineStream;
+import opennlp.tools.formats.ontonotes.OntoNotesParseSampleStream;
+import opennlp.tools.postag.POSTaggerCrossValidator;
+import opennlp.tools.postag.POSTaggerFactory;
+import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.TrainingParameters;
+import opennlp.tools.util.model.ModelUtil;
+
+/**
+ * Cross-validation regression test for the POS tagger on the English part of
+ * the OntoNotes 4 corpus.
+ * <p>
+ * The corpus is looked up at {@code ontonotes4/data/files/data/english} below
+ * the directory returned by {@code EvalUtil.getOpennlpDataDir()}.
+ */
+public class OntoNotes4PosTaggerEval {
+
+  /**
+   * Cross validates the POS tagger with 10 folds on samples converted from
+   * every {@code .parse} file of the corpus and asserts that the resulting
+   * word accuracy matches {@code expectedScore} within a tolerance of 0.0001.
+   *
+   * @param params the training parameters handed to the cross validator
+   * @param expectedScore the word accuracy the cross validation has to reproduce
+   * @throws IOException if reading the corpus or closing the sample stream fails
+   */
+  private static void crossEval(TrainingParameters params, double expectedScore)
+      throws IOException {
+
+    ObjectStream<File> documentStream = new DirectorySampleStream(new File(
+        EvalUtil.getOpennlpDataDir(), "ontonotes4/data/files/data/english"), new FileFilter() {
+
+      @Override
+      public boolean accept(File file) {
+        if (file.isFile()) {
+          return file.getName().endsWith(".parse");
+        }
+
+        // Directories are accepted so that nested corpus folders are not filtered out.
+        return file.isDirectory();
+      }
+    }, true);
+
+    ParseToPOSSampleStream samples = new ParseToPOSSampleStream(
+        new OntoNotesParseSampleStream(
+            new DocumentToLineStream(
+                new FileToStringSampleStream(documentStream, Charset.forName("UTF-8")))));
+
+    // Close the sample stream even when the evaluation or the assertion fails.
+    try {
+      POSTaggerCrossValidator cv =
+          new POSTaggerCrossValidator("en", params, new POSTaggerFactory());
+      cv.evaluate(samples, 10);
+
+      Assert.assertEquals(expectedScore, cv.getWordAccuracy(), 0.0001d);
+    } finally {
+      samples.close();
+    }
+  }
+
+  /** Evaluates a tagger trained with the default training parameters. */
+  @Test
+  public void evalEnglishMaxentTagger() throws IOException {
+    crossEval(ModelUtil.createDefaultTrainingParameters(), 0.9707977252663043d);
+  }
+}