Author: joern
Date: Wed Apr 15 13:40:38 2015
New Revision: 1673762

URL: http://svn.apache.org/r1673762
Log:
OPENNLP-765 Added CONLL-X Pos Tagger performance tests

Added:
    opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/eval/
    opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/eval/ConllXPosTaggerEval.java
    opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/eval/EvalUtil.java
Modified:
    opennlp/trunk/opennlp-tools/pom.xml
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ConllXPOSSampleStream.java

Modified: opennlp/trunk/opennlp-tools/pom.xml
URL: 
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/pom.xml?rev=1673762&r1=1673761&r2=1673762&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/pom.xml (original)
+++ opennlp/trunk/opennlp-tools/pom.xml Wed Apr 15 13:40:38 2015
@@ -76,7 +76,10 @@
         <groupId>org.apache.maven.plugins</groupId>
         <artifactId>maven-surefire-plugin</artifactId>
         <configuration>
-          <argLine>-Xmx512m</argLine>
+          <argLine>-Xmx1024m</argLine>
+          <excludes>
+            <exclude>/opennlp/tools/eval/**/*</exclude>
+          </excludes>
         </configuration>
       </plugin>
                        

Modified: 
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ConllXPOSSampleStream.java
URL: 
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ConllXPOSSampleStream.java?rev=1673762&r1=1673761&r2=1673762&view=diff
==============================================================================
--- 
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ConllXPOSSampleStream.java
 (original)
+++ 
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ConllXPOSSampleStream.java
 Wed Apr 15 13:40:38 2015
@@ -46,7 +46,7 @@ public class ConllXPOSSampleStream exten
     super(new ParagraphStream(lineStream));
   }
 
-  ConllXPOSSampleStream(InputStreamFactory in, Charset charset) throws 
IOException {
+  public ConllXPOSSampleStream(InputStreamFactory in, Charset charset) throws 
IOException {
     super(new ParagraphStream(new PlainTextByLineStream(in, charset)));
   }
 

Added: 
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/eval/ConllXPosTaggerEval.java
URL: 
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/eval/ConllXPosTaggerEval.java?rev=1673762&view=auto
==============================================================================
--- 
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/eval/ConllXPosTaggerEval.java
 (added)
+++ 
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/eval/ConllXPosTaggerEval.java
 Wed Apr 15 13:40:38 2015
@@ -0,0 +1,122 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.eval;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.charset.Charset;
+
+import opennlp.tools.formats.ConllXPOSSampleStream;
+import opennlp.tools.postag.POSEvaluator;
+import opennlp.tools.postag.POSModel;
+import opennlp.tools.postag.POSSample;
+import opennlp.tools.postag.POSTaggerFactory;
+import opennlp.tools.postag.POSTaggerME;
+import opennlp.tools.util.MarkableFileInputStreamFactory;
+import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.TrainingParameters;
+import opennlp.tools.util.model.ModelUtil;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Evaluates the POS Tagger on the CONLL-X data. The CONLL-X data includes
+ * training and evaluation data for Danish, Dutch, Portuguese and Swedish.
+ * <p>
+ * The following files are needed in the data directory to run this test:<br>
+ * conllx/data/danish/ddt/train/danish_ddt_train.conll<br>
+ * conllx/data/danish/ddt/test/danish_ddt_test.conll<br>
+ * conllx/data/dutch/alpino/train/dutch_alpino_train.conll<br>
+ * conllx/data/dutch/alpino/test/dutch_alpino_test.conll<br>
+ * conllx/data/portuguese/bosque/treebank/portuguese_bosque_train.conll<br>
+ * conllx/data/portuguese/bosque/test/portuguese_bosque_test.conll<br>
+ * conllx/data/swedish/talbanken05/train/swedish_talbanken05_train.conll<br>
+ * conllx/data/swedish/talbanken05/test/swedish_talbanken05_test.conll<br>
+ * <p>
+ * The structure follows the structure of the CONLL-X data distribution.
+ * There is one package for each language, and an extra package containing
+ * the tests for all languages.
+ * <p>
+ * The data directory is resolved through
+ * {@link EvalUtil#getOpennlpDataDir()}.
+ */
+public class ConllXPosTaggerEval {
+
+  /**
+   * Trains a POS model with the samples of a CONLL-X file.
+   *
+   * @param trainFile the CONLL-X training file, decoded as UTF-8
+   * @param lang the language code handed to the trainer
+   * @param params the training parameters
+   * @return the trained model
+   * @throws IOException if reading the training data fails
+   */
+  private static POSModel train(File trainFile, String lang,
+      TrainingParameters params) throws IOException {
+
+    ObjectStream<POSSample> samples = new ConllXPOSSampleStream(
+        new MarkableFileInputStreamFactory(trainFile),
+        Charset.forName("UTF-8"));
+
+    // Release the underlying file even when training fails.
+    try {
+      return POSTaggerME.train(lang, samples, params,
+          new POSTaggerFactory());
+    } finally {
+      samples.close();
+    }
+  }
+
+  /**
+   * Tags the samples of a CONLL-X file with the given model and asserts that
+   * the measured word accuracy matches the expected one (tolerance 0.0001).
+   *
+   * @param model the model to evaluate
+   * @param testData the CONLL-X evaluation file, decoded as UTF-8
+   * @param expectedAccuracy the word accuracy the model has to reach
+   * @throws IOException if reading the evaluation data fails
+   */
+  private static void eval(POSModel model, File testData,
+      double expectedAccuracy) throws IOException {
+
+    ObjectStream<POSSample> samples = new ConllXPOSSampleStream(
+        new MarkableFileInputStreamFactory(testData),
+        Charset.forName("UTF-8"));
+
+    POSEvaluator evaluator = new POSEvaluator(new POSTaggerME(model));
+
+    // Release the underlying file even when the evaluation fails.
+    try {
+      evaluator.evaluate(samples);
+    } finally {
+      samples.close();
+    }
+
+    Assert.assertEquals(expectedAccuracy, evaluator.getWordAccuracy(),
+        0.0001);
+  }
+
+  @Test
+  public void evalDanish() throws IOException {
+    TrainingParameters params = ModelUtil.createDefaultTrainingParameters();
+
+    POSModel maxentModel = train(new File(EvalUtil.getOpennlpDataDir(),
+        "conllx/data/danish/ddt/train/danish_ddt_train.conll"),
+        "da", params);
+
+    eval(maxentModel, new File(EvalUtil.getOpennlpDataDir(),
+        "conllx/data/danish/ddt/test/danish_ddt_test.conll"),
+        0.9512987012987013d);
+  }
+
+  @Test
+  public void evalDutch() throws IOException {
+    TrainingParameters params = ModelUtil.createDefaultTrainingParameters();
+
+    POSModel maxentModel = train(new File(EvalUtil.getOpennlpDataDir(),
+        "conllx/data/dutch/alpino/train/dutch_alpino_train.conll"),
+        "nl", params);
+
+    eval(maxentModel, new File(EvalUtil.getOpennlpDataDir(),
+        "conllx/data/dutch/alpino/test/dutch_alpino_test.conll"),
+        0.9174574753804834d);
+  }
+
+  @Test
+  public void evalPortuguese() throws IOException {
+    TrainingParameters params = ModelUtil.createDefaultTrainingParameters();
+
+    POSModel maxentModel = train(new File(EvalUtil.getOpennlpDataDir(),
+        "conllx/data/portuguese/bosque/treebank/portuguese_bosque_train.conll"),
+        "pt", params);
+
+    eval(maxentModel, new File(EvalUtil.getOpennlpDataDir(),
+        "conllx/data/portuguese/bosque/test/portuguese_bosque_test.conll"),
+        0.9659110277825124d);
+  }
+
+  @Test
+  public void evalSwedish() throws IOException {
+    TrainingParameters params = ModelUtil.createDefaultTrainingParameters();
+
+    // "sv" is the ISO 639-1 code for Swedish ("se" denotes Northern Sami).
+    POSModel maxentModel = train(new File(EvalUtil.getOpennlpDataDir(),
+        "conllx/data/swedish/talbanken05/train/swedish_talbanken05_train.conll"),
+        "sv", params);
+
+    eval(maxentModel, new File(EvalUtil.getOpennlpDataDir(),
+        "conllx/data/swedish/talbanken05/test/swedish_talbanken05_test.conll"),
+        0.9275106082036775d);
+  }
+}

Added: 
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/eval/EvalUtil.java
URL: 
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/eval/EvalUtil.java?rev=1673762&view=auto
==============================================================================
--- opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/eval/EvalUtil.java 
(added)
+++ opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/eval/EvalUtil.java 
Wed Apr 15 13:40:38 2015
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.eval;
+
+import java.io.File;
+
+/**
+ * Helper for the evaluation tests in this package.
+ */
+public class EvalUtil {
+
+  /**
+   * Returns the directory which contains the evaluation data. The location
+   * is read from the {@code OPENNLP_DATA_DIR} system property.
+   *
+   * @return the data directory
+   * @throws IllegalStateException if the system property is not set
+   */
+  public static final File getOpennlpDataDir() {
+    String dataDir = System.getProperty("OPENNLP_DATA_DIR");
+
+    // Without this check new File(null) fails with a NullPointerException
+    // that gives no hint about the missing configuration.
+    if (dataDir == null) {
+      throw new IllegalStateException(
+          "The OPENNLP_DATA_DIR system property must point to the"
+          + " evaluation data directory!");
+    }
+
+    return new File(dataDir);
+  }
+}


Reply via email to