Author: colen
Date: Thu Apr 30 03:39:44 2015
New Revision: 1676890
URL: http://svn.apache.org/r1676890
Log:
OPENNLP-770 Evaluation using CONLL 2000
Added:
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/eval/Conll00ChunkerEval.java
(with props)
Added:
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/eval/Conll00ChunkerEval.java
URL:
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/eval/Conll00ChunkerEval.java?rev=1676890&view=auto
==============================================================================
---
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/eval/Conll00ChunkerEval.java
(added)
+++
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/eval/Conll00ChunkerEval.java
Thu Apr 30 03:39:44 2015
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package opennlp.tools.eval;
+
+import java.io.File;
+import java.io.IOException;
+
+import opennlp.tools.chunker.ChunkSample;
+import opennlp.tools.chunker.ChunkSampleStream;
+import opennlp.tools.chunker.ChunkerEvaluator;
+import opennlp.tools.chunker.ChunkerFactory;
+import opennlp.tools.chunker.ChunkerME;
+import opennlp.tools.chunker.ChunkerModel;
+import opennlp.tools.util.MarkableFileInputStreamFactory;
+import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.PlainTextByLineStream;
+import opennlp.tools.util.TrainingParameters;
+import opennlp.tools.util.model.ModelUtil;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Evaluates the chunker against the English CONLL2000 corpus.
+ * <p>
+ * Download the train and test gz files from the CONLL2000 shared task
+ * <a href="http://www.cnts.ua.ac.be/conll2000/chunking/">site</a> and
+ * decompress them to $OPENNLP_DATA_DIR/conll00/train.txt and test.txt.
+ */
+public class Conll00ChunkerEval {
+  /** Trains an "en" chunker model on trainFile, closing the sample stream. */
+  private static ChunkerModel train(File trainFile, TrainingParameters params)
+      throws IOException {
+    ObjectStream<ChunkSample> samples = new ChunkSampleStream(
+        new PlainTextByLineStream(
+            new MarkableFileInputStreamFactory(trainFile), "UTF-8"));
+    try {
+      return ChunkerME.train("en", samples, params, new ChunkerFactory());
+    } finally {
+      samples.close(); // release the training file even if training throws
+    }
+  }
+  /** Asserts that model scores expectedFMeasure (+/- 0.0001) on testData. */
+  private static void eval(ChunkerModel model, File testData,
+      double expectedFMeasure) throws IOException {
+    ObjectStream<ChunkSample> samples = new ChunkSampleStream(
+        new PlainTextByLineStream(
+            new MarkableFileInputStreamFactory(testData), "UTF-8"));
+    try {
+      ChunkerEvaluator evaluator = new ChunkerEvaluator(new ChunkerME(model));
+      evaluator.evaluate(samples);
+      Assert.assertEquals(expectedFMeasure,
+          evaluator.getFMeasure().getFMeasure(), 0.0001);
+    } finally {
+      samples.close(); // release the test file even if the assertion fails
+    }
+  }
+  /** Trains on conll00/train.txt with default parameters; checks test.txt. */
+  @Test
+  public void evalEnglish() throws IOException {
+    File dataDir = new File(EvalUtil.getOpennlpDataDir(), "conll00");
+    ChunkerModel maxentModel = train(new File(dataDir, "train.txt"),
+        ModelUtil.createDefaultTrainingParameters());
+    eval(maxentModel, new File(dataDir, "test.txt"), 0.9239687473746113d);
+  }
+}
Propchange:
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/eval/Conll00ChunkerEval.java
------------------------------------------------------------------------------
svn:mime-type = text/plain