[
https://issues.apache.org/jira/browse/OPENNLP-941?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16439514#comment-16439514
]
ASF GitHub Bot commented on OPENNLP-941:
kottmann closed pull request #308: [OPENNLP-941] Added eval support to
detokenizer
URL: https://github.com/apache/opennlp/pull/308
This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:
As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/DetokenEvaluationErrorListener.java
b/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/DetokenEvaluationErrorListener.java
new file mode 100644
index 0..4e313328c
--- /dev/null
+++
b/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/DetokenEvaluationErrorListener.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.cmdline.tokenizer;
+
+import java.io.OutputStream;
+
+import opennlp.tools.cmdline.EvaluationErrorPrinter;
+import opennlp.tools.tokenize.TokenSample;
+import opennlp.tools.tokenize.TokenizerEvaluationMonitor;
+import opennlp.tools.util.eval.EvaluationMonitor;
+
+/**
+ * A {@link TokenizerEvaluationMonitor} that passes every misclassified
+ * {@link TokenSample} pair to the inherited {@code printError} method.
+ */
+public class DetokenEvaluationErrorListener extends
+EvaluationErrorPrinter implements TokenizerEvaluationMonitor {
+
+ /**
+ * Creates a listener that is constructed with {@code System.err} as its stream.
+ */
+ public DetokenEvaluationErrorListener() {
+super(System.err);
+ }
+
+ /**
+ * Creates a listener that is constructed with the given {@link OutputStream}.
+ */
+ public DetokenEvaluationErrorListener(OutputStream outputStream) {
+super(outputStream);
+ }
+
+ @Override
+ public void missclassified(TokenSample reference, TokenSample prediction) {
+printError(reference, prediction);
+ }
+
+}
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/tokenize/DetokenizerEvaluator.java
b/opennlp-tools/src/main/java/opennlp/tools/tokenize/DetokenizerEvaluator.java
new file mode 100644
index 0..7d9df4fc6
--- /dev/null
+++
b/opennlp-tools/src/main/java/opennlp/tools/tokenize/DetokenizerEvaluator.java
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package opennlp.tools.tokenize;
+
+
+import java.util.ArrayList;
+
+import opennlp.tools.cmdline.tokenizer.DetokenEvaluationErrorListener;
+import opennlp.tools.util.Span;
+import opennlp.tools.util.eval.Evaluator;
+import opennlp.tools.util.eval.FMeasure;
+
+/**
+ * The {@link DetokenizerEvaluator} measures the performance of
+ * the given {@link Detokenizer} with the provided reference
+ * {@link TokenSample}s.
+ *
+ * @see DetokenizerEvaluator
+ * @see Detokenizer
+ * @see TokenSample
+ */
+
+public class DetokenizerEvaluator extends Evaluator {
+ private FMeasure fmeasure = new FMeasure();
+
+ /**
+ * The {@link Detokenizer} used to create the
+ * predicted tokens.
+ */
+ private Detokenizer detokenizer;
+
+ /**
+ * Initializes the current instance with the
+ * given {@link Detokenizer}.
+ *
+ * @param detokenizer the {@link Detokenizer} to evaluate.
+ * @param listeners evaluation sample listeners
+ */
+ public DetokenizerEvaluator(Detokenizer