Repository: opennlp
Updated Branches:
  refs/heads/master 2284819f3 -> 05a916ef1


Revert "OPENNLP-1082: Add EOS to SDEventStream if missing"

This reverts commit b5b6d5c27443e1837b80b089206aad480852cd1c.


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/05a916ef
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/05a916ef
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/05a916ef

Branch: refs/heads/master
Commit: 05a916ef1726e4487e5aefaec1c170a3ee763895
Parents: 2284819
Author: Jörn Kottmann <[email protected]>
Authored: Mon Jul 3 16:27:32 2017 +0200
Committer: Jörn Kottmann <[email protected]>
Committed: Mon Jul 3 16:44:32 2017 +0200

----------------------------------------------------------------------
 .../opennlp/tools/cmdline/ArgumentParser.java   | 18 ++--------
 .../SentenceDetectorCrossValidatorTool.java     |  9 +----
 .../sentdetect/SentenceDetectorTrainerTool.java |  9 +----
 .../cmdline/sentdetect/TrainingParams.java      |  5 ---
 .../tools/sentdetect/SDCrossValidator.java      | 23 ++-----------
 .../opennlp/tools/sentdetect/SDEventStream.java | 35 ++------------------
 .../tools/sentdetect/SentenceDetectorME.java    | 19 ++++-------
 .../opennlp/tools/sentdetect/lang/Factory.java  |  6 ++--
 .../tools/sentdetect/SDEventStreamTest.java     | 33 +++---------------
 9 files changed, 22 insertions(+), 135 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp/blob/05a916ef/opennlp-tools/src/main/java/opennlp/tools/cmdline/ArgumentParser.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/ArgumentParser.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/ArgumentParser.java
index 4d028cd..8243560 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/ArgumentParser.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/ArgumentParser.java
@@ -127,21 +127,6 @@ public class ArgumentParser {
     }
   }
 
-  private static class CharacterArgumentFactory implements ArgumentFactory {
-
-    public Object parseArgument(Method method, String argName, String argValue) {
-      if (argValue != null) {
-        char[] chars = argValue.toCharArray();
-        if (chars.length != 1) {
-          throw new TerminateToolException(1,  String.format(INVALID_ARG, argName, argValue) +
-              "Character should have size 1.");
-        }
-        return Character.valueOf(chars[0]);
-      }
-      return null;
-    }
-  }
-
   private static class ArgumentProxy implements InvocationHandler {
 
     private final Map<String, Object> arguments;
@@ -169,7 +154,6 @@ public class ArgumentParser {
     factories.put(String.class, new StringArgumentFactory());
     factories.put(File.class, new FileArgumentFactory());
     factories.put(Charset.class, new CharsetArgumentFactory());
-    factories.put(Character.class, new CharacterArgumentFactory());
 
     argumentFactories = Collections.unmodifiableMap(factories);
   }
@@ -236,6 +220,7 @@ public class ArgumentParser {
    * @param argProxyInterface interface with parameter descriptions
    * @return the help message usage string
    */
+  @SuppressWarnings({"unchecked"})
   public static <T> String createUsage(Class<T> argProxyInterface) {
     return createUsage(new Class[]{argProxyInterface});
   }
@@ -399,6 +384,7 @@ public class ArgumentParser {
    * @param argProxyInterface interface with parameters description
    * @return true, if arguments are valid
    */
+  @SuppressWarnings({"unchecked"})
   public static <T> boolean validateArguments(String[] args, Class<T> argProxyInterface) {
     return validateArguments(args, new Class[]{argProxyInterface});
   }

http://git-wip-us.apache.org/repos/asf/opennlp/blob/05a916ef/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorCrossValidatorTool.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorCrossValidatorTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorCrossValidatorTool.java
index 3a28254..55d1df6 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorCrossValidatorTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorCrossValidatorTool.java
@@ -67,18 +67,11 @@ public final class SentenceDetectorCrossValidatorTool
       eos = eosString.toCharArray();
     }
 
-    Character defaultEOS;
-    if (params.getDefaultEosChar() != null) {
-      defaultEOS = params.getDefaultEosChar();
-    } else {
-      defaultEOS = '\n';
-    }
-
     try {
       Dictionary abbreviations = SentenceDetectorTrainerTool.loadDict(params.getAbbDict());
       SentenceDetectorFactory sdFactory = SentenceDetectorFactory.create(
           params.getFactory(), params.getLang(), true, abbreviations, eos);
-      validator = new SDCrossValidator(params.getLang(), mlParams, sdFactory, defaultEOS,
+      validator = new SDCrossValidator(params.getLang(), mlParams, sdFactory,
           errorListener);
 
       validator.evaluate(sampleStream, params.getFolds());

http://git-wip-us.apache.org/repos/asf/opennlp/blob/05a916ef/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorTrainerTool.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorTrainerTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorTrainerTool.java
index b63de0f..cdd6916 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorTrainerTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorTrainerTool.java
@@ -84,13 +84,6 @@ public final class SentenceDetectorTrainerTool
       eos = eosString.toCharArray();
     }
 
-    Character defaultEOS;
-    if (params.getDefaultEosChar() != null) {
-      defaultEOS = params.getDefaultEosChar();
-    } else {
-      defaultEOS = '\n';
-    }
-
     SentenceModel model;
 
     try {
@@ -98,7 +91,7 @@ public final class SentenceDetectorTrainerTool
       SentenceDetectorFactory sdFactory = SentenceDetectorFactory.create(
           params.getFactory(), params.getLang(), true, dict, eos);
       model = SentenceDetectorME.train(params.getLang(), sampleStream,
-          sdFactory, mlParams, defaultEOS);
+          sdFactory, mlParams);
     } catch (IOException e) {
       throw createTerminationIOException(e);
     }

http://git-wip-us.apache.org/repos/asf/opennlp/blob/05a916ef/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/TrainingParams.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/TrainingParams.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/TrainingParams.java
index 5b7289a..fbdf4db 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/TrainingParams.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/TrainingParams.java
@@ -38,11 +38,6 @@ interface TrainingParams extends BasicTrainingParams {
   @OptionalParameter
   String getEosChars();
 
-  @ParameterDescription(valueName = "string", description = "EOS character to use if EOS is " +
-      "missing in sample. Default is \\n.")
-  @OptionalParameter
-  Character getDefaultEosChar();
-
   @ParameterDescription(valueName = "factoryName",
       description = "A sub-class of SentenceDetectorFactory where to get implementation and resources.")
   @OptionalParameter

http://git-wip-us.apache.org/repos/asf/opennlp/blob/05a916ef/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SDCrossValidator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SDCrossValidator.java b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SDCrossValidator.java
index cb43896..2f6daec 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SDCrossValidator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SDCrossValidator.java
@@ -34,8 +34,6 @@ public class SDCrossValidator {
 
   private final TrainingParameters params;
 
-  private final Character defaultEOS;
-
   private FMeasure fmeasure = new FMeasure();
 
   private SentenceDetectorEvaluationMonitor[] listeners;
@@ -43,25 +41,11 @@ public class SDCrossValidator {
   private SentenceDetectorFactory sdFactory;
 
   public SDCrossValidator(String languageCode, TrainingParameters params,
-                          SentenceDetectorFactory sdFactory, Character defaultEOS,
-                          SentenceDetectorEvaluationMonitor... listeners) {
+      SentenceDetectorFactory sdFactory, SentenceDetectorEvaluationMonitor... listeners) {
     this.languageCode = languageCode;
     this.params = params;
     this.listeners = listeners;
     this.sdFactory = sdFactory;
-    this.defaultEOS = defaultEOS;
-  }
-
-  /**
-   * @deprecated Use
-   *             {@link #SDCrossValidator(String, TrainingParameters,
-   *             SentenceDetectorFactory, Character, SentenceDetectorEvaluationMonitor...)}
-   *             and pass in a {@link SentenceDetectorFactory}.
-   */
-  @Deprecated
-  public SDCrossValidator(String languageCode, TrainingParameters params,
-                          SentenceDetectorFactory sdFactory, SentenceDetectorEvaluationMonitor... listeners) {
-    this(languageCode, params, sdFactory, '\n', listeners);
   }
 
   /**
@@ -70,7 +54,6 @@ public class SDCrossValidator {
    *             SentenceDetectorFactory, SentenceDetectorEvaluationMonitor...)}
    *             and pass in a {@link SentenceDetectorFactory}.
    */
-  @Deprecated
   public SDCrossValidator(String languageCode, TrainingParameters params) {
     this(languageCode, params, new SentenceDetectorFactory(languageCode, true,
         null, null));
@@ -82,7 +65,6 @@ public class SDCrossValidator {
    *             SentenceDetectorEvaluationMonitor...)}
    *             instead and pass in a TrainingParameters object.
    */
-  @Deprecated
   public SDCrossValidator(String languageCode, TrainingParameters params,
       SentenceDetectorEvaluationMonitor... listeners) {
     this(languageCode, params, new SentenceDetectorFactory(languageCode, true,
@@ -94,7 +76,6 @@ public class SDCrossValidator {
    *     SentenceDetectorFactory, SentenceDetectorEvaluationMonitor...)}
    *     instead and pass in a TrainingParameters object.
    */
-  @Deprecated
   public SDCrossValidator(String languageCode) {
     this(languageCode, ModelUtil.createDefaultTrainingParameters());
   }
@@ -122,7 +103,7 @@ public class SDCrossValidator {
       SentenceModel model;
 
       model = SentenceDetectorME.train(languageCode, trainingSampleStream,
-          sdFactory, params, defaultEOS);
+          sdFactory, params);
 
       // do testing
       SentenceDetectorEvaluator evaluator = new SentenceDetectorEvaluator(

http://git-wip-us.apache.org/repos/asf/opennlp/blob/05a916ef/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SDEventStream.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SDEventStream.java b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SDEventStream.java
index aefb54a..a656143 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SDEventStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SDEventStream.java
@@ -20,7 +20,6 @@ package opennlp.tools.sentdetect;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Iterator;
-import java.util.List;
 
 import opennlp.tools.ml.model.Event;
 import opennlp.tools.util.AbstractEventStream;
@@ -29,7 +28,6 @@ import opennlp.tools.util.Span;
 
 public class SDEventStream extends AbstractEventStream<SentenceSample> {
 
-  private final Character defaultEOS;
   private SDContextGenerator cg;
   private EndOfSentenceScanner scanner;
 
@@ -39,40 +37,21 @@ public class SDEventStream extends AbstractEventStream<SentenceSample> {
    * @param samples
    */
   public SDEventStream(ObjectStream<SentenceSample> samples, SDContextGenerator cg,
-                       EndOfSentenceScanner scanner, Character defaultEOS) {
+      EndOfSentenceScanner scanner) {
     super(samples);
 
     this.cg = cg;
     this.scanner = scanner;
-    this.defaultEOS = defaultEOS;
-  }
-
-  /**
-   * Initializes the current instance with NEW LINE as default EOS.
-   *
-   * @param samples
-   */
-  public SDEventStream(ObjectStream<SentenceSample> samples, SDContextGenerator cg,
-                       EndOfSentenceScanner scanner) {
-    super(samples);
-
-    this.cg = cg;
-    this.scanner = scanner;
-    this.defaultEOS = '\n';
   }
 
   @Override
   protected Iterator<Event> createEvents(SentenceSample sample) {
 
-    Collection<Event> events = new ArrayList();
+    Collection<Event> events = new ArrayList<>();
 
     for (Span sentenceSpan : sample.getSentences()) {
       String sentenceString = sentenceSpan.getCoveredText(sample.getDocument()).toString();
 
-      // last position should be a EOS, if not we add it.
-      sentenceString = addTrailingEosIfMissing(sentenceString);
-
-
       for (Iterator<Integer> it = scanner.getPositions(
           sentenceString).iterator(); it.hasNext();) {
 
@@ -90,14 +69,4 @@ public class SDEventStream extends AbstractEventStream<SentenceSample> {
 
     return events.iterator();
   }
-
-  protected String addTrailingEosIfMissing(String sentenceString) {
-    List<Integer> positions = scanner.getPositions(
-        sentenceString.substring(sentenceString.length() - 2));
-    if (positions.size() > 0) {
-      // trailing is a EOS
-      return sentenceString;
-    }
-    return sentenceString + defaultEOS;
-  }
 }

http://git-wip-us.apache.org/repos/asf/opennlp/blob/05a916ef/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java
index c76342e..b5ad804 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java
@@ -299,7 +299,7 @@ public class SentenceDetectorME implements SentenceDetector {
 
   /**
    * @deprecated Use
-   *             {@link #train(String, ObjectStream, SentenceDetectorFactory, TrainingParameters, Character)}
+   *             {@link #train(String, ObjectStream, SentenceDetectorFactory, TrainingParameters)}
    *             and pass in af {@link SentenceDetectorFactory}.
    */
   public static SentenceModel train(String languageCode,
@@ -307,25 +307,18 @@ public class SentenceDetectorME implements SentenceDetector {
       Dictionary abbreviations, TrainingParameters mlParams) throws IOException {
     SentenceDetectorFactory sdFactory = new SentenceDetectorFactory(
         languageCode, useTokenEnd, abbreviations, null);
-    return train(languageCode, samples, sdFactory, mlParams, null);
-  }
-
-  public static SentenceModel train(String languageCode,
-                                    ObjectStream<SentenceSample> samples, SentenceDetectorFactory sdFactory,
-                                    TrainingParameters mlParams) throws IOException {
-
-    return train(languageCode, samples, sdFactory, mlParams, '\n');
+    return train(languageCode, samples, sdFactory, mlParams);
   }
 
   public static SentenceModel train(String languageCode,
       ObjectStream<SentenceSample> samples, SentenceDetectorFactory sdFactory,
-      TrainingParameters mlParams, Character defaultEOS) throws IOException {
+      TrainingParameters mlParams) throws IOException {
 
-    Map<String, String> manifestInfoEntries = new HashMap();
+    Map<String, String> manifestInfoEntries = new HashMap<>();
 
     // TODO: Fix the EventStream to throw exceptions when training goes wrong
     ObjectStream<Event> eventStream = new SDEventStream(samples,
-        sdFactory.getSDContextGenerator(), sdFactory.getEndOfSentenceScanner(), defaultEOS);
+        sdFactory.getSDContextGenerator(), sdFactory.getEndOfSentenceScanner());
 
     EventTrainer trainer = TrainerFactory.getEventTrainer(mlParams, manifestInfoEntries);
 
@@ -336,7 +329,7 @@ public class SentenceDetectorME implements SentenceDetector {
 
   /**
    * @deprecated Use
-   *             {@link #train(String, ObjectStream, SentenceDetectorFactory, TrainingParameters, Character)}
+   *             {@link #train(String, ObjectStream, SentenceDetectorFactory, TrainingParameters)}
    *             and pass in af {@link SentenceDetectorFactory}.
    */
   @Deprecated

http://git-wip-us.apache.org/repos/asf/opennlp/blob/05a916ef/opennlp-tools/src/main/java/opennlp/tools/sentdetect/lang/Factory.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/lang/Factory.java b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/lang/Factory.java
index f4959ea..4a34229 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/lang/Factory.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/lang/Factory.java
@@ -29,13 +29,13 @@ import opennlp.tools.sentdetect.lang.th.SentenceContextGenerator;
 public class Factory {
 
   public static final char[] ptEosCharacters = new char[] { '.', '?', '!', ';',
-      ':', '(', ')', '«', '»', '\'', '"', '\n'};
+      ':', '(', ')', '«', '»', '\'', '"' };
 
-  public static final char[] defaultEosCharacters = new char[] { '.', '!', '?', '\n'};
+  public static final char[] defaultEosCharacters = new char[] { '.', '!', '?' };
 
   public static final char[] thEosCharacters = new char[] { ' ','\n' };
 
-  public static final char[] jpEosCharacters = new char[] {'。', '!', '?', '\n'};
+  public static final char[] jpEosCharacters = new char[] {'。', '!', '?'};
 
   public EndOfSentenceScanner createEndOfSentenceScanner(String languageCode) {
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/05a916ef/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SDEventStreamTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SDEventStreamTest.java b/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SDEventStreamTest.java
index 25a8add..138e915 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SDEventStreamTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SDEventStreamTest.java
@@ -42,7 +42,11 @@ public class SDEventStreamTest {
     ObjectStream<SentenceSample> sampleStream =
         ObjectStreamUtils.createObjectStream(sample);
 
-    ObjectStream<Event> eventStream = createSDEventStream(sampleStream,"eng", '\n');
+    Factory factory = new Factory();
+
+    ObjectStream<Event> eventStream = new SDEventStream(sampleStream,
+        factory.createSentenceContextGenerator("eng"),
+        factory.createEndOfSentenceScanner("eng"));
 
     Assert.assertEquals(SentenceDetectorME.NO_SPLIT, eventStream.read().getOutcome());
     Assert.assertEquals(SentenceDetectorME.SPLIT, eventStream.read().getOutcome());
@@ -51,31 +55,4 @@ public class SDEventStreamTest {
 
     Assert.assertNull(eventStream.read());
   }
-
-  @Test
-  public void testInsertDefaultEOS() throws IOException {
-
-    String document = "Test sent. one Test sent. 2";
-    SentenceSample sample = new SentenceSample(document,
-        new Span(0, 14), new Span(15, 27));
-
-    ObjectStream<SentenceSample> sampleStream =
-        ObjectStreamUtils.createObjectStream(sample);
-
-
-    SDEventStream eventStream = createSDEventStream(sampleStream,"eng", '\n');
-
-    String sent = "abc";
-    Assert.assertEquals(sent + "\n", eventStream.addTrailingEosIfMissing(sent));
-    sent = "abc.";
-    Assert.assertEquals(sent, eventStream.addTrailingEosIfMissing(sent));
-  }
-
-  private SDEventStream createSDEventStream(ObjectStream<SentenceSample> sampleStream,
-                                            String languageCode, Character defaultEOS) {
-    Factory factory = new Factory();
-    return new SDEventStream(sampleStream,
-        factory.createSentenceContextGenerator(languageCode),
-        factory.createEndOfSentenceScanner(languageCode), defaultEOS);
-  }
 }

Reply via email to