opennlp git commit: OPENNLP-1022:Fix documentation to remove references to 'Save XXXModel to database', this closes apache/opennlp#158

2017-04-16 Thread smarthi
Repository: opennlp
Updated Branches:
  refs/heads/master a59765cd4 -> f8fbfc9fd


OPENNLP-1022:Fix documentation to remove references to 'Save XXXModel to 
database', this closes apache/opennlp#158


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/f8fbfc9f
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/f8fbfc9f
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/f8fbfc9f

Branch: refs/heads/master
Commit: f8fbfc9fdca4b5e9ba1a5608ca17e7b6feb18c3c
Parents: a59765c
Author: smarthi 
Authored: Sun Apr 16 21:45:17 2017 -0400
Committer: smarthi 
Committed: Sun Apr 16 21:45:17 2017 -0400

--
 opennlp-docs/src/docbkx/chunker.xml | 29 ++---
 opennlp-docs/src/docbkx/doccat.xml  | 44 ++
 opennlp-docs/src/docbkx/introduction.xml| 17 +-
 opennlp-docs/src/docbkx/lemmatizer.xml  | 38 +++-
 opennlp-docs/src/docbkx/namefinder.xml  | 36 +++-
 opennlp-docs/src/docbkx/parser.xml  |  2 +-
 opennlp-docs/src/docbkx/postagger.xml   | 62 ++--
 opennlp-docs/src/docbkx/sentdetect.xml  | 33 ++-
 opennlp-docs/src/docbkx/tokenizer.xml   | 15 +
 .../main/java/opennlp/tools/ml/BeamSearch.java  | 23 +++-
 10 files changed, 46 insertions(+), 253 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/opennlp/blob/f8fbfc9f/opennlp-docs/src/docbkx/chunker.xml
--
diff --git a/opennlp-docs/src/docbkx/chunker.xml 
b/opennlp-docs/src/docbkx/chunker.xml
index 0c04e8a..b67a7fd 100644
--- a/opennlp-docs/src/docbkx/chunker.xml
+++ b/opennlp-docs/src/docbkx/chunker.xml
@@ -81,19 +81,8 @@ Rockwell_NNP said_VBD the_DT agreement_NN calls_VBZ for_IN 
it_PRP to_TO supply_V
 InputStream modelIn = null;
 ChunkerModel model = null;
 
-try {
-  modelIn = new FileInputStream("en-chunker.bin");
+try (modelIn = new FileInputStream("en-chunker.bin")){
   model = new ChunkerModel(modelIn);
-} catch (IOException e) {
-  // Model loading failed, handle the error
-  e.printStackTrace();
-} finally {
-  if (modelIn != null) {
-try {
-  modelIn.close();
-} catch (IOException e) {
-}
-  }
 }]]>

After the model is loaded a Chunker can be instantiated.
@@ -242,28 +231,18 @@ $ opennlp ChunkerTrainerME -model en-chunker.bin -lang en 
-data en-chunker.train
 illustrates how to do it:
 
 
 
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/f8fbfc9f/opennlp-docs/src/docbkx/doccat.xml
--
diff --git a/opennlp-docs/src/docbkx/doccat.xml 
b/opennlp-docs/src/docbkx/doccat.xml
index 7fe3f1f..c056732 100644
--- a/opennlp-docs/src/docbkx/doccat.xml
+++ b/opennlp-docs/src/docbkx/doccat.xml
@@ -127,33 +127,16 @@ $ opennlp DoccatTrainer -model en-doccat.bin -lang en 
-data en-doccat.train -enc


+]]>

Now might be a good time to cruise over to Hulu or something, because 
this could take a while if you've got a large training set.
You may see a lot of output as well. Once you're done, you can pretty 
quickly step to classification directly,
@@ -162,27 +145,10 @@ finally {



+]]>
 



http://git-wip-us.apache.org/repos/asf/opennlp/blob/f8fbfc9f/opennlp-docs/src/docbkx/introduction.xml
--
diff --git a/opennlp-docs/src/docbkx/introduction.xml 
b/opennlp-docs/src/docbkx/introduction.xml
index a3bd482..65fcd9d 100644
--- a/opennlp-docs/src/docbkx/introduction.xml
+++ b/opennlp-docs/src/docbkx/introduction.xml
@@ -65,23 +65,10 @@ under the License.
 constructor of the model class:
 
 
+]]>
 
 
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/f8fbfc9f/opennlp-docs/src/docbkx/lemmatizer.xml
--
diff --git a/opennlp-docs/src/docbkx/lemmatizer.xml 
b/opennlp-docs/src/docbkx/lemmatizer.xml
index 34668d0..1fa5540 100644
--- a/opennlp-docs/src/docbkx/lemmatizer.xml
+++ b/opennlp-docs/src/docbkx/lemmatizer.xml
@@ -88,22 +88,11 @@ signed VBD sign
In the example below it is loaded from disk:


+}
+]]>

After the model is loaded a LemmatizerME can be 
instantiated.

[opennlp] Git Push Summary

2017-04-16 Thread koji
Repository: opennlp
Updated Branches:
  refs/heads/OPENNLP-1020 [deleted] 2691b7106


opennlp git commit: OPENNLP-1020: MockInputStreamFactory.createInputStream should create a new InputStream. This closes apache/opennlp#156

2017-04-16 Thread koji
Repository: opennlp
Updated Branches:
  refs/heads/master 27214015c -> a59765cd4


OPENNLP-1020: MockInputStreamFactory.createInputStream should create a new 
InputStream. This closes apache/opennlp#156


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/a59765cd
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/a59765cd
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/a59765cd

Branch: refs/heads/master
Commit: a59765cd4eb84f58af508ba5494c42b579c1dab3
Parents: 2721401
Author: koji 
Authored: Mon Apr 17 10:30:40 2017 +0900
Committer: koji 
Committed: Mon Apr 17 10:30:40 2017 +0900

--
 .../lemmatizer/LemmatizerEvaluatorTest.java | 14 +++---
 .../tools/lemmatizer/LemmatizerMETest.java  |  8 ++-
 .../tools/namefind/NameFinderMETest.java| 51 ++--
 .../tools/util/MockInputStreamFactory.java  | 30 
 4 files changed, 46 insertions(+), 57 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/opennlp/blob/a59765cd/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/LemmatizerEvaluatorTest.java
--
diff --git 
a/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/LemmatizerEvaluatorTest.java
 
b/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/LemmatizerEvaluatorTest.java
index 0eb775d..2f4e74d 100644
--- 
a/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/LemmatizerEvaluatorTest.java
+++ 
b/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/LemmatizerEvaluatorTest.java
@@ -18,8 +18,8 @@
 package opennlp.tools.lemmatizer;
 
 import java.io.ByteArrayOutputStream;
+import java.io.File;
 import java.io.IOException;
-import java.io.InputStream;
 import java.io.OutputStream;
 
 import org.junit.Assert;
@@ -47,18 +47,18 @@ public class LemmatizerEvaluatorTest {
*/
   @Test
   public void testEvaluator() throws IOException {
-InputStream inPredicted = getClass().getClassLoader()
-.getResourceAsStream("opennlp/tools/lemmatizer/output.txt");
-InputStream inExpected = getClass().getClassLoader()
-.getResourceAsStream("opennlp/tools/lemmatizer/output.txt");
+String inPredicted = "opennlp/tools/lemmatizer/output.txt";
+String inExpected = "opennlp/tools/lemmatizer/output.txt";
 
 String encoding = "UTF-8";
 
 DummyLemmaSampleStream predictedSample = new DummyLemmaSampleStream(
-new PlainTextByLineStream(new MockInputStreamFactory(inPredicted), 
encoding), true);
+new PlainTextByLineStream(
+  new MockInputStreamFactory(new File(inPredicted)), encoding), true);
 
 DummyLemmaSampleStream expectedSample = new DummyLemmaSampleStream(
-new PlainTextByLineStream(new MockInputStreamFactory(inExpected), 
encoding), false);
+new PlainTextByLineStream(
+  new MockInputStreamFactory(new File(inExpected)), encoding), false);
 
 Lemmatizer dummyLemmatizer = new DummyLemmatizer(predictedSample);
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/a59765cd/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/LemmatizerMETest.java
--
diff --git 
a/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/LemmatizerMETest.java 
b/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/LemmatizerMETest.java
index 97dcc3c..4631763 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/LemmatizerMETest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/LemmatizerMETest.java
@@ -17,8 +17,8 @@
 
 package opennlp.tools.lemmatizer;
 
+import java.io.File;
 import java.io.IOException;
-import java.io.InputStream;
 
 import org.junit.Assert;
 import org.junit.Before;
@@ -63,11 +63,9 @@ public class LemmatizerMETest {
   public void startup() throws IOException {
 // train the lemmatizer
 
-InputStream in = getClass().getClassLoader()
-.getResourceAsStream("opennlp/tools/lemmatizer/trial.old.tsv");
-
 ObjectStream sampleStream = new LemmaSampleStream(
-new PlainTextByLineStream(new MockInputStreamFactory(in), "UTF-8"));
+new PlainTextByLineStream(new MockInputStreamFactory(
+  new File("opennlp/tools/lemmatizer/trial.old.tsv")), "UTF-8"));
 
 TrainingParameters params = new TrainingParameters();
 params.put(TrainingParameters.ITERATIONS_PARAM, Integer.toString(100));

http://git-wip-us.apache.org/repos/asf/opennlp/blob/a59765cd/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderMETest.java
--
diff --git 
a/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderMETest.java 
b/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderMETest.java
index 

opennlp git commit: OPENNLP-1020: MockInputStreamFactory.createInputStream should create a new InputStream

2017-04-16 Thread koji
Repository: opennlp
Updated Branches:
  refs/heads/OPENNLP-1020 [created] 2691b7106


OPENNLP-1020: MockInputStreamFactory.createInputStream should create a new 
InputStream


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/2691b710
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/2691b710
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/2691b710

Branch: refs/heads/OPENNLP-1020
Commit: 2691b7106f5d5698c658af85a814029a32a4331e
Parents: 2721401
Author: koji 
Authored: Mon Apr 17 09:44:04 2017 +0900
Committer: koji 
Committed: Mon Apr 17 09:44:04 2017 +0900

--
 .../lemmatizer/LemmatizerEvaluatorTest.java | 14 +++---
 .../tools/lemmatizer/LemmatizerMETest.java  |  8 ++-
 .../tools/namefind/NameFinderMETest.java| 51 ++--
 .../tools/util/MockInputStreamFactory.java  | 30 
 4 files changed, 46 insertions(+), 57 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/opennlp/blob/2691b710/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/LemmatizerEvaluatorTest.java
--
diff --git 
a/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/LemmatizerEvaluatorTest.java
 
b/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/LemmatizerEvaluatorTest.java
index 0eb775d..2f4e74d 100644
--- 
a/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/LemmatizerEvaluatorTest.java
+++ 
b/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/LemmatizerEvaluatorTest.java
@@ -18,8 +18,8 @@
 package opennlp.tools.lemmatizer;
 
 import java.io.ByteArrayOutputStream;
+import java.io.File;
 import java.io.IOException;
-import java.io.InputStream;
 import java.io.OutputStream;
 
 import org.junit.Assert;
@@ -47,18 +47,18 @@ public class LemmatizerEvaluatorTest {
*/
   @Test
   public void testEvaluator() throws IOException {
-InputStream inPredicted = getClass().getClassLoader()
-.getResourceAsStream("opennlp/tools/lemmatizer/output.txt");
-InputStream inExpected = getClass().getClassLoader()
-.getResourceAsStream("opennlp/tools/lemmatizer/output.txt");
+String inPredicted = "opennlp/tools/lemmatizer/output.txt";
+String inExpected = "opennlp/tools/lemmatizer/output.txt";
 
 String encoding = "UTF-8";
 
 DummyLemmaSampleStream predictedSample = new DummyLemmaSampleStream(
-new PlainTextByLineStream(new MockInputStreamFactory(inPredicted), 
encoding), true);
+new PlainTextByLineStream(
+  new MockInputStreamFactory(new File(inPredicted)), encoding), true);
 
 DummyLemmaSampleStream expectedSample = new DummyLemmaSampleStream(
-new PlainTextByLineStream(new MockInputStreamFactory(inExpected), 
encoding), false);
+new PlainTextByLineStream(
+  new MockInputStreamFactory(new File(inExpected)), encoding), false);
 
 Lemmatizer dummyLemmatizer = new DummyLemmatizer(predictedSample);
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/2691b710/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/LemmatizerMETest.java
--
diff --git 
a/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/LemmatizerMETest.java 
b/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/LemmatizerMETest.java
index 97dcc3c..4631763 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/LemmatizerMETest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/LemmatizerMETest.java
@@ -17,8 +17,8 @@
 
 package opennlp.tools.lemmatizer;
 
+import java.io.File;
 import java.io.IOException;
-import java.io.InputStream;
 
 import org.junit.Assert;
 import org.junit.Before;
@@ -63,11 +63,9 @@ public class LemmatizerMETest {
   public void startup() throws IOException {
 // train the lemmatizer
 
-InputStream in = getClass().getClassLoader()
-.getResourceAsStream("opennlp/tools/lemmatizer/trial.old.tsv");
-
 ObjectStream sampleStream = new LemmaSampleStream(
-new PlainTextByLineStream(new MockInputStreamFactory(in), "UTF-8"));
+new PlainTextByLineStream(new MockInputStreamFactory(
+  new File("opennlp/tools/lemmatizer/trial.old.tsv")), "UTF-8"));
 
 TrainingParameters params = new TrainingParameters();
 params.put(TrainingParameters.ITERATIONS_PARAM, Integer.toString(100));

http://git-wip-us.apache.org/repos/asf/opennlp/blob/2691b710/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderMETest.java
--
diff --git 
a/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderMETest.java 
b/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderMETest.java
index c258d07..876df5b 100644

opennlp git commit: OPENNLP-1024:Add unit tests and javadocs for DirectorySampleStream, this closes apache/opennlp#160

2017-04-16 Thread smarthi
Repository: opennlp
Updated Branches:
  refs/heads/master cff6e0009 -> 27214015c


OPENNLP-1024:Add unit tests and javadocs for DirectorySampleStream, this closes 
apache/opennlp#160


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/27214015
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/27214015
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/27214015

Branch: refs/heads/master
Commit: 27214015c725619e0098e227fd29816d0891d4c5
Parents: cff6e00
Author: jzonthemtn 
Authored: Sun Apr 16 17:24:16 2017 -0400
Committer: smarthi 
Committed: Sun Apr 16 17:24:19 2017 -0400

--
 .../tools/formats/DirectorySampleStream.java|  34 +++-
 .../formats/DirectorySampleStreamTest.java  | 187 +++
 2 files changed, 215 insertions(+), 6 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/opennlp/blob/27214015/opennlp-tools/src/main/java/opennlp/tools/formats/DirectorySampleStream.java
--
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/formats/DirectorySampleStream.java 
b/opennlp-tools/src/main/java/opennlp/tools/formats/DirectorySampleStream.java
index 3a5621a..da73507 100644
--- 
a/opennlp-tools/src/main/java/opennlp/tools/formats/DirectorySampleStream.java
+++ 
b/opennlp-tools/src/main/java/opennlp/tools/formats/DirectorySampleStream.java
@@ -28,24 +28,30 @@ import java.util.Stack;
 import opennlp.tools.util.ObjectStream;
 
 /**
- * The directory sample stream scans a directory (recursively) for plain text
- * files and outputs each file as a String object.
+ * The directory sample stream allows for creating a stream
+ * from a directory listing of files.
  */
 public class DirectorySampleStream implements ObjectStream {
 
   private final List inputDirectories;
 
-  private final boolean isRecursiveScan;
+  private final boolean recursive;
 
   private final FileFilter fileFilter;
 
   private Stack directories = new Stack<>();
 
   private Stack textFiles = new Stack<>();
-
+  
+  /**
+   * Creates a new directory sample stream.
+   * @param dirs The directories to read.
+   * @param fileFilter The {@link FileFilter filter} to apply while 
enumerating files.
+   * @param recursive Enables or disables recursive file listing.
+   */
   public DirectorySampleStream(File[] dirs, FileFilter fileFilter, boolean 
recursive) {
 this.fileFilter = fileFilter;
-isRecursiveScan = recursive;
+this.recursive = recursive;
 
 List inputDirectoryList = new ArrayList<>(dirs.length);
 
@@ -64,10 +70,17 @@ public class DirectorySampleStream implements 
ObjectStream {
 directories.addAll(inputDirectories);
   }
 
+  /**
+   * Creates a new directory sample stream.
+   * @param dir The {@link File directory}.
+   * @param fileFilter The {@link FileFilter filter} to apply while 
enumerating files.
+   * @param recursive Enables or disables recursive file listing.
+   */
   public DirectorySampleStream(File dir, FileFilter fileFilter, boolean 
recursive) {
 this(new File[]{dir}, fileFilter, recursive);
   }
 
+  @Override
   public File read() throws IOException {
 
 while (textFiles.isEmpty() && !directories.isEmpty()) {
@@ -86,7 +99,7 @@ public class DirectorySampleStream implements 
ObjectStream {
 if (file.isFile()) {
   textFiles.push(file);
 }
-else if (isRecursiveScan && file.isDirectory()) {
+else if (recursive && file.isDirectory()) {
   directories.push(file);
 }
   }
@@ -100,6 +113,7 @@ public class DirectorySampleStream implements 
ObjectStream {
 }
   }
 
+  @Override
   public void reset() {
 directories.clear();
 textFiles.clear();
@@ -107,6 +121,14 @@ public class DirectorySampleStream implements 
ObjectStream {
 directories.addAll(inputDirectories);
   }
 
+  /**
+   * {@inheritDoc}
+   * Calling this function has no effect on
+   * the stream.
+   */
+  @Override
   public void close() throws IOException {
+
   }
+  
 }

http://git-wip-us.apache.org/repos/asf/opennlp/blob/27214015/opennlp-tools/src/test/java/opennlp/tools/formats/DirectorySampleStreamTest.java
--
diff --git 
a/opennlp-tools/src/test/java/opennlp/tools/formats/DirectorySampleStreamTest.java
 
b/opennlp-tools/src/test/java/opennlp/tools/formats/DirectorySampleStreamTest.java
new file mode 100644
index 000..d17188e
--- /dev/null
+++ 
b/opennlp-tools/src/test/java/opennlp/tools/formats/DirectorySampleStreamTest.java
@@ -0,0 +1,187 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information 

opennlp git commit: OPENNLP-1023:Remove unused HashList class, this closes apache/opennlp#159

2017-04-16 Thread smarthi
Repository: opennlp
Updated Branches:
  refs/heads/master 4efd950d4 -> e76ba3694


OPENNLP-1023:Remove unused HashList class, this closes apache/opennlp#159


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/e76ba369
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/e76ba369
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/e76ba369

Branch: refs/heads/master
Commit: e76ba36948d3e58d6d726162f27245ff4a347b85
Parents: 4efd950
Author: jzonthemtn 
Authored: Sun Apr 16 16:22:28 2017 -0400
Committer: smarthi 
Committed: Sun Apr 16 16:28:24 2017 -0400

--
 .../main/java/opennlp/tools/util/HashList.java  | 92 
 1 file changed, 92 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/opennlp/blob/e76ba369/opennlp-tools/src/main/java/opennlp/tools/util/HashList.java
--
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/HashList.java 
b/opennlp-tools/src/main/java/opennlp/tools/util/HashList.java
deleted file mode 100644
index e88a907..000
--- a/opennlp-tools/src/main/java/opennlp/tools/util/HashList.java
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-package opennlp.tools.util;
-
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.HashMap;
-import java.util.List;
-
-/**
- * Class which creates mapping between keys and a list of values.
- */
-@SuppressWarnings("unchecked")
-public class HashList extends HashMap {
-
-  private static final long serialVersionUID = 1;
-
-  public HashList() {
-  }
-
-  public Object get(Object key, int index) {
-if (get(key) != null) {
-  return ((List) get(key)).get(index);
-}
-else {
-  return null;
-}
-  }
-
-  public Object putAll(Object key, Collection values) {
-List o = (List) get(key);
-
-if (o == null) {
-  o = new ArrayList();
-  super.put(key, o);
-}
-
-o.addAll(values);
-
-if (o.size() == values.size())
-  return null;
-else
-  return o;
-  }
-
-  @Override
-  public List put(Object key, Object value) {
-List o = (List) get(key);
-
-if (o == null) {
-  o = new ArrayList();
-  super.put(key, o);
-}
-
-o.add(value);
-
-if (o.size() == 1)
-  return null;
-else
-  return o;
-  }
-
-  public boolean remove(Object key, Object value) {
-List l = (List) get(key);
-if (l == null) {
-  return false;
-}
-else {
-  boolean r = l.remove(value);
-  if (l.size() == 0) {
-remove(key);
-  }
-  return r;
-}
-  }
-}



[25/50] [abbrv] opennlp git commit: OPENNLP-996:Remove heap memory settings from Opennlp-tools

2017-04-16 Thread joern
OPENNLP-996:Remove heap memory settings from Opennlp-tools


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/81acc6e6
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/81acc6e6
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/81acc6e6

Branch: refs/heads/parser_regression
Commit: 81acc6e69a7120b3f9644d54c30cae34b02b78f1
Parents: 82caa55
Author: smarthi 
Authored: Sun Feb 26 12:56:04 2017 -0500
Committer: Jörn Kottmann 
Committed: Sun Apr 16 19:24:54 2017 +0200

--
 opennlp-tools/pom.xml | 5 -
 1 file changed, 5 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/opennlp/blob/81acc6e6/opennlp-tools/pom.xml
--
diff --git a/opennlp-tools/pom.xml b/opennlp-tools/pom.xml
index d2630c9..663e903 100644
--- a/opennlp-tools/pom.xml
+++ b/opennlp-tools/pom.xml
@@ -33,10 +33,6 @@
   bundle
   Apache OpenNLP Tools
 
-  
--Xmx4096m
-  
-
   
 
   org.osgi
@@ -81,7 +77,6 @@
 org.apache.maven.plugins
 maven-surefire-plugin
 
-  @{argLine}
   
 /opennlp/tools/eval/**/*
   



[12/50] [abbrv] opennlp git commit: NoJira: Fix badge rendering

2017-04-16 Thread joern
NoJira: Fix badge rendering


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/a2049d6f
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/a2049d6f
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/a2049d6f

Branch: refs/heads/parser_regression
Commit: a2049d6fa4715b681a8b1ab7fb70a2c8923f8975
Parents: 6ecc17e
Author: smarthi 
Authored: Tue Feb 7 22:51:13 2017 -0500
Committer: Jörn Kottmann 
Committed: Sun Apr 16 19:24:52 2017 +0200

--
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/opennlp/blob/a2049d6f/README.md
--
diff --git a/README.md b/README.md
index 2d31eb1..02b146a 100644
--- a/README.md
+++ b/README.md
@@ -20,7 +20,7 @@ Welcome to Apache OpenNLP!
 
 [![Build 
Status](https://api.travis-ci.org/apache/opennlp.svg?branch=master)](https://travis-ci.org/apache/opennlp)
 [![Coverage 
Status](https://coveralls.io/repos/github/apache/opennlp/badge.svg?branch=master)](https://coveralls.io/github/apache/opennlp?branch=master)
-[![Maven 
Central](https://maven-badges.herokuapp.com/maven-central/org.apache.opennlp/opennlp/badge.svg?style=plastic])](https://maven-badges.herokuapp.com/maven-central/org.apache.opennlp/opennlp)
+[![Maven 
Central](https://maven-badges.herokuapp.com/maven-central/org.apache.opennlp/opennlp/badge.svg?style=plastic)](https://maven-badges.herokuapp.com/maven-central/org.apache.opennlp/opennlp)
 [![Documentation 
Status](https://img.shields.io/:docs-latest-green.svg)](http://opennlp.apache.org/documentation.html)
 [![GitHub 
license](https://img.shields.io/badge/license-Apache%202-blue.svg)](https://raw.githubusercontent.com/apache/opennlp/master/LICENSE)
 [![Twitter 
Follow](https://img.shields.io/twitter/follow/ApacheOpennlp.svg?style=social)](https://twitter.com/ApacheOpenNLP)



[16/50] [abbrv] opennlp git commit: OPENNLP-990 Fix all array style violations and add a checkstyle rule

2017-04-16 Thread joern
http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/parser/PosSampleStream.java
--
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/parser/PosSampleStream.java 
b/opennlp-tools/src/main/java/opennlp/tools/parser/PosSampleStream.java
index 1e90ecc..259d9f4 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/parser/PosSampleStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/parser/PosSampleStream.java
@@ -37,8 +37,8 @@ public class PosSampleStream extends 
FilterObjectStream {
 
   Parse[] nodes = parse.getTagNodes();
 
-  String toks[] = new String[nodes.length];
-  String preds[] = new String[nodes.length];
+  String[] toks = new String[nodes.length];
+  String[] preds = new String[nodes.length];
 
   for (int ti = 0; ti < nodes.length; ti++) {
 Parse tok = nodes[ti];

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/postag/POSDictionary.java
--
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/postag/POSDictionary.java 
b/opennlp-tools/src/main/java/opennlp/tools/postag/POSDictionary.java
index f103450..5f5eb25 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/postag/POSDictionary.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/postag/POSDictionary.java
@@ -99,7 +99,7 @@ public class POSDictionary implements Iterable, 
MutableTagDictionary {
 return dictionary.keySet().iterator();
   }
 
-  private static String tagsToString(String tags[]) {
+  private static String tagsToString(String[] tags) {
 
 StringBuilder tagString = new StringBuilder();
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/postag/POSEvaluator.java
--
diff --git a/opennlp-tools/src/main/java/opennlp/tools/postag/POSEvaluator.java 
b/opennlp-tools/src/main/java/opennlp/tools/postag/POSEvaluator.java
index 26cb79c..eaf6baf 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/postag/POSEvaluator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/postag/POSEvaluator.java
@@ -57,8 +57,8 @@ public class POSEvaluator extends Evaluator {
   @Override
   protected POSSample processSample(POSSample reference) {
 
-String predictedTags[] = tagger.tag(reference.getSentence(), 
reference.getAddictionalContext());
-String referenceTags[] = reference.getTags();
+String[] predictedTags = tagger.tag(reference.getSentence(), 
reference.getAddictionalContext());
+String[] referenceTags = reference.getTags();
 
 for (int i = 0; i < referenceTags.length; i++) {
   if (referenceTags[i].equals(predictedTags[i])) {

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/postag/POSSample.java
--
diff --git a/opennlp-tools/src/main/java/opennlp/tools/postag/POSSample.java 
b/opennlp-tools/src/main/java/opennlp/tools/postag/POSSample.java
index b1b2d32..9512e38 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/postag/POSSample.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/postag/POSSample.java
@@ -37,7 +37,7 @@ public class POSSample {
 
   private final String[][] additionalContext;
 
-  public POSSample(String sentence[], String tags[]) {
+  public POSSample(String[] sentence, String[] tags) {
 this(sentence, tags, null);
   }
 
@@ -66,7 +66,7 @@ public class POSSample {
 this.additionalContext = ac;
   }
 
-  public POSSample(String sentence[], String tags[],
+  public POSSample(String[] sentence, String[] tags,
   String[][] additionalContext) {
 this(Arrays.asList(sentence), Arrays.asList(tags), additionalContext);
   }
@@ -120,10 +120,10 @@ public class POSSample {
 
   public static POSSample parse(String sentenceString) throws 
InvalidFormatException {
 
-String tokenTags[] = WhitespaceTokenizer.INSTANCE.tokenize(sentenceString);
+String[] tokenTags = WhitespaceTokenizer.INSTANCE.tokenize(sentenceString);
 
-String sentence[] = new String[tokenTags.length];
-String tags[] = new String[tokenTags.length];
+String[] sentence = new String[tokenTags.length];
+String[] tags = new String[tokenTags.length];
 
 for (int i = 0; i < tokenTags.length; i++) {
   int split = tokenTags[i].lastIndexOf("_");

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/postag/POSSampleEventStream.java
--
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/postag/POSSampleEventStream.java 
b/opennlp-tools/src/main/java/opennlp/tools/postag/POSSampleEventStream.java
index 4dd31e0..aa3c99d 100644
--- 

[27/50] [abbrv] opennlp git commit: NoJira: Run jacoco during build and not afterwards

2017-04-16 Thread joern
NoJira: Run jacoco during build and not afterwards


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/96107813
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/96107813
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/96107813

Branch: refs/heads/parser_regression
Commit: 9610781359e2ffb9a5f09b0c94f3a56a24ca78bc
Parents: 40cdacb
Author: Jörn Kottmann 
Authored: Mon Feb 20 14:22:56 2017 +0100
Committer: Jörn Kottmann 
Committed: Sun Apr 16 19:24:54 2017 +0200

--
 .travis.yml | 4 ++--
 pom.xml | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/opennlp/blob/96107813/.travis.yml
--
diff --git a/.travis.yml b/.travis.yml
index 49d902e..b3399b2 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -20,7 +20,7 @@ before_install:
   - export M2_HOME=$PWD/apache-maven-3.3.9
   - export PATH=$M2_HOME/bin:$PATH
 
-script: mvn clean install
+script: mvn clean install -Pjacoco
 
 after_success:
-  - mvn clean test -Pjacoco jacoco:report coveralls:report
+  - mvn jacoco:report coveralls:report

http://git-wip-us.apache.org/repos/asf/opennlp/blob/96107813/pom.xml
--
diff --git a/pom.xml b/pom.xml
index 8e37452..45d3c37 100644
--- a/pom.xml
+++ b/pom.xml
@@ -214,9 +214,6 @@
org.eluder.coveralls

coveralls-maven-plugin

${coveralls.maven.plugin}
-   
-   
BD8e0j90KZlQdko7H3wEo5a0mTLhmoeyk
-   

 

@@ -397,6 +394,9 @@
 

jacoco
+   
+   1
+   






[02/50] [abbrv] opennlp git commit: NoJira: Fix Coveralls Report, this closes apache/opennlp#116

2017-04-16 Thread joern
NoJira: Fix Coveralls Report, this closes apache/opennlp#116


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/6ecc17e8
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/6ecc17e8
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/6ecc17e8

Branch: refs/heads/parser_regression
Commit: 6ecc17e88b096cd7a12f65b869d9ce6a9444727e
Parents: 6f33261
Author: smarthi 
Authored: Tue Feb 7 22:46:28 2017 -0500
Committer: Jörn Kottmann 
Committed: Sun Apr 16 19:24:51 2017 +0200

--
 README.md |  5 ++---
 opennlp-tools/pom.xml | 10 +++---
 2 files changed, 9 insertions(+), 6 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/opennlp/blob/6ecc17e8/README.md
--
diff --git a/README.md b/README.md
index faff141..2d31eb1 100644
--- a/README.md
+++ b/README.md
@@ -20,10 +20,9 @@ Welcome to Apache OpenNLP!
 
 [![Build 
Status](https://api.travis-ci.org/apache/opennlp.svg?branch=master)](https://travis-ci.org/apache/opennlp)
 [![Coverage 
Status](https://coveralls.io/repos/github/apache/opennlp/badge.svg?branch=master)](https://coveralls.io/github/apache/opennlp?branch=master)
+[![Maven 
Central](https://maven-badges.herokuapp.com/maven-central/org.apache.opennlp/opennlp/badge.svg?style=plastic])](https://maven-badges.herokuapp.com/maven-central/org.apache.opennlp/opennlp)
 [![Documentation 
Status](https://img.shields.io/:docs-latest-green.svg)](http://opennlp.apache.org/documentation.html)
 [![GitHub 
license](https://img.shields.io/badge/license-Apache%202-blue.svg)](https://raw.githubusercontent.com/apache/opennlp/master/LICENSE)
-[![GitHub 
forks](https://img.shields.io/github/forks/apache/opennlp.svg)](https://github.com/apache/opennlp/network)
-[![GitHub 
stars](https://img.shields.io/github/stars/apache/opennlp.svg)](https://github.com/apache/opennlp/stargazers)
 [![Twitter 
Follow](https://img.shields.io/twitter/follow/ApacheOpennlp.svg?style=social)](https://twitter.com/ApacheOpenNLP)
 
 The Apache OpenNLP library is a machine learning based toolkit for the 
processing of natural language text.
@@ -38,7 +37,7 @@ well as the annotated text resources that those models are 
derived from.
 
 For additional information about OpenNLP, visit the [OpenNLP Home 
Page](http://opennlp.apache.org/)
 
-Documentation for OpenNLP, including JavaDocs, code usage and command line 
interface are available[here](http://opennlp.apache.org/documentation.html)
+Documentation for OpenNLP, including JavaDocs, code usage and command line 
interface are available [here](http://opennlp.apache.org/documentation.html)
 
 Using OpenNLP as a Library
 Running any application that uses OpenNLP will require installing a binary or 
source version and setting the environment.

http://git-wip-us.apache.org/repos/asf/opennlp/blob/6ecc17e8/opennlp-tools/pom.xml
--
diff --git a/opennlp-tools/pom.xml b/opennlp-tools/pom.xml
index 22fc017..c7e9624 100644
--- a/opennlp-tools/pom.xml
+++ b/opennlp-tools/pom.xml
@@ -33,6 +33,10 @@
   bundle
   Apache OpenNLP Tools
 
+  <properties>
+    <argLine>-Xmx4096m</argLine>
+  </properties>
+
   
 
   org.osgi
@@ -41,7 +45,7 @@
   provided
   true
 
-
+
 
   org.osgi
   org.osgi.compendium
@@ -49,7 +53,7 @@
   provided
   true
 
-
+
 
   junit
   junit
@@ -77,7 +81,7 @@
 org.apache.maven.plugins
 maven-surefire-plugin
 
-  <argLine>-Xmx4096m</argLine>
+  <argLine>@{argLine}</argLine>
   
 /opennlp/tools/eval/**/*
   



[49/50] [abbrv] opennlp git commit: OPENNLP-1016: Add more tests for StringList

2017-04-16 Thread joern
OPENNLP-1016: Add more tests for StringList


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/5eb8ff8d
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/5eb8ff8d
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/5eb8ff8d

Branch: refs/heads/parser_regression
Commit: 5eb8ff8deb442ece89fad5f14368c6cbe10772d4
Parents: a1ced40
Author: koji 
Authored: Mon Apr 10 14:16:34 2017 +0900
Committer: Jörn Kottmann 
Committed: Sun Apr 16 19:24:57 2017 +0200

--
 .../test/java/opennlp/tools/util/StringListTest.java | 15 ++-
 1 file changed, 14 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/opennlp/blob/5eb8ff8d/opennlp-tools/src/test/java/opennlp/tools/util/StringListTest.java
--
diff --git a/opennlp-tools/src/test/java/opennlp/tools/util/StringListTest.java 
b/opennlp-tools/src/test/java/opennlp/tools/util/StringListTest.java
index a57a2ae..d8c7ca2 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/util/StringListTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/util/StringListTest.java
@@ -28,6 +28,16 @@ import org.junit.Test;
 public class StringListTest {
 
   /**
+   * Tests {@link StringList} which uses {@link String#intern}.
+   */
+  @Test
+  public void testIntern() {
+StringList l1 = new StringList("a");
+StringList l2 = new StringList("a", "b");
+Assert.assertTrue(l1.getToken(0) == l2.getToken(0));
+  }
+
+  /**
* Tests {@link StringList#getToken(int)}.
*/
   @Test
@@ -90,6 +100,8 @@ public class StringListTest {
   public void testHashCode() {
 Assert.assertEquals(new StringList("a", "b").hashCode(),
 new StringList("a", "b").hashCode());
+Assert.assertNotEquals(new StringList("a", "b").hashCode(),
+new StringList("a", "c").hashCode());
   }
 
   /**
@@ -97,6 +109,7 @@ public class StringListTest {
*/
   @Test
   public void testToString() {
-new StringList("a", "b").toString();
+Assert.assertEquals("[a]", new StringList("a").toString());
+Assert.assertEquals("[a,b]", new StringList("a", "b").toString());
   }
 }



[19/50] [abbrv] opennlp git commit: NoJira: Adding public RepoToken to investigate Travis coveralls build failures, this closes apache/opennlp#128

2017-04-16 Thread joern
NoJira: Adding public RepoToken to investigate Travis coveralls build failures, 
this closes apache/opennlp#128


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/fc3b12fa
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/fc3b12fa
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/fc3b12fa

Branch: refs/heads/parser_regression
Commit: fc3b12fa42c58b1065a62150cb4831ad56ec
Parents: 41f153a
Author: smarthi 
Authored: Thu Feb 16 00:17:53 2017 -0500
Committer: Jörn Kottmann 
Committed: Sun Apr 16 19:24:53 2017 +0200

--
 .travis.yml | 6 ++
 pom.xml | 3 +++
 2 files changed, 9 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/opennlp/blob/fc3b12fa/.travis.yml
--
diff --git a/.travis.yml b/.travis.yml
index b4c83ad..49d902e 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -4,6 +4,12 @@ jdk: oraclejdk8
 
 sudo: false
 
+env:
+  global:
+   # The next declaration is the encrypted COVERITY_SCAN_TOKEN, created
+   # via the "travis encrypt" command using the project repo's public key
+   - secure: 
"WLRKO/tD2rFN+a/HKSf9iZkaMaFE8/luXcJCXGfewoHysF7LgIJ76AN9HY50woVJykl+T/tEhK5c/+H/IKO5zH8Rvz/Q9XxPTvUTOwH7oFOHCQ66mKTvn27Z4fp+JbkPKJuhWDUzPvS/Alo3wE70UELnFRTFoRsemfNNa95uPJobfx5deOfX80mipHOn16dA1q8LuzQa6iF2HIVuh7ygLleTV0cDJyXmIlg3EbKGEitozIv0WkwALrBjLS7KmCcXTKxXqCm1Be2MFRoh9ab2bEooXlv2zRh2wT0c04RckFm1AJGpGQelXLl3NxxcRJSpIN9OTkpVUfwm28TIXk2SzdgPMrP11yFK/DPKTv0jwyk1bFrmZMMso5Y2rP6wjNEtw5ExYSpk3xebcieLJwXhCwkkWAT3DdAAeXO5z4Nf36lryjRgqvlsVF1ofqAK5Sh+qH93/TJOE+hVEj74xUT9pVaxemY61ymvSt8L21XkUsp8T5ILq9jWoaMQCaAwZIaJiHXYjQhmsrFRkNaY4cl9AUGwpHmm750uqhmoVfuJzQg5/vGMZ0LWeCgR9qsG5MG0yijE8ghExUOe7R4gcNAJW2XOfjzMTy74jdsJbsJPUeci/R4wzrXTSCQVJ5nj2LhBF6HyqPyUrIV2MB14gAIItc1LASuB1GLkGoXjIdt0HN8="
+
 cache:
   directories:
 - $HOME/.m2

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fc3b12fa/pom.xml
--
diff --git a/pom.xml b/pom.xml
index 268a54e..8e37452 100644
--- a/pom.xml
+++ b/pom.xml
@@ -214,6 +214,9 @@
org.eluder.coveralls

coveralls-maven-plugin

${coveralls.maven.plugin}
+   
+   
BD8e0j90KZlQdko7H3wEo5a0mTLhmoeyk
+   

 




[21/50] [abbrv] opennlp git commit: NoJira: Add checkstyle for new lines and fix existing files

2017-04-16 Thread joern
NoJira: Add checkstyle for new lines and fix existing files

This closes #123


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/73cf5600
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/73cf5600
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/73cf5600

Branch: refs/heads/parser_regression
Commit: 73cf5600206d225f46c17797793f2e2b77e7d422
Parents: 91352d5
Author: jzonthemtn 
Authored: Mon Feb 13 15:20:30 2017 -0500
Committer: Jörn Kottmann 
Committed: Sun Apr 16 19:24:53 2017 +0200

--
 checkstyle.xml| 2 ++
 .../java/opennlp/morfologik/tagdict/POSTaggerFactoryTest.java | 3 ++-
 .../src/main/java/opennlp/tools/chunker/package-info.java | 2 +-
 .../main/java/opennlp/tools/cmdline/AbstractEvaluatorTool.java| 2 +-
 .../src/main/java/opennlp/tools/cmdline/BasicCmdLineTool.java | 2 +-
 .../src/main/java/opennlp/tools/cmdline/ObjectStreamFactory.java  | 2 +-
 .../java/opennlp/tools/cmdline/chunker/ChunkerConverterTool.java  | 2 +-
 .../java/opennlp/tools/cmdline/lemmatizer/LemmatizerMETool.java   | 2 +-
 .../main/java/opennlp/tools/cmdline/lemmatizer/package-info.java  | 2 +-
 .../main/java/opennlp/tools/cmdline/params/BasicFormatParams.java | 2 +-
 .../tools/cmdline/sentdetect/SentenceDetectorConverterTool.java   | 2 +-
 .../opennlp/tools/cmdline/tokenizer/TokenizerConverterTool.java   | 2 +-
 .../src/main/java/opennlp/tools/dictionary/package-info.java  | 2 +-
 .../src/main/java/opennlp/tools/doccat/package-info.java  | 2 +-
 .../java/opennlp/tools/formats/AbstractSampleStreamFactory.java   | 2 +-
 .../java/opennlp/tools/formats/ChunkerSampleStreamFactory.java| 2 +-
 .../opennlp/tools/formats/DetokenizerSampleStreamFactory.java | 2 +-
 .../java/opennlp/tools/formats/LemmatizerSampleStreamFactory.java | 2 +-
 .../main/java/opennlp/tools/formats/ParseSampleStreamFactory.java | 2 +-
 .../java/opennlp/tools/formats/SentenceSampleStreamFactory.java   | 2 +-
 .../main/java/opennlp/tools/formats/TokenSampleStreamFactory.java | 2 +-
 .../tools/formats/ontonotes/OntoNotesNameSampleStream.java| 2 +-
 .../src/main/java/opennlp/tools/formats/package-info.java | 2 +-
 .../src/main/java/opennlp/tools/languagemodel/package-info.java   | 2 +-
 .../src/main/java/opennlp/tools/lemmatizer/package-info.java  | 2 +-
 .../java/opennlp/tools/namefind/NameFinderSequenceValidator.java  | 2 +-
 .../src/main/java/opennlp/tools/namefind/package-info.java| 2 +-
 opennlp-tools/src/main/java/opennlp/tools/ngram/package-info.java | 2 +-
 opennlp-tools/src/main/java/opennlp/tools/package-info.java   | 2 +-
 opennlp-tools/src/main/java/opennlp/tools/parser/HeadRules.java   | 2 +-
 .../java/opennlp/tools/parser/ParserChunkerSequenceValidator.java | 2 +-
 .../src/main/java/opennlp/tools/parser/ParserEventTypeEnum.java   | 2 +-
 .../src/main/java/opennlp/tools/parser/chunking/package-info.java | 2 +-
 .../src/main/java/opennlp/tools/parser/package-info.java  | 2 +-
 .../java/opennlp/tools/parser/treeinsert/ParserEventStream.java   | 2 +-
 .../main/java/opennlp/tools/parser/treeinsert/package-info.java   | 2 +-
 .../src/main/java/opennlp/tools/postag/package-info.java  | 2 +-
 .../src/main/java/opennlp/tools/sentdetect/package-info.java  | 2 +-
 .../src/main/java/opennlp/tools/tokenize/package-info.java| 2 +-
 .../src/main/java/opennlp/tools/util/CollectionObjectStream.java  | 2 +-
 .../src/main/java/opennlp/tools/util/InputStreamFactory.java  | 2 +-
 .../src/main/java/opennlp/tools/util/ext/package-info.java| 2 +-
 .../opennlp/tools/util/featuregen/BigramNameFeatureGenerator.java | 2 +-
 .../src/main/java/opennlp/tools/util/featuregen/package-info.java | 2 +-
 .../main/java/opennlp/tools/util/model/DictionarySerializer.java  | 2 +-
 .../main/java/opennlp/tools/util/model/PropertiesSerializer.java  | 2 +-
 opennlp-tools/src/main/java/opennlp/tools/util/package-info.java  | 2 +-
 .../src/test/java/opennlp/tools/dictionary/DictionaryTest.java| 2 +-
 .../java/opennlp/tools/formats/Conll03NameSampleStreamTest.java   | 2 +-
 .../test/java/opennlp/tools/ml/maxent/ScaleDoesntMatterTest.java  | 2 +-
 .../opennlp/tools/ml/maxent/io/RealValueFileEventStreamTest.java  | 2 +-
 .../opennlp/tools/ml/naivebayes/NaiveBayesModelReadWriteTest.java | 2 +-
 .../java/opennlp/tools/namefind/DictionaryNameFinderTest.java | 2 +-
 .../src/test/java/opennlp/tools/namefind/RegexNameFinderTest.java | 2 +-
 .../src/test/java/opennlp/tools/postag/DummyPOSTaggerFactory.java | 2 +-
 .../src/test/java/opennlp/tools/postag/POSModelTest.java  | 2 +-
 .../src/test/java/opennlp/tools/postag/POSSampleTest.java | 2 +-
 .../src/test/java/opennlp/tools/tokenize/SimpleTokenizerTest.java | 2 +-
 

[15/50] [abbrv] opennlp git commit: OPENNLP-983: Make suffix/prefix length configurable

2017-04-16 Thread joern
OPENNLP-983: Make suffix/prefix length configurable

This closes #121


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/1cd2658d
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/1cd2658d
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/1cd2658d

Branch: refs/heads/parser_regression
Commit: 1cd2658d0179afcf982229fab9c24da62f944c58
Parents: fc3b12f
Author: jzonthemtn 
Authored: Mon Feb 13 07:57:21 2017 -0500
Committer: Jörn Kottmann 
Committed: Sun Apr 16 19:24:53 2017 +0200

--
 .../tools/util/featuregen/GeneratorFactory.java | 22 -
 .../util/featuregen/PrefixFeatureGenerator.java | 32 +--
 .../util/featuregen/SuffixFeatureGenerator.java | 33 +--
 .../featuregen/PrefixFeatureGeneratorTest.java  | 92 
 .../featuregen/SuffixFeatureGeneratorTest.java  | 92 
 5 files changed, 251 insertions(+), 20 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/opennlp/blob/1cd2658d/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java
--
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java
 
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java
index fa97f43..ef08cfb 100644
--- 
a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java
+++ 
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java
@@ -555,7 +555,16 @@ public class GeneratorFactory {
 
 public AdaptiveFeatureGenerator create(Element generatorElement,
 FeatureGeneratorResourceProvider resourceManager) {
-  return new PrefixFeatureGenerator();
+
+  String attribute = generatorElement.getAttribute("length");
+
+  int prefixLength = PrefixFeatureGenerator.DEFAULT_MAX_LENGTH;
+
+  if (!Objects.equals(attribute, "")) {
+prefixLength = Integer.parseInt(attribute);
+  }
+
+  return new PrefixFeatureGenerator(prefixLength);
 }
 
 static void register(Map factoryMap) {
@@ -570,7 +579,16 @@ public class GeneratorFactory {
 
 public AdaptiveFeatureGenerator create(Element generatorElement,
 FeatureGeneratorResourceProvider resourceManager) {
-  return new SuffixFeatureGenerator();
+
+  String attribute = generatorElement.getAttribute("length");
+
+  int suffixLength = SuffixFeatureGenerator.DEFAULT_MAX_LENGTH;
+
+  if (!Objects.equals(attribute, "")) {
+suffixLength = Integer.parseInt(attribute);
+  }
+
+  return new SuffixFeatureGenerator(suffixLength);
 }
 
 static void register(Map factoryMap) {

http://git-wip-us.apache.org/repos/asf/opennlp/blob/1cd2658d/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/PrefixFeatureGenerator.java
--
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/PrefixFeatureGenerator.java
 
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/PrefixFeatureGenerator.java
index 8cdd48f..04fcd15 100644
--- 
a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/PrefixFeatureGenerator.java
+++ 
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/PrefixFeatureGenerator.java
@@ -21,21 +21,35 @@ import java.util.List;
 
 public class PrefixFeatureGenerator implements AdaptiveFeatureGenerator {
 
-  private static final int PREFIX_LENGTH = 4;
-
-  private static String[] getPrefixes(String lex) {
-String[] prefs = new String[PREFIX_LENGTH];
-for (int li = 0; li < PREFIX_LENGTH; li++) {
-  prefs[li] = lex.substring(0, Math.min(li + 1, lex.length()));
-}
-return prefs;
+  static final int DEFAULT_MAX_LENGTH = 4;
+  
+  private final int prefixLength;
+  
+  public PrefixFeatureGenerator() {
+prefixLength = DEFAULT_MAX_LENGTH;
+  }
+  
+  public PrefixFeatureGenerator(int prefixLength) {
+this.prefixLength = prefixLength;
   }
 
+  @Override
   public void createFeatures(List features, String[] tokens, int index,
   String[] previousOutcomes) {
-String[] prefs = PrefixFeatureGenerator.getPrefixes(tokens[index]);
+String[] prefs = getPrefixes(tokens[index]);
 for (String pref : prefs) {
   features.add("pre=" + pref);
 }
   }
+  
+  private String[] getPrefixes(String lex) {
+  
+int prefixes = Math.min(prefixLength, lex.length());
+
+String[] prefs = new String[prefixes];
+for (int li = 0; li < prefixes; li++) {
+  prefs[li] = lex.substring(0, Math.min(li + 1, lex.length()));
+}
+return prefs;

[33/50] [abbrv] opennlp git commit: OPENNLP-997: Exclude the generated stemmer code from the coverage report, this closes apache/opennlp#135

2017-04-16 Thread joern
OPENNLP-997: Exclude the generated stemmer code from the coverage report, this 
closes apache/opennlp#135


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/20d0a76f
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/20d0a76f
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/20d0a76f

Branch: refs/heads/parser_regression
Commit: 20d0a76fe092993c25abf7aa3dfce34bcb72db5f
Parents: 76609f5
Author: smarthi 
Authored: Tue Feb 28 08:28:05 2017 -0500
Committer: Jörn Kottmann 
Committed: Sun Apr 16 19:24:55 2017 +0200

--
 pom.xml | 14 --
 1 file changed, 12 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/opennlp/blob/20d0a76f/pom.xml
--
diff --git a/pom.xml b/pom.xml
index 45d3c37..12c9ee6 100644
--- a/pom.xml
+++ b/pom.xml
@@ -126,7 +126,7 @@
2.17
1.0C
4.3.0
-   0.7.8
+   0.7.9
2.19.1

 
@@ -185,7 +185,13 @@

org.jacoco

jacoco-maven-plugin
-   0.7.8
+   
${jacoco.maven.plugin}
+   
+   
+**/stemmer/*
+   
**/stemmer/snowball/*
+
+   



jacoco-prepare-agent
@@ -222,6 +228,10 @@

${maven.surefire.plugin}


${opennlp.forkCount}
+   
+   
**/stemmer/*
+   
**/stemmer/snowball/*
+   


 



[20/50] [abbrv] opennlp git commit: OPENNLP-986 - Stupid Backoff as default LM discounting

2017-04-16 Thread joern
OPENNLP-986 - Stupid Backoff as default LM discounting


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/41f153aa
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/41f153aa
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/41f153aa

Branch: refs/heads/parser_regression
Commit: 41f153aa946641afb076b86c243a7b93667778f6
Parents: 73cf560
Author: Tommaso Teofili 
Authored: Tue Feb 14 14:49:09 2017 +0100
Committer: Jörn Kottmann 
Committed: Sun Apr 16 19:24:53 2017 +0200

--
 .../tools/languagemodel/NGramLanguageModel.java | 74 +---
 .../java/opennlp/tools/ngram/NGramUtils.java|  3 +-
 .../LanguageModelEvaluationTest.java|  2 +-
 .../languagemodel/NgramLanguageModelTest.java   | 15 ++--
 4 files changed, 28 insertions(+), 66 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/opennlp/blob/41f153aa/opennlp-tools/src/main/java/opennlp/tools/languagemodel/NGramLanguageModel.java
--
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/languagemodel/NGramLanguageModel.java
 
b/opennlp-tools/src/main/java/opennlp/tools/languagemodel/NGramLanguageModel.java
index e11c107..501c1bc 100644
--- 
a/opennlp-tools/src/main/java/opennlp/tools/languagemodel/NGramLanguageModel.java
+++ 
b/opennlp-tools/src/main/java/opennlp/tools/languagemodel/NGramLanguageModel.java
@@ -26,52 +26,30 @@ import opennlp.tools.util.StringList;
 
 /**
  * A {@link opennlp.tools.languagemodel.LanguageModel} based on a {@link 
opennlp.tools.ngram.NGramModel}
- * using Laplace smoothing probability estimation to get the probabilities of 
the ngrams.
- * See also {@link NGramUtils#calculateLaplaceSmoothingProbability(
- *opennlp.tools.util.StringList, Iterable, int, Double)}.
+ * using Stupid Backoff to get the probabilities of the ngrams.
  */
 public class NGramLanguageModel extends NGramModel implements LanguageModel {
 
   private static final int DEFAULT_N = 3;
-  private static final double DEFAULT_K = 1d;
 
   private final int n;
-  private final double k;
 
   public NGramLanguageModel() {
-this(DEFAULT_N, DEFAULT_K);
+this(DEFAULT_N);
   }
 
   public NGramLanguageModel(int n) {
-this(n, DEFAULT_K);
-  }
-
-  public NGramLanguageModel(double k) {
-this(DEFAULT_N, k);
-  }
-
-  public NGramLanguageModel(int n, double k) {
 this.n = n;
-this.k = k;
   }
 
   public NGramLanguageModel(InputStream in) throws IOException {
-this(in, DEFAULT_N, DEFAULT_K);
-  }
-
-  public NGramLanguageModel(InputStream in, double k) throws IOException {
-this(in, DEFAULT_N, k);
-  }
-
-  public NGramLanguageModel(InputStream in, int n) throws IOException {
-this(in, n, DEFAULT_K);
+this(in, DEFAULT_N);
   }
 
-  public NGramLanguageModel(InputStream in, int n, double k)
+  public NGramLanguageModel(InputStream in, int n)
   throws IOException {
 super(in);
 this.n = n;
-this.k = k;
   }
 
   @Override
@@ -79,24 +57,13 @@ public class NGramLanguageModel extends NGramModel 
implements LanguageModel {
 double probability = 0d;
 if (size() > 0) {
   for (StringList ngram : NGramUtils.getNGrams(sample, n)) {
-StringList nMinusOneToken = NGramUtils
-.getNMinusOneTokenFirst(ngram);
-if (size() > 100) {
-  // use stupid backoff
-  probability += Math.log(
-  getStupidBackoffProbability(ngram, nMinusOneToken));
-} else {
-  // use laplace smoothing
-  probability += Math.log(
-  getLaplaceSmoothingProbability(ngram, nMinusOneToken));
+double score = stupidBackoff(ngram);
+probability += Math.log(score);
+if (Double.isNaN(probability)) {
+  probability = 0d;
 }
   }
-  if (Double.isNaN(probability)) {
-probability = 0d;
-  } else if (probability != 0) {
-probability = Math.exp(probability);
-  }
-
+  probability = Math.exp(probability);
 }
 return probability;
   }
@@ -125,24 +92,21 @@ public class NGramLanguageModel extends NGramModel 
implements LanguageModel {
 return token;
   }
 
-  private double getLaplaceSmoothingProbability(StringList ngram,
-StringList nMinusOneToken) {
-return (getCount(ngram) + k) / (getCount(nMinusOneToken) + k * size());
-  }
-
-  private double getStupidBackoffProbability(StringList ngram,
- StringList nMinusOneToken) {
+  private double stupidBackoff(StringList ngram) {
 int count = getCount(ngram);
+StringList nMinusOneToken = NGramUtils.getNMinusOneTokenFirst(ngram);
 if (nMinusOneToken == null || nMinusOneToken.size() 

[41/50] [abbrv] opennlp git commit: OPENNLP-1004: Write a test case for the BilouCodec class

2017-04-16 Thread joern
OPENNLP-1004: Write a test case for the BilouCodec class

This closes #142


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/bc99b72f
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/bc99b72f
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/bc99b72f

Branch: refs/heads/parser_regression
Commit: bc99b72feaa8e3416feaa5df59c6198bfe7dbd8b
Parents: 5c9f6ab
Author: Peter Thygesen 
Authored: Wed Mar 15 18:54:26 2017 +0100
Committer: Jörn Kottmann 
Committed: Sun Apr 16 19:24:56 2017 +0200

--
 .../opennlp/tools/namefind/BilouCodecTest.java  | 209 +++
 1 file changed, 209 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/opennlp/blob/bc99b72f/opennlp-tools/src/test/java/opennlp/tools/namefind/BilouCodecTest.java
--
diff --git 
a/opennlp-tools/src/test/java/opennlp/tools/namefind/BilouCodecTest.java 
b/opennlp-tools/src/test/java/opennlp/tools/namefind/BilouCodecTest.java
new file mode 100644
index 0000000..96d939f
--- /dev/null
+++ b/opennlp-tools/src/test/java/opennlp/tools/namefind/BilouCodecTest.java
@@ -0,0 +1,209 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.namefind;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import opennlp.tools.util.Span;
+
+/**
+ * This is the test class for {@link BilouCodec}.
+ */
+public class BilouCodecTest {
+
+  private static final BilouCodec codec = new BilouCodec();
+
+  private static final String A_TYPE = "atype";
+  private static final String A_START = A_TYPE + "-" + BilouCodec.START;
+  private static final String A_CONTINUE = A_TYPE + "-" + BilouCodec.CONTINUE;
+  private static final String A_LAST = A_TYPE + "-" + BilouCodec.LAST;
+  private static final String A_UNIT = A_TYPE + "-" + BilouCodec.UNIT;
+
+  private static final String B_TYPE = "btype";
+  private static final String B_START = B_TYPE + "-" + BilouCodec.START;
+  private static final String B_CONTINUE = B_TYPE + "-" + BilouCodec.CONTINUE;
+  private static final String B_LAST = B_TYPE + "-" + BilouCodec.LAST;
+  private static final String B_UNIT = B_TYPE + "-" + BilouCodec.UNIT;
+
+  private static final String C_TYPE = "ctype";
+  private static final String C_UNIT = C_TYPE + "-" + BilouCodec.UNIT;
+
+  private static final String OTHER = BilouCodec.OTHER;
+
+  @Test
+  public void testEncodeNoNames() {
+NameSample nameSample = new NameSample("Once upon a time.".split(" "), new 
Span[] {}, true);
+String[] expected = new String[] {OTHER, OTHER, OTHER, OTHER};
+String[] acutal = codec.encode(nameSample.getNames(), 
nameSample.getSentence().length);
+Assert.assertArrayEquals("Only 'Other' is expected.", expected, acutal);
+  }
+
+  @Test
+  public void testEncodeSingleUnitTokenSpan() {
+String[] sentence = "I called Julie again.".split(" ");
+Span[] singleSpan = new Span[] { new Span(2,3, A_TYPE)};
+NameSample nameSample = new NameSample(sentence, singleSpan, true);
+String[] expected = new String[] {OTHER, OTHER, A_UNIT, OTHER};
+String[] acutal = codec.encode(nameSample.getNames(), 
nameSample.getSentence().length);
+Assert.assertArrayEquals("'Julie' should be 'unit' only, the rest should 
be 'other'.", expected, acutal);
+  }
+
+  @Test
+  public void testEncodeDoubleTokenSpan() {
+String[] sentence = "I saw Stefanie Schmidt today.".split(" ");
+Span[] singleSpan = new Span[] { new Span(2,4, A_TYPE)};
+NameSample nameSample = new NameSample(sentence, singleSpan, true);
+String[] expected = new String[] {OTHER, OTHER, A_START, A_LAST, OTHER};
+String[] acutal = codec.encode(nameSample.getNames(), 
nameSample.getSentence().length);
+Assert.assertArrayEquals("'Stefanie' should be 'start' only, 'Schmidt' is 
'last' " +
+"and the rest should be 'other'.", expected, acutal);
+  }
+
+  @Test
+  public void 

[42/50] [abbrv] opennlp git commit: OPENNLP-1005: Implement areOutcomesCompatible for BilouCodec

2017-04-16 Thread joern
OPENNLP-1005: Implement areOutcomesCompatible for BilouCodec

This issue closes #144


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/36de0131
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/36de0131
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/36de0131

Branch: refs/heads/parser_regression
Commit: 36de0131947d98e8246ef0fcf8eaf56d546d27b4
Parents: bc99b72
Author: Peter Thygesen 
Authored: Thu Mar 16 11:19:58 2017 +0100
Committer: Jörn Kottmann 
Committed: Sun Apr 16 19:24:56 2017 +0200

--
 .../java/opennlp/tools/namefind/BilouCodec.java |  61 +++
 .../opennlp/tools/namefind/BilouCodecTest.java  | 375 +++
 2 files changed, 436 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/opennlp/blob/36de0131/opennlp-tools/src/main/java/opennlp/tools/namefind/BilouCodec.java
--
diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/BilouCodec.java 
b/opennlp-tools/src/main/java/opennlp/tools/namefind/BilouCodec.java
index 7e8508a..50cc4bf 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/namefind/BilouCodec.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/BilouCodec.java
@@ -19,7 +19,9 @@ package opennlp.tools.namefind;
 
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.HashSet;
 import java.util.List;
+import java.util.Set;
 
 import opennlp.tools.util.SequenceCodec;
 import opennlp.tools.util.SequenceValidator;
@@ -111,8 +113,67 @@ public class BilouCodec implements SequenceCodec {
 return new BilouNameFinderSequenceValidator();
   }
 
+  /**
+   * B requires CL or L
+   * C requires BL
+   * L requires B
+   * O requires any valid combo/unit
+   * U requires none
+   *
+   * @param outcomes all possible model outcomes
+   *
+   * @return true, if model outcomes are compatible
+   */
   @Override
   public boolean areOutcomesCompatible(String[] outcomes) {
+Set<String> start = new HashSet<>();
+Set<String> cont = new HashSet<>();
+Set<String> last = new HashSet<>();
+Set<String> unit = new HashSet<>();
+
+for (int i = 0; i < outcomes.length; i++) {
+  String outcome = outcomes[i];
+  if (outcome.endsWith(BilouCodec.START)) {
+start.add(outcome.substring(0, outcome.length()
+- BilouCodec.START.length()));
+  } else if (outcome.endsWith(BilouCodec.CONTINUE)) {
+cont.add(outcome.substring(0, outcome.length()
+- BilouCodec.CONTINUE.length()));
+  } else if (outcome.endsWith(BilouCodec.LAST)) {
+last.add(outcome.substring(0, outcome.length()
+- BilouCodec.LAST.length()));
+  } else if (outcome.endsWith(BilouCodec.UNIT)) {
+unit.add(outcome.substring(0, outcome.length()
+- BilouCodec.UNIT.length()));
+  } else if (!outcome.equals(BilouCodec.OTHER)) {
+return false;
+  }
+}
+
+if (start.size() == 0 && unit.size() == 0) {
+  return false;
+} else {
+  // Start, must have matching Last
+  for (String startPrefix : start) {
+if (!last.contains(startPrefix)) {
+  return false;
+}
+  }
+  // Cont, must have matching Start and Last
+  for (String contPrefix : cont) {
+if (!start.contains(contPrefix) && !last.contains(contPrefix)) {
+  return false;
+}
+  }
+  // Last, must have matching Start
+  for (String lastPrefix : last) {
+if (!start.contains(lastPrefix)) {
+  return false;
+}
+  }
+
+}
+
 return true;
   }
 }

http://git-wip-us.apache.org/repos/asf/opennlp/blob/36de0131/opennlp-tools/src/test/java/opennlp/tools/namefind/BilouCodecTest.java
--
diff --git 
a/opennlp-tools/src/test/java/opennlp/tools/namefind/BilouCodecTest.java 
b/opennlp-tools/src/test/java/opennlp/tools/namefind/BilouCodecTest.java
index 96d939f..353c7e4 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/namefind/BilouCodecTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/namefind/BilouCodecTest.java
@@ -206,4 +206,379 @@ public class BilouCodecTest {
 Assert.assertArrayEquals(expected, actual);
   }
 
+
+  @Test
+  public void testCompatibilityEmpty() {
+Assert.assertFalse(codec.areOutcomesCompatible(new String[] {}));
+  }
+
+  /**
+   * Singles and singles in combination with other valid type (unit/start+last)
+   */
+
+  /**
+   * B-Start => Fail
+   * A-Unit, B-Start => Fail
+   * A-Start, A-Last, B-Start => Fail
+   */
+  @Test
+  public void testCompatibilitySinglesStart() {
+Assert.assertFalse(codec.areOutcomesCompatible(new String[] {B_START}));
+Assert.assertFalse(codec.areOutcomesCompatible(new 

[18/50] [abbrv] opennlp git commit: OPENNLP-176: Switch language codes to ISO-639-3

2017-04-16 Thread joern
OPENNLP-176: Switch language codes to ISO-639-3

This closes #114


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/eee42316
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/eee42316
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/eee42316

Branch: refs/heads/parser_regression
Commit: eee423166308c454dc176d2d52b12c29e2a08f19
Parents: fdff127
Author: Jörn Kottmann 
Authored: Sun Jan 29 11:06:08 2017 +0100
Committer: Jörn Kottmann 
Committed: Sun Apr 16 19:24:53 2017 +0200

--
 .../cmdline/namefind/CensusDictionaryCreatorTool.java |  2 +-
 .../opennlp/tools/cmdline/parser/ParserTrainerTool.java   |  4 ++--
 .../tools/formats/AbstractSampleStreamFactory.java|  2 +-
 .../tools/formats/Conll03NameSampleStreamFactory.java |  6 +++---
 .../main/java/opennlp/tools/sentdetect/lang/Factory.java  | 10 +-
 5 files changed, 12 insertions(+), 12 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/opennlp/blob/eee42316/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/CensusDictionaryCreatorTool.java
--
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/CensusDictionaryCreatorTool.java
 
b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/CensusDictionaryCreatorTool.java
index 6042510..f9bf5e0 100644
--- 
a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/CensusDictionaryCreatorTool.java
+++ 
b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/CensusDictionaryCreatorTool.java
@@ -50,7 +50,7 @@ public class CensusDictionaryCreatorTool extends 
BasicCmdLineTool {
   interface Parameters {
 
 @ParameterDescription(valueName = "code")
-@OptionalParameter(defaultValue = "en")
+@OptionalParameter(defaultValue = "eng")
 String getLang();
 
 @ParameterDescription(valueName = "charsetName")

http://git-wip-us.apache.org/repos/asf/opennlp/blob/eee42316/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTrainerTool.java
--
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTrainerTool.java
 
b/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTrainerTool.java
index 3a8dd5a..2709fd5 100644
--- 
a/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTrainerTool.java
+++ 
b/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTrainerTool.java
@@ -90,10 +90,10 @@ public final class ParserTrainerTool extends 
AbstractTrainerTool

[37/50] [abbrv] opennlp git commit: OPENNLP-1000: Add a test case for the BilouNameFinderSequenceValidator

2017-04-16 Thread joern
OPENNLP-1000: Add a test case for the BilouNameFinderSequenceValidator

This closes #139


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/5c9f6ab3
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/5c9f6ab3
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/5c9f6ab3

Branch: refs/heads/parser_regression
Commit: 5c9f6ab305ae806580f720fb738dac125e42e0b5
Parents: 17493d1
Author: Peter Thygesen 
Authored: Mon Mar 13 00:49:49 2017 +0100
Committer: Jörn Kottmann 
Committed: Sun Apr 16 19:24:56 2017 +0200

--
 .../BilouNameFinderSequenceValidator.java   |  23 +-
 .../BilouNameFinderSequenceValidatorTest.java   | 435 +++
 2 files changed, 449 insertions(+), 9 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/opennlp/blob/5c9f6ab3/opennlp-tools/src/main/java/opennlp/tools/namefind/BilouNameFinderSequenceValidator.java
--
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/namefind/BilouNameFinderSequenceValidator.java
 
b/opennlp-tools/src/main/java/opennlp/tools/namefind/BilouNameFinderSequenceValidator.java
index 6e73504..19700fb 100644
--- 
a/opennlp-tools/src/main/java/opennlp/tools/namefind/BilouNameFinderSequenceValidator.java
+++ 
b/opennlp-tools/src/main/java/opennlp/tools/namefind/BilouNameFinderSequenceValidator.java
@@ -23,19 +23,22 @@ public class BilouNameFinderSequenceValidator implements
 SequenceValidator {
 
   public boolean validSequence(int i, String[] inputSequence,
-  String[] outcomesSequence, String outcome) {
+   String[] outcomesSequence, String outcome) {
 
-if (outcome.endsWith(NameFinderME.CONTINUE) || 
outcome.endsWith(BilouCodec.LAST)) {
+if (outcome.endsWith(BilouCodec.CONTINUE) || 
outcome.endsWith(BilouCodec.LAST)) {
 
   int li = outcomesSequence.length - 1;
 
   if (li == -1) {
 return false;
-  } else if (outcomesSequence[li].endsWith(NameFinderME.OTHER) ||
+  } else if (outcomesSequence[li].endsWith(BilouCodec.OTHER) ||
   outcomesSequence[li].endsWith(BilouCodec.UNIT)) {
 return false;
-  } else if (outcomesSequence[li].endsWith(NameFinderME.CONTINUE) ||
-  outcomesSequence[li].endsWith(NameFinderME.START)) {
+  } else if (outcomesSequence[li].endsWith(BilouCodec.LAST) &&
+  (outcome.endsWith(BilouCodec.CONTINUE) || 
outcome.endsWith(BilouCodec.LAST))) {
+return false;
+  } else if (outcomesSequence[li].endsWith(BilouCodec.CONTINUE) ||
+  outcomesSequence[li].endsWith(BilouCodec.START)) {
 // if it is continue, we have to check if previous match was of the 
same type
 String previousNameType = 
NameFinderME.extractNameType(outcomesSequence[li]);
 String nameType = NameFinderME.extractNameType(outcome);
@@ -50,10 +53,12 @@ public class BilouNameFinderSequenceValidator implements
   }
 }
 
-if (outcomesSequence.length - 1 > 0) {
-  if (outcome.endsWith(NameFinderME.OTHER)) {
-if (outcomesSequence[outcomesSequence.length - 
1].endsWith(NameFinderME.START)
-|| outcomesSequence[outcomesSequence.length - 
1].endsWith(NameFinderME.CONTINUE)) {
+if (outcomesSequence.length > 0) {
+  if (outcome.endsWith(BilouCodec.START)
+  || outcome.endsWith(BilouCodec.OTHER)
+  || outcome.endsWith(BilouCodec.UNIT)) {
+if (outcomesSequence[outcomesSequence.length - 
1].endsWith(BilouCodec.START)
+|| outcomesSequence[outcomesSequence.length - 
1].endsWith(BilouCodec.CONTINUE)) {
   return false;
 }
   }

http://git-wip-us.apache.org/repos/asf/opennlp/blob/5c9f6ab3/opennlp-tools/src/test/java/opennlp/tools/namefind/BilouNameFinderSequenceValidatorTest.java
--
diff --git 
a/opennlp-tools/src/test/java/opennlp/tools/namefind/BilouNameFinderSequenceValidatorTest.java
 
b/opennlp-tools/src/test/java/opennlp/tools/namefind/BilouNameFinderSequenceValidatorTest.java
new file mode 100644
index 000..a234beb
--- /dev/null
+++ 
b/opennlp-tools/src/test/java/opennlp/tools/namefind/BilouNameFinderSequenceValidatorTest.java
@@ -0,0 +1,435 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless 

[26/50] [abbrv] opennlp git commit: OPENNLP-982: Allow loading of 1.5.x models

2017-04-16 Thread joern
OPENNLP-982: Allow loading of 1.5.x models

This closes #129


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/ebb5b248
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/ebb5b248
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/ebb5b248

Branch: refs/heads/parser_regression
Commit: ebb5b24851706ec3684d4b0ddf4e35542984dfc5
Parents: eee4231
Author: Jörn Kottmann 
Authored: Fri Feb 17 12:32:11 2017 +0100
Committer: Jörn Kottmann 
Committed: Sun Apr 16 19:24:54 2017 +0200

--
 .../src/main/java/opennlp/tools/util/model/BaseModel.java  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/opennlp/blob/ebb5b248/opennlp-tools/src/main/java/opennlp/tools/util/model/BaseModel.java
--
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/util/model/BaseModel.java 
b/opennlp-tools/src/main/java/opennlp/tools/util/model/BaseModel.java
index 20acd9d..f70fb03 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/model/BaseModel.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/model/BaseModel.java
@@ -416,7 +416,7 @@ public abstract class BaseModel implements 
ArtifactProvider, Serializable {
 // Major and minor version must match, revision might be
 // this check allows for the use of models of n minor release behind 
current minor release
 if (Version.currentVersion().getMajor() != version.getMajor() ||
-Version.currentVersion().getMinor() - 2 > version.getMinor()) {
+Version.currentVersion().getMinor() - 3 > version.getMinor()) {
   throw new InvalidFormatException("Model version " + version + " is 
not supported by this ("
   + Version.currentVersion() + ") version of OpenNLP!");
 }



[09/50] [abbrv] opennlp git commit: OPENNLP-984: Remove type parameter from POS Tagger Trainer cli

2017-04-16 Thread joern
OPENNLP-984: Remove type parameter from POS Tagger Trainer cli


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/672f1b09
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/672f1b09
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/672f1b09

Branch: refs/heads/parser_regression
Commit: 672f1b09dfcb1c854f930b6f1c7911e41141e1fa
Parents: cd23b58
Author: Jörn Kottmann 
Authored: Sat Feb 11 00:20:45 2017 +0100
Committer: Jörn Kottmann 
Committed: Sun Apr 16 19:24:52 2017 +0200

--
 .../cmdline/postag/POSTaggerTrainerTool.java| 25 
 .../tools/cmdline/postag/TrainingParams.java|  6 -
 2 files changed, 31 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/opennlp/blob/672f1b09/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTrainerTool.java
--
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTrainerTool.java
 
b/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTrainerTool.java
index 4a78602..1e6fb54 100644
--- 
a/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTrainerTool.java
+++ 
b/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTrainerTool.java
@@ -34,8 +34,6 @@ import opennlp.tools.postag.POSTaggerFactory;
 import opennlp.tools.postag.POSTaggerME;
 import opennlp.tools.postag.TagDictionary;
 import opennlp.tools.util.InvalidFormatException;
-import opennlp.tools.util.TrainingParameters;
-import opennlp.tools.util.model.ModelType;
 import opennlp.tools.util.model.ModelUtil;
 
 public final class POSTaggerTrainerTool
@@ -63,7 +61,6 @@ public final class POSTaggerTrainerTool
 
 if (mlParams == null) {
   mlParams = ModelUtil.createDefaultTrainingParameters();
-  mlParams.put(TrainingParameters.ALGORITHM_PARAM, 
getModelType(params.getType()).toString());
 }
 
 File modelOutFile = params.getModel();
@@ -142,26 +139,4 @@ public final class POSTaggerTrainerTool
 
 CmdLineUtil.writeModel("pos tagger", modelOutFile, model);
   }
-
-  static ModelType getModelType(String modelString) {
-ModelType model;
-if (modelString == null)
-  modelString = "maxent";
-
-switch (modelString) {
-  case "maxent":
-model = ModelType.MAXENT;
-break;
-  case "perceptron":
-model = ModelType.PERCEPTRON;
-break;
-  case "perceptron_sequence":
-model = ModelType.PERCEPTRON_SEQUENCE;
-break;
-  default:
-model = null;
-break;
-}
-return model;
-  }
 }

http://git-wip-us.apache.org/repos/asf/opennlp/blob/672f1b09/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/TrainingParams.java
--
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/TrainingParams.java 
b/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/TrainingParams.java
index 221dcbe..690b359 100644
--- 
a/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/TrainingParams.java
+++ 
b/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/TrainingParams.java
@@ -29,12 +29,6 @@ import opennlp.tools.cmdline.params.BasicTrainingParams;
  * Note: Do not use this class, internal use only!
  */
 interface TrainingParams extends BasicTrainingParams {
-
-  @ParameterDescription(valueName = "maxent|perceptron|perceptron_sequence",
-  description = "The type of the token name finder model. One of 
maxent|perceptron|perceptron_sequence.")
-  @OptionalParameter(defaultValue = "maxent")
-  String getType();
-
   @ParameterDescription(valueName = "dictionaryPath", description = "The XML 
tag dictionary file")
   @OptionalParameter
   File getDict();



[40/50] [abbrv] opennlp git commit: OPENNLP-1003: Write a test case for the BioCodec class

2017-04-16 Thread joern
OPENNLP-1003: Write a test case for the BioCodec class

This closes #141


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/17493d1c
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/17493d1c
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/17493d1c

Branch: refs/heads/parser_regression
Commit: 17493d1cd7505441a363846fbd0a7ec2a8d67a06
Parents: fc10d2e
Author: Peter Thygesen 
Authored: Tue Mar 14 23:22:24 2017 +0100
Committer: Jörn Kottmann 
Committed: Sun Apr 16 19:24:56 2017 +0200

--
 .../opennlp/tools/namefind/BioCodecTest.java| 263 +++
 1 file changed, 263 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/opennlp/blob/17493d1c/opennlp-tools/src/test/java/opennlp/tools/namefind/BioCodecTest.java
--
diff --git 
a/opennlp-tools/src/test/java/opennlp/tools/namefind/BioCodecTest.java 
b/opennlp-tools/src/test/java/opennlp/tools/namefind/BioCodecTest.java
new file mode 100644
index 000..c894742
--- /dev/null
+++ b/opennlp-tools/src/test/java/opennlp/tools/namefind/BioCodecTest.java
@@ -0,0 +1,263 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.namefind;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import opennlp.tools.util.Span;
+
+/**
+ * This is the test class for {@link BioCodec}.
+ */
+public class BioCodecTest {
+
+  private static final BioCodec codec = new BioCodec();
+
+  private static final String A_TYPE = "atype";
+  private static final String A_START = A_TYPE + "-" + BioCodec.START;
+  private static final String A_CONTINUE = A_TYPE + "-" + BioCodec.CONTINUE;
+
+  private static final String B_TYPE = "btype";
+  private static final String B_START = B_TYPE + "-" + BioCodec.START;
+  private static final String B_CONTINUE = B_TYPE + "-" + BioCodec.CONTINUE;
+
+  private static final String C_TYPE = "ctype";
+  private static final String C_START = C_TYPE + "-" + BioCodec.START;
+
+  private static final String OTHER = BioCodec.OTHER;
+
+  @Test
+  public void testEncodeNoNames() {
+NameSample nameSample = new NameSample("Once upon a time.".split(" "), new 
Span[] {}, true);
+String[] expected = new String[] { OTHER, OTHER, OTHER, OTHER};
+String[] actual = codec.encode(nameSample.getNames(), 
nameSample.getSentence().length);
+Assert.assertArrayEquals("Only 'Other' is expected.", expected, actual);
+  }
+
+  @Test
+  public void testEncodeSingleTokenSpan() {
+String[] sentence = "I called Julie again.".split(" ");
+Span[] spans = new Span[] { new Span(2,3, A_TYPE)};
+NameSample nameSample = new NameSample(sentence, spans, true);
+String[] expected = new String[] {OTHER, OTHER, A_START, OTHER};
+String[] actual = codec.encode(nameSample.getNames(), 
nameSample.getSentence().length);
+Assert.assertArrayEquals("'Julie' should be 'start' only, the rest should 
be 'other'.", expected, actual);
+  }
+
+  @Test
+  public void testEncodeDoubleTokenSpan() {
+String[] sentence = "I saw Stefanie Schmidt today.".split(" ");
+Span[] span = new Span[] { new Span(2,4, A_TYPE)};
+NameSample nameSample = new NameSample(sentence, span, true);
+String[] expected = new String[] {OTHER, OTHER, A_START, A_CONTINUE, 
OTHER};
+String[] actual = codec.encode(nameSample.getNames(), 
nameSample.getSentence().length);
+Assert.assertArrayEquals("'Stefanie' should be 'start' only, 'Schmidt' is 
" +
+"'continue' and the rest should be 'other'.", expected, actual);
+  }
+
+  @Test
+  public void testEncodeDoubleTokenSpanNoType() {
+final String DEFAULT_START = "default" + "-" + BioCodec.START;
+final String DEFAULT_CONTINUE = "default" + "-" + BioCodec.CONTINUE;
+String[] sentence = "I saw Stefanie Schmidt today.".split(" ");
+Span[] span = new Span[] { new Span(2,4, null)};
+NameSample nameSample = 

[04/50] [abbrv] opennlp git commit: NoJira: Run coveralls:report after build

2017-04-16 Thread joern
NoJira: Run coveralls:report after build


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/6f332610
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/6f332610
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/6f332610

Branch: refs/heads/parser_regression
Commit: 6f332610eb7a4fcc0b978b90f1fc9c8b253aa2b8
Parents: 51cd809
Author: Jörn Kottmann 
Authored: Tue Feb 7 18:49:39 2017 +0100
Committer: Jörn Kottmann 
Committed: Sun Apr 16 19:24:51 2017 +0200

--
 .travis.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/opennlp/blob/6f332610/.travis.yml
--
diff --git a/.travis.yml b/.travis.yml
index 81bf0f7..b4c83ad 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -17,4 +17,4 @@ before_install:
 script: mvn clean install
 
 after_success:
-  - mvn clean test -Pjacoco jacoco:report
\ No newline at end of file
+  - mvn clean test -Pjacoco jacoco:report coveralls:report



[13/50] [abbrv] opennlp git commit: NoJira: Update pom and jira version to 1.8.0

2017-04-16 Thread joern
NoJira: Update pom and jira version to 1.8.0


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/daa9fcaa
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/daa9fcaa
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/daa9fcaa

Branch: refs/heads/parser_regression
Commit: daa9fcaa0722f59222c374028e2cee097a29fa12
Parents: ac787a4
Author: Jörn Kottmann 
Authored: Thu Feb 9 18:41:20 2017 +0100
Committer: Jörn Kottmann 
Committed: Sun Apr 16 19:24:52 2017 +0200

--
 opennlp-brat-annotator/pom.xml   | 2 +-
 opennlp-distr/pom.xml| 4 ++--
 opennlp-docs/pom.xml | 2 +-
 opennlp-morfologik-addon/pom.xml | 2 +-
 opennlp-tools/pom.xml| 2 +-
 opennlp-uima/pom.xml | 2 +-
 pom.xml  | 2 +-
 7 files changed, 8 insertions(+), 8 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/opennlp/blob/daa9fcaa/opennlp-brat-annotator/pom.xml
--
diff --git a/opennlp-brat-annotator/pom.xml b/opennlp-brat-annotator/pom.xml
index 1633deb..6c7be0d 100644
--- a/opennlp-brat-annotator/pom.xml
+++ b/opennlp-brat-annotator/pom.xml
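@@ -17,7 +17,7 @@
   <parent>
     <groupId>org.apache.opennlp</groupId>
     <artifactId>opennlp</artifactId>
-    <version>1.7.3-SNAPSHOT</version>
+    <version>1.8.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>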
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/daa9fcaa/opennlp-distr/pom.xml
--
diff --git a/opennlp-distr/pom.xml b/opennlp-distr/pom.xml
index c0a57c3..1ce102d 100644
--- a/opennlp-distr/pom.xml
+++ b/opennlp-distr/pom.xml
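@@ -24,7 +24,7 @@
   <parent>
     <groupId>org.apache.opennlp</groupId>
     <artifactId>opennlp</artifactId>
-    <version>1.7.3-SNAPSHOT</version>
+    <version>1.8.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>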
 
@@ -127,7 +127,7 @@
   generate-resources 
   jira-report
 
-  12339150
+  12339249
   
${basedir}/target/issuesFixed/
   1000 
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/daa9fcaa/opennlp-docs/pom.xml
--
diff --git a/opennlp-docs/pom.xml b/opennlp-docs/pom.xml
index fd2b0d1..fbf0b5c 100644
--- a/opennlp-docs/pom.xml
+++ b/opennlp-docs/pom.xml
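@@ -24,7 +24,7 @@
   <parent>
     <groupId>org.apache.opennlp</groupId>
     <artifactId>opennlp</artifactId>
-    <version>1.7.3-SNAPSHOT</version>
+    <version>1.8.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>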
   

http://git-wip-us.apache.org/repos/asf/opennlp/blob/daa9fcaa/opennlp-morfologik-addon/pom.xml
--
diff --git a/opennlp-morfologik-addon/pom.xml b/opennlp-morfologik-addon/pom.xml
index 1c384c7..c46f101 100644
--- a/opennlp-morfologik-addon/pom.xml
+++ b/opennlp-morfologik-addon/pom.xml
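@@ -24,7 +24,7 @@
   <parent>
     <groupId>org.apache.opennlp</groupId>
     <artifactId>opennlp</artifactId>
-    <version>1.7.3-SNAPSHOT</version>
+    <version>1.8.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>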
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/daa9fcaa/opennlp-tools/pom.xml
--
diff --git a/opennlp-tools/pom.xml b/opennlp-tools/pom.xml
index c7e9624..d2630c9 100644
--- a/opennlp-tools/pom.xml
+++ b/opennlp-tools/pom.xml
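@@ -25,7 +25,7 @@
   <parent>
     <groupId>org.apache.opennlp</groupId>
     <artifactId>opennlp</artifactId>
-    <version>1.7.3-SNAPSHOT</version>
+    <version>1.8.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>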
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/daa9fcaa/opennlp-uima/pom.xml
--
diff --git a/opennlp-uima/pom.xml b/opennlp-uima/pom.xml
index 070fec9..7cfdb72 100644
--- a/opennlp-uima/pom.xml
+++ b/opennlp-uima/pom.xml
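@@ -25,7 +25,7 @@
   <parent>
     <groupId>org.apache.opennlp</groupId>
     <artifactId>opennlp</artifactId>
-    <version>1.7.3-SNAPSHOT</version>
+    <version>1.8.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>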
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/daa9fcaa/pom.xml
--
diff --git a/pom.xml b/pom.xml
index 98acfb1..268a54e 100644
--- a/pom.xml
+++ b/pom.xml
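@@ -31,7 +31,7 @@
 
   <groupId>org.apache.opennlp</groupId>
   <artifactId>opennlp</artifactId>
-  <version>1.7.3-SNAPSHOT</version>
+  <version>1.8.0-SNAPSHOT</version>
   <packaging>pom</packaging>
 
   <name>Apache OpenNLP Reactor</name>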



[22/50] [abbrv] opennlp git commit: OPENNLP-978: Set name finder defaults to perceptron and cutoff zero

2017-04-16 Thread joern
OPENNLP-978: Set name finder defaults to perceptron and cutoff zero


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/91352d5f
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/91352d5f
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/91352d5f

Branch: refs/heads/parser_regression
Commit: 91352d5fe620ad3fab988222878d4953603db6e3
Parents: 672f1b0
Author: Jörn Kottmann 
Authored: Tue Feb 7 23:58:43 2017 +0100
Committer: Jörn Kottmann 
Committed: Sun Apr 16 19:24:53 2017 +0200

--
 .../namefind/TokenNameFinderCrossValidatorTool.java|  4 ++--
 .../cmdline/namefind/TokenNameFinderTrainerTool.java   |  3 ++-
 .../main/java/opennlp/tools/namefind/NameFinderME.java |  6 ++
 .../java/opennlp/tools/util/TrainingParameters.java| 13 +
 .../java/opennlp/tools/namefind/NameFinderMETest.java  |  1 +
 5 files changed, 24 insertions(+), 3 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/opennlp/blob/91352d5f/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java
--
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java
 
b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java
index 333abd9..153d6f7 100644
--- 
a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java
+++ 
b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java
@@ -42,8 +42,8 @@ import 
opennlp.tools.namefind.TokenNameFinderEvaluationMonitor;
 import opennlp.tools.namefind.TokenNameFinderFactory;
 import opennlp.tools.util.InvalidFormatException;
 import opennlp.tools.util.SequenceCodec;
+import opennlp.tools.util.TrainingParameters;
 import opennlp.tools.util.eval.EvaluationMonitor;
-import opennlp.tools.util.model.ModelUtil;
 
 public final class TokenNameFinderCrossValidatorTool
 extends AbstractCrossValidatorTool {
@@ -65,7 +65,7 @@ public final class TokenNameFinderCrossValidatorTool
 
 mlParams = CmdLineUtil.loadTrainingParameters(params.getParams(), true);
 if (mlParams == null) {
-  mlParams = ModelUtil.createDefaultTrainingParameters();
+  mlParams = new TrainingParameters();
 }
 
 byte featureGeneratorBytes[] =

http://git-wip-us.apache.org/repos/asf/opennlp/blob/91352d5f/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java
--
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java
 
b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java
index a8d4417..fb73506 100644
--- 
a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java
+++ 
b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java
@@ -40,6 +40,7 @@ import opennlp.tools.namefind.TokenNameFinderFactory;
 import opennlp.tools.namefind.TokenNameFinderModel;
 import opennlp.tools.util.InvalidFormatException;
 import opennlp.tools.util.SequenceCodec;
+import opennlp.tools.util.TrainingParameters;
 import opennlp.tools.util.featuregen.GeneratorFactory;
 import opennlp.tools.util.model.ArtifactSerializer;
 import opennlp.tools.util.model.ModelUtil;
@@ -166,7 +167,7 @@ public final class TokenNameFinderTrainerTool
 
 mlParams = CmdLineUtil.loadTrainingParameters(params.getParams(), true);
 if (mlParams == null) {
-  mlParams = ModelUtil.createDefaultTrainingParameters();
+  mlParams = new TrainingParameters();
 }
 
 File modelOutFile = params.getModel();

http://git-wip-us.apache.org/repos/asf/opennlp/blob/91352d5f/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java
--
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java 
b/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java
index 6ce0b83..5a16f34 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java
@@ -37,6 +37,7 @@ import opennlp.tools.ml.TrainerFactory.TrainerType;
 import opennlp.tools.ml.model.Event;
 import opennlp.tools.ml.model.MaxentModel;
 import opennlp.tools.ml.model.SequenceClassificationModel;
+import opennlp.tools.ml.perceptron.PerceptronTrainer;
 import opennlp.tools.util.ObjectStream;
 import opennlp.tools.util.Sequence;
 import opennlp.tools.util.SequenceCodec;
@@ -219,6 

[24/50] [abbrv] opennlp git commit: OPENNLP-995: Add a PR Review Template for contributors

2017-04-16 Thread joern
OPENNLP-995: Add a PR Review Template for contributors


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/82caa558
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/82caa558
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/82caa558

Branch: refs/heads/parser_regression
Commit: 82caa558d8942c9366af07a5a80bd088aff6c10b
Parents: 6cdca66
Author: smarthi 
Authored: Thu Feb 23 07:46:17 2017 -0500
Committer: Jörn Kottmann 
Committed: Sun Apr 16 19:24:54 2017 +0200

--
 .github/CONTRIBUTING.md  | 11 +++
 .github/PULL_REQUEST_TEMPLATE.md | 27 +++
 2 files changed, 38 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/opennlp/blob/82caa558/.github/CONTRIBUTING.md
--
diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md
new file mode 100644
index 000..577eb16
--- /dev/null
+++ b/.github/CONTRIBUTING.md
@@ -0,0 +1,11 @@
+# How to contribute to Apache OpenNLP
+
+Thank you for your intention to contribute to the Apache OpenNLP project. As 
an open-source community, we highly appreciate external contributions to our 
project.
+
+To make the process smooth for the project *committers* (those who review and 
accept changes) and *contributors* (those who propose new changes via pull 
requests), there are a few rules to follow.
+
+## Contribution Guidelines
+
+Please check out the [How to get 
involved](http://opennlp.apache.org/get-involved.html) to understand how 
contributions are made. 
+A detailed list of coding standards can be found at [Apache OpenNLP Code 
Conventions](http://opennlp.apache.org/code-conventions.html) which also 
contains a list of coding guidelines that you should follow.
+For pull requests, there is a [check list](PULL_REQUEST_TEMPLATE.md) with 
criteria for acceptable contributions.

http://git-wip-us.apache.org/repos/asf/opennlp/blob/82caa558/.github/PULL_REQUEST_TEMPLATE.md
--
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
new file mode 100644
index 000..579e2e0
--- /dev/null
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -0,0 +1,27 @@
+Thank you for contributing to Apache OpenNLP.
+
+In order to streamline the review of the contribution we ask you
+to ensure the following steps have been taken:
+
+### For all changes:
+- [ ] Is there a JIRA ticket associated with this PR? Is it referenced 
+ in the commit message?
+
+- [ ] Does your PR title start with OPENNLP-XXXX where XXXX is the JIRA number 
you are trying to resolve? Pay particular attention to the hyphen "-" character.
+
+- [ ] Has your PR been rebased against the latest commit within the target 
branch (typically master)?
+
+- [ ] Is your initial contribution a single, squashed commit?
+
+### For code changes:
+- [ ] Have you ensured that the full suite of tests is executed via mvn clean 
install at the root opennlp folder?
+- [ ] Have you written or updated unit tests to verify your changes?
+- [ ] If adding new dependencies to the code, are these dependencies licensed 
in a way that is compatible for inclusion under [ASF 
2.0](http://www.apache.org/legal/resolved.html#category-a)? 
+- [ ] If applicable, have you updated the LICENSE file, including the main 
LICENSE file in opennlp folder?
+- [ ] If applicable, have you updated the NOTICE file, including the main 
NOTICE file found in opennlp folder?
+
+### For documentation related changes:
+- [ ] Have you ensured that format looks appropriate for the output in which 
it is rendered?
+
+### Note:
+Please ensure that once the PR is submitted, you check travis-ci for build 
issues and submit an update to your PR as soon as possible.



[32/50] [abbrv] opennlp git commit: Revert "OPENNLP-1002 Remove deprecated GIS class"

2017-04-16 Thread joern
Revert "OPENNLP-1002 Remove deprecated GIS class"

This reverts commit efa257676280abd316bb677e5a8de5cb9fe1dd73.


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/1b6ad719
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/1b6ad719
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/1b6ad719

Branch: refs/heads/parser_regression
Commit: 1b6ad719760ead028810715b49ff15219385ee42
Parents: 7487812
Author: Jörn Kottmann 
Authored: Fri Mar 10 17:22:28 2017 +0100
Committer: Jörn Kottmann 
Committed: Sun Apr 16 19:24:55 2017 +0200

--
 .../main/java/opennlp/tools/ml/maxent/GIS.java  | 303 +++
 1 file changed, 303 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/opennlp/blob/1b6ad719/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GIS.java
--
diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GIS.java 
b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GIS.java
new file mode 100644
index 000..97c214d
--- /dev/null
+++ b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GIS.java
@@ -0,0 +1,303 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.ml.maxent;
+
+import java.io.IOException;
+
+import opennlp.tools.ml.AbstractEventTrainer;
+import opennlp.tools.ml.model.AbstractModel;
+import opennlp.tools.ml.model.DataIndexer;
+import opennlp.tools.ml.model.Event;
+import opennlp.tools.ml.model.Prior;
+import opennlp.tools.ml.model.UniformPrior;
+import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.TrainingParameters;
+
+/**
+ * A Factory class which uses instances of GISTrainer to create and train
+ * GISModels.
+ * @deprecated use {@link GISTrainer}
+ */
+@Deprecated
+public class GIS extends AbstractEventTrainer {
+
+  public static final String MAXENT_VALUE = "MAXENT";
+
+  /**
+   * Set this to false if you don't want messages about the progress of model
+   * training displayed. Alternately, you can use the overloaded version of
+   * trainModel() to conditionally enable progress messages.
+   */
+  public static boolean PRINT_MESSAGES = true;
+
+  /**
+   * If we are using smoothing, this is used as the "number" of times we want
+   * the trainer to imagine that it saw a feature that it actually didn't see.
+   * Defaulted to 0.1.
+   */
+  private static final double SMOOTHING_OBSERVATION = 0.1;
+
+  private static final String SMOOTHING_PARAM = "smoothing";
+  private static final boolean SMOOTHING_DEFAULT = false;
+
+  public GIS() {
+  }
+
+  public GIS(TrainingParameters parameters) {
+super(parameters);
+  }
+  
+  public boolean isValid() {
+
+if (!super.isValid()) {
+  return false;
+}
+
+String algorithmName = getAlgorithm();
+
+return !(algorithmName != null && !(MAXENT_VALUE.equals(algorithmName)));
+  }
+
+  public boolean isSortAndMerge() {
+return true;
+  }
+
+  public AbstractModel doTrain(DataIndexer indexer) throws IOException {
+int iterations = getIterations();
+
+AbstractModel model;
+
+boolean printMessages = 
trainingParameters.getBooleanParameter(VERBOSE_PARAM, VERBOSE_DEFAULT);
+boolean smoothing = 
trainingParameters.getBooleanParameter(SMOOTHING_PARAM, SMOOTHING_DEFAULT);
+int threads = 
trainingParameters.getIntParameter(TrainingParameters.THREADS_PARAM, 1);
+
+model = trainModel(iterations, indexer, printMessages, smoothing, null, 
threads);
+
+return model;
+  }
+
+  // << members related to AbstractEventTrainer
+
+  /**
+   * Train a model using the GIS algorithm, assuming 100 iterations and no
+   * cutoff.
+   *
+   * @param eventStream
+   *  The EventStream holding the data on which this model will be
+   *  trained.
+   * @return The newly trained model, which can be used immediately or saved to
+   * disk using an opennlp.tools.ml.maxent.io.GISModelWriter object.
+   */
+  public static GISModel trainModel(ObjectStream eventStream) 

[48/50] [abbrv] opennlp git commit: OPENNLP-1015: Add tests for DataIndexers

2017-04-16 Thread joern
OPENNLP-1015: Add tests for DataIndexers

Closes #152


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/7589af69
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/7589af69
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/7589af69

Branch: refs/heads/parser_regression
Commit: 7589af69ea8a73326bed5e2f5b5c0445f95112eb
Parents: 5eb8ff8
Author: koji 
Authored: Fri Apr 7 21:50:02 2017 +0900
Committer: Jörn Kottmann 
Committed: Sun Apr 16 19:24:57 2017 +0200

--
 .../tools/ml/model/OnePassDataIndexerTest.java  |  64 ++
 .../model/OnePassRealValueDataIndexerTest.java  | 116 +++
 .../ml/model/SimpleEventStreamBuilder.java  |  76 
 .../tools/ml/model/TwoPassDataIndexerTest.java  |  64 ++
 4 files changed, 320 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/opennlp/blob/7589af69/opennlp-tools/src/test/java/opennlp/tools/ml/model/OnePassDataIndexerTest.java
--
diff --git 
a/opennlp-tools/src/test/java/opennlp/tools/ml/model/OnePassDataIndexerTest.java
 
b/opennlp-tools/src/test/java/opennlp/tools/ml/model/OnePassDataIndexerTest.java
new file mode 100644
index 000..e629e7a
--- /dev/null
+++ 
b/opennlp-tools/src/test/java/opennlp/tools/ml/model/OnePassDataIndexerTest.java
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.ml.model;
+
+import java.io.IOException;
+import java.util.Collections;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.TrainingParameters;
+
+public class OnePassDataIndexerTest {
+
+  @Test
+  public void testIndex() throws IOException {
+// He belongs to  Apache Software Foundation  .
+ObjectStream eventStream = new SimpleEventStreamBuilder()
+.add("other/w=he n1w=belongs n2w=to po=other pow=other,He 
powf=other,ic ppo=other")
+.add("other/w=belongs p1w=he n1w=to n2w=apache po=other 
pow=other,belongs powf=other,lc ppo=other")
+.add("other/w=to p1w=belongs p2w=he n1w=apache n2w=software po=other 
pow=other,to" +
+  " powf=other,lc ppo=other")
+.add("org-start/w=apache p1w=to p2w=belongs n1w=software 
n2w=foundation po=other pow=other,Apache" +
+  " powf=other,ic ppo=other")
+.add("org-cont/w=software p1w=apache p2w=to n1w=foundation n2w=. 
po=org-start" +
+  " pow=org-start,Software powf=org-start,ic ppo=other")
+.add("org-cont/w=foundation p1w=software p2w=apache n1w=. po=org-cont 
pow=org-cont,Foundation" +
+  " powf=org-cont,ic ppo=org-start")
+.add("other/w=. p1w=foundation p2w=software po=org-cont pow=org-cont,. 
powf=org-cont,other" +
+  " ppo=org-cont")
+.build();
+
+DataIndexer indexer = new OnePassDataIndexer();
+indexer.init(new TrainingParameters(Collections.emptyMap()), null);
+indexer.index(eventStream);
+Assert.assertEquals(3, indexer.getContexts().length);
+Assert.assertArrayEquals(new int[]{0}, indexer.getContexts()[0]);
+Assert.assertArrayEquals(new int[]{0}, indexer.getContexts()[1]);
+Assert.assertArrayEquals(new int[]{0}, indexer.getContexts()[2]);
+Assert.assertNull(indexer.getValues());
+Assert.assertEquals(5, indexer.getNumEvents());
+Assert.assertArrayEquals(new int[]{0, 1, 2}, indexer.getOutcomeList());
+Assert.assertArrayEquals(new int[]{3, 1, 1}, 
indexer.getNumTimesEventsSeen());
+Assert.assertArrayEquals(new String[]{"ppo=other"}, 
indexer.getPredLabels());
+Assert.assertArrayEquals(new String[]{"other", "org-start", "org-cont"}, 
indexer.getOutcomeLabels());
+Assert.assertArrayEquals(new int[]{5}, indexer.getPredCounts());
+  }
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/7589af69/opennlp-tools/src/test/java/opennlp/tools/ml/model/OnePassRealValueDataIndexerTest.java

[36/50] [abbrv] opennlp git commit: OPENNLP-125: Make POS Tagger feature generation configurable

2017-04-16 Thread joern
OPENNLP-125: Make POS Tagger feature generation configurable


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/dd39d066
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/dd39d066
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/dd39d066

Branch: refs/heads/parser_regression
Commit: dd39d06629294f3c9bd3980d02ba0f1716839e0e
Parents: 711d70b
Author: Jörn Kottmann 
Authored: Thu Feb 9 18:54:27 2017 +0100
Committer: Jörn Kottmann 
Committed: Sun Apr 16 19:24:55 2017 +0200

--
 .../namefind/TokenNameFinderTrainerTool.java|   2 +-
 .../postag/POSTaggerCrossValidatorTool.java |  10 +-
 .../cmdline/postag/POSTaggerTrainerTool.java|  26 +--
 .../tools/cmdline/postag/TrainingParams.java|  13 +-
 .../postag/ConfigurablePOSContextGenerator.java | 105 +++
 .../opennlp/tools/postag/POSDictionary.java |   8 +-
 .../java/opennlp/tools/postag/POSModel.java |  40 +++--
 .../tools/postag/POSTaggerCrossValidator.java   |  44 ++---
 .../opennlp/tools/postag/POSTaggerFactory.java  | 179 ++-
 .../tools/util/featuregen/GeneratorFactory.java |  12 ++
 .../featuregen/PosTaggerFeatureGenerator.java   |  62 +++
 .../tools/postag/pos-default-features.xml   |  38 
 .../ConfigurablePOSContextGeneratorTest.java|  55 ++
 .../tools/postag/DummyPOSTaggerFactory.java |  14 +-
 .../tools/postag/POSTaggerFactoryTest.java  |  11 +-
 15 files changed, 534 insertions(+), 85 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/opennlp/blob/dd39d066/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java
--
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java
 
b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java
index 5bb18d2..4fb8cb9 100644
--- 
a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java
+++ 
b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java
@@ -67,7 +67,7 @@ public final class TokenNameFinderTrainerTool
 return null;
   }
 
-  static byte[] openFeatureGeneratorBytes(File featureGenDescriptorFile) {
+  public static byte[] openFeatureGeneratorBytes(File 
featureGenDescriptorFile) {
 byte[] featureGeneratorBytes = null;
 // load descriptor file into memory
 if (featureGenDescriptorFile != null) {

http://git-wip-us.apache.org/repos/asf/opennlp/blob/dd39d066/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerCrossValidatorTool.java
--
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerCrossValidatorTool.java
 
b/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerCrossValidatorTool.java
index d91d4ee..67ad2b9 100644
--- 
a/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerCrossValidatorTool.java
+++ 
b/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerCrossValidatorTool.java
@@ -22,10 +22,12 @@ import java.io.FileNotFoundException;
 import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.OutputStream;
+import java.util.Map;
 
 import opennlp.tools.cmdline.AbstractCrossValidatorTool;
 import opennlp.tools.cmdline.CmdLineUtil;
 import opennlp.tools.cmdline.TerminateToolException;
+import opennlp.tools.cmdline.namefind.TokenNameFinderTrainerTool;
 import opennlp.tools.cmdline.params.CVParams;
 import opennlp.tools.cmdline.params.FineGrainedEvaluatorParams;
 import opennlp.tools.cmdline.postag.POSTaggerCrossValidatorTool.CVToolParams;
@@ -75,10 +77,16 @@ public final class POSTaggerCrossValidatorTool
   }
 }
 
+Map resources = TokenNameFinderTrainerTool.loadResources(
+params.getResources(), params.getFeaturegen());
+
+byte[] featureGeneratorBytes =
+
TokenNameFinderTrainerTool.openFeatureGeneratorBytes(params.getFeaturegen());
+
 POSTaggerCrossValidator validator;
 try {
   validator = new POSTaggerCrossValidator(params.getLang(), mlParams,
-  params.getDict(), params.getNgram(), params.getTagDictCutoff(),
+  params.getDict(), featureGeneratorBytes, resources, 
params.getTagDictCutoff(),
   params.getFactory(), missclassifiedListener, reportListener);
 
   validator.evaluate(sampleStream, params.getFolds());

http://git-wip-us.apache.org/repos/asf/opennlp/blob/dd39d066/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTrainerTool.java
--
diff --git 

[14/50] [abbrv] opennlp git commit: OPENNLP-980: Deprecate low-level feature constructors and methods

2017-04-16 Thread joern
OPENNLP-980: Deprecate low-level feature constructors and methods


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/ac787a4d
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/ac787a4d
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/ac787a4d

Branch: refs/heads/parser_regression
Commit: ac787a4dabf9823bf8b7e91f1b73d60d17e01006
Parents: b41fcd6
Author: Jörn Kottmann 
Authored: Wed Feb 1 21:38:19 2017 +0100
Committer: Jörn Kottmann 
Committed: Sun Apr 16 19:24:52 2017 +0200

--
 .../src/main/java/opennlp/tools/ml/maxent/GISModel.java | 1 +
 .../java/opennlp/tools/ml/maxent/quasinewton/QNModel.java   | 1 +
 .../src/main/java/opennlp/tools/ml/model/AbstractModel.java | 9 +
 .../java/opennlp/tools/ml/naivebayes/NaiveBayesModel.java   | 2 ++
 .../java/opennlp/tools/ml/perceptron/PerceptronModel.java   | 5 +
 5 files changed, 18 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/opennlp/blob/ac787a4d/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GISModel.java
--
diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GISModel.java 
b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GISModel.java
index e546d1c..14c7fa3 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GISModel.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GISModel.java
@@ -152,6 +152,7 @@ public final class GISModel extends AbstractModel {
* string representation of the outcomes can be obtained from the
* method getOutcome(int i).
*/
+  @Deprecated // visibility will be reduced in 1.8.1
   public static double[] eval(int[] context, float[] values, double[] prior,
   EvalParameters model) {
 Context[] params = model.getParams();

http://git-wip-us.apache.org/repos/asf/opennlp/blob/ac787a4d/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/QNModel.java
--
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/QNModel.java 
b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/QNModel.java
index a35d54c..f02ee75 100644
--- 
a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/QNModel.java
+++ 
b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/QNModel.java
@@ -103,6 +103,7 @@ public class QNModel extends AbstractModel {
*  Model parameters
* @return Normalized probabilities for the outcomes given the context.
*/
+  @Deprecated // visibility will be reduced in 1.8.1
   public static double[] eval(int[] context, float[] values, double[] probs,
   int nOutcomes, int nPredLabels, double[] parameters) {
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/ac787a4d/opennlp-tools/src/main/java/opennlp/tools/ml/model/AbstractModel.java
--
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/ml/model/AbstractModel.java 
b/opennlp-tools/src/main/java/opennlp/tools/ml/model/AbstractModel.java
index e5a60a7..eb80f1b 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/ml/model/AbstractModel.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/ml/model/AbstractModel.java
@@ -39,6 +39,15 @@ public abstract class AbstractModel implements MaxentModel {
   /** The type of the model. */
   protected ModelType modelType;
 
+  /**
+   * @deprecated this will be removed in 1.8.1, pmap should be private
+   *
+   * @param params
+   * @param predLabels
+   * @param pmap
+   * @param outcomeNames
+   */
+  @Deprecated
   public AbstractModel(Context[] params, String[] predLabels,
   Map pmap, String[] outcomeNames) {
 this.pmap = pmap;

http://git-wip-us.apache.org/repos/asf/opennlp/blob/ac787a4d/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/NaiveBayesModel.java
--
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/NaiveBayesModel.java 
b/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/NaiveBayesModel.java
index ec3d9bd..0a28704 100644
--- 
a/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/NaiveBayesModel.java
+++ 
b/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/NaiveBayesModel.java
@@ -31,6 +31,7 @@ public class NaiveBayesModel extends AbstractModel {
   protected double[] outcomeTotals;
   protected long vocabulary;
 
+  @Deprecated
   public NaiveBayesModel(Context[] params, String[] predLabels, Map pmap,
  String[] outcomeNames) {
 super(params, predLabels, pmap, outcomeNames);
@@ -87,6 +88,7 @@ public 

[34/50] [abbrv] opennlp git commit: OPENNLP-904 Harmonize lemmatizer API and function to get multiple lemmas

2017-04-16 Thread joern
OPENNLP-904 Harmonize lemmatizer API and function to get multiple lemmas

OPENNLP-904 add minor correction after PR comment


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/d3c16d53
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/d3c16d53
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/d3c16d53

Branch: refs/heads/parser_regression
Commit: d3c16d53633595619963114e9499c92fe1d7ee2a
Parents: b78abfb
Author: Rodrigo Agerri 
Authored: Fri Feb 3 16:00:38 2017 +0100
Committer: Jörn Kottmann 
Committed: Sun Apr 16 19:24:55 2017 +0200

--
 .../cmdline/lemmatizer/LemmatizerMETool.java|  4 +-
 .../tools/lemmatizer/DictionaryLemmatizer.java  | 70 ++--
 .../lemmatizer/LemmaSampleEventStream.java  |  2 +-
 .../tools/lemmatizer/LemmaSampleStream.java |  4 +-
 .../opennlp/tools/lemmatizer/Lemmatizer.java| 16 -
 .../opennlp/tools/lemmatizer/LemmatizerME.java  | 64 --
 .../tools/lemmatizer/DummyLemmatizer.java   |  7 ++
 .../tools/lemmatizer/LemmatizerMETest.java  |  3 +-
 8 files changed, 136 insertions(+), 34 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/opennlp/blob/d3c16d53/opennlp-tools/src/main/java/opennlp/tools/cmdline/lemmatizer/LemmatizerMETool.java
--
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/cmdline/lemmatizer/LemmatizerMETool.java
 
b/opennlp-tools/src/main/java/opennlp/tools/cmdline/lemmatizer/LemmatizerMETool.java
index e4e47b5..90ba95d 100644
--- 
a/opennlp-tools/src/main/java/opennlp/tools/cmdline/lemmatizer/LemmatizerMETool.java
+++ 
b/opennlp-tools/src/main/java/opennlp/tools/cmdline/lemmatizer/LemmatizerMETool.java
@@ -72,10 +72,8 @@ public class LemmatizerMETool extends BasicCmdLineTool {
 continue;
   }
 
-  String[] preds = lemmatizer.lemmatize(posSample.getSentence(),
+  String[] lemmas = lemmatizer.lemmatize(posSample.getSentence(),
   posSample.getTags());
-  String[] lemmas = lemmatizer.decodeLemmas(posSample.getSentence(),
-  preds);
 
   System.out.println(new LemmaSample(posSample.getSentence(),
   posSample.getTags(), lemmas).toString());

http://git-wip-us.apache.org/repos/asf/opennlp/blob/d3c16d53/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DictionaryLemmatizer.java
--
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DictionaryLemmatizer.java
 
b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DictionaryLemmatizer.java
index b1b04a1..9f0b0b0 100644
--- 
a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DictionaryLemmatizer.java
+++ 
b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DictionaryLemmatizer.java
@@ -37,7 +37,7 @@ public class DictionaryLemmatizer implements Lemmatizer {
   /**
* The hashmap containing the dictionary.
*/
-  private final Map dictMap;
+  private final Map dictMap;
 
   /**
* Construct a hashmap from the input tab separated dictionary.
@@ -47,26 +47,24 @@ public class DictionaryLemmatizer implements Lemmatizer {
* @param dictionary
*  the input dictionary via inputstream
*/
-  public DictionaryLemmatizer(final InputStream dictionary) {
+  public DictionaryLemmatizer(final InputStream dictionary) throws IOException 
{
 this.dictMap = new HashMap<>();
-final BufferedReader breader = new BufferedReader(new 
InputStreamReader(dictionary));
+final BufferedReader breader = new BufferedReader(
+new InputStreamReader(dictionary));
 String line;
-try {
-  while ((line = breader.readLine()) != null) {
-final String[] elems = line.split("\t");
-this.dictMap.put(Arrays.asList(elems[0], elems[1]), elems[2]);
-  }
-} catch (final IOException e) {
-  e.printStackTrace();
+while ((line = breader.readLine()) != null) {
+  final String[] elems = line.split("\t");
+  this.dictMap.put(Arrays.asList(elems[0], elems[1]), 
Arrays.asList(elems[2]));
 }
   }
 
+
   /**
* Get the Map containing the dictionary.
*
* @return dictMap the Map
*/
-  public Map getDictMap() {
+  public Map getDictMap() {
 return this.dictMap;
   }
 
@@ -85,31 +83,65 @@ public class DictionaryLemmatizer implements Lemmatizer {
 return keys;
   }
 
+
   public String[] lemmatize(final String[] tokens, final String[] postags) {
 List lemmas = new ArrayList<>();
 for (int i = 0; i < tokens.length; i++) {
-  lemmas.add(this.apply(tokens[i], postags[i]));
+  lemmas.add(this.lemmatize(tokens[i], postags[i]));
 

[38/50] [abbrv] opennlp git commit: OPENNLP-1002 Remove deprecated GIS class

2017-04-16 Thread joern
OPENNLP-1002 Remove deprecated GIS class


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/fc10d2e9
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/fc10d2e9
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/fc10d2e9

Branch: refs/heads/parser_regression
Commit: fc10d2e9ec3c98e93bdae7d503f1e09848a28a6a
Parents: 1b6ad71
Author: Jörn Kottmann 
Authored: Sun Mar 12 11:10:43 2017 +0100
Committer: Jörn Kottmann 
Committed: Sun Apr 16 19:24:56 2017 +0200

--
 .../cmdline/parser/BuildModelUpdaterTool.java   |   7 +-
 .../cmdline/parser/CheckModelUpdaterTool.java   |   7 +-
 .../main/java/opennlp/tools/ml/maxent/GIS.java  | 303 ---
 .../tools/ml/maxent/GISIndexingTest.java|  78 +++--
 .../tools/ml/maxent/ScaleDoesntMatterTest.java  |  20 +-
 5 files changed, 80 insertions(+), 335 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/opennlp/blob/fc10d2e9/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/BuildModelUpdaterTool.java
--
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/BuildModelUpdaterTool.java
 
b/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/BuildModelUpdaterTool.java
index 327355b..7efd342 100644
--- 
a/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/BuildModelUpdaterTool.java
+++ 
b/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/BuildModelUpdaterTool.java
@@ -20,7 +20,8 @@ package opennlp.tools.cmdline.parser;
 import java.io.IOException;
 
 import opennlp.tools.dictionary.Dictionary;
-import opennlp.tools.ml.maxent.GIS;
+import opennlp.tools.ml.EventTrainer;
+import opennlp.tools.ml.TrainerFactory;
 import opennlp.tools.ml.model.Event;
 import opennlp.tools.ml.model.MaxentModel;
 import opennlp.tools.parser.Parse;
@@ -28,6 +29,7 @@ import opennlp.tools.parser.ParserEventTypeEnum;
 import opennlp.tools.parser.ParserModel;
 import opennlp.tools.parser.chunking.ParserEventStream;
 import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.model.ModelUtil;
 
 public final class BuildModelUpdaterTool extends ModelUpdaterTool {
 
@@ -50,7 +52,8 @@ public final class BuildModelUpdaterTool extends 
ModelUpdaterTool {
 ObjectStream bes = new ParserEventStream(parseSamples,
 originalModel.getHeadRules(), ParserEventTypeEnum.BUILD, mdict);
 
-GIS trainer = new GIS();
+EventTrainer trainer = TrainerFactory.getEventTrainer(
+ModelUtil.createDefaultTrainingParameters(), null);
 MaxentModel buildModel = trainer.train(bes);
 
 parseSamples.close();

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fc10d2e9/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/CheckModelUpdaterTool.java
--
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/CheckModelUpdaterTool.java
 
b/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/CheckModelUpdaterTool.java
index 55e96ba..0c98812 100644
--- 
a/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/CheckModelUpdaterTool.java
+++ 
b/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/CheckModelUpdaterTool.java
@@ -20,7 +20,8 @@ package opennlp.tools.cmdline.parser;
 import java.io.IOException;
 
 import opennlp.tools.dictionary.Dictionary;
-import opennlp.tools.ml.maxent.GIS;
+import opennlp.tools.ml.EventTrainer;
+import opennlp.tools.ml.TrainerFactory;
 import opennlp.tools.ml.model.Event;
 import opennlp.tools.ml.model.MaxentModel;
 import opennlp.tools.parser.Parse;
@@ -28,6 +29,7 @@ import opennlp.tools.parser.ParserEventTypeEnum;
 import opennlp.tools.parser.ParserModel;
 import opennlp.tools.parser.chunking.ParserEventStream;
 import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.model.ModelUtil;
 
 // trains a new check model ...
 public final class CheckModelUpdaterTool extends ModelUpdaterTool {
@@ -51,7 +53,8 @@ public final class CheckModelUpdaterTool extends 
ModelUpdaterTool {
 ObjectStream bes = new ParserEventStream(parseSamples,
 originalModel.getHeadRules(), ParserEventTypeEnum.CHECK, mdict);
 
-GIS trainer = new GIS();
+EventTrainer trainer = TrainerFactory.getEventTrainer(
+ModelUtil.createDefaultTrainingParameters(), null);
 MaxentModel checkModel = trainer.train(bes);
 
 parseSamples.close();

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fc10d2e9/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GIS.java
--
diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GIS.java 
b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GIS.java
deleted file mode 100644

[50/50] [abbrv] opennlp git commit: OPENNLP-1010: Fix NaiveBayes model writer

2017-04-16 Thread joern
OPENNLP-1010: Fix NaiveBayes model writer

The previous sortValues method was based on Perceptron, but for some reason it 
was not working
for NaiveBayes. Changing it to the one from GIS fixed it.

this closes apache/opennlp#154


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/3ac2fb37
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/3ac2fb37
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/3ac2fb37

Branch: refs/heads/parser_regression
Commit: 3ac2fb37750595dfff573bb813b48a9d889052a2
Parents: ef4c667
Author: William D C M SILVA 
Authored: Fri Apr 14 09:35:36 2017 -0300
Committer: Jörn Kottmann 
Committed: Sun Apr 16 19:24:58 2017 +0200

--
 .../ml/naivebayes/NaiveBayesModelWriter.java|  71 ---
 .../NaiveBayesSerializedCorrectnessTest.java| 184 +++
 2 files changed, 225 insertions(+), 30 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/opennlp/blob/3ac2fb37/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/NaiveBayesModelWriter.java
--
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/NaiveBayesModelWriter.java
 
b/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/NaiveBayesModelWriter.java
index bbb6eee..510bf76 100644
--- 
a/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/NaiveBayesModelWriter.java
+++ 
b/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/NaiveBayesModelWriter.java
@@ -55,44 +55,55 @@ public abstract class NaiveBayesModelWriter extends 
AbstractModelWriter {
 }
   }
 
+
   protected ComparablePredicate[] sortValues() {
-ComparablePredicate[] sortPreds;
-ComparablePredicate[] tmpPreds = new ComparablePredicate[PARAMS.length];
-int[] tmpOutcomes = new int[numOutcomes];
-double[] tmpParams = new double[numOutcomes];
-int numPreds = 0;
-//remove parameters with 0 weight and predicates with no parameters
-for (int pid = 0; pid < PARAMS.length; pid++) {
-  int numParams = 0;
-  double[] predParams = PARAMS[pid].getParameters();
-  int[] outcomePattern = PARAMS[pid].getOutcomes();
-  for (int pi = 0; pi < predParams.length; pi++) {
-if (predParams[pi] != 0d) {
-  tmpOutcomes[numParams] = outcomePattern[pi];
-  tmpParams[numParams] = predParams[pi];
-  numParams++;
-}
-  }
 
-  int[] activeOutcomes = new int[numParams];
-  double[] activeParams = new double[numParams];
+ComparablePredicate[] sortPreds = new ComparablePredicate[PARAMS.length];
 
-  for (int pi = 0; pi < numParams; pi++) {
-activeOutcomes[pi] = tmpOutcomes[pi];
-activeParams[pi] = tmpParams[pi];
-  }
-  if (numParams != 0) {
-tmpPreds[numPreds] = new ComparablePredicate(PRED_LABELS[pid], 
activeOutcomes, activeParams);
-numPreds++;
-  }
+int numParams = 0;
+for (int pid = 0; pid < PARAMS.length; pid++) {
+  int[] predkeys = PARAMS[pid].getOutcomes();
+  // Arrays.sort(predkeys);
+  int numActive = predkeys.length;
+  double[] activeParams = PARAMS[pid].getParameters();
+
+  numParams += numActive;
+  /*
+   * double[] activeParams = new double[numActive];
+   *
+   * int id = 0; for (int i=0; i < predkeys.length; i++) { int oid =
+   * predkeys[i]; activeOutcomes[id] = oid; activeParams[id] =
+   * PARAMS[pid].getParams(oid); id++; }
+   */
+  sortPreds[pid] = new ComparablePredicate(PRED_LABELS[pid],
+  predkeys, activeParams);
 }
-System.err.println("Compressed " + PARAMS.length + " parameters to " + 
numPreds);
-sortPreds = new ComparablePredicate[numPreds];
-System.arraycopy(tmpPreds, 0, sortPreds, 0, numPreds);
+
 Arrays.sort(sortPreds);
 return sortPreds;
   }
 
+  protected List 
compressOutcomes(ComparablePredicate[] sorted) {
+List outcomePatterns = new ArrayList<>();
+if (sorted.length > 0) {
+  ComparablePredicate cp = sorted[0];
+  List newGroup = new ArrayList<>();
+  for (int i = 0; i < sorted.length; i++) {
+if (cp.compareTo(sorted[i]) == 0) {
+  newGroup.add(sorted[i]);
+} else {
+  cp = sorted[i];
+  outcomePatterns.add(newGroup);
+  newGroup = new ArrayList<>();
+  newGroup.add(sorted[i]);
+}
+  }
+  outcomePatterns.add(newGroup);
+}
+return outcomePatterns;
+  }
+
+
 
   protected List 
computeOutcomePatterns(ComparablePredicate[] sorted) {
 ComparablePredicate cp = sorted[0];

http://git-wip-us.apache.org/repos/asf/opennlp/blob/3ac2fb37/opennlp-tools/src/test/java/opennlp/tools/ml/naivebayes/NaiveBayesSerializedCorrectnessTest.java

[01/50] [abbrv] opennlp git commit: Rolling back Release 1.7.2 RC

2017-04-16 Thread joern
Repository: opennlp
Updated Branches:
  refs/heads/parser_regression [created] 3ac2fb377


Rolling back Release 1.7.2 RC


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/a81f37b3
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/a81f37b3
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/a81f37b3

Branch: refs/heads/parser_regression
Commit: a81f37b3c89c37b092f0a83d1c5cf5959bafd10c
Parents: c91d353
Author: smarthi 
Authored: Wed Feb 1 09:15:41 2017 -0500
Committer: Jörn Kottmann 
Committed: Sun Apr 16 19:24:50 2017 +0200

--
 opennlp-brat-annotator/pom.xml   | 2 +-
 opennlp-distr/pom.xml| 2 +-
 opennlp-docs/pom.xml | 2 +-
 opennlp-morfologik-addon/pom.xml | 2 +-
 opennlp-tools/pom.xml| 2 +-
 opennlp-uima/pom.xml | 2 +-
 pom.xml  | 2 +-
 7 files changed, 7 insertions(+), 7 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/opennlp/blob/a81f37b3/opennlp-brat-annotator/pom.xml
--
diff --git a/opennlp-brat-annotator/pom.xml b/opennlp-brat-annotator/pom.xml
index 1633deb..4bf95cf 100644
--- a/opennlp-brat-annotator/pom.xml
+++ b/opennlp-brat-annotator/pom.xml
@@ -17,7 +17,7 @@

org.apache.opennlp
opennlp
-   1.7.3-SNAPSHOT
+   1.7.2-SNAPSHOT
../pom.xml

 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/a81f37b3/opennlp-distr/pom.xml
--
diff --git a/opennlp-distr/pom.xml b/opennlp-distr/pom.xml
index c0a57c3..613bd80 100644
--- a/opennlp-distr/pom.xml
+++ b/opennlp-distr/pom.xml
@@ -24,7 +24,7 @@

org.apache.opennlp
opennlp
-   1.7.3-SNAPSHOT
+   1.7.2-SNAPSHOT
../pom.xml

 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/a81f37b3/opennlp-docs/pom.xml
--
diff --git a/opennlp-docs/pom.xml b/opennlp-docs/pom.xml
index fd2b0d1..7b916c0 100644
--- a/opennlp-docs/pom.xml
+++ b/opennlp-docs/pom.xml
@@ -24,7 +24,7 @@
   
org.apache.opennlp
opennlp
-   1.7.3-SNAPSHOT
+   1.7.2-SNAPSHOT
 ../pom.xml
   
   

http://git-wip-us.apache.org/repos/asf/opennlp/blob/a81f37b3/opennlp-morfologik-addon/pom.xml
--
diff --git a/opennlp-morfologik-addon/pom.xml b/opennlp-morfologik-addon/pom.xml
index 1c384c7..d62a70a 100644
--- a/opennlp-morfologik-addon/pom.xml
+++ b/opennlp-morfologik-addon/pom.xml
@@ -24,7 +24,7 @@

org.apache.opennlp
opennlp
-   1.7.3-SNAPSHOT
+   1.7.2-SNAPSHOT
../pom.xml

 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/a81f37b3/opennlp-tools/pom.xml
--
diff --git a/opennlp-tools/pom.xml b/opennlp-tools/pom.xml
index 22fc017..9441ebb 100644
--- a/opennlp-tools/pom.xml
+++ b/opennlp-tools/pom.xml
@@ -25,7 +25,7 @@
   
 org.apache.opennlp
 opennlp
-1.7.3-SNAPSHOT
+1.7.2-SNAPSHOT
 ../pom.xml
   
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/a81f37b3/opennlp-uima/pom.xml
--
diff --git a/opennlp-uima/pom.xml b/opennlp-uima/pom.xml
index 070fec9..1e99c3d 100644
--- a/opennlp-uima/pom.xml
+++ b/opennlp-uima/pom.xml
@@ -25,7 +25,7 @@

org.apache.opennlp
opennlp
-   1.7.3-SNAPSHOT
+   1.7.2-SNAPSHOT
../pom.xml
 
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/a81f37b3/pom.xml
--
diff --git a/pom.xml b/pom.xml
index 98acfb1..7081f25 100644
--- a/pom.xml
+++ b/pom.xml
@@ -31,7 +31,7 @@
 
org.apache.opennlp
opennlp
-   1.7.3-SNAPSHOT
+   1.7.2-SNAPSHOT
pom
 
Apache OpenNLP Reactor



[45/50] [abbrv] opennlp git commit: OPENNLP-1006: Refactor usage of tag constants in sequence validators

2017-04-16 Thread joern
OPENNLP-1006: Refactor usage of tag constants in sequence validators


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/8abe90d3
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/8abe90d3
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/8abe90d3

Branch: refs/heads/parser_regression
Commit: 8abe90d3f79f4e8bd8da0780bb8368b018aee64b
Parents: 81b07ec
Author: Peter Thygesen 
Authored: Tue Mar 28 16:59:34 2017 +0200
Committer: Jörn Kottmann 
Committed: Sun Apr 16 19:24:57 2017 +0200

--
 .../src/main/java/opennlp/tools/namefind/BioCodec.java   | 10 +-
 .../tools/namefind/NameFinderSequenceValidator.java  | 11 +++
 2 files changed, 12 insertions(+), 9 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/opennlp/blob/8abe90d3/opennlp-tools/src/main/java/opennlp/tools/namefind/BioCodec.java
--
diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/BioCodec.java 
b/opennlp-tools/src/main/java/opennlp/tools/namefind/BioCodec.java
index 2218021..c0570a5 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/namefind/BioCodec.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/BioCodec.java
@@ -118,13 +118,13 @@ public class BioCodec implements SequenceCodec {
 
 for (int i = 0; i < outcomes.length; i++) {
   String outcome = outcomes[i];
-  if (outcome.endsWith(NameFinderME.START)) {
+  if (outcome.endsWith(BioCodec.START)) {
 start.add(outcome.substring(0, outcome.length()
-- NameFinderME.START.length()));
-  } else if (outcome.endsWith(NameFinderME.CONTINUE)) {
+- BioCodec.START.length()));
+  } else if (outcome.endsWith(BioCodec.CONTINUE)) {
 cont.add(outcome.substring(0, outcome.length()
-- NameFinderME.CONTINUE.length()));
-  } else if (!outcome.equals(NameFinderME.OTHER)) {
+- BioCodec.CONTINUE.length()));
+  } else if (!outcome.equals(BioCodec.OTHER)) {
 // got unexpected outcome
 return false;
   }

http://git-wip-us.apache.org/repos/asf/opennlp/blob/8abe90d3/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderSequenceValidator.java
--
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderSequenceValidator.java
 
b/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderSequenceValidator.java
index 5143468..bb6700e 100644
--- 
a/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderSequenceValidator.java
+++ 
b/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderSequenceValidator.java
@@ -19,6 +19,9 @@ package opennlp.tools.namefind;
 
 import opennlp.tools.util.SequenceValidator;
 
+/**
+ * This class is created by the {@link BioCodec}.
+ */
 public class NameFinderSequenceValidator implements
 SequenceValidator {
 
@@ -27,16 +30,16 @@ public class NameFinderSequenceValidator implements
 
 // outcome is formatted like "cont" or "sometype-cont", so we
 // can check if it ends with "cont".
-if (outcome.endsWith(NameFinderME.CONTINUE)) {
+if (outcome.endsWith(BioCodec.CONTINUE)) {
 
   int li = outcomesSequence.length - 1;
 
   if (li == -1) {
 return false;
-  } else if (outcomesSequence[li].endsWith(NameFinderME.OTHER)) {
+  } else if (outcomesSequence[li].endsWith(BioCodec.OTHER)) {
 return false;
-  } else if (outcomesSequence[li].endsWith(NameFinderME.CONTINUE) ||
-  outcomesSequence[li].endsWith(NameFinderME.START)) {
+  } else if (outcomesSequence[li].endsWith(BioCodec.CONTINUE) ||
+  outcomesSequence[li].endsWith(BioCodec.START)) {
 // if it is continue or start, we have to check if previous match was 
of the same type
 String previousNameType = 
NameFinderME.extractNameType(outcomesSequence[li]);
 String nameType = NameFinderME.extractNameType(outcome);



[10/50] [abbrv] opennlp git commit: OpenNLP-981: Add training stream hash to AbstractEventTrainer. This closes #118.

2017-04-16 Thread joern
OpenNLP-981: Add training stream hash to AbstractEventTrainer. This closes #118.


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/cd23b58a
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/cd23b58a
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/cd23b58a

Branch: refs/heads/parser_regression
Commit: cd23b58a3c04053d8c6cafa761aa0fc533774304
Parents: daa9fca
Author: Daniel Russ 
Authored: Thu Feb 9 09:56:12 2017 -0500
Committer: Jörn Kottmann 
Committed: Sun Apr 16 19:24:52 2017 +0200

--
 .../opennlp/tools/ml/AbstractEventTrainer.java |  1 +
 .../ml/perceptron/PerceptronPrepAttachTest.java| 17 +
 2 files changed, 18 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/opennlp/blob/cd23b58a/opennlp-tools/src/main/java/opennlp/tools/ml/AbstractEventTrainer.java
--
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/ml/AbstractEventTrainer.java 
b/opennlp-tools/src/main/java/opennlp/tools/ml/AbstractEventTrainer.java
index c465f88..bb11aaa 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/ml/AbstractEventTrainer.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/ml/AbstractEventTrainer.java
@@ -88,6 +88,7 @@ public abstract class AbstractEventTrainer extends 
AbstractTrainer implements Ev
 HashSumEventStream hses = new HashSumEventStream(events);
 DataIndexer indexer = getDataIndexer(hses);
 
+addToReport("Training-Eventhash", hses.calculateHashSum().toString(16));
 return train(indexer);
   }
 }

http://git-wip-us.apache.org/repos/asf/opennlp/blob/cd23b58a/opennlp-tools/src/test/java/opennlp/tools/ml/perceptron/PerceptronPrepAttachTest.java
--
diff --git 
a/opennlp-tools/src/test/java/opennlp/tools/ml/perceptron/PerceptronPrepAttachTest.java
 
b/opennlp-tools/src/test/java/opennlp/tools/ml/perceptron/PerceptronPrepAttachTest.java
index d4d70ca..eda49f8 100644
--- 
a/opennlp-tools/src/test/java/opennlp/tools/ml/perceptron/PerceptronPrepAttachTest.java
+++ 
b/opennlp-tools/src/test/java/opennlp/tools/ml/perceptron/PerceptronPrepAttachTest.java
@@ -23,6 +23,7 @@ import java.io.DataInputStream;
 import java.io.DataOutputStream;
 import java.io.IOException;
 import java.util.HashMap;
+import java.util.Map;
 
 import org.junit.Assert;
 import org.junit.Test;
@@ -134,4 +135,20 @@ public class PerceptronPrepAttachTest {
 Assert.assertEquals(modelA, modelB);
 Assert.assertEquals(modelA.hashCode(), modelB.hashCode());
   }
+  
+  @Test
+  public void verifyReportMap() throws IOException {
+TrainingParameters trainParams = new TrainingParameters();
+trainParams.put(AbstractTrainer.ALGORITHM_PARAM, 
PerceptronTrainer.PERCEPTRON_VALUE);
+trainParams.put(AbstractTrainer.CUTOFF_PARAM, Integer.toString(1));
+// Since we are verifying the report map, we don't need to have more than 
1 iteration
+trainParams.put(AbstractTrainer.ITERATIONS_PARAM, Integer.toString(1));
+trainParams.put("UseSkippedAveraging", Boolean.toString(true));
+
+Map reportMap = new HashMap<>();
+EventTrainer trainer = TrainerFactory.getEventTrainer(trainParams, 
reportMap);
+trainer.train(PrepAttachDataUtil.createTrainingStream());
+Assert.assertTrue("Report Map does not contain the training event hash",
+reportMap.containsKey("Training-Eventhash")); 
+  }
 }



[23/50] [abbrv] opennlp git commit: OPENNLP-994: Remove deprecated methods from the Document Categorizer, this closes apache/opennlp#133

2017-04-16 Thread joern
OPENNLP-994: Remove deprecated methods from the Document Categorizer, this 
closes apache/opennlp#133


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/76609f5c
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/76609f5c
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/76609f5c

Branch: refs/heads/parser_regression
Commit: 76609f5c105bcfc3abab6e2d19de283d945c96a6
Parents: 81acc6e
Author: smarthi 
Authored: Mon Feb 27 17:23:40 2017 -0500
Committer: Jörn Kottmann 
Committed: Sun Apr 16 19:24:54 2017 +0200

--
 .../doccat/DoccatCrossValidatorTool.java|   7 +-
 .../tools/cmdline/doccat/DoccatTool.java|  11 +-
 .../tools/cmdline/doccat/DoccatTrainerTool.java |   5 +-
 .../opennlp/tools/doccat/DoccatFactory.java |  93 +
 .../tools/doccat/DocumentCategorizer.java   |  54 ++
 .../doccat/DocumentCategorizerEvaluator.java|   2 +-
 .../tools/doccat/DocumentCategorizerME.java | 101 ++-
 .../opennlp/tools/doccat/DocumentSample.java|   6 --
 .../formats/LeipzigDoccatSampleStream.java  |  19 ++--
 .../tools/doccat/DocumentCategorizerMETest.java |  18 ++--
 .../tools/doccat/DocumentCategorizerNBTest.java |  17 ++--
 .../tools/doccat/DocumentSampleTest.java|   4 +-
 .../doccat/AbstractDocumentCategorizer.java |  29 +++---
 .../java/opennlp/uima/util/AnnotatorUtil.java   |   6 +-
 14 files changed, 66 insertions(+), 306 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/opennlp/blob/76609f5c/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatCrossValidatorTool.java
--
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatCrossValidatorTool.java
 
b/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatCrossValidatorTool.java
index f0f1712..a73aba7 100644
--- 
a/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatCrossValidatorTool.java
+++ 
b/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatCrossValidatorTool.java
@@ -36,7 +36,6 @@ import opennlp.tools.doccat.DoccatEvaluationMonitor;
 import opennlp.tools.doccat.DoccatFactory;
 import opennlp.tools.doccat.DocumentSample;
 import opennlp.tools.doccat.FeatureGenerator;
-import opennlp.tools.tokenize.Tokenizer;
 import opennlp.tools.util.eval.EvaluationMonitor;
 import opennlp.tools.util.model.ModelUtil;
 
@@ -84,16 +83,12 @@ public final class DoccatCrossValidatorTool extends
 FeatureGenerator[] featureGenerators = DoccatTrainerTool
 .createFeatureGenerators(params.getFeatureGenerators());
 
-Tokenizer tokenizer = DoccatTrainerTool.createTokenizer(params
-.getTokenizer());
-
 DoccatEvaluationMonitor[] listenersArr = listeners
 .toArray(new DoccatEvaluationMonitor[listeners.size()]);
 
 DoccatCrossValidator validator;
 try {
-  DoccatFactory factory = DoccatFactory.create(params.getFactory(),
-  tokenizer, featureGenerators);
+  DoccatFactory factory = DoccatFactory.create(params.getFactory(), 
featureGenerators);
   validator = new DoccatCrossValidator(params.getLang(), mlParams,
   factory, listenersArr);
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/76609f5c/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTool.java
--
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTool.java 
b/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTool.java
index a01d354..49a640c 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTool.java
@@ -28,6 +28,7 @@ import opennlp.tools.cmdline.SystemInputStreamFactory;
 import opennlp.tools.doccat.DoccatModel;
 import opennlp.tools.doccat.DocumentCategorizerME;
 import opennlp.tools.doccat.DocumentSample;
+import opennlp.tools.tokenize.WhitespaceTokenizer;
 import opennlp.tools.util.ObjectStream;
 import opennlp.tools.util.ParagraphStream;
 import opennlp.tools.util.PlainTextByLineStream;
@@ -36,7 +37,7 @@ public class DoccatTool extends BasicCmdLineTool {
 
   @Override
   public String getShortDescription() {
-return "learnable document categorizer";
+return "learned document categorizer";
   }
 
   @Override
@@ -53,7 +54,7 @@ public class DoccatTool extends BasicCmdLineTool {
 
   DoccatModel model = new DoccatModelLoader().load(new File(args[0]));
 
-  DocumentCategorizerME doccat = new DocumentCategorizerME(model);
+  DocumentCategorizerME documentCategorizerME = new 
DocumentCategorizerME(model);
 
   /*
* moved 

[07/50] [abbrv] opennlp git commit: [maven-release-plugin] prepare for next development iteration

2017-04-16 Thread joern
[maven-release-plugin] prepare for next development iteration


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/f9db192d
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/f9db192d
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/f9db192d

Branch: refs/heads/parser_regression
Commit: f9db192d33138c9d5048a2349f06e70b13719ff6
Parents: 4b8ebad
Author: smarthi 
Authored: Wed Feb 1 11:03:42 2017 -0500
Committer: Jörn Kottmann 
Committed: Sun Apr 16 19:24:51 2017 +0200

--
 opennlp-brat-annotator/pom.xml   | 2 +-
 opennlp-distr/pom.xml| 2 +-
 opennlp-docs/pom.xml | 2 +-
 opennlp-morfologik-addon/pom.xml | 2 +-
 opennlp-tools/pom.xml| 2 +-
 opennlp-uima/pom.xml | 2 +-
 pom.xml  | 4 ++--
 7 files changed, 8 insertions(+), 8 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/opennlp/blob/f9db192d/opennlp-brat-annotator/pom.xml
--
diff --git a/opennlp-brat-annotator/pom.xml b/opennlp-brat-annotator/pom.xml
index 53517ca..1633deb 100644
--- a/opennlp-brat-annotator/pom.xml
+++ b/opennlp-brat-annotator/pom.xml
@@ -17,7 +17,7 @@

org.apache.opennlp
opennlp
-   1.7.2
+   1.7.3-SNAPSHOT
../pom.xml

 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/f9db192d/opennlp-distr/pom.xml
--
diff --git a/opennlp-distr/pom.xml b/opennlp-distr/pom.xml
index ae86021..c0a57c3 100644
--- a/opennlp-distr/pom.xml
+++ b/opennlp-distr/pom.xml
@@ -24,7 +24,7 @@

org.apache.opennlp
opennlp
-   1.7.2
+   1.7.3-SNAPSHOT
../pom.xml

 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/f9db192d/opennlp-docs/pom.xml
--
diff --git a/opennlp-docs/pom.xml b/opennlp-docs/pom.xml
index b765d91..fd2b0d1 100644
--- a/opennlp-docs/pom.xml
+++ b/opennlp-docs/pom.xml
@@ -24,7 +24,7 @@
   
org.apache.opennlp
opennlp
-   1.7.2
+   1.7.3-SNAPSHOT
 ../pom.xml
   
   

http://git-wip-us.apache.org/repos/asf/opennlp/blob/f9db192d/opennlp-morfologik-addon/pom.xml
--
diff --git a/opennlp-morfologik-addon/pom.xml b/opennlp-morfologik-addon/pom.xml
index 50844f2..1c384c7 100644
--- a/opennlp-morfologik-addon/pom.xml
+++ b/opennlp-morfologik-addon/pom.xml
@@ -24,7 +24,7 @@

org.apache.opennlp
opennlp
-   1.7.2
+   1.7.3-SNAPSHOT
../pom.xml

 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/f9db192d/opennlp-tools/pom.xml
--
diff --git a/opennlp-tools/pom.xml b/opennlp-tools/pom.xml
index 6cdb688..22fc017 100644
--- a/opennlp-tools/pom.xml
+++ b/opennlp-tools/pom.xml
@@ -25,7 +25,7 @@
   
 org.apache.opennlp
 opennlp
-1.7.2
+1.7.3-SNAPSHOT
 ../pom.xml
   
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/f9db192d/opennlp-uima/pom.xml
--
diff --git a/opennlp-uima/pom.xml b/opennlp-uima/pom.xml
index 39f1040..070fec9 100644
--- a/opennlp-uima/pom.xml
+++ b/opennlp-uima/pom.xml
@@ -25,7 +25,7 @@

org.apache.opennlp
opennlp
-   1.7.2
+   1.7.3-SNAPSHOT
../pom.xml
 
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/f9db192d/pom.xml
--
diff --git a/pom.xml b/pom.xml
index bbb48c8..98acfb1 100644
--- a/pom.xml
+++ b/pom.xml
@@ -31,7 +31,7 @@
 
org.apache.opennlp
opennlp
-   1.7.2
+   1.7.3-SNAPSHOT
pom
 
Apache OpenNLP Reactor
@@ -40,7 +40,7 @@

scm:git:git@github.com:apache/opennlp.git

scm:git:https://git-wip-us.apache.org/repos/asf/opennlp.git
https://git-wip-us.apache.org/repos/asf?p=opennlp.git
-   opennlp-1.7.2
+   HEAD

 




[17/50] [abbrv] opennlp git commit: OPENNLP-990 Fix all array style violations and add a checkstyle rule

2017-04-16 Thread joern
OPENNLP-990 Fix all array style violations and add a checkstyle rule

This closes #127


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/fdff127b
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/fdff127b
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/fdff127b

Branch: refs/heads/parser_regression
Commit: fdff127b38dafb2bbb8df186385bfdb8abc0e9d1
Parents: 1cd2658
Author: Peter Thygesen 
Authored: Thu Feb 16 12:48:12 2017 +0100
Committer: Jörn Kottmann 
Committed: Sun Apr 16 19:24:53 2017 +0200

--
 checkstyle.xml  |  1 +
 .../opennlp/bratann/NameFinderAnnService.java   |  2 +-
 .../opennlp/bratann/NameFinderResource.java |  6 ++---
 .../java/opennlp/morfologik/cmdline/CLI.java|  2 +-
 .../chunker/ChunkSampleSequenceStream.java  |  4 +--
 .../java/opennlp/tools/chunker/Chunker.java |  4 +--
 .../tools/cmdline/AbstractConverterTool.java|  2 +-
 .../opennlp/tools/cmdline/ArgumentParser.java   | 16 +--
 .../opennlp/tools/cmdline/BasicCmdLineTool.java |  2 +-
 .../main/java/opennlp/tools/cmdline/CLI.java|  2 +-
 .../java/opennlp/tools/cmdline/CmdLineUtil.java | 10 +++
 .../tools/cmdline/EvaluationErrorPrinter.java   | 10 +++
 .../cmdline/FineGrainedReportListener.java  |  2 +-
 .../tools/cmdline/ObjectStreamFactory.java  |  2 +-
 .../opennlp/tools/cmdline/TypedCmdLineTool.java |  2 +-
 .../tools/cmdline/doccat/DoccatTool.java|  2 +-
 .../cmdline/entitylinker/EntityLinkerTool.java  |  2 +-
 .../TokenNameFinderCrossValidatorTool.java  |  4 +--
 .../namefind/TokenNameFinderEvaluatorTool.java  |  2 +-
 .../cmdline/namefind/TokenNameFinderTool.java   |  6 ++---
 .../namefind/TokenNameFinderTrainerTool.java|  8 +++---
 .../tools/cmdline/postag/POSTaggerTool.java |  2 +-
 .../tokenizer/DictionaryDetokenizerTool.java|  2 +-
 .../opennlp/tools/dictionary/Dictionary.java|  2 +-
 .../tools/doccat/DocumentCategorizer.java   |  6 ++---
 .../DocumentCategorizerContextGenerator.java|  2 +-
 .../doccat/DocumentCategorizerEvaluator.java|  4 +--
 .../tools/doccat/DocumentCategorizerME.java |  4 +--
 .../opennlp/tools/doccat/DocumentSample.java|  4 +--
 .../tools/doccat/DocumentSampleStream.java  |  4 +--
 .../formats/BioNLP2004NameSampleStream.java |  2 +-
 .../tools/formats/Conll02NameSampleStream.java  |  2 +-
 .../tools/formats/Conll03NameSampleStream.java  |  2 +-
 .../tools/formats/ConllXPOSSampleStream.java|  2 +-
 .../tools/formats/DirectorySampleStream.java|  4 +--
 .../tools/formats/EvalitaNameSampleStream.java  |  2 +-
 .../formats/LeipzigDoccatSampleStream.java  |  2 +-
 .../LeipzigDocumentSampleStreamFactory.java |  4 +--
 .../formats/brat/BratAnnotationStream.java  | 10 +++
 .../tools/formats/brat/BratDocument.java|  2 +-
 .../formats/brat/BratNameSampleStream.java  |  2 +-
 .../convert/FileToByteArraySampleStream.java|  2 +-
 .../convert/FileToStringSampleStream.java   |  2 +-
 .../formats/muc/MucNameContentHandler.java  |  2 +-
 .../ontonotes/OntoNotesNameSampleStream.java|  2 +-
 .../lemmatizer/LemmaSampleSequenceStream.java   |  6 ++---
 .../opennlp/tools/lemmatizer/Lemmatizer.java|  2 +-
 .../main/java/opennlp/tools/ml/BeamSearch.java  |  4 +--
 .../java/opennlp/tools/ml/maxent/GISModel.java  |  2 +-
 .../opennlp/tools/ml/model/MaxentModel.java |  2 +-
 .../SimplePerceptronSequenceTrainer.java|  4 +--
 .../java/opennlp/tools/namefind/BioCodec.java   |  2 +-
 .../namefind/DefaultNameContextGenerator.java   |  4 +--
 .../tools/namefind/DictionaryNameFinder.java|  2 +-
 .../tools/namefind/NameFinderEventStream.java   |  2 +-
 .../opennlp/tools/namefind/NameFinderME.java|  2 +-
 .../java/opennlp/tools/namefind/NameSample.java |  2 +-
 .../namefind/NameSampleSequenceStream.java  |  4 +--
 .../opennlp/tools/namefind/RegexNameFinder.java |  8 +++---
 .../opennlp/tools/namefind/TokenNameFinder.java |  2 +-
 .../namefind/TokenNameFinderCrossValidator.java |  4 +--
 .../namefind/TokenNameFinderEvaluator.java  |  4 +--
 .../tools/namefind/TokenNameFinderFactory.java  |  2 +-
 .../tools/namefind/TokenNameFinderModel.java|  2 +-
 .../tools/parser/AbstractBottomUpParser.java|  6 ++---
 .../tools/parser/ChunkContextGenerator.java |  2 +-
 .../tools/parser/ParserChunkerFactory.java  |  2 +-
 .../parser/ParserChunkerSequenceValidator.java  |  2 +-
 .../opennlp/tools/parser/PosSampleStream.java   |  4 +--
 .../opennlp/tools/postag/POSDictionary.java |  2 +-
 .../java/opennlp/tools/postag/POSEvaluator.java |  4 +--
 .../java/opennlp/tools/postag/POSSample.java| 10 +++
 .../tools/postag/POSSampleEventStream.java  |  6 ++---
 .../tools/postag/POSSampleSequenceStream.java   |  4 +--

[11/50] [abbrv] opennlp git commit: OPENNLP-964: Ignore LICENSE, NOTICE and README files in the model

2017-04-16 Thread joern
OPENNLP-964: Ignore LICENSE, NOTICE and README files in the model


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/b41fcd69
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/b41fcd69
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/b41fcd69

Branch: refs/heads/parser_regression
Commit: b41fcd69baef80ed1e99656e9a3b7424aa294bb8
Parents: a2049d6
Author: Jörn Kottmann 
Authored: Thu Feb 2 19:13:02 2017 +0100
Committer: Jörn Kottmann 
Committed: Sun Apr 16 19:24:52 2017 +0200

--
 .../tagdict/MorfologikPOSTaggerFactory.java | 15 +--
 .../tools/namefind/TokenNameFinderModel.java| 14 +-
 .../opennlp/tools/util/model/BaseModel.java |  2 +
 .../tools/util/model/ByteArraySerializer.java   | 33 ++
 .../util/model/ByteArraySerializerTest.java | 45 
 5 files changed, 82 insertions(+), 27 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/opennlp/blob/b41fcd69/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/tagdict/MorfologikPOSTaggerFactory.java
--
diff --git 
a/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/tagdict/MorfologikPOSTaggerFactory.java
 
b/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/tagdict/MorfologikPOSTaggerFactory.java
index 370b4d0..592ef7d 100644
--- 
a/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/tagdict/MorfologikPOSTaggerFactory.java
+++ 
b/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/tagdict/MorfologikPOSTaggerFactory.java
@@ -22,7 +22,6 @@ import java.io.File;
 import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.io.InputStream;
-import java.io.OutputStream;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.util.Map;
@@ -33,7 +32,7 @@ import opennlp.tools.dictionary.Dictionary;
 import opennlp.tools.postag.POSTaggerFactory;
 import opennlp.tools.postag.TagDictionary;
 import opennlp.tools.util.model.ArtifactSerializer;
-import opennlp.tools.util.model.ModelUtil;
+import opennlp.tools.util.model.ByteArraySerializer;
 
 public class MorfologikPOSTaggerFactory extends POSTaggerFactory {
 
@@ -150,16 +149,4 @@ public class MorfologikPOSTaggerFactory extends 
POSTaggerFactory {
 info));
 return new MorfologikTagDictionary(dict);
   }
-
-  static class ByteArraySerializer implements ArtifactSerializer {
-
-public byte[] create(InputStream in) throws IOException {
-  return ModelUtil.read(in);
-}
-
-public void serialize(byte[] artifact, OutputStream out) throws 
IOException {
-  out.write(artifact);
-}
-  }
-
 }

http://git-wip-us.apache.org/repos/asf/opennlp/blob/b41fcd69/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java
--
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java 
b/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java
index 05a3615..09eefc5 100644
--- 
a/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java
+++ 
b/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java
@@ -21,7 +21,6 @@ package opennlp.tools.namefind;
 import java.io.File;
 import java.io.IOException;
 import java.io.InputStream;
-import java.io.OutputStream;
 import java.net.URL;
 import java.util.Map;
 import java.util.Properties;
@@ -36,7 +35,7 @@ import opennlp.tools.util.featuregen.BrownCluster;
 import opennlp.tools.util.featuregen.WordClusterDictionary;
 import opennlp.tools.util.model.ArtifactSerializer;
 import opennlp.tools.util.model.BaseModel;
-import opennlp.tools.util.model.ModelUtil;
+import opennlp.tools.util.model.ByteArraySerializer;
 
 /**
  * The {@link TokenNameFinderModel} is the model used
@@ -53,17 +52,6 @@ public class TokenNameFinderModel extends BaseModel {
 }
   }
 
-  private static class ByteArraySerializer implements 
ArtifactSerializer {
-
-public byte[] create(InputStream in) throws IOException {
-  return ModelUtil.read(in);
-}
-
-public void serialize(byte[] artifact, OutputStream out) throws 
IOException {
-  out.write(artifact);
-}
-  }
-
   private static final String COMPONENT_NAME = "NameFinderME";
   private static final String MAXENT_MODEL_ENTRY_NAME = "nameFinder.model";
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/b41fcd69/opennlp-tools/src/main/java/opennlp/tools/util/model/BaseModel.java
--
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/util/model/BaseModel.java 

[28/50] [abbrv] opennlp git commit: OPENNLP-229: Add test for NameFinderSequenceValidator

2017-04-16 Thread joern
OPENNLP-229: Add test for NameFinderSequenceValidator

This closes #125


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/40cdacb5
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/40cdacb5
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/40cdacb5

Branch: refs/heads/parser_regression
Commit: 40cdacb55583cf70d7e47b26fc0108fa71f3ab51
Parents: ebb5b24
Author: Peter Thygesen 
Authored: Wed Feb 15 21:12:48 2017 +0100
Committer: Jörn Kottmann 
Committed: Sun Apr 16 19:24:54 2017 +0200

--
 .../NameFinderSequenceValidatorTest.java| 186 +++
 1 file changed, 186 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/opennlp/blob/40cdacb5/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderSequenceValidatorTest.java
--
diff --git 
a/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderSequenceValidatorTest.java
 
b/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderSequenceValidatorTest.java
new file mode 100644
index 000..35752c1
--- /dev/null
+++ 
b/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderSequenceValidatorTest.java
@@ -0,0 +1,186 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package opennlp.tools.namefind;
+
+import org.junit.Assert;
+import org.junit.Ignore;
+import org.junit.Test;
+
+/**
+ * This is the test class for {@link NameFinderSequenceValidator}..
+ */
+public class NameFinderSequenceValidatorTest {
+
+  private static NameFinderSequenceValidator validator = new 
NameFinderSequenceValidator();
+  private static String START_A = "TypeA-" + NameFinderME.START;
+  private static String CONTINUE_A = "TypeA-" + NameFinderME.CONTINUE;
+  private static String START_B = "TypeB-" + NameFinderME.START;
+  private static String CONTINUE_B = "TypeB-" + NameFinderME.CONTINUE;
+  private static String OTHER = NameFinderME.OTHER;
+
+  @Test
+  public void testContinueCannotBeFirstOutcome() {
+
+final String outcome = CONTINUE_A;
+
+String[] inputSequence = new String[] {"PersonA", "is", "here"};
+String[] outcomesSequence = new String[] {};
+Assert.assertFalse(validator.validSequence(0, inputSequence, 
outcomesSequence, outcome));
+
+  }
+
+  @Test
+  public void testContinueAfterStartAndSameType() {
+
+final String outcome = CONTINUE_A;
+
+// previous start, same name type
+String[] inputSequence = new String[] {"Stefanie", "Schmidt", "is", 
"German"};
+String[] outcomesSequence = new String[] {START_A};
+Assert.assertTrue(validator.validSequence(1, inputSequence, 
outcomesSequence, outcome));
+
+  }
+
+  @Ignore
+  @Test
+  public void testContinueAfterStartAndNotSameType() {
+
+final String outcome = CONTINUE_B;
+
+// previous start, not same name type
+String[] inputSequence = new String[] {"PersonA", "LocationA", 
"something"};
+String[] outcomesSequence = new String[] {START_A};
+Assert.assertFalse(validator.validSequence(1, inputSequence, 
outcomesSequence, outcome));
+  }
+
+  @Test
+  public void testContinueAfterContinueAndSameType() {
+
+final String outcome = CONTINUE_A;
+
+// previous continue, same name type
+String[] inputSequence = new String[] {"FirstName", "MidleName", 
"LastName", "is", "a", "long", "name"};
+String[] outcomesSequence = new String[] {START_A, CONTINUE_A};
+Assert.assertTrue(validator.validSequence(2, inputSequence, 
outcomesSequence, outcome));
+  }
+
+  @Test
+  public void testContinueAfterContinueAndNotSameType() {
+
+final String outcome = CONTINUE_B;
+
+// previous continue, not same name type
+String[] inputSequence = new String[] {"FirstName", "LastName", 
"LocationA", "something"};
+String[] outcomesSequence = new String[] {START_A, CONTINUE_A};
+Assert.assertFalse(validator.validSequence(2, inputSequence, 
outcomesSequence, outcome));
+  }
+
+  @Test
+  public void testContinueAfterOther() {
+
+final 

[05/50] [abbrv] opennlp git commit: OpenNLP-977: Remove deprecated map methods

2017-04-16 Thread joern
OpenNLP-977: Remove deprecated map methods


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/51cd8091
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/51cd8091
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/51cd8091

Branch: refs/heads/parser_regression
Commit: 51cd80914b9cddb5771232929b1e9326cecc6170
Parents: 212cf14
Author: Daniel Russ 
Authored: Mon Feb 6 13:39:59 2017 -0500
Committer: Jörn Kottmann 
Committed: Sun Apr 16 19:24:51 2017 +0200

--
 .../java/opennlp/tools/chunker/ChunkerME.java   |  6 +-
 .../java/opennlp/tools/cmdline/CmdLineUtil.java |  4 +-
 .../tools/cmdline/parser/ParserTrainerTool.java | 10 +--
 .../cmdline/postag/POSTaggerTrainerTool.java|  2 +-
 .../sentdetect/SentenceDetectorTrainerTool.java |  2 +-
 .../cmdline/tokenizer/TokenizerTrainerTool.java |  4 +-
 .../tools/doccat/DocumentCategorizerME.java |  2 +-
 .../opennlp/tools/lemmatizer/LemmatizerME.java  |  8 +-
 .../tools/ml/EventModelSequenceTrainer.java |  3 +
 .../java/opennlp/tools/ml/EventTrainer.java |  3 +
 .../java/opennlp/tools/ml/SequenceTrainer.java  |  3 +
 .../java/opennlp/tools/ml/TrainerFactory.java   | 83 +---
 .../opennlp/tools/namefind/NameFinderME.java|  8 +-
 .../opennlp/tools/parser/chunking/Parser.java   |  4 +-
 .../opennlp/tools/parser/treeinsert/Parser.java |  6 +-
 .../java/opennlp/tools/postag/POSTaggerME.java  |  8 +-
 .../tools/sentdetect/SentenceDetectorME.java|  2 +-
 .../opennlp/tools/tokenize/TokenizerME.java |  2 +-
 .../java/opennlp/tools/ml/MockEventTrainer.java |  6 ++
 .../opennlp/tools/ml/MockSequenceTrainer.java   |  6 ++
 .../opennlp/tools/ml/TrainerFactoryTest.java| 12 +--
 .../tools/ml/maxent/GISIndexingTest.java|  4 +-
 .../tools/ml/maxent/MaxentPrepAttachTest.java   |  5 +-
 .../ml/maxent/quasinewton/QNPrepAttachTest.java | 11 ++-
 .../ml/naivebayes/NaiveBayesPrepAttachTest.java |  5 +-
 .../ml/perceptron/PerceptronPrepAttachTest.java | 11 ++-
 .../java/opennlp/uima/util/OpennlpUtil.java |  4 +-
 27 files changed, 114 insertions(+), 110 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/opennlp/blob/51cd8091/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerME.java
--
diff --git a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerME.java 
b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerME.java
index a59b5ce..71917fb 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerME.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerME.java
@@ -171,7 +171,7 @@ public class ChunkerME implements Chunker {
 
 Map manifestInfoEntries = new HashMap<>();
 
-TrainerType trainerType = 
TrainerFactory.getTrainerType(mlParams.getSettings());
+TrainerType trainerType = TrainerFactory.getTrainerType(mlParams);
 
 
 MaxentModel chunkerModel = null;
@@ -179,13 +179,13 @@ public class ChunkerME implements Chunker {
 
 if (TrainerType.EVENT_MODEL_TRAINER.equals(trainerType)) {
   ObjectStream es = new ChunkerEventStream(in, 
factory.getContextGenerator());
-  EventTrainer trainer = 
TrainerFactory.getEventTrainer(mlParams.getSettings(),
+  EventTrainer trainer = TrainerFactory.getEventTrainer(mlParams,
   manifestInfoEntries);
   chunkerModel = trainer.train(es);
 }
 else if (TrainerType.SEQUENCE_TRAINER.equals(trainerType)) {
   SequenceTrainer trainer = TrainerFactory.getSequenceModelTrainer(
-  mlParams.getSettings(), manifestInfoEntries);
+  mlParams, manifestInfoEntries);
 
   // TODO: This will probably cause issue, since the feature generator 
uses the outcomes array
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/51cd8091/opennlp-tools/src/main/java/opennlp/tools/cmdline/CmdLineUtil.java
--
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/CmdLineUtil.java 
b/opennlp-tools/src/main/java/opennlp/tools/cmdline/CmdLineUtil.java
index 6855898..7ea2a0b 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/CmdLineUtil.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/CmdLineUtil.java
@@ -324,11 +324,11 @@ public final class CmdLineUtil {
 throw new TerminateToolException(-1, "Error during parameters loading: 
" + e.getMessage(), e);
   }
 
-  if (!TrainerFactory.isValid(params.getSettings())) {
+  if (!TrainerFactory.isValid(params)) {
 throw new TerminateToolException(1, "Training parameters file '" + 
paramFile + "' is invalid!");
   }
 
-  TrainerFactory.TrainerType trainerType =