opennlp git commit: OPENNLP-1022:Fix documentation to remove references to 'Save XXXModel to database, this closes apache/opennlp#158
Repository: opennlp Updated Branches: refs/heads/master a59765cd4 -> f8fbfc9fd OPENNLP-1022:Fix documentation to remove references to 'Save XXXModel to database, this closes apache/opennlp#158 Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/f8fbfc9f Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/f8fbfc9f Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/f8fbfc9f Branch: refs/heads/master Commit: f8fbfc9fdca4b5e9ba1a5608ca17e7b6feb18c3c Parents: a59765c Author: smarthiAuthored: Sun Apr 16 21:45:17 2017 -0400 Committer: smarthi Committed: Sun Apr 16 21:45:17 2017 -0400 -- opennlp-docs/src/docbkx/chunker.xml | 29 ++--- opennlp-docs/src/docbkx/doccat.xml | 44 ++ opennlp-docs/src/docbkx/introduction.xml| 17 +- opennlp-docs/src/docbkx/lemmatizer.xml | 38 +++- opennlp-docs/src/docbkx/namefinder.xml | 36 +++- opennlp-docs/src/docbkx/parser.xml | 2 +- opennlp-docs/src/docbkx/postagger.xml | 62 ++-- opennlp-docs/src/docbkx/sentdetect.xml | 33 ++- opennlp-docs/src/docbkx/tokenizer.xml | 15 + .../main/java/opennlp/tools/ml/BeamSearch.java | 23 +++- 10 files changed, 46 insertions(+), 253 deletions(-) -- http://git-wip-us.apache.org/repos/asf/opennlp/blob/f8fbfc9f/opennlp-docs/src/docbkx/chunker.xml -- diff --git a/opennlp-docs/src/docbkx/chunker.xml b/opennlp-docs/src/docbkx/chunker.xml index 0c04e8a..b67a7fd 100644 --- a/opennlp-docs/src/docbkx/chunker.xml +++ b/opennlp-docs/src/docbkx/chunker.xml @@ -81,19 +81,8 @@ Rockwell_NNP said_VBD the_DT agreement_NN calls_VBZ for_IN it_PRP to_TO supply_V InputStream modelIn = null; ChunkerModel model = null; -try { - modelIn = new FileInputStream("en-chunker.bin"); +try (modelIn = new FileInputStream("en-chunker.bin")){ model = new ChunkerModel(modelIn); -} catch (IOException e) { - // Model loading failed, handle the error - e.printStackTrace(); -} finally { - if (modelIn != null) { -try { - modelIn.close(); -} catch (IOException e) { -} - } 
}]]> After the model is loaded a Chunker can be instantiated. @@ -242,28 +231,18 @@ $ opennlp ChunkerTrainerME -model en-chunker.bin -lang en -data en-chunker.train illustrates how to do it: http://git-wip-us.apache.org/repos/asf/opennlp/blob/f8fbfc9f/opennlp-docs/src/docbkx/doccat.xml -- diff --git a/opennlp-docs/src/docbkx/doccat.xml b/opennlp-docs/src/docbkx/doccat.xml index 7fe3f1f..c056732 100644 --- a/opennlp-docs/src/docbkx/doccat.xml +++ b/opennlp-docs/src/docbkx/doccat.xml @@ -127,33 +127,16 @@ $ opennlp DoccatTrainer -model en-doccat.bin -lang en -data en-doccat.train -enc +]]> Now might be a good time to cruise over to Hulu or something, because this could take a while if you've got a large training set. You may see a lot of output as well. Once you're done, you can pretty quickly step to classification directly, @@ -162,27 +145,10 @@ finally { +]]> http://git-wip-us.apache.org/repos/asf/opennlp/blob/f8fbfc9f/opennlp-docs/src/docbkx/introduction.xml -- diff --git a/opennlp-docs/src/docbkx/introduction.xml b/opennlp-docs/src/docbkx/introduction.xml index a3bd482..65fcd9d 100644 --- a/opennlp-docs/src/docbkx/introduction.xml +++ b/opennlp-docs/src/docbkx/introduction.xml @@ -65,23 +65,10 @@ under the License. constructor of the model class: +]]> http://git-wip-us.apache.org/repos/asf/opennlp/blob/f8fbfc9f/opennlp-docs/src/docbkx/lemmatizer.xml -- diff --git a/opennlp-docs/src/docbkx/lemmatizer.xml b/opennlp-docs/src/docbkx/lemmatizer.xml index 34668d0..1fa5540 100644 --- a/opennlp-docs/src/docbkx/lemmatizer.xml +++ b/opennlp-docs/src/docbkx/lemmatizer.xml @@ -88,22 +88,11 @@ signed VBD sign In the example below it is loaded from disk: +} +]]> After the model is loaded a LemmatizerME can be instantiated.
[opennlp] Git Push Summary
Repository: opennlp Updated Branches: refs/heads/OPENNLP-1020 [deleted] 2691b7106
opennlp git commit: OPENNLP-1020: MockInputStreamFactory.createInputStream should create a new InputStream. This closes apache/opennlp#156
Repository: opennlp Updated Branches: refs/heads/master 27214015c -> a59765cd4 OPENNLP-1020: MockInputStreamFactory.createInputStream should create a new InputStream. This closes apache/opennlp#156 Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/a59765cd Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/a59765cd Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/a59765cd Branch: refs/heads/master Commit: a59765cd4eb84f58af508ba5494c42b579c1dab3 Parents: 2721401 Author: kojiAuthored: Mon Apr 17 10:30:40 2017 +0900 Committer: koji Committed: Mon Apr 17 10:30:40 2017 +0900 -- .../lemmatizer/LemmatizerEvaluatorTest.java | 14 +++--- .../tools/lemmatizer/LemmatizerMETest.java | 8 ++- .../tools/namefind/NameFinderMETest.java| 51 ++-- .../tools/util/MockInputStreamFactory.java | 30 4 files changed, 46 insertions(+), 57 deletions(-) -- http://git-wip-us.apache.org/repos/asf/opennlp/blob/a59765cd/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/LemmatizerEvaluatorTest.java -- diff --git a/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/LemmatizerEvaluatorTest.java b/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/LemmatizerEvaluatorTest.java index 0eb775d..2f4e74d 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/LemmatizerEvaluatorTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/LemmatizerEvaluatorTest.java @@ -18,8 +18,8 @@ package opennlp.tools.lemmatizer; import java.io.ByteArrayOutputStream; +import java.io.File; import java.io.IOException; -import java.io.InputStream; import java.io.OutputStream; import org.junit.Assert; @@ -47,18 +47,18 @@ public class LemmatizerEvaluatorTest { */ @Test public void testEvaluator() throws IOException { -InputStream inPredicted = getClass().getClassLoader() -.getResourceAsStream("opennlp/tools/lemmatizer/output.txt"); -InputStream inExpected = getClass().getClassLoader() 
-.getResourceAsStream("opennlp/tools/lemmatizer/output.txt"); +String inPredicted = "opennlp/tools/lemmatizer/output.txt"; +String inExpected = "opennlp/tools/lemmatizer/output.txt"; String encoding = "UTF-8"; DummyLemmaSampleStream predictedSample = new DummyLemmaSampleStream( -new PlainTextByLineStream(new MockInputStreamFactory(inPredicted), encoding), true); +new PlainTextByLineStream( + new MockInputStreamFactory(new File(inPredicted)), encoding), true); DummyLemmaSampleStream expectedSample = new DummyLemmaSampleStream( -new PlainTextByLineStream(new MockInputStreamFactory(inExpected), encoding), false); +new PlainTextByLineStream( + new MockInputStreamFactory(new File(inExpected)), encoding), false); Lemmatizer dummyLemmatizer = new DummyLemmatizer(predictedSample); http://git-wip-us.apache.org/repos/asf/opennlp/blob/a59765cd/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/LemmatizerMETest.java -- diff --git a/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/LemmatizerMETest.java b/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/LemmatizerMETest.java index 97dcc3c..4631763 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/LemmatizerMETest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/LemmatizerMETest.java @@ -17,8 +17,8 @@ package opennlp.tools.lemmatizer; +import java.io.File; import java.io.IOException; -import java.io.InputStream; import org.junit.Assert; import org.junit.Before; @@ -63,11 +63,9 @@ public class LemmatizerMETest { public void startup() throws IOException { // train the lemmatizer -InputStream in = getClass().getClassLoader() -.getResourceAsStream("opennlp/tools/lemmatizer/trial.old.tsv"); - ObjectStream sampleStream = new LemmaSampleStream( -new PlainTextByLineStream(new MockInputStreamFactory(in), "UTF-8")); +new PlainTextByLineStream(new MockInputStreamFactory( + new File("opennlp/tools/lemmatizer/trial.old.tsv")), "UTF-8")); TrainingParameters params = new TrainingParameters(); 
params.put(TrainingParameters.ITERATIONS_PARAM, Integer.toString(100)); http://git-wip-us.apache.org/repos/asf/opennlp/blob/a59765cd/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderMETest.java -- diff --git a/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderMETest.java b/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderMETest.java index
opennlp git commit: OPENNLP-1020: MockInputStreamFactory.createInputStream should create a new InputStream
Repository: opennlp Updated Branches: refs/heads/OPENNLP-1020 [created] 2691b7106 OPENNLP-1020: MockInputStreamFactory.createInputStream should create a new InputStream Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/2691b710 Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/2691b710 Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/2691b710 Branch: refs/heads/OPENNLP-1020 Commit: 2691b7106f5d5698c658af85a814029a32a4331e Parents: 2721401 Author: kojiAuthored: Mon Apr 17 09:44:04 2017 +0900 Committer: koji Committed: Mon Apr 17 09:44:04 2017 +0900 -- .../lemmatizer/LemmatizerEvaluatorTest.java | 14 +++--- .../tools/lemmatizer/LemmatizerMETest.java | 8 ++- .../tools/namefind/NameFinderMETest.java| 51 ++-- .../tools/util/MockInputStreamFactory.java | 30 4 files changed, 46 insertions(+), 57 deletions(-) -- http://git-wip-us.apache.org/repos/asf/opennlp/blob/2691b710/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/LemmatizerEvaluatorTest.java -- diff --git a/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/LemmatizerEvaluatorTest.java b/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/LemmatizerEvaluatorTest.java index 0eb775d..2f4e74d 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/LemmatizerEvaluatorTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/LemmatizerEvaluatorTest.java @@ -18,8 +18,8 @@ package opennlp.tools.lemmatizer; import java.io.ByteArrayOutputStream; +import java.io.File; import java.io.IOException; -import java.io.InputStream; import java.io.OutputStream; import org.junit.Assert; @@ -47,18 +47,18 @@ public class LemmatizerEvaluatorTest { */ @Test public void testEvaluator() throws IOException { -InputStream inPredicted = getClass().getClassLoader() -.getResourceAsStream("opennlp/tools/lemmatizer/output.txt"); -InputStream inExpected = getClass().getClassLoader() 
-.getResourceAsStream("opennlp/tools/lemmatizer/output.txt"); +String inPredicted = "opennlp/tools/lemmatizer/output.txt"; +String inExpected = "opennlp/tools/lemmatizer/output.txt"; String encoding = "UTF-8"; DummyLemmaSampleStream predictedSample = new DummyLemmaSampleStream( -new PlainTextByLineStream(new MockInputStreamFactory(inPredicted), encoding), true); +new PlainTextByLineStream( + new MockInputStreamFactory(new File(inPredicted)), encoding), true); DummyLemmaSampleStream expectedSample = new DummyLemmaSampleStream( -new PlainTextByLineStream(new MockInputStreamFactory(inExpected), encoding), false); +new PlainTextByLineStream( + new MockInputStreamFactory(new File(inExpected)), encoding), false); Lemmatizer dummyLemmatizer = new DummyLemmatizer(predictedSample); http://git-wip-us.apache.org/repos/asf/opennlp/blob/2691b710/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/LemmatizerMETest.java -- diff --git a/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/LemmatizerMETest.java b/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/LemmatizerMETest.java index 97dcc3c..4631763 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/LemmatizerMETest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/LemmatizerMETest.java @@ -17,8 +17,8 @@ package opennlp.tools.lemmatizer; +import java.io.File; import java.io.IOException; -import java.io.InputStream; import org.junit.Assert; import org.junit.Before; @@ -63,11 +63,9 @@ public class LemmatizerMETest { public void startup() throws IOException { // train the lemmatizer -InputStream in = getClass().getClassLoader() -.getResourceAsStream("opennlp/tools/lemmatizer/trial.old.tsv"); - ObjectStream sampleStream = new LemmaSampleStream( -new PlainTextByLineStream(new MockInputStreamFactory(in), "UTF-8")); +new PlainTextByLineStream(new MockInputStreamFactory( + new File("opennlp/tools/lemmatizer/trial.old.tsv")), "UTF-8")); TrainingParameters params = new TrainingParameters(); 
params.put(TrainingParameters.ITERATIONS_PARAM, Integer.toString(100)); http://git-wip-us.apache.org/repos/asf/opennlp/blob/2691b710/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderMETest.java -- diff --git a/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderMETest.java b/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderMETest.java index c258d07..876df5b 100644
opennlp git commit: OPENNLP-1024:Add unit tests and javadocs for DirectorySampleStream, this closes apache/opennlp#160
Repository: opennlp Updated Branches: refs/heads/master cff6e0009 -> 27214015c OPENNLP-1024:Add unit tests and javadocs for DirectorySampleStream, this closes apache/opennlp#160 Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/27214015 Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/27214015 Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/27214015 Branch: refs/heads/master Commit: 27214015c725619e0098e227fd29816d0891d4c5 Parents: cff6e00 Author: jzonthemtnAuthored: Sun Apr 16 17:24:16 2017 -0400 Committer: smarthi Committed: Sun Apr 16 17:24:19 2017 -0400 -- .../tools/formats/DirectorySampleStream.java| 34 +++- .../formats/DirectorySampleStreamTest.java | 187 +++ 2 files changed, 215 insertions(+), 6 deletions(-) -- http://git-wip-us.apache.org/repos/asf/opennlp/blob/27214015/opennlp-tools/src/main/java/opennlp/tools/formats/DirectorySampleStream.java -- diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/DirectorySampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/DirectorySampleStream.java index 3a5621a..da73507 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/DirectorySampleStream.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/DirectorySampleStream.java @@ -28,24 +28,30 @@ import java.util.Stack; import opennlp.tools.util.ObjectStream; /** - * The directory sample stream scans a directory (recursively) for plain text - * files and outputs each file as a String object. + * The directory sample stream allows for creating a stream + * from a directory listing of files. */ public class DirectorySampleStream implements ObjectStream { private final List inputDirectories; - private final boolean isRecursiveScan; + private final boolean recursive; private final FileFilter fileFilter; private Stack directories = new Stack<>(); private Stack textFiles = new Stack<>(); - + + /** + * Creates a new directory sample stream. 
+ * @param dirs The directories to read. + * @param fileFilter The {@link FileFilter filter} to apply while enumerating files. + * @param recursive Enables or disables recursive file listing. + */ public DirectorySampleStream(File[] dirs, FileFilter fileFilter, boolean recursive) { this.fileFilter = fileFilter; -isRecursiveScan = recursive; +this.recursive = recursive; List inputDirectoryList = new ArrayList<>(dirs.length); @@ -64,10 +70,17 @@ public class DirectorySampleStream implements ObjectStream { directories.addAll(inputDirectories); } + /** + * Creates a new directory sample stream. + * @param dir The {@link File directory}. + * @param fileFilter The {@link FileFilter filter} to apply while enumerating files. + * @param recursive Enables or disables recursive file listing. + */ public DirectorySampleStream(File dir, FileFilter fileFilter, boolean recursive) { this(new File[]{dir}, fileFilter, recursive); } + @Override public File read() throws IOException { while (textFiles.isEmpty() && !directories.isEmpty()) { @@ -86,7 +99,7 @@ public class DirectorySampleStream implements ObjectStream { if (file.isFile()) { textFiles.push(file); } -else if (isRecursiveScan && file.isDirectory()) { +else if (recursive && file.isDirectory()) { directories.push(file); } } @@ -100,6 +113,7 @@ public class DirectorySampleStream implements ObjectStream { } } + @Override public void reset() { directories.clear(); textFiles.clear(); @@ -107,6 +121,14 @@ public class DirectorySampleStream implements ObjectStream { directories.addAll(inputDirectories); } + /** + * {@inheritDoc} + * Calling this function has no effect on + * the stream. 
+ */ + @Override public void close() throws IOException { + } + } http://git-wip-us.apache.org/repos/asf/opennlp/blob/27214015/opennlp-tools/src/test/java/opennlp/tools/formats/DirectorySampleStreamTest.java -- diff --git a/opennlp-tools/src/test/java/opennlp/tools/formats/DirectorySampleStreamTest.java b/opennlp-tools/src/test/java/opennlp/tools/formats/DirectorySampleStreamTest.java new file mode 100644 index 000..d17188e --- /dev/null +++ b/opennlp-tools/src/test/java/opennlp/tools/formats/DirectorySampleStreamTest.java @@ -0,0 +1,187 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information
opennlp git commit: OPENNLP-1023:Remove unused HashList class, this closes apache/opennlp#159
Repository: opennlp Updated Branches: refs/heads/master 4efd950d4 -> e76ba3694 OPENNLP-1023:Remove unused HashList class, this closes apache/opennlp#159 Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/e76ba369 Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/e76ba369 Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/e76ba369 Branch: refs/heads/master Commit: e76ba36948d3e58d6d726162f27245ff4a347b85 Parents: 4efd950 Author: jzonthemtnAuthored: Sun Apr 16 16:22:28 2017 -0400 Committer: smarthi Committed: Sun Apr 16 16:28:24 2017 -0400 -- .../main/java/opennlp/tools/util/HashList.java | 92 1 file changed, 92 deletions(-) -- http://git-wip-us.apache.org/repos/asf/opennlp/blob/e76ba369/opennlp-tools/src/main/java/opennlp/tools/util/HashList.java -- diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/HashList.java b/opennlp-tools/src/main/java/opennlp/tools/util/HashList.java deleted file mode 100644 index e88a907..000 --- a/opennlp-tools/src/main/java/opennlp/tools/util/HashList.java +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - - -package opennlp.tools.util; - -import java.util.ArrayList; -import java.util.Collection; -import java.util.HashMap; -import java.util.List; - -/** - * Class which creates mapping between keys and a list of values. - */ -@SuppressWarnings("unchecked") -public class HashList extends HashMap { - - private static final long serialVersionUID = 1; - - public HashList() { - } - - public Object get(Object key, int index) { -if (get(key) != null) { - return ((List) get(key)).get(index); -} -else { - return null; -} - } - - public Object putAll(Object key, Collection values) { -List o = (List) get(key); - -if (o == null) { - o = new ArrayList(); - super.put(key, o); -} - -o.addAll(values); - -if (o.size() == values.size()) - return null; -else - return o; - } - - @Override - public List put(Object key, Object value) { -List o = (List) get(key); - -if (o == null) { - o = new ArrayList(); - super.put(key, o); -} - -o.add(value); - -if (o.size() == 1) - return null; -else - return o; - } - - public boolean remove(Object key, Object value) { -List l = (List) get(key); -if (l == null) { - return false; -} -else { - boolean r = l.remove(value); - if (l.size() == 0) { -remove(key); - } - return r; -} - } -}
[25/50] [abbrv] opennlp git commit: OPENNLP-996:Remove heap memory settings from Opennlp-tools
OPENNLP-996:Remove heap memory settings from Opennlp-tools Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/81acc6e6 Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/81acc6e6 Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/81acc6e6 Branch: refs/heads/parser_regression Commit: 81acc6e69a7120b3f9644d54c30cae34b02b78f1 Parents: 82caa55 Author: smarthiAuthored: Sun Feb 26 12:56:04 2017 -0500 Committer: Jörn Kottmann Committed: Sun Apr 16 19:24:54 2017 +0200 -- opennlp-tools/pom.xml | 5 - 1 file changed, 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/opennlp/blob/81acc6e6/opennlp-tools/pom.xml -- diff --git a/opennlp-tools/pom.xml b/opennlp-tools/pom.xml index d2630c9..663e903 100644 --- a/opennlp-tools/pom.xml +++ b/opennlp-tools/pom.xml @@ -33,10 +33,6 @@ bundle Apache OpenNLP Tools - --Xmx4096m - - org.osgi @@ -81,7 +77,6 @@ org.apache.maven.plugins maven-surefire-plugin - @{argLine} /opennlp/tools/eval/**/*
[12/50] [abbrv] opennlp git commit: NoJira: Fix badge rendering
NoJira: Fix badge rendering Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/a2049d6f Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/a2049d6f Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/a2049d6f Branch: refs/heads/parser_regression Commit: a2049d6fa4715b681a8b1ab7fb70a2c8923f8975 Parents: 6ecc17e Author: smarthiAuthored: Tue Feb 7 22:51:13 2017 -0500 Committer: Jörn Kottmann Committed: Sun Apr 16 19:24:52 2017 +0200 -- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/opennlp/blob/a2049d6f/README.md -- diff --git a/README.md b/README.md index 2d31eb1..02b146a 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,7 @@ Welcome to Apache OpenNLP! [![Build Status](https://api.travis-ci.org/apache/opennlp.svg?branch=master)](https://travis-ci.org/apache/opennlp) [![Coverage Status](https://coveralls.io/repos/github/apache/opennlp/badge.svg?branch=master)](https://coveralls.io/github/apache/opennlp?branch=master) -[![Maven Central](https://maven-badges.herokuapp.com/maven-central/org.apache.opennlp/opennlp/badge.svg?style=plastic])](https://maven-badges.herokuapp.com/maven-central/org.apache.opennlp/opennlp) +[![Maven Central](https://maven-badges.herokuapp.com/maven-central/org.apache.opennlp/opennlp/badge.svg?style=plastic)](https://maven-badges.herokuapp.com/maven-central/org.apache.opennlp/opennlp) [![Documentation Status](https://img.shields.io/:docs-latest-green.svg)](http://opennlp.apache.org/documentation.html) [![GitHub license](https://img.shields.io/badge/license-Apache%202-blue.svg)](https://raw.githubusercontent.com/apache/opennlp/master/LICENSE) [![Twitter Follow](https://img.shields.io/twitter/follow/ApacheOpennlp.svg?style=social)](https://twitter.com/ApacheOpenNLP)
[16/50] [abbrv] opennlp git commit: OPENNLP-990 Fix all array style violations and add a checkstyle rule
http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/parser/PosSampleStream.java -- diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/PosSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/parser/PosSampleStream.java index 1e90ecc..259d9f4 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/parser/PosSampleStream.java +++ b/opennlp-tools/src/main/java/opennlp/tools/parser/PosSampleStream.java @@ -37,8 +37,8 @@ public class PosSampleStream extends FilterObjectStream{ Parse[] nodes = parse.getTagNodes(); - String toks[] = new String[nodes.length]; - String preds[] = new String[nodes.length]; + String[] toks = new String[nodes.length]; + String[] preds = new String[nodes.length]; for (int ti = 0; ti < nodes.length; ti++) { Parse tok = nodes[ti]; http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/postag/POSDictionary.java -- diff --git a/opennlp-tools/src/main/java/opennlp/tools/postag/POSDictionary.java b/opennlp-tools/src/main/java/opennlp/tools/postag/POSDictionary.java index f103450..5f5eb25 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/postag/POSDictionary.java +++ b/opennlp-tools/src/main/java/opennlp/tools/postag/POSDictionary.java @@ -99,7 +99,7 @@ public class POSDictionary implements Iterable, MutableTagDictionary { return dictionary.keySet().iterator(); } - private static String tagsToString(String tags[]) { + private static String tagsToString(String[] tags) { StringBuilder tagString = new StringBuilder(); http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/postag/POSEvaluator.java -- diff --git a/opennlp-tools/src/main/java/opennlp/tools/postag/POSEvaluator.java b/opennlp-tools/src/main/java/opennlp/tools/postag/POSEvaluator.java index 26cb79c..eaf6baf 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/postag/POSEvaluator.java +++ 
b/opennlp-tools/src/main/java/opennlp/tools/postag/POSEvaluator.java @@ -57,8 +57,8 @@ public class POSEvaluator extends Evaluator { @Override protected POSSample processSample(POSSample reference) { -String predictedTags[] = tagger.tag(reference.getSentence(), reference.getAddictionalContext()); -String referenceTags[] = reference.getTags(); +String[] predictedTags = tagger.tag(reference.getSentence(), reference.getAddictionalContext()); +String[] referenceTags = reference.getTags(); for (int i = 0; i < referenceTags.length; i++) { if (referenceTags[i].equals(predictedTags[i])) { http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/postag/POSSample.java -- diff --git a/opennlp-tools/src/main/java/opennlp/tools/postag/POSSample.java b/opennlp-tools/src/main/java/opennlp/tools/postag/POSSample.java index b1b2d32..9512e38 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/postag/POSSample.java +++ b/opennlp-tools/src/main/java/opennlp/tools/postag/POSSample.java @@ -37,7 +37,7 @@ public class POSSample { private final String[][] additionalContext; - public POSSample(String sentence[], String tags[]) { + public POSSample(String[] sentence, String[] tags) { this(sentence, tags, null); } @@ -66,7 +66,7 @@ public class POSSample { this.additionalContext = ac; } - public POSSample(String sentence[], String tags[], + public POSSample(String[] sentence, String[] tags, String[][] additionalContext) { this(Arrays.asList(sentence), Arrays.asList(tags), additionalContext); } @@ -120,10 +120,10 @@ public class POSSample { public static POSSample parse(String sentenceString) throws InvalidFormatException { -String tokenTags[] = WhitespaceTokenizer.INSTANCE.tokenize(sentenceString); +String[] tokenTags = WhitespaceTokenizer.INSTANCE.tokenize(sentenceString); -String sentence[] = new String[tokenTags.length]; -String tags[] = new String[tokenTags.length]; +String[] sentence = new String[tokenTags.length]; +String[] tags = new 
String[tokenTags.length]; for (int i = 0; i < tokenTags.length; i++) { int split = tokenTags[i].lastIndexOf("_"); http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/postag/POSSampleEventStream.java -- diff --git a/opennlp-tools/src/main/java/opennlp/tools/postag/POSSampleEventStream.java b/opennlp-tools/src/main/java/opennlp/tools/postag/POSSampleEventStream.java index 4dd31e0..aa3c99d 100644 ---
[27/50] [abbrv] opennlp git commit: NoJira: Run jacoco during build and not afterwards
NoJira: Run jacoco during build and not afterwards Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/96107813 Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/96107813 Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/96107813 Branch: refs/heads/parser_regression Commit: 9610781359e2ffb9a5f09b0c94f3a56a24ca78bc Parents: 40cdacb Author: Jörn KottmannAuthored: Mon Feb 20 14:22:56 2017 +0100 Committer: Jörn Kottmann Committed: Sun Apr 16 19:24:54 2017 +0200 -- .travis.yml | 4 ++-- pom.xml | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/opennlp/blob/96107813/.travis.yml -- diff --git a/.travis.yml b/.travis.yml index 49d902e..b3399b2 100644 --- a/.travis.yml +++ b/.travis.yml @@ -20,7 +20,7 @@ before_install: - export M2_HOME=$PWD/apache-maven-3.3.9 - export PATH=$M2_HOME/bin:$PATH -script: mvn clean install +script: mvn clean install -Pjacoco after_success: - - mvn clean test -Pjacoco jacoco:report coveralls:report + - mvn jacoco:report coveralls:report http://git-wip-us.apache.org/repos/asf/opennlp/blob/96107813/pom.xml -- diff --git a/pom.xml b/pom.xml index 8e37452..45d3c37 100644 --- a/pom.xml +++ b/pom.xml @@ -214,9 +214,6 @@ org.eluder.coveralls coveralls-maven-plugin ${coveralls.maven.plugin} - - BD8e0j90KZlQdko7H3wEo5a0mTLhmoeyk - @@ -397,6 +394,9 @@ jacoco + + 1 +
[02/50] [abbrv] opennlp git commit: NoJira: Fix Coveralls Report, this closes apache/opennlp#116
NoJira: Fix Coveralls Report, this closes apache/opennlp#116 Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/6ecc17e8 Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/6ecc17e8 Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/6ecc17e8 Branch: refs/heads/parser_regression Commit: 6ecc17e88b096cd7a12f65b869d9ce6a9444727e Parents: 6f33261 Author: smarthiAuthored: Tue Feb 7 22:46:28 2017 -0500 Committer: Jörn Kottmann Committed: Sun Apr 16 19:24:51 2017 +0200 -- README.md | 5 ++--- opennlp-tools/pom.xml | 10 +++--- 2 files changed, 9 insertions(+), 6 deletions(-) -- http://git-wip-us.apache.org/repos/asf/opennlp/blob/6ecc17e8/README.md -- diff --git a/README.md b/README.md index faff141..2d31eb1 100644 --- a/README.md +++ b/README.md @@ -20,10 +20,9 @@ Welcome to Apache OpenNLP! [![Build Status](https://api.travis-ci.org/apache/opennlp.svg?branch=master)](https://travis-ci.org/apache/opennlp) [![Coverage Status](https://coveralls.io/repos/github/apache/opennlp/badge.svg?branch=master)](https://coveralls.io/github/apache/opennlp?branch=master) +[![Maven Central](https://maven-badges.herokuapp.com/maven-central/org.apache.opennlp/opennlp/badge.svg?style=plastic])](https://maven-badges.herokuapp.com/maven-central/org.apache.opennlp/opennlp) [![Documentation Status](https://img.shields.io/:docs-latest-green.svg)](http://opennlp.apache.org/documentation.html) [![GitHub license](https://img.shields.io/badge/license-Apache%202-blue.svg)](https://raw.githubusercontent.com/apache/opennlp/master/LICENSE) -[![GitHub forks](https://img.shields.io/github/forks/apache/opennlp.svg)](https://github.com/apache/opennlp/network) -[![GitHub stars](https://img.shields.io/github/stars/apache/opennlp.svg)](https://github.com/apache/opennlp/stargazers) [![Twitter Follow](https://img.shields.io/twitter/follow/ApacheOpennlp.svg?style=social)](https://twitter.com/ApacheOpenNLP) The Apache OpenNLP library 
is a machine learning based toolkit for the processing of natural language text. @@ -38,7 +37,7 @@ well as the annotated text resources that those models are derived from. For additional information about OpenNLP, visit the [OpenNLP Home Page](http://opennlp.apache.org/) -Documentation for OpenNLP, including JavaDocs, code usage and command line interface are available[here](http://opennlp.apache.org/documentation.html) +Documentation for OpenNLP, including JavaDocs, code usage and command line interface are available [here](http://opennlp.apache.org/documentation.html) Using OpenNLP as a Library Running any application that uses OpenNLP will require installing a binary or source version and setting the environment. http://git-wip-us.apache.org/repos/asf/opennlp/blob/6ecc17e8/opennlp-tools/pom.xml -- diff --git a/opennlp-tools/pom.xml b/opennlp-tools/pom.xml index 22fc017..c7e9624 100644 --- a/opennlp-tools/pom.xml +++ b/opennlp-tools/pom.xml @@ -33,6 +33,10 @@ bundle Apache OpenNLP Tools + +-Xmx4096m + + org.osgi @@ -41,7 +45,7 @@ provided true - + org.osgi org.osgi.compendium @@ -49,7 +53,7 @@ provided true - + junit junit @@ -77,7 +81,7 @@ org.apache.maven.plugins maven-surefire-plugin - -Xmx4096m + @{argLine} /opennlp/tools/eval/**/*
[49/50] [abbrv] opennlp git commit: OPENNLP-1016: Add more tests for StringList
OPENNLP-1016: Add more tests for StringList Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/5eb8ff8d Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/5eb8ff8d Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/5eb8ff8d Branch: refs/heads/parser_regression Commit: 5eb8ff8deb442ece89fad5f14368c6cbe10772d4 Parents: a1ced40 Author: koji Authored: Mon Apr 10 14:16:34 2017 +0900 Committer: Jörn Kottmann Committed: Sun Apr 16 19:24:57 2017 +0200 -- .../test/java/opennlp/tools/util/StringListTest.java | 15 ++- 1 file changed, 14 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/opennlp/blob/5eb8ff8d/opennlp-tools/src/test/java/opennlp/tools/util/StringListTest.java -- diff --git a/opennlp-tools/src/test/java/opennlp/tools/util/StringListTest.java b/opennlp-tools/src/test/java/opennlp/tools/util/StringListTest.java index a57a2ae..d8c7ca2 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/util/StringListTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/util/StringListTest.java @@ -28,6 +28,16 @@ import org.junit.Test; public class StringListTest { /** + * Tests {@link StringList} which uses {@link String#intern}. + */ + @Test + public void testIntern() { +StringList l1 = new StringList("a"); +StringList l2 = new StringList("a", "b"); +Assert.assertTrue(l1.getToken(0) == l2.getToken(0)); + } + + /** * Tests {@link StringList#getToken(int)}. 
*/ @Test @@ -90,6 +100,8 @@ public class StringListTest { public void testHashCode() { Assert.assertEquals(new StringList("a", "b").hashCode(), new StringList("a", "b").hashCode()); +Assert.assertNotEquals(new StringList("a", "b").hashCode(), +new StringList("a", "c").hashCode()); } /** @@ -97,6 +109,7 @@ public class StringListTest { */ @Test public void testToString() { -new StringList("a", "b").toString(); +Assert.assertEquals("[a]", new StringList("a").toString()); +Assert.assertEquals("[a,b]", new StringList("a", "b").toString()); } }
[19/50] [abbrv] opennlp git commit: NoJira: Adding public RepoToken to investigate Travis coveralls build failures, this closes apache/opennlp#128
NoJira: Adding public RepoToken to investigate Travis coveralls build failures, this closes apache/opennlp#128 Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/fc3b12fa Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/fc3b12fa Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/fc3b12fa Branch: refs/heads/parser_regression Commit: fc3b12fa42c58b1065a62150cb4831ad56ec Parents: 41f153a Author: smarthiAuthored: Thu Feb 16 00:17:53 2017 -0500 Committer: Jörn Kottmann Committed: Sun Apr 16 19:24:53 2017 +0200 -- .travis.yml | 6 ++ pom.xml | 3 +++ 2 files changed, 9 insertions(+) -- http://git-wip-us.apache.org/repos/asf/opennlp/blob/fc3b12fa/.travis.yml -- diff --git a/.travis.yml b/.travis.yml index b4c83ad..49d902e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,6 +4,12 @@ jdk: oraclejdk8 sudo: false +env: + global: + # The next declaration is the encrypted COVERITY_SCAN_TOKEN, created + # via the "travis encrypt" command using the project repo's public key + - secure: "WLRKO/tD2rFN+a/HKSf9iZkaMaFE8/luXcJCXGfewoHysF7LgIJ76AN9HY50woVJykl+T/tEhK5c/+H/IKO5zH8Rvz/Q9XxPTvUTOwH7oFOHCQ66mKTvn27Z4fp+JbkPKJuhWDUzPvS/Alo3wE70UELnFRTFoRsemfNNa95uPJobfx5deOfX80mipHOn16dA1q8LuzQa6iF2HIVuh7ygLleTV0cDJyXmIlg3EbKGEitozIv0WkwALrBjLS7KmCcXTKxXqCm1Be2MFRoh9ab2bEooXlv2zRh2wT0c04RckFm1AJGpGQelXLl3NxxcRJSpIN9OTkpVUfwm28TIXk2SzdgPMrP11yFK/DPKTv0jwyk1bFrmZMMso5Y2rP6wjNEtw5ExYSpk3xebcieLJwXhCwkkWAT3DdAAeXO5z4Nf36lryjRgqvlsVF1ofqAK5Sh+qH93/TJOE+hVEj74xUT9pVaxemY61ymvSt8L21XkUsp8T5ILq9jWoaMQCaAwZIaJiHXYjQhmsrFRkNaY4cl9AUGwpHmm750uqhmoVfuJzQg5/vGMZ0LWeCgR9qsG5MG0yijE8ghExUOe7R4gcNAJW2XOfjzMTy74jdsJbsJPUeci/R4wzrXTSCQVJ5nj2LhBF6HyqPyUrIV2MB14gAIItc1LASuB1GLkGoXjIdt0HN8=" + cache: directories: - $HOME/.m2 http://git-wip-us.apache.org/repos/asf/opennlp/blob/fc3b12fa/pom.xml -- diff --git a/pom.xml b/pom.xml index 268a54e..8e37452 100644 --- a/pom.xml +++ b/pom.xml @@ -214,6 +214,9 @@ org.eluder.coveralls 
coveralls-maven-plugin ${coveralls.maven.plugin} + + BD8e0j90KZlQdko7H3wEo5a0mTLhmoeyk +
[21/50] [abbrv] opennlp git commit: NoJira: Add checkstyle for new lines and fix existing files
NoJira: Add checkstyle for new lines and fix existing files This closes #123 Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/73cf5600 Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/73cf5600 Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/73cf5600 Branch: refs/heads/parser_regression Commit: 73cf5600206d225f46c17797793f2e2b77e7d422 Parents: 91352d5 Author: jzonthemtnAuthored: Mon Feb 13 15:20:30 2017 -0500 Committer: Jörn Kottmann Committed: Sun Apr 16 19:24:53 2017 +0200 -- checkstyle.xml| 2 ++ .../java/opennlp/morfologik/tagdict/POSTaggerFactoryTest.java | 3 ++- .../src/main/java/opennlp/tools/chunker/package-info.java | 2 +- .../main/java/opennlp/tools/cmdline/AbstractEvaluatorTool.java| 2 +- .../src/main/java/opennlp/tools/cmdline/BasicCmdLineTool.java | 2 +- .../src/main/java/opennlp/tools/cmdline/ObjectStreamFactory.java | 2 +- .../java/opennlp/tools/cmdline/chunker/ChunkerConverterTool.java | 2 +- .../java/opennlp/tools/cmdline/lemmatizer/LemmatizerMETool.java | 2 +- .../main/java/opennlp/tools/cmdline/lemmatizer/package-info.java | 2 +- .../main/java/opennlp/tools/cmdline/params/BasicFormatParams.java | 2 +- .../tools/cmdline/sentdetect/SentenceDetectorConverterTool.java | 2 +- .../opennlp/tools/cmdline/tokenizer/TokenizerConverterTool.java | 2 +- .../src/main/java/opennlp/tools/dictionary/package-info.java | 2 +- .../src/main/java/opennlp/tools/doccat/package-info.java | 2 +- .../java/opennlp/tools/formats/AbstractSampleStreamFactory.java | 2 +- .../java/opennlp/tools/formats/ChunkerSampleStreamFactory.java| 2 +- .../opennlp/tools/formats/DetokenizerSampleStreamFactory.java | 2 +- .../java/opennlp/tools/formats/LemmatizerSampleStreamFactory.java | 2 +- .../main/java/opennlp/tools/formats/ParseSampleStreamFactory.java | 2 +- .../java/opennlp/tools/formats/SentenceSampleStreamFactory.java | 2 +- .../main/java/opennlp/tools/formats/TokenSampleStreamFactory.java | 2 
+- .../tools/formats/ontonotes/OntoNotesNameSampleStream.java| 2 +- .../src/main/java/opennlp/tools/formats/package-info.java | 2 +- .../src/main/java/opennlp/tools/languagemodel/package-info.java | 2 +- .../src/main/java/opennlp/tools/lemmatizer/package-info.java | 2 +- .../java/opennlp/tools/namefind/NameFinderSequenceValidator.java | 2 +- .../src/main/java/opennlp/tools/namefind/package-info.java| 2 +- opennlp-tools/src/main/java/opennlp/tools/ngram/package-info.java | 2 +- opennlp-tools/src/main/java/opennlp/tools/package-info.java | 2 +- opennlp-tools/src/main/java/opennlp/tools/parser/HeadRules.java | 2 +- .../java/opennlp/tools/parser/ParserChunkerSequenceValidator.java | 2 +- .../src/main/java/opennlp/tools/parser/ParserEventTypeEnum.java | 2 +- .../src/main/java/opennlp/tools/parser/chunking/package-info.java | 2 +- .../src/main/java/opennlp/tools/parser/package-info.java | 2 +- .../java/opennlp/tools/parser/treeinsert/ParserEventStream.java | 2 +- .../main/java/opennlp/tools/parser/treeinsert/package-info.java | 2 +- .../src/main/java/opennlp/tools/postag/package-info.java | 2 +- .../src/main/java/opennlp/tools/sentdetect/package-info.java | 2 +- .../src/main/java/opennlp/tools/tokenize/package-info.java| 2 +- .../src/main/java/opennlp/tools/util/CollectionObjectStream.java | 2 +- .../src/main/java/opennlp/tools/util/InputStreamFactory.java | 2 +- .../src/main/java/opennlp/tools/util/ext/package-info.java| 2 +- .../opennlp/tools/util/featuregen/BigramNameFeatureGenerator.java | 2 +- .../src/main/java/opennlp/tools/util/featuregen/package-info.java | 2 +- .../main/java/opennlp/tools/util/model/DictionarySerializer.java | 2 +- .../main/java/opennlp/tools/util/model/PropertiesSerializer.java | 2 +- opennlp-tools/src/main/java/opennlp/tools/util/package-info.java | 2 +- .../src/test/java/opennlp/tools/dictionary/DictionaryTest.java| 2 +- .../java/opennlp/tools/formats/Conll03NameSampleStreamTest.java | 2 +- 
.../test/java/opennlp/tools/ml/maxent/ScaleDoesntMatterTest.java | 2 +- .../opennlp/tools/ml/maxent/io/RealValueFileEventStreamTest.java | 2 +- .../opennlp/tools/ml/naivebayes/NaiveBayesModelReadWriteTest.java | 2 +- .../java/opennlp/tools/namefind/DictionaryNameFinderTest.java | 2 +- .../src/test/java/opennlp/tools/namefind/RegexNameFinderTest.java | 2 +- .../src/test/java/opennlp/tools/postag/DummyPOSTaggerFactory.java | 2 +- .../src/test/java/opennlp/tools/postag/POSModelTest.java | 2 +- .../src/test/java/opennlp/tools/postag/POSSampleTest.java | 2 +- .../src/test/java/opennlp/tools/tokenize/SimpleTokenizerTest.java | 2 +-
[15/50] [abbrv] opennlp git commit: OPENNLP-983: Make suffix/prefix length configurable
OPENNLP-983: Make suffix/prefix length configurable This closes #121 Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/1cd2658d Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/1cd2658d Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/1cd2658d Branch: refs/heads/parser_regression Commit: 1cd2658d0179afcf982229fab9c24da62f944c58 Parents: fc3b12f Author: jzonthemtnAuthored: Mon Feb 13 07:57:21 2017 -0500 Committer: Jörn Kottmann Committed: Sun Apr 16 19:24:53 2017 +0200 -- .../tools/util/featuregen/GeneratorFactory.java | 22 - .../util/featuregen/PrefixFeatureGenerator.java | 32 +-- .../util/featuregen/SuffixFeatureGenerator.java | 33 +-- .../featuregen/PrefixFeatureGeneratorTest.java | 92 .../featuregen/SuffixFeatureGeneratorTest.java | 92 5 files changed, 251 insertions(+), 20 deletions(-) -- http://git-wip-us.apache.org/repos/asf/opennlp/blob/1cd2658d/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java -- diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java index fa97f43..ef08cfb 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java +++ b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java @@ -555,7 +555,16 @@ public class GeneratorFactory { public AdaptiveFeatureGenerator create(Element generatorElement, FeatureGeneratorResourceProvider resourceManager) { - return new PrefixFeatureGenerator(); + + String attribute = generatorElement.getAttribute("length"); + + int prefixLength = PrefixFeatureGenerator.DEFAULT_MAX_LENGTH; + + if (!Objects.equals(attribute, "")) { +prefixLength = Integer.parseInt(attribute); + } + + return new PrefixFeatureGenerator(prefixLength); } static void register(Map factoryMap) { @@ -570,7 +579,16 @@ public class GeneratorFactory { public 
AdaptiveFeatureGenerator create(Element generatorElement, FeatureGeneratorResourceProvider resourceManager) { - return new SuffixFeatureGenerator(); + + String attribute = generatorElement.getAttribute("length"); + + int suffixLength = SuffixFeatureGenerator.DEFAULT_MAX_LENGTH; + + if (!Objects.equals(attribute, "")) { +suffixLength = Integer.parseInt(attribute); + } + + return new SuffixFeatureGenerator(suffixLength); } static void register(Map factoryMap) { http://git-wip-us.apache.org/repos/asf/opennlp/blob/1cd2658d/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/PrefixFeatureGenerator.java -- diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/PrefixFeatureGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/PrefixFeatureGenerator.java index 8cdd48f..04fcd15 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/PrefixFeatureGenerator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/PrefixFeatureGenerator.java @@ -21,21 +21,35 @@ import java.util.List; public class PrefixFeatureGenerator implements AdaptiveFeatureGenerator { - private static final int PREFIX_LENGTH = 4; - - private static String[] getPrefixes(String lex) { -String[] prefs = new String[PREFIX_LENGTH]; -for (int li = 0; li < PREFIX_LENGTH; li++) { - prefs[li] = lex.substring(0, Math.min(li + 1, lex.length())); -} -return prefs; + static final int DEFAULT_MAX_LENGTH = 4; + + private final int prefixLength; + + public PrefixFeatureGenerator() { +prefixLength = DEFAULT_MAX_LENGTH; + } + + public PrefixFeatureGenerator(int prefixLength) { +this.prefixLength = prefixLength; } + @Override public void createFeatures(List features, String[] tokens, int index, String[] previousOutcomes) { -String[] prefs = PrefixFeatureGenerator.getPrefixes(tokens[index]); +String[] prefs = getPrefixes(tokens[index]); for (String pref : prefs) { features.add("pre=" + pref); } } + + private String[] getPrefixes(String lex) { + +int 
prefixes = Math.min(prefixLength, lex.length()); + +String[] prefs = new String[prefixes]; +for (int li = 0; li < prefixes; li++) { + prefs[li] = lex.substring(0, Math.min(li + 1, lex.length())); +} +return prefs;
[33/50] [abbrv] opennlp git commit: OPENNLP-997: Exclude the generated stemmer code from the coverage report, this closes apache/opennlp#135
OPENNLP-997: Exclude the generated stemmer code from the coverage report, this closes apache/opennlp#135 Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/20d0a76f Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/20d0a76f Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/20d0a76f Branch: refs/heads/parser_regression Commit: 20d0a76fe092993c25abf7aa3dfce34bcb72db5f Parents: 76609f5 Author: smarthi Authored: Tue Feb 28 08:28:05 2017 -0500 Committer: Jörn Kottmann Committed: Sun Apr 16 19:24:55 2017 +0200 -- pom.xml | 14 -- 1 file changed, 12 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/opennlp/blob/20d0a76f/pom.xml -- diff --git a/pom.xml b/pom.xml index 45d3c37..12c9ee6 100644 --- a/pom.xml +++ b/pom.xml @@ -126,7 +126,7 @@ 2.17 1.0C 4.3.0 - 0.7.8 + 0.7.9 2.19.1 @@ -185,7 +185,13 @@ org.jacoco jacoco-maven-plugin - 0.7.8 + ${jacoco.maven.plugin} + + +**/stemmer/* + **/stemmer/snowball/* + + jacoco-prepare-agent @@ -222,6 +228,10 @@ ${maven.surefire.plugin} ${opennlp.forkCount} + + **/stemmer/* + **/stemmer/snowball/* +
[20/50] [abbrv] opennlp git commit: OPENNLP-986 - Stupid Backoff as default LM discounting
OPENNLP-986 - Stupid Backoff as default LM discounting Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/41f153aa Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/41f153aa Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/41f153aa Branch: refs/heads/parser_regression Commit: 41f153aa946641afb076b86c243a7b93667778f6 Parents: 73cf560 Author: Tommaso TeofiliAuthored: Tue Feb 14 14:49:09 2017 +0100 Committer: Jörn Kottmann Committed: Sun Apr 16 19:24:53 2017 +0200 -- .../tools/languagemodel/NGramLanguageModel.java | 74 +--- .../java/opennlp/tools/ngram/NGramUtils.java| 3 +- .../LanguageModelEvaluationTest.java| 2 +- .../languagemodel/NgramLanguageModelTest.java | 15 ++-- 4 files changed, 28 insertions(+), 66 deletions(-) -- http://git-wip-us.apache.org/repos/asf/opennlp/blob/41f153aa/opennlp-tools/src/main/java/opennlp/tools/languagemodel/NGramLanguageModel.java -- diff --git a/opennlp-tools/src/main/java/opennlp/tools/languagemodel/NGramLanguageModel.java b/opennlp-tools/src/main/java/opennlp/tools/languagemodel/NGramLanguageModel.java index e11c107..501c1bc 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/languagemodel/NGramLanguageModel.java +++ b/opennlp-tools/src/main/java/opennlp/tools/languagemodel/NGramLanguageModel.java @@ -26,52 +26,30 @@ import opennlp.tools.util.StringList; /** * A {@link opennlp.tools.languagemodel.LanguageModel} based on a {@link opennlp.tools.ngram.NGramModel} - * using Laplace smoothing probability estimation to get the probabilities of the ngrams. - * See also {@link NGramUtils#calculateLaplaceSmoothingProbability( - *opennlp.tools.util.StringList, Iterable, int, Double)}. + * using Stupid Backoff to get the probabilities of the ngrams. 
*/ public class NGramLanguageModel extends NGramModel implements LanguageModel { private static final int DEFAULT_N = 3; - private static final double DEFAULT_K = 1d; private final int n; - private final double k; public NGramLanguageModel() { -this(DEFAULT_N, DEFAULT_K); +this(DEFAULT_N); } public NGramLanguageModel(int n) { -this(n, DEFAULT_K); - } - - public NGramLanguageModel(double k) { -this(DEFAULT_N, k); - } - - public NGramLanguageModel(int n, double k) { this.n = n; -this.k = k; } public NGramLanguageModel(InputStream in) throws IOException { -this(in, DEFAULT_N, DEFAULT_K); - } - - public NGramLanguageModel(InputStream in, double k) throws IOException { -this(in, DEFAULT_N, k); - } - - public NGramLanguageModel(InputStream in, int n) throws IOException { -this(in, n, DEFAULT_K); +this(in, DEFAULT_N); } - public NGramLanguageModel(InputStream in, int n, double k) + public NGramLanguageModel(InputStream in, int n) throws IOException { super(in); this.n = n; -this.k = k; } @Override @@ -79,24 +57,13 @@ public class NGramLanguageModel extends NGramModel implements LanguageModel { double probability = 0d; if (size() > 0) { for (StringList ngram : NGramUtils.getNGrams(sample, n)) { -StringList nMinusOneToken = NGramUtils -.getNMinusOneTokenFirst(ngram); -if (size() > 100) { - // use stupid backoff - probability += Math.log( - getStupidBackoffProbability(ngram, nMinusOneToken)); -} else { - // use laplace smoothing - probability += Math.log( - getLaplaceSmoothingProbability(ngram, nMinusOneToken)); +double score = stupidBackoff(ngram); +probability += Math.log(score); +if (Double.isNaN(probability)) { + probability = 0d; } } - if (Double.isNaN(probability)) { -probability = 0d; - } else if (probability != 0) { -probability = Math.exp(probability); - } - + probability = Math.exp(probability); } return probability; } @@ -125,24 +92,21 @@ public class NGramLanguageModel extends NGramModel implements LanguageModel { return token; } - private double 
getLaplaceSmoothingProbability(StringList ngram, -StringList nMinusOneToken) { -return (getCount(ngram) + k) / (getCount(nMinusOneToken) + k * size()); - } - - private double getStupidBackoffProbability(StringList ngram, - StringList nMinusOneToken) { + private double stupidBackoff(StringList ngram) { int count = getCount(ngram); +StringList nMinusOneToken = NGramUtils.getNMinusOneTokenFirst(ngram); if (nMinusOneToken == null || nMinusOneToken.size()
[41/50] [abbrv] opennlp git commit: OPENNLP-1004: Write a test case for the BilouCodec class
OPENNLP-1004: Write a test case for the BilouCodec class This closes #142 Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/bc99b72f Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/bc99b72f Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/bc99b72f Branch: refs/heads/parser_regression Commit: bc99b72feaa8e3416feaa5df59c6198bfe7dbd8b Parents: 5c9f6ab Author: Peter ThygesenAuthored: Wed Mar 15 18:54:26 2017 +0100 Committer: Jörn Kottmann Committed: Sun Apr 16 19:24:56 2017 +0200 -- .../opennlp/tools/namefind/BilouCodecTest.java | 209 +++ 1 file changed, 209 insertions(+) -- http://git-wip-us.apache.org/repos/asf/opennlp/blob/bc99b72f/opennlp-tools/src/test/java/opennlp/tools/namefind/BilouCodecTest.java -- diff --git a/opennlp-tools/src/test/java/opennlp/tools/namefind/BilouCodecTest.java b/opennlp-tools/src/test/java/opennlp/tools/namefind/BilouCodecTest.java new file mode 100644 index 000..96d939f --- /dev/null +++ b/opennlp-tools/src/test/java/opennlp/tools/namefind/BilouCodecTest.java @@ -0,0 +1,209 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package opennlp.tools.namefind; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import org.junit.Assert; +import org.junit.Test; + +import opennlp.tools.util.Span; + +/** + * This is the test class for {@link BilouCodec}. + */ +public class BilouCodecTest { + + private static final BilouCodec codec = new BilouCodec(); + + private static final String A_TYPE = "atype"; + private static final String A_START = A_TYPE + "-" + BilouCodec.START; + private static final String A_CONTINUE = A_TYPE + "-" + BilouCodec.CONTINUE; + private static final String A_LAST = A_TYPE + "-" + BilouCodec.LAST; + private static final String A_UNIT = A_TYPE + "-" + BilouCodec.UNIT; + + private static final String B_TYPE = "btype"; + private static final String B_START = B_TYPE + "-" + BilouCodec.START; + private static final String B_CONTINUE = B_TYPE + "-" + BilouCodec.CONTINUE; + private static final String B_LAST = B_TYPE + "-" + BilouCodec.LAST; + private static final String B_UNIT = B_TYPE + "-" + BilouCodec.UNIT; + + private static final String C_TYPE = "ctype"; + private static final String C_UNIT = C_TYPE + "-" + BilouCodec.UNIT; + + private static final String OTHER = BilouCodec.OTHER; + + @Test + public void testEncodeNoNames() { +NameSample nameSample = new NameSample("Once upon a time.".split(" "), new Span[] {}, true); +String[] expected = new String[] {OTHER, OTHER, OTHER, OTHER}; +String[] acutal = codec.encode(nameSample.getNames(), nameSample.getSentence().length); +Assert.assertArrayEquals("Only 'Other' is expected.", expected, acutal); + } + + @Test + public void testEncodeSingleUnitTokenSpan() { +String[] sentence = "I called Julie again.".split(" "); +Span[] singleSpan = new Span[] { new Span(2,3, A_TYPE)}; +NameSample nameSample = new NameSample(sentence, singleSpan, true); +String[] expected = new String[] {OTHER, OTHER, A_UNIT, OTHER}; +String[] acutal = codec.encode(nameSample.getNames(), nameSample.getSentence().length); 
+Assert.assertArrayEquals("'Julie' should be 'unit' only, the rest should be 'other'.", expected, acutal); + } + + @Test + public void testEncodeDoubleTokenSpan() { +String[] sentence = "I saw Stefanie Schmidt today.".split(" "); +Span[] singleSpan = new Span[] { new Span(2,4, A_TYPE)}; +NameSample nameSample = new NameSample(sentence, singleSpan, true); +String[] expected = new String[] {OTHER, OTHER, A_START, A_LAST, OTHER}; +String[] acutal = codec.encode(nameSample.getNames(), nameSample.getSentence().length); +Assert.assertArrayEquals("'Stefanie' should be 'start' only, 'Schmidt' is 'last' " + +"and the rest should be 'other'.", expected, acutal); + } + + @Test + public void
[42/50] [abbrv] opennlp git commit: OPENNLP-1005: Implement areOutcomesCompatible for BilouCodec
OPENNLP-1005: Implement areOutcomesCompatible for BilouCodec This issue closes #144 Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/36de0131 Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/36de0131 Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/36de0131 Branch: refs/heads/parser_regression Commit: 36de0131947d98e8246ef0fcf8eaf56d546d27b4 Parents: bc99b72 Author: Peter ThygesenAuthored: Thu Mar 16 11:19:58 2017 +0100 Committer: Jörn Kottmann Committed: Sun Apr 16 19:24:56 2017 +0200 -- .../java/opennlp/tools/namefind/BilouCodec.java | 61 +++ .../opennlp/tools/namefind/BilouCodecTest.java | 375 +++ 2 files changed, 436 insertions(+) -- http://git-wip-us.apache.org/repos/asf/opennlp/blob/36de0131/opennlp-tools/src/main/java/opennlp/tools/namefind/BilouCodec.java -- diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/BilouCodec.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/BilouCodec.java index 7e8508a..50cc4bf 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/namefind/BilouCodec.java +++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/BilouCodec.java @@ -19,7 +19,9 @@ package opennlp.tools.namefind; import java.util.ArrayList; import java.util.Arrays; +import java.util.HashSet; import java.util.List; +import java.util.Set; import opennlp.tools.util.SequenceCodec; import opennlp.tools.util.SequenceValidator; @@ -111,8 +113,67 @@ public class BilouCodec implements SequenceCodec { return new BilouNameFinderSequenceValidator(); } + /** + * B requires CL or L + * C requires BL + * L requires B + * O requires any valid combo/unit + * U requires none + * + * @param outcomes all possible model outcomes + * + * @return true, if model outcomes are compatible + */ @Override public boolean areOutcomesCompatible(String[] outcomes) { +Set start = new HashSet<>(); +Set cont = new HashSet<>(); +Set last = new HashSet<>(); +Set unit = new HashSet<>(); + 
+for (int i = 0; i < outcomes.length; i++) { + String outcome = outcomes[i]; + if (outcome.endsWith(BilouCodec.START)) { +start.add(outcome.substring(0, outcome.length() +- BilouCodec.START.length())); + } else if (outcome.endsWith(BilouCodec.CONTINUE)) { +cont.add(outcome.substring(0, outcome.length() +- BilouCodec.CONTINUE.length())); + } else if (outcome.endsWith(BilouCodec.LAST)) { +last.add(outcome.substring(0, outcome.length() +- BilouCodec.LAST.length())); + } else if (outcome.endsWith(BilouCodec.UNIT)) { +unit.add(outcome.substring(0, outcome.length() +- BilouCodec.UNIT.length())); + } else if (!outcome.equals(BilouCodec.OTHER)) { +return false; + } +} + +if (start.size() == 0 && unit.size() == 0) { + return false; +} else { + // Start, must have matching Last + for (String startPrefix : start) { +if (!last.contains(startPrefix)) { + return false; +} + } + // Cont, must have matching Start and Last + for (String contPrefix : cont) { +if (!start.contains(contPrefix) && !last.contains(contPrefix)) { + return false; +} + } + // Last, must have matching Start + for (String lastPrefix : last) { +if (!start.contains(lastPrefix)) { + return false; +} + } + +} + return true; } } http://git-wip-us.apache.org/repos/asf/opennlp/blob/36de0131/opennlp-tools/src/test/java/opennlp/tools/namefind/BilouCodecTest.java -- diff --git a/opennlp-tools/src/test/java/opennlp/tools/namefind/BilouCodecTest.java b/opennlp-tools/src/test/java/opennlp/tools/namefind/BilouCodecTest.java index 96d939f..353c7e4 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/namefind/BilouCodecTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/namefind/BilouCodecTest.java @@ -206,4 +206,379 @@ public class BilouCodecTest { Assert.assertArrayEquals(expected, actual); } + + @Test + public void testCompatibilityEmpty() { +Assert.assertFalse(codec.areOutcomesCompatible(new String[] {})); + } + + /** + * Singles and singles in combination with other valid type (unit/start+last) + */ + + /** + * 
B-Start => Fail + * A-Unit, B-Start => Fail + * A-Start, A-Last, B-Start => Fail + */ + @Test + public void testCompatibilitySinglesStart() { +Assert.assertFalse(codec.areOutcomesCompatible(new String[] {B_START})); +Assert.assertFalse(codec.areOutcomesCompatible(new
[18/50] [abbrv] opennlp git commit: OPENNLP-176: Switch language codes to ISO-639-3
OPENNLP-176: Switch language codes to ISO-639-3 This closes #114 Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/eee42316 Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/eee42316 Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/eee42316 Branch: refs/heads/parser_regression Commit: eee423166308c454dc176d2d52b12c29e2a08f19 Parents: fdff127 Author: Jörn Kottmann Authored: Sun Jan 29 11:06:08 2017 +0100 Committer: Jörn Kottmann Committed: Sun Apr 16 19:24:53 2017 +0200 -- .../cmdline/namefind/CensusDictionaryCreatorTool.java | 2 +- .../opennlp/tools/cmdline/parser/ParserTrainerTool.java | 4 ++-- .../tools/formats/AbstractSampleStreamFactory.java| 2 +- .../tools/formats/Conll03NameSampleStreamFactory.java | 6 +++--- .../main/java/opennlp/tools/sentdetect/lang/Factory.java | 10 +- 5 files changed, 12 insertions(+), 12 deletions(-) -- http://git-wip-us.apache.org/repos/asf/opennlp/blob/eee42316/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/CensusDictionaryCreatorTool.java -- diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/CensusDictionaryCreatorTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/CensusDictionaryCreatorTool.java index 6042510..f9bf5e0 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/CensusDictionaryCreatorTool.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/CensusDictionaryCreatorTool.java @@ -50,7 +50,7 @@ public class CensusDictionaryCreatorTool extends BasicCmdLineTool { interface Parameters { @ParameterDescription(valueName = "code") -@OptionalParameter(defaultValue = "en") +@OptionalParameter(defaultValue = "eng") String getLang(); @ParameterDescription(valueName = "charsetName") http://git-wip-us.apache.org/repos/asf/opennlp/blob/eee42316/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTrainerTool.java -- diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTrainerTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTrainerTool.java index 3a8dd5a..2709fd5 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTrainerTool.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTrainerTool.java @@ -90,10 +90,10 @@ public final class ParserTrainerTool extends AbstractTrainerTool
[37/50] [abbrv] opennlp git commit: OPENNLP-1000: Add a test case for the BilouNameFinderSequenceValidator
OPENNLP-1000: Add a test case for the BilouNameFinderSequenceValidator This closes #139 Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/5c9f6ab3 Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/5c9f6ab3 Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/5c9f6ab3 Branch: refs/heads/parser_regression Commit: 5c9f6ab305ae806580f720fb738dac125e42e0b5 Parents: 17493d1 Author: Peter ThygesenAuthored: Mon Mar 13 00:49:49 2017 +0100 Committer: Jörn Kottmann Committed: Sun Apr 16 19:24:56 2017 +0200 -- .../BilouNameFinderSequenceValidator.java | 23 +- .../BilouNameFinderSequenceValidatorTest.java | 435 +++ 2 files changed, 449 insertions(+), 9 deletions(-) -- http://git-wip-us.apache.org/repos/asf/opennlp/blob/5c9f6ab3/opennlp-tools/src/main/java/opennlp/tools/namefind/BilouNameFinderSequenceValidator.java -- diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/BilouNameFinderSequenceValidator.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/BilouNameFinderSequenceValidator.java index 6e73504..19700fb 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/namefind/BilouNameFinderSequenceValidator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/BilouNameFinderSequenceValidator.java @@ -23,19 +23,22 @@ public class BilouNameFinderSequenceValidator implements SequenceValidator { public boolean validSequence(int i, String[] inputSequence, - String[] outcomesSequence, String outcome) { + String[] outcomesSequence, String outcome) { -if (outcome.endsWith(NameFinderME.CONTINUE) || outcome.endsWith(BilouCodec.LAST)) { +if (outcome.endsWith(BilouCodec.CONTINUE) || outcome.endsWith(BilouCodec.LAST)) { int li = outcomesSequence.length - 1; if (li == -1) { return false; - } else if (outcomesSequence[li].endsWith(NameFinderME.OTHER) || + } else if (outcomesSequence[li].endsWith(BilouCodec.OTHER) || outcomesSequence[li].endsWith(BilouCodec.UNIT)) { return false; - 
} else if (outcomesSequence[li].endsWith(NameFinderME.CONTINUE) || - outcomesSequence[li].endsWith(NameFinderME.START)) { + } else if (outcomesSequence[li].endsWith(BilouCodec.LAST) && + (outcome.endsWith(BilouCodec.CONTINUE) || outcome.endsWith(BilouCodec.LAST))) { +return false; + } else if (outcomesSequence[li].endsWith(BilouCodec.CONTINUE) || + outcomesSequence[li].endsWith(BilouCodec.START)) { // if it is continue, we have to check if previous match was of the same type String previousNameType = NameFinderME.extractNameType(outcomesSequence[li]); String nameType = NameFinderME.extractNameType(outcome); @@ -50,10 +53,12 @@ public class BilouNameFinderSequenceValidator implements } } -if (outcomesSequence.length - 1 > 0) { - if (outcome.endsWith(NameFinderME.OTHER)) { -if (outcomesSequence[outcomesSequence.length - 1].endsWith(NameFinderME.START) -|| outcomesSequence[outcomesSequence.length - 1].endsWith(NameFinderME.CONTINUE)) { +if (outcomesSequence.length > 0) { + if (outcome.endsWith(BilouCodec.START) + || outcome.endsWith(BilouCodec.OTHER) + || outcome.endsWith(BilouCodec.UNIT)) { +if (outcomesSequence[outcomesSequence.length - 1].endsWith(BilouCodec.START) +|| outcomesSequence[outcomesSequence.length - 1].endsWith(BilouCodec.CONTINUE)) { return false; } } http://git-wip-us.apache.org/repos/asf/opennlp/blob/5c9f6ab3/opennlp-tools/src/test/java/opennlp/tools/namefind/BilouNameFinderSequenceValidatorTest.java -- diff --git a/opennlp-tools/src/test/java/opennlp/tools/namefind/BilouNameFinderSequenceValidatorTest.java b/opennlp-tools/src/test/java/opennlp/tools/namefind/BilouNameFinderSequenceValidatorTest.java new file mode 100644 index 000..a234beb --- /dev/null +++ b/opennlp-tools/src/test/java/opennlp/tools/namefind/BilouNameFinderSequenceValidatorTest.java @@ -0,0 +1,435 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless
[26/50] [abbrv] opennlp git commit: OPENNLP-982: Allow loading of 1.5.x models
OPENNLP-982: Allow loading of 1.5.x models This closes #129 Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/ebb5b248 Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/ebb5b248 Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/ebb5b248 Branch: refs/heads/parser_regression Commit: ebb5b24851706ec3684d4b0ddf4e35542984dfc5 Parents: eee4231 Author: Jörn Kottmann Authored: Fri Feb 17 12:32:11 2017 +0100 Committer: Jörn Kottmann Committed: Sun Apr 16 19:24:54 2017 +0200 -- .../src/main/java/opennlp/tools/util/model/BaseModel.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/opennlp/blob/ebb5b248/opennlp-tools/src/main/java/opennlp/tools/util/model/BaseModel.java -- diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/model/BaseModel.java b/opennlp-tools/src/main/java/opennlp/tools/util/model/BaseModel.java index 20acd9d..f70fb03 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/util/model/BaseModel.java +++ b/opennlp-tools/src/main/java/opennlp/tools/util/model/BaseModel.java @@ -416,7 +416,7 @@ public abstract class BaseModel implements ArtifactProvider, Serializable { // Major and minor version must match, revision might be // this check allows for the use of models of n minor release behind current minor release if (Version.currentVersion().getMajor() != version.getMajor() || -Version.currentVersion().getMinor() - 2 > version.getMinor()) { +Version.currentVersion().getMinor() - 3 > version.getMinor()) { throw new InvalidFormatException("Model version " + version + " is not supported by this (" + Version.currentVersion() + ") version of OpenNLP!"); }
[09/50] [abbrv] opennlp git commit: OPENNLP-984: Remove type parameter from POS Tagger Trainer cli
OPENNLP-984: Remove type parameter from POS Tagger Trainer cli Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/672f1b09 Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/672f1b09 Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/672f1b09 Branch: refs/heads/parser_regression Commit: 672f1b09dfcb1c854f930b6f1c7911e41141e1fa Parents: cd23b58 Author: Jörn KottmannAuthored: Sat Feb 11 00:20:45 2017 +0100 Committer: Jörn Kottmann Committed: Sun Apr 16 19:24:52 2017 +0200 -- .../cmdline/postag/POSTaggerTrainerTool.java| 25 .../tools/cmdline/postag/TrainingParams.java| 6 - 2 files changed, 31 deletions(-) -- http://git-wip-us.apache.org/repos/asf/opennlp/blob/672f1b09/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTrainerTool.java -- diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTrainerTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTrainerTool.java index 4a78602..1e6fb54 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTrainerTool.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTrainerTool.java @@ -34,8 +34,6 @@ import opennlp.tools.postag.POSTaggerFactory; import opennlp.tools.postag.POSTaggerME; import opennlp.tools.postag.TagDictionary; import opennlp.tools.util.InvalidFormatException; -import opennlp.tools.util.TrainingParameters; -import opennlp.tools.util.model.ModelType; import opennlp.tools.util.model.ModelUtil; public final class POSTaggerTrainerTool @@ -63,7 +61,6 @@ public final class POSTaggerTrainerTool if (mlParams == null) { mlParams = ModelUtil.createDefaultTrainingParameters(); - mlParams.put(TrainingParameters.ALGORITHM_PARAM, getModelType(params.getType()).toString()); } File modelOutFile = params.getModel(); @@ -142,26 +139,4 @@ public final class POSTaggerTrainerTool CmdLineUtil.writeModel("pos tagger", modelOutFile, 
model); } - - static ModelType getModelType(String modelString) { -ModelType model; -if (modelString == null) - modelString = "maxent"; - -switch (modelString) { - case "maxent": -model = ModelType.MAXENT; -break; - case "perceptron": -model = ModelType.PERCEPTRON; -break; - case "perceptron_sequence": -model = ModelType.PERCEPTRON_SEQUENCE; -break; - default: -model = null; -break; -} -return model; - } } http://git-wip-us.apache.org/repos/asf/opennlp/blob/672f1b09/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/TrainingParams.java -- diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/TrainingParams.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/TrainingParams.java index 221dcbe..690b359 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/TrainingParams.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/TrainingParams.java @@ -29,12 +29,6 @@ import opennlp.tools.cmdline.params.BasicTrainingParams; * Note: Do not use this class, internal use only! */ interface TrainingParams extends BasicTrainingParams { - - @ParameterDescription(valueName = "maxent|perceptron|perceptron_sequence", - description = "The type of the token name finder model. One of maxent|perceptron|perceptron_sequence.") - @OptionalParameter(defaultValue = "maxent") - String getType(); - @ParameterDescription(valueName = "dictionaryPath", description = "The XML tag dictionary file") @OptionalParameter File getDict();
[40/50] [abbrv] opennlp git commit: OPENNLP-1003: Write a test case for the BioCodec class
OPENNLP-1003: Write a test case for the BioCodec class This closes #141 Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/17493d1c Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/17493d1c Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/17493d1c Branch: refs/heads/parser_regression Commit: 17493d1cd7505441a363846fbd0a7ec2a8d67a06 Parents: fc10d2e Author: Peter ThygesenAuthored: Tue Mar 14 23:22:24 2017 +0100 Committer: Jörn Kottmann Committed: Sun Apr 16 19:24:56 2017 +0200 -- .../opennlp/tools/namefind/BioCodecTest.java| 263 +++ 1 file changed, 263 insertions(+) -- http://git-wip-us.apache.org/repos/asf/opennlp/blob/17493d1c/opennlp-tools/src/test/java/opennlp/tools/namefind/BioCodecTest.java -- diff --git a/opennlp-tools/src/test/java/opennlp/tools/namefind/BioCodecTest.java b/opennlp-tools/src/test/java/opennlp/tools/namefind/BioCodecTest.java new file mode 100644 index 000..c894742 --- /dev/null +++ b/opennlp-tools/src/test/java/opennlp/tools/namefind/BioCodecTest.java @@ -0,0 +1,263 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package opennlp.tools.namefind; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import org.junit.Assert; +import org.junit.Test; + +import opennlp.tools.util.Span; + +/** + * This is the test class for {@link BioCodec}. + */ +public class BioCodecTest { + + private static final BioCodec codec = new BioCodec(); + + private static final String A_TYPE = "atype"; + private static final String A_START = A_TYPE + "-" + BioCodec.START; + private static final String A_CONTINUE = A_TYPE + "-" + BioCodec.CONTINUE; + + private static final String B_TYPE = "btype"; + private static final String B_START = B_TYPE + "-" + BioCodec.START; + private static final String B_CONTINUE = B_TYPE + "-" + BioCodec.CONTINUE; + + private static final String C_TYPE = "ctype"; + private static final String C_START = C_TYPE + "-" + BioCodec.START; + + private static final String OTHER = BioCodec.OTHER; + + @Test + public void testEncodeNoNames() { +NameSample nameSample = new NameSample("Once upon a time.".split(" "), new Span[] {}, true); +String[] expected = new String[] { OTHER, OTHER, OTHER, OTHER}; +String[] actual = codec.encode(nameSample.getNames(), nameSample.getSentence().length); +Assert.assertArrayEquals("Only 'Other' is expected.", expected, actual); + } + + @Test + public void testEncodeSingleTokenSpan() { +String[] sentence = "I called Julie again.".split(" "); +Span[] spans = new Span[] { new Span(2,3, A_TYPE)}; +NameSample nameSample = new NameSample(sentence, spans, true); +String[] expected = new String[] {OTHER, OTHER, A_START, OTHER}; +String[] actual = codec.encode(nameSample.getNames(), nameSample.getSentence().length); +Assert.assertArrayEquals("'Julie' should be 'start' only, the rest should be 'other'.", expected, actual); + } + + @Test + public void testEncodeDoubleTokenSpan() { +String[] sentence = "I saw Stefanie Schmidt today.".split(" "); +Span[] span = new Span[] { new Span(2,4, A_TYPE)}; +NameSample nameSample = new 
NameSample(sentence, span, true); +String[] expected = new String[] {OTHER, OTHER, A_START, A_CONTINUE, OTHER}; +String[] actual = codec.encode(nameSample.getNames(), nameSample.getSentence().length); +Assert.assertArrayEquals("'Stefanie' should be 'start' only, 'Schmidt' is " + +"'continue' and the rest should be 'other'.", expected, actual); + } + + @Test + public void testEncodeDoubleTokenSpanNoType() { +final String DEFAULT_START = "default" + "-" + BioCodec.START; +final String DEFAULT_CONTINUE = "default" + "-" + BioCodec.CONTINUE; +String[] sentence = "I saw Stefanie Schmidt today.".split(" "); +Span[] span = new Span[] { new Span(2,4, null)}; +NameSample nameSample =
[04/50] [abbrv] opennlp git commit: NoJira: Run coveralls:report after build
NoJira: Run coveralls:report after build Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/6f332610 Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/6f332610 Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/6f332610 Branch: refs/heads/parser_regression Commit: 6f332610eb7a4fcc0b978b90f1fc9c8b253aa2b8 Parents: 51cd809 Author: Jörn Kottmann Authored: Tue Feb 7 18:49:39 2017 +0100 Committer: Jörn Kottmann Committed: Sun Apr 16 19:24:51 2017 +0200 -- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/opennlp/blob/6f332610/.travis.yml -- diff --git a/.travis.yml b/.travis.yml index 81bf0f7..b4c83ad 100644 --- a/.travis.yml +++ b/.travis.yml @@ -17,4 +17,4 @@ before_install: script: mvn clean install after_success: - - mvn clean test -Pjacoco jacoco:report \ No newline at end of file + - mvn clean test -Pjacoco jacoco:report coveralls:report
[13/50] [abbrv] opennlp git commit: NoJira: Update pom and jira version to 1.8.0
NoJira: Update pom and jira version to 1.8.0 Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/daa9fcaa Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/daa9fcaa Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/daa9fcaa Branch: refs/heads/parser_regression Commit: daa9fcaa0722f59222c374028e2cee097a29fa12 Parents: ac787a4 Author: Jörn KottmannAuthored: Thu Feb 9 18:41:20 2017 +0100 Committer: Jörn Kottmann Committed: Sun Apr 16 19:24:52 2017 +0200 -- opennlp-brat-annotator/pom.xml | 2 +- opennlp-distr/pom.xml| 4 ++-- opennlp-docs/pom.xml | 2 +- opennlp-morfologik-addon/pom.xml | 2 +- opennlp-tools/pom.xml| 2 +- opennlp-uima/pom.xml | 2 +- pom.xml | 2 +- 7 files changed, 8 insertions(+), 8 deletions(-) -- http://git-wip-us.apache.org/repos/asf/opennlp/blob/daa9fcaa/opennlp-brat-annotator/pom.xml -- diff --git a/opennlp-brat-annotator/pom.xml b/opennlp-brat-annotator/pom.xml index 1633deb..6c7be0d 100644 --- a/opennlp-brat-annotator/pom.xml +++ b/opennlp-brat-annotator/pom.xml @@ -17,7 +17,7 @@ org.apache.opennlp opennlp - 1.7.3-SNAPSHOT + 1.8.0-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/opennlp/blob/daa9fcaa/opennlp-distr/pom.xml -- diff --git a/opennlp-distr/pom.xml b/opennlp-distr/pom.xml index c0a57c3..1ce102d 100644 --- a/opennlp-distr/pom.xml +++ b/opennlp-distr/pom.xml @@ -24,7 +24,7 @@ org.apache.opennlp opennlp - 1.7.3-SNAPSHOT + 1.8.0-SNAPSHOT ../pom.xml @@ -127,7 +127,7 @@ generate-resources jira-report - 12339150 + 12339249 ${basedir}/target/issuesFixed/ 1000 http://git-wip-us.apache.org/repos/asf/opennlp/blob/daa9fcaa/opennlp-docs/pom.xml -- diff --git a/opennlp-docs/pom.xml b/opennlp-docs/pom.xml index fd2b0d1..fbf0b5c 100644 --- a/opennlp-docs/pom.xml +++ b/opennlp-docs/pom.xml @@ -24,7 +24,7 @@ org.apache.opennlp opennlp - 1.7.3-SNAPSHOT + 1.8.0-SNAPSHOT ../pom.xml 
http://git-wip-us.apache.org/repos/asf/opennlp/blob/daa9fcaa/opennlp-morfologik-addon/pom.xml -- diff --git a/opennlp-morfologik-addon/pom.xml b/opennlp-morfologik-addon/pom.xml index 1c384c7..c46f101 100644 --- a/opennlp-morfologik-addon/pom.xml +++ b/opennlp-morfologik-addon/pom.xml @@ -24,7 +24,7 @@ org.apache.opennlp opennlp - 1.7.3-SNAPSHOT + 1.8.0-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/opennlp/blob/daa9fcaa/opennlp-tools/pom.xml -- diff --git a/opennlp-tools/pom.xml b/opennlp-tools/pom.xml index c7e9624..d2630c9 100644 --- a/opennlp-tools/pom.xml +++ b/opennlp-tools/pom.xml @@ -25,7 +25,7 @@ org.apache.opennlp opennlp -1.7.3-SNAPSHOT +1.8.0-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/opennlp/blob/daa9fcaa/opennlp-uima/pom.xml -- diff --git a/opennlp-uima/pom.xml b/opennlp-uima/pom.xml index 070fec9..7cfdb72 100644 --- a/opennlp-uima/pom.xml +++ b/opennlp-uima/pom.xml @@ -25,7 +25,7 @@ org.apache.opennlp opennlp - 1.7.3-SNAPSHOT + 1.8.0-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/opennlp/blob/daa9fcaa/pom.xml -- diff --git a/pom.xml b/pom.xml index 98acfb1..268a54e 100644 --- a/pom.xml +++ b/pom.xml @@ -31,7 +31,7 @@ org.apache.opennlp opennlp - 1.7.3-SNAPSHOT + 1.8.0-SNAPSHOT pom Apache OpenNLP Reactor
[22/50] [abbrv] opennlp git commit: OPENNLP-978: Set name finder defaults to perceptron and cutoff zero
OPENNLP-978: Set name finder defaults to perceptron and cutoff zero Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/91352d5f Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/91352d5f Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/91352d5f Branch: refs/heads/parser_regression Commit: 91352d5fe620ad3fab988222878d4953603db6e3 Parents: 672f1b0 Author: Jörn KottmannAuthored: Tue Feb 7 23:58:43 2017 +0100 Committer: Jörn Kottmann Committed: Sun Apr 16 19:24:53 2017 +0200 -- .../namefind/TokenNameFinderCrossValidatorTool.java| 4 ++-- .../cmdline/namefind/TokenNameFinderTrainerTool.java | 3 ++- .../main/java/opennlp/tools/namefind/NameFinderME.java | 6 ++ .../java/opennlp/tools/util/TrainingParameters.java| 13 + .../java/opennlp/tools/namefind/NameFinderMETest.java | 1 + 5 files changed, 24 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/opennlp/blob/91352d5f/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java -- diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java index 333abd9..153d6f7 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java @@ -42,8 +42,8 @@ import opennlp.tools.namefind.TokenNameFinderEvaluationMonitor; import opennlp.tools.namefind.TokenNameFinderFactory; import opennlp.tools.util.InvalidFormatException; import opennlp.tools.util.SequenceCodec; +import opennlp.tools.util.TrainingParameters; import opennlp.tools.util.eval.EvaluationMonitor; -import opennlp.tools.util.model.ModelUtil; public final class TokenNameFinderCrossValidatorTool extends AbstractCrossValidatorTool { @@ -65,7 
+65,7 @@ public final class TokenNameFinderCrossValidatorTool mlParams = CmdLineUtil.loadTrainingParameters(params.getParams(), true); if (mlParams == null) { - mlParams = ModelUtil.createDefaultTrainingParameters(); + mlParams = new TrainingParameters(); } byte featureGeneratorBytes[] = http://git-wip-us.apache.org/repos/asf/opennlp/blob/91352d5f/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java -- diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java index a8d4417..fb73506 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java @@ -40,6 +40,7 @@ import opennlp.tools.namefind.TokenNameFinderFactory; import opennlp.tools.namefind.TokenNameFinderModel; import opennlp.tools.util.InvalidFormatException; import opennlp.tools.util.SequenceCodec; +import opennlp.tools.util.TrainingParameters; import opennlp.tools.util.featuregen.GeneratorFactory; import opennlp.tools.util.model.ArtifactSerializer; import opennlp.tools.util.model.ModelUtil; @@ -166,7 +167,7 @@ public final class TokenNameFinderTrainerTool mlParams = CmdLineUtil.loadTrainingParameters(params.getParams(), true); if (mlParams == null) { - mlParams = ModelUtil.createDefaultTrainingParameters(); + mlParams = new TrainingParameters(); } File modelOutFile = params.getModel(); http://git-wip-us.apache.org/repos/asf/opennlp/blob/91352d5f/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java -- diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java index 6ce0b83..5a16f34 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java +++ 
b/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java @@ -37,6 +37,7 @@ import opennlp.tools.ml.TrainerFactory.TrainerType; import opennlp.tools.ml.model.Event; import opennlp.tools.ml.model.MaxentModel; import opennlp.tools.ml.model.SequenceClassificationModel; +import opennlp.tools.ml.perceptron.PerceptronTrainer; import opennlp.tools.util.ObjectStream; import opennlp.tools.util.Sequence; import opennlp.tools.util.SequenceCodec; @@ -219,6
[24/50] [abbrv] opennlp git commit: OPENNLP-995: Add a PR Review Template for contributors
OPENNLP-995: Add a PR Review Template for contributors Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/82caa558 Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/82caa558 Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/82caa558 Branch: refs/heads/parser_regression Commit: 82caa558d8942c9366af07a5a80bd088aff6c10b Parents: 6cdca66 Author: smarthiAuthored: Thu Feb 23 07:46:17 2017 -0500 Committer: Jörn Kottmann Committed: Sun Apr 16 19:24:54 2017 +0200 -- .github/CONTRIBUTING.md | 11 +++ .github/PULL_REQUEST_TEMPLATE.md | 27 +++ 2 files changed, 38 insertions(+) -- http://git-wip-us.apache.org/repos/asf/opennlp/blob/82caa558/.github/CONTRIBUTING.md -- diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md new file mode 100644 index 000..577eb16 --- /dev/null +++ b/.github/CONTRIBUTING.md @@ -0,0 +1,11 @@ +# How to contribute to Apache OpenNLP + +Thank you for your intention to contribute to the Apache OpenNLP project. As an open-source community, we highly appreciate external contributions to our project. + +To make the process smooth for the project *committers* (those who review and accept changes) and *contributors* (those who propose new changes via pull requests), there are a few rules to follow. + +## Contribution Guidelines + +Please check out the [How to get involved](http://opennlp.apache.org/get-involved.html) to understand how contributions are made. +A detailed list of coding standards can be found at [Apache OpenNLP Code Conventions](http://opennlp.apache.org/code-conventions.html) which also contains a list of coding guidelines that you should follow. +For pull requests, there is a [check list](PULL_REQUEST_TEMPLATE.md) with criteria for acceptable contributions. 
http://git-wip-us.apache.org/repos/asf/opennlp/blob/82caa558/.github/PULL_REQUEST_TEMPLATE.md -- diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 000..579e2e0 --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,27 @@ +Thank you for contributing to Apache OpenNLP. + +In order to streamline the review of the contribution we ask you +to ensure the following steps have been taken: + +### For all changes: +- [ ] Is there a JIRA ticket associated with this PR? Is it referenced + in the commit message? + +- [ ] Does your PR title start with OPENNLP-XXXX where XXXX is the JIRA number you are trying to resolve? Pay particular attention to the hyphen "-" character. + +- [ ] Has your PR been rebased against the latest commit within the target branch (typically master)? + +- [ ] Is your initial contribution a single, squashed commit? + +### For code changes: +- [ ] Have you ensured that the full suite of tests is executed via mvn clean install at the root opennlp folder? +- [ ] Have you written or updated unit tests to verify your changes? +- [ ] If adding new dependencies to the code, are these dependencies licensed in a way that is compatible for inclusion under [ASF 2.0](http://www.apache.org/legal/resolved.html#category-a)? +- [ ] If applicable, have you updated the LICENSE file, including the main LICENSE file in opennlp folder? +- [ ] If applicable, have you updated the NOTICE file, including the main NOTICE file found in opennlp folder? + +### For documentation related changes: +- [ ] Have you ensured that format looks appropriate for the output in which it is rendered? + +### Note: +Please ensure that once the PR is submitted, you check travis-ci for build issues and submit an update to your PR as soon as possible.
[32/50] [abbrv] opennlp git commit: Revert "OPENNLP-1002 Remove deprecated GIS class"
Revert "OPENNLP-1002 Remove deprecated GIS class" This reverts commit efa257676280abd316bb677e5a8de5cb9fe1dd73. Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/1b6ad719 Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/1b6ad719 Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/1b6ad719 Branch: refs/heads/parser_regression Commit: 1b6ad719760ead028810715b49ff15219385ee42 Parents: 7487812 Author: Jörn KottmannAuthored: Fri Mar 10 17:22:28 2017 +0100 Committer: Jörn Kottmann Committed: Sun Apr 16 19:24:55 2017 +0200 -- .../main/java/opennlp/tools/ml/maxent/GIS.java | 303 +++ 1 file changed, 303 insertions(+) -- http://git-wip-us.apache.org/repos/asf/opennlp/blob/1b6ad719/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GIS.java -- diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GIS.java b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GIS.java new file mode 100644 index 000..97c214d --- /dev/null +++ b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GIS.java @@ -0,0 +1,303 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package opennlp.tools.ml.maxent; + +import java.io.IOException; + +import opennlp.tools.ml.AbstractEventTrainer; +import opennlp.tools.ml.model.AbstractModel; +import opennlp.tools.ml.model.DataIndexer; +import opennlp.tools.ml.model.Event; +import opennlp.tools.ml.model.Prior; +import opennlp.tools.ml.model.UniformPrior; +import opennlp.tools.util.ObjectStream; +import opennlp.tools.util.TrainingParameters; + +/** + * A Factory class which uses instances of GISTrainer to create and train + * GISModels. + * @deprecated use {@link GISTrainer} + */ +@Deprecated +public class GIS extends AbstractEventTrainer { + + public static final String MAXENT_VALUE = "MAXENT"; + + /** + * Set this to false if you don't want messages about the progress of model + * training displayed. Alternately, you can use the overloaded version of + * trainModel() to conditionally enable progress messages. + */ + public static boolean PRINT_MESSAGES = true; + + /** + * If we are using smoothing, this is used as the "number" of times we want + * the trainer to imagine that it saw a feature that it actually didn't see. + * Defaulted to 0.1. 
+ */ + private static final double SMOOTHING_OBSERVATION = 0.1; + + private static final String SMOOTHING_PARAM = "smoothing"; + private static final boolean SMOOTHING_DEFAULT = false; + + public GIS() { + } + + public GIS(TrainingParameters parameters) { +super(parameters); + } + + public boolean isValid() { + +if (!super.isValid()) { + return false; +} + +String algorithmName = getAlgorithm(); + +return !(algorithmName != null && !(MAXENT_VALUE.equals(algorithmName))); + } + + public boolean isSortAndMerge() { +return true; + } + + public AbstractModel doTrain(DataIndexer indexer) throws IOException { +int iterations = getIterations(); + +AbstractModel model; + +boolean printMessages = trainingParameters.getBooleanParameter(VERBOSE_PARAM, VERBOSE_DEFAULT); +boolean smoothing = trainingParameters.getBooleanParameter(SMOOTHING_PARAM, SMOOTHING_DEFAULT); +int threads = trainingParameters.getIntParameter(TrainingParameters.THREADS_PARAM, 1); + +model = trainModel(iterations, indexer, printMessages, smoothing, null, threads); + +return model; + } + + // << members related to AbstractEventTrainer + + /** + * Train a model using the GIS algorithm, assuming 100 iterations and no + * cutoff. + * + * @param eventStream + * The EventStream holding the data on which this model will be + * trained. + * @return The newly trained model, which can be used immediately or saved to + * disk using an opennlp.tools.ml.maxent.io.GISModelWriter object. + */ + public static GISModel trainModel(ObjectStream eventStream)
[48/50] [abbrv] opennlp git commit: OPENNLP-1015: Add tests for DataIndexers
OPENNLP-1015: Add tests for DataIndexers Closes #152 Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/7589af69 Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/7589af69 Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/7589af69 Branch: refs/heads/parser_regression Commit: 7589af69ea8a73326bed5e2f5b5c0445f95112eb Parents: 5eb8ff8 Author: kojiAuthored: Fri Apr 7 21:50:02 2017 +0900 Committer: Jörn Kottmann Committed: Sun Apr 16 19:24:57 2017 +0200 -- .../tools/ml/model/OnePassDataIndexerTest.java | 64 ++ .../model/OnePassRealValueDataIndexerTest.java | 116 +++ .../ml/model/SimpleEventStreamBuilder.java | 76 .../tools/ml/model/TwoPassDataIndexerTest.java | 64 ++ 4 files changed, 320 insertions(+) -- http://git-wip-us.apache.org/repos/asf/opennlp/blob/7589af69/opennlp-tools/src/test/java/opennlp/tools/ml/model/OnePassDataIndexerTest.java -- diff --git a/opennlp-tools/src/test/java/opennlp/tools/ml/model/OnePassDataIndexerTest.java b/opennlp-tools/src/test/java/opennlp/tools/ml/model/OnePassDataIndexerTest.java new file mode 100644 index 000..e629e7a --- /dev/null +++ b/opennlp-tools/src/test/java/opennlp/tools/ml/model/OnePassDataIndexerTest.java @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package opennlp.tools.ml.model; + +import java.io.IOException; +import java.util.Collections; + +import org.junit.Assert; +import org.junit.Test; + +import opennlp.tools.util.ObjectStream; +import opennlp.tools.util.TrainingParameters; + +public class OnePassDataIndexerTest { + + @Test + public void testIndex() throws IOException { +// He belongs to Apache Software Foundation . +ObjectStream eventStream = new SimpleEventStreamBuilder() +.add("other/w=he n1w=belongs n2w=to po=other pow=other,He powf=other,ic ppo=other") +.add("other/w=belongs p1w=he n1w=to n2w=apache po=other pow=other,belongs powf=other,lc ppo=other") +.add("other/w=to p1w=belongs p2w=he n1w=apache n2w=software po=other pow=other,to" + + " powf=other,lc ppo=other") +.add("org-start/w=apache p1w=to p2w=belongs n1w=software n2w=foundation po=other pow=other,Apache" + + " powf=other,ic ppo=other") +.add("org-cont/w=software p1w=apache p2w=to n1w=foundation n2w=. po=org-start" + + " pow=org-start,Software powf=org-start,ic ppo=other") +.add("org-cont/w=foundation p1w=software p2w=apache n1w=. po=org-cont pow=org-cont,Foundation" + + " powf=org-cont,ic ppo=org-start") +.add("other/w=. p1w=foundation p2w=software po=org-cont pow=org-cont,. 
powf=org-cont,other" + + " ppo=org-cont") +.build(); + +DataIndexer indexer = new OnePassDataIndexer(); +indexer.init(new TrainingParameters(Collections.emptyMap()), null); +indexer.index(eventStream); +Assert.assertEquals(3, indexer.getContexts().length); +Assert.assertArrayEquals(new int[]{0}, indexer.getContexts()[0]); +Assert.assertArrayEquals(new int[]{0}, indexer.getContexts()[1]); +Assert.assertArrayEquals(new int[]{0}, indexer.getContexts()[2]); +Assert.assertNull(indexer.getValues()); +Assert.assertEquals(5, indexer.getNumEvents()); +Assert.assertArrayEquals(new int[]{0, 1, 2}, indexer.getOutcomeList()); +Assert.assertArrayEquals(new int[]{3, 1, 1}, indexer.getNumTimesEventsSeen()); +Assert.assertArrayEquals(new String[]{"ppo=other"}, indexer.getPredLabels()); +Assert.assertArrayEquals(new String[]{"other", "org-start", "org-cont"}, indexer.getOutcomeLabels()); +Assert.assertArrayEquals(new int[]{5}, indexer.getPredCounts()); + } +} http://git-wip-us.apache.org/repos/asf/opennlp/blob/7589af69/opennlp-tools/src/test/java/opennlp/tools/ml/model/OnePassRealValueDataIndexerTest.java
[36/50] [abbrv] opennlp git commit: OPENNLP-125: Make POS Tagger feature generation configurable
OPENNLP-125: Make POS Tagger feature generation configurable Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/dd39d066 Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/dd39d066 Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/dd39d066 Branch: refs/heads/parser_regression Commit: dd39d06629294f3c9bd3980d02ba0f1716839e0e Parents: 711d70b Author: Jörn KottmannAuthored: Thu Feb 9 18:54:27 2017 +0100 Committer: Jörn Kottmann Committed: Sun Apr 16 19:24:55 2017 +0200 -- .../namefind/TokenNameFinderTrainerTool.java| 2 +- .../postag/POSTaggerCrossValidatorTool.java | 10 +- .../cmdline/postag/POSTaggerTrainerTool.java| 26 +-- .../tools/cmdline/postag/TrainingParams.java| 13 +- .../postag/ConfigurablePOSContextGenerator.java | 105 +++ .../opennlp/tools/postag/POSDictionary.java | 8 +- .../java/opennlp/tools/postag/POSModel.java | 40 +++-- .../tools/postag/POSTaggerCrossValidator.java | 44 ++--- .../opennlp/tools/postag/POSTaggerFactory.java | 179 ++- .../tools/util/featuregen/GeneratorFactory.java | 12 ++ .../featuregen/PosTaggerFeatureGenerator.java | 62 +++ .../tools/postag/pos-default-features.xml | 38 .../ConfigurablePOSContextGeneratorTest.java| 55 ++ .../tools/postag/DummyPOSTaggerFactory.java | 14 +- .../tools/postag/POSTaggerFactoryTest.java | 11 +- 15 files changed, 534 insertions(+), 85 deletions(-) -- http://git-wip-us.apache.org/repos/asf/opennlp/blob/dd39d066/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java -- diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java index 5bb18d2..4fb8cb9 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java @@ 
-67,7 +67,7 @@ public final class TokenNameFinderTrainerTool return null; } - static byte[] openFeatureGeneratorBytes(File featureGenDescriptorFile) { + public static byte[] openFeatureGeneratorBytes(File featureGenDescriptorFile) { byte[] featureGeneratorBytes = null; // load descriptor file into memory if (featureGenDescriptorFile != null) { http://git-wip-us.apache.org/repos/asf/opennlp/blob/dd39d066/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerCrossValidatorTool.java -- diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerCrossValidatorTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerCrossValidatorTool.java index d91d4ee..67ad2b9 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerCrossValidatorTool.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerCrossValidatorTool.java @@ -22,10 +22,12 @@ import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStream; +import java.util.Map; import opennlp.tools.cmdline.AbstractCrossValidatorTool; import opennlp.tools.cmdline.CmdLineUtil; import opennlp.tools.cmdline.TerminateToolException; +import opennlp.tools.cmdline.namefind.TokenNameFinderTrainerTool; import opennlp.tools.cmdline.params.CVParams; import opennlp.tools.cmdline.params.FineGrainedEvaluatorParams; import opennlp.tools.cmdline.postag.POSTaggerCrossValidatorTool.CVToolParams; @@ -75,10 +77,16 @@ public final class POSTaggerCrossValidatorTool } } +Map resources = TokenNameFinderTrainerTool.loadResources( +params.getResources(), params.getFeaturegen()); + +byte[] featureGeneratorBytes = + TokenNameFinderTrainerTool.openFeatureGeneratorBytes(params.getFeaturegen()); + POSTaggerCrossValidator validator; try { validator = new POSTaggerCrossValidator(params.getLang(), mlParams, - params.getDict(), params.getNgram(), params.getTagDictCutoff(), + params.getDict(), 
featureGeneratorBytes, resources, params.getTagDictCutoff(), params.getFactory(), missclassifiedListener, reportListener); validator.evaluate(sampleStream, params.getFolds()); http://git-wip-us.apache.org/repos/asf/opennlp/blob/dd39d066/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTrainerTool.java -- diff --git
[14/50] [abbrv] opennlp git commit: OPENNLP-980: Deprecate low-level feature constructors and methods
OPENNLP-980: Deprecate low-level feature constructors and methods Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/ac787a4d Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/ac787a4d Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/ac787a4d Branch: refs/heads/parser_regression Commit: ac787a4dabf9823bf8b7e91f1b73d60d17e01006 Parents: b41fcd6 Author: Jörn KottmannAuthored: Wed Feb 1 21:38:19 2017 +0100 Committer: Jörn Kottmann Committed: Sun Apr 16 19:24:52 2017 +0200 -- .../src/main/java/opennlp/tools/ml/maxent/GISModel.java | 1 + .../java/opennlp/tools/ml/maxent/quasinewton/QNModel.java | 1 + .../src/main/java/opennlp/tools/ml/model/AbstractModel.java | 9 + .../java/opennlp/tools/ml/naivebayes/NaiveBayesModel.java | 2 ++ .../java/opennlp/tools/ml/perceptron/PerceptronModel.java | 5 + 5 files changed, 18 insertions(+) -- http://git-wip-us.apache.org/repos/asf/opennlp/blob/ac787a4d/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GISModel.java -- diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GISModel.java b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GISModel.java index e546d1c..14c7fa3 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GISModel.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GISModel.java @@ -152,6 +152,7 @@ public final class GISModel extends AbstractModel { * string representation of the outcomes can be obtained from the * method getOutcome(int i). 
*/ + @Deprecated // visibility will be reduced in 1.8.1 public static double[] eval(int[] context, float[] values, double[] prior, EvalParameters model) { Context[] params = model.getParams(); http://git-wip-us.apache.org/repos/asf/opennlp/blob/ac787a4d/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/QNModel.java -- diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/QNModel.java b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/QNModel.java index a35d54c..f02ee75 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/QNModel.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/QNModel.java @@ -103,6 +103,7 @@ public class QNModel extends AbstractModel { * Model parameters * @return Normalized probabilities for the outcomes given the context. */ + @Deprecated // visibility will be reduced in 1.8.1 public static double[] eval(int[] context, float[] values, double[] probs, int nOutcomes, int nPredLabels, double[] parameters) { http://git-wip-us.apache.org/repos/asf/opennlp/blob/ac787a4d/opennlp-tools/src/main/java/opennlp/tools/ml/model/AbstractModel.java -- diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/model/AbstractModel.java b/opennlp-tools/src/main/java/opennlp/tools/ml/model/AbstractModel.java index e5a60a7..eb80f1b 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ml/model/AbstractModel.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ml/model/AbstractModel.java @@ -39,6 +39,15 @@ public abstract class AbstractModel implements MaxentModel { /** The type of the model. 
*/ protected ModelType modelType; + /** + * @deprecated this will be removed in 1.8.1, pmap should be private + * + * @param params + * @param predLabels + * @param pmap + * @param outcomeNames + */ + @Deprecated public AbstractModel(Context[] params, String[] predLabels, Map pmap, String[] outcomeNames) { this.pmap = pmap; http://git-wip-us.apache.org/repos/asf/opennlp/blob/ac787a4d/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/NaiveBayesModel.java -- diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/NaiveBayesModel.java b/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/NaiveBayesModel.java index ec3d9bd..0a28704 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/NaiveBayesModel.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/NaiveBayesModel.java @@ -31,6 +31,7 @@ public class NaiveBayesModel extends AbstractModel { protected double[] outcomeTotals; protected long vocabulary; + @Deprecated public NaiveBayesModel(Context[] params, String[] predLabels, Map pmap, String[] outcomeNames) { super(params, predLabels, pmap, outcomeNames); @@ -87,6 +88,7 @@ public
[34/50] [abbrv] opennlp git commit: OPENNLP-904 Harmonize lemmatizer API and function to get multiple lemmas
OPENNLP-904 Harmonize lemmatizer API and function to get multiple lemmas OPENNLP-904 add minor correction after PR comment Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/d3c16d53 Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/d3c16d53 Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/d3c16d53 Branch: refs/heads/parser_regression Commit: d3c16d53633595619963114e9499c92fe1d7ee2a Parents: b78abfb Author: Rodrigo AgerriAuthored: Fri Feb 3 16:00:38 2017 +0100 Committer: Jörn Kottmann Committed: Sun Apr 16 19:24:55 2017 +0200 -- .../cmdline/lemmatizer/LemmatizerMETool.java| 4 +- .../tools/lemmatizer/DictionaryLemmatizer.java | 70 ++-- .../lemmatizer/LemmaSampleEventStream.java | 2 +- .../tools/lemmatizer/LemmaSampleStream.java | 4 +- .../opennlp/tools/lemmatizer/Lemmatizer.java| 16 - .../opennlp/tools/lemmatizer/LemmatizerME.java | 64 -- .../tools/lemmatizer/DummyLemmatizer.java | 7 ++ .../tools/lemmatizer/LemmatizerMETest.java | 3 +- 8 files changed, 136 insertions(+), 34 deletions(-) -- http://git-wip-us.apache.org/repos/asf/opennlp/blob/d3c16d53/opennlp-tools/src/main/java/opennlp/tools/cmdline/lemmatizer/LemmatizerMETool.java -- diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/lemmatizer/LemmatizerMETool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/lemmatizer/LemmatizerMETool.java index e4e47b5..90ba95d 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/lemmatizer/LemmatizerMETool.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/lemmatizer/LemmatizerMETool.java @@ -72,10 +72,8 @@ public class LemmatizerMETool extends BasicCmdLineTool { continue; } - String[] preds = lemmatizer.lemmatize(posSample.getSentence(), + String[] lemmas = lemmatizer.lemmatize(posSample.getSentence(), posSample.getTags()); - String[] lemmas = lemmatizer.decodeLemmas(posSample.getSentence(), - preds); System.out.println(new 
LemmaSample(posSample.getSentence(), posSample.getTags(), lemmas).toString()); http://git-wip-us.apache.org/repos/asf/opennlp/blob/d3c16d53/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DictionaryLemmatizer.java -- diff --git a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DictionaryLemmatizer.java b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DictionaryLemmatizer.java index b1b04a1..9f0b0b0 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DictionaryLemmatizer.java +++ b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DictionaryLemmatizer.java @@ -37,7 +37,7 @@ public class DictionaryLemmatizer implements Lemmatizer { /** * The hashmap containing the dictionary. */ - private final Map dictMap; + private final Map
dictMap; /** * Construct a hashmap from the input tab separated dictionary. @@ -47,26 +47,24 @@ public class DictionaryLemmatizer implements Lemmatizer { * @param dictionary * the input dictionary via inputstream */ - public DictionaryLemmatizer(final InputStream dictionary) { + public DictionaryLemmatizer(final InputStream dictionary) throws IOException { this.dictMap = new HashMap<>(); -final BufferedReader breader = new BufferedReader(new InputStreamReader(dictionary)); +final BufferedReader breader = new BufferedReader( +new InputStreamReader(dictionary)); String line; -try { - while ((line = breader.readLine()) != null) { -final String[] elems = line.split("\t"); -this.dictMap.put(Arrays.asList(elems[0], elems[1]), elems[2]); - } -} catch (final IOException e) { - e.printStackTrace(); +while ((line = breader.readLine()) != null) { + final String[] elems = line.split("\t"); + this.dictMap.put(Arrays.asList(elems[0], elems[1]), Arrays.asList(elems[2])); } } + /** * Get the Map containing the dictionary. * * @return dictMap the Map */ - public Map
getDictMap() { + public Map
getDictMap() { return this.dictMap; } @@ -85,31 +83,65 @@ public class DictionaryLemmatizer implements Lemmatizer { return keys; } + public String[] lemmatize(final String[] tokens, final String[] postags) { List lemmas = new ArrayList<>(); for (int i = 0; i < tokens.length; i++) { - lemmas.add(this.apply(tokens[i], postags[i])); + lemmas.add(this.lemmatize(tokens[i], postags[i]));
[38/50] [abbrv] opennlp git commit: OPENNLP-1002 Remove deprecated GIS class
OPENNLP-1002 Remove deprecated GIS class Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/fc10d2e9 Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/fc10d2e9 Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/fc10d2e9 Branch: refs/heads/parser_regression Commit: fc10d2e9ec3c98e93bdae7d503f1e09848a28a6a Parents: 1b6ad71 Author: Jörn KottmannAuthored: Sun Mar 12 11:10:43 2017 +0100 Committer: Jörn Kottmann Committed: Sun Apr 16 19:24:56 2017 +0200 -- .../cmdline/parser/BuildModelUpdaterTool.java | 7 +- .../cmdline/parser/CheckModelUpdaterTool.java | 7 +- .../main/java/opennlp/tools/ml/maxent/GIS.java | 303 --- .../tools/ml/maxent/GISIndexingTest.java| 78 +++-- .../tools/ml/maxent/ScaleDoesntMatterTest.java | 20 +- 5 files changed, 80 insertions(+), 335 deletions(-) -- http://git-wip-us.apache.org/repos/asf/opennlp/blob/fc10d2e9/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/BuildModelUpdaterTool.java -- diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/BuildModelUpdaterTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/BuildModelUpdaterTool.java index 327355b..7efd342 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/BuildModelUpdaterTool.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/BuildModelUpdaterTool.java @@ -20,7 +20,8 @@ package opennlp.tools.cmdline.parser; import java.io.IOException; import opennlp.tools.dictionary.Dictionary; -import opennlp.tools.ml.maxent.GIS; +import opennlp.tools.ml.EventTrainer; +import opennlp.tools.ml.TrainerFactory; import opennlp.tools.ml.model.Event; import opennlp.tools.ml.model.MaxentModel; import opennlp.tools.parser.Parse; @@ -28,6 +29,7 @@ import opennlp.tools.parser.ParserEventTypeEnum; import opennlp.tools.parser.ParserModel; import opennlp.tools.parser.chunking.ParserEventStream; import opennlp.tools.util.ObjectStream; +import 
opennlp.tools.util.model.ModelUtil; public final class BuildModelUpdaterTool extends ModelUpdaterTool { @@ -50,7 +52,8 @@ public final class BuildModelUpdaterTool extends ModelUpdaterTool { ObjectStream bes = new ParserEventStream(parseSamples, originalModel.getHeadRules(), ParserEventTypeEnum.BUILD, mdict); -GIS trainer = new GIS(); +EventTrainer trainer = TrainerFactory.getEventTrainer( +ModelUtil.createDefaultTrainingParameters(), null); MaxentModel buildModel = trainer.train(bes); parseSamples.close(); http://git-wip-us.apache.org/repos/asf/opennlp/blob/fc10d2e9/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/CheckModelUpdaterTool.java -- diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/CheckModelUpdaterTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/CheckModelUpdaterTool.java index 55e96ba..0c98812 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/CheckModelUpdaterTool.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/CheckModelUpdaterTool.java @@ -20,7 +20,8 @@ package opennlp.tools.cmdline.parser; import java.io.IOException; import opennlp.tools.dictionary.Dictionary; -import opennlp.tools.ml.maxent.GIS; +import opennlp.tools.ml.EventTrainer; +import opennlp.tools.ml.TrainerFactory; import opennlp.tools.ml.model.Event; import opennlp.tools.ml.model.MaxentModel; import opennlp.tools.parser.Parse; @@ -28,6 +29,7 @@ import opennlp.tools.parser.ParserEventTypeEnum; import opennlp.tools.parser.ParserModel; import opennlp.tools.parser.chunking.ParserEventStream; import opennlp.tools.util.ObjectStream; +import opennlp.tools.util.model.ModelUtil; // trains a new check model ... 
public final class CheckModelUpdaterTool extends ModelUpdaterTool { @@ -51,7 +53,8 @@ public final class CheckModelUpdaterTool extends ModelUpdaterTool { ObjectStream bes = new ParserEventStream(parseSamples, originalModel.getHeadRules(), ParserEventTypeEnum.CHECK, mdict); -GIS trainer = new GIS(); +EventTrainer trainer = TrainerFactory.getEventTrainer( +ModelUtil.createDefaultTrainingParameters(), null); MaxentModel checkModel = trainer.train(bes); parseSamples.close(); http://git-wip-us.apache.org/repos/asf/opennlp/blob/fc10d2e9/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GIS.java -- diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GIS.java b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GIS.java deleted file mode 100644
[50/50] [abbrv] opennlp git commit: OPENNLP-1010: Fix NaiveBayes model writer
OPENNLP-1010: Fix NaiveBayes model writer The previous sortValues method was based on Perceptron, but for some reason it was not working for NaiveBayes. Changing it to the one from GIS fixed it. this closes apache/opennlp#154 Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/3ac2fb37 Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/3ac2fb37 Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/3ac2fb37 Branch: refs/heads/parser_regression Commit: 3ac2fb37750595dfff573bb813b48a9d889052a2 Parents: ef4c667 Author: William D C M SILVA Authored: Fri Apr 14 09:35:36 2017 -0300 Committer: Jörn Kottmann Committed: Sun Apr 16 19:24:58 2017 +0200 -- .../ml/naivebayes/NaiveBayesModelWriter.java| 71 --- .../NaiveBayesSerializedCorrectnessTest.java| 184 +++ 2 files changed, 225 insertions(+), 30 deletions(-) -- http://git-wip-us.apache.org/repos/asf/opennlp/blob/3ac2fb37/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/NaiveBayesModelWriter.java -- diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/NaiveBayesModelWriter.java b/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/NaiveBayesModelWriter.java index bbb6eee..510bf76 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/NaiveBayesModelWriter.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/NaiveBayesModelWriter.java @@ -55,44 +55,55 @@ public abstract class NaiveBayesModelWriter extends AbstractModelWriter { } } + protected ComparablePredicate[] sortValues() { -ComparablePredicate[] sortPreds; -ComparablePredicate[] tmpPreds = new ComparablePredicate[PARAMS.length]; -int[] tmpOutcomes = new int[numOutcomes]; -double[] tmpParams = new double[numOutcomes]; -int numPreds = 0; -//remove parameters with 0 weight and predicates with no parameters -for (int pid = 0; pid < PARAMS.length; pid++) { - int numParams = 0; - double[] predParams = PARAMS[pid].getParameters(); - int[] 
outcomePattern = PARAMS[pid].getOutcomes(); - for (int pi = 0; pi < predParams.length; pi++) { -if (predParams[pi] != 0d) { - tmpOutcomes[numParams] = outcomePattern[pi]; - tmpParams[numParams] = predParams[pi]; - numParams++; -} - } - int[] activeOutcomes = new int[numParams]; - double[] activeParams = new double[numParams]; +ComparablePredicate[] sortPreds = new ComparablePredicate[PARAMS.length]; - for (int pi = 0; pi < numParams; pi++) { -activeOutcomes[pi] = tmpOutcomes[pi]; -activeParams[pi] = tmpParams[pi]; - } - if (numParams != 0) { -tmpPreds[numPreds] = new ComparablePredicate(PRED_LABELS[pid], activeOutcomes, activeParams); -numPreds++; - } +int numParams = 0; +for (int pid = 0; pid < PARAMS.length; pid++) { + int[] predkeys = PARAMS[pid].getOutcomes(); + // Arrays.sort(predkeys); + int numActive = predkeys.length; + double[] activeParams = PARAMS[pid].getParameters(); + + numParams += numActive; + /* + * double[] activeParams = new double[numActive]; + * + * int id = 0; for (int i=0; i < predkeys.length; i++) { int oid = + * predkeys[i]; activeOutcomes[id] = oid; activeParams[id] = + * PARAMS[pid].getParams(oid); id++; } + */ + sortPreds[pid] = new ComparablePredicate(PRED_LABELS[pid], + predkeys, activeParams); } -System.err.println("Compressed " + PARAMS.length + " parameters to " + numPreds); -sortPreds = new ComparablePredicate[numPreds]; -System.arraycopy(tmpPreds, 0, sortPreds, 0, numPreds); + Arrays.sort(sortPreds); return sortPreds; } + protected List compressOutcomes(ComparablePredicate[] sorted) { +List
outcomePatterns = new ArrayList<>(); +if (sorted.length > 0) { + ComparablePredicate cp = sorted[0]; + List newGroup = new ArrayList<>(); + for (int i = 0; i < sorted.length; i++) { +if (cp.compareTo(sorted[i]) == 0) { + newGroup.add(sorted[i]); +} else { + cp = sorted[i]; + outcomePatterns.add(newGroup); + newGroup = new ArrayList<>(); + newGroup.add(sorted[i]); +} + } + outcomePatterns.add(newGroup); +} +return outcomePatterns; + } + + protected List
computeOutcomePatterns(ComparablePredicate[] sorted) { ComparablePredicate cp = sorted[0]; http://git-wip-us.apache.org/repos/asf/opennlp/blob/3ac2fb37/opennlp-tools/src/test/java/opennlp/tools/ml/naivebayes/NaiveBayesSerializedCorrectnessTest.java
[01/50] [abbrv] opennlp git commit: Rolling back Release 1.7.2 RC
Repository: opennlp Updated Branches: refs/heads/parser_regression [created] 3ac2fb377 Rolling back Release 1.7.2 RC Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/a81f37b3 Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/a81f37b3 Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/a81f37b3 Branch: refs/heads/parser_regression Commit: a81f37b3c89c37b092f0a83d1c5cf5959bafd10c Parents: c91d353 Author: smarthiAuthored: Wed Feb 1 09:15:41 2017 -0500 Committer: Jörn Kottmann Committed: Sun Apr 16 19:24:50 2017 +0200 -- opennlp-brat-annotator/pom.xml | 2 +- opennlp-distr/pom.xml| 2 +- opennlp-docs/pom.xml | 2 +- opennlp-morfologik-addon/pom.xml | 2 +- opennlp-tools/pom.xml| 2 +- opennlp-uima/pom.xml | 2 +- pom.xml | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) -- http://git-wip-us.apache.org/repos/asf/opennlp/blob/a81f37b3/opennlp-brat-annotator/pom.xml -- diff --git a/opennlp-brat-annotator/pom.xml b/opennlp-brat-annotator/pom.xml index 1633deb..4bf95cf 100644 --- a/opennlp-brat-annotator/pom.xml +++ b/opennlp-brat-annotator/pom.xml @@ -17,7 +17,7 @@ org.apache.opennlp opennlp - 1.7.3-SNAPSHOT + 1.7.2-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/opennlp/blob/a81f37b3/opennlp-distr/pom.xml -- diff --git a/opennlp-distr/pom.xml b/opennlp-distr/pom.xml index c0a57c3..613bd80 100644 --- a/opennlp-distr/pom.xml +++ b/opennlp-distr/pom.xml @@ -24,7 +24,7 @@ org.apache.opennlp opennlp - 1.7.3-SNAPSHOT + 1.7.2-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/opennlp/blob/a81f37b3/opennlp-docs/pom.xml -- diff --git a/opennlp-docs/pom.xml b/opennlp-docs/pom.xml index fd2b0d1..7b916c0 100644 --- a/opennlp-docs/pom.xml +++ b/opennlp-docs/pom.xml @@ -24,7 +24,7 @@ org.apache.opennlp opennlp - 1.7.3-SNAPSHOT + 1.7.2-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/opennlp/blob/a81f37b3/opennlp-morfologik-addon/pom.xml -- diff --git 
a/opennlp-morfologik-addon/pom.xml b/opennlp-morfologik-addon/pom.xml index 1c384c7..d62a70a 100644 --- a/opennlp-morfologik-addon/pom.xml +++ b/opennlp-morfologik-addon/pom.xml @@ -24,7 +24,7 @@ org.apache.opennlp opennlp - 1.7.3-SNAPSHOT + 1.7.2-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/opennlp/blob/a81f37b3/opennlp-tools/pom.xml -- diff --git a/opennlp-tools/pom.xml b/opennlp-tools/pom.xml index 22fc017..9441ebb 100644 --- a/opennlp-tools/pom.xml +++ b/opennlp-tools/pom.xml @@ -25,7 +25,7 @@ org.apache.opennlp opennlp -1.7.3-SNAPSHOT +1.7.2-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/opennlp/blob/a81f37b3/opennlp-uima/pom.xml -- diff --git a/opennlp-uima/pom.xml b/opennlp-uima/pom.xml index 070fec9..1e99c3d 100644 --- a/opennlp-uima/pom.xml +++ b/opennlp-uima/pom.xml @@ -25,7 +25,7 @@ org.apache.opennlp opennlp - 1.7.3-SNAPSHOT + 1.7.2-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/opennlp/blob/a81f37b3/pom.xml -- diff --git a/pom.xml b/pom.xml index 98acfb1..7081f25 100644 --- a/pom.xml +++ b/pom.xml @@ -31,7 +31,7 @@ org.apache.opennlp opennlp - 1.7.3-SNAPSHOT + 1.7.2-SNAPSHOT pom Apache OpenNLP Reactor
[45/50] [abbrv] opennlp git commit: OPENNLP-1006: Refactor usage of tag constants in sequence validators
OPENNLP-1006: Refactor usage of tag constants in sequence validators Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/8abe90d3 Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/8abe90d3 Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/8abe90d3 Branch: refs/heads/parser_regression Commit: 8abe90d3f79f4e8bd8da0780bb8368b018aee64b Parents: 81b07ec Author: Peter ThygesenAuthored: Tue Mar 28 16:59:34 2017 +0200 Committer: Jörn Kottmann Committed: Sun Apr 16 19:24:57 2017 +0200 -- .../src/main/java/opennlp/tools/namefind/BioCodec.java | 10 +- .../tools/namefind/NameFinderSequenceValidator.java | 11 +++ 2 files changed, 12 insertions(+), 9 deletions(-) -- http://git-wip-us.apache.org/repos/asf/opennlp/blob/8abe90d3/opennlp-tools/src/main/java/opennlp/tools/namefind/BioCodec.java -- diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/BioCodec.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/BioCodec.java index 2218021..c0570a5 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/namefind/BioCodec.java +++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/BioCodec.java @@ -118,13 +118,13 @@ public class BioCodec implements SequenceCodec { for (int i = 0; i < outcomes.length; i++) { String outcome = outcomes[i]; - if (outcome.endsWith(NameFinderME.START)) { + if (outcome.endsWith(BioCodec.START)) { start.add(outcome.substring(0, outcome.length() -- NameFinderME.START.length())); - } else if (outcome.endsWith(NameFinderME.CONTINUE)) { +- BioCodec.START.length())); + } else if (outcome.endsWith(BioCodec.CONTINUE)) { cont.add(outcome.substring(0, outcome.length() -- NameFinderME.CONTINUE.length())); - } else if (!outcome.equals(NameFinderME.OTHER)) { +- BioCodec.CONTINUE.length())); + } else if (!outcome.equals(BioCodec.OTHER)) { // got unexpected outcome return false; } 
http://git-wip-us.apache.org/repos/asf/opennlp/blob/8abe90d3/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderSequenceValidator.java -- diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderSequenceValidator.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderSequenceValidator.java index 5143468..bb6700e 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderSequenceValidator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderSequenceValidator.java @@ -19,6 +19,9 @@ package opennlp.tools.namefind; import opennlp.tools.util.SequenceValidator; +/** + * This class is created by the {@link BioCodec}. + */ public class NameFinderSequenceValidator implements SequenceValidator { @@ -27,16 +30,16 @@ public class NameFinderSequenceValidator implements // outcome is formatted like "cont" or "sometype-cont", so we // can check if it ends with "cont". -if (outcome.endsWith(NameFinderME.CONTINUE)) { +if (outcome.endsWith(BioCodec.CONTINUE)) { int li = outcomesSequence.length - 1; if (li == -1) { return false; - } else if (outcomesSequence[li].endsWith(NameFinderME.OTHER)) { + } else if (outcomesSequence[li].endsWith(BioCodec.OTHER)) { return false; - } else if (outcomesSequence[li].endsWith(NameFinderME.CONTINUE) || - outcomesSequence[li].endsWith(NameFinderME.START)) { + } else if (outcomesSequence[li].endsWith(BioCodec.CONTINUE) || + outcomesSequence[li].endsWith(BioCodec.START)) { // if it is continue or start, we have to check if previous match was of the same type String previousNameType = NameFinderME.extractNameType(outcomesSequence[li]); String nameType = NameFinderME.extractNameType(outcome);
[10/50] [abbrv] opennlp git commit: OpenNLP-981: Add training stream hash to AbstractEventTrainer. This closes #118.
OpenNLP-981: Add training stream hash to AbstractEventTrainer. This closes #118. Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/cd23b58a Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/cd23b58a Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/cd23b58a Branch: refs/heads/parser_regression Commit: cd23b58a3c04053d8c6cafa761aa0fc533774304 Parents: daa9fca Author: Daniel Russ Authored: Thu Feb 9 09:56:12 2017 -0500 Committer: Jörn Kottmann Committed: Sun Apr 16 19:24:52 2017 +0200 -- .../opennlp/tools/ml/AbstractEventTrainer.java | 1 + .../ml/perceptron/PerceptronPrepAttachTest.java| 17 + 2 files changed, 18 insertions(+) -- http://git-wip-us.apache.org/repos/asf/opennlp/blob/cd23b58a/opennlp-tools/src/main/java/opennlp/tools/ml/AbstractEventTrainer.java -- diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/AbstractEventTrainer.java b/opennlp-tools/src/main/java/opennlp/tools/ml/AbstractEventTrainer.java index c465f88..bb11aaa 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ml/AbstractEventTrainer.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ml/AbstractEventTrainer.java @@ -88,6 +88,7 @@ public abstract class AbstractEventTrainer extends AbstractTrainer implements Ev HashSumEventStream hses = new HashSumEventStream(events); DataIndexer indexer = getDataIndexer(hses); +addToReport("Training-Eventhash", hses.calculateHashSum().toString(16)); return train(indexer); } } http://git-wip-us.apache.org/repos/asf/opennlp/blob/cd23b58a/opennlp-tools/src/test/java/opennlp/tools/ml/perceptron/PerceptronPrepAttachTest.java -- diff --git a/opennlp-tools/src/test/java/opennlp/tools/ml/perceptron/PerceptronPrepAttachTest.java b/opennlp-tools/src/test/java/opennlp/tools/ml/perceptron/PerceptronPrepAttachTest.java index d4d70ca..eda49f8 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/ml/perceptron/PerceptronPrepAttachTest.java +++ 
b/opennlp-tools/src/test/java/opennlp/tools/ml/perceptron/PerceptronPrepAttachTest.java @@ -23,6 +23,7 @@ import java.io.DataInputStream; import java.io.DataOutputStream; import java.io.IOException; import java.util.HashMap; +import java.util.Map; import org.junit.Assert; import org.junit.Test; @@ -134,4 +135,20 @@ public class PerceptronPrepAttachTest { Assert.assertEquals(modelA, modelB); Assert.assertEquals(modelA.hashCode(), modelB.hashCode()); } + + @Test + public void verifyReportMap() throws IOException { +TrainingParameters trainParams = new TrainingParameters(); +trainParams.put(AbstractTrainer.ALGORITHM_PARAM, PerceptronTrainer.PERCEPTRON_VALUE); +trainParams.put(AbstractTrainer.CUTOFF_PARAM, Integer.toString(1)); +// Since we are verifying the report map, we don't need to have more than 1 iteration +trainParams.put(AbstractTrainer.ITERATIONS_PARAM, Integer.toString(1)); +trainParams.put("UseSkippedAveraging", Boolean.toString(true)); + +Map reportMap = new HashMap<>(); +EventTrainer trainer = TrainerFactory.getEventTrainer(trainParams, reportMap); +trainer.train(PrepAttachDataUtil.createTrainingStream()); +Assert.assertTrue("Report Map does not contain the training event hash", +reportMap.containsKey("Training-Eventhash")); + } }
[23/50] [abbrv] opennlp git commit: OPENNLP-994: Remove deprecated methods from the Document Categorizer, this closes apache/opennlp#133
OPENNLP-994: Remove deprecated methods from the Document Categorizer, this closes apache/opennlp#133 Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/76609f5c Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/76609f5c Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/76609f5c Branch: refs/heads/parser_regression Commit: 76609f5c105bcfc3abab6e2d19de283d945c96a6 Parents: 81acc6e Author: smarthiAuthored: Mon Feb 27 17:23:40 2017 -0500 Committer: Jörn Kottmann Committed: Sun Apr 16 19:24:54 2017 +0200 -- .../doccat/DoccatCrossValidatorTool.java| 7 +- .../tools/cmdline/doccat/DoccatTool.java| 11 +- .../tools/cmdline/doccat/DoccatTrainerTool.java | 5 +- .../opennlp/tools/doccat/DoccatFactory.java | 93 + .../tools/doccat/DocumentCategorizer.java | 54 ++ .../doccat/DocumentCategorizerEvaluator.java| 2 +- .../tools/doccat/DocumentCategorizerME.java | 101 ++- .../opennlp/tools/doccat/DocumentSample.java| 6 -- .../formats/LeipzigDoccatSampleStream.java | 19 ++-- .../tools/doccat/DocumentCategorizerMETest.java | 18 ++-- .../tools/doccat/DocumentCategorizerNBTest.java | 17 ++-- .../tools/doccat/DocumentSampleTest.java| 4 +- .../doccat/AbstractDocumentCategorizer.java | 29 +++--- .../java/opennlp/uima/util/AnnotatorUtil.java | 6 +- 14 files changed, 66 insertions(+), 306 deletions(-) -- http://git-wip-us.apache.org/repos/asf/opennlp/blob/76609f5c/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatCrossValidatorTool.java -- diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatCrossValidatorTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatCrossValidatorTool.java index f0f1712..a73aba7 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatCrossValidatorTool.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatCrossValidatorTool.java @@ -36,7 +36,6 @@ import 
opennlp.tools.doccat.DoccatEvaluationMonitor; import opennlp.tools.doccat.DoccatFactory; import opennlp.tools.doccat.DocumentSample; import opennlp.tools.doccat.FeatureGenerator; -import opennlp.tools.tokenize.Tokenizer; import opennlp.tools.util.eval.EvaluationMonitor; import opennlp.tools.util.model.ModelUtil; @@ -84,16 +83,12 @@ public final class DoccatCrossValidatorTool extends FeatureGenerator[] featureGenerators = DoccatTrainerTool .createFeatureGenerators(params.getFeatureGenerators()); -Tokenizer tokenizer = DoccatTrainerTool.createTokenizer(params -.getTokenizer()); - DoccatEvaluationMonitor[] listenersArr = listeners .toArray(new DoccatEvaluationMonitor[listeners.size()]); DoccatCrossValidator validator; try { - DoccatFactory factory = DoccatFactory.create(params.getFactory(), - tokenizer, featureGenerators); + DoccatFactory factory = DoccatFactory.create(params.getFactory(), featureGenerators); validator = new DoccatCrossValidator(params.getLang(), mlParams, factory, listenersArr); http://git-wip-us.apache.org/repos/asf/opennlp/blob/76609f5c/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTool.java -- diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTool.java index a01d354..49a640c 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTool.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTool.java @@ -28,6 +28,7 @@ import opennlp.tools.cmdline.SystemInputStreamFactory; import opennlp.tools.doccat.DoccatModel; import opennlp.tools.doccat.DocumentCategorizerME; import opennlp.tools.doccat.DocumentSample; +import opennlp.tools.tokenize.WhitespaceTokenizer; import opennlp.tools.util.ObjectStream; import opennlp.tools.util.ParagraphStream; import opennlp.tools.util.PlainTextByLineStream; @@ -36,7 +37,7 @@ public class DoccatTool extends BasicCmdLineTool { @Override public String 
getShortDescription() { -return "learnable document categorizer"; +return "learned document categorizer"; } @Override @@ -53,7 +54,7 @@ public class DoccatTool extends BasicCmdLineTool { DoccatModel model = new DoccatModelLoader().load(new File(args[0])); - DocumentCategorizerME doccat = new DocumentCategorizerME(model); + DocumentCategorizerME documentCategorizerME = new DocumentCategorizerME(model); /* * moved
[07/50] [abbrv] opennlp git commit: [maven-release-plugin] prepare for next development iteration
[maven-release-plugin] prepare for next development iteration Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/f9db192d Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/f9db192d Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/f9db192d Branch: refs/heads/parser_regression Commit: f9db192d33138c9d5048a2349f06e70b13719ff6 Parents: 4b8ebad Author: smarthi Authored: Wed Feb 1 11:03:42 2017 -0500 Committer: Jörn Kottmann Committed: Sun Apr 16 19:24:51 2017 +0200 -- opennlp-brat-annotator/pom.xml | 2 +- opennlp-distr/pom.xml| 2 +- opennlp-docs/pom.xml | 2 +- opennlp-morfologik-addon/pom.xml | 2 +- opennlp-tools/pom.xml| 2 +- opennlp-uima/pom.xml | 2 +- pom.xml | 4 ++-- 7 files changed, 8 insertions(+), 8 deletions(-) -- http://git-wip-us.apache.org/repos/asf/opennlp/blob/f9db192d/opennlp-brat-annotator/pom.xml -- diff --git a/opennlp-brat-annotator/pom.xml b/opennlp-brat-annotator/pom.xml index 53517ca..1633deb 100644 --- a/opennlp-brat-annotator/pom.xml +++ b/opennlp-brat-annotator/pom.xml @@ -17,7 +17,7 @@ org.apache.opennlp opennlp - 1.7.2 + 1.7.3-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/opennlp/blob/f9db192d/opennlp-distr/pom.xml -- diff --git a/opennlp-distr/pom.xml b/opennlp-distr/pom.xml index ae86021..c0a57c3 100644 --- a/opennlp-distr/pom.xml +++ b/opennlp-distr/pom.xml @@ -24,7 +24,7 @@ org.apache.opennlp opennlp - 1.7.2 + 1.7.3-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/opennlp/blob/f9db192d/opennlp-docs/pom.xml -- diff --git a/opennlp-docs/pom.xml b/opennlp-docs/pom.xml index b765d91..fd2b0d1 100644 --- a/opennlp-docs/pom.xml +++ b/opennlp-docs/pom.xml @@ -24,7 +24,7 @@ org.apache.opennlp opennlp - 1.7.2 + 1.7.3-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/opennlp/blob/f9db192d/opennlp-morfologik-addon/pom.xml -- diff --git a/opennlp-morfologik-addon/pom.xml b/opennlp-morfologik-addon/pom.xml index 50844f2..1c384c7 100644 
--- a/opennlp-morfologik-addon/pom.xml +++ b/opennlp-morfologik-addon/pom.xml @@ -24,7 +24,7 @@ org.apache.opennlp opennlp - 1.7.2 + 1.7.3-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/opennlp/blob/f9db192d/opennlp-tools/pom.xml -- diff --git a/opennlp-tools/pom.xml b/opennlp-tools/pom.xml index 6cdb688..22fc017 100644 --- a/opennlp-tools/pom.xml +++ b/opennlp-tools/pom.xml @@ -25,7 +25,7 @@ org.apache.opennlp opennlp -1.7.2 +1.7.3-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/opennlp/blob/f9db192d/opennlp-uima/pom.xml -- diff --git a/opennlp-uima/pom.xml b/opennlp-uima/pom.xml index 39f1040..070fec9 100644 --- a/opennlp-uima/pom.xml +++ b/opennlp-uima/pom.xml @@ -25,7 +25,7 @@ org.apache.opennlp opennlp - 1.7.2 + 1.7.3-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/opennlp/blob/f9db192d/pom.xml -- diff --git a/pom.xml b/pom.xml index bbb48c8..98acfb1 100644 --- a/pom.xml +++ b/pom.xml @@ -31,7 +31,7 @@ org.apache.opennlp opennlp - 1.7.2 + 1.7.3-SNAPSHOT pom Apache OpenNLP Reactor @@ -40,7 +40,7 @@ scm:git:g...@github.com:apache/opennlp.git scm:git:https://git-wip-us.apache.org/repos/asf/opennlp.git https://git-wip-us.apache.org/repos/asf?p=opennlp.git - opennlp-1.7.2 + HEAD
[17/50] [abbrv] opennlp git commit: OPENNLP-990 Fix all array style violations and add a checkstyle rule
OPENNLP-990 Fix all array style violations and add a checkstyle rule This closes #127 Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/fdff127b Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/fdff127b Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/fdff127b Branch: refs/heads/parser_regression Commit: fdff127b38dafb2bbb8df186385bfdb8abc0e9d1 Parents: 1cd2658 Author: Peter ThygesenAuthored: Thu Feb 16 12:48:12 2017 +0100 Committer: Jörn Kottmann Committed: Sun Apr 16 19:24:53 2017 +0200 -- checkstyle.xml | 1 + .../opennlp/bratann/NameFinderAnnService.java | 2 +- .../opennlp/bratann/NameFinderResource.java | 6 ++--- .../java/opennlp/morfologik/cmdline/CLI.java| 2 +- .../chunker/ChunkSampleSequenceStream.java | 4 +-- .../java/opennlp/tools/chunker/Chunker.java | 4 +-- .../tools/cmdline/AbstractConverterTool.java| 2 +- .../opennlp/tools/cmdline/ArgumentParser.java | 16 +-- .../opennlp/tools/cmdline/BasicCmdLineTool.java | 2 +- .../main/java/opennlp/tools/cmdline/CLI.java| 2 +- .../java/opennlp/tools/cmdline/CmdLineUtil.java | 10 +++ .../tools/cmdline/EvaluationErrorPrinter.java | 10 +++ .../cmdline/FineGrainedReportListener.java | 2 +- .../tools/cmdline/ObjectStreamFactory.java | 2 +- .../opennlp/tools/cmdline/TypedCmdLineTool.java | 2 +- .../tools/cmdline/doccat/DoccatTool.java| 2 +- .../cmdline/entitylinker/EntityLinkerTool.java | 2 +- .../TokenNameFinderCrossValidatorTool.java | 4 +-- .../namefind/TokenNameFinderEvaluatorTool.java | 2 +- .../cmdline/namefind/TokenNameFinderTool.java | 6 ++--- .../namefind/TokenNameFinderTrainerTool.java| 8 +++--- .../tools/cmdline/postag/POSTaggerTool.java | 2 +- .../tokenizer/DictionaryDetokenizerTool.java| 2 +- .../opennlp/tools/dictionary/Dictionary.java| 2 +- .../tools/doccat/DocumentCategorizer.java | 6 ++--- .../DocumentCategorizerContextGenerator.java| 2 +- .../doccat/DocumentCategorizerEvaluator.java| 4 +-- 
.../tools/doccat/DocumentCategorizerME.java | 4 +-- .../opennlp/tools/doccat/DocumentSample.java| 4 +-- .../tools/doccat/DocumentSampleStream.java | 4 +-- .../formats/BioNLP2004NameSampleStream.java | 2 +- .../tools/formats/Conll02NameSampleStream.java | 2 +- .../tools/formats/Conll03NameSampleStream.java | 2 +- .../tools/formats/ConllXPOSSampleStream.java| 2 +- .../tools/formats/DirectorySampleStream.java| 4 +-- .../tools/formats/EvalitaNameSampleStream.java | 2 +- .../formats/LeipzigDoccatSampleStream.java | 2 +- .../LeipzigDocumentSampleStreamFactory.java | 4 +-- .../formats/brat/BratAnnotationStream.java | 10 +++ .../tools/formats/brat/BratDocument.java| 2 +- .../formats/brat/BratNameSampleStream.java | 2 +- .../convert/FileToByteArraySampleStream.java| 2 +- .../convert/FileToStringSampleStream.java | 2 +- .../formats/muc/MucNameContentHandler.java | 2 +- .../ontonotes/OntoNotesNameSampleStream.java| 2 +- .../lemmatizer/LemmaSampleSequenceStream.java | 6 ++--- .../opennlp/tools/lemmatizer/Lemmatizer.java| 2 +- .../main/java/opennlp/tools/ml/BeamSearch.java | 4 +-- .../java/opennlp/tools/ml/maxent/GISModel.java | 2 +- .../opennlp/tools/ml/model/MaxentModel.java | 2 +- .../SimplePerceptronSequenceTrainer.java| 4 +-- .../java/opennlp/tools/namefind/BioCodec.java | 2 +- .../namefind/DefaultNameContextGenerator.java | 4 +-- .../tools/namefind/DictionaryNameFinder.java| 2 +- .../tools/namefind/NameFinderEventStream.java | 2 +- .../opennlp/tools/namefind/NameFinderME.java| 2 +- .../java/opennlp/tools/namefind/NameSample.java | 2 +- .../namefind/NameSampleSequenceStream.java | 4 +-- .../opennlp/tools/namefind/RegexNameFinder.java | 8 +++--- .../opennlp/tools/namefind/TokenNameFinder.java | 2 +- .../namefind/TokenNameFinderCrossValidator.java | 4 +-- .../namefind/TokenNameFinderEvaluator.java | 4 +-- .../tools/namefind/TokenNameFinderFactory.java | 2 +- .../tools/namefind/TokenNameFinderModel.java| 2 +- .../tools/parser/AbstractBottomUpParser.java| 6 ++--- 
.../tools/parser/ChunkContextGenerator.java | 2 +- .../tools/parser/ParserChunkerFactory.java | 2 +- .../parser/ParserChunkerSequenceValidator.java | 2 +- .../opennlp/tools/parser/PosSampleStream.java | 4 +-- .../opennlp/tools/postag/POSDictionary.java | 2 +- .../java/opennlp/tools/postag/POSEvaluator.java | 4 +-- .../java/opennlp/tools/postag/POSSample.java| 10 +++ .../tools/postag/POSSampleEventStream.java | 6 ++--- .../tools/postag/POSSampleSequenceStream.java | 4 +--
[11/50] [abbrv] opennlp git commit: OPENNLP-964: Ignore LICENSE, NOTICE and README files in the model
OPENNLP-964: Ignore LICENSE, NOTICE and README files in the model Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/b41fcd69 Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/b41fcd69 Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/b41fcd69 Branch: refs/heads/parser_regression Commit: b41fcd69baef80ed1e99656e9a3b7424aa294bb8 Parents: a2049d6 Author: Jörn KottmannAuthored: Thu Feb 2 19:13:02 2017 +0100 Committer: Jörn Kottmann Committed: Sun Apr 16 19:24:52 2017 +0200 -- .../tagdict/MorfologikPOSTaggerFactory.java | 15 +-- .../tools/namefind/TokenNameFinderModel.java| 14 +- .../opennlp/tools/util/model/BaseModel.java | 2 + .../tools/util/model/ByteArraySerializer.java | 33 ++ .../util/model/ByteArraySerializerTest.java | 45 5 files changed, 82 insertions(+), 27 deletions(-) -- http://git-wip-us.apache.org/repos/asf/opennlp/blob/b41fcd69/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/tagdict/MorfologikPOSTaggerFactory.java -- diff --git a/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/tagdict/MorfologikPOSTaggerFactory.java b/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/tagdict/MorfologikPOSTaggerFactory.java index 370b4d0..592ef7d 100644 --- a/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/tagdict/MorfologikPOSTaggerFactory.java +++ b/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/tagdict/MorfologikPOSTaggerFactory.java @@ -22,7 +22,6 @@ import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; -import java.io.OutputStream; import java.nio.file.Files; import java.nio.file.Path; import java.util.Map; @@ -33,7 +32,7 @@ import opennlp.tools.dictionary.Dictionary; import opennlp.tools.postag.POSTaggerFactory; import opennlp.tools.postag.TagDictionary; import opennlp.tools.util.model.ArtifactSerializer; -import opennlp.tools.util.model.ModelUtil; +import 
opennlp.tools.util.model.ByteArraySerializer; public class MorfologikPOSTaggerFactory extends POSTaggerFactory { @@ -150,16 +149,4 @@ public class MorfologikPOSTaggerFactory extends POSTaggerFactory { info)); return new MorfologikTagDictionary(dict); } - - static class ByteArraySerializer implements ArtifactSerializer { - -public byte[] create(InputStream in) throws IOException { - return ModelUtil.read(in); -} - -public void serialize(byte[] artifact, OutputStream out) throws IOException { - out.write(artifact); -} - } - } http://git-wip-us.apache.org/repos/asf/opennlp/blob/b41fcd69/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java -- diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java index 05a3615..09eefc5 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java +++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java @@ -21,7 +21,6 @@ package opennlp.tools.namefind; import java.io.File; import java.io.IOException; import java.io.InputStream; -import java.io.OutputStream; import java.net.URL; import java.util.Map; import java.util.Properties; @@ -36,7 +35,7 @@ import opennlp.tools.util.featuregen.BrownCluster; import opennlp.tools.util.featuregen.WordClusterDictionary; import opennlp.tools.util.model.ArtifactSerializer; import opennlp.tools.util.model.BaseModel; -import opennlp.tools.util.model.ModelUtil; +import opennlp.tools.util.model.ByteArraySerializer; /** * The {@link TokenNameFinderModel} is the model used @@ -53,17 +52,6 @@ public class TokenNameFinderModel extends BaseModel { } } - private static class ByteArraySerializer implements ArtifactSerializer { - -public byte[] create(InputStream in) throws IOException { - return ModelUtil.read(in); -} - -public void serialize(byte[] artifact, OutputStream out) throws IOException { - out.write(artifact); -} - } 
- private static final String COMPONENT_NAME = "NameFinderME"; private static final String MAXENT_MODEL_ENTRY_NAME = "nameFinder.model"; http://git-wip-us.apache.org/repos/asf/opennlp/blob/b41fcd69/opennlp-tools/src/main/java/opennlp/tools/util/model/BaseModel.java -- diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/model/BaseModel.java
[28/50] [abbrv] opennlp git commit: OPENNLP-229: Add test for NameFinderSequenceValidator
OPENNLP-229: Add test for NameFinderSequenceValidator This closes #125 Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/40cdacb5 Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/40cdacb5 Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/40cdacb5 Branch: refs/heads/parser_regression Commit: 40cdacb55583cf70d7e47b26fc0108fa71f3ab51 Parents: ebb5b24 Author: Peter Thygesen Authored: Wed Feb 15 21:12:48 2017 +0100 Committer: Jörn Kottmann Committed: Sun Apr 16 19:24:54 2017 +0200 -- .../NameFinderSequenceValidatorTest.java| 186 +++ 1 file changed, 186 insertions(+) -- http://git-wip-us.apache.org/repos/asf/opennlp/blob/40cdacb5/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderSequenceValidatorTest.java -- diff --git a/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderSequenceValidatorTest.java b/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderSequenceValidatorTest.java new file mode 100644 index 000..35752c1 --- /dev/null +++ b/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderSequenceValidatorTest.java @@ -0,0 +1,186 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package opennlp.tools.namefind; + +import org.junit.Assert; +import org.junit.Ignore; +import org.junit.Test; + +/** + * This is the test class for {@link NameFinderSequenceValidator}.. + */ +public class NameFinderSequenceValidatorTest { + + private static NameFinderSequenceValidator validator = new NameFinderSequenceValidator(); + private static String START_A = "TypeA-" + NameFinderME.START; + private static String CONTINUE_A = "TypeA-" + NameFinderME.CONTINUE; + private static String START_B = "TypeB-" + NameFinderME.START; + private static String CONTINUE_B = "TypeB-" + NameFinderME.CONTINUE; + private static String OTHER = NameFinderME.OTHER; + + @Test + public void testContinueCannotBeFirstOutcome() { + +final String outcome = CONTINUE_A; + +String[] inputSequence = new String[] {"PersonA", "is", "here"}; +String[] outcomesSequence = new String[] {}; +Assert.assertFalse(validator.validSequence(0, inputSequence, outcomesSequence, outcome)); + + } + + @Test + public void testContinueAfterStartAndSameType() { + +final String outcome = CONTINUE_A; + +// previous start, same name type +String[] inputSequence = new String[] {"Stefanie", "Schmidt", "is", "German"}; +String[] outcomesSequence = new String[] {START_A}; +Assert.assertTrue(validator.validSequence(1, inputSequence, outcomesSequence, outcome)); + + } + + @Ignore + @Test + public void testContinueAfterStartAndNotSameType() { + +final String outcome = CONTINUE_B; + +// previous start, not same name type +String[] inputSequence = new String[] {"PersonA", "LocationA", "something"}; +String[] outcomesSequence = new String[] {START_A}; +Assert.assertFalse(validator.validSequence(1, inputSequence, outcomesSequence, outcome)); + } + + @Test + public void testContinueAfterContinueAndSameType() { + +final String outcome = CONTINUE_A; + +// previous continue, same name type +String[] inputSequence = new String[] {"FirstName", "MidleName", "LastName", "is", "a", "long", "name"}; +String[] outcomesSequence = 
new String[] {START_A, CONTINUE_A}; +Assert.assertTrue(validator.validSequence(2, inputSequence, outcomesSequence, outcome)); + } + + @Test + public void testContinueAfterContinueAndNotSameType() { + +final String outcome = CONTINUE_B; + +// previous continue, not same name type +String[] inputSequence = new String[] {"FirstName", "LastName", "LocationA", "something"}; +String[] outcomesSequence = new String[] {START_A, CONTINUE_A}; +Assert.assertFalse(validator.validSequence(2, inputSequence, outcomesSequence, outcome)); + } + + @Test + public void testContinueAfterOther() { + +final
[05/50] [abbrv] opennlp git commit: OpenNLP-977: Remove deprecated map methods
OpenNLP-977: Remove deprecated map methods Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/51cd8091 Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/51cd8091 Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/51cd8091 Branch: refs/heads/parser_regression Commit: 51cd80914b9cddb5771232929b1e9326cecc6170 Parents: 212cf14 Author: Daniel RussAuthored: Mon Feb 6 13:39:59 2017 -0500 Committer: Jörn Kottmann Committed: Sun Apr 16 19:24:51 2017 +0200 -- .../java/opennlp/tools/chunker/ChunkerME.java | 6 +- .../java/opennlp/tools/cmdline/CmdLineUtil.java | 4 +- .../tools/cmdline/parser/ParserTrainerTool.java | 10 +-- .../cmdline/postag/POSTaggerTrainerTool.java| 2 +- .../sentdetect/SentenceDetectorTrainerTool.java | 2 +- .../cmdline/tokenizer/TokenizerTrainerTool.java | 4 +- .../tools/doccat/DocumentCategorizerME.java | 2 +- .../opennlp/tools/lemmatizer/LemmatizerME.java | 8 +- .../tools/ml/EventModelSequenceTrainer.java | 3 + .../java/opennlp/tools/ml/EventTrainer.java | 3 + .../java/opennlp/tools/ml/SequenceTrainer.java | 3 + .../java/opennlp/tools/ml/TrainerFactory.java | 83 +--- .../opennlp/tools/namefind/NameFinderME.java| 8 +- .../opennlp/tools/parser/chunking/Parser.java | 4 +- .../opennlp/tools/parser/treeinsert/Parser.java | 6 +- .../java/opennlp/tools/postag/POSTaggerME.java | 8 +- .../tools/sentdetect/SentenceDetectorME.java| 2 +- .../opennlp/tools/tokenize/TokenizerME.java | 2 +- .../java/opennlp/tools/ml/MockEventTrainer.java | 6 ++ .../opennlp/tools/ml/MockSequenceTrainer.java | 6 ++ .../opennlp/tools/ml/TrainerFactoryTest.java| 12 +-- .../tools/ml/maxent/GISIndexingTest.java| 4 +- .../tools/ml/maxent/MaxentPrepAttachTest.java | 5 +- .../ml/maxent/quasinewton/QNPrepAttachTest.java | 11 ++- .../ml/naivebayes/NaiveBayesPrepAttachTest.java | 5 +- .../ml/perceptron/PerceptronPrepAttachTest.java | 11 ++- .../java/opennlp/uima/util/OpennlpUtil.java | 4 +- 27 files changed, 114 
insertions(+), 110 deletions(-) -- http://git-wip-us.apache.org/repos/asf/opennlp/blob/51cd8091/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerME.java -- diff --git a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerME.java b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerME.java index a59b5ce..71917fb 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerME.java +++ b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerME.java @@ -171,7 +171,7 @@ public class ChunkerME implements Chunker { Map manifestInfoEntries = new HashMap<>(); -TrainerType trainerType = TrainerFactory.getTrainerType(mlParams.getSettings()); +TrainerType trainerType = TrainerFactory.getTrainerType(mlParams); MaxentModel chunkerModel = null; @@ -179,13 +179,13 @@ public class ChunkerME implements Chunker { if (TrainerType.EVENT_MODEL_TRAINER.equals(trainerType)) { ObjectStream es = new ChunkerEventStream(in, factory.getContextGenerator()); - EventTrainer trainer = TrainerFactory.getEventTrainer(mlParams.getSettings(), + EventTrainer trainer = TrainerFactory.getEventTrainer(mlParams, manifestInfoEntries); chunkerModel = trainer.train(es); } else if (TrainerType.SEQUENCE_TRAINER.equals(trainerType)) { SequenceTrainer trainer = TrainerFactory.getSequenceModelTrainer( - mlParams.getSettings(), manifestInfoEntries); + mlParams, manifestInfoEntries); // TODO: This will probably cause issue, since the feature generator uses the outcomes array http://git-wip-us.apache.org/repos/asf/opennlp/blob/51cd8091/opennlp-tools/src/main/java/opennlp/tools/cmdline/CmdLineUtil.java -- diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/CmdLineUtil.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/CmdLineUtil.java index 6855898..7ea2a0b 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/CmdLineUtil.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/CmdLineUtil.java @@ -324,11 +324,11 @@ public final class CmdLineUtil { throw new 
TerminateToolException(-1, "Error during parameters loading: " + e.getMessage(), e); } - if (!TrainerFactory.isValid(params.getSettings())) { + if (!TrainerFactory.isValid(params)) { throw new TerminateToolException(1, "Training parameters file '" + paramFile + "' is invalid!"); } - TrainerFactory.TrainerType trainerType =