OPENNLP-1056: Fix NullPointerException in DictionaryLemmatizer
Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/cb6ee2cb Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/cb6ee2cb Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/cb6ee2cb Branch: refs/heads/LangDetect Commit: cb6ee2cbdeadad5d277a6e7293d88bb915090c4f Parents: 60792b8 Author: Daniel Russ <dr...@mail.nih.gov> Authored: Thu May 11 10:56:58 2017 -0400 Committer: Daniel Russ <dr...@mail.nih.gov> Committed: Thu May 11 11:06:49 2017 -0400 ---------------------------------------------------------------------- .../tools/lemmatizer/DictionaryLemmatizer.java | 2 +- .../lemmatizer/DictionaryLemmatizerTest.java | 49 ++++++++++++++++++++ .../tools/lemmatizer/smalldictionary.dict | 5 ++ 3 files changed, 55 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/opennlp/blob/cb6ee2cb/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DictionaryLemmatizer.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DictionaryLemmatizer.java b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DictionaryLemmatizer.java index 9f0b0b0..37d488c 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DictionaryLemmatizer.java +++ b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DictionaryLemmatizer.java @@ -114,7 +114,7 @@ public class DictionaryLemmatizer implements Lemmatizer { final List<String> keys = this.getDictKeys(word, postag); // lookup lemma as value of the map final List<String> keyValues = this.dictMap.get(keys); - if (!keyValues.isEmpty()) { + if ( keyValues != null && !keyValues.isEmpty()) { lemma = keyValues.get(0); } else { lemma = "O"; http://git-wip-us.apache.org/repos/asf/opennlp/blob/cb6ee2cb/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/DictionaryLemmatizerTest.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/DictionaryLemmatizerTest.java b/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/DictionaryLemmatizerTest.java new file mode 100644 index 0000000..6cf72cf --- /dev/null +++ b/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/DictionaryLemmatizerTest.java @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package opennlp.tools.lemmatizer; + +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; + +public class DictionaryLemmatizerTest { + + private static DictionaryLemmatizer dictionaryLemmatizer; + + @BeforeClass + public static void loadDictionary() throws Exception { + dictionaryLemmatizer = new DictionaryLemmatizer( + DictionaryLemmatizerTest.class.getResourceAsStream("/opennlp/tools/lemmatizer/smalldictionary.dict") + ); + } + + @Test + public void testForNullPointerException() { + String[] sentence = new String[]{"The","dogs","were","running","and","barking","down","the","street"}; + String[] sentencePOS = new String[]{"DT","NNS","VBD","VBG","CC","VBG","RP","DT","NN"}; + String[] expectedLemma = new String[]{"the","dog","is","run","and","bark","down","the","street"}; + + String[] actualLemma = dictionaryLemmatizer.lemmatize(sentence, sentencePOS); + + for (int i = 0;i < sentence.length;i++) { + // don't compare cases where the word is not in the dictionary... + if (!actualLemma[i].equals("O")) Assert.assertEquals(expectedLemma[i], actualLemma[i]); + } + } + +} http://git-wip-us.apache.org/repos/asf/opennlp/blob/cb6ee2cb/opennlp-tools/src/test/resources/opennlp/tools/lemmatizer/smalldictionary.dict ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/test/resources/opennlp/tools/lemmatizer/smalldictionary.dict b/opennlp-tools/src/test/resources/opennlp/tools/lemmatizer/smalldictionary.dict new file mode 100644 index 0000000..edeb7a0 --- /dev/null +++ b/opennlp-tools/src/test/resources/opennlp/tools/lemmatizer/smalldictionary.dict @@ -0,0 +1,5 @@ +barking VBG bark +dogs NNS dog +running VBG run +down RP down +street NN street \ No newline at end of file