This is an automated email from the ASF dual-hosted git repository. mawiesne pushed a commit to branch OPENNLP-1229_stem_function_giving_wrong_output in repository https://gitbox.apache.org/repos/asf/opennlp.git
commit e36ef2ee60e22935298576aa1b2c1a40bc9a9dc7 Author: Martin Wiesner <[email protected]> AuthorDate: Sun Feb 26 16:34:01 2023 +0100 OPENNLP-1229 PorterStemmer stem function giving wrong output - adds unit test to verify "this" is stemmed to "thi", as expected for `PorterStemmer` - adds unit test to verify "this" is stemmed to "this" for `SnowballStemmer` - adds additional ParameterizedTest in PorterStemmerTest - improves JavaDoc along the path --- .../java/opennlp/tools/stemmer/PorterStemmer.java | 12 ++++---- .../opennlp/tools/stemmer/PorterStemmerTest.java | 35 +++++++++++++++------- .../opennlp/tools/stemmer/SnowballStemmerTest.java | 6 ++++ 3 files changed, 37 insertions(+), 16 deletions(-) diff --git a/opennlp-tools/src/main/java/opennlp/tools/stemmer/PorterStemmer.java b/opennlp-tools/src/main/java/opennlp/tools/stemmer/PorterStemmer.java index a9e6f18b..65cdbdb5 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/stemmer/PorterStemmer.java +++ b/opennlp-tools/src/main/java/opennlp/tools/stemmer/PorterStemmer.java @@ -44,12 +44,12 @@ package opennlp.tools.stemmer; /** - * - * Stemmer, implementing the Porter Stemming Algorithm - * - * The Stemmer class transforms a word into its root form. The input - * word can be provided a character at time (by calling add()), or at once - * by calling one of the various stem(something) methods. + * A {@link Stemmer}, implementing the <a href="https://tartarus.org/martin/PorterStemmer/"> + * Porter Stemming Algorithm</a> + * <p> + * The Stemmer implementation transforms a word into its root form. The input + * word can be provided a character at time (by calling {@link #add(char)}), + * or at once by calling one of the various {@code stem(..)} methods. */ // CHECKSTYLE:OFF public class PorterStemmer implements Stemmer { diff --git a/opennlp-tools/src/test/java/opennlp/tools/stemmer/PorterStemmerTest.java b/opennlp-tools/src/test/java/opennlp/tools/stemmer/PorterStemmerTest.java index f43c66b6..aab212f4 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/stemmer/PorterStemmerTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/stemmer/PorterStemmerTest.java @@ -18,23 +18,38 @@ package opennlp.tools.stemmer; import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; public class PorterStemmerTest { - private PorterStemmer stemmer = new PorterStemmer(); + private PorterStemmer stemmer; - @Test - void testNotNull() { - Assertions.assertNotNull(stemmer); + @BeforeEach + public void setup() { + stemmer = new PorterStemmer(); } @Test - void testStemming() { - Assertions.assertEquals(stemmer.stem("deny"), "deni"); - Assertions.assertEquals(stemmer.stem("declining"), "declin"); - Assertions.assertEquals(stemmer.stem("diversity"), "divers"); - Assertions.assertEquals(stemmer.stem("divers"), "diver"); - Assertions.assertEquals(stemmer.stem("dental"), "dental"); + void testStem() { + Assertions.assertEquals("deni", stemmer.stem("deny")); + Assertions.assertEquals("declin", stemmer.stem("declining")); + Assertions.assertEquals("divers", stemmer.stem("diversity")); + Assertions.assertEquals("diver", stemmer.stem("divers")); + Assertions.assertEquals("dental", stemmer.stem("dental")); + } + + @ParameterizedTest + @ValueSource(strings = {"likes", "liked", "likely", "liking"}) + void testStemLike(String input) { + Assertions.assertEquals("like", stemmer.stem(input)); + } + + + @Test // Context: OpenNLP-1229 - This is here to demonstrate & verify. + void testStemThis() { + Assertions.assertEquals("thi", stemmer.stem("this")); } } diff --git a/opennlp-tools/src/test/java/opennlp/tools/stemmer/SnowballStemmerTest.java b/opennlp-tools/src/test/java/opennlp/tools/stemmer/SnowballStemmerTest.java index 54335417..3c33fde3 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/stemmer/SnowballStemmerTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/stemmer/SnowballStemmerTest.java @@ -69,6 +69,12 @@ public class SnowballStemmerTest { } + @Test // Context: OpenNLP-1229 - This is here to demonstrate & verify. + void testStemThis() { + SnowballStemmer stemmer = new SnowballStemmer(ALGORITHM.ENGLISH); + Assertions.assertEquals("this", stemmer.stem("this")); + } + @Test void testFinnish() { SnowballStemmer stemmer = new SnowballStemmer(ALGORITHM.FINNISH);
