This is an automated email from the ASF dual-hosted git repository.
rzo1 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/opennlp.git
The following commit(s) were added to refs/heads/main by this push:
new 3c6670b9 OPENNLP-1229 PorterStemmer stem function giving wrong output
3c6670b9 is described below
commit 3c6670b948be1b7c44a2909a14790e0522e4c545
Author: Martin Wiesner <[email protected]>
AuthorDate: Sun Feb 26 16:34:01 2023 +0100
OPENNLP-1229 PorterStemmer stem function giving wrong output
- adds unit test to verify "this" is stemmed to "thi", as expected for
`PorterStemmer`
- adds unit test to verify "this" is stemmed to "this" for `SnowballStemmer`
- adds additional ParameterizedTest in PorterStemmerTest
- improves JavaDoc along the path
---
.../java/opennlp/tools/stemmer/PorterStemmer.java | 12 ++++----
.../opennlp/tools/stemmer/PorterStemmerTest.java | 35 +++++++++++++++-------
.../opennlp/tools/stemmer/SnowballStemmerTest.java | 6 ++++
3 files changed, 37 insertions(+), 16 deletions(-)
diff --git a/opennlp-tools/src/main/java/opennlp/tools/stemmer/PorterStemmer.java b/opennlp-tools/src/main/java/opennlp/tools/stemmer/PorterStemmer.java
index a9e6f18b..65cdbdb5 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/stemmer/PorterStemmer.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/stemmer/PorterStemmer.java
@@ -44,12 +44,12 @@
package opennlp.tools.stemmer;
/**
- *
- * Stemmer, implementing the Porter Stemming Algorithm
- *
- * The Stemmer class transforms a word into its root form. The input
- * word can be provided a character at time (by calling add()), or at once
- * by calling one of the various stem(something) methods.
+ * A {@link Stemmer}, implementing the <a href="https://tartarus.org/martin/PorterStemmer/">
+ * Porter Stemming Algorithm</a>
+ * <p>
+ * The Stemmer implementation transforms a word into its root form. The input
+ * word can be provided a character at a time (by calling {@link #add(char)}),
+ * or at once by calling one of the various {@code stem(..)} methods.
*/
// CHECKSTYLE:OFF
public class PorterStemmer implements Stemmer {
diff --git a/opennlp-tools/src/test/java/opennlp/tools/stemmer/PorterStemmerTest.java b/opennlp-tools/src/test/java/opennlp/tools/stemmer/PorterStemmerTest.java
index f43c66b6..aab212f4 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/stemmer/PorterStemmerTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/stemmer/PorterStemmerTest.java
@@ -18,23 +18,38 @@
package opennlp.tools.stemmer;
import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.ValueSource;
public class PorterStemmerTest {
- private PorterStemmer stemmer = new PorterStemmer();
+ private PorterStemmer stemmer;
- @Test
- void testNotNull() {
- Assertions.assertNotNull(stemmer);
+ @BeforeEach
+ public void setup() {
+ stemmer = new PorterStemmer();
}
@Test
- void testStemming() {
- Assertions.assertEquals(stemmer.stem("deny"), "deni");
- Assertions.assertEquals(stemmer.stem("declining"), "declin");
- Assertions.assertEquals(stemmer.stem("diversity"), "divers");
- Assertions.assertEquals(stemmer.stem("divers"), "diver");
- Assertions.assertEquals(stemmer.stem("dental"), "dental");
+ void testStem() {
+ Assertions.assertEquals("deni", stemmer.stem("deny"));
+ Assertions.assertEquals("declin", stemmer.stem("declining"));
+ Assertions.assertEquals("divers", stemmer.stem("diversity"));
+ Assertions.assertEquals("diver", stemmer.stem("divers"));
+ Assertions.assertEquals("dental", stemmer.stem("dental"));
+ }
+
+ @ParameterizedTest
+ @ValueSource(strings = {"likes", "liked", "likely", "liking"})
+ void testStemLike(String input) {
+ Assertions.assertEquals("like", stemmer.stem(input));
+ }
+
+
+ @Test // Context: OpenNLP-1229 - This is here to demonstrate & verify.
+ void testStemThis() {
+ Assertions.assertEquals("thi", stemmer.stem("this"));
}
}
diff --git a/opennlp-tools/src/test/java/opennlp/tools/stemmer/SnowballStemmerTest.java b/opennlp-tools/src/test/java/opennlp/tools/stemmer/SnowballStemmerTest.java
index 54335417..3c33fde3 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/stemmer/SnowballStemmerTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/stemmer/SnowballStemmerTest.java
@@ -69,6 +69,12 @@ public class SnowballStemmerTest {
}
+ @Test // Context: OpenNLP-1229 - This is here to demonstrate & verify.
+ void testStemThis() {
+ SnowballStemmer stemmer = new SnowballStemmer(ALGORITHM.ENGLISH);
+ Assertions.assertEquals("this", stemmer.stem("this"));
+ }
+
@Test
void testFinnish() {
SnowballStemmer stemmer = new SnowballStemmer(ALGORITHM.FINNISH);