This is an automated email from the ASF dual-hosted git repository.
mawiesne pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/opennlp-sandbox.git
The following commit(s) were added to refs/heads/master by this push:
new 78af085 OPENNLP-1593 Provide tests for all Summarizer implementations
(#156)
78af085 is described below
commit 78af0852513152663bb471cc27e564b4781a0759
Author: Martin Wiesner <[email protected]>
AuthorDate: Thu Jul 11 11:02:37 2024 +0200
OPENNLP-1593 Provide tests for all Summarizer implementations (#156)
- provides JUnit tests for LexicalChainingSummarizer and TextRankSummarizer
- improves the existing code so that summarization works, fixes a bug in
LexicalChainingSummarizer#summarize
- improves the code quality
- adds JavaDoc where possible
---
.../lexicalchaining/LexicalChainingSummarizer.java | 10 ++--
.../opennlp/summarization/meta/MetaSummarizer.java | 5 +-
.../summarization/textrank/TextRankSummarizer.java | 19 +++++---
...arizerTest.java => AbstractSummarizerTest.java} | 29 +++++-------
.../LexicalChainingSummarizerTest.java | 43 +++++++++++++++++
.../summarization/meta/MetaSummarizerTest.java | 55 ++++------------------
.../textrank/TextRankSummarizerTest.java | 42 +++++++++++++++++
7 files changed, 130 insertions(+), 73 deletions(-)
diff --git
a/summarizer/src/main/java/opennlp/summarization/lexicalchaining/LexicalChainingSummarizer.java
b/summarizer/src/main/java/opennlp/summarization/lexicalchaining/LexicalChainingSummarizer.java
index 513fb54..f243d69 100755
---
a/summarizer/src/main/java/opennlp/summarization/lexicalchaining/LexicalChainingSummarizer.java
+++
b/summarizer/src/main/java/opennlp/summarization/lexicalchaining/LexicalChainingSummarizer.java
@@ -34,6 +34,9 @@ import opennlp.summarization.Summarizer;
* that share a word that are very closely related. Thus, the longest chain
represents the most important
* topic and so forth. A summary can then be formed by identifying the most
important lexical chains
* and "pulling" out sentences from them.
+ *
+ * @see LexicalChain
+ * @see Summarizer
*/
public class LexicalChainingSummarizer implements Summarizer {
@@ -186,11 +189,12 @@ public class LexicalChainingSummarizer implements
Summarizer {
List<Sentence> summ = new ArrayList<>();
StringBuilder sb = new StringBuilder();
for (int i = 0; i < lc.size(); i++) {
- for (int j = 0; j < lc.size(); j++) {
- Sentence candidate = lc.get(i).sentences.get(j);
+ LexicalChain chain = lc.get(i);
+ for (int j = 0; j < chain.sentences.size(); j++) {
+ Sentence candidate = chain.sentences.get(j);
if (!summ.contains(candidate)) {
summ.add(candidate);
- sb.append(candidate.getStringVal());
+ sb.append(candidate.getStringVal()).append(" ");
summSize += candidate.getWordCnt();
break;
}
diff --git
a/summarizer/src/main/java/opennlp/summarization/meta/MetaSummarizer.java
b/summarizer/src/main/java/opennlp/summarization/meta/MetaSummarizer.java
index bed3c2d..7fa1155 100644
--- a/summarizer/src/main/java/opennlp/summarization/meta/MetaSummarizer.java
+++ b/summarizer/src/main/java/opennlp/summarization/meta/MetaSummarizer.java
@@ -35,8 +35,11 @@ import opennlp.summarization.DocProcessor;
/**
* A summarizer that combines results from the text rank algorithm and the
lexical chaining algorithm.
- * It runs both algorithm and uses the lexical chains to identify the main
topics and relative importance
+ * It runs both algorithms and uses the lexical chains to identify the main
topics and relative importance
* and the text rank to pick sentences from lexical chains.
+ *
+ * @see TextRankSummarizer
+ * @see LexicalChainingSummarizer
*/
public class MetaSummarizer implements Summarizer {
diff --git
a/summarizer/src/main/java/opennlp/summarization/textrank/TextRankSummarizer.java
b/summarizer/src/main/java/opennlp/summarization/textrank/TextRankSummarizer.java
index 47c2514..765bb94 100755
---
a/summarizer/src/main/java/opennlp/summarization/textrank/TextRankSummarizer.java
+++
b/summarizer/src/main/java/opennlp/summarization/textrank/TextRankSummarizer.java
@@ -27,10 +27,18 @@ import java.util.ArrayList;
import java.util.Hashtable;
import java.util.List;
-/*
- * A wrapper around the text rank algorithm. This class
- * a) Sets up the data for the TextRank class
- * b) Takes the ranked sentences and does some basic rearranging (e.g.
ordering) to provide a more reasonable summary.
+/**
+ * A wrapper {@link Summarizer} implementation around the {@link TextRank text
rank} algorithm.
+ * <p>
+ * This implementation:
+ * <ol>
+ * <li>sets up the data for the {@link TextRank} class</li>
+ * <li>takes the ranked sentences and conducts rearranging (e.g. ordering) to
provide
+ * a more reasonable summary.</li>
+ * </ol>
+ *
+ * @see TextRank
+ * @see Summarizer
*/
public class TextRankSummarizer implements Summarizer {
@@ -98,7 +106,6 @@ public class TextRankSummarizer implements Summarizer {
return null;
}
- //Returns the summary as a string.
@Override
public String summarize(String article, int maxWords) {
List<Sentence> sentences = docProcessor.getSentencesFromStr(article);
@@ -112,7 +119,7 @@ public class TextRankSummarizer implements Summarizer {
int i = 0;
while (b.length() < maxWords && i < scores.size()) {
String sent = sentences.get(scores.get(i).getSentId()).getStringVal();
- b.append(sent).append(scores.get(i));
+ b.append(sent); //.append(scores.get(i));
i++;
}
return b.toString();
diff --git
a/summarizer/src/test/java/opennlp/summarization/meta/MetaSummarizerTest.java
b/summarizer/src/test/java/opennlp/summarization/AbstractSummarizerTest.java
similarity index 75%
copy from
summarizer/src/test/java/opennlp/summarization/meta/MetaSummarizerTest.java
copy to
summarizer/src/test/java/opennlp/summarization/AbstractSummarizerTest.java
index bd69886..ce7bc50 100644
---
a/summarizer/src/test/java/opennlp/summarization/meta/MetaSummarizerTest.java
+++ b/summarizer/src/test/java/opennlp/summarization/AbstractSummarizerTest.java
@@ -15,14 +15,12 @@
* limitations under the License.
*/
-package opennlp.summarization.meta;
+package opennlp.summarization;
-import opennlp.summarization.Summarizer;
import opennlp.summarization.lexicalchaining.OpenNLPPOSTagger;
import opennlp.summarization.preprocess.DefaultDocProcessor;
import org.junit.jupiter.api.BeforeAll;
-import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.ValueSource;
import org.slf4j.Logger;
@@ -34,26 +32,23 @@ import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
-public class MetaSummarizerTest {
+public abstract class AbstractSummarizerTest {
- private static final Logger log =
LoggerFactory.getLogger(MetaSummarizerTest.class);
-
- private static DefaultDocProcessor docProcessor;
- private static OpenNLPPOSTagger posTagger;
+ private static final Logger log =
LoggerFactory.getLogger(AbstractSummarizerTest.class);
- // SUT
- private Summarizer metaSummarizer;
+ protected static DefaultDocProcessor docProcessor;
+ protected static OpenNLPPOSTagger posTagger;
@BeforeAll
static void initEnv() throws IOException {
- docProcessor = new
DefaultDocProcessor(MetaSummarizerTest.class.getResourceAsStream("/en-sent.bin"));
- posTagger = new OpenNLPPOSTagger(docProcessor,
MetaSummarizerTest.class.getResourceAsStream("/en-pos-maxent.bin"));
+ docProcessor = new
DefaultDocProcessor(AbstractSummarizerTest.class.getResourceAsStream("/en-sent.bin"));
+ posTagger = new OpenNLPPOSTagger(docProcessor,
AbstractSummarizerTest.class.getResourceAsStream("/en-pos-maxent.bin"));
}
- @BeforeEach
- void setUp() {
- metaSummarizer = new MetaSummarizer(docProcessor, posTagger);
- }
+ /**
+ * @return Obtains the {@link Summarizer} under test.
+ */
+ public abstract Summarizer getSummarizer();
@ParameterizedTest(name = "news story {index}")
@ValueSource(strings = {
@@ -69,7 +64,7 @@ public class MetaSummarizerTest {
})
public void testSummarize(String filename) {
String article = docProcessor.docToString(filename);
- String summary = metaSummarizer.summarize(article, 20);
+ String summary = getSummarizer().summarize(article, 20);
assertNotNull(summary);
assertFalse(summary.isBlank());
assertTrue(summary.length() > 20);
diff --git
a/summarizer/src/test/java/opennlp/summarization/lexicalchaining/LexicalChainingSummarizerTest.java
b/summarizer/src/test/java/opennlp/summarization/lexicalchaining/LexicalChainingSummarizerTest.java
new file mode 100644
index 0000000..5d23bef
--- /dev/null
+++
b/summarizer/src/test/java/opennlp/summarization/lexicalchaining/LexicalChainingSummarizerTest.java
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.summarization.lexicalchaining;
+
+import opennlp.summarization.AbstractSummarizerTest;
+import opennlp.summarization.Summarizer;
+
+import org.junit.jupiter.api.BeforeEach;
+
+/**
+ * Tests the implementation of {@link LexicalChainingSummarizer}.
+ */
+public class LexicalChainingSummarizerTest extends AbstractSummarizerTest {
+
+ // SUT
+ private Summarizer lexicalChainSummarizer;
+
+ @BeforeEach
+ void setUp() {
+ lexicalChainSummarizer = new LexicalChainingSummarizer(docProcessor,
posTagger);
+ }
+
+ @Override
+ public Summarizer getSummarizer() {
+ return lexicalChainSummarizer;
+ }
+
+}
diff --git
a/summarizer/src/test/java/opennlp/summarization/meta/MetaSummarizerTest.java
b/summarizer/src/test/java/opennlp/summarization/meta/MetaSummarizerTest.java
index bd69886..2a80782 100644
---
a/summarizer/src/test/java/opennlp/summarization/meta/MetaSummarizerTest.java
+++
b/summarizer/src/test/java/opennlp/summarization/meta/MetaSummarizerTest.java
@@ -17,64 +17,27 @@
package opennlp.summarization.meta;
+import opennlp.summarization.AbstractSummarizerTest;
import opennlp.summarization.Summarizer;
-import opennlp.summarization.lexicalchaining.OpenNLPPOSTagger;
-import opennlp.summarization.preprocess.DefaultDocProcessor;
-import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.BeforeEach;
-import org.junit.jupiter.params.ParameterizedTest;
-import org.junit.jupiter.params.provider.ValueSource;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import java.io.IOException;
-
-import static org.junit.jupiter.api.Assertions.assertFalse;
-import static org.junit.jupiter.api.Assertions.assertNotNull;
-import static org.junit.jupiter.api.Assertions.assertTrue;
-
-public class MetaSummarizerTest {
-
- private static final Logger log =
LoggerFactory.getLogger(MetaSummarizerTest.class);
-
- private static DefaultDocProcessor docProcessor;
- private static OpenNLPPOSTagger posTagger;
+/**
+ * Tests the implementation of {@link MetaSummarizer}.
+ */
+public class MetaSummarizerTest extends AbstractSummarizerTest {
// SUT
private Summarizer metaSummarizer;
- @BeforeAll
- static void initEnv() throws IOException {
- docProcessor = new
DefaultDocProcessor(MetaSummarizerTest.class.getResourceAsStream("/en-sent.bin"));
- posTagger = new OpenNLPPOSTagger(docProcessor,
MetaSummarizerTest.class.getResourceAsStream("/en-pos-maxent.bin"));
- }
-
@BeforeEach
void setUp() {
metaSummarizer = new MetaSummarizer(docProcessor, posTagger);
}
- @ParameterizedTest(name = "news story {index}")
- @ValueSource(strings = {
- "/meta/0a2035f3f73b06a5150a6f01cffdf45d027bbbed.story",
- "/meta/0a2278bec4a80aec1bc3e9e7a9dac10ac1b6425b.story",
- "/meta/0a3040b6c1bba95efca727158f128a19c44ec8ba.story",
- "/meta/0a3479b53796863a664c32ca20d8672583335d2a.story",
- "/meta/0a3639cb86487e72e2ba084211f99799918aedf8.story",
- "/meta/0a4092bef1801863296777ebcfeceb1aec23c78f.story",
- "/meta/0a5458d3427b290524a8df11d8503a5b57b32747.story",
- "/meta/0a5691b8fe654b6b2cdace5ab87aff2ee4c23577.story",
- "/meta/0a6790f886a42a76945d4a21ed27c4ebd9ca1025.story"
- })
- public void testSummarize(String filename) {
- String article = docProcessor.docToString(filename);
- String summary = metaSummarizer.summarize(article, 20);
- assertNotNull(summary);
- assertFalse(summary.isBlank());
- assertTrue(summary.length() > 20);
- if (log.isDebugEnabled()) {
- log.debug(summary);
- }
+ @Override
+ public Summarizer getSummarizer() {
+ return metaSummarizer;
}
+
}
diff --git
a/summarizer/src/test/java/opennlp/summarization/textrank/TextRankSummarizerTest.java
b/summarizer/src/test/java/opennlp/summarization/textrank/TextRankSummarizerTest.java
new file mode 100644
index 0000000..31f89f6
--- /dev/null
+++
b/summarizer/src/test/java/opennlp/summarization/textrank/TextRankSummarizerTest.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.summarization.textrank;
+
+import opennlp.summarization.AbstractSummarizerTest;
+import opennlp.summarization.Summarizer;
+
+import org.junit.jupiter.api.BeforeEach;
+
+/**
+ * Tests the implementation of {@link TextRankSummarizer}.
+ */
+public class TextRankSummarizerTest extends AbstractSummarizerTest {
+
+ // SUT
+ private Summarizer textRankSummarizer;
+
+ @BeforeEach
+ void setUp() {
+ textRankSummarizer = new TextRankSummarizer(docProcessor);
+ }
+
+ @Override
+ public Summarizer getSummarizer() {
+ return textRankSummarizer;
+ }
+}