Added:
opennlp/sandbox/summarizer/src/main/java/opennlp/summarization/textrank/TextRank.java
URL:
http://svn.apache.org/viewvc/opennlp/sandbox/summarizer/src/main/java/opennlp/summarization/textrank/TextRank.java?rev=1658901&view=auto
==============================================================================
---
opennlp/sandbox/summarizer/src/main/java/opennlp/summarization/textrank/TextRank.java
(added)
+++
opennlp/sandbox/summarizer/src/main/java/opennlp/summarization/textrank/TextRank.java
Wed Feb 11 08:53:14 2015
@@ -0,0 +1,298 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version
2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package opennlp.summarization.textrank;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Enumeration;
+import java.util.Hashtable;
+import java.util.List;
+
+import opennlp.summarization.*;
+import opennlp.summarization.preprocess.DefaultDocProcessor;
+import opennlp.summarization.preprocess.IDFWordWeight;
+import opennlp.summarization.preprocess.PorterStemmer;
+import opennlp.summarization.preprocess.StopWords;
+import opennlp.summarization.preprocess.WordWeight;
+
+/*
+ * Implements the TextRank algorithm by Mihalcea et al.
+ * This applies the PageRank algorithm to a graph in which each sentence is a
+ * node and an edge between two sentences indicates that they share a word.
+ * It returns a ranking of sentences in which the highest rank marks the most
+ * important sentence.
+ * Currently the words are only stemmed - a more sophisticated approach might
+ * use a resource like WordNet to match synonyms.
+ */
+public class TextRank {
+ private StopWords sw; // stop-word filter consulted by getWeightedSimilarity
+ private String article; // only touched through getArticle/setArticle in this class
+ private Hashtable<Integer, List<Integer>> links; // sentence index -> ids of linked sentences, filled by addLink
+ private List<String> sentences = new ArrayList<String>();
+ private List<String> processedSent = new ArrayList<String>();
+ private WordWeight wordWt; // source of per-word weights for toWordWtHashtable
+ private int NO_OF_IT = 100; // upper bound on ranking iterations in getTextRankScore
+ private double maxErr = 0.1; // iteration stops early once the average error drops below this
+ private DocProcessor docProc; // supplies the stemmer used in getNeighborsSigmaWtSim
+
+ private double title_wt = 0; // subtracted from 1 when the initial scores are seeded
+ private Hashtable<Integer, String[]> wordsInSent; // NOTE(review): not referenced in the visible code
+
+ // Damping factor: a sentence score is (1 - df) + df * (weighted neighbor sum).
+ private static double df = 0.15;
+ private boolean HIGHER_TITLE_WEIGHT = true; // when true, words of sentence 0 are given TITLE_WRD_WT
+ private static double TITLE_WRD_WT = 2d; // weight forced onto the words of the first sentence
+ private String resources = "./resources"; // directory prefix used to locate idf.csv
+
+ /**
+  * Creates a ranker that stems words through the given document processor.
+  * Word weights are obtained from IDFWordWeight for the idf.csv file under
+  * the resources directory.
+  *
+  * @param dp document processor whose stemmer is used while linking sentences
+  */
+ public TextRank(DocProcessor dp) {
+     this.sw = new StopWords();
+     this.links = new Hashtable<Integer, List<Integer>>();
+     this.processedSent = new ArrayList<String>();
+     this.docProc = dp;
+     String idfPath = resources + "/idf.csv";
+     this.wordWt = IDFWordWeight.getInstance(idfPath);
+ }
+
+ /**
+  * Creates a ranker with a caller-supplied stop-word list and word weights.
+  * No DocProcessor is set by this constructor, so methods that stem through
+  * docProc (getNeighborsSigmaWtSim) cannot be used on such an instance.
+  *
+  * @param sw      stop words to ignore when comparing sentences
+  * @param wordWts source of per-word weights
+  */
+ public TextRank(StopWords sw, WordWeight wordWts) {
+     this.sw = sw;
+     this.wordWt = wordWts;
+     // The link table must exist before addLink/getLinks are used; leaving it
+     // null made every lookup fail with a NullPointerException.
+     setLinks(new Hashtable<Integer, List<Integer>>());
+ }
+
+ /**
+  * Returns the weighted similarity of two sentences.
+  * Each distinct, non-empty, non-stop word of sent1 contributes its weight
+  * once for every occurrence of the same word in sent2; a word without an
+  * entry in wrdWts counts as 1. The sum is divided by the combined number of
+  * space-separated tokens of both sentences.
+  *
+  * @param sent1  first sentence, tokens separated by single spaces
+  * @param sent2  second sentence, tokens separated by single spaces
+  * @param wrdWts weight per word
+  * @return the normalized overlap, or 0 when neither sentence has any token
+  */
+ public double getWeightedSimilarity(String sent1, String sent2,
+         Hashtable<String, Double> wrdWts) {
+     String[] words1 = sent1.split(" ");
+     String[] words2 = sent2.split(" ");
+     int totalWords = words1.length + words2.length;
+     // split(" ") yields an empty array for a sentence made only of spaces;
+     // without this guard two such sentences would produce 0.0 / 0 = NaN.
+     if (totalWords == 0) {
+         return 0;
+     }
+
+     double wordsInCommon = 0;
+     Hashtable<String, Boolean> dups = new Hashtable<String, Boolean>();
+     for (int i = 0; i < words1.length; i++) {
+         String currWrd1 = words1[i].trim();
+         // skip over duplicate words of the first sentence
+         if (dups.put(currWrd1, true) != null) {
+             continue;
+         }
+         // These checks do not depend on the second sentence, so they are
+         // made once per word instead of once per word pair.
+         if (sw.isStopWord(currWrd1) || currWrd1.isEmpty()) {
+             continue;
+         }
+         for (int j = 0; j < words2.length; j++) {
+             if (words1[i].equals(words2[j])) {
+                 Double wt = wrdWts.get(currWrd1);
+                 wordsInCommon += (wt != null) ? wt.doubleValue() : 1;
+             }
+         }
+     }
+     return wordsInCommon / totalWords;
+ }
+
+ // Gets the current score from the list of scores passed ...
+ public double getScoreFrom(List<Score> scores, int id) {
+ for (Score s : scores) {
+ if (s.getSentId() == id)
+ return s.getScore();
+ }
+ return 1;
+ }
+
+ /**
+  * Runs the page rank iteration over the sentence graph.
+  * Each pass computes, for every sentence Vi,
+  * TR(Vi) = (1 - df) + df * sum over linked Vj of ( wij / sigma_k(wjk) * TR(Vj) ),
+  * where wij is the weighted similarity of sentences i and j, sigma_k(wjk) is
+  * the raw score of j and TR(Vj) is j's score from the previous pass.
+  * Iteration stops after NO_OF_IT passes, or earlier once more than three
+  * passes have run and the mean absolute change per sentence is below maxErr.
+  * The converged scores are finally multiplied by the raw scores.
+  *
+  * @param rawScores per-sentence sum of similarities to its neighbors
+  * @param sentences sentence texts, indexed by sentence id
+  * @param wrdWts    weight per word, passed on to getWeightedSimilarity
+  * @return one new Score per entry of rawScores, in the same order
+  */
+ public List<Score> getTextRankScore(List<Score> rawScores,
+         List<String> sentences, Hashtable<String, Double> wrdWts) {
+     List<Score> currWtScores = new ArrayList<Score>();
+     // Start with equal weights for all sentences
+     for (Score raw : rawScores) {
+         Score ns = new Score();
+         ns.setSentId(raw.getSentId());
+         ns.setScore((1 - title_wt) / rawScores.size());
+         currWtScores.add(ns);
+     }
+     // Nothing to rank; also avoids a 0/0 convergence test below.
+     if (rawScores.isEmpty()) {
+         return currWtScores;
+     }
+
+     // Page rank..
+     for (int i = 0; i < NO_OF_IT; i++) {
+         double totErr = 0;
+         List<Score> newWtScores = new ArrayList<Score>();
+
+         // Update the scores for the current iteration..
+         for (Score rs : rawScores) {
+             int sentId = rs.getSentId();
+             Score ns = new Score();
+             ns.setSentId(sentId);
+
+             List<Integer> neighbors = getLinks().get(sentId);
+             double sum = 0;
+             if (neighbors != null) {
+                 for (Integer j : neighbors) {
+                     double sigmawjk = getScoreFrom(rawScores, j);
+                     // A neighbor with no outgoing weight contributes nothing;
+                     // dividing by it would inject Infinity/NaN into the ranks.
+                     if (sigmawjk == 0) {
+                         continue;
+                     }
+                     double wij = this.getWeightedSimilarity(
+                             sentences.get(sentId), sentences.get(j), wrdWts);
+                     double txtRnkj = getScoreFrom(currWtScores, j);
+                     sum += wij / sigmawjk * txtRnkj;
+                 }
+             }
+             ns.setScore((1d - df) + sum * df);
+             // Convergence is measured as the absolute change against the
+             // previous pass, not against the raw similarity sums.
+             totErr += Math.abs(ns.getScore()
+                     - getScoreFrom(currWtScores, sentId));
+             newWtScores.add(ns);
+         }
+         currWtScores = newWtScores;
+         if (i > 2 && totErr / rawScores.size() < maxErr) {
+             break;
+         }
+     }
+
+     // Scale every rank by the sentence's raw score.
+     for (Score s : currWtScores) {
+         s.setScore(s.getScore() * getScoreFrom(rawScores, s.getSentId()));
+     }
+     return currWtScores;
+ }
+
+ /**
+  * Computes the raw score of every sentence: the sum of its weighted
+  * similarities to each other sentence that shares a stemmed word with it.
+  * This sum is the denominator of the text rank formula. As a side effect a
+  * link is recorded for every pair whose similarity is positive.
+  *
+  * @param sentences sentence texts; the list position is used as sentence id
+  * @param iidx      inverted index from stemmed word to sentence ids
+  * @param wts       weight per word, passed on to getWeightedSimilarity
+  * @return one Score per sentence, in sentence order
+  */
+ public List<Score> getNeighborsSigmaWtSim(List<String> sentences,
+         Hashtable<String, List<Integer>> iidx, Hashtable<String, Double> wts) {
+     List<Score> totals = new ArrayList<Score>();
+
+     for (int i = 0; i < sentences.size(); i++) {
+         String current = sentences.get(i);
+         // Sentences already compared with sentence i.
+         List<Integer> visited = new ArrayList<Integer>();
+         Score total = new Score();
+         total.setSentId(i);
+
+         for (String token : current.split(" ")) {
+             String stem = docProc.getStemmer().stem(token).toString();
+             List<Integer> sharing = iidx.get(stem);
+             if (sharing == null) {
+                 continue;
+             }
+
+             for (int other : sharing) {
+                 if (other == i || visited.contains(other)) {
+                     continue;
+                 }
+                 double sim = getWeightedSimilarity(current,
+                         sentences.get(other), wts);
+                 total.setScore(total.getScore() + sim);
+                 if (sim > 0) {
+                     addLink(i, other);
+                 }
+                 visited.add(other);
+             }
+         }
+         totals.add(total);
+     }
+     return totals;
+ }
+
+ /**
+  * Ranks the sentences with getTextRankScore and returns the scores sorted
+  * by the natural ordering of Score.
+  *
+  * @param rawScores per-sentence raw scores
+  * @param sentences sentence texts
+  * @param wordWts   weight per word
+  * @return the sorted text rank scores
+  */
+ public List<Score> getWeightedScores(List<Score> rawScores,
+         List<String> sentences, Hashtable<String, Double> wordWts) {
+     List<Score> ranked = getTextRankScore(rawScores, sentences, wordWts);
+     Collections.sort(ranked);
+     return ranked;
+ }
+
+ /**
+  * Builds a word-to-weight table holding one entry for every word that is a
+  * key of the inverted index.
+  *
+  * @param wwt  provider of the weight of a single word
+  * @param iidx inverted index whose keys are the words to look up
+  * @return a new table mapping each indexed word to its weight
+  */
+ private Hashtable<String, Double> toWordWtHashtable(WordWeight wwt,
+         Hashtable<String, List<Integer>> iidx) {
+     Hashtable<String, Double> weights = new Hashtable<String, Double>();
+     for (String word : iidx.keySet()) {
+         weights.put(word, wwt.getWordWeight(word));
+     }
+     return weights;
+ }
+
+ /**
+  * Ranks the given sentences and returns their sorted text rank scores.
+  * The sentence lists are stored on this instance. When HIGHER_TITLE_WEIGHT
+  * is set, every space-separated word of the first sentence gets the weight
+  * TITLE_WRD_WT before the ranking runs.
+  *
+  * @param doc           the document text; not used by this method
+  * @param sentences     sentence texts; the list position is the sentence id
+  * @param iidx          inverted index from stemmed word to sentence ids
+  * @param processedSent processed form of the sentences, stored on this instance
+  * @return the scores produced by getWeightedScores
+  */
+ public List<Score> getRankedSentences(String doc, List<String> sentences,
+         Hashtable<String, List<Integer>> iidx, List<String> processedSent) {
+     this.sentences = sentences;
+     this.processedSent = processedSent;
+
+     Hashtable<String, Double> wrdWts = toWordWtHashtable(this.wordWt, iidx);
+
+     if (HIGHER_TITLE_WEIGHT && getSentences().size() > 0) {
+         for (String wrd : getSentences().get(0).split(" ")) {
+             wrdWts.put(wrd, Double.valueOf(TITLE_WRD_WT));
+         }
+     }
+
+     List<Score> rawScores = getNeighborsSigmaWtSim(getSentences(), iidx,
+             wrdWts);
+     return getWeightedScores(rawScores, getSentences(), wrdWts);
+ }
+
+ /**
+  * Records a link from sentence i to sentence idx, creating the list of end
+  * nodes for i on first use.
+  */
+ private void addLink(int i, int idx) {
+     Hashtable<Integer, List<Integer>> table = getLinks();
+     List<Integer> targets = table.get(i);
+     if (targets == null) {
+         targets = new ArrayList<Integer>();
+     }
+     targets.add(idx);
+     table.put(i, targets);
+ }
+
+ public void setSentences(List<String> sentences) { // replaces the stored sentence list
+ this.sentences = sentences;
+ }
+
+ public List<String> getSentences() { // list last stored by setSentences or getRankedSentences
+ return sentences;
+ }
+
+ public void setArticle(String article) { // stores the article text; not read elsewhere in this class
+ this.article = article;
+ }
+
+ public String getArticle() { // returns the text stored by setArticle
+ return article;
+ }
+
+ private void setLinks(Hashtable<Integer, List<Integer>> links) { // installs the sentence link table
+ this.links = links;
+ }
+
+ public Hashtable<Integer, List<Integer>> getLinks() { // live link table, not a copy
+ return links;
+ }
+}
+
+/*
+ * public double getScore(String sent1, String sent2, boolean toPrint) {
+ * String[] words1 = sent1.split(" "); String[] words2 = sent2.split(" ");
+ * double wordsInCommon = 0; for(int i=0;i< words1.length;i++) { for(int
+ * j=0;j<words2.length;j++) { if(!sw.isStopWord(words1[i]) &&
+ * !words1[i].trim().isEmpty() && words1[i].equals(words2[j])) {
wordsInCommon+=
+ * wordWt.getWordWeight(words1[i]); } } } return ((double)wordsInCommon) /
+ * (Math.log(1+words1.length) + Math.log(1+words2.length)); }
+ */
\ No newline at end of file
Propchange:
opennlp/sandbox/summarizer/src/main/java/opennlp/summarization/textrank/TextRank.java
------------------------------------------------------------------------------
svn:executable = *
Added:
opennlp/sandbox/summarizer/src/main/java/opennlp/summarization/textrank/TextRankSummarizer.java
URL:
http://svn.apache.org/viewvc/opennlp/sandbox/summarizer/src/main/java/opennlp/summarization/textrank/TextRankSummarizer.java?rev=1658901&view=auto
==============================================================================
---
opennlp/sandbox/summarizer/src/main/java/opennlp/summarization/textrank/TextRankSummarizer.java
(added)
+++
opennlp/sandbox/summarizer/src/main/java/opennlp/summarization/textrank/TextRankSummarizer.java
Wed Feb 11 08:53:14 2015
@@ -0,0 +1,142 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version
2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package opennlp.summarization.textrank;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.LineNumberReader;
+import java.io.PrintWriter;
+import java.util.*;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+import opennlp.summarization.*;
+import opennlp.summarization.preprocess.DefaultDocProcessor;
+import opennlp.summarization.preprocess.IDFWordWeight;
+import opennlp.summarization.preprocess.WordWeight;
+
+/*
+ * A wrapper around the text rank algorithm. This class
+ * a) Sets up the data for the TextRank class
+ * b) Takes the ranked sentences and does some basic rearranging (e.g. ordering)
+ *    to provide a more reasonable summary.
+ */
+public class TextRankSummarizer implements Summarizer
+{
+ // An optional file that stores the idf of words. If idf is not available,
+ // a default equal weight is used for all words.
+ private String idfFile = "resources/idf.csv";
+ public TextRankSummarizer() throws Exception // no-argument constructor; performs no setup
+ {
+ }
+
+ /*Sets up data and calls the TextRank algorithm..*/
+ public List<Score> rankSentences(String doc, List<Sentence> sentences,
+ DocProcessor dp,
int maxWords )
+ {
+ try {
+ //Rank sentences
+ TextRank summ = new TextRank(dp);
+ List<String> sentenceStrL = new ArrayList<String>();
+ List<String> processedSent = new ArrayList<String>();
+ Hashtable<String, List<Integer>> iidx = new Hashtable<String,
List<Integer>>();
+ // dp.getSentences(sentences, sentenceStrL, iidx, processedSent);
+
+ for(Sentence s : sentences){
+ sentenceStrL.add(s.getStringVal());
+ String stemmedSent = s.stem();
+ processedSent.add(stemmedSent);
+
+ String[] wrds = stemmedSent.split(" ");
+ for(String w: wrds)
+ {
+ if(iidx.get(w)!=null)
+ iidx.get(w).add(s.getSentId());
+ else{
+ List<Integer> l = new ArrayList<Integer>();
+ l.add(s.getSentId());
+ iidx.put(w, l);
+ }
+ }
+ }
+
+ WordWeight wordWt = new IDFWordWeight(idfFile);////new
+
+ List<Score> finalScores = summ.getRankedSentences(doc,
sentenceStrL, iidx, processedSent);
+ List<String> sentenceStrList = summ.getSentences();
+
+ // SentenceClusterer clust = new SentenceClusterer();
+ // clust.runClusterer(doc, summ.processedSent);
+
+ Hashtable<Integer,List<Integer>> links= summ.getLinks();
+
+ for(int i=0;i<sentences.size();i++)
+ {
+ Sentence st = sentences.get(i);
+
+ //Add links..
+ List<Integer> currLnks = links.get(i);
+ if(currLnks==null) continue;
+ for(int j=0;j<currLnks.size();j++)
+ {
+ if(j<i) st.addLink(sentences.get(j));
+ }
+ }
+
+ for(int i=0;i<finalScores.size();i++)
+ {
+ Score s = finalScores.get(i);
+ Sentence st = sentences.get(s.getSentId());
+ st.setPageRankScore(s);
+ }
+
+ List<Score> reRank = finalScores;//reRank(sentences,
finalScores, iidx, wordWt, maxWords);
+
+ return reRank;
+ } catch (Exception e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ }
+ return null;
+ }
+
+ /**
+  * Returns the summary of the article as a string.
+  *
+  * @param article  text to summarize
+  * @param dp       document processor used to split the article into sentences
+  * @param maxWords size limit handed to scores2String
+  * @return the summary, or an empty string when the sentences could not be ranked
+  */
+ @Override
+ public String summarize(String article, DocProcessor dp, int maxWords) {
+     List<Sentence> sentences = dp.getSentencesFromStr(article);
+     List<Score> scores = this.rankSentences(article, sentences, dp,
+             maxWords);
+     // rankSentences signals failure with null; passing that on would end in
+     // a NullPointerException inside scores2String.
+     if (scores == null) {
+         return "";
+     }
+     return scores2String(sentences, scores, maxWords);
+ }
+
+ /* Use the page rank scores to determine the summary.*/
+ public String scores2String(List<Sentence> sentences, List<Score> scores,
int maxWords)
+ {
+ StringBuffer b = new StringBuffer();
+ // for(int i=0;i< min(maxWords, scores.size()-1);i++)
+ int i=0;
+ while(b.length()< maxWords && i< scores.size())
+ {
+ String sent =
sentences.get(scores.get(i).getSentId()).getStringVal();
+ b.append(sent + scores.get(i));
+ i++;
+ }
+ return b.toString();
+ }
+
+}
Propchange:
opennlp/sandbox/summarizer/src/main/java/opennlp/summarization/textrank/TextRankSummarizer.java
------------------------------------------------------------------------------
svn:executable = *
Added: opennlp/sandbox/summarizer/src/test/java/unittests/DocProcessorTest.java
URL:
http://svn.apache.org/viewvc/opennlp/sandbox/summarizer/src/test/java/unittests/DocProcessorTest.java?rev=1658901&view=auto
==============================================================================
--- opennlp/sandbox/summarizer/src/test/java/unittests/DocProcessorTest.java
(added)
+++ opennlp/sandbox/summarizer/src/test/java/unittests/DocProcessorTest.java
Wed Feb 11 08:53:14 2015
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version
2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+
+package unittests;
+
+import static org.junit.Assert.*;
+
+import org.junit.Assert.*;
+
+import java.io.UnsupportedEncodingException;
+import java.util.List;
+
+import opennlp.summarization.Sentence;
+import opennlp.summarization.preprocess.DefaultDocProcessor;
+
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+/** Checks that DefaultDocProcessor splits a text into sentences. */
+public class DocProcessorTest {
+
+ @BeforeClass
+ public static void setUpBeforeClass() throws Exception {
+ }
+
+ /** A text with three sentences and mixed punctuation must yield three sentences. */
+ @Test
+ public void testGetSentencesFromStr() {
+     String sentFragModel = "resources/en-sent.bin";
+     DefaultDocProcessor dp = new DefaultDocProcessor(sentFragModel);
+     String sent = "This is a sentence, with some punctuations; to test"
+             + " if the sentence breaker can handle it!"
+             + " Is every thing working OK ? Yes.";
+     List<Sentence> doc = dp.getSentencesFromStr(sent);
+     // JUnit expects (expected, actual); the reversed order produced a
+     // misleading failure message.
+     assertEquals(3, doc.size());
+ }
+
+}
Added: opennlp/sandbox/summarizer/src/test/java/unittests/LexChainTest.java
URL:
http://svn.apache.org/viewvc/opennlp/sandbox/summarizer/src/test/java/unittests/LexChainTest.java?rev=1658901&view=auto
==============================================================================
--- opennlp/sandbox/summarizer/src/test/java/unittests/LexChainTest.java (added)
+++ opennlp/sandbox/summarizer/src/test/java/unittests/LexChainTest.java Wed
Feb 11 08:53:14 2015
@@ -0,0 +1,125 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version
2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package unittests;
+
+import static org.junit.Assert.*;
+import opennlp.summarization.Sentence;
+import opennlp.summarization.lexicalchaining.LexicalChainingSummarizer;
+import opennlp.summarization.lexicalchaining.LexicalChain;
+import opennlp.summarization.lexicalchaining.*;
+import opennlp.summarization.lexicalchaining.Word;
+import opennlp.summarization.lexicalchaining.WordRelation;
+import opennlp.summarization.lexicalchaining.WordRelationshipDetermination;
+import opennlp.summarization.lexicalchaining.WordnetWord;
+import opennlp.summarization.preprocess.DefaultDocProcessor;
+
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import edu.mit.jwi.item.IIndexWord;
+import edu.mit.jwi.item.POS;
+
+import java.util.Collections;
+import java.util.Hashtable;
+import java.util.List;
+
+/** Exercises lexical chain construction and word relation detection. */
+public class LexChainTest {
+
+ @BeforeClass
+ public static void setUpBeforeClass() throws Exception {
+ }
+
+ /**
+  * Builds the lexical chains of a short news article and prints the
+  * strongest ones. The article is embedded so the test does not depend on a
+  * file that exists only on one machine, and exceptions propagate so that a
+  * failure is reported instead of printed and ignored.
+  */
+ @Test
+ public void testBuildLexicalChains() throws Exception {
+     String article = "US President Barack Obama has welcomed an agreement"
+             + " between the US and Russia under which Syria's chemical"
+             + " weapons must be destroyed or removed by mid-2014 as an"
+             + " \"important step\". "
+             + "But a White House statement cautioned that the US expected"
+             + " Syria to live up to its public commitments. "
+             + "The US-Russian framework document stipulates that Syria must"
+             + " provide details of its stockpile within a week. "
+             + "If Syria fails to comply, the deal could be enforced by a UN"
+             + " resolution. "
+             + " China, France, the UK, the UN and Nato have all expressed"
+             + " satisfaction at the agreement. "
+             + " In Beijing, Foreign Minister Wang Yi said on Sunday that"
+             + " China welcomes the general agreement between the US and"
+             + " Russia.";
+     String sentFragModel = "resources/en-sent.bin";
+     DefaultDocProcessor dp = new DefaultDocProcessor(sentFragModel);
+     LexicalChainingSummarizer lcs =
+             new LexicalChainingSummarizer(dp, "resources/en-pos-maxent.bin");
+
+     long strt = System.currentTimeMillis();
+
+     List<Sentence> sent = dp.getSentencesFromStr(article);
+     List<LexicalChain> vh = lcs.buildLexicalChains(article, sent);
+     assertNotNull(vh);
+     Collections.sort(vh);
+
+     // Tracks the head words already printed so each chain shows up once.
+     Hashtable<String, Boolean> comp = new Hashtable<String, Boolean>();
+     System.out.println(vh.size());
+     POSTagger t = new OpenNLPPOSTagger(dp, "resources/en-pos-maxent.bin");
+     System.out.println(t.getTaggedString(article));
+     // Walk the last (at most 50) chains of the sorted list, from the end.
+     for (int i = vh.size() - 1; i >= Math.max(vh.size() - 50, 0); i--) {
+         LexicalChain lc = vh.get(i);
+         String head = lc.getWord().get(0).getLexicon();
+
+         if (!comp.containsKey(head)) {
+             comp.put(head, Boolean.TRUE);
+             for (int j = 0; j < lc.getWord().size(); j++) {
+                 System.out.print(lc.getWord().get(j) + "-- ");
+             }
+             System.out.println(lc.score());
+             for (Sentence sid : lc.getSentences()) {
+                 System.out.println(sid);
+             }
+         }
+         System.out.println("--------");
+     }
+     System.out.println((System.currentTimeMillis() - strt) / 1000);
+ }
+
+ /**
+  * "tune" and "vocal" are both expected to be in a medium relation with a
+  * chain started from the first sense of "music".
+  */
+ @Test
+ public void testGetRelation() throws Exception {
+     WordRelationshipDetermination lcs = new WordRelationshipDetermination();
+     LexicalChain l = new LexicalChain();
+     List<Word> words = lcs.getWordSenses("music");
+
+     l.addWord(words.get(0));
+     WordRelation rel2 = lcs.getRelation(l, "tune", true);
+     WordRelation rel3 = lcs.getRelation(l, "vocal", true);
+     System.out.println(rel2.relation);
+     System.out.println(rel3.relation);
+     assertEquals(WordRelation.MED_RELATION, rel2.relation);
+     assertEquals(WordRelation.MED_RELATION, rel3.relation);
+ }
+
+}
Added:
opennlp/sandbox/summarizer/src/test/java/unittests/LexChainingKeywordExtractorTest.java
URL:
http://svn.apache.org/viewvc/opennlp/sandbox/summarizer/src/test/java/unittests/LexChainingKeywordExtractorTest.java?rev=1658901&view=auto
==============================================================================
---
opennlp/sandbox/summarizer/src/test/java/unittests/LexChainingKeywordExtractorTest.java
(added)
+++
opennlp/sandbox/summarizer/src/test/java/unittests/LexChainingKeywordExtractorTest.java
Wed Feb 11 08:53:14 2015
@@ -0,0 +1,63 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version
2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package unittests;
+
+import static org.junit.Assert.*;
+
+import java.util.List;
+
+import opennlp.summarization.Sentence;
+import opennlp.summarization.lexicalchaining.LexChainingKeywordExtractor;
+import opennlp.summarization.lexicalchaining.LexicalChain;
+import opennlp.summarization.lexicalchaining.LexicalChainingSummarizer;
+import opennlp.summarization.preprocess.DefaultDocProcessor;
+
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+/** Exercises keyword extraction on top of lexical chains. */
+public class LexChainingKeywordExtractorTest {
+
+ @BeforeClass
+ public static void setUpBeforeClass() throws Exception {
+ }
+
+ /**
+  * Builds the lexical chains of a short article and extracts keywords from
+  * them. The article is embedded so the test does not depend on a file that
+  * exists only on one machine, and exceptions propagate so that a failure is
+  * reported instead of printed and ignored.
+  */
+ @Test
+ public void testGetKeywords() throws Exception {
+     String article = "The US-Russian framework document stipulates that"
+             + " Syria must provide details of its stockpile within a week. "
+             + "If Syria fails to comply, the deal could be enforced by a UN"
+             + " resolution. "
+             + "China, France, the UK, the UN and Nato have all expressed"
+             + " satisfaction at the agreement.";
+     String sentFragModel = "resources/en-sent.bin";
+     DefaultDocProcessor dp = new DefaultDocProcessor(sentFragModel);
+     LexicalChainingSummarizer lcs =
+             new LexicalChainingSummarizer(dp, "resources/en-pos-maxent.bin");
+
+     List<Sentence> sent = dp.getSentencesFromStr(article);
+     List<LexicalChain> vh = lcs.buildLexicalChains(article, sent);
+     LexChainingKeywordExtractor ke = new LexChainingKeywordExtractor();
+     List<String> keywords = ke.getKeywords(vh, 5);
+     assertNotNull(keywords);
+     System.out.println(keywords);
+ }
+
+}