http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/2707f665/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/matching/PTMatcherTest.java ---------------------------------------------------------------------- diff --git a/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/matching/PTMatcherTest.java b/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/matching/PTMatcherTest.java deleted file mode 100644 index 7d2ebef..0000000 --- a/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/matching/PTMatcherTest.java +++ /dev/null @@ -1,139 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package opennlp.tools.parse_thicket.matching; - -import java.io.File; -import java.util.ArrayList; -import java.util.List; - -import opennlp.tools.parse_thicket.ParseThicket; -import opennlp.tools.parse_thicket.VerbNetProcessor; -import opennlp.tools.parse_thicket.WordWordInterSentenceRelationArc; -import opennlp.tools.textsimilarity.ParseTreeChunk; -import junit.framework.TestCase; - -public class PTMatcherTest extends TestCase { - //public static String resourceDir = new File(".").getAbsolutePath().replace("/.", "") + "/src/test/resources"; - //VerbNetProcessor proc = VerbNetProcessor.getInstance(resourceDir); - Matcher m = new Matcher(); - - public void testMatchTwoParaTestReduced(){ - String q = "I am a US citizen living abroad, and concerned about the health reform regulation of 2014. I do not want to wait till I am sick to buy health insurance. I am afraid I will end up paying the tax."; - String a = "People are worried about having to pay a fine for not carrying health insurance coverage got more guidance this week with some new federal regulations. "+ - "Hardly anyone will end up paying the tax when the health reform law takes full effect in 2014. "+ - "The individual mandate makes sure that people don�t wait until they are sick to buy health insurance. "+ - "People are exempt from health insurance fine if they make too little money to file an income tax return, or US citizens living abroad."; - List<List<ParseTreeChunk>> res = m.assessRelevance(q, a); - System.out.print(res); - assertTrue(res!=null); - assertTrue(res.size()>0); - assertEquals( "[[NP [NNP-us (LOCATION) NN*-citizen VB-living RB-abroad ], NP [,-, CC-* ], NP [DT-the NN-* NN-health NN-reform NN-* CD-2014 ], NP [NN-health NN-* NN-* IN-* ], NP [DT-the NN-health NN-reform NN-* ], NP [NN-health NN-insurance ], NP [NN*-* NN-* JJ-* NN-* ]], [VP [VB-* {phrStr=[], phrDescr=[], roles=[A, *, *]} DT-a NN*-* NN-health NN-* NN-* NN*-regulation ], VP [VB-* NN*-* NN-* VB-* RB*-* IN-* DT-* NN*-regulation ], VP [VB-* NN-* NN-health NN-* NN-* ], VP [IN-about NN-health NN-* NN-* NN*-regulation ], VP [VB-living RB-abroad ], VP [TO-to VB-* VB-wait IN-* PRP-* VB-* JJ-sick TO-to VB-buy NN-health NN-insurance ], VP [VB-* TO-to VB-* VB-* NN-health NN-insurance ], UCP [MD-will VB-end RP-up VB-paying DT-the NN-tax ], VP [TO-to VB-* VB-buy NN-health NN-insurance ], VP [VB-* TO-to VB-* JJ-sick TO-to VB-buy NN-health NN-insurance ]]]" - , res.toString()); - - } - - public void testMatchTwoParaTest1(){ - List<List<ParseTreeChunk>> res = m.assessRelevance("Iran refuses to accept the UN proposal to end its dispute over its work on nuclear weapons."+ - "UN nuclear watchdog passes a resolution condemning Iran for developing its second uranium enrichment site in secret. " + - "A recent IAEA report presented diagrams that suggested Iran was secretly working on nuclear weapons. " + - "Iran envoy says its nuclear development is for peaceful purpose, and the material evidence against it has been fabricated by the US. " - - , "Iran refuses the UN offer to end a conflict over its nuclear weapons."+ - "UN passes a resolution prohibiting Iran from developing its uranium enrichment site. " + - "A recent UN report presented charts saying Iran was working on nuclear weapons. " + - "Iran envoy to UN states its nuclear development is for peaceful purpose, and the evidence against its claim is fabricated by the US. "); - System.out.print(res); - assertTrue(res!=null); - assertTrue(res.size()>0); - assertEquals(res.toString(), - "[[NP [DT-a NN-* PRP$-its JJ-* NN-* ], NP [DT-a NN-resolution VB-* NNP-iran (LOCATION) IN-* VB-developing PRP$-its NN-uranium NN-enrichment NN-site ], NP [DT-a IN-for ], NP [DT-a PRP$-its ], NP [VB-* JJ-nuclear NN*-* ], NP [JJ-nuclear NNS-weapons ], NP [PRP$-its JJ-nuclear NN-development ], NP [DT-the NN-* NN-evidence IN-against PR*-it ], NP [DT-the NNP-un (ORGANIZATION) NN-* ], NP [VB-* NN-* NN-* NN-* ], NP [VB-* NNP-iran (LOCATION) NN*-* ], NP [NNP-iran (LOCATION) NN-envoy ]], [VP [VB-refuses TO-to VB-* DT-* NN*-* ], VP [VB-* DT-the NNP-un (ORGANIZATION) NN-* TO-to VB-end PRP$-its ], VP [VB-* NN-* NN-work IN-on JJ-nuclear NN*-weapons.un ], VP [VB-* DT-a NN-* NN-resolution VB-* NNP-iran (LOCATION) IN-* VB-developing PRP$-its ], VP [VB-* DT-a NN-* PRP$-its JJ-* NN-* ], VP [VB-passes DT-a NN-resolution VB-* NNP-iran (LOCATION) IN-* VB-developing PRP$-its NN-uranium NN-enrichment NN-site ], VP [PRP$-its JJ-* NN-* NN-uranium NN-enrichment NN-site ], VP [VB-presented NNS-* NNP-iran (LOCATION) VB-was VB-working IN-on JJ-nuclear NNS-weapons ], VP [VB-* VB-fabricated IN-by DT-the NNP-us (LOCATION) ], VP [VB-* DT-the NNP-un (ORGANIZATION) NN-* TO-to VB-end NN-* IN-over PRP$-its NNP-* ], VP [TO-to VB-* DT-* NN*-* VB-end PRP$-its ], VP [PRP$-its JJ-nuclear NN-weapons.un ], VP [IN-* VB-* PRP$-its NN-* ], VP [DT-a PRP$-its JJ-nuclear NN-* VB-* NN-development ], VP [DT-a VB-* PRP$-its ], VP [VB-* NN-development NN-* ], VP [NN*-* VB-says JJ-nuclear NN*-* ], VP [VB-is IN-for JJ-peaceful NN-purpose ]]]" ) ; } - - public void testMatchTwoParaTest2(){ - List<List<ParseTreeChunk>> res = m.assessRelevance("I am a US citizen living abroad, and concerned about the health reform regulation of 2014. "+ - "I do not want to wait till I am sick to buy health insurance. "+ - "I am afraid I will end up paying the tax. "+ - "I am worried about having to pay a fine for not having health insurance coverage. " - , - "People are worried about having to pay a fine for not carrying health insurance coverage got more guidance this week with some new federal regulations. "+ - "Hardly anyone will end up paying the tax when the health reform law takes full effect in 2014. "+ - "The individual mandate makes sure that people don�t wait until they are sick to buy health insurance. "+ - "People are exempt from health insurance fine if they make too little money to file an income tax return, or US citizens living abroad."); - System.out.print(res); - assertTrue(res!=null); - assertTrue(res.size()>0); - assertEquals(res.toString(), "[[NP [NNP-us (LOCATION) NN*-citizen VB-living RB-abroad ], NP [,-, CC-* ], NP [DT-the NN-* NN-health NN-reform NN-* CD-2014 ], NP [NN-health NN-* NN-* IN-* ], NP [DT-the NN-health NN-reform NN-* ], UCP [NN-health NN-insurance NN-coverage ], UCP [TO-to VB-* {phrStr=[], phrDescr=[], roles=[A, *, *]} DT-a NN-* ], NP [NN*-* NN-* JJ-* NN-* ]], [VP [VB-* {phrStr=[], phrDescr=[], roles=[A, *, *]} DT-a NN*-* NN-health NN-* NN-* NN*-regulation ], VP [VB-* NN*-* NN-* VB-* RB*-* IN-* DT-* NN*-regulation ], VP [IN-about NN-health NN-* NN-* NN*-regulation ], VP [VB-living RB-abroad ], VP [TO-to VB-* VB-wait IN-* PRP-* VB-* JJ-sick TO-to VB-buy NN-health NN-insurance ], VP [VB-* VB-pay DT-* NN-* NN-health NN-* NN-* ], VP [VB-having NN-health NN-insurance NN-coverage ], UCP [MD-will VB-end RP-up VB-paying DT-the NN-tax ], VP [VB-* TO-to VB-* VB-* NN-health NN-insurance ], VP [TO-to VB-* VB-buy NN-health NN-insurance ], VP [VB-* TO-to VB-* JJ-sick TO-to VB-buy NN-hea lth NN-insurance ], VP [VB-* TO-to VB-* VB-pay {phrStr=[NP V NP PP.theme, NP V NP], phrDescr=[NP-PPfor-PP, (SUBCAT MP)], roles=[A, A, T]} DT-a NN-fine IN-for RB-not VB-* NN-health NN-insurance NN-coverage ], VP [VB-paying DT-the NN-tax NN-health NN-* NN-* ], VP [VB-* TO-to VB-* NN-health NN-insurance ], UCP [VB-* VB-worried IN-about VB-having TO-to VB-pay {phrStr=[NP V NP PP.theme, NP V NP], phrDescr=[NP-PPfor-PP, (SUBCAT MP)], roles=[A, A, T]} DT-a NN-fine IN-for RB-not VB-* NN-health NN-insurance NN-coverage ], VP [VB-paying DT-* NN-* DT-a NN-fine IN-for RB-not VB-* NN-health NN-insurance NN-coverage ]]]" - ); - } - - - public void testMatchTwoParaTestCA(){ - List<List<ParseTreeChunk>> res = m.assessRelevance("As a US citizen living abroad, I am concerned about the health reform regulation of 2014. "+ - "I do not want to wait till I am sick to buy health insurance. "+ - "Yet I am afraid I will end up paying the tax. "+ - "Although I live abroad, I am worried about having to pay a fine for being reported as not having health insurance coverage. " - , - "People are worried about paying a fine for not carrying health insurance coverage, having been informed by IRS about new regulations. "+ - "Yet hardly anyone is expected to pay the tax, when the health reform law takes full effect in 2014. "+ - "The individual mandate confirms that people don�t wait until they are sick to buy health insurance. "+ - "People are exempt from health insurance fine if they report they make too little money, or US citizens living abroad."); - System.out.print(res); - assertTrue(res!=null); - assertTrue(res.size()>0); - } - - public void testMatchTwoParaTestCA1(){ - String text1 = "As a US citizen living abroad, I am concerned about the health reform regulation of 2014. "+ - "I do not want to wait till I am sick to buy health insurance. "+ - "Yet I am afraid I will end up being requested to pay the tax. "+ - "Although I live abroad, I am worried about having to pay a fine for being reported as not having health insurance coverage. "; - - String text2 = "People are worried about paying a fine for not carrying health insurance coverage, having been informed by IRS about new regulations. "+ - "Yet hardly anyone is expected to pay the tax, when the health reform law takes full effect in 2014. "+ - "The individual mandate confirms that people don�t wait until they are sick to buy health insurance. "+ - "People are exempt from health insurance fine if they report they make too little money, or US citizens living abroad."; - List<List<ParseTreeChunk>> res = m.assessRelevance(text1, text2); - System.out.print(res); - assertTrue(res!=null); - assertTrue(res.size()>0); - } - - public void testMatchTwoParaTestREq1(){ - String q = "I am buying a foreclosed house. " - + "A bank offered me to waive inspection; however I am afraid I will not identify " - + "some problems in this property unless I call a specialist."; - - String a1 = "I am a foreclosure specialist in a bank which is subject to an inspection. " - + "FTC offered us to waive inspection " - + "if we can identify our potential problems with customers we lent money to buy their properties."; - - String a2 = "My wife and I are buying a foreclosure from a bank. " - + "In return for accepting a lower offer, they want me to waive the inspection. " - + "I prefer to let the bank know that I would not waive the inspection."; - List<List<ParseTreeChunk>> res = m.assessRelevance(q, a1); - assertEquals(res.toString(), "[[NP [DT-a NN-bank ], NP [NNS-problems ], NP [NN*-property ], NP [PRP-i ]], [VP [VB-am {phrStr=[NP V ADVP-Middle PP, NP V ADVP-MIddle], phrDescr=[Middle Construction, Middle Construction], roles=[A, P, P, P]} DT-a ], VP [VB-* TO-to NN-inspection ], VP [VB-offered PRP-* TO-to VB-waive NN-inspection ], VP [VB-* TO-to VB-* ], VP [VB-am {phrStr=[NP V ADVP-Middle PP, NP V ADVP-MIddle], phrDescr=[Middle Construction, Middle Construction], roles=[A, P, P, P]} NN*-* IN-in DT-* NN-* ], VP [VB-* VB-identify NNS-problems IN-* NN*-property ], VP [VB-* DT-* NN*-* VB-* ], VP [VB-* {phrStr=[], phrDescr=[], roles=[A, *, *]} DT-a NN-* ]]]"); - System.out.println(res); - res = m.assessRelevance(q, a2); - assertEquals(res.toString(), "[[NP [DT-a NN-bank ], NP [PRP-i ]], [VP [VB-* VB-buying DT-a ], VP [VB-* PRP-me TO-to VB-waive NN-inspection ], VP [TO-to VB-* VB-waive NN-inspection ], VP [VB-* {phrStr=[], phrDescr=[], roles=[]} PRP-i MD-* RB-not VB-* DT-* NN*-* ], VP [VB-* DT-* NN*-* VB-* DT-* NN-* ], VP [VB-* DT-a NN-* ]]]"); - System.out.println(res); - assertTrue(res!=null); - assertTrue(res.size()>0); - } - -} - -
http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/2707f665/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/matching/PTPhraseBuilderTest.java ---------------------------------------------------------------------- diff --git a/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/matching/PTPhraseBuilderTest.java b/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/matching/PTPhraseBuilderTest.java deleted file mode 100644 index 88132d0..0000000 --- a/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/matching/PTPhraseBuilderTest.java +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package opennlp.tools.parse_thicket.matching; - -import java.util.ArrayList; -import java.util.List; - -import edu.stanford.nlp.trees.Tree; - -import opennlp.tools.parse_thicket.ParseCorefsBuilder; -import opennlp.tools.parse_thicket.ParseThicket; -import opennlp.tools.parse_thicket.ParseTreeNode; -import opennlp.tools.parse_thicket.WordWordInterSentenceRelationArc; -import opennlp.tools.textsimilarity.ParseTreeChunk; -import junit.framework.TestCase; - -public class PTPhraseBuilderTest extends TestCase { - private ParseCorefsBuilder ptBuilder = ParseCorefsBuilder.getInstance(); - private PT2ThicketPhraseBuilder phraseBuilder = new PT2ThicketPhraseBuilder(); - - public void testBuildPhraseForUCP(){ - String q = "I am a US citizen living abroad, and concerned about the health reform regulation of 2014. " + - "I do not want to wait till I am sick to buy health insurance. I am afraid I will end up paying the tax."; - - ParseThicket pt = ptBuilder.buildParseThicket(q); - List<ParseTreeNode> sentence = pt.getNodesThicket().get(0); - Tree ptree = pt.getSentences().get(0); - List<List<ParseTreeNode>> res = phraseBuilder.buildPT2ptPhrasesForASentence(ptree, sentence ); - assertTrue(res!=null); - assertEquals(res.get(7).toString(), - "[<10>ADJP'concerned':JJ, <11>ADJP'about':IN, <12>ADJP'the':DT, <13>ADJP'health':NN, <14>ADJP'reform':NN, <15>ADJP'regulation':NN, <16>ADJP'of':IN, <17>ADJP'2014':CD]"); - - assertTrue(res.size()>12); - - sentence = pt.getNodesThicket().get(1); - ptree = pt.getSentences().get(1); - ptree.pennPrint(); - res = phraseBuilder.buildPT2ptPhrasesForASentence(ptree, sentence ); - assertTrue(res!=null); - assertTrue(res.size()>0); - - } - - public void testParsePhrase(){ - String line = "(NP (NNP Iran)) (VP (VBZ refuses) (S (VP (TO to) (VP (VB accept) (S (NP (DT the) " + - "(NNP UN) (NN proposal)) (VP (TO to) (VP (VB end) (NP (PRP$ its) (NN dispute))))))))"; - - List<ParseTreeNode> res = phraseBuilder. parsePhrase("NP", line); - System.out.println(res); - assertEquals(res.toString(), - "[NP'Iran':NNP, NP'refuses':VBZ, NP'to':TO, NP'accept':VB, NP'the':DT, NP'UN':NNP, NP'proposal':NN, NP'to':TO, NP'end':VB, NP'its':PRP$, NP'dispute':NN]"); - - - line = "(VP (VBP am) (NP (NP (DT a) (NNP US) (NN citizen)) (UCP (VP (VBG living) (ADVP (RB abroad))) (, ,) (CC and) (ADJP (JJ concerned) (PP (IN about) (NP (NP (DT the) (NN health) (NN reform) (NN regulation)) (PP (IN of) (NP (CD 2014)))))))))"; - res = phraseBuilder. parsePhrase("VP", line); - System.out.println(res); - assertEquals(res.toString(), "[VP'am':VBP, VP'a':DT, VP'US':NNP, VP'citizen':NN, VP'living':VBG, VP'abroad':RB, VP',':,, VP'and':CC, VP'concerned':JJ, VP'about':IN, VP'the':DT, VP'health':NN, VP'reform':NN, VP'regulation':NN, VP'of':IN, VP'2014':CD]"); - - - line = "(VP (TO to) (VP (VB wait) (SBAR (IN till) (S (NP (PRP I)) (VP (VBP am) (ADJP (JJ sick) (S (VP (TO to) (VP (VB buy) (NP (NN health) (NN insurance)))))))))))"; - res = phraseBuilder. parsePhrase("VP", line); - assertEquals(res.toString(), "[VP'to':TO, VP'wait':VB, VP'till':IN, VP'I':PRP, VP'am':VBP, VP'sick':JJ, VP'to':TO, VP'buy':VB, VP'health':NN, VP'insurance':NN]"); - System.out.println(res); - } - - public void testBuilderPTPhrase(){ - String q = "I am a US citizen living abroad, and concerned about the health reform regulation of 2014. " + - "I do not want to wait till I am sick to buy health insurance. I am afraid I will end up paying the tax."; - ParseThicket pt = ptBuilder.buildParseThicket(q); - List<List<ParseTreeNode>> res = phraseBuilder.buildPT2ptPhrases(pt); - assertTrue(res!=null); - assertTrue(res.size()>0); - - } - -} - - http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/2707f665/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/matching/PairwiseMatcherTest.java ---------------------------------------------------------------------- diff --git a/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/matching/PairwiseMatcherTest.java b/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/matching/PairwiseMatcherTest.java deleted file mode 100644 index de758a9..0000000 --- a/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/matching/PairwiseMatcherTest.java +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package opennlp.tools.parse_thicket.matching; - -import java.util.ArrayList; -import java.util.List; - -import opennlp.tools.parse_thicket.ParseThicket; -import opennlp.tools.parse_thicket.WordWordInterSentenceRelationArc; -import opennlp.tools.textsimilarity.ParseTreeChunk; -import opennlp.tools.textsimilarity.SentencePairMatchResult; -import opennlp.tools.textsimilarity.chunker2matcher.ParserChunker2MatcherProcessor; -import junit.framework.TestCase; - -public class PairwiseMatcherTest extends TestCase { - public void testMatchTwoParaTestReduced(){ - String q = "I am a US citizen living abroad, and concerned about the health reform regulation of 2014. I do not want to wait till I am sick to buy health insurance. I am afraid I will end up paying the tax."; - String a = "People are worried about having to pay a fine for not carrying health insurance coverage got more guidance this week with some new federal regulations. "+ - "Hardly anyone will end up paying the tax when the health reform law takes full effect in 2014. "+ - "The individual mandate makes sure that people don�t wait until they are sick to buy health insurance. "+ - "People are exempt from health insurance fine if they make too little money to file an income tax return, or US citizens living abroad."; - ParserChunker2MatcherProcessor sm = ParserChunker2MatcherProcessor.getInstance(); - SentencePairMatchResult res1 = sm.assessRelevance(a, q); - System.out.print(res1.getMatchResult()); - System.out.print(res1); - assertTrue(res1!=null); - assertTrue(res1.getMatchResult().size()>0); - - } - - - -} - - http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/2707f665/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/pattern_structure/PatternStructureTest.java ---------------------------------------------------------------------- diff --git a/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/pattern_structure/PatternStructureTest.java b/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/pattern_structure/PatternStructureTest.java deleted file mode 100644 index 47324a2..0000000 --- a/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/pattern_structure/PatternStructureTest.java +++ /dev/null @@ -1,189 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package opennlp.tools.parse_thicket.pattern_structure; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.LinkedHashSet; -import java.util.List; - -import junit.framework.TestCase; -import opennlp.tools.fca.ConceptLattice; -import opennlp.tools.fca.FcaWriter; -import opennlp.tools.fca.FormalConcept; -import opennlp.tools.similarity.apps.BingWebQueryRunner; -import opennlp.tools.similarity.apps.HitBase; -import opennlp.tools.textsimilarity.ParseTreeChunk; -import opennlp.tools.textsimilarity.chunker2matcher.ParserChunker2MatcherProcessor; - -public class PatternStructureTest extends TestCase{ - ParserChunker2MatcherProcessor chunk_maker = ParserChunker2MatcherProcessor.getInstance(); - LinguisticPatternStructure ps = new LinguisticPatternStructure(0,0); - BingWebQueryRunner bqr = new BingWebQueryRunner(); - - public void test6texts() { - - String text1 = "I rent an office space. This office is for my business. I can deduct office rental expense from my business profit to calculate net income."; - String text2 = "To run my business, I have to rent an office. The net business profit is calculated as follows. Rental expense needs to be subtracted from revenue."; - String text3 = "To store goods for my retail business I rent some space. When I calculate the net income, I take revenue and subtract business expenses such as office rent."; - String text4 = "I rent out a first floor unit of my house to a travel business. I need to add the rental income to my profit. However, when I repair my house, I can deduct the repair expense from my rental income."; - String text5 = "I receive rental income from my office. I have to claim it as a profit in my tax forms. I need to add my rental income to my profits, but subtract rental expenses such as repair from it."; - String text6 = "I advertised my property as a business rental. Advertisement and repair expenses can be subtracted from the rental income. Remaining rental income needs to be added to my profit and be reported as taxable profit. "; - - List<List<ParseTreeChunk>> chunks1 = chunk_maker.formGroupedPhrasesFromChunksForPara(text1); - List<List<ParseTreeChunk>> chunks2 = chunk_maker.formGroupedPhrasesFromChunksForPara(text2); - List<List<ParseTreeChunk>> chunks3 = chunk_maker.formGroupedPhrasesFromChunksForPara(text3); - List<List<ParseTreeChunk>> chunks4 = chunk_maker.formGroupedPhrasesFromChunksForPara(text4); - List<List<ParseTreeChunk>> chunks5 = chunk_maker.formGroupedPhrasesFromChunksForPara(text5); - List<List<ParseTreeChunk>> chunks6 = chunk_maker.formGroupedPhrasesFromChunksForPara(text6); - //ArrayList<ParseTreeChunk> lst = new ArrayList<ParseTreeChunk>(); - - - LinkedHashSet<Integer> obj = null; - obj = new LinkedHashSet<Integer>(); - obj.add(0); - ps.AddIntent(chunks1, obj, 0); - obj = new LinkedHashSet<Integer>(); - obj.add(1); - ps.AddIntent(chunks2, obj, 0); - obj = new LinkedHashSet<Integer>(); - obj.add(2); - ps.AddIntent(chunks3, obj, 0); - obj = new LinkedHashSet<Integer>(); - obj.add(3); - ps.AddIntent(chunks4, obj, 0); - obj = new LinkedHashSet<Integer>(); - obj.add(4); - ps.AddIntent(chunks5, obj, 0); - obj = new LinkedHashSet<Integer>(); - obj.add(5); - ps.AddIntent(chunks6, obj, 0); - - ps.logStability(); - ps.printLatticeExtended(); - - int [][] binaryContext = ps.toContext(3); - for (int i = 0; i < binaryContext.length; i++ ){ - System.out.println(Arrays.toString(binaryContext[i])); - } - - ConceptLattice new_cl = new ConceptLattice(binaryContext.length, binaryContext[0].length, binaryContext,true); - new_cl.printLatticeStats(); - new_cl.printLatticeFull(); - assertEquals(new_cl.getLattice().size(), 7); - assertEquals(new_cl.getAttributesCount(), 21); - assertEquals(new_cl.getObjectCount(), 3); - } - - // TEST 2 QUERY NEWS - public void testQueryNews(){ - List<List<ParseTreeChunk>> chunks = null; - BingWebQueryRunner bq = new BingWebQueryRunner(); - - String q = ""; -// q = "barack obama"; -// q = "lady gaga"; - q = "angela merkel"; -// q = "putin"; - ArrayList <HitBase> hb = (ArrayList<HitBase>) bq.runSearch(q, 10); - int cnt = 0; - for (HitBase news: hb){ - LinkedHashSet<Integer> obj = null; - obj = new LinkedHashSet<Integer>(); - obj.add(cnt); - chunks = chunk_maker.formGroupedPhrasesFromChunksForPara(news.getAbstractText()); - System.out.println(chunks); - ps.AddIntent(chunks,obj, 0); - cnt++; - } - - ps.logStability(); - System.out.println("LATTICE"); - ps.printLatticeExtended(); - - int [][] binaryContext = ps.toContext(cnt); - for (int i = 0; i < binaryContext.length; i++ ){ - System.out.println(Arrays.toString(binaryContext[i])); - } - - ConceptLattice new_cl = new ConceptLattice(binaryContext.length, binaryContext[0].length, binaryContext,true); - new_cl.printLatticeStats(); - new_cl.printLatticeFull(); - - FcaWriter wr = new FcaWriter(); - wr.WriteAsCxt("res.cxt", new_cl); - - System.out.println("Extent PS "+ps.conceptList.size()); - //for (int i = 0; i<ps.conceptList.size();i++){ - // System.out.println(ps.conceptList.get(i).extent); - //} - System.out.println("Extent CL "+new_cl.getLattice().size()); - //for (int i = 0; i<new_cl.getLattice().size();i++){ - // System.out.println(new_cl.getLattice().get(i).getExtent()); - //} - } - public void testNews(){ - List<List<ParseTreeChunk>> chunks = null; - - ArrayList <HitBase> result = (ArrayList<HitBase>) bqr.runSearch("site:http://news.yahoo.com " + "merkel", 10); - System.out.println(" ResultSize " + result.size()); - int ind = -1; - String text_result = ""; - for (int i = 0; i < result.size(); i++ ){ - System.out.println(result.get(i).getAbstractText()); - ind = result.get(i).getAbstractText().indexOf(") -"); - if (ind < 0) - ind = result.get(i)//.getDescription() - .getAbstractText().indexOf(") �"); - if (ind > 0) - text_result = result.get(i)//.getDescription() - .getAbstractText().substring(ind + 3); - else - text_result = result.get(i)//.getDescription() - .getAbstractText(); - - LinkedHashSet<Integer> obj = null; - obj = new LinkedHashSet<Integer>(); - obj.add(i); - chunks = chunk_maker.formGroupedPhrasesFromChunksForPara(text_result); - ps.AddIntent(chunks,obj, 0); - } - - ps.logStability(); - ps.printLatticeExtended(); - - int [][] binaryContext = ps.toContext(result.size()); - - ConceptLattice new_cl = new ConceptLattice(binaryContext.length, binaryContext[0].length, binaryContext,true); - - FcaWriter wt = new FcaWriter(); - wt.WriteStatsToTxt("merkel_stats.txt", new_cl, 0); - wt.WriteStatsToCvs("merkel_stats.csv", new_cl, ps.conceptList.size()); - wt.WriteAsCxt("merkel_lattice.cxt", new_cl); - - PatternStructureWriter pswt = new PatternStructureWriter(); - pswt.WriteStatsToTxt("ps_res.txt", ps); - - System.out.println("Extent PS "+ps.conceptList.size()); - System.out.println("Extent CL "+new_cl.getLattice().size()); - } - - - - -} http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/2707f665/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/pattern_structure/PhrasePatternStructureTest.java ---------------------------------------------------------------------- diff --git a/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/pattern_structure/PhrasePatternStructureTest.java b/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/pattern_structure/PhrasePatternStructureTest.java deleted file mode 100644 index 7a8cdec..0000000 --- a/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/pattern_structure/PhrasePatternStructureTest.java +++ /dev/null @@ -1,142 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package opennlp.tools.parse_thicket.pattern_structure; - -import java.util.*; -import java.io.*; - -import junit.framework.TestCase; - -import opennlp.tools.parse_thicket.ParseCorefsBuilder; -import opennlp.tools.parse_thicket.ParseThicket; -import opennlp.tools.parse_thicket.ParseTreeNode; -import opennlp.tools.parse_thicket.matching.PT2ThicketPhraseBuilder; -import opennlp.tools.textsimilarity.ParseTreeChunk; -import opennlp.tools.textsimilarity.ParseTreeMatcherDeterministic; - - -public class PhrasePatternStructureTest extends TestCase{ - ParseTreeMatcherDeterministic md = new ParseTreeMatcherDeterministic(); - ParseCorefsBuilder ptBuilder = ParseCorefsBuilder.getInstance(); - PT2ThicketPhraseBuilder phraseBuilder = new PT2ThicketPhraseBuilder(); - - public void testLeoTolstoyTest() { - PhrasePatternStructure lat = new PhrasePatternStructure(3,1); - - String description; - ParseThicket pt1; - List<List<ParseTreeNode>> phrs1; - List<List<ParseTreeChunk>> sent1GrpLst; - //Example 1 - description = "Eh bien, mon prince, so Genoa and Lucca are now no more than family estates of the Bonapartes. No, I warn you, if you don�t say that this means war, if you still permit yourself to condone all the infamies, all the atrocities, of this Antichrist�and that�s what I really believe he is�I will have nothing more to do with you, you are no longer my friend, my faithful slave, as you say. But how do you do, how do you do? I see that I am frightening you. Sit down and tell me all about it."; - pt1 = ptBuilder.buildParseThicket(description); - phrs1 = phraseBuilder.buildPT2ptPhrases(pt1); - sent1GrpLst = lat.formGroupedPhrasesFromChunksForPara(phrs1); - lat.AddIntent(sent1GrpLst, 0); - - description = "Well, Prince, so Genoa and Lucca are now just family estates of the Buonapartes. But I warn you, if you don't tell me that this means war, if you still try to defend the infamies and horrors perpetrated by that Antichrist�I really believe he is Antichrist�I will have nothing more to do with you and you are no longer my friend, no longer my 'faithful slave,' as you call yourself! But how do you do? I see I have frightened you�sit down and tell me all the news"; - pt1 = ptBuilder.buildParseThicket(description); - phrs1 = phraseBuilder.buildPT2ptPhrases(pt1); - sent1GrpLst = lat.formGroupedPhrasesFromChunksForPara(phrs1); - lat.AddIntent(sent1GrpLst, 0); - - - description = "Well, Prince, Genoa and Lucca are now nothing more than estates taken over by the Buonaparte family.1 No, I give you fair warning. If you won�t say this means war, if you will allow yourself to condone all the ghastly atrocities perpetrated by that Antichrist � yes, that�s what I think he is � I shall disown you. You�re no friend of mine � not the �faithful slave� you claim to be . . . But how are you? How are you keeping? I can see I�m intimidating you. Do sit down and talk to me."; - pt1 = ptBuilder.buildParseThicket(description); - phrs1 = phraseBuilder.buildPT2ptPhrases(pt1); - sent1GrpLst = lat.formGroupedPhrasesFromChunksForPara(phrs1); - lat.AddIntent(sent1GrpLst, 0); - - description = "Well, prince, Genoa and Lucca are now nothing more than the apanages, than the private property of the Bonaparte family. I warn you that if you do not tell me we are going to have war, if you still allow yourself to condone all the infamies, all the atrocities of this Antichrist - on my word I believe he is Antichrist - that is the end of our acquaintance; you are no longer my friend, you are no longer my faithful slave, as you call yourself. Now, be of good courage, I see I frighten you. Come, sit down and tell me all about it."; - pt1 = ptBuilder.buildParseThicket(description); - phrs1 = phraseBuilder.buildPT2ptPhrases(pt1); - sent1GrpLst = lat.formGroupedPhrasesFromChunksForPara(phrs1); - lat.AddIntent(sent1GrpLst, 0); - - lat.printLattice(); - lat.printLatticeStats(); - } - //Example 2 - public void testNewsTest() { - PhrasePatternStructure lat = new PhrasePatternStructure(3,4); - - String description; - ParseThicket pt1; - List<List<ParseTreeNode>> phrs1; - List<List<ParseTreeChunk>> sent1GrpLst; - /*List<List<ParseTreeChunk>> res = m.assessRelevance("At least 9 people were killed and 43 others wounded in shootings and bomb attacks, including four car bombings, in central and western Iraq on Thursday, the police said. A car bomb parked near the entrance of the local government compound in Anbar's provincial capital of Ramadi, some 110 km west of Baghdad, detonated in the morning near a convoy of vehicles carrying the provincial governor Qassim al-Fahdawi, a provincial police source told Xinhua on condition of anonymity.", - "Officials say a car bomb in northeast Baghdad killed four people, while another bombing at a market in the central part of the capital killed at least two and wounded many more. Security officials also say at least two policemen were killed by a suicide car bomb attack in the northern city of Mosul. No group has claimed responsibility for the attacks, which occurred in both Sunni and Shi'ite neighborhoods." - );*/ - description = "At least 9 people were killed and 43 others wounded in shootings and bomb attacks, including four car bombings, in central and western Iraq on Thursday, the police said. A car bomb parked near the entrance of the local government compound in Anbar's provincial capital of Ramadi, some 110 km west of Baghdad, detonated in the morning near a convoy of vehicles carrying the provincial governor Qassim al-Fahdawi, a provincial police source told Xinhua on condition of anonymity."; - pt1 = ptBuilder.buildParseThicket(description); - phrs1 = phraseBuilder.buildPT2ptPhrases(pt1); - sent1GrpLst = lat.formGroupedPhrasesFromChunksForPara(phrs1); - lat.AddIntent(sent1GrpLst, 0); - - description = "Officials say a car bomb in northeast Baghdad killed four people, while another bombing at a market in the central part of the capital killed at least two and wounded many more. Security officials also say at least two policemen were killed by a suicide car bomb attack in the northern city of Mosul. No group has claimed responsibility for the attacks, which occurred in both Sunni and Shi'ite neighborhoods."; - pt1 = ptBuilder.buildParseThicket(description); - phrs1 = phraseBuilder.buildPT2ptPhrases(pt1); - sent1GrpLst = lat.formGroupedPhrasesFromChunksForPara(phrs1); - lat.AddIntent(sent1GrpLst, 0); - - description = "Two car bombs killed at least four people and wounded dozens of others on Monday in one of the bloodiest attacks this year in Dagestan, a turbulent province in Russia's North Caucasus region where armed groups are waging an Islamist insurgency. Car bombs, suicide bombings and firefights are common in Dagestan, at the centre of an insurgency rooted in two post-Soviet wars against separatist rebels in neighbouring Chechnya. Such attacks are rare in other parts of Russia, but in a separate incident in a suburb of Moscow on Monday, security forces killed two suspected militants alleged to have been plotting an attack in the capital and arrested a third suspect after a gunbattle"; - // Description = "AMMAN, Jordan (AP) � A Syrian government official says a car bomb has exploded in a suburb of the capital Damascus, killing three people and wounding several others. The Britain-based Syrian Observatory for Human Rights confirmed the Sunday explosion in Jouber, which it said has seen heavy clashes recently between rebels and the Syrian army. It did not have any immediate word on casualties. It said the blast targeted a police station and was carried out by the Jabhat al-Nusra, a militant group linked to al-Qaida, did not elaborate."; - // Description = "A car bombing in Damascus has killed at least nine security forces, with aid groups urging the evacuation of civilians trapped in the embattled Syrian town of Qusayr. The Syrian Observatory for Human Rights said on Sunday the explosion, in the east of the capital, appeared to have been carried out by the extremist Al-Nusra Front, which is allied to al-Qaeda, although there was no immediate confirmation. In Lebanon, security sources said two rockets fired from Syria landed in a border area, and Israeli war planes could be heard flying low over several parts of the country."; - pt1 = ptBuilder.buildParseThicket(description); - phrs1 = phraseBuilder.buildPT2ptPhrases(pt1); - sent1GrpLst = lat.formGroupedPhrasesFromChunksForPara(phrs1); - lat.AddIntent(sent1GrpLst, 0); - - - - lat.printLattice(); - lat.printConceptByPosition(0); - /* - Set<Integer> intent = new HashSet<Integer>(); - intent.add(0); - intent.add(1); - int gen = lat.GetMaximalConcept(intent,0); - System.out.println("generator: " + gen); - intent.clear(); - intent.add(0); - intent.add(3); - - lat.AddIntent(intent, 0); - //System.out.println("after first addintent"); - //lat.printConceptByPosition(0); - //lat.printConceptByPosition(1); - intent.clear(); - intent.add(0); - intent.add(2); - lat.AddIntent(intent, 0); - - intent.clear(); - intent.add(1); - intent.add(2); - - lat.AddIntent(intent, 0); - intent.clear(); - intent.add(tes1); - intent.add(2); - intent.add(3); - lat.AddIntent(intent, 0); - lat.printLattice(); - lat.printLatticeStats(); - */ - } -} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/2707f665/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/rhetoric_structure/RhetoricStructureMarkerTest.java ---------------------------------------------------------------------- diff --git a/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/rhetoric_structure/RhetoricStructureMarkerTest.java b/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/rhetoric_structure/RhetoricStructureMarkerTest.java deleted file mode 100644 index 4a39a7f..0000000 --- a/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/rhetoric_structure/RhetoricStructureMarkerTest.java +++ /dev/null @@ -1,67 +0,0 @@ -package opennlp.tools.parse_thicket.rhetoric_structure; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; - -import junit.framework.TestCase; - -import opennlp.tools.parse_thicket.IGeneralizer; -import opennlp.tools.parse_thicket.Pair; -import opennlp.tools.parse_thicket.ParseThicket; -import opennlp.tools.parse_thicket.ParseTreeNode; -import opennlp.tools.parse_thicket.matching.Matcher; -import opennlp.tools.textsimilarity.ParseTreeChunk; - - -public class RhetoricStructureMarkerTest extends TestCase { - - private RhetoricStructureMarker rstMarker = new RhetoricStructureMarker(); - private Matcher matcher = new Matcher(); - - public RhetoricStructureMarkerTest(){ - - - } - - public void testRSTmarker(){ - String text1 = "As a US citizen living abroad, I am concerned about the health reform regulation of 2014. "+ - "I do not want to wait till I am sick to buy health insurance. "+ - "Yet I am afraid I will end up being requested to pay the tax. "+ - "Although I live abroad, I am worried about having to pay a fine for being reported as not having health insurance coverage. "; - - String text2 = "People are worried about paying a fine for not carrying health insurance coverage, having been informed by IRS about new regulations. "+ - "Yet hardly anyone is expected to pay the tax, when the health reform law takes full effect in 2014. "+ - "The individual mandate confirms that people don�t wait until they are sick to buy health insurance. "+ - "People are exempt from health insurance fine as long as they report they make too little money, or US citizens living abroad."; - ParseThicket pt = matcher.buildParseThicketFromTextWithRST(text1); - for(List<ParseTreeNode> sent: pt.getNodesThicket()){ - List<Pair<String, Integer[]>> res = rstMarker .extractRSTrelationInSentenceGetBoundarySpan(sent); - System.out.println(rstMarker.markerToString(res)); - } - - //assertTrue(res.size()>1); - - - pt = matcher.buildParseThicketFromTextWithRST(text2); - for(List<ParseTreeNode> sent: pt.getNodesThicket()){ - List<Pair<String, Integer[]>> res = rstMarker .extractRSTrelationInSentenceGetBoundarySpan(sent); - System.out.println(rstMarker.markerToString(res)); - } - - } - - public void testLocal(){ - ParseTreeNode[] sent = - new ParseTreeNode[]{new ParseTreeNode("he","prn"), new ParseTreeNode("was","vbz"), new ParseTreeNode("more","jj"), - new ParseTreeNode(",",","), new ParseTreeNode("than",","), new ParseTreeNode("little","jj"), new ParseTreeNode("boy","nn"), - new ParseTreeNode(",",","), new ParseTreeNode("however","*"), new ParseTreeNode(",",","), - new ParseTreeNode("he","prp"), new ParseTreeNode("was","vbz"), new ParseTreeNode("adult","jj") - }; - - List<Pair<String, Integer[]>> res = rstMarker.extractRSTrelationInSentenceGetBoundarySpan(Arrays.asList(sent)); - assertTrue(res.size()>2); - assertTrue(res.get(0).getFirst().startsWith("contrast")); - System.out.println(rstMarker.markerToString(res)); - } -}
