http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/2707f665/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/parse_thicket2graph/EdgeProductBuilder.java ---------------------------------------------------------------------- diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/parse_thicket2graph/EdgeProductBuilder.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/parse_thicket2graph/EdgeProductBuilder.java deleted file mode 100644 index 6b72e47..0000000 --- a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/parse_thicket2graph/EdgeProductBuilder.java +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package opennlp.tools.parse_thicket.parse_thicket2graph; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.List; -import java.util.Set; - -import opennlp.tools.parse_thicket.ParseCorefsBuilder; -import opennlp.tools.parse_thicket.ParseThicket; -import opennlp.tools.parse_thicket.ParseTreeNode; -import opennlp.tools.parse_thicket.matching.Matcher; -import opennlp.tools.textsimilarity.ParseTreeChunk; - -import org.jgrapht.Graph; -import org.jgrapht.alg.BronKerboschCliqueFinder; -import org.jgrapht.graph.DefaultEdge; -import org.jgrapht.graph.SimpleGraph; - - -public class EdgeProductBuilder { - private Matcher matcher = new Matcher(); - private ParseCorefsBuilder ptBuilder = ParseCorefsBuilder.getInstance(); - private GraphFromPTreeBuilder graphBuilder = new GraphFromPTreeBuilder(); - - - public Graph<ParseGraphNode[], DefaultEdge> - buildEdgeProduct(Graph<ParseGraphNode, DefaultEdge> g1, Graph<ParseGraphNode, DefaultEdge> g2 ){ - Graph<ParseGraphNode[], DefaultEdge> gp = - new SimpleGraph<ParseGraphNode[], DefaultEdge>(DefaultEdge.class); - - Set<DefaultEdge> edges1 = g1.edgeSet(); - Set<DefaultEdge> edges2 = g2.edgeSet(); - // build nodes of product graph - for(DefaultEdge e1:edges1){ - for(DefaultEdge e2:edges2){ - ParseGraphNode sourceE1s = g1.getEdgeSource(e1), sourceE1t = g1.getEdgeTarget(e1); - ParseGraphNode sourceE2s = g2.getEdgeSource(e2), sourceE2t = g2.getEdgeTarget(e2); - - if (isNotEmpty(matcher.generalize(sourceE1s.getPtNodes(), sourceE2s.getPtNodes())) && - isNotEmpty(matcher.generalize(sourceE1t.getPtNodes(), sourceE2t.getPtNodes())) - ) - gp.addVertex(new ParseGraphNode[] {sourceE1s, sourceE1t, sourceE2s, sourceE2t } ); - } - } - - Set<ParseGraphNode[]> productVerticesSet = gp.vertexSet(); - List<ParseGraphNode[]> productVerticesList = new ArrayList<ParseGraphNode[]>(productVerticesSet); - for(int i=0; i<productVerticesList.size(); i++){ - for(int j=i+1; j<productVerticesList.size(); j++){ 
- ParseGraphNode[] prodVertexI = productVerticesList.get(i); - ParseGraphNode[] prodVertexJ = productVerticesList.get(j); - if (bothAjacentOrNeitherAdjacent(prodVertexI, prodVertexJ)){ - gp.addEdge(prodVertexI, prodVertexJ); - } - } - } - - - return gp; - - } - /* - * Finding the maximal clique is the slowest part - */ - - public Collection<Set<ParseGraphNode[]>> getMaximalCommonSubgraphs(Graph<ParseGraphNode[], DefaultEdge> g){ - BronKerboschCliqueFinder<ParseGraphNode[], DefaultEdge> finder = - new BronKerboschCliqueFinder<ParseGraphNode[], DefaultEdge>(g); - - Collection<Set<ParseGraphNode[]>> cliques = finder.getBiggestMaximalCliques(); - return cliques; - } - - - private boolean bothAjacentOrNeitherAdjacent(ParseGraphNode[] prodVertexI, - ParseGraphNode[] prodVertexJ) { - List<ParseGraphNode> prodVertexIlist = - new ArrayList<ParseGraphNode>(Arrays.asList(prodVertexI)); - List<ParseGraphNode> prodVertexJlist = - new ArrayList<ParseGraphNode>(Arrays.asList(prodVertexJ)); - prodVertexIlist.retainAll(prodVertexJlist); - return (prodVertexIlist.size()==2 || prodVertexIlist.size()==4); - } - - - private boolean isNotEmpty(List<List<ParseTreeChunk>> generalize) { - if (generalize!=null && generalize.get(0)!=null && generalize.get(0).size()>0) - return true; - else - return false; - } - - public Collection<Set<ParseGraphNode[]>> assessRelevanceViaMaximalCommonSubgraphs(String para1, String para2) { - // first build PTs for each text - ParseThicket pt1 = ptBuilder.buildParseThicket(para1); - ParseThicket pt2 = ptBuilder.buildParseThicket(para2); - // then build phrases and rst arcs - Graph<ParseGraphNode, DefaultEdge> g1 = graphBuilder.buildGraphFromPT(pt1); - Graph<ParseGraphNode, DefaultEdge> g2 = graphBuilder.buildGraphFromPT(pt2); - - Graph<ParseGraphNode[], DefaultEdge> gp = buildEdgeProduct(g1, g2); - Collection<Set<ParseGraphNode[]>> col = getMaximalCommonSubgraphs(gp); - return col; - } - - public static void main(String[] args){ - EdgeProductBuilder b = new 
EdgeProductBuilder(); - Collection<Set<ParseGraphNode[]>> col = b.assessRelevanceViaMaximalCommonSubgraphs("Iran refuses to accept the UN proposal to end its dispute over its work on nuclear weapons."+ - "UN nuclear watchdog passes a resolution condemning Iran for developing its second uranium enrichment site in secret. " + - "A recent IAEA report presented diagrams that suggested Iran was secretly working on nuclear weapons. " + - "Iran envoy says its nuclear development is for peaceful purpose, and the material evidence against it has been fabricated by the US. " - - , "Iran refuses the UN offer to end a conflict over its nuclear weapons."+ - "UN passes a resolution prohibiting Iran from developing its uranium enrichment site. " + - "A recent UN report presented charts saying Iran was working on nuclear weapons. " + - "Iran envoy to UN states its nuclear development is for peaceful purpose, and the evidence against its claim is fabricated by the US. "); - System.out.print(col); - } -} -
http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/2707f665/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/parse_thicket2graph/GraphFromPTreeBuilder.java ---------------------------------------------------------------------- diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/parse_thicket2graph/GraphFromPTreeBuilder.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/parse_thicket2graph/GraphFromPTreeBuilder.java deleted file mode 100644 index d19d7db..0000000 --- a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/parse_thicket2graph/GraphFromPTreeBuilder.java +++ /dev/null @@ -1,147 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package opennlp.tools.parse_thicket.parse_thicket2graph; - -import java.io.PrintWriter; -import java.util.List; - -import opennlp.tools.parse_thicket.PTTree; -import opennlp.tools.parse_thicket.ParseThicket; -import opennlp.tools.parse_thicket.ParseTreeNode; -import org.jgrapht.Graph; -import org.jgrapht.graph.DefaultDirectedWeightedGraph; -import org.jgrapht.graph.DefaultEdge; -import org.jgrapht.graph.SimpleGraph; - - -import edu.stanford.nlp.trees.LabeledScoredTreeNode; -import edu.stanford.nlp.trees.Tree; - -public class GraphFromPTreeBuilder { - - - public Graph<ParseGraphNode, DefaultEdge> buildGraphFromPT(ParseThicket pt){ - PrintWriter out = new PrintWriter(System.out); - - - List<Tree> ts = pt.getSentences(); - ts.get(0).pennPrint(out); - Graph<ParseGraphNode, DefaultEdge> gfragment = buildGGraphFromTree(ts.get(0)); - - //ParseTreeVisualizer applet = new ParseTreeVisualizer(); - //applet.showGraph(gfragment); - - return gfragment; - - } - - - private Graph<ParseGraphNode, DefaultEdge> buildGGraphFromTree(Tree tree) { - Graph<ParseGraphNode, DefaultEdge> g = - new SimpleGraph<ParseGraphNode, DefaultEdge>(DefaultEdge.class); - ParseGraphNode root = new ParseGraphNode(tree,"S 0"); - g.addVertex(root); - navigate(tree, g, 0, root); - - return g; - } - - - - private void navigate(Tree tree, Graph<ParseGraphNode, DefaultEdge> g, int l, ParseGraphNode currParent) { - //String currParent = tree.label().value()+" $"+Integer.toString(l); - //g.addVertex(currParent); - if (tree.getChildrenAsList().size()==1) - navigate(tree.getChildrenAsList().get(0), g, l+1, currParent); - else - if (tree.getChildrenAsList().size()==0) - return; - - for(Tree child: tree.getChildrenAsList()){ - String currChild = null; - ParseGraphNode currChildNode = null; - try { - if (child.isLeaf()) - continue; - if (child.label().value().startsWith("S")) - navigate(child.getChildrenAsList().get(0), g, l+1, currParent); - - if (!child.isPhrasal() || child.isPreTerminal()) - currChild = 
child.toString()+" #"+Integer.toString(l); - else - currChild = child.label().value()+" #"+Integer.toString(l); - currChildNode = new ParseGraphNode(child, currChild); - g.addVertex(currChildNode); - g.addEdge(currParent, currChildNode); - } catch (Exception e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - navigate(child, g, l+1, currChildNode); - } - } - - - /* - private static void navigateChildren(PTTree[] trChildren, int indent, boolean parentLabelNull, boolean onlyLabelValue, List<LabeledScoredTreeNode> phrases) { - boolean firstSibling = true; - boolean leftSibIsPreTerm = true; // counts as true at beginning - for (PTTree currentTree : trChildren) { - currentTree.navigate(indent, parentLabelNull, firstSibling, leftSibIsPreTerm, false, onlyLabelValue, phrases); - leftSibIsPreTerm = currentTree.isPreTerminal(); - // CC is a special case for English, but leave it in so we can exactly match PTB3 tree formatting - if (currentTree.value() != null && currentTree.value().startsWith("CC")) { - leftSibIsPreTerm = false; - } - firstSibling = false; - } - } - - - private void navigate(int indent, boolean parentLabelNull, boolean firstSibling, boolean leftSiblingPreTerminal, boolean topLevel, boolean onlyLabelValue, List<LabeledScoredTreeNode> phrases) { - // the condition for staying on the same line in Penn Treebank - boolean suppressIndent = (parentLabelNull || (firstSibling && isPreTerminal()) || (leftSiblingPreTerminal && isPreTerminal() && (label() == null || !label().value().startsWith("CC")))); - if (suppressIndent) { - //pw.print(" "); - // pw.flush(); - } else { - if (!topLevel) { - //pw.println(); - } - for (int i = 0; i < indent; i++) { - //pw.print(" "); - // pw.flush(); - } - } - if (isLeaf() || isPreTerminal()) { - String terminalString = toStringBuilder(new StringBuilder(), onlyLabelValue).toString(); - //pw.print(terminalString); - //pw.flush(); - return; - } - //pw.print("("); - String nodeString = onlyLabelValue ? 
value() : nodeString(); - //pw.print(nodeString); - // pw.flush(); - boolean parentIsNull = label() == null || label().value() == null; - navigateChildren(children(), indent + 1, parentIsNull, true, phrases); - //pw.print(")"); - - } - */ - -} http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/2707f665/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/parse_thicket2graph/ParseGraphNode.java ---------------------------------------------------------------------- diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/parse_thicket2graph/ParseGraphNode.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/parse_thicket2graph/ParseGraphNode.java deleted file mode 100644 index 6f9c3ea..0000000 --- a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/parse_thicket2graph/ParseGraphNode.java +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package opennlp.tools.parse_thicket.parse_thicket2graph; - -import java.util.List; - -import opennlp.tools.parse_thicket.ParseTreeNode; -import opennlp.tools.parse_thicket.matching.PT2ThicketPhraseBuilder; - - -import edu.stanford.nlp.trees.Tree; - -public class ParseGraphNode { - PT2ThicketPhraseBuilder phraseBuilder = new PT2ThicketPhraseBuilder(); - - private Tree tree; - private String label; - private List<List<ParseTreeNode>> ptNodes; - - - - public List<List<ParseTreeNode>> getPtNodes() { - return ptNodes; - } - - public ParseGraphNode(Tree tree, String label) { - super(); - this.tree = tree; - this.label = label; - ptNodes = phraseBuilder.buildPT2ptPhrasesForASentence(tree, null); - } - - public Tree getTree() { - return tree; - } - - public void setTree(Tree tree) { - this.tree = tree; - } - - public String getLabel() { - return label; - } - - public void setLabel(String label) { - this.label = label; - } - - public String toString(){ - return label; - } -} - http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/2707f665/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/parse_thicket2graph/ParseTreeVisualizer.java ---------------------------------------------------------------------- diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/parse_thicket2graph/ParseTreeVisualizer.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/parse_thicket2graph/ParseTreeVisualizer.java deleted file mode 100644 index 71c1fa3..0000000 --- a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/parse_thicket2graph/ParseTreeVisualizer.java +++ /dev/null @@ -1,187 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------- - * JGraphAdapterDemo.java - * ---------------------- - * (C) Copyright 2003-2008, by Barak Naveh and Contributors. - * - * Original Author: Barak Naveh - * Contributor(s): - - * - * $Id: JGraphAdapterDemo.java 725 2010-11-26 01:24:28Z perfecthash $ - * - * Changes - * ------- - * 03-Aug-2003 : Initial revision (BN); - * 07-Nov-2003 : Adaptation to JGraph 3.0 (BN); - * - */ -package opennlp.tools.parse_thicket.parse_thicket2graph; - -import java.awt.*; -import java.awt.geom.*; -import java.util.HashMap; -import java.util.Map; -import java.util.Set; - -import javax.swing.*; - - -import org.jgraph.*; -import org.jgraph.graph.*; - -import org.jgrapht.*; -import org.jgrapht.ext.*; -import org.jgrapht.graph.*; - - -import org.jgrapht.graph.DefaultEdge; - -public class ParseTreeVisualizer -extends JApplet -{ - //~ Static fields/initializers --------------------------------------------- - - private static final long serialVersionUID = 3256346823498765434L; - private static final Color DEFAULT_BG_COLOR = Color.decode("#FAFBFF"); - private static final Dimension DEFAULT_SIZE = new Dimension(1200, 800); - - //~ Instance fields -------------------------------------------------------- - - // - private JGraphModelAdapter<String, DefaultEdge> jgAdapter; - - public void showGraph(Graph g){ - ParseTreeVisualizer applet = new ParseTreeVisualizer(); - applet.importGraph(g); 
- - JFrame frame = new JFrame(); - frame.getContentPane().add(applet); - frame.setTitle("Showing parse thicket"); - frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE); - frame.pack(); - frame.setVisible(true); - } - - // TODO cast to ParseGraphNode - private void importGraph(Graph g) { - // create a visualization using JGraph, via an adapter - jgAdapter = new JGraphModelAdapter<String, DefaultEdge>(g); - - JGraph jgraph = new JGraph(jgAdapter); - - adjustDisplaySettings(jgraph); - getContentPane().add(jgraph); - resize(DEFAULT_SIZE); - - Set<String> vertexSet = ( Set<String>)g.vertexSet(); - int count=0; - Map<Integer, Integer> level_count = new HashMap<Integer, Integer> (); - - for(String vertexStr: vertexSet){ - Integer key = 0; - try { - if (vertexStr.indexOf('#')>-1) - key = Integer.parseInt(vertexStr.split("#")[1]); - } catch (Exception e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - Integer howManyAlready = 0; - - if (key>0){ - howManyAlready = level_count.get(key); - if (howManyAlready==null){ - howManyAlready=0; - level_count.put(key, 1); - } else { - level_count.put(key, howManyAlready+1); - } - } - positionVertexAt(vertexStr, count+howManyAlready*50, count); - count+=20; - } - - - } - - /** - * An alternative starting point for this demo, to also allow running this - * applet as an application. - * - * @param args ignored. 
- */ - public static void main(String [] args) - { - ParseTreeVisualizer applet = new ParseTreeVisualizer(); - applet.init(); - - JFrame frame = new JFrame(); - frame.getContentPane().add(applet); - frame.setTitle("JGraphT Adapter to JGraph Demo"); - frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE); - frame.pack(); - frame.setVisible(true); - } - - - - private void adjustDisplaySettings(JGraph jg) - { - jg.setPreferredSize(DEFAULT_SIZE); - - Color c = DEFAULT_BG_COLOR; - String colorStr = null; - - try { - colorStr = getParameter("bgcolor"); - } catch (Exception e) { - } - - if (colorStr != null) { - c = Color.decode(colorStr); - } - - jg.setBackground(c); - } - - @SuppressWarnings("unchecked") // FIXME hb 28-nov-05: See FIXME below - private void positionVertexAt(Object vertex, int x, int y) - { - DefaultGraphCell cell = jgAdapter.getVertexCell(vertex); - AttributeMap attr = cell.getAttributes(); - Rectangle2D bounds = GraphConstants.getBounds(attr); - - Rectangle2D newBounds = - new Rectangle2D.Double( - x, - y, - bounds.getWidth(), - bounds.getHeight()); - - GraphConstants.setBounds(attr, newBounds); - - // TODO: Clean up generics once JGraph goes generic - AttributeMap cellAttr = new AttributeMap(); - cellAttr.put(cell, attr); - jgAdapter.edit(cellAttr, null, null, null); - } - -} - -// End JGraphAdapterDemo.java http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/2707f665/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/request_response_recognizer/TreeKernelBasedRecognizerOfRequest_Response.java ---------------------------------------------------------------------- diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/request_response_recognizer/TreeKernelBasedRecognizerOfRequest_Response.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/request_response_recognizer/TreeKernelBasedRecognizerOfRequest_Response.java deleted file mode 100644 index e33e089..0000000 --- 
a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/request_response_recognizer/TreeKernelBasedRecognizerOfRequest_Response.java +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package opennlp.tools.parse_thicket.request_response_recognizer; - - -import java.util.ArrayList; -import java.util.List; - -import opennlp.tools.parse_thicket.ParseThicket; -import opennlp.tools.parse_thicket.VerbNetProcessor; -import opennlp.tools.parse_thicket.external_rst.MatcherExternalRST; -import opennlp.tools.parse_thicket.external_rst.ParseThicketWithDiscourseTree; -import opennlp.tools.parse_thicket.kernel_interface.TreeKernelBasedClassifierMultiplePara; - -/* - * This class performs TK learning based on parse thicket which includes RST relations only - * based on Surdeanu at al RST parser. 
It does sentence parsing and NLP pipeline of - * Surdeanu's wrapper of Stanford NLP - */ -public class TreeKernelBasedRecognizerOfRequest_Response extends TreeKernelBasedClassifierMultiplePara{ - - private MatcherExternalRST matcherRST = new MatcherExternalRST(); - - protected List<String> formTreeKernelStructuresMultiplePara(List<String> texts, String flag) { - //TODO - this.setShortRun(); - List<String> extendedTreesDumpTotal = new ArrayList<String>(); - try { - - for(String text: texts){ - // get the parses from original documents, and form the training dataset - try { - System.out.print("About to build pt with external rst from "+text + "\n..."); - ParseThicket pt = matcherRST.buildParseThicketFromTextWithRST(text); - if (pt == null) - continue; - System.out.print("About to build extended forest with external rst..."); - List<String> extendedTreesDump = // use direct option (true - buildReptresentationForDiscourseTreeAndExtensions((ParseThicketWithDiscourseTree)pt, true); - for(String line: extendedTreesDump) - extendedTreesDumpTotal.add(flag + " |BT| "+line + " |ET| "); - System.out.println("DONE"); - } catch (Exception e) { - e.printStackTrace(); - } - } - } catch (Exception e) { - e.printStackTrace(); - } - return extendedTreesDumpTotal; - } - - private List<String> buildReptresentationForDiscourseTreeAndExtensions(ParseThicketWithDiscourseTree pt, boolean bDirectDT){ - List<String> extendedTreesDump = new ArrayList<String>(); - if (!bDirectDT) - // option 1: use RST relation for extended trees - extendedTreesDump = treeExtender.buildForestForRSTArcs(pt); - else { - // option 2: use DT directly - extendedTreesDump.add(pt.getDtDump()); - extendedTreesDump.add(pt.getDtDumpWithPOS()); - extendedTreesDump.add(pt.getDtDumpWithEmbeddedTrees()); - extendedTreesDump.add(pt.getDtDumpWithVerbNet()); - } - return extendedTreesDump; - } - - public static void main(String[] args){ - VerbNetProcessor p = VerbNetProcessor. 
- getInstance("/Users/bgalitsky/Documents/relevance-based-on-parse-trees/src/test/resources"); - - TreeKernelBasedRecognizerOfRequest_Response proc = new TreeKernelBasedRecognizerOfRequest_Response(); - proc.setKernelPath("/Users/bgalitsky/Documents/relevance-based-on-parse-trees/src/test/resources/tree_kernel/"); - proc.trainClassifier( - YahooAnswersTrainingSetCreator.origFilesDir, - YahooAnswersTrainingSetCreator.origFilesDir.replace("/text", "/neg_text") - ); - } - -} http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/2707f665/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/request_response_recognizer/YahooAnswersTrainingSetCreator.java ---------------------------------------------------------------------- diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/request_response_recognizer/YahooAnswersTrainingSetCreator.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/request_response_recognizer/YahooAnswersTrainingSetCreator.java deleted file mode 100644 index c060c95..0000000 --- a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/request_response_recognizer/YahooAnswersTrainingSetCreator.java +++ /dev/null @@ -1,118 +0,0 @@ -package opennlp.tools.parse_thicket.request_response_recognizer; - -import java.io.File; -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; - -import opennlp.tools.similarity.apps.BingQueryRunner; - -import org.apache.commons.io.FileUtils; - -public class YahooAnswersTrainingSetCreator { - protected List<File> queuePos = new ArrayList<File>(), queueNeg = new ArrayList<File>(); - public static String origFilesDir = "/Users/bgalitsky/Downloads/NewCategoryIdentification/text"; - //private BingQueryRunner searcher = new BingQueryRunner(); - protected void addFilesPos(File file) { - - if (!file.exists()) { - System.out.println(file + " does not exist."); - } - if (file.isDirectory()) { - for (File f : file.listFiles()) { - addFilesPos(f); - 
System.out.println(f.getName()); - } - } else { - queuePos.add(file); - } - } - - protected void addFilesNeg(File file) { - - if (!file.exists()) { - System.out.println(file + " does not exist."); - } - if (file.isDirectory()) { - for (File f : file.listFiles()) { - addFilesNeg(f); - System.out.println(f.getName()); - } - } else { - queueNeg.add(file); - } - } - - public void formNegTrainingSet(String posPath , String negPath){ - if (!new File(negPath).exists()) - new File(negPath).mkdir(); - - addFilesPos(new File(posPath)); - for(int i=0; i< queuePos.size()-1; i+=2){ //take two files at a time - File f1 = queuePos.get(i), f2 = queuePos.get(i+1); - String content1 = null, content2 = null; - try { - content1 = FileUtils.readFileToString(f1); - content2 = FileUtils.readFileToString(f2); - } catch (IOException e) { - e.printStackTrace(); - } - String[] portions1 = content1.split("\n\n"); - String[] portions2 = content2.split("\n\n"); - - portions1 = splitIntoRR(portions1, content1); - portions2 = splitIntoRR(portions2, content2); - if (portions1==null || portions2==null) - continue; - // do cross-breeding - try { - FileUtils.writeStringToFile(new File(negPath+"/" + f1.getName()+".txt"), - portions1[0] + "\n\n" + portions2[1] ); - FileUtils.writeStringToFile(new File(negPath+"/" + f2.getName()+".txt"), - portions2[0] + "\n\n" + portions1[1] ); - } catch (IOException e) { - e.printStackTrace(); - } - } - - - } - private String[] splitIntoRR(String[] portions, String content) { - if (portions.length<2 ){ - portions = content.replace("?","#_#").split("#_#"); - } - if (portions.length<2 ){ - portions = content.split("\n"); - } - if (portions.length<2) - return null; - if (portions.length>2){ - String q= "", a = ""; - boolean bQ = true; - for(int p=0; p<portions.length; p++){ - if ( bQ ) - q+=portions[p]+" \n"; - else - a +=portions[p]+" \n"; - - if (portions[p].endsWith("?")){ - bQ=false; - } - - } - if (!bQ) { - portions = new String[2]; - portions[0] = q; - portions[1] 
= a; - } else - return null; - } - - return portions; - } - - public static void main(String[] args){ - String dir = YahooAnswersTrainingSetCreator.origFilesDir; - new YahooAnswersTrainingSetCreator().formNegTrainingSet(dir, dir.replace("/text", "/neg_text")); - } -} http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/2707f665/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/rhetoric_structure/RhetoricStructureArcsBuilder.java ---------------------------------------------------------------------- diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/rhetoric_structure/RhetoricStructureArcsBuilder.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/rhetoric_structure/RhetoricStructureArcsBuilder.java deleted file mode 100644 index 96bec44..0000000 --- a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/rhetoric_structure/RhetoricStructureArcsBuilder.java +++ /dev/null @@ -1,178 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package opennlp.tools.parse_thicket.rhetoric_structure; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import opennlp.tools.parse_thicket.ArcType; -import opennlp.tools.parse_thicket.Pair; -import opennlp.tools.parse_thicket.ParseThicket; -import opennlp.tools.parse_thicket.ParseTreeNode; -import opennlp.tools.parse_thicket.WordWordInterSentenceRelationArc; - -import org.jgrapht.Graph; -import org.jgrapht.graph.DefaultEdge; -import org.jgrapht.graph.SimpleGraph; - - -import edu.stanford.nlp.trees.Tree; - -public class RhetoricStructureArcsBuilder { - private RhetoricStructureMarker markerBuilderForSentence = new RhetoricStructureMarker(); - - private Map<Integer, List<Pair<String, Integer[]>>> buildMarkers(ParseThicket pt){ - - Map<Integer, List<Pair<String, Integer[]>>> sentNumMarkers = new - HashMap<Integer, List<Pair<String, Integer[]>>>(); - int count = 0; - for( List<ParseTreeNode> sent: pt.getNodesThicket()){ - List<Pair<String, Integer[]>> markersForSentence = markerBuilderForSentence. 
- extractRSTrelationInSentenceGetBoundarySpan(sent); - sentNumMarkers.put(count, markersForSentence); - count++; - } - return sentNumMarkers; - } - - - /* - * Induced RST algorithm - * - * Input: obtained RST markers (numbers of words which - * splits sentence in potential RST relation arguments) + - * Current Parse Thicket with arcs for coreferences - * - * We search for parts of sentences on the opposite side of RST markers - * - * $sentPosFrom$ marker - * | == == == [ ] == == == | - * \ \ - * \ \ - * coref RST arc being formed - * \ \ - * \ \ - * | == == == == == [ ] == == ==| - * - * Mark yelled at his dog, but it disobeyed - * | \ - * coref RST arc for CONTRAST being formed - * | \ - * He was upset, however he did not show it - * $sentPosTo$ - */ - public List<WordWordInterSentenceRelationArc> buildRSTArcsFromMarkersAndCorefs( - List<WordWordInterSentenceRelationArc> arcs, - Map<Integer, List<List<ParseTreeNode>>> sentNumPhrasesMap, - ParseThicket pt ) { - List<WordWordInterSentenceRelationArc> arcsRST = new ArrayList<WordWordInterSentenceRelationArc>(); - - Map<Integer, List<Pair<String, Integer[]>>> rstMarkersMap = buildMarkers(pt); - - for(int nSentFrom=0; nSentFrom<pt.getSentences().size(); nSentFrom++){ - for(int nSentTo=nSentFrom+1; nSentTo<pt.getSentences().size(); nSentTo++){ - // for given arc, find phrases connected by this arc and add to the list of phrases - - List<List<ParseTreeNode>> phrasesFrom = sentNumPhrasesMap.get(nSentFrom); - List<List<ParseTreeNode>> phrasesTo = sentNumPhrasesMap.get(nSentTo); - List<Pair<String, Integer[]>> markersFrom = rstMarkersMap.get(nSentFrom); - List<Pair<String, Integer[]>> markersTo = rstMarkersMap.get(nSentTo); - for(WordWordInterSentenceRelationArc arc: arcs){ - // arc should be coref and link these sentences - if (nSentFrom != arc.getCodeFrom().getFirst() || - nSentTo != arc.getCodeTo().getFirst() || - !arc.getArcType().getType().startsWith("coref") - ) - continue; - int sentPosFrom = 
arc.getCodeFrom().getSecond(); - int sentPosTo = arc.getCodeTo().getSecond(); - // not more than a single RST link for a pair of sentences - boolean bFound = false; - for(List<ParseTreeNode> vpFrom: phrasesFrom){ - if (bFound) - break; - for(List<ParseTreeNode> vpTo: phrasesTo){ - for(Pair<String, Integer[]> mFrom: markersFrom){ - for(Pair<String, Integer[]> mTo: markersTo) { - { - // the phrases should be on an opposite side of rst marker for a coref link - if (isSequence( new Integer[] { sentPosFrom, vpFrom.get(0).getId(), mFrom.getSecond()[0]}) && - isSequence( new Integer[] { sentPosTo, vpTo.get(0).getId(), mTo.getSecond()[0]}) ){ - ArcType arcType = new ArcType("rst", mFrom.getFirst(), 0, 0); - - WordWordInterSentenceRelationArc arcRST = - new WordWordInterSentenceRelationArc(new Pair<Integer, Integer>(nSentFrom, mFrom.getSecond()[1]), - new Pair<Integer, Integer>(nSentTo, mTo.getSecond()[1]), "", "", arcType); - arcsRST.add(arcRST); - bFound = true; - break; - } - } - } - } - } - } - } - } - } - - return arcs; - } - -// check if the word positions occur in sentence in the order Integer[] -// TODO make more sensitive algo - private static boolean isSequence(Integer[] integers) { - //TODO better construction of array - if (integers==null || integers.length<3) - return false; - try { - for(Integer i: integers) - if (i==0) - return false; - } catch (Exception e) { - return false; - } - - Boolean bWrongOrder = false; - for(int i=1; i< integers.length; i++){ - if (integers[i-1]>integers[i]){ - bWrongOrder = true; - break; - } - } - - Boolean bWrongInverseOrder = false; - for(int i=1; i< integers.length; i++){ - if (integers[i-1]<integers[i]){ - bWrongInverseOrder = true; - break; - } - } - - return !(bWrongOrder && bWrongInverseOrder); - } - - - - public static void main(String[] args){ - - - } -} 
http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/2707f665/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/rhetoric_structure/RhetoricStructureMarker.java ---------------------------------------------------------------------- diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/rhetoric_structure/RhetoricStructureMarker.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/rhetoric_structure/RhetoricStructureMarker.java deleted file mode 100644 index 3b1c576..0000000 --- a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/rhetoric_structure/RhetoricStructureMarker.java +++ /dev/null @@ -1,145 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package opennlp.tools.parse_thicket.rhetoric_structure; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; - -import opennlp.tools.parse_thicket.IGeneralizer; -import opennlp.tools.parse_thicket.Pair; -import opennlp.tools.parse_thicket.ParseTreeNode; - - -public class RhetoricStructureMarker implements IGeneralizer<Integer[]> { - //private static String rstRelations[] = {"antithesis", "concession", "contrast", "elaboration"}; - List<Pair<String, ParseTreeNode[]>> rstMarkers = new ArrayList<Pair<String, ParseTreeNode[]>>(); - - public RhetoricStructureMarker(){ - - rstMarkers.add(new Pair<String, ParseTreeNode[]>("contrast", new ParseTreeNode[]{new ParseTreeNode(",",","), new ParseTreeNode("than",",") })); - rstMarkers.add(new Pair<String, ParseTreeNode[]>( "antithesis", new ParseTreeNode[]{new ParseTreeNode("although",","), new ParseTreeNode("*","*") })); - rstMarkers.add(new Pair<String, ParseTreeNode[]>( "contrast", new ParseTreeNode[]{new ParseTreeNode(",",","), new ParseTreeNode("however","*") })); - rstMarkers.add(new Pair<String, ParseTreeNode[]>( "contrast", new ParseTreeNode[]{new ParseTreeNode("however","*"), new ParseTreeNode(",",","), - new ParseTreeNode("*","prp"), })); - rstMarkers.add(new Pair<String, ParseTreeNode[]>( "elaboration", new ParseTreeNode[]{new ParseTreeNode(",",","), new ParseTreeNode("*","NN") })); - rstMarkers.add(new Pair<String, ParseTreeNode[]>( "elaboration", new ParseTreeNode[]{new ParseTreeNode("as","*"), new ParseTreeNode("a","*") })); - - rstMarkers.add(new Pair<String, ParseTreeNode[]>("explanation", new ParseTreeNode[]{new ParseTreeNode(",",","), new ParseTreeNode("because",",") })); - rstMarkers.add(new Pair<String, ParseTreeNode[]>( "example", new ParseTreeNode[]{new ParseTreeNode("for","IN"), new ParseTreeNode("example","NN") })); - rstMarkers.add(new Pair<String, ParseTreeNode[]>( "contrast", new ParseTreeNode[]{new ParseTreeNode(",",","), new ParseTreeNode("ye","*") })); - 
rstMarkers.add(new Pair<String, ParseTreeNode[]>( "contrast", new ParseTreeNode[]{new ParseTreeNode("yet","*"), new ParseTreeNode(",",","), - new ParseTreeNode("*","prp"), })); - - rstMarkers.add(new Pair<String, ParseTreeNode[]>( "contrast", new ParseTreeNode[]{new ParseTreeNode("yet","*"), new ParseTreeNode("i","*"), - })); - - rstMarkers.add(new Pair<String, ParseTreeNode[]>( "explanation", new ParseTreeNode[]{new ParseTreeNode(",",","), new ParseTreeNode("where","*") })); - //as long as - rstMarkers.add(new Pair<String, ParseTreeNode[]>( "temp_sequence", new ParseTreeNode[]{/*new ParseTreeNode("as","*"),*/ new ParseTreeNode("*","RB"), - new ParseTreeNode("as","IN"),})); - rstMarkers.add(new Pair<String, ParseTreeNode[]>( "temp_sequence", new ParseTreeNode[]{/*new ParseTreeNode("as","*"),*/ new ParseTreeNode("*","VB*"), - new ParseTreeNode("until","IN"),})); - - } - - /* For a sentence, we obtain a list of markers with the CA word and position in the sentence - * Output span is an integer array with start/end occurrence of an RST marker in a sentence - * */ - public List<Pair<String, Integer[]>> extractRSTrelationInSentenceGetBoundarySpan(List<ParseTreeNode> sentence){ - List<Pair<String, Integer[]>> results = new ArrayList<Pair<String, Integer[]>> (); - - for(Pair<String, ParseTreeNode[]> template: rstMarkers){ - List<Integer[]> spanList = generalize(sentence,template.getSecond() ); - if (!spanList.isEmpty()) - results.add(new Pair<String, Integer[]>(template.getFirst(), spanList.get(0))); - } - return results; - } - - /* Rule application in the form of generalization - * Generalizing a sentence with a rule (a template), we obtain the occurrence of rhetoric marker - * - * o1 - sentence - * o2 - rule/template, specifying lemmas and/or POS, including punctuation - * @see opennlp.tools.parse_thicket.IGeneralizer#generalize(java.lang.Object, java.lang.Object) - * returns the span Integer[] - */ - @Override - public List<Integer[]> generalize(Object o1, Object o2) { 
- List<Integer[]> result = new ArrayList<Integer[]>(); - - List<ParseTreeNode> sentence = (List<ParseTreeNode> )o1; - ParseTreeNode[] template = (ParseTreeNode[]) o2; - - boolean bBeingMatched = false; - for(int wordIndexInSentence=0; wordIndexInSentence<sentence.size(); wordIndexInSentence++){ - ParseTreeNode word = sentence.get(wordIndexInSentence); - int wordIndexInSentenceEnd = wordIndexInSentence; //init iterators for internal loop - int templateIterator=0; - while (wordIndexInSentenceEnd<sentence.size() && templateIterator< template.length){ - ParseTreeNode tword = template[templateIterator]; - ParseTreeNode currWord=sentence.get(wordIndexInSentenceEnd); - List<ParseTreeNode> gRes = tword.generalize(tword, currWord); - if (gRes.isEmpty()|| gRes.get(0)==null || ( gRes.get(0).getWord().equals("*") - && gRes.get(0).getPos().equals("*") )){ - bBeingMatched = false; - break; - } else { - bBeingMatched = true; - } - wordIndexInSentenceEnd++; - templateIterator++; - } - // template iteration is done - // the only condition for successful match is IF we are at the end of template - if (templateIterator == template.length){ - result.add(new Integer[]{wordIndexInSentence, wordIndexInSentenceEnd-1}); - return result; - } - - // no match for current sentence word: proceed to the next - } - return result; - } - - public String markerToString(List<Pair<String, Integer[]>> res){ - StringBuffer buf = new StringBuffer(); - buf.append("["); - for(Pair<String, Integer[]> marker: res){ - buf.append(marker.getFirst()+":"); - for(int a: marker.getSecond()){ - buf.append(a+" "); - } - buf.append (" | "); - } - buf.append("]"); - return buf.toString(); - } - - public static void main(String[] args){ - ParseTreeNode[] sent = - new ParseTreeNode[]{new ParseTreeNode("he","prn"), new ParseTreeNode("was","vbz"), new ParseTreeNode("more","jj"), - new ParseTreeNode(",",","), new ParseTreeNode("than",","), new ParseTreeNode("little","jj"), new ParseTreeNode("boy","nn"), - new 
ParseTreeNode(",",","), new ParseTreeNode("however","*"), new ParseTreeNode(",",","), - new ParseTreeNode("he","prp"), new ParseTreeNode("was","vbz"), new ParseTreeNode("adult","jj") - }; - - List<Pair<String, Integer[]>> res = new RhetoricStructureMarker().extractRSTrelationInSentenceGetBoundarySpan(Arrays.asList(sent)); - System.out.println( new RhetoricStructureMarker().markerToString(res)); - } -} http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/2707f665/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/SearchResultsReRankerStanfRequestHandler.java ---------------------------------------------------------------------- diff --git a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/SearchResultsReRankerStanfRequestHandler.java b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/SearchResultsReRankerStanfRequestHandler.java deleted file mode 100644 index 477f022..0000000 --- a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/SearchResultsReRankerStanfRequestHandler.java +++ /dev/null @@ -1,306 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package opennlp.tools.similarity.apps.solr; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Comparator; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.logging.Logger; - -import opennlp.tools.parse_thicket.apps.SnippetToParagraph; -import opennlp.tools.parse_thicket.matching.Matcher; -import opennlp.tools.similarity.apps.BingQueryRunner; -import opennlp.tools.similarity.apps.HitBase; -import opennlp.tools.similarity.apps.HitBaseComparable; -import opennlp.tools.similarity.apps.utils.Pair; -import opennlp.tools.textsimilarity.ParseTreeChunk; -import opennlp.tools.textsimilarity.ParseTreeChunkListScorer; -import opennlp.tools.textsimilarity.SentencePairMatchResult; -import opennlp.tools.textsimilarity.chunker2matcher.ParserChunker2MatcherProcessor; - -import org.apache.commons.lang.ArrayUtils; -import org.apache.commons.lang.StringUtils; -import org.apache.lucene.document.Document; -import org.apache.lucene.index.CorruptIndexException; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.queryparser.classic.ParseException; -import org.apache.lucene.search.BooleanClause.Occur; -import org.apache.lucene.search.BooleanQuery; -import org.apache.lucene.search.CachingWrapperFilter; -import org.apache.lucene.search.Collector; -import org.apache.lucene.search.Filter; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.QueryWrapperFilter; -import org.apache.lucene.search.ScoreDoc; -import org.apache.solr.common.SolrDocument; -import org.apache.solr.common.SolrDocumentList; -import org.apache.solr.common.SolrException; -import org.apache.solr.common.params.CommonParams; -import org.apache.solr.common.params.ModifiableSolrParams; -import org.apache.solr.common.params.ShardParams; -import 
org.apache.solr.common.params.SolrParams; -import org.apache.solr.common.util.NamedList; -import org.apache.solr.handler.component.SearchHandler; -import org.apache.solr.request.SolrQueryRequest; -import org.apache.solr.response.SolrQueryResponse; - - - -public class SearchResultsReRankerStanfRequestHandler extends SearchHandler { - private static Logger LOG = Logger - .getLogger("com.become.search.requestHandlers.SearchResultsReRankerRequestHandler"); - private final static int MAX_SEARCH_RESULTS = 100; - private ParseTreeChunkListScorer parseTreeChunkListScorer = new ParseTreeChunkListScorer(); - private int MAX_QUERY_LENGTH_NOT_TO_RERANK=3; - private Matcher matcher = new Matcher(); - private BingQueryRunner bingSearcher = new BingQueryRunner(); - private SnippetToParagraph snp = new SnippetToParagraph(); - - - public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp){ - // get query string - String requestExpression = req.getParamString(); - String[] exprParts = requestExpression.split("&"); - for(String part: exprParts){ - if (part.startsWith("q=")) - requestExpression = part; - } - String query = StringUtils.substringAfter(requestExpression, ":"); - LOG.info(requestExpression); - - - SolrParams ps = req.getOriginalParams(); - Iterator<String> iter = ps.getParameterNamesIterator(); - List<String> keys = new ArrayList<String>(); - while(iter.hasNext()){ - keys.add(iter.next()); - } - - List<HitBase> searchResults = new ArrayList<HitBase>(); - - - - - - for ( Integer i=0; i< MAX_SEARCH_RESULTS; i++){ - String title = req.getParams().get("t"+i.toString()); - String descr = req.getParams().get("d"+i.toString()); - - if(title==null || descr==null) - continue; - - HitBase hit = new HitBase(); - hit.setTitle(title); - hit.setAbstractText(descr); - hit.setSource(i.toString()); - searchResults.add(hit); - } - - /* - * http://173.255.254.250:8983/solr/collection1/reranker/? 
- * q=search_keywords:design+iphone+cases&fields=spend+a+day+with+a+custom+iPhone+case&fields=Add+style+to+your+every+day+fresh+design+with+a+custom+iPhone+case&fields=Add+style+to+your+every+day+with+mobile+case+for+your+family&fields=Add+style+to+your+iPhone+and+iPad&fields=Add+Apple+fashion+to+your+iPhone+and+iPad - * - */ - - if (searchResults.size()<1) { - int count=0; - for(String val : exprParts){ - if (val.startsWith("fields=")){ - val = StringUtils.mid(val, 7, val.length()); - HitBase hit = new HitBase(); - hit.setTitle(""); - hit.setAbstractText(val); - hit.setSource(new Integer(count).toString()); - searchResults.add(hit); - count++; - } - - } - } - - - List<HitBase> reRankedResults = null; - query = query.replace('+', ' '); - if (tooFewKeywords(query)|| orQuery(query)){ - reRankedResults = searchResults; - LOG.info("No re-ranking for "+query); - } - else - reRankedResults = calculateMatchScoreResortHits(searchResults, query); - /* - * <scores> -<score index="2">3.0005</score> -<score index="1">2.101</score> -<score index="3">2.1003333333333334</score> -<score index="4">2.00025</score> -<score index="5">1.1002</score> -</scores> - * - * - */ - StringBuffer buf = new StringBuffer(); - buf.append("<scores>"); - for(HitBase hit: reRankedResults){ - buf.append("<score index=\""+hit.getSource()+"\">"+hit.getGenerWithQueryScore()+"</score>"); - } - buf.append("</scores>"); - - NamedList<Object> scoreNum = new NamedList<Object>(); - for(HitBase hit: reRankedResults){ - scoreNum.add(hit.getSource(), hit.getGenerWithQueryScore()); - } - - StringBuffer bufNums = new StringBuffer(); - bufNums.append("order>"); - for(HitBase hit: reRankedResults){ - bufNums.append(hit.getSource()+"_"); - } - bufNums.append("/order>"); - - LOG.info("re-ranking results: "+buf.toString()); - NamedList<Object> values = rsp.getValues(); - values.remove("response"); - values.add("response", scoreNum); - values.add("new_order", bufNums.toString().trim()); - rsp.setAllValues(values); - - } 
- - private boolean orQuery(String query) { - if (query.indexOf('|')>-1) - return true; - - return false; - } - - private boolean tooFewKeywords(String query) { - String[] parts = query.split(" "); - if (parts!=null && parts.length< MAX_QUERY_LENGTH_NOT_TO_RERANK) - return true; - - return false; - } - - protected List<HitBase> calculateMatchScoreResortHits(List<HitBase> hits, - String searchQuery) { - - List<HitBase> newHitList = new ArrayList<HitBase>(); - int count = 0; - for (HitBase hit : hits) { - if (count>10) - break; - count++; - String[] pageSentsAndSnippet = formTextForReRankingFromHit(hit); - - Double score = 0.0; - try { - List<List<ParseTreeChunk>> match = null; - if (pageSentsAndSnippet!=null && pageSentsAndSnippet[0].length()>50){ - match = matcher.assessRelevanceCache(pageSentsAndSnippet[0] , - searchQuery); - score = parseTreeChunkListScorer.getParseTreeChunkListScore(match); - hit.setSource(match.toString()); - } - if (score < 2){ // attempt to match with snippet, if not much luck with original text - match = matcher.assessRelevanceCache(pageSentsAndSnippet[0] , - searchQuery); - score = parseTreeChunkListScorer.getParseTreeChunkListScore(match); - } - LOG.info(score + " | " +pageSentsAndSnippet[1]); - } catch (Exception e) { - LOG.severe("Problem processing snapshot " + pageSentsAndSnippet[1]); - e.printStackTrace(); - } - hit.setGenerWithQueryScore(score); - newHitList.add(hit); - } - - System.out.println("\n\n ============= old ORDER ================= "); - for (HitBase hit : newHitList) { - System.out.println(hit.getOriginalSentences().toString() + " => "+hit.getGenerWithQueryScore()); - System.out.println("match = "+hit.getSource()); - } - Collections.sort(newHitList, new HitBaseComparable()); - - System.out.println("\n\n ============= NEW ORDER ================= "); - for (HitBase hit : newHitList) { - System.out.println(hit.getOriginalSentences().toString() + " => "+hit.getGenerWithQueryScore()); - System.out.println("match = 
"+hit.getSource()); - } - - return newHitList; - } - - protected String[] formTextForReRankingFromHit(HitBase hit) { - HitBase hitWithFullSents = snp.formTextFromOriginalPageGivenSnippet(hit); - String textFromOriginalPage = ""; - try { - List<String> sents = hitWithFullSents.getOriginalSentences(); - for(String s: sents){ - textFromOriginalPage+=s+" "; - } - - if (textFromOriginalPage.startsWith(".")){ - textFromOriginalPage = textFromOriginalPage.substring(2); - } - textFromOriginalPage = textFromOriginalPage.replace(" . .", ". ").replace(". . ", ". "). - replace("..", ". ").trim(); - } catch (Exception e1) { - e1.printStackTrace(); - LOG.info("Problem processing snapshot "+hit.getAbstractText()); - } - hit.setPageContent(textFromOriginalPage); - String snapshot = hit.getAbstractText().replace("<b>...</b>", ". ").replace("<span class='best-phrase'>", " ").replace("<span>", " ").replace("<span>", " ") - .replace("<b>", "").replace("</b>", ""); - snapshot = snapshot.replace("</B>", "").replace("<B>", "") - .replace("<br>", "").replace("</br>", "").replace("...", ". ") - .replace("|", " ").replace(">", " ").replace(". .", ". "); - snapshot += " . " + hit.getTitle(); - - return new String[] { textFromOriginalPage, snapshot }; - } - - - public class HitBaseComparable implements Comparator<HitBase> { - // @Override - public int compare(HitBase o1, HitBase o2) { - return (o1.getGenerWithQueryScore() > o2.getGenerWithQueryScore() ? -1 - : (o1 == o2 ? 
0 : 1)); - } - } - -} - -/* - -http://dev1.exava.us:8086/solr/collection1/reranker/?q=search_keywords:I+want+style+in+my+every+day+fresh+design+iphone+cases -&t1=Personalized+iPhone+4+Cases&d1=spend+a+day+with+a+custom+iPhone+case -&t2=iPhone+Cases+to+spend+a+day&d2=Add+style+to+your+every+day+fresh+design+with+a+custom+iPhone+case -&t3=Plastic+iPhone+Cases&d3=Add+style+to+your+every+day+with+mobile+case+for+your+family -&t4=Personalized+iPhone+and+iPad+Cases&d4=Add+style+to+your+iPhone+and+iPad -&t5=iPhone+accessories+from+Apple&d5=Add+Apple+fashion+to+your+iPhone+and+iPad - -http://dev1.exava.us:8086/solr/collection1/reranker/?q=search_keywords:I+want+style+in+my+every+day+fresh+design+iphone+cases&t1=Personalized+iPhone+4+Cases&d1=spend+a+day+with+a+custom+iPhone+case&t2=iPhone+Cases+to+spend+a+day&d2=Add+style+to+your+every+day+fresh+design+with+a+custom+iPhone+case&t3=Plastic+iPhone+Cases&d3=Add+style+to+your+every+day+with+mobile+case+for+your+family&t4=Personalized+iPhone+and+iPad+Cases&d4=Add+style+to+your+iPhone+and+iPad&t5=iPhone+accessories+from+Apple&d5=Add+Apple+fashion+to+your+iPhone+and+iPad - */ \ No newline at end of file http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/2707f665/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/taxo_builder/DomainTaxonomyExtender.java ---------------------------------------------------------------------- diff --git a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/taxo_builder/DomainTaxonomyExtender.java b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/taxo_builder/DomainTaxonomyExtender.java index 8538c25..78ce8f5 100644 --- a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/taxo_builder/DomainTaxonomyExtender.java +++ b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/taxo_builder/DomainTaxonomyExtender.java @@ -25,13 +25,11 @@ import java.util.logging.Logger; import org.apache.commons.lang.StringUtils; -import 
opennlp.tools.parse_thicket.matching.Matcher; import opennlp.tools.similarity.apps.BingQueryRunner; import opennlp.tools.similarity.apps.HitBase; import opennlp.tools.similarity.apps.utils.StringCleaner; import opennlp.tools.stemmer.PStemmer; import opennlp.tools.textsimilarity.ParseTreeChunk; -import opennlp.tools.textsimilarity.ParseTreeChunkListScorer; import opennlp.tools.textsimilarity.SentencePairMatchResult; import opennlp.tools.textsimilarity.TextProcessor; import opennlp.tools.textsimilarity.chunker2matcher.ParserChunker2MatcherProcessor; @@ -46,9 +44,10 @@ public class DomainTaxonomyExtender { .getLogger("opennlp.tools.similarity.apps.taxo_builder.DomainTaxonomyExtender"); private BingQueryRunner brunner = new BingQueryRunner(); + private ParserChunker2MatcherProcessor matcher = ParserChunker2MatcherProcessor.getInstance(); protected static String BING_KEY = "WFoNMM706MMJ5JYfcHaSEDP+faHj3xAxt28CPljUAHA"; - Matcher matcher = new Matcher(); + private final static String TAXO_FILENAME = "taxo_data.dat"; @@ -161,8 +160,8 @@ public class DomainTaxonomyExtender { .getTitle() + " " + h1.getAbstractText()); String snapshot2 = StringCleaner.processSnapshotForMatching(h2 .getTitle() + " " + h2.getAbstractText()); - List<List<ParseTreeChunk>> overlaps =matcher.assessRelevance(snapshot1, snapshot2); - genResult.addAll(overlaps); + SentencePairMatchResult overlaps = matcher.assessRelevance(snapshot1, snapshot2); + genResult.addAll(overlaps.matchResult); } } } http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/2707f665/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/ParseTreeChunk.java ---------------------------------------------------------------------- diff --git a/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/ParseTreeChunk.java b/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/ParseTreeChunk.java index f151768..27f457c 100644 --- a/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/ParseTreeChunk.java 
+++ b/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/ParseTreeChunk.java @@ -17,6 +17,7 @@ package opennlp.tools.textsimilarity; +import java.io.Serializable; import java.util.ArrayList; import java.util.Collections; import java.util.List; @@ -27,7 +28,7 @@ import org.apache.commons.lang3.StringUtils; import opennlp.tools.parse_thicket.ParseTreeNode; -public class ParseTreeChunk { +public class ParseTreeChunk implements Serializable{ private String mainPOS; private List<String> lemmas; http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/2707f665/opennlp-similarity/src/test/java/opennlp/tools/fca/FCATest.java ---------------------------------------------------------------------- diff --git a/opennlp-similarity/src/test/java/opennlp/tools/fca/FCATest.java b/opennlp-similarity/src/test/java/opennlp/tools/fca/FCATest.java deleted file mode 100755 index 531e6ec..0000000 --- a/opennlp-similarity/src/test/java/opennlp/tools/fca/FCATest.java +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package opennlp.tools.fca; - -import java.io.FileNotFoundException; -import java.io.IOException; -import java.util.Arrays; -import java.util.LinkedHashSet; - -import junit.framework.TestCase; - -public class FCATest extends TestCase{ - ConceptLattice cl=null; -/* - public void testConceptLattice() { - - - try { - cl = new ConceptLattice("src/test/resources/fca/sports.cxt",true); - cl.printLatticeStats(); - cl.printLatticeFull(); - cl.printBinContext(); - - FcaWriter wt = new FcaWriter(); - wt.WriteStatsToCvs("stats.csv", cl, 0); - } catch (FileNotFoundException e) { - e.printStackTrace(); - } catch (IOException e) { - - e.printStackTrace(); - } - - FcaConverter converter = new FcaConverter(); - int [][] binCon = converter.latticeToContext(cl); - - if (binCon!=null){ - ConceptLattice new_cl = new ConceptLattice(binCon.length, binCon[0].length, binCon, false); - new_cl.printLatticeStats(); - new_cl.printLatticeFull(); - new_cl.printBinContext(); - FcaWriter wt = new FcaWriter(); - wt.WriteStatsToCvs("stats.txt", cl, 0); - //wt.WriteAsCxt("cl.cxt", cl); - wt.WriteAsCxt("cl_new.cxt", new_cl); - } - } - - public void testRandom(){ - RandomNoiseGenerator rng = new RandomNoiseGenerator(); - try { - cl = new ConceptLattice("src/test/resources/fca/sports.cxt",true); - } catch (FileNotFoundException e) { - e.printStackTrace(); - } catch (IOException e) { - e.printStackTrace(); - } - //int[][] bc = rng.AddObjectsAttributesWithProbability(10, 0.5, cl.binaryContext); - int[][] bc = rng.AlterCellsWithProbability(0.2, cl.binaryContext); - ConceptLattice new_cl = new ConceptLattice(bc.length, bc[0].length, bc, false); - new_cl.printLatticeStats(); - new_cl.printLattice(); - } -*/ -} - http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/2707f665/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/apps/RelatedSentenceFinderTest.java ---------------------------------------------------------------------- diff --git 
a/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/apps/RelatedSentenceFinderTest.java b/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/apps/RelatedSentenceFinderTest.java index f385a69..f5c6222 100644 --- a/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/apps/RelatedSentenceFinderTest.java +++ b/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/apps/RelatedSentenceFinderTest.java @@ -43,7 +43,7 @@ public class RelatedSentenceFinderTest extends TestCase { //assertTrue(result.getFragments().get(0).getFragment().indexOf("Swiss Patent Office")>-1); } - + /* public void testBuildParagraphOfGeneratedTextTest(){ HitBase input = new HitBase(); input.setAbstractText("Albert Einstein was a German-born theoretical physicist who developed the general theory of relativity, one of the two pillars of modern physics (alongside ..."); @@ -57,7 +57,7 @@ public class RelatedSentenceFinderTest extends TestCase { assertTrue(result.getFragments().size()>0); assertTrue(result.getFragments().get(0).getFragment().indexOf("Albert Einstein")>-1); } - +*/ public void testBuildParagraphOfGeneratedTextTestYearInTheEnd(){ @@ -74,6 +74,7 @@ public class RelatedSentenceFinderTest extends TestCase { assertTrue(result.getFragments().get(0).getFragment().indexOf("Albert Einstein")>-1); } + /* public void testBuildParagraphOfGeneratedTextTestBio1(){ HitBase input = new HitBase(); input.setAbstractText("Today, the practical applications of Einstein�s theories ..."); @@ -87,7 +88,7 @@ public class RelatedSentenceFinderTest extends TestCase { assertTrue(result.getFragments().size()>0); assertTrue(result.getFragments().get(0).getFragment().indexOf("Einstein")>-1); } -/* + public void testBuildParagraphOfGeneratedTextTestBio2(){ HitBase input = new HitBase(); input.setAbstractText("The theory of relativity is a beautiful example of ..."); 
http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/2707f665/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/apps/SnippetToParagraphTest.java ---------------------------------------------------------------------- diff --git a/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/apps/SnippetToParagraphTest.java b/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/apps/SnippetToParagraphTest.java deleted file mode 100644 index fb6259b..0000000 --- a/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/apps/SnippetToParagraphTest.java +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package opennlp.tools.parse_thicket.apps; - - -import opennlp.tools.similarity.apps.HitBase; -import junit.framework.TestCase; - - -public class SnippetToParagraphTest extends TestCase { - SnippetToParagraph converter = new SnippetToParagraph(); - - public void testConversionTest(){ - HitBase input = new HitBase(); - input.setAbstractText("... complicity in the military's latest failure to uphold their own standards of conduct. 
Nor do I see a distinction between the service member who orchestrated this offense ..."); - input.setUrl("http://armedservices.house.gov/index.cfm/press-releases?ContentRecord_id=b5d9aeab-6745-4eba-94ea-12295fd40e67"); - input.setTitle("Press Releases - News - Armed Services Republicans"); - HitBase result = converter.formTextFromOriginalPageGivenSnippet(input); - assertTrue(result.getOriginalSentences()!=null); - assertTrue(result.getOriginalSentences().size()>0); - } - -} http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/2707f665/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/communicative_actions/CommunicativeActionsArcBuilderTest.java ---------------------------------------------------------------------- diff --git a/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/communicative_actions/CommunicativeActionsArcBuilderTest.java b/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/communicative_actions/CommunicativeActionsArcBuilderTest.java deleted file mode 100644 index bbce9e8..0000000 --- a/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/communicative_actions/CommunicativeActionsArcBuilderTest.java +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package opennlp.tools.parse_thicket.communicative_actions; - -import java.util.ArrayList; -import java.util.List; - -import opennlp.tools.parse_thicket.ParseThicket; -import opennlp.tools.parse_thicket.WordWordInterSentenceRelationArc; -import opennlp.tools.parse_thicket.matching.Matcher; -import opennlp.tools.similarity.apps.HitBase; - -import junit.framework.TestCase; - -public class CommunicativeActionsArcBuilderTest extends TestCase { - Matcher matcher = new Matcher(); - - public void testCommunicativeActionsArcBuilderTestQ(){ - String text = "As a US citizen living abroad, I am concerned about the health reform regulation of 2014. "+ - "I do not want to wait till I am sick to buy health insurance. "+ - "Yet I am afraid I will end up being requested to pay the tax. "+ - "Although I live abroad, I am worried about having to pay a fine for being reported as not having health insurance coverage. "; - ParseThicket pt = matcher.buildParseThicketFromTextWithRST(text); - List<WordWordInterSentenceRelationArc> results = new ArrayList<WordWordInterSentenceRelationArc>(); - for(WordWordInterSentenceRelationArc arc: pt.getArcs()){ - if(arc.getArcType().getType().startsWith("ca")){ - results.add(arc); - System.out.println(arc); - } - } - assertTrue(results.size()>11); - - } - public void testCommunicativeActionsArcBuilderTestA(){ - String text = "People are worried about paying a fine for not carrying health insurance coverage, having been informed by IRS about new regulations. "+ - "Yet hardly anyone is expected to pay the tax, when the health reform law takes full effect in 2014. "+ - "The individual mandate confirms that people don't wait until they are sick to buy health insurance. 
"+ - "People are exempt from health insurance fine if they report they make too little money, or US citizens living abroad."; - ParseThicket pt = matcher.buildParseThicketFromTextWithRST(text); - List<WordWordInterSentenceRelationArc> results = new ArrayList<WordWordInterSentenceRelationArc>(); - for(WordWordInterSentenceRelationArc arc: pt.getArcs()){ - if(arc.getArcType().getType().startsWith("ca")){ - results.add(arc); - System.out.println(arc); - } - } - assertTrue(results.size()>5); - } - - - - -} http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/2707f665/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/external_rst/ExternalRSTImporterTest.java ---------------------------------------------------------------------- diff --git a/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/external_rst/ExternalRSTImporterTest.java b/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/external_rst/ExternalRSTImporterTest.java deleted file mode 100644 index c2b5877..0000000 --- a/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/external_rst/ExternalRSTImporterTest.java +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package opennlp.tools.parse_thicket.external_rst; - - -import java.util.List; - -import junit.framework.TestCase; -import opennlp.tools.parse_thicket.ParseThicket; -import opennlp.tools.parse_thicket.WordWordInterSentenceRelationArc; -import opennlp.tools.parse_thicket.matching.Matcher; - -public class ExternalRSTImporterTest extends TestCase{ - - - public void testBuildParseThicketFromTextWithRSTtest(){ - Matcher m = new Matcher(); - // We combine our own RST rules with those of Joty 2014 to produce an augmented parse thicket - String externalRSTresultFilename = "/external_rst/resInput.txt"; - - ParseThicket pt = m.buildParseThicketFromTextWithRST("I explained that I made a deposit, and then wrote a check, which bounced due to a bank error. A customer service representative confirmed that it usually takes a day to process the deposit. " - + "I reminded that I was unfairly charged an overdraft fee amonth ago in a similar situation. "+ - " They explained that the overdraft fee was due to insufficient funds as disclosed in my account information. I disagreed with their fee because I made a deposit well in "+ - " advance and wanted this fee back. They denied responsibility saying that nothing an be done at this point. 
They also confirmed that I needed to look into the account rules closer."); - ExternalRSTImporter imp = new ExternalRSTImporter(); - - List<WordWordInterSentenceRelationArc> arcsRST = imp.buildPT2ptPhrases( pt , externalRSTresultFilename); - assertTrue(arcsRST .size() > 10); - - - } - -} http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/2707f665/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/matching/PT2ThicketPhraseBuilderTest.java ---------------------------------------------------------------------- diff --git a/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/matching/PT2ThicketPhraseBuilderTest.java b/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/matching/PT2ThicketPhraseBuilderTest.java deleted file mode 100644 index 0517f4c..0000000 --- a/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/matching/PT2ThicketPhraseBuilderTest.java +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package opennlp.tools.parse_thicket.matching; - -import java.util.List; - -import opennlp.tools.parse_thicket.ParseTreeNode; - - -import junit.framework.TestCase; - -public class PT2ThicketPhraseBuilderTest extends TestCase { - private PT2ThicketPhraseBuilder builder = new PT2ThicketPhraseBuilder(); - - public void testParsePhrase(){ - String line = "(NP (NNP Iran)) (VP (VBZ refuses) (S (VP (TO to) (VP (VB accept) (S (NP (DT the) " + - "(NNP UN) (NN proposal)) (VP (TO to) (VP (VB end) (NP (PRP$ its) (NN dispute))))))))"; - - List<ParseTreeNode> res = builder.parsePhrase("NP", line); - System.out.println(res); - assertTrue(res!=null); - assertTrue(res.size()>0); - - } -}
