http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/1f97041b/opennlp-similarity/src/main/java/opennlp/tools/fca/BasicLevelMetrics.java
----------------------------------------------------------------------
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/fca/BasicLevelMetrics.java b/opennlp-similarity/src/main/java/opennlp/tools/fca/BasicLevelMetrics.java
new file mode 100755
index 0000000..f1f6bd0
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/fca/BasicLevelMetrics.java
@@ -0,0 +1,676 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package opennlp.tools.fca;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;

import org.apache.commons.collections.ListUtils;

// NOTE(review): this import appears unused (no reference to "out" in this file)
// and drags in a Stanford NLP dependency — candidate for removal.
import edu.stanford.nlp.io.EncodingPrintWriter.out;

/**
 * Computes "basic level" metrics for the formal concepts of a {@link ConceptLattice}.
 *
 * <p>Implemented families of metrics (each writes its score into fields of
 * {@code FormalConcept}):
 * <ul>
 *   <li>Similarity approach (S) — {@link #similarityGoguenNorm()}, based on
 *       pairwise object similarity (Jaccard and Simple Matching Coefficient);</li>
 *   <li>Cue validity (CV) — {@link #cueValidity()};</li>
 *   <li>Category feature collocation (CFC) — {@link #categoryFeatureCollocation()};</li>
 *   <li>Category utility (CU) — {@link #categoryUtility()};</li>
 *   <li>Predictability (P) — {@link #predictability()}.</li>
 * </ul>
 *
 * <p>Lazy initialization: {@link #setUp()} builds the attribute-extent /
 * object-intent caches and the object-by-object similarity matrices; the public
 * metric methods call it on demand.
 */
public class BasicLevelMetrics {


    // Lattice whose concepts are scored.
    ConceptLattice cl;
    // attributesExtent.get(a) = list of object ids having attribute a (filled in setUp()).
    ArrayList<ArrayList<Integer>> attributesExtent = null;
    // objectsIntent.get(o) = list of attribute ids of object o (filled in setUp()).
    ArrayList<ArrayList<Integer>> objectsIntent = null;
    // All attribute ids 0..attributeCount-1 (filled in setUp()).
    ArrayList<Integer> attributes = null;
    // Pairwise object similarity matrices (Jaccard and Simple Matching Coefficient).
    double[][] objectsSimilarityJ = null;
    double [][] objectsSimilaritySMC = null;


    /**
     * Creates the metrics helper for the given lattice. The similarity matrices
     * are allocated here but only filled by {@link #setUp()}.
     *
     * @param cl lattice to analyze (must expose objectCount/attributeCount)
     */
    public BasicLevelMetrics (ConceptLattice cl){
        this.cl = cl;
        this.attributesExtent = null;
        objectsSimilarityJ = new double [cl.objectCount][cl.objectCount];
        objectsSimilaritySMC = new double [cl.objectCount][cl.objectCount];
    }

    /**
     * Builds the caches (attribute extents, object intents, attribute id list)
     * and fills the symmetric object-similarity matrices. Diagonal entries are
     * set to 1 (an object is fully similar to itself).
     */
    public void setUp(){
        attributesExtent = new ArrayList<ArrayList<Integer>>();
        objectsIntent = new ArrayList<ArrayList<Integer>>();
        attributes = new ArrayList<Integer>();

        for (int i=0;i<cl.attributeCount;i++){
            attributesExtent.add((ArrayList<Integer>) cl.getAttributeExtByID(i));
            attributes.add(i);
        }

        for (int i=0;i<cl.objectCount;i++){
            objectsIntent.add((ArrayList<Integer>) cl.getObjectIntByID(i));
        }

        double [] buf = new double[2];

        // Upper triangle only; both [i][j] and [j][i] are written (symmetric matrices).
        for (int i = 0; i < cl.objectCount; i++){
            for (int j = i + 1 ; j < cl.objectCount; j++){
                buf = simJ_SMC(objectsIntent.get(i), objectsIntent.get(j));
                objectsSimilarityJ[i][j] = buf[0];
                objectsSimilarityJ[j][i] = buf[0];
                objectsSimilaritySMC[i][j] = buf[1];
                objectsSimilaritySMC[j][i] = buf[1];
            }
            objectsSimilarityJ[i][i] = 1;
            objectsSimilaritySMC[i][i] = 1;
        }

        //System.out.println("J");
        //System.out.println(Arrays.deepToString(objectsSimilarityJ));
        //System.out.println("SMC");
        //System.out.println(Arrays.deepToString(objectsSimilaritySMC));

    }

    //Utility functions for Similarity approach (S)

    /**
     * Simple Matching Coefficient between two attribute sets:
     * (|intersection| + |attributes outside the union|) / |all attributes|.
     * Returns 0 when there are no attributes at all.
     */
    public double simSMC (ArrayList<Integer> intent1, ArrayList<Integer>intent2){
        int tp = (ListUtils.intersection(intent1,intent2)).size();
        ArrayList<Integer> fnlst = new ArrayList<Integer>();
        fnlst.addAll(this.attributes);
        fnlst.removeAll(ListUtils.union(intent1,intent2));
        int fn = fnlst.size();
        return (this.attributes.size()>0) ? 1.*(tp + fn)/this.attributes.size() : 0;
    }

    /**
     * Jaccard similarity between two attribute sets: |intersection| / |union|.
     * NOTE(review): divides by the union size without a zero guard — two empty
     * intents would yield NaN; {@link #simJ_SMC} guards this case.
     */
    public double simJ (ArrayList<Integer> intent1, ArrayList<Integer>intent2){
        return 1.*(ListUtils.intersection(intent1,intent2)).size()/(ListUtils.union(intent1,intent2)).size() ;
    }

    /**
     * Computes both similarities in one pass.
     *
     * @return {Jaccard, SMC}; each falls back to 0 when its denominator is 0
     */
    public double [] simJ_SMC(ArrayList<Integer> intent1, ArrayList<Integer>intent2){
        double simJ = 0;
        double simSMC = 0;
        Set<Integer> intersection = new HashSet<Integer>();
        intersection.addAll(intent1);
        intersection.retainAll(intent2);

        Set<Integer> union = new HashSet<Integer>();
        union.addAll(intent1);
        union.addAll(intent2);
        int fn = 0;
        Set<Integer> unionOut = new HashSet<Integer>();
        unionOut.addAll(this.attributes);
        unionOut.removeAll(union);
        simSMC = (this.attributes.size() > 0) ? 1.*(intersection.size() + unionOut.size())/this.attributes.size() : 0;
        simJ = (union.size() > 0) ? 1.*intersection.size()/union.size() : 0;
        return new double[] {simJ, simSMC};
    }



    /**
     * Average pairwise SMC similarity over the concept's extent.
     * Singleton extent is defined as fully cohesive (1); empty extent yields 0.
     */
    public double avgCohSMC (FormalConcept c){
        double sum = 0;
        if (c.extent.size() == 1)
            return 1.;//c.intent.size();
        if (c.extent.size() == 0)
            return 0.;
        else {
            for (Integer i:c.extent){
                for (Integer j: c.extent){
                    if (i<j)
                        sum+=objectsSimilaritySMC[i][j];
                }
            }
            // x2 because each unordered pair {i, j} is summed only once (i < j).
            return (c.extent.size() > 1 ) ? 2.*sum/c.extent.size()/(c.extent.size()-1) : 0;
        }
    }

    /**
     * Average pairwise Jaccard similarity over the concept's extent.
     * Singleton extent yields 1; empty extent yields 0.
     */
    public double avgCohJ (FormalConcept c){
        double sum = 0;
        if (c.extent.size() == 1)
            return 1.;//c.intent.size();
        if (c.extent.size() == 0)
            return 0.;
        else {
            for (Integer i:c.extent){
                for (Integer j: c.extent){
                    if (i<j){
                        sum+=objectsSimilarityJ[i][j];
                    }
                }
            }
            // x2 because each unordered pair {i, j} is summed only once (i < j).
            return (c.extent.size() > 1 ) ? 2.*sum/c.extent.size()/(c.extent.size()-1) : 0;
        }
    }

    /**
     * Minimum pairwise Jaccard similarity over the concept's extent
     * (diagonal pairs i==j are included; they contribute 1, so they only
     * matter for a singleton extent). Empty extent yields 0.
     */
    public double minCohJ (FormalConcept c){
        double min = Integer.MAX_VALUE,
                val = 0;

        for (Integer i:c.extent){
            for (Integer j: c.extent){
                val = objectsSimilarityJ[i][j];
                if (val<min)
                    min = val;
            }
        }
        return (min < Integer.MAX_VALUE) ? min : 0;
    }

    /**
     * Minimum pairwise SMC similarity over the concept's extent; see
     * {@link #minCohJ(FormalConcept)} for the conventions.
     */
    public double minCohSMC (FormalConcept c){
        double min = Integer.MAX_VALUE,
                val = 0;
        for (Integer i:c.extent){
            for (Integer j: c.extent){
                val = objectsSimilaritySMC[i][j];
                if (val<min)
                    min = val;
            }
        }
        return (min < Integer.MAX_VALUE) ? min : 0;
    }


    /**
     * Upper-neighbor term: average drop in avg-Jaccard cohesion towards the
     * parents that are strictly less cohesive than {@code c}. The result is
     * gated by a threshold: it is non-zero only when at least a fraction
     * {@code tetta} of the parents are less cohesive.
     */
    public double upperCohAvgByAvgJ(FormalConcept c, float tetta){
        //average alpha with average cohesion J
        double sum = 0;
        Set<Integer> upperNeighbors =c.parents;
        int rightNeighborsNumber = 0;
        float truthDegree = 0;
        for (Integer i: upperNeighbors){
            if (c.cohAvgJ > cl.conceptList.get(i).cohAvgJ){
                rightNeighborsNumber++;
                sum+=1.*cl.conceptList.get(i).cohAvgJ/c.cohAvgJ;
            }
        }
        if (rightNeighborsNumber == 0)
            return 0;
        else{
            truthDegree = (1.*rightNeighborsNumber/upperNeighbors.size()>=tetta) ? 1 : 0;
            //truthDegree = (float) Math.min(1.0, truthDegree);
            return (1-sum/rightNeighborsNumber)*truthDegree;
        }
    }

    /** Same as {@link #upperCohAvgByAvgJ} but using the min-Jaccard cohesion. */
    public double upperCohAvgByMinJ(FormalConcept c,float tetta){
        //average alpha with min cohesion J
        double sum = 0;
        Set<Integer> upperNeighbors =c.parents;
        int rightNeighborsNumber = 0;
        float truthDegree = 0;
        for (Integer i: upperNeighbors){
            if (c.cohMinJ > cl.conceptList.get(i).cohMinJ){
                rightNeighborsNumber++;
                sum+=1.*cl.conceptList.get(i).cohMinJ/c.cohMinJ;
            }
        }
        if (rightNeighborsNumber == 0)
            return 0;
        else{
            truthDegree = (1.*rightNeighborsNumber/upperNeighbors.size()>=tetta) ? 1 : 0;
            truthDegree = (float) Math.min(1.0, truthDegree);  // no-op: truthDegree is already 0 or 1
            return (1-sum/rightNeighborsNumber)*truthDegree;
        }
    }

    /**
     * Upper-neighbor term using the maximal (worst-case) cohesion ratio over
     * strictly-less-cohesive parents, with avg-Jaccard cohesion.
     */
    public double upperCohMinByAvgJ(FormalConcept c,float tetta){
        //min alpha with average cohesion J
        double max = Integer.MIN_VALUE,
                val = 0;
        int rightNeighborsNumber = 0;
        float truthDegree = 0;
        Set<Integer> upperNeighbors =c.parents;
        for (Integer i: upperNeighbors){
            if (c.cohAvgJ > cl.conceptList.get(i).cohAvgJ){
                rightNeighborsNumber++;
                val = 1.*cl.conceptList.get(i).cohAvgJ/c.cohAvgJ;
                if (val>max)
                    max = val;
            }
        }
        if (rightNeighborsNumber == 0)
            return 0;
        else{
            truthDegree = (1.*rightNeighborsNumber/upperNeighbors.size()>=tetta) ? 1 : 0;
            truthDegree = (float) Math.min(1.0, truthDegree);  // no-op: truthDegree is already 0 or 1
            return (1-max)*truthDegree;
        }
    }

    /** Same as {@link #upperCohMinByAvgJ} but using the min-Jaccard cohesion. */
    public double upperCohMinByMinJ(FormalConcept c,float tetta){
        //min alpha with min cohesion J
        double max = Integer.MIN_VALUE,
                val = 0;
        int rightNeighborsNumber = 0;
        float truthDegree = 0;
        Set<Integer> upperNeighbors =c.parents;
        for (Integer i: upperNeighbors){
            if (c.cohMinJ > cl.conceptList.get(i).cohMinJ){
                rightNeighborsNumber++;
                val = 1.*cl.conceptList.get(i).cohMinJ/c.cohMinJ;
                if (val>max)
                    max = val;
            }
        }
        if (rightNeighborsNumber == 0)
            return 0;
        else{
            //truthDegree = rightNeighborsNumber/tetta/upperNeighbors.size();
            truthDegree = (1.*rightNeighborsNumber/upperNeighbors.size()>=tetta) ? 1 : 0;
            truthDegree = (float) Math.min(1.0, truthDegree);  // no-op: truthDegree is already 0 or 1
            return (1-max)*truthDegree;
        }
    }

    /** Same as {@link #upperCohAvgByAvgJ} but using the avg-SMC cohesion. */
    public double upperCohAvgByAvgSMC(FormalConcept c,float tetta){
        //average alpha with average cohesion SMC
        double sum = 0;
        Set<Integer> upperNeighbors =c.parents;
        int rightNeighborsNumber = 0;
        float truthDegree = 0;
        for (Integer i: upperNeighbors){
            if (c.cohAvgSMC > cl.conceptList.get(i).cohAvgSMC){
                rightNeighborsNumber++;
                sum+=1.*cl.conceptList.get(i).cohAvgSMC/c.cohAvgSMC;
            }
        }
        if (rightNeighborsNumber == 0)
            return 0;
        else{
            truthDegree = (1.*rightNeighborsNumber/upperNeighbors.size()>=tetta) ? 1 : 0;
            return (1-sum/rightNeighborsNumber)*truthDegree;
        }
    }

    /** Same as {@link #upperCohAvgByAvgJ} but using the min-SMC cohesion. */
    public double upperCohAvgByMinSMC(FormalConcept c,float tetta){
        //average alpha with min cohesion SMC
        double sum = 0;
        Set<Integer> upperNeighbors =c.parents;
        int rightNeighborsNumber = 0;
        float truthDegree = 0;
        for (Integer i: upperNeighbors){
            if (c.cohMinSMC > cl.conceptList.get(i).cohMinSMC){
                rightNeighborsNumber++;
                sum+=1.*cl.conceptList.get(i).cohMinSMC/c.cohMinSMC;
            }
        }
        if (rightNeighborsNumber == 0)
            return 0;
        else{
            truthDegree = (1.*rightNeighborsNumber/upperNeighbors.size()>=tetta) ? 1 : 0;
            return (1-sum/rightNeighborsNumber)*truthDegree;
        }
    }


    /** Same as {@link #upperCohMinByAvgJ} but using the avg-SMC cohesion. */
    public double upperCohMinByAvgSMC(FormalConcept c,float tetta){
        //min alpha with average cohesion SMC

        double max = Integer.MIN_VALUE,
                val = 0;
        int rightNeighborsNumber = 0;
        float truthDegree = 0;
        Set<Integer> upperNeighbors =c.parents;
        for (Integer i: upperNeighbors){
            if (c.cohAvgSMC > cl.conceptList.get(i).cohAvgSMC){
                rightNeighborsNumber++;
                val = 1.*cl.conceptList.get(i).cohAvgSMC/c.cohAvgSMC;
                if (val>max)
                    max = val;
            }
        }
        if (rightNeighborsNumber == 0)
            return 0;
        else{
            truthDegree = (1.*rightNeighborsNumber/upperNeighbors.size()>=tetta) ? 1 : 0;
            truthDegree = (float) Math.min(1.0, truthDegree);  // no-op: truthDegree is already 0 or 1
            return (1-max)*truthDegree;
        }
    }

    /** Same as {@link #upperCohMinByAvgJ} but using the min-SMC cohesion. */
    public double upperCohMinByMinSMC(FormalConcept c,float tetta){
        //min alpha with min cohesion SMC
        double max = Integer.MIN_VALUE,
                val = 0;
        int rightNeighborsNumber = 0;
        float truthDegree = 0;
        Set<Integer> upperNeighbors =c.parents;
        for (Integer i: upperNeighbors){
            if (c.cohMinSMC > cl.conceptList.get(i).cohMinSMC){
                rightNeighborsNumber++;
                val = 1.*cl.conceptList.get(i).cohMinSMC/c.cohMinSMC;
                if (val>max)
                    max = val;
            }
        }
        if (rightNeighborsNumber == 0)
            return 0;
        else{
            truthDegree = (1.*rightNeighborsNumber/upperNeighbors.size()>=tetta) ? 1 : 0;
            truthDegree = (float) Math.min(1.0, truthDegree);  // no-op: truthDegree is already 0 or 1
            return (1-max)*truthDegree;
        }
    }



    /**
     * Lower-neighbor term: average cohesion ratio towards the children that
     * are strictly more cohesive than {@code c} (avg-Jaccard cohesion), gated
     * by the {@code tetta} fraction threshold like the upper-neighbor terms.
     */
    public double lowerCohAvgByAvgJ(FormalConcept c,float tetta){
        double sum = 0;
        Set<Integer> lowerNeighbors =c.childs;
        int rightNeighborsNumber = 0;
        float truthDegree = 0;
        for (Integer i: lowerNeighbors){
            if (c.cohAvgJ < cl.conceptList.get(i).cohAvgJ){
                rightNeighborsNumber++;
                sum+=1.*c.cohAvgJ/cl.conceptList.get(i).cohAvgJ;
            }
        }
        if (rightNeighborsNumber == 0)
            return 0;
        else{
            truthDegree = (1.*rightNeighborsNumber/lowerNeighbors.size()>=tetta) ? 1 : 0;
            truthDegree = (float) Math.min(1.0, truthDegree);  // no-op: truthDegree is already 0 or 1
            return (sum/rightNeighborsNumber)*truthDegree;
        }
    }

    /** Same as {@link #lowerCohAvgByAvgJ} but using the min-Jaccard cohesion. */
    public double lowerCohAvgByMinJ(FormalConcept c,float tetta){
        double sum = 0;
        int rightNeighborsNumber = 0;
        float truthDegree = 0;
        Set<Integer> lowerNeighbors =c.childs;
        for (Integer i: lowerNeighbors){
            if (c.cohMinJ< cl.conceptList.get(i).cohMinJ){
                rightNeighborsNumber++;
                sum+=1.*c.cohMinJ/cl.conceptList.get(i).cohMinJ;
            }
        }
        if (rightNeighborsNumber == 0)
            return 0;
        else{
            //truthDegree = rightNeighborsNumber/tetta/lowerNeighbors.size();
            truthDegree = (1.*rightNeighborsNumber/lowerNeighbors.size()>=tetta) ? 1 : 0;
            truthDegree = (float) Math.min(1.0, truthDegree);  // no-op: truthDegree is already 0 or 1
            return (sum/rightNeighborsNumber)*truthDegree;
        }
    }

    /**
     * Lower-neighbor term using the minimal cohesion ratio over
     * strictly-more-cohesive children, with avg-Jaccard cohesion.
     */
    public double lowerCohMinByAvgJ(FormalConcept c,float tetta){
        double min = Integer.MAX_VALUE,
                val = 0;
        int rightNeighborsNumber = 0;
        float truthDegree = 0;
        Set<Integer> lowerNeighbors =c.childs;
        for (Integer i: lowerNeighbors){
            if (c.cohAvgJ< cl.conceptList.get(i).cohAvgJ){
                rightNeighborsNumber++;
                val = 1.*c.cohAvgJ/cl.conceptList.get(i).cohAvgJ;
                if (val<min)
                    min = val;
            }
        }
        if (rightNeighborsNumber == 0)
            return 0;
        else{
            truthDegree = (1.*rightNeighborsNumber/lowerNeighbors.size()>=tetta) ? 1 : 0;
            truthDegree = (float) Math.min(1.0, truthDegree);  // no-op: truthDegree is already 0 or 1
            return (min)*truthDegree;
        }
    }

    /** Same as {@link #lowerCohMinByAvgJ} but using the min-Jaccard cohesion. */
    public double lowerCohMinByMinJ(FormalConcept c,float tetta){
        double min = Integer.MAX_VALUE,
                val = 0;
        int rightNeighborsNumber = 0;
        float truthDegree = 0;
        Set<Integer> lowerNeighbors =c.childs;
        for (Integer i: lowerNeighbors){
            if (c.cohMinJ< cl.conceptList.get(i).cohMinJ){
                rightNeighborsNumber++;
                val = 1.*c.cohMinJ/cl.conceptList.get(i).cohMinJ;
                if (val<min)
                    min = val;
            }
        }
        if (rightNeighborsNumber == 0)
            return 0;
        else{
            truthDegree = (1.*rightNeighborsNumber/lowerNeighbors.size()>=tetta) ? 1 : 0;
            truthDegree = (float) Math.min(1.0, truthDegree);  // no-op: truthDegree is already 0 or 1
            return (min)*truthDegree;
        }
    }

    /** Same as {@link #lowerCohAvgByAvgJ} but using the avg-SMC cohesion. */
    public double lowerCohAvgByAvgSMC(FormalConcept c,float tetta){
        double sum = 0;
        int rightNeighborsNumber = 0;
        float truthDegree = 0;
        Set<Integer> lowerNeighbors =c.childs;
        for (Integer i: lowerNeighbors){
            if (c.cohAvgSMC < cl.conceptList.get(i).cohAvgSMC){
                rightNeighborsNumber++;
                sum+=1.*c.cohAvgSMC/cl.conceptList.get(i).cohAvgSMC;
            }
        }
        if (rightNeighborsNumber == 0)
            return 0;
        else{
            truthDegree = (1.*rightNeighborsNumber/lowerNeighbors.size()>=tetta) ? 1 : 0;
            truthDegree = (float) Math.min(1.0, truthDegree);  // no-op: truthDegree is already 0 or 1
            return (sum/rightNeighborsNumber)*truthDegree;
        }
    }

    /** Same as {@link #lowerCohAvgByAvgJ} but using the min-SMC cohesion. */
    public double lowerCohAvgByMinSMC(FormalConcept c,float tetta){
        double sum = 0;
        int rightNeighborsNumber = 0;
        float truthDegree = 0;
        Set<Integer> lowerNeighbors =c.childs;
        for (Integer i: lowerNeighbors){
            if (c.cohMinSMC < cl.conceptList.get(i).cohMinSMC){
                rightNeighborsNumber++;
                sum+=1.*c.cohMinSMC/cl.conceptList.get(i).cohMinSMC;
            }
        }
        if (rightNeighborsNumber == 0)
            return 0;
        else{
            truthDegree = (1.*rightNeighborsNumber/lowerNeighbors.size()>=tetta) ? 1 : 0;
            truthDegree = (float) Math.min(1.0, truthDegree);  // no-op: truthDegree is already 0 or 1
            return (sum/rightNeighborsNumber)*truthDegree;
        }
    }

    /**
     * Same as {@link #lowerCohMinByAvgJ} but using the avg-SMC cohesion.
     * NOTE(review): this variant uses {@code <=} where every sibling uses a
     * strict {@code <} — equal-cohesion children count here (ratio 1). Verify
     * whether that asymmetry is intentional.
     */
    public double lowerCohMinByAvgSMC(FormalConcept c,float tetta){
        double min = Integer.MAX_VALUE,
                val = 0;
        int rightNeighborsNumber = 0;
        float truthDegree = 0;
        Set<Integer> lowerNeighbors =c.childs;
        for (Integer i: lowerNeighbors){
            if (c.cohAvgSMC<=cl.conceptList.get(i).cohAvgSMC){
                rightNeighborsNumber++;
                val = 1.*c.cohAvgSMC/cl.conceptList.get(i).cohAvgSMC;
                if (val<min)
                    min = val;
            }
        }
        if (rightNeighborsNumber == 0)
            return 0;
        else{
            truthDegree = (1.*rightNeighborsNumber/lowerNeighbors.size()>=tetta) ? 1 : 0;
            truthDegree = (float) Math.min(1.0, truthDegree);  // no-op: truthDegree is already 0 or 1
            return (min)*truthDegree;
        }
    }


    /** Same as {@link #lowerCohMinByAvgJ} but using the min-SMC cohesion. */
    public double lowerCohMinByMinSMC(FormalConcept c,float tetta){
        double min = Integer.MAX_VALUE,
                val = 0;
        int rightNeighborsNumber = 0;
        float truthDegree = 0;
        Set<Integer> lowerNeighbors =c.childs;
        for (Integer i: lowerNeighbors){
            if (c.cohMinSMC< cl.conceptList.get(i).cohMinSMC){
                rightNeighborsNumber++;
                val = 1.*c.cohMinSMC/cl.conceptList.get(i).cohMinSMC;
                if (val<min)
                    min = val;
            }
        }
        if (rightNeighborsNumber == 0)
            return 0;
        else{
            truthDegree = (1.*rightNeighborsNumber/lowerNeighbors.size()>=tetta) ? 1 : 0;
            return (min)*truthDegree;
        }
    }


    //Similarity approach (S)
    //group of metrics:
    // SMC avg_avg, avg_min, min_avg
    // J avg_avg, avg_min, min_avg
    /**
     * Similarity-based basic-level scores (Goguen-style product of three
     * factors). For every concept the four cohesion values are cached on the
     * concept, then each blS_* score is the product of the concept's own
     * cohesion, its upper-neighbor term and its lower-neighbor term — or 0
     * when the own cohesion is 0.
     */
    public void similarityGoguenNorm(){

        // First pass: compute and cache each concept's pairwise-object cohesion
        // (avg/min, Jaccard/SMC). Second pass: combine with neighbor terms.
        double[] buf = new double[2];  // NOTE(review): unused here

        if (objectsSimilaritySMC == null)
            this.setUp();

        //upper neighbors - alpha2
        //lower neighbors - alpha3
        for (FormalConcept c: cl.conceptList){
            c.cohAvgJ = avgCohJ(c);
            c.cohMinJ = minCohJ(c);
            c.cohAvgSMC = avgCohSMC(c);
            c.cohMinSMC = minCohSMC(c);

        }
        for (FormalConcept c: cl.conceptList){
            float tetta = 1;
            c.blS_Jaa = (c.cohAvgJ != 0) ? c.cohAvgJ * upperCohAvgByAvgJ(c,tetta) * lowerCohAvgByAvgJ(c,tetta) : 0;
            c.blS_Jma = (c.cohMinJ != 0) ? c.cohMinJ * upperCohAvgByMinJ(c,tetta) * lowerCohAvgByMinJ(c,tetta) : 0;
            c.blS_Jam = (c.cohAvgJ != 0) ? c.cohAvgJ * upperCohMinByAvgJ(c,tetta) * lowerCohMinByAvgJ(c,tetta) : 0;
            c.blS_Jmm = (c.cohMinJ != 0) ? c.cohMinJ * upperCohMinByMinJ(c,tetta) * lowerCohMinByMinJ(c,tetta) : 0;
            c.blS_SMCaa = (c.cohAvgSMC != 0) ? c.cohAvgSMC * upperCohAvgByAvgSMC(c,tetta) * lowerCohAvgByAvgSMC(c,tetta) : 0;
            c.blS_SMCma = (c.cohMinSMC != 0) ? c.cohMinSMC * upperCohAvgByMinSMC(c,tetta) * lowerCohAvgByMinSMC(c,tetta) : 0;
            c.blS_SMCam = (c.cohAvgSMC != 0) ? c.cohAvgSMC * upperCohMinByAvgSMC(c,tetta) * lowerCohMinByAvgSMC(c,tetta) : 0;
            c.blS_SMCmm = (c.cohMinSMC != 0) ? c.cohMinSMC * upperCohMinByMinSMC(c,tetta) * lowerCohMinByMinSMC(c,tetta) : 0;
        }
    }

    //Cue validity approach (CV)
    /**
     * Cue validity: for each concept, sums over its intent the ratio
     * |extent ∩ attribute extent| / |attribute extent|. NaN (possible only via
     * 0/0 for an attribute with empty extent) is mapped to 0.
     */
    public void cueValidity(){

        if (attributesExtent == null)
            this.setUp();

        ArrayList<Integer> attrExtent;
        Set<Integer> intersection;
        double sum = 0;
        for (FormalConcept c: cl.conceptList){
            sum = 0;
            for (Integer i: c.intent){
                intersection = new HashSet<Integer>();
                intersection.addAll(c.extent);
                attrExtent = attributesExtent.get(i);
                intersection.retainAll(attrExtent);
                sum+=(double)intersection.size()*1./attrExtent.size();
            }
            c.blCV = Double.isNaN(sum) ? 0 : sum;

        }
    }
    //Category feature collocation approach
    /**
     * Category feature collocation: sums, over all attributes, the product of
     * cue validity and category validity terms. NaN (0/0 on empty extents) is
     * mapped to 0.
     */
    public void categoryFeatureCollocation(){
        if (attributesExtent == null)
            this.setUp();

        ArrayList<Integer> attrExtent;
        Set<Integer> intersection;
        double sum = 0;
        int latticeSize = cl.conceptList.size();  // NOTE(review): unused
        for (FormalConcept c: cl.conceptList){
            sum = 0;
            for (int i = 0; i < cl.attributeCount; i++){
                intersection = new HashSet<Integer>();
                intersection.addAll(c.extent);
                attrExtent = attributesExtent.get(i);
                intersection.retainAll(attrExtent);
                sum+=(double)intersection.size()*1./attrExtent.size()*intersection.size()/c.extent.size();
            }
            c.blCFC = Double.isNaN(sum) ? 0 : sum;
        }
    }

    //Category utility approach
    /**
     * Category utility: P(c) * sum over attributes of
     * (P(a|c)^2 - P(a)^2), with probabilities estimated from extent sizes.
     * NaN results (0/0 on empty extents) are mapped to 0.
     */
    public void categoryUtility(){
        if (attributesExtent == null)
            this.setUp();

        ArrayList<Integer> attrExtent;
        Set<Integer> intersection;
        double sum = 0;
        int attrSize = cl.objectCount;
        int cExtentSize = 0;
        for (FormalConcept c: cl.conceptList){
            sum = 0;
            for (int i = 0; i < cl.attributeCount; i++){
                intersection = new HashSet<Integer>();
                intersection.addAll(c.extent);
                cExtentSize = c.extent.size();
                attrExtent = attributesExtent.get(i);
                intersection.retainAll(attrExtent);
                sum+=(double)Math.pow(intersection.size()*1./cExtentSize,2)-Math.pow(1.*attrExtent.size()/attrSize,2);
            }
            c.blCU =Double.isNaN(1.*cExtentSize/attrSize*sum) ? 0 : 1.*cExtentSize/attrSize*sum;
        }
    }

    //Predictability approach (P)
    /**
     * Predictability: 1 minus the average entropy-like term over attributes
     * outside the concept's intent. NaN (empty outOfIntent or empty extent)
     * is mapped to 0.
     */
    public void predictability(){

        if (attributesExtent == null)
            this.setUp();
        // NOTE(review): this local shadows the field of the same name.
        ArrayList<Integer> attributes = new ArrayList<Integer>();
        ArrayList<Integer> outOfIntent = new ArrayList<Integer>();
        Set<Integer> intersection;
        ArrayList<Integer> attrExtent;
        double sum, term;

        for (int i = 0; i< cl.attributeCount; i++){
            attributes.add(i);
        }
        for (FormalConcept c: cl.conceptList){
            sum = 0;
            outOfIntent = new ArrayList<Integer>();
            outOfIntent.addAll(attributes);
            outOfIntent.removeAll(c.intent);
            for (Integer y: outOfIntent){
                intersection = new HashSet<Integer>();
                intersection.addAll(c.extent);
                attrExtent = attributesExtent.get(y);
                intersection.retainAll(attrExtent);
                term = 1.*intersection.size()/c.extent.size();

                if (term > 0){
                    sum-=term*Math.log(term);
                }
            }
            c.blP = Double.isNaN(1-sum/outOfIntent.size()) ? 0 : 1-sum/outOfIntent.size();
        }

    }
}
http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/1f97041b/opennlp-similarity/src/main/java/opennlp/tools/fca/ConceptLattice.java
----------------------------------------------------------------------
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/fca/ConceptLattice.java b/opennlp-similarity/src/main/java/opennlp/tools/fca/ConceptLattice.java
new file mode 100755
index 0000000..6bd546c
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/fca/ConceptLattice.java
@@ -0,0 +1,298 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package opennlp.tools.fca;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.ListIterator;
import java.util.Set;

import org.apache.commons.collections.ListUtils;


/**
 * A concept lattice built incrementally from a binary object/attribute context
 * with the AddIntent algorithm. Concepts are kept in {@code conceptList};
 * concept 0 is the bottom concept (intent = all attributes). Optionally
 * computes stability/separation/probability measures and basic-level metrics
 * right after construction.
 */
public class ConceptLattice {
    // Number of objects (rows) and attributes (columns) of the context.
    int objectCount;
    int attributeCount;
    // All concepts; index in this list is the concept's "position".
    ArrayList<FormalConcept> conceptList;
    // binaryContext[object][attribute] == 1 iff the object has the attribute.
    int [][] binaryContext;
    // NOTE(review): these are static but initialized per-instance in every
    // constructor — constructing a second lattice clobbers the first one's
    // measures. They should very likely be instance fields.
    static Measures ms;
    static BasicLevelMetrics blm;

    /**
     * Builds the lattice directly from an in-memory binary context.
     *
     * @param objCount      number of objects
     * @param attrCount     number of attributes
     * @param binaryContext 0/1 incidence matrix, may be null (empty lattice)
     * @param stats         when true, compute all measures and basic-level metrics
     */
    public ConceptLattice(int objCount, int attrCount, int [][] binaryContext,boolean stats) {
        this.objectCount = objCount;
        this.attributeCount = attrCount;
        this.binaryContext = binaryContext;
        this.conceptList = new ArrayList<FormalConcept>();
        FormalConcept bottom = new FormalConcept();
        bottom.setPosition(0);
        conceptList.add(bottom);
        ms = new Measures(this);
        blm = new BasicLevelMetrics(this);
        if (this.binaryContext !=null)
            createLatticeFromBinaryContext();
        if (stats){
            ms.logStabilityExt();
            ms.separation();
            ms.probability();
            blm.cueValidity();
            blm.categoryFeatureCollocation();
            blm.categoryUtility();
            blm.predictability();
            blm.similarityGoguenNorm();
        }
    }

    /**
     * Builds the lattice from a Burmeister .cxt file via {@link FcaReader}.
     * NOTE(review): duplicates the other constructor's body — candidate for a
     * shared private init method.
     *
     * @param filename path to the .cxt context file
     * @param stats    when true, compute all measures and basic-level metrics
     */
    public ConceptLattice(String filename, boolean stats) throws FileNotFoundException, IOException {

        FcaReader fr = new FcaReader();
        fr.ReadContextFromCxt(filename);
        this.objectCount = fr.getObjectsCount();
        this.attributeCount = fr.getAttributesCount();
        this.binaryContext = fr.getBinaryContext();

        this.conceptList = new ArrayList<FormalConcept>();
        FormalConcept bottom = new FormalConcept();
        bottom.setPosition(0);
        conceptList.add(bottom);
        ms = new Measures(this);
        blm = new BasicLevelMetrics(this);
        if (this.binaryContext !=null)
            createLatticeFromBinaryContext();
        if (stats){
            ms.logStabilityExt();
            ms.separation();
            ms.probability();
            blm.cueValidity();
            blm.categoryFeatureCollocation();
            blm.categoryUtility();
            blm.predictability();
            blm.similarityGoguenNorm();
        }
    }


    /**
     * Climbs from {@code Generator} to the highest ancestor whose intent still
     * contains the given intent (AddIntent's GetMaximalConcept step).
     *
     * @return position of the maximal concept containing {@code intent}
     */
    public int GetMaximalConcept(List<Integer> intent, int Generator) {
        boolean parentIsMaximal = true;
        while(parentIsMaximal) {
            parentIsMaximal = false;
            for (int parent : conceptList.get(Generator).getParents()) {
                if (conceptList.get(parent).getIntent().containsAll(intent)) {
                    Generator = parent;
                    parentIsMaximal = true;
                    break;
                }
            }
        }
        return Generator;
    }

    /** Recursively adds {@code extent} to every ancestor of {@code curNode}. */
    public void AddExtentToAncestors(LinkedHashSet<Integer>extent, int curNode) {
        if (conceptList.get(curNode).parents.size()>0){
            for (int parent : conceptList.get(curNode).parents){
                conceptList.get(parent).addExtents(extent);
                AddExtentToAncestors(extent, parent);
            }
        }
    }

    /**
     * Core of the incremental AddIntent algorithm: inserts a concept with the
     * given intent/extent below {@code generator}, restructuring parent/child
     * links and propagating extents upward.
     *
     * @return position of the (existing or newly created) concept for {@code intent}
     */
    public int AddIntent(List<Integer> intent,LinkedHashSet<Integer>extent, int generator) {
        //System.out.println("add intent "+intent+extent+generator);
        int generator_tmp = GetMaximalConcept(intent, generator);
        generator = generator_tmp;
        //System.out.println(" max gen "+generator);
        // Intent already present: just merge the extent into it and its ancestors.
        if (conceptList.get(generator).getIntent().equals(intent)) {
            conceptList.get(generator).addExtents(extent);
            AddExtentToAncestors(extent, generator);
            return generator;
        }
        // NOTE(review): the loop below iterates the live parents set of the
        // generator while the recursive AddIntent call can modify parent links —
        // a ConcurrentModificationException risk worth verifying with a test.
        Set<Integer> generatorParents = conceptList.get(generator).getParents();
        Set<Integer> newParents = new HashSet<Integer>();
        for (int candidate : generatorParents) {
            // Candidate not comparable with the new intent: recurse on the meet.
            if (!intent.containsAll(conceptList.get(candidate).getIntent())) {
                List<Integer> intersection = ListUtils.intersection(intent, conceptList.get(candidate).getIntent());
                LinkedHashSet<Integer> new_extent = new LinkedHashSet<Integer>();
                new_extent.addAll(conceptList.get(candidate).extent);
                new_extent.addAll(extent);
                candidate = AddIntent(intersection,new_extent,candidate);
            }

            // Keep only the minimal candidates among the collected new parents.
            boolean addParents = true;
            Iterator<Integer> iterator = newParents.iterator();
            while (iterator.hasNext()) {
                Integer parent = iterator.next();
                if (conceptList.get(parent).getIntent().containsAll(conceptList.get(candidate).getIntent())) {
                    addParents = false;
                    break;
                }
                else {
                    if (conceptList.get(candidate).getIntent().containsAll(conceptList.get(parent).getIntent())) {
                        iterator.remove();
                    }
                }
            }
            if (addParents) {
                newParents.add(candidate);
            }
        }

        // Create the new concept between the generator and the new parents.
        FormalConcept newConcept = new FormalConcept();
        newConcept.setIntent(intent);
        LinkedHashSet<Integer> new_extent = new LinkedHashSet<Integer>();
        new_extent.addAll(conceptList.get(generator).extent);
        new_extent.addAll(extent);
        newConcept.addExtents(new_extent);
        newConcept.setPosition(conceptList.size());
        conceptList.add(newConcept);
        conceptList.get(generator).getParents().add(newConcept.position);
        conceptList.get(newConcept.position).childs.add(generator);
        for (int newParent: newParents) {
            // Unlink generator from parents that are now reached via newConcept.
            if (conceptList.get(generator).getParents().contains(newParent)) {
                conceptList.get(generator).getParents().remove(newParent);
                conceptList.get(newParent).childs.remove(generator);
            }
            conceptList.get(newConcept.position).getParents().add(newParent);
            conceptList.get(newParent).addExtents(new_extent);
            AddExtentToAncestors(new_extent, newParent);
            conceptList.get(newParent).childs.add(newConcept.position);
        }

        return newConcept.position;
    }

    /** Prints object/attribute counts and the current number of concepts. */
    public void printLatticeStats() {
        System.out.println("Lattice stats");
        System.out.println("max_object_index = " + objectCount);
        System.out.println("max_attribute_index = " + attributeCount);
        System.out.println("Current concept count = " + conceptList.size());
    }

    /** Prints every concept in short form. */
    public void printLattice() {
        for (int i = 0; i < conceptList.size(); ++i) {
            printConceptByPosition(i);
        }
    }

    /** Prints every concept including all computed measures. */
    public void printLatticeFull() {
        for (int i = 0; i < conceptList.size(); ++i) {
            printConceptByPositionFull(i);
        }
    }

    /** Prints the raw binary context row by row. */
    public void printContext() {
        for (int i = 0; i<objectCount; i++){
            for (int j = 0; j<attributeCount; j++){
                System.out.print(binaryContext[i][j]+" ");
            }
            System.out.println();
        }
    }

    /** Prints the concept stored at the given list position (short form). */
    public void printConceptByPosition(int index) {
        System.out.println("Concept at position " + index);
        conceptList.get(index).printConcept();
    }

    /** Prints the concept stored at the given list position (full form). */
    public void printConceptByPositionFull(int index) {
        System.out.println("Concept at position " + index);
        conceptList.get(index).printConceptFull();
    }

    /**
     * Seeds the bottom concept with the full attribute set, then inserts each
     * object's intent via {@link #AddIntent}.
     */
    public void createLatticeFromBinaryContext(){
        LinkedHashSet<Integer> obj;
        ArrayList<Integer> intent;
        // attributes list
        ArrayList<Integer> attributes = new ArrayList<Integer>();
        for (int i = 0; i <attributeCount; i++){
            attributes.add(i);
        }
        // objects set
        // NOTE(review): this set is built but never used below.
        LinkedHashSet<Integer> objects = new LinkedHashSet<Integer>();
        for (int i = 0; i <objectCount; i++){
            objects.add(i);
        }

        this.conceptList.get(0).setIntent(attributes);
        for (int i = 0; i < objectCount; i++){
            intent = new ArrayList<Integer>();
            obj = new LinkedHashSet<Integer>();
            obj.add(i);
            for (int j = 0; j < attributeCount; j++){
                if (binaryContext[i][j] == 1){
                    intent.add(j);
                }
            }
            this.AddIntent(intent,obj,0);
        }
    }

    /** Ad-hoc manual test entry point: builds a lattice from sports.cxt and prints it. */
    public static void main(String []args) throws FileNotFoundException, IOException {

        ConceptLattice cl = new ConceptLattice("sports.cxt", true);
        cl.printLattice();
    }


    /** @return ids of all objects that have attribute {@code ind} */
    public List<Integer> getAttributeExtByID(int ind){
        ArrayList<Integer> attrExt = new ArrayList<Integer>();
        for (int i=0;i<objectCount; i++)
            if (binaryContext[i][ind]==1)
                attrExt.add(i);
        return attrExt;
    }

    /** @return ids of all attributes of object {@code ind} */
    public ArrayList<Integer> getObjectIntByID(int ind){
        ArrayList<Integer> objInt = new ArrayList<Integer>();
        for (int i=0;i<attributeCount; i++)
            if (binaryContext[ind][i]==1)
                objInt.add(i);
        return objInt;
    }

    /** @return the live list of concepts (not a copy) */
    public ArrayList<FormalConcept> getLattice(){
        return conceptList;
    }

    public int getAttributesCount() {
        return attributeCount;
    }

    public int getObjectCount() {
        return objectCount;
    }

    /** @return number of concepts currently in the lattice */
    public int getSize(){
        return conceptList.size();
    }


    /** Prints the binary context, one row per line, via Arrays.toString. */
    public void printBinContext() {
        for (int i = 0; i < binaryContext.length; i++ ){
            System.out.println(Arrays.toString(binaryContext[i]));
        }
    }



}

http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/1f97041b/opennlp-similarity/src/main/java/opennlp/tools/fca/FcaConverter.java
----------------------------------------------------------------------
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/fca/FcaConverter.java b/opennlp-similarity/src/main/java/opennlp/tools/fca/FcaConverter.java
new file mode 100755
index 0000000..b1eaa93
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/fca/FcaConverter.java
@@ -0,0 +1,72 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
+ */ + +package opennlp.tools.fca; + +import java.util.ArrayList; +import java.util.Set; + +public class FcaConverter { + + public FcaConverter (){ + + } + + public int [][] latticeToContext(ConceptLattice cl){ + + int newAttrCount = cl.conceptList.size(); + int [][] binaryContext = null; + ArrayList<FormalConcept> cList = new ArrayList<FormalConcept>(); + cList.addAll(cl.conceptList); + boolean run = true; + int k=0; + while (run && k<cl.conceptList.size()){ + if (cl.conceptList.get(k).intent.size() == cl.attributeCount){ + for (Integer i:cl.conceptList.get(k).parents){ + //cList.remove(cl.conceptList.get(i)); + } + //cList.remove(cl.conceptList.get(k)); + run=false; + } + else{ + //cList.add(arg0, arg1); + + } + + } + //System.out.println("cList.size() " + cList.size()); + run = true; + k=0; + while (run && k<=newAttrCount){ + if (cList.get(k).extent.size()==0) + k++; + run = false; + } + newAttrCount = cList.size(); + Set<Integer> nodeExtend; + binaryContext = new int[cl.objectCount][newAttrCount]; + for (int j = 0; j<newAttrCount; j++){ + nodeExtend = cList.get(j).extent; + //System.out.println(cList.get(j).position+" nodeExtend " + nodeExtend); + for (Integer i: nodeExtend){ + binaryContext[i][j]=1; + } + } + return binaryContext; + } + +} http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/1f97041b/opennlp-similarity/src/main/java/opennlp/tools/fca/FcaReader.java ---------------------------------------------------------------------- diff --git a/opennlp-similarity/src/main/java/opennlp/tools/fca/FcaReader.java b/opennlp-similarity/src/main/java/opennlp/tools/fca/FcaReader.java new file mode 100755 index 0000000..cca4e6a --- /dev/null +++ b/opennlp-similarity/src/main/java/opennlp/tools/fca/FcaReader.java @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;

/**
 * Reads a formal context from a Burmeister ".cxt" file.
 *
 * Expected layout: line "B", a blank line, object count, attribute count,
 * a blank line, one name per object, one name per attribute, then one row
 * per object where '.' means 0 and any other character means 1.
 */
public class FcaReader {

	ArrayList<String> obNames = null;
	ArrayList<String> atNames = null;
	int[][] binContext = null;
	int objectsNumber = 0;
	int attributesNumber = 0;

	/**
	 * Parses the given .cxt file into {@link #binContext} and the name lists.
	 *
	 * Fixes vs. the original: the reader is closed via try-with-resources
	 * (it used to leak), parse errors propagate instead of being swallowed
	 * by a broad catch, and row/column reads are bounds-guarded so a
	 * malformed file cannot cause ArrayIndexOutOfBoundsException.
	 *
	 * @param filename path of the .cxt file to read
	 * @throws IOException if the file cannot be read
	 * @throws NumberFormatException if the count lines are not integers
	 */
	public void ReadContextFromCxt(String filename) throws FileNotFoundException, IOException {
		obNames = new ArrayList<String>();
		atNames = new ArrayList<String>();

		try (BufferedReader br = new BufferedReader(new FileReader(filename))) {
			br.readLine(); // "B" format marker
			br.readLine(); // blank separator
			objectsNumber = Integer.parseInt(br.readLine().trim());
			attributesNumber = Integer.parseInt(br.readLine().trim());
			br.readLine(); // blank separator

			binContext = new int[objectsNumber][attributesNumber];

			for (int i = 0; i < objectsNumber; i++) {
				obNames.add(br.readLine());
			}
			for (int i = 0; i < attributesNumber; i++) {
				atNames.add(br.readLine());
			}

			String line;
			int i = 0;
			while ((line = br.readLine()) != null && i < objectsNumber) {
				int cols = Math.min(line.length(), attributesNumber);
				for (int j = 0; j < cols; j++) {
					binContext[i][j] = (line.charAt(j) == '.') ? 0 : 1;
				}
				i++;
			}
		}
	}

	/** @return the attribute count parsed from the last read file (0 before any read). */
	public int getAttributesCount() {
		return attributesNumber;
	}

	/** @return the object count parsed from the last read file (0 before any read). */
	public int getObjectsCount() {
		return objectsNumber;
	}

	/** @return the 0/1 incidence matrix, or null if nothing was read yet. */
	public int[][] getBinaryContext() {
		return binContext;
	}

	/**
	 * Demo entry point. The original hard-coded a developer-local (and
	 * mojibake-garbled) Windows path; the path now comes from args[0],
	 * defaulting to "sports.cxt" in the working directory.
	 */
	public static void main(String[] args) throws FileNotFoundException, IOException {
		String path = (args.length > 0) ? args[0] : "sports.cxt";
		new FcaReader().ReadContextFromCxt(path);
	}
}
+ */ +package opennlp.tools.fca; + +import java.io.BufferedWriter; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStreamWriter; +import java.io.Writer; + +public class FcaWriter { + + public void WriteAsCxt(String filename, ConceptLattice cl){ + + Writer writer = null; + + try { + writer = new BufferedWriter(new OutputStreamWriter( + new FileOutputStream(filename), "utf-8")); + writer.write("B\n"); + writer.write("\n"); + writer.write(String.valueOf(cl.objectCount)+"\n"); + writer.write(cl.attributeCount + "\n"); + writer.write("\n"); + + for (int obj = 0; obj < cl.objectCount; obj++){ + writer.write(obj + "\n"); + } + for (int attr = 0; attr < cl.attributeCount; attr++){ + writer.write(attr + "\n"); + } + + for (int i = 0; i < cl.objectCount; i++){ + for (int j = 0; j < cl.attributeCount; j++){ + writer.write((cl.binaryContext[i][j] == 0) ? '.':'X'); + } + writer.write("\n"); + } + + writer.close(); + + } catch (IOException ex) { + System.err.println(ex.getMessage()); + } finally { + try {writer.close();} catch (Exception ex) {} + } + } + + public void WriteStatsToTxt(String filename, ConceptLattice cl, int patternStructureSize){ + ///2n+1 + int intentSize = 2*cl.attributeCount+1; //4*cl.ob+1; + String formatStrHeader = "%-9s %-9s %-13s %-9s %-9s %-9s %-9s %-9s %-9s %-9s %-9s %-9s %-9s %-9s %-9s %-"+ intentSize +"s%n"; + String formatStr = "%6.3f %6.3f [%5.3f; %5.3f] %6.3f %6.3f %6.3f %6.3f %6.3f %6.3f %6.3f %6.3f %6.3f %6.3f %6.3f %6.3f %"+ intentSize +"s%n"; + String formatStr2 = "%9.2f %9.2f [%5.2f; %5.2f] %9.2f %9.2f %9.2f %9.2f %9.2f %9.2f %9.2f %9.2f %9.2f %9.2f %9.2f %9.2f %-"+ intentSize +"s%n"; + Writer writer = null; + + try { + writer = new BufferedWriter(new OutputStreamWriter( + new FileOutputStream(filename), "utf-8")); + writer.write("Lattice size: " + cl.getSize() +", obj: " + cl.objectCount + ", attr: " + cl.attributeCount +"\n"); + writer.write("PatternStructure size: " 
+ patternStructureSize + "\n"); + writer.write(String.format(formatStrHeader, "probability", "separation", "stability", "blCV", "blCFC", "blCU", "blP", "blS_SMCaa", "blS_SMCam", "blS_SMCma", "blS_SMCma", "blS_Jaa", "blS_Jam", "blS_Jma", "blS_Jmm", "extent")); + + for (FormalConcept c : cl.conceptList){ + writer.write(String.format(formatStr2, c.probability, c.separation, c.intLogStabilityBottom, c.intLogStabilityUp, c.blCV, c.blCFC, c.blCU, c.blP, + c.blS_SMCaa, c.blS_SMCam, c.blS_SMCma, c.blS_SMCmm, c.blS_Jaa, c.blS_Jam, c.blS_Jma, c.blS_Jmm, c.extent)); + } + + writer.close(); + + } catch (IOException ex) { + System.err.println(ex.getMessage()); + } finally { + try {writer.close();} catch (Exception ex) {} + } + } + + + public void WriteStatsToCvs(String filename, ConceptLattice cl, int patternStructureSize){ + ///2n+1 + int intentSize = 2*cl.attributeCount+1; //4*cl.ob+1; + String formatStrHeader = "%s; %s; %s; %s; %s; %s; %s; %s; %s; %s; %s; %s; %s; %s; %s; %s%n"; + String formatStr2 = "%.2f; %.2f; [%5.2f : %5.2f]; %.2f; %.2f; %.2f; %.2f; %.2f; %.2f; %.2f; %.2f; %.2f; %.2f; %.2f; %.2f; %s%n"; + Writer writer = null; + + try { + writer = new BufferedWriter(new OutputStreamWriter( + new FileOutputStream(filename), "utf-8")); + writer.write("Lattice size: " + cl.getSize() +", obj: " + cl.objectCount + ", attr: " + cl.attributeCount +"\n"); + writer.write("PatternStructure size: " + patternStructureSize + "\n"); + writer.write(String.format(formatStrHeader, "probability", "separation", "stability", "blCV", "blCFC", "blCU", "blP", "blS_SMCaa", "blS_SMCam", "blS_SMCma", "blS_SMCma", "blS_Jaa", "blS_Jam", "blS_Jma", "blS_Jmm", "extent")); + + for (FormalConcept c : cl.conceptList){ + writer.write(String.format(formatStr2, c.probability, c.separation, c.intLogStabilityBottom, c.intLogStabilityUp, c.blCV, c.blCFC, c.blCU, c.blP, + c.blS_SMCaa, c.blS_SMCam, c.blS_SMCma, c.blS_SMCmm, c.blS_Jaa, c.blS_Jam, c.blS_Jma, c.blS_Jmm, c.extent)); + } + + writer.close(); + + } 
catch (IOException ex) { + System.err.println(ex.getMessage()); + } finally { + try {writer.close();} catch (Exception ex) {} + } + } + + +public static void main(String []args) throws FileNotFoundException, IOException{ + + ConceptLattice cl = new ConceptLattice("sports.cxt",false); + FcaWriter writer = new FcaWriter(); + writer.WriteAsCxt("res.cxt",cl); + + } + +} http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/1f97041b/opennlp-similarity/src/main/java/opennlp/tools/fca/FormalConcept.java ---------------------------------------------------------------------- diff --git a/opennlp-similarity/src/main/java/opennlp/tools/fca/FormalConcept.java b/opennlp-similarity/src/main/java/opennlp/tools/fca/FormalConcept.java new file mode 100755 index 0000000..3f5ecb8 --- /dev/null +++ b/opennlp-similarity/src/main/java/opennlp/tools/fca/FormalConcept.java @@ -0,0 +1,152 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
import java.util.ArrayList;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;

/**
 * A node of a concept lattice: an (extent, intent) pair together with the
 * basic-level / stability metric fields that Measures and BasicLevelMetrics
 * fill in elsewhere.
 */
public class FormalConcept {
	// index of this concept inside ConceptLattice.conceptList; -1 = unset
	int position;
	ArrayList<Integer> intent;
	Set<Integer> childs;
	Set<Integer> parents;
	// created as LinkedHashSet in the constructor, so getExtent()'s cast is safe
	Set<Integer> extent;
	// metric slots, written by the metric computations elsewhere in the package
	double intLogStabilityBottom = 0,
			intLogStabilityUp = 0,
			separation = 0,
			probability = 0,
			blS_Jaa = 0,
			blS_SMCaa = 0,
			blS_Jam = 0,
			blS_SMCam = 0,
			blS_Jmm = 0,
			blS_Jma = 0,
			blS_SMCma = 0,
			blS_SMCmm = 0,
			blCV = 0,
			blCFC = 0,
			blCU = 0,
			blP = 0;

	// cohesion metrics (average / minimum, Jaccard and simple-matching)
	double cohAvgJ = 0,
			cohMinJ = 0,
			cohAvgSMC = 0,
			cohMinSMC = 0;

	/** Creates an empty concept with position -1 and empty intent/extent/links. */
	public FormalConcept() {
		position = -1;
		intent = new ArrayList<Integer>();
		extent = new LinkedHashSet<Integer>();
		parents = new HashSet<Integer>();
		childs = new HashSet<Integer>();
	}

	public void setPosition(int newPosition) {
		position = newPosition;
	}

	/**
	 * Adds all of {@code ext} to this concept's extent.
	 * Generalized from LinkedHashSet to any Set (source-compatible: every
	 * previous caller passed a LinkedHashSet, which still matches).
	 */
	public void addExtents(Set<Integer> ext) {
		extent.addAll(ext);
	}

	public LinkedHashSet<Integer> getExtent() {
		return (LinkedHashSet<Integer>) extent;
	}

	/** Replaces the intent with a copy of {@code newIntent}. */
	public void setIntent(List<Integer> newIntent) {
		intent.clear();
		intent.addAll(newIntent);
	}

	/** Replaces the child set with a copy of {@code newChilds}. */
	public void setChilds(Set<Integer> newChilds) {
		childs.clear();
		childs.addAll(newChilds);
	}

	public void addChild(Integer child) {
		childs.add(child);
	}

	/** Replaces the parent set with a copy of {@code newParents}. */
	public void setParents(Set<Integer> newParents) {
		parents.clear();
		parents.addAll(newParents);
	}

	/** Prints position, intent, extent and neighbor links to stdout. */
	public void printConcept() {
		System.out.println("Concept position:" + position);
		System.out.println("Concept intent:" + getIntent());
		System.out.println("Concept extent:" + extent);
		System.out.println("Concept parents:" + getParents());
		System.out.println("Concept childs:" + childs);
		System.out.println("--------------------");
	}

	/** Like {@link #printConcept()} but also dumps the metric fields. */
	public void printConceptFull() {
		System.out.println("Concept position:" + position);
		System.out.println("Concept intent:" + getIntent());
		System.out.println("Concept extent:" + extent);
		System.out.println("Concept parents:" + getParents());
		System.out.println("Concept childs:" + childs);
		System.out.format("Prob. blSaaJ blSaaSMC blSmaJ blSmmJ blSmaSMC blSamJ blSamSMC blCV blCFC blCU blP separ.\n");
		System.out.format("%5.3f %8.3f %8.3f %8.3f %8.3f %8.3f %8.3f %8.3f %8.3f %8.3f %8.3f %8.3f %8.3f\n", probability, blS_Jaa, blS_SMCaa,
				blS_Jma, blS_Jmm, blS_SMCma, blS_Jam, blS_SMCam, blCV, blCFC, blCU, blP, separation);
		System.out.format("Concept cohAvgJ: %.3f \n", cohAvgJ);
		System.out.format("Concept cohAvgSMC: %.3f \n", cohAvgSMC);
		System.out.format("Concept cohMinJ: %.3f \n", cohMinJ);
		System.out.format("Concept cohMinSMC: %.3f \n", cohMinSMC);
		System.out.println("--------------------");
	}

	/** Tiny demo: prints an empty concept. */
	public static void main(String[] args) {
		FormalConcept c = new FormalConcept();
		c.printConcept();
	}

	public ArrayList<Integer> getIntent() {
		return intent;
	}

	public void setIntent(ArrayList<Integer> intent) {
		this.intent = intent;
	}

	public Set<Integer> getParents() {
		return parents;
	}

	public double getIntLogStabilityBottom() {
		return intLogStabilityBottom;
	}

	public void setIntLogStabilityBottom(double intLogStabilityBottom) {
		this.intLogStabilityBottom = intLogStabilityBottom;
	}

	public double getIntLogStabilityUp() {
		return intLogStabilityUp;
	}

	public void setIntLogStabilityUp(double intLogStabilityUp) {
		this.intLogStabilityUp = intLogStabilityUp;
	}
}
b/opennlp-similarity/src/main/java/opennlp/tools/fca/Measures.java new file mode 100755 index 0000000..a4b17d8 --- /dev/null +++ b/opennlp-similarity/src/main/java/opennlp/tools/fca/Measures.java @@ -0,0 +1,147 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package opennlp.tools.fca; + + +import org.apache.commons.math3.special.*; +import java.util.ArrayList; +import java.util.Set; + +public class Measures { + + ConceptLattice cl; + + public Measures(ConceptLattice cl) { + this.cl = cl; + } + + public void logStabilityInt(){ + int min_delta = -1, delta = -1; + float sum = 0; + for (int i = 0; i < cl.conceptList.size(); ++i) { + min_delta = cl.attributeCount; + sum = 0; + FormalConcept fc = cl.conceptList.get(i); + Set<Integer> parents = fc.parents; + for (Integer j: parents) { + delta = fc.getIntent().size() - cl.conceptList.get(j).intent.size(); + if (delta<min_delta) + min_delta = delta; + sum += Math.pow(2, -delta); + } + fc.intLogStabilityBottom=-(Math.log(sum)/Math.log(2.0)); + fc.intLogStabilityUp = min_delta; + } + } + + public void logStabilityExt(){ + int min_delta = -1, delta = -1; + float sum = 0; + for (int i = 0; i < cl.conceptList.size(); ++i) { + min_delta = cl.attributeCount; + sum = 0; + FormalConcept fc = cl.conceptList.get(i); + Set<Integer> childs = fc.childs; + for (Integer j: childs) { + delta = fc.getExtent().size() - cl.conceptList.get(j).getExtent().size(); + if (delta<min_delta) + min_delta = delta; + sum += Math.pow(2, -delta); + } + fc.intLogStabilityBottom=-(Math.log(sum)/Math.log(2.0)); + fc.intLogStabilityUp = min_delta; + } + } + + public void separation(){ + ArrayList<Integer> intent; + Set<Integer> extentMembers; + int extentVolume = 0, intentVolume = 0; + double sz = 0; + for (int i = 0; i < cl.conceptList.size(); ++i) { + intent = cl.conceptList.get(i).getIntent(); + extentMembers = cl.conceptList.get(i).extent; + extentVolume = 0; + intentVolume = 0; + for (Integer ext : extentMembers){ + for (int attr = 0; attr<cl.attributeCount;attr++){ + extentVolume += cl.binaryContext[ext][attr]; + } + } + for (int attr = 0; attr<intent.size();attr++ ){ + for (int obj = 0; obj < cl.objectCount; obj++){ + intentVolume += cl.binaryContext[obj][intent.get(attr)]; + } + } + sz = 
intent.size()*extentMembers.size(); + if (extentVolume+extentVolume-sz!=0) + cl.conceptList.get(i).separation = sz/(extentVolume+intentVolume-sz); + else + cl.conceptList.get(i).separation = 0; + } + } + public double attributeProbability(int attrNmb){ + double pAttr = 0; + for (int i = 0; i<cl.objectCount;i++){ + pAttr+=cl.binaryContext[i][attrNmb]; + } + pAttr/=cl.objectCount; + return pAttr; + } + + public double intentProbability(ArrayList<Integer> intent){ + double pB = 1; + for (int i=0;i<intent.size();i++){ + pB*=attributeProbability(intent.get(i)); + } + return pB; + } + + public void probability(){ + //the probability of B being closed + for (int i = 0; i<cl.conceptList.size();i++){ + ArrayList<Integer> intent = cl.conceptList.get(i).getIntent(); + // out of concept intent + double pB = intentProbability(intent); + ArrayList<Integer> outOfIntent = new ArrayList<Integer>(); + ArrayList<Double> outOfIntentAttrProb = new ArrayList<Double>(); + for (int j=0;j<cl.attributeCount; j++){ + outOfIntent.add(j); + } + for (int j=intent.size()-1;j>=0;j--){ + outOfIntent.remove(intent.get(j)); + } + for (int j=0;j<outOfIntent.size();j++){ + outOfIntentAttrProb.add(attributeProbability(outOfIntent.get(j))); + } + double prob = 0, mult = 1, mult1=1; + int n = cl.objectCount; + for (int k=0; k<= n; k++){ + mult = 1; + mult1 = 1; + for (int j=0;j<outOfIntentAttrProb.size();j++){ + mult*=(1-Math.pow(outOfIntentAttrProb.get(j),k)); + } + mult1 = Math.pow(pB,k)*Math.pow(1-pB,n-k); + prob+=mult1*mult*Gamma.digamma(n+1)/Gamma.digamma(k+1)/Gamma.digamma(n-k+1); + } + + cl.conceptList.get(i).probability = prob; + } + } + +} http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/1f97041b/opennlp-similarity/src/main/java/opennlp/tools/fca/RandomNoiseGenerator.java ---------------------------------------------------------------------- diff --git a/opennlp-similarity/src/main/java/opennlp/tools/fca/RandomNoiseGenerator.java 
b/opennlp-similarity/src/main/java/opennlp/tools/fca/RandomNoiseGenerator.java new file mode 100755 index 0000000..ff5a24f --- /dev/null +++ b/opennlp-similarity/src/main/java/opennlp/tools/fca/RandomNoiseGenerator.java @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
import java.util.Random;

/**
 * Utilities that inject random noise into a 0/1 context matrix.
 */
public class RandomNoiseGenerator {

	/**
	 * Flips each cell of the context with probability {@code p}.
	 * NOTE: mutates {@code binaryContext} in place and returns the same
	 * array reference (callers rely on both, so this is documented rather
	 * than changed).
	 *
	 * @param p per-cell inversion probability in [0, 1]
	 * @param binaryContext matrix to perturb; must be non-empty and rectangular
	 * @return the same (now perturbed) matrix
	 */
	public int[][] AlterCellsWithProbability(double p, int[][] binaryContext) {
		Random rnd = new Random();
		for (int[] row : binaryContext) {
			for (int j = 0; j < row.length; j++) {
				if (rnd.nextFloat() <= p) {
					row[j] = 1 - row[j]; // invert 0 <-> 1
				}
			}
		}
		return binaryContext;
	}

	/**
	 * Returns a new, larger context: the original in the top-left corner
	 * plus randomly-filled extra rows (objects) and columns (attributes).
	 * {@code numberObjAttr} is split randomly into nRandObjs new objects and
	 * nRandAttr new attributes; note that, as in the original, the split
	 * always assigns at least one attribute (nextInt excludes the bound).
	 * Each new cell is 1 with probability {@code d}.
	 *
	 * @param numberObjAttr total number of rows+columns to add
	 * @param d probability that a newly added cell is 1
	 * @param binaryContext source matrix (not modified)
	 * @return a fresh matrix of size (n + nRandObjs) x (m + nRandAttr)
	 */
	public int[][] AddObjectsAttributesWithProbability(int numberObjAttr, double d, int[][] binaryContext) {
		Random rnd = new Random();
		int n = binaryContext.length;
		int m = binaryContext[0].length;
		int nRandObjs = rnd.nextInt(numberObjAttr);
		int nRandAttr = numberObjAttr - nRandObjs;

		int[][] result = new int[n + nRandObjs][m + nRandAttr];
		// copy the original context into the top-left corner
		for (int i = 0; i < n; i++) {
			System.arraycopy(binaryContext[i], 0, result[i], 0, m);
		}
		// new object rows, filled across the full (widened) width
		for (int i = n; i < n + nRandObjs; i++) {
			for (int j = 0; j < m + nRandAttr; j++) {
				result[i][j] = (rnd.nextFloat() <= d) ? 1 : 0;
			}
		}
		// new attribute columns for the original objects
		for (int j = m; j < m + nRandAttr; j++) {
			for (int i = 0; i < n; i++) {
				result[i][j] = (rnd.nextFloat() <= d) ? 1 : 0;
			}
		}
		return result;
	}
}
negHypotheses; + } + public List<List<List<ParseTreeChunk>>> getPosIntersectionsUnderNeg() { + return posIntersectionsUnderNeg; + } + public void setPosIntersectionsUnderNeg( + List<List<List<ParseTreeChunk>>> posIntersectionsUnderNeg) { + this.posIntersectionsUnderNeg = posIntersectionsUnderNeg; + } + public List<List<List<ParseTreeChunk>>> getNegIntersectionsUnderPos() { + return negIntersectionsUnderPos; + } + public void setNegIntersectionsUnderPos( + List<List<List<ParseTreeChunk>>> negIntersectionsUnderPos) { + this.negIntersectionsUnderPos = negIntersectionsUnderPos; + } + public JSMDecision(String keywordClassName, Boolean bPositiveClass, + List<List<List<ParseTreeChunk>>> posHypotheses, + List<List<List<ParseTreeChunk>>> negHypotheses, + List<List<List<ParseTreeChunk>>> posIntersectionsUnderNeg, + List<List<List<ParseTreeChunk>>> negIntersectionsUnderPos, String[] separationKeywords) { + super(); + this.keywordClassName = keywordClassName; + this.bPositiveClass = bPositiveClass; + this.posHypotheses = posHypotheses; + this.negHypotheses = negHypotheses; + this.posIntersectionsUnderNeg = posIntersectionsUnderNeg; + this.negIntersectionsUnderPos = negIntersectionsUnderPos; + this.separationKeywords = separationKeywords; + } + + + +} http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/1f97041b/opennlp-similarity/src/main/java/opennlp/tools/jsmlearning/JSMLearnerOnLatticeBase.java ---------------------------------------------------------------------- diff --git a/opennlp-similarity/src/main/java/opennlp/tools/jsmlearning/JSMLearnerOnLatticeBase.java b/opennlp-similarity/src/main/java/opennlp/tools/jsmlearning/JSMLearnerOnLatticeBase.java new file mode 100644 index 0000000..52154cd --- /dev/null +++ b/opennlp-similarity/src/main/java/opennlp/tools/jsmlearning/JSMLearnerOnLatticeBase.java @@ -0,0 +1,338 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package opennlp.tools.jsmlearning; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashSet; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Set; +import opennlp.tools.parse_thicket.pattern_structure.LinguisticPatternStructure; +import opennlp.tools.similarity.apps.utils.Pair; +import opennlp.tools.textsimilarity.ParseTreeChunk; +import opennlp.tools.textsimilarity.ParseTreeChunkListScorer; +import opennlp.tools.textsimilarity.ParseTreeMatcherDeterministic; +import opennlp.tools.textsimilarity.chunker2matcher.ParserChunker2MatcherProcessor; + +public class JSMLearnerOnLatticeBase { + ParserChunker2MatcherProcessor chunk_maker = ParserChunker2MatcherProcessor.getInstance(); + LinguisticPatternStructure psPos = new LinguisticPatternStructure(0,0), psNeg = new LinguisticPatternStructure(0,0); + ParseTreeMatcherDeterministic md = new ParseTreeMatcherDeterministic(); + + + + + public JSMDecision buildLearningModel(List<String> posTexts, List<String> negTexts, + String unknown, String[] separationKeywords){ + psPos = new LinguisticPatternStructure(0,0); psNeg = new LinguisticPatternStructure(0,0); + + if (separationKeywords!=null){ // re-sort by occurrence of separation keyword + Pair<List<String>, List<String>> pair = 
reGroupByOccurrenceOfSeparationKeyword(posTexts, negTexts, separationKeywords ); + posTexts = pair.getFirst(); negTexts = pair.getSecond(); + } + + List<List<List<ParseTreeChunk>>> lingRepsPos = new ArrayList<List<List<ParseTreeChunk>>>(), + lingRepsNeg = new ArrayList<List<List<ParseTreeChunk>>>(); + for(String text: posTexts) + lingRepsPos.add(chunk_maker.formGroupedPhrasesFromChunksForPara(text)); + + for(String text: negTexts) + lingRepsNeg.add(chunk_maker.formGroupedPhrasesFromChunksForPara(text)); + + LinkedHashSet<Integer> obj = null; + int i=0; + for(List<List<ParseTreeChunk>> chunk: lingRepsPos){ + obj = new LinkedHashSet<Integer>(); + obj.add(i); + psPos.AddIntent(chunk, obj, 0); + i++; + } + i=0; + for(List<List<ParseTreeChunk>> chunk: lingRepsNeg){ + obj = new LinkedHashSet<Integer>(); + obj.add(i); + psNeg.AddIntent(chunk, obj, 0); + i++; + } + + + + List<List<ParseTreeChunk>> chunksUnknown = chunk_maker.formGroupedPhrasesFromChunksForPara(unknown); + List<List<List<ParseTreeChunk>>> posIntersections = new ArrayList<List<List<ParseTreeChunk>>>(), + negIntersections = new ArrayList<List<List<ParseTreeChunk>>>(); + List<List<ParseTreeChunk>> intersection = null; + for(int iConcept = 0; iConcept<psPos.conceptList.size(); iConcept++){ + if (psPos.conceptList.get(iConcept).intent!=null && psPos.conceptList.get(iConcept).intent.size()>0){ + intersection = computeIntersectionWithIntentExtendedByDeduction(psPos, iConcept, chunksUnknown); + if (reduceList(intersection).size()>0) + posIntersections.add(reduceList(intersection)); + } + if (psNeg.conceptList.get(iConcept).intent!=null && psNeg.conceptList.get(iConcept).intent.size()>0){ + intersection = md + .matchTwoSentencesGroupedChunksDeterministic(psNeg.conceptList.get(iConcept).intent, chunksUnknown); + if (reduceList(intersection).size()>0) + negIntersections.add(reduceList(intersection)); + } + } + + Pair<List<List<List<ParseTreeChunk>>>, List<List<List<ParseTreeChunk>>>> pair = + 
removeInconsistenciesFromPosNegIntersections( posIntersections, + negIntersections); + + posIntersections = pair.getFirst(); + negIntersections = pair.getSecond(); + + List<List<List<ParseTreeChunk>>> posIntersectionsUnderNeg = new ArrayList<List<List<ParseTreeChunk>>>(), + negIntersectionsUnderPos = new ArrayList<List<List<ParseTreeChunk>>>(); + + for(int iConcept = 0; iConcept<psNeg.conceptList.size(); iConcept++){ + for(int iConceptJ = 0; iConceptJ<negIntersections.size(); iConceptJ++){ + intersection = md + .matchTwoSentencesGroupedChunksDeterministic(psNeg.conceptList.get(iConcept).intent, negIntersections.get(iConceptJ)); + if (reduceList(intersection).size()>0) + posIntersectionsUnderNeg.add(reduceList(intersection)); + } + } + + for(int iConcept = 0; iConcept<psPos.conceptList.size(); iConcept++){ + for(int iConceptJ = 0; iConceptJ<posIntersections.size(); iConceptJ++){ + intersection = md + .matchTwoSentencesGroupedChunksDeterministic(psPos.conceptList.get(iConcept).intent, posIntersections.get(iConceptJ)); + if (reduceList(intersection).size()>0) + negIntersectionsUnderPos.add(reduceList(intersection)); + } + } + + List<ParseTreeChunk>posIntersectionsUnderNegLst = flattenParseTreeChunkLst(posIntersectionsUnderNeg); + List<ParseTreeChunk>negIntersectionsUnderPosLst=flattenParseTreeChunkLst(negIntersectionsUnderPos); + + posIntersectionsUnderNegLst = subtract(posIntersectionsUnderNegLst, negIntersectionsUnderPosLst); + negIntersectionsUnderPosLst= subtract(negIntersectionsUnderPosLst, posIntersectionsUnderNegLst); + + System.out.println("Pos - neg inters = "+posIntersectionsUnderNegLst); + System.out.println("Neg - pos inters = "+negIntersectionsUnderPosLst); + + Boolean bPositiveClass = (float)posIntersectionsUnderNegLst.size()/(float)negIntersectionsUnderPosLst.size() > 1f; + + JSMDecision decision = new JSMDecision("keywordClassName", bPositiveClass, + posIntersections , negIntersections, + posIntersectionsUnderNeg, + negIntersectionsUnderPos, 
separationKeywords); + + + return decision; + + } + + private List<List<ParseTreeChunk>> computeIntersectionWithIntentExtendedByDeduction( + LinguisticPatternStructure psPos, int iConcept, + List<List<ParseTreeChunk>> chunksUnknown) { + + return md + .matchTwoSentencesGroupedChunksDeterministic(psPos.conceptList.get(iConcept).intent, chunksUnknown); + + } + + public Pair<List<String>, List<String>> reGroupByOccurrenceOfSeparationKeyword(List<String> posTexts, List<String> negTexts, String[] keywords){ + // do nothing in base class + + return new Pair<List<String>, List<String>>(posTexts, negTexts); + } + + public List<List<ParseTreeChunk>> reduceList(List<List<ParseTreeChunk>> list){ + float minScore = 1.3f; + List<List<ParseTreeChunk>> newList = new ArrayList<List<ParseTreeChunk>>(); + + + ParseTreeChunkListScorer scorer = new ParseTreeChunkListScorer(); + for( List<ParseTreeChunk> group: list){ + List<ParseTreeChunk> newGroup = new ArrayList<ParseTreeChunk>(); + for(ParseTreeChunk ch: group){ + if (scorer.getScore(ch) > minScore) + newGroup.add(ch); + } + if (newGroup.size()>0) + newList.add(newGroup); + } + + return newList; + + } + + public List<List<ParseTreeChunk>> flattenParseTreeChunkListList(List<List<List<ParseTreeChunk>>> listOfLists){ + List<List<ParseTreeChunk>> newList = new ArrayList<List<ParseTreeChunk>>(); + + for( List<List<ParseTreeChunk>> member: listOfLists){ + Set<ParseTreeChunk> newSet= new HashSet<ParseTreeChunk>(); + for( List<ParseTreeChunk> group: member){ + if (group.size()>0) + newSet.addAll(group); + } + newList.add(new ArrayList<ParseTreeChunk>(newSet)); + } + + return newList; + } + + public List<ParseTreeChunk> flattenParseTreeChunkLst(List<List<List<ParseTreeChunk>>> listOfLists){ + List<ParseTreeChunk> newList = new ArrayList<ParseTreeChunk>(); + Set<ParseTreeChunk> newSetAll = new HashSet<ParseTreeChunk>(); + + + for( List<List<ParseTreeChunk>> member: listOfLists){ + Set<ParseTreeChunk> newSet= new HashSet<ParseTreeChunk>(); + 
for( List<ParseTreeChunk> group: member){ + if (group.size()>0) + newSet.addAll(group); + } + newSetAll.addAll(newSet); + } + + return removeDuplicates(new ArrayList<ParseTreeChunk>(newSetAll)); + } + + public List<ParseTreeChunk> removeDuplicates(List<ParseTreeChunk> dupes){ + List<Integer> toDelete = new ArrayList<Integer>(); + for(int i=0; i<dupes.size(); i++) + for(int j=i+1; j<dupes.size(); j++){ + if (dupes.get(i).equals(dupes.get(j))){ + toDelete.add(j); + } + } + List<ParseTreeChunk> cleaned = new ArrayList<ParseTreeChunk>(); + for(int i=0; i<dupes.size(); i++){ + if (!toDelete.contains(i)) + cleaned.add(dupes.get(i)); + } + return cleaned; + } + + public List<ParseTreeChunk> subtract(List<ParseTreeChunk> main, List<ParseTreeChunk> toSubtract){ + List<Integer> toDelete = new ArrayList<Integer>(); + for(int i=0; i<main.size(); i++) + for(int j=0; j<toSubtract.size(); j++){ + if (main.get(i).equals(toSubtract.get(j))){ + toDelete.add(i); + } + } + List<ParseTreeChunk> cleaned = new ArrayList<ParseTreeChunk>(); + for(int i=0; i<main.size(); i++){ + if (!toDelete.contains(i)) + cleaned.add(main.get(i)); + } + return cleaned; + } + public List<ParseTreeChunk> intesectParseTreeChunkLists(List<ParseTreeChunk> a, List<ParseTreeChunk> b){ + List<Integer> inters = new ArrayList<Integer>(); + for(int i=0; i<a.size(); i++) + for(int j=0; j<b.size(); j++){ + if (a.get(i).equals(b.get(j))){ + inters.add(i); + } + } + List<ParseTreeChunk> cleaned = new ArrayList<ParseTreeChunk>(); + for(int i=0; i<a.size(); i++){ + if (inters.contains(i)) + cleaned.add(a.get(i)); + } + return cleaned; + } + + public Pair<List<List<List<ParseTreeChunk>>>, List<List<List<ParseTreeChunk>>>> + removeInconsistenciesFromPosNegIntersections(List<List<List<ParseTreeChunk>>> pos, + List<List<List<ParseTreeChunk>>> neg ){ + + List<ParseTreeChunk> posIntersectionsFl = flattenParseTreeChunkLst(pos); + List<ParseTreeChunk> negIntersectionsFl = flattenParseTreeChunkLst(neg); + + List<ParseTreeChunk> 
intersParseTreeChunkLists = intesectParseTreeChunkLists(posIntersectionsFl, negIntersectionsFl); + + List<List<List<ParseTreeChunk>>> cleanedFromInconsPos = new ArrayList<List<List<ParseTreeChunk>>>(), + cleanedFromInconsNeg = new ArrayList<List<List<ParseTreeChunk>>>(); + /* + System.out.println("pos = "+ pos); + System.out.println("neg = "+ neg); + System.out.println("pos flat = "+ posIntersectionsFl); + System.out.println("neg flat = "+ negIntersectionsFl); + System.out.println("inters = "+ intersParseTreeChunkLists); + */ + + for( List<List<ParseTreeChunk>> member: pos){ + List<List<ParseTreeChunk>> memberList = new ArrayList<List<ParseTreeChunk>>(); + for( List<ParseTreeChunk> group: member){ + List<ParseTreeChunk> newGroup = new ArrayList<ParseTreeChunk>(); + for(ParseTreeChunk ch: group){ + boolean bSkip = false; + for(ParseTreeChunk check: intersParseTreeChunkLists){ + if (check.equals(ch)) + bSkip=true; + } + if (!bSkip) + newGroup.add(ch); + } + if (newGroup.size()>0) + memberList.add(newGroup); + } + if (memberList.size()>0) + cleanedFromInconsPos.add(memberList); + } + + for( List<List<ParseTreeChunk>> member: neg){ + List<List<ParseTreeChunk>> memberList = new ArrayList<List<ParseTreeChunk>>(); + for( List<ParseTreeChunk> group: member){ + List<ParseTreeChunk> newGroup = new ArrayList<ParseTreeChunk>(); + for(ParseTreeChunk ch: group){ + boolean bSkip = false; + for(ParseTreeChunk check: intersParseTreeChunkLists){ + if (check.equals(ch)) + bSkip=true; + } + if (!bSkip) + newGroup.add(ch); + } + if (newGroup.size()>0) + memberList.add(newGroup); + } + if (memberList.size()>0) + cleanedFromInconsNeg.add(memberList); + } + return new Pair<List<List<List<ParseTreeChunk>>>, List<List<List<ParseTreeChunk>>>>(cleanedFromInconsPos, cleanedFromInconsNeg); + } + + + public static void main (String[] args) { + + String[] posArr = new String[] {"I rent an office space. This office is for my business. 
I can deduct office rental expense from my business profit to calculate net income. ", + "To run my business, I have to rent an office. The net business profit is calculated as follows. Rental expense needs to be subtracted from revenue. ", + "To store goods for my retail business I rent some space. When I calculate the net income, I take revenue and subtract business expenses such as office rent. ", + "I rent some space for my business. To calculate my net income, I subtract from revenue my rental business expense."}; + + String[] negArr = new String[] {"I rent out a first floor unit of my house to a travel business. I need to add the rental income to my profit. However, when I repair my house, I can deduct the repair expense from my rental income. ", + "I receive rental income from my office. I have to claim it as a profit in my tax forms. I need to add my rental income to my profits, but subtract rental expenses such as repair from it. ", + "I advertised my property as a business rental. Advertisement and repair expenses can be subtracted from the rental income. Remaining rental income needs to be added to my profit and be reported as taxable profit. ", + "I showed my property to a business owner to rent. Expenses on my time spent on advertisement are subtracted from the rental income. My rental profits are added to my taxable income. "}; + + String unknown = "I do not want to rent anything to anyone. I just want to rent a space for myself. I neither calculate deduction of individual or business tax. I subtract my tax from my income"; + + JSMDecision dec = new JSMLearnerOnLatticeBase(). 
+ buildLearningModel(Arrays.asList(posArr), Arrays.asList(negArr), unknown, null); + + + } +} http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/1f97041b/opennlp-similarity/src/main/java/opennlp/tools/jsmlearning/JSMLearnerOnLatticeWithAbduction.java ---------------------------------------------------------------------- diff --git a/opennlp-similarity/src/main/java/opennlp/tools/jsmlearning/JSMLearnerOnLatticeWithAbduction.java b/opennlp-similarity/src/main/java/opennlp/tools/jsmlearning/JSMLearnerOnLatticeWithAbduction.java new file mode 100644 index 0000000..501e908 --- /dev/null +++ b/opennlp-similarity/src/main/java/opennlp/tools/jsmlearning/JSMLearnerOnLatticeWithAbduction.java @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package opennlp.tools.jsmlearning; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashSet; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Set; + +import org.apache.commons.collections.ListUtils; + +import edu.stanford.nlp.util.StringUtils; +import opennlp.tools.fca.ConceptLattice; +import opennlp.tools.fca.FcaWriter; +import opennlp.tools.fca.FormalConcept; +import opennlp.tools.parse_thicket.pattern_structure.LinguisticPatternStructure; +import opennlp.tools.similarity.apps.BingWebQueryRunner; +import opennlp.tools.similarity.apps.HitBase; +import opennlp.tools.similarity.apps.utils.Pair; +import opennlp.tools.textsimilarity.ParseTreeChunk; +import opennlp.tools.textsimilarity.ParseTreeChunkListScorer; +import opennlp.tools.textsimilarity.ParseTreeMatcherDeterministic; +import opennlp.tools.textsimilarity.chunker2matcher.ParserChunker2MatcherProcessor; + +public class JSMLearnerOnLatticeWithAbduction extends JSMLearnerOnLatticeWithDeduction{ + + + + + public JSMDecision buildLearningModel(List<String> posTexts, List<String> negTexts, + String unknown, String[] separationKeywords){ + JSMDecision decision = super.buildLearningModel(posTexts, negTexts, unknown, separationKeywords); + // verify each hypothesis + //TODO + return decision; + + } + + + + + public static void main (String[] args) { + + String[] posArr = new String[] {"I rent an office space. This office is for my business. I can deduct office rental expense from my business profit to calculate net income. ", + "To run my business, I have to rent an office. The net business profit is calculated as follows. Rental expense needs to be subtracted from revenue. ", + "To store goods for my retail business I rent some space. When I calculate the net income, I take revenue and subtract business expenses such as office rent. ", + "I rent some space for my business. 
To calculate my net income, I subtract from revenue my rental business expense."}; + + String[] negArr = new String[] {"I rent out a first floor unit of my house to a travel business. I need to add the rental income to my profit. However, when I repair my house, I can deduct the repair expense from my rental income. ", + "I receive rental income from my office. I have to claim it as a profit in my tax forms. I need to add my rental income to my profits, but subtract rental expenses such as repair from it. ", + "I advertised my property as a business rental. Advertisement and repair expenses can be subtracted from the rental income. Remaining rental income needs to be added to my profit and be reported as taxable profit. ", + "I showed my property to a business owner to rent. Expenses on my time spent on advertisement are subtracted from the rental income. My rental profits are added to my taxable income. "}; + + String unknown = "I do not want to rent anything to anyone. I just want to rent a space for myself. I neither calculate deduction of individual or business tax. I subtract my tax from my income"; + JSMLearnerOnLatticeWithAbduction jsm = new JSMLearnerOnLatticeWithAbduction(); + JSMDecision dec1 = // may be determined by 'subtract' + jsm.buildLearningModel(Arrays.asList(posArr), Arrays.asList(negArr), unknown , new String[]{"subtract"}); + JSMDecision dec2 = // may be determined by ... + jsm.buildLearningModel(Arrays.asList(posArr), Arrays.asList(negArr), unknown , new String[]{"business"}); + JSMDecision dec3 = // may be determined by ... + jsm.buildLearningModel(Arrays.asList(posArr), Arrays.asList(negArr), unknown , new String[]{"property"}); + // Finally, do prediction + JSMDecision dec = // may be determined by ... + jsm.buildLearningModel(Arrays.asList(posArr), Arrays.asList(negArr), unknown , new String[]{"property"}); + + + + + + } +}
