Author: reto
Date: Thu Mar 4 15:30:46 2010
New Revision: 919022
URL: http://svn.apache.org/viewvc?rev=919022&view=rev
Log:
CLEREZZA-145: added Smusher doing IFP smushing
Added:
incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.rdf.utils/src/main/java/org/apache/clerezza/rdf/utils/Smusher.java
incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.rdf.utils/src/test/java/org/apache/clerezza/rdf/utils/IfpSmushTest.java
Added:
incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.rdf.utils/src/main/java/org/apache/clerezza/rdf/utils/Smusher.java
URL:
http://svn.apache.org/viewvc/incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.rdf.utils/src/main/java/org/apache/clerezza/rdf/utils/Smusher.java?rev=919022&view=auto
==============================================================================
---
incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.rdf.utils/src/main/java/org/apache/clerezza/rdf/utils/Smusher.java
(added)
+++
incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.rdf.utils/src/main/java/org/apache/clerezza/rdf/utils/Smusher.java
Thu Mar 4 15:30:46 2010
@@ -0,0 +1,210 @@
+/*
+ * Copyright 2010 reto.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.clerezza.rdf.utils;
+
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.clerezza.rdf.core.BNode;
+import org.apache.clerezza.rdf.core.MGraph;
+import org.apache.clerezza.rdf.core.NonLiteral;
+import org.apache.clerezza.rdf.core.Resource;
+import org.apache.clerezza.rdf.core.Triple;
+import org.apache.clerezza.rdf.core.TripleCollection;
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.clerezza.rdf.core.impl.SimpleMGraph;
+import org.apache.clerezza.rdf.core.impl.TripleImpl;
+import org.apache.clerezza.rdf.ontologies.OWL;
+import org.apache.clerezza.rdf.ontologies.RDF;
+
+/**
+ * A utility to equate duplicate nodes in an MGraph. Currently only nodes that
+ * share a value for an inverse functional property (IFP) are equated.
+ *
+ * @author reto
+ */
+public class Smusher {
+
+ /**
+  * Smushes <code>mGraph</code> in place, given the ontological facts in
+  * <code>tBox</code>. Currently only one step of IFP smushing is done, i.e.
+  * only inverse functional properties (IFPs) are taken into account and only
+  * nodes that have the same node as IFP object in the original graph are
+  * equated (calling the method a second time might lead to additional
+  * smushings).
+  * <p>
+  * Every group of equated nodes is replaced by the single node returned by
+  * <code>getReplacementFor</code>, and the <code>owl:sameAs</code> triples
+  * collected by that method are added to <code>mGraph</code>. Nodes that are
+  * not equated with any other node are left untouched.
+  *
+  * @param mGraph the graph to smush; it is modified by this method
+  * @param tBox the triples in which properties are declared to be of type
+  *        <code>owl:InverseFunctionalProperty</code>
+  */
+ public static void smush(MGraph mGraph, TripleCollection tBox) {
+     final Set<UriRef> ifps = getIfps(tBox);
+     // group the subjects by the (IFP, object) pair they are described with
+     final Map<PredicateObject, Set<NonLiteral>> ifp2nodesMap =
+             new HashMap<PredicateObject, Set<NonLiteral>>();
+     for (Iterator<Triple> it = mGraph.iterator(); it.hasNext();) {
+         final Triple triple = it.next();
+         final UriRef predicate = triple.getPredicate();
+         if (!ifps.contains(predicate)) {
+             continue;
+         }
+         final PredicateObject po =
+                 new PredicateObject(predicate, triple.getObject());
+         Set<NonLiteral> equivalentNodes = ifp2nodesMap.get(po);
+         if (equivalentNodes == null) {
+             equivalentNodes = new HashSet<NonLiteral>();
+             ifp2nodesMap.put(po, equivalentNodes);
+         }
+         equivalentNodes.add(triple.getSubject());
+     }
+     final Set<Set<NonLiteral>> unitedEquivalenceSets =
+             uniteEquivalenceSets(ifp2nodesMap.values());
+     final Map<NonLiteral, NonLiteral> current2ReplacementMap =
+             new HashMap<NonLiteral, NonLiteral>();
+     final MGraph owlSameAsGraph = new SimpleMGraph();
+     for (Set<NonLiteral> equivalenceSet : unitedEquivalenceSets) {
+         if (equivalenceSet.size() < 2) {
+             // a node that is equivalent to nothing but itself must not be
+             // replaced, otherwise every blank node with an IFP value would
+             // needlessly be swapped for a fresh one
+             continue;
+         }
+         final NonLiteral replacement =
+                 getReplacementFor(equivalenceSet, owlSameAsGraph);
+         for (NonLiteral current : equivalenceSet) {
+             if (!current.equals(replacement)) {
+                 current2ReplacementMap.put(current, replacement);
+             }
+         }
+     }
+     // the rewritten triples are collected first and added after the
+     // iteration, so the graph is only changed through the iterator while
+     // it is being traversed
+     final Set<Triple> newTriples = new HashSet<Triple>();
+     for (Iterator<Triple> it = mGraph.iterator(); it.hasNext();) {
+         final Triple triple = it.next();
+         final NonLiteral subject = triple.getSubject();
+         NonLiteral subjectReplacement = current2ReplacementMap.get(subject);
+         final Resource object = triple.getObject();
+         @SuppressWarnings("element-type-mismatch")
+         Resource objectReplacement = current2ReplacementMap.get(object);
+         if ((subjectReplacement == null) && (objectReplacement == null)) {
+             continue;
+         }
+         it.remove();
+         if (subjectReplacement == null) {
+             subjectReplacement = subject;
+         }
+         if (objectReplacement == null) {
+             objectReplacement = object;
+         }
+         newTriples.add(new TripleImpl(subjectReplacement,
+                 triple.getPredicate(), objectReplacement));
+     }
+     for (Triple triple : newTriples) {
+         mGraph.add(triple);
+     }
+     mGraph.addAll(owlSameAsGraph);
+ }
+
+ /**
+  * Collects the properties that <code>tBox</code> declares to be of type
+  * <code>owl:InverseFunctionalProperty</code>.
+  *
+  * @param tBox the triples containing the property declarations
+  * @return the declared inverse functional properties that are named by a
+  *         UriRef; subjects that are not UriRefs are skipped, as a triple's
+  *         predicate is always a UriRef and they could thus never match
+  */
+ private static Set<UriRef> getIfps(TripleCollection tBox) {
+     final Iterator<Triple> ifpDefinitions = tBox.filter(null, RDF.type,
+             OWL.InverseFunctionalProperty);
+     final Set<UriRef> ifps = new HashSet<UriRef>();
+     while (ifpDefinitions.hasNext()) {
+         final NonLiteral property = ifpDefinitions.next().getSubject();
+         // guard the cast: a blank node typed as IFP must not cause a
+         // ClassCastException
+         if (property instanceof UriRef) {
+             ifps.add((UriRef) property);
+         }
+     }
+     return ifps;
+ }
+
+ /**
+  * Chooses the node that takes the place of all members of an equivalence
+  * set.
+  * <p>
+  * If the set contains no UriRef a new BNode is returned. Otherwise one of
+  * the contained UriRefs is returned and, for every other UriRef in the set,
+  * a triple stating that it is <code>owl:sameAs</code> the returned one is
+  * added to <code>owlSameAsGraph</code>.
+  *
+  * @param equivalenceSet the nodes that are to be equated
+  * @param owlSameAsGraph the graph receiving the owl:sameAs statements
+  * @return the node to use instead of the members of the set
+  */
+ private static NonLiteral getReplacementFor(Set<NonLiteral> equivalenceSet,
+         MGraph owlSameAsGraph) {
+     final Set<UriRef> namedNodes = new HashSet<UriRef>();
+     for (NonLiteral member : equivalenceSet) {
+         if (member instanceof UriRef) {
+             namedNodes.add((UriRef) member);
+         }
+     }
+     if (namedNodes.isEmpty()) {
+         return new BNode();
+     }
+     // instead of an arbitrary one we might either decide lexicographically
+     // or look at their frequency in mGraph
+     final Iterator<UriRef> remaining = namedNodes.iterator();
+     final UriRef chosen = remaining.next();
+     // with a single UriRef this loop does not run and no triple is added
+     while (remaining.hasNext()) {
+         final UriRef alias = remaining.next();
+         owlSameAsGraph.add(new TripleImpl(alias, OWL.sameAs, chosen));
+     }
+     return chosen;
+ }
+
+ /**
+  * Unites the given sets until no two of the resulting sets share a member,
+  * i.e. sets that overlap directly or via a chain of other sets end up in
+  * one united set. The sets passed in are not modified.
+  *
+  * @param originalSets the possibly overlapping sets of equivalent nodes
+  * @return pairwise disjoint sets that together contain every node of the
+  *         original sets
+  */
+ private static Set<Set<NonLiteral>> uniteEquivalenceSets(
+         Collection<Set<NonLiteral>> originalSets) {
+     // maps every node seen so far to the united set containing it; all
+     // members of a united set are mapped to the very same set instance
+     final Map<NonLiteral, Set<NonLiteral>> node2UnitedSet =
+             new HashMap<NonLiteral, Set<NonLiteral>>();
+     for (Set<NonLiteral> originalSet : originalSets) {
+         final Set<NonLiteral> united = new HashSet<NonLiteral>(originalSet);
+         // absorb every united set that one of the members already is in
+         for (NonLiteral node : originalSet) {
+             final Set<NonLiteral> earlier = node2UnitedSet.get(node);
+             if (earlier != null) {
+                 united.addAll(earlier);
+             }
+         }
+         // re-point all members, including the absorbed ones, to the new set
+         for (NonLiteral node : united) {
+             node2UnitedSet.put(node, united);
+         }
+     }
+     // every united set is the value of each of its members, add it once
+     final Set<Set<NonLiteral>> result = new HashSet<Set<NonLiteral>>();
+     final Set<NonLiteral> collected = new HashSet<NonLiteral>();
+     for (Map.Entry<NonLiteral, Set<NonLiteral>> entry
+             : node2UnitedSet.entrySet()) {
+         if (!collected.contains(entry.getKey())) {
+             collected.addAll(entry.getValue());
+             result.add(entry.getValue());
+         }
+     }
+     return result;
+ }
+
+ /**
+  * The combination of a predicate and an object. Two instances are equal
+  * if both their predicates and their objects are equal, which makes the
+  * class usable as a key in hash based collections.
+  */
+ static class PredicateObject {
+
+     final UriRef predicate;
+     final Resource object;
+
+     /**
+      * @param predicate the predicate, must not be null
+      * @param object the object, must not be null
+      */
+     public PredicateObject(UriRef predicate, Resource object) {
+         this.predicate = predicate;
+         this.object = object;
+     }
+
+     @Override
+     public boolean equals(Object obj) {
+         if (this == obj) {
+             return true;
+         }
+         if (obj == null || getClass() != obj.getClass()) {
+             return false;
+         }
+         final PredicateObject other = (PredicateObject) obj;
+         // compare by value: equal predicates need not be the same instance
+         return this.predicate.equals(other.predicate)
+                 && this.object.equals(other.object);
+     }
+
+     @Override
+     public int hashCode() {
+         int hash = 3;
+         hash = 29 * hash + this.predicate.hashCode();
+         hash = 13 * hash + this.object.hashCode();
+         return hash;
+     }
+ }
+}
Added:
incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.rdf.utils/src/test/java/org/apache/clerezza/rdf/utils/IfpSmushTest.java
URL:
http://svn.apache.org/viewvc/incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.rdf.utils/src/test/java/org/apache/clerezza/rdf/utils/IfpSmushTest.java?rev=919022&view=auto
==============================================================================
---
incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.rdf.utils/src/test/java/org/apache/clerezza/rdf/utils/IfpSmushTest.java
(added)
+++
incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.rdf.utils/src/test/java/org/apache/clerezza/rdf/utils/IfpSmushTest.java
Thu Mar 4 15:30:46 2010
@@ -0,0 +1,118 @@
+/*
+ * Copyright 2010 reto.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.clerezza.rdf.utils;
+
+import org.apache.clerezza.rdf.core.BNode;
+import org.apache.clerezza.rdf.core.MGraph;
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.clerezza.rdf.core.impl.PlainLiteralImpl;
+import org.apache.clerezza.rdf.core.impl.SimpleMGraph;
+import org.apache.clerezza.rdf.core.impl.TripleImpl;
+import org.apache.clerezza.rdf.core.serializedform.Serializer;
+import org.apache.clerezza.rdf.ontologies.FOAF;
+import org.apache.clerezza.rdf.ontologies.OWL;
+import org.apache.clerezza.rdf.ontologies.RDF;
+import org.apache.clerezza.rdf.ontologies.RDFS;
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ *
+ * @author reto
+ */
+public class IfpSmushTest {
+
+ private MGraph ontology = new SimpleMGraph();
+ {
+ ontology.add(new TripleImpl(FOAF.mbox, RDF.type,
OWL.InverseFunctionalProperty));
+ }
+
+ /**
+  * Two blank nodes with the same mbox: after smushing a single mbox triple
+  * and the two comments are expected.
+  */
+ @Test
+ public void simpleBNode() {
+     final UriRef sharedMbox = new UriRef("mailto:[email protected]");
+     final BNode first = new BNode();
+     final BNode second = new BNode();
+     final MGraph graph = new SimpleMGraph();
+     graph.add(new TripleImpl(first, FOAF.mbox, sharedMbox));
+     graph.add(new TripleImpl(first, RDFS.comment,
+             new PlainLiteralImpl("a comment")));
+     graph.add(new TripleImpl(second, FOAF.mbox, sharedMbox));
+     graph.add(new TripleImpl(second, RDFS.comment,
+             new PlainLiteralImpl("another comment")));
+     Smusher.smush(graph, ontology);
+     Assert.assertEquals(3, graph.size());
+ }
+
+ /**
+  * Three blank nodes of which the first and the second share one mbox and
+  * the second and the third share another one: all three are to be equated,
+  * leaving the two distinct mbox triples and the three comments.
+  */
+ @Test
+ public void overlappingEquivalenceClasses() {
+     MGraph mGraph = new SimpleMGraph();
+     // the two mboxes must differ, otherwise the equivalence classes do
+     // not merely overlap but coincide and only 4 triples would remain
+     UriRef mbox1 = new UriRef("mailto:foo@example.org");
+     UriRef mbox2 = new UriRef("mailto:bar@example.org");
+     final BNode bNode1 = new BNode();
+     mGraph.add(new TripleImpl(bNode1, FOAF.mbox, mbox1));
+     mGraph.add(new TripleImpl(bNode1, RDFS.comment,
+             new PlainLiteralImpl("a comment")));
+     final BNode bNode2 = new BNode();
+     mGraph.add(new TripleImpl(bNode2, FOAF.mbox, mbox1));
+     mGraph.add(new TripleImpl(bNode2, FOAF.mbox, mbox2));
+     mGraph.add(new TripleImpl(bNode2, RDFS.comment,
+             new PlainLiteralImpl("another comment")));
+     final BNode bNode3 = new BNode();
+     mGraph.add(new TripleImpl(bNode3, FOAF.mbox, mbox2));
+     mGraph.add(new TripleImpl(bNode3, RDFS.comment,
+             new PlainLiteralImpl("yet another comment")));
+     Smusher.smush(mGraph, ontology);
+     Assert.assertEquals(5, mGraph.size());
+ }
+
+ /**
+  * A UriRef and a blank node with the same mbox: after smushing a single
+  * mbox triple and the two comments are expected.
+  */
+ @Test
+ public void oneUriRef() {
+     final UriRef sharedMbox = new UriRef("mailto:[email protected]");
+     final UriRef named = new UriRef("http://example.org/");
+     final BNode anonymous = new BNode();
+     final MGraph graph = new SimpleMGraph();
+     graph.add(new TripleImpl(named, FOAF.mbox, sharedMbox));
+     graph.add(new TripleImpl(named, RDFS.comment,
+             new PlainLiteralImpl("a comment")));
+     graph.add(new TripleImpl(anonymous, FOAF.mbox, sharedMbox));
+     graph.add(new TripleImpl(anonymous, RDFS.comment,
+             new PlainLiteralImpl("another comment")));
+     Smusher.smush(graph, ontology);
+     Assert.assertEquals(3, graph.size());
+ }
+
+ /**
+  * Two UriRefs with the same mbox: after smushing a single mbox triple, the
+  * two comments and one owl:sameAs triple linking the two UriRefs are
+  * expected.
+  */
+ @Test
+ public void twoUriRefs() {
+     MGraph mGraph = new SimpleMGraph();
+     UriRef mbox1 = new UriRef("mailto:[email protected]");
+     final UriRef resource1 = new UriRef("http://example.org/");
+     mGraph.add(new TripleImpl(resource1, FOAF.mbox, mbox1));
+     mGraph.add(new TripleImpl(resource1, RDFS.comment,
+             new PlainLiteralImpl("a comment")));
+     final UriRef resource2 = new UriRef("http://2.example.org/");
+     mGraph.add(new TripleImpl(resource2, FOAF.mbox, mbox1));
+     mGraph.add(new TripleImpl(resource2, RDFS.comment,
+             new PlainLiteralImpl("another comment")));
+     Smusher.smush(mGraph, ontology);
+     Assert.assertEquals(4, mGraph.size());
+ }
+
+}