Author: mir Date: Tue Mar 2 15:46:53 2010 New Revision: 918082 URL: http://svn.apache.org/viewvc?rev=918082&view=rev Log: CLEREZZA-20: made stable serializer bundle.
Added: incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/ (with props) incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/pom.xml incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/ incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/main/ incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/main/java/ incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/main/java/org/ incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/main/java/org/apache/ incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/main/java/org/apache/clerezza/ incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/main/java/org/apache/clerezza/rdf/ incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/main/java/org/apache/clerezza/rdf/jena/ incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/main/java/org/apache/clerezza/rdf/jena/serializer/ incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/main/java/org/apache/clerezza/rdf/stable/ incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/main/java/org/apache/clerezza/rdf/stable/serializer/ incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/main/java/org/apache/clerezza/rdf/stable/serializer/StableSerializerProvider.java incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/main/resources/ incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/main/resources/META-INF/ incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/main/resources/META-INF/services/ 
incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/main/resources/META-INF/services/org.apache.clerezza.rdf.core.serializedform.SerializingProvider incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/test/ incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/test/java/ incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/test/java/org/ incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/test/java/org/apache/ incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/test/java/org/apache/clerezza/ incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/test/java/org/apache/clerezza/rdf/ incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/test/java/org/apache/clerezza/rdf/jena/ incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/test/java/org/apache/clerezza/rdf/jena/serializer/ incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/test/java/org/apache/clerezza/rdf/stable/ incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/test/java/org/apache/clerezza/rdf/stable/serializer/ incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/test/java/org/apache/clerezza/rdf/stable/serializer/RandomGraph.java incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/test/java/org/apache/clerezza/rdf/stable/serializer/StableSerializerProviderTest.java incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/target/ Propchange: incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/ ------------------------------------------------------------------------------ --- svn:ignore (added) +++ svn:ignore Tue Mar 2 15:46:53 2010 @@ -0,0 +1 @@ +target 
Added: incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/pom.xml URL: http://svn.apache.org/viewvc/incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/pom.xml?rev=918082&view=auto ============================================================================== --- incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/pom.xml (added) +++ incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/pom.xml Tue Mar 2 15:46:53 2010 @@ -0,0 +1,42 @@ +<?xml version="1.0" encoding="UTF-8" standalone="no"?><project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> + <modelVersion>4.0.0</modelVersion> + <parent> + <artifactId>org.apache.clerezza.parent</artifactId> + <groupId>org.apache.clerezza</groupId> + <version>0.2-incubating-SNAPSHOT</version> + </parent> + <groupId>org.apache.clerezza</groupId> + <artifactId>org.apache.clerezza.rdf.stable.serializer</artifactId> + <packaging>bundle</packaging> + <version>0.1-incubating-SNAPSHOT</version> + <name>Clerezza - Stable Serializer</name> + <description>A SerializingProvider that produces a stable output. 
This means + if the graph to be serialized changes a little, then also the output + changes a little.</description> + <dependencies> + <dependency> + <groupId>org.apache.clerezza</groupId> + <artifactId>org.apache.clerezza.rdf.core</artifactId> + </dependency> + <dependency> + <groupId>org.apache.felix</groupId> + <artifactId>org.apache.felix.scr.annotations</artifactId> + </dependency> + <dependency> + <groupId>org.apache.clerezza</groupId> + <artifactId>org.apache.clerezza.rdf.jena.serializer</artifactId> + <scope>test</scope> + </dependency> + <dependency> + <groupId>junit</groupId> + <artifactId>junit</artifactId> + <scope>test</scope> + </dependency> + <dependency> + <groupId>commons-lang</groupId> + <artifactId>commons-lang</artifactId> + <version>2.4</version> + <scope>test</scope> + </dependency> + </dependencies> +</project> Added: incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/main/java/org/apache/clerezza/rdf/stable/serializer/StableSerializerProvider.java URL: http://svn.apache.org/viewvc/incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/main/java/org/apache/clerezza/rdf/stable/serializer/StableSerializerProvider.java?rev=918082&view=auto ============================================================================== --- incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/main/java/org/apache/clerezza/rdf/stable/serializer/StableSerializerProvider.java (added) +++ incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/main/java/org/apache/clerezza/rdf/stable/serializer/StableSerializerProvider.java Tue Mar 2 15:46:53 2010 @@ -0,0 +1,421 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.clerezza.rdf.stable.serializer; + +import java.io.BufferedReader; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.io.StringReader; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import java.util.Collections; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Vector; +import org.apache.clerezza.rdf.core.BNode; +import org.apache.clerezza.rdf.core.Literal; +import org.apache.clerezza.rdf.core.Triple; +import org.apache.clerezza.rdf.core.TripleCollection; +import org.apache.clerezza.rdf.core.UriRef; +import org.apache.clerezza.rdf.core.impl.SimpleMGraph; +import org.apache.clerezza.rdf.core.serializedform.SerializingProvider; +import org.apache.clerezza.rdf.core.serializedform.SupportedFormat; +import org.apache.felix.scr.annotations.Component; +import org.apache.felix.scr.annotations.Property; +import org.apache.felix.scr.annotations.Reference; +import org.apache.felix.scr.annotations.Service; +import org.osgi.service.component.ComponentContext; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/* + * Credits: + * + * Blank node labeling algorithm by Jeremy J. 
Carroll (see "Signing RDF Graphs", + * HP technical report 2003) + * + * Minimum Self-contained Graph (MSG) decomposition algorithm by + * Giovanni Tummarello, Christian Morbidoni, Paolo Puliti, Francesco Piazza, + * Università Politecnica delle Marche, Italy + * (see "Signing individual fragments of an RDF graph", 14th International + * World Wide Web Conference WWW2005, Poster track, May 2005, Chiba, Japan) + */ + +/** + * A {...@link org.apache.clerezza.rdf.core.serializedform.SerializingProvider} that tries + * to provide similar results when serializing graphs. Specifically it tries to + * label blank nodes deterministically with reasonable complexity. + * + * This serilaizer does not guarantee a deterministic result but it may minimze + * the ammount of modified lines in serilaized output. + * + * @author Daniel Spicar (daniel.spi...@access.uzh.ch) + */ +...@component +...@service(SerializingProvider.class) +...@supportedformat({SupportedFormat.N_TRIPLE}) +public class StableSerializerProvider implements SerializingProvider { + + @Property(description="Specifies maximum ammount of blank node " + + "labeling recursions, may increase performance at the expense of stability " + + "(0 = no limit).", intValue=0) + public static final String MAX_LABELING_ITERATIONS = "max_labeling_iterations"; + + public static final String PARSER_FILTER = + "(supportedFormat=" + SupportedFormat.N3 +")"; + + @Reference(target=PARSER_FILTER) + SerializingProvider serializer; + + private int maxLabelingIterations = -1; + + private final Logger logger = LoggerFactory.getLogger(getClass()); + + protected void activate(ComponentContext cCtx) { + maxLabelingIterations = (Integer) cCtx.getProperties(). 
+ get(MAX_LABELING_ITERATIONS); + logger.info("StableSerializerProvider activated"); + } + + @Override + public void serialize(OutputStream os, TripleCollection tc, + String formatIdentifier) { + + try { + List<String> lines = new Vector<String>(); + + for (TripleCollection msg : decomposeGraphToMSGs(tc)) { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + + serializer. + serialize(baos, msg, formatIdentifier); + BufferedReader serializedGraph = + new BufferedReader(new StringReader(baos.toString())); + lines.addAll(labelBlankNodes(serializedGraph, + getMDHexString(msg, "MD5"))); + } + Collections.sort(lines); + for (String l : lines) { + os.write((l + "\n").getBytes()); + } + } catch (IOException ex) { + logger.error("Exception while parsing serilaized graph: {}", ex); + } catch (NoSuchAlgorithmException ex) { + logger.error("Exception while trying to generate graph hash: {}", ex); + } + } + + private List<TripleCollection> decomposeGraphToMSGs(TripleCollection tc) { + + TripleCollection tmp = new SimpleMGraph(); + tmp.addAll(tc); + + List<TripleCollection> msgSet = new Vector<TripleCollection>(); + + while (tmp.size() > 0) { + Triple triple = tmp.iterator().next(); + TripleCollection msg = new SimpleMGraph(); + + fillMSG(triple, tmp, msg); + msgSet.add(msg); + } + + return msgSet; + } + + private void fillMSG(Triple triple, TripleCollection tc, + TripleCollection msg) { + + msg.add(triple); + tc.remove(triple); + + if (triple.getSubject() instanceof BNode) { + Iterator<Triple> it = tc.filter(null, null, triple.getSubject()); + while (it.hasNext()) { + fillMSG(it.next(), tc, msg); + } + } + if (triple.getObject() instanceof BNode) { + Iterator<Triple> it = + tc.filter((BNode) triple.getObject(), null, null); + while (it.hasNext()) { + fillMSG(it.next(), tc, msg); + } + } + } + + private List<String> labelBlankNodes(BufferedReader serializedGraph, + String prefix) throws IOException { + + String line = null; + List<String> lines = new Vector<String>(); 
+ + long commentedIdentifiers = 0; + while ((line = serializedGraph.readLine()) != null) { + try { + commentedIdentifiers = commentBlankNodeLabels(line, + commentedIdentifiers, lines); + } catch(IOException ex) { + logger.error("Exception while trying to parse line: " + + line + "\n{}", ex); + } + } + + Collections.sort(lines); + + Map<String, Long> labels = new HashMap<String, Long>(); + long[] counters = {1, commentedIdentifiers}; //counter[0] = genSymCounter + long commentedIdentifierBefore; + int ctr = 0; + do { + commentedIdentifierBefore = counters[1]; + counters = generateBlankNodeLabels(lines, labels, counters[0], + counters[1], prefix); + applyLabels(lines, labels, prefix); + Collections.sort(lines); + if(++ctr == maxLabelingIterations) { + break; + } + } while (counters[1] > 0 && commentedIdentifierBefore != counters[1]); + + if (counters[1] > 0) { + labelBlankNodesNonDeterministically(lines, counters, labels, prefix); + } + + return lines; + } + + + private long[] generateBlankNodeLabels(List<String> lines, + Map<String, Long> labels, long genSymCounter, long comments, + String prefix) { + + for (int i = 2; i <= lines.size(); ++i) { + StringBuilder previousLine = new StringBuilder(lines.get(i - 2)); + StringBuilder currentLine = new StringBuilder(lines.get(i - 1)); + StringBuilder nextLine = new StringBuilder(); + if (i < lines.size()) { + nextLine.append(lines.get(i)); + } + + String currentLineWithoutComments = stripComments(currentLine); + if (stripComments(previousLine).equals(currentLineWithoutComments) || + stripComments(nextLine).equals(currentLineWithoutComments)) { + continue; + } + + int indexOfObject = checkObject(currentLineWithoutComments); + if (indexOfObject != -1) { + genSymCounter = applyGenSymIdentifier(labels, genSymCounter, + currentLine, indexOfObject, prefix); + --comments; + } + + int indexOfSubject = checkSubject(currentLineWithoutComments); + if (indexOfSubject != -1) { + genSymCounter = applyGenSymIdentifier(labels, 
genSymCounter, + currentLine, indexOfSubject, prefix); + --comments; + } + + lines.set(i - 1, currentLine.toString()); + } + + long[] result = {genSymCounter, comments}; + return result; + } + + private void applyLabels(List<String> lines, Map<String, Long> labels, + String prefix) { + + for (int i = 0; i < lines.size(); ++i) { + StringBuilder line = new StringBuilder(lines.get(i)); + + int indexOfObject = checkObject(stripComments(line)); + if (indexOfObject != -1) { + int indexOfComment = line.lastIndexOf("#_:"); + String identifier = + line.substring(indexOfComment + 1, line.length()); + + if (labels.containsKey(identifier)) { + line.delete(indexOfComment, line.length()); + line.delete(indexOfObject, indexOfObject + 1); + line.insert(indexOfObject, "_:" + prefix + + labels.get(identifier)); + } + } + + int indexOfSubject = checkSubject(stripComments(line)); + if (indexOfSubject != -1) { + int indexOfComment = line.lastIndexOf("#_:"); + String identifier = + line.substring(indexOfComment + 1, line.length()); + + if (labels.containsKey(identifier)) { + line.delete(indexOfComment, line.length()); + line.delete(indexOfSubject, indexOfSubject + 1); + line.insert(indexOfSubject, "_:" + prefix + + labels.get(identifier)); + } + } + + lines.set(i, line.toString()); + } + } + + private long commentBlankNodeLabels(String line, long commentedIdentifiers, + List<String> lines) throws IOException { + + StringReader lineReader = new StringReader(line); + int data = lineReader.read(); + while (data != -1) { + if (data == '<') { + //skip until end tag + while ((data = lineReader.read()) != '>') { + checkForEndOfStream(data); + } + } else if (data == '"') { + break; + } else if (data == '_') { + if ((data = lineReader.read()) == ':') { + String identifier = "_:"; + while ((data = lineReader.read()) != ' ') { + checkForEndOfStream(data); + identifier = identifier.concat( + Character.toString((char) data)); + } + line = line.replaceFirst(identifier, "~"); + line = line.concat(" #" 
+ identifier); + ++commentedIdentifiers; + checkForEndOfStream(data); + } + } + data = lineReader.read(); + } + lines.add(line); + return commentedIdentifiers; + } + + private long applyGenSymIdentifier(Map<String, Long> labels, + long genSymCounter, StringBuilder currentLine, int where, + String prefix) { + + int index = currentLine.lastIndexOf("#_:"); + String identifier = + currentLine.substring(index + 1, currentLine.length()); + currentLine.delete(index, currentLine.length()); + if (!labels.containsKey(identifier)) { + labels.put(identifier, genSymCounter++); + } + currentLine.delete(where, where + 1); + currentLine.insert(where, "_:" + prefix + labels.get(identifier)); + + return genSymCounter; + } + + private void labelBlankNodesNonDeterministically(List<String> lines, + long[] counters, Map<String, Long> labels, String prefix) { + + for (int i = 0; i < lines.size(); ++i) { + StringBuilder currentLine = new StringBuilder(lines.get(i)); + String currentLineWithoutComments = stripComments(currentLine); + int indexOfObject = checkObject(currentLineWithoutComments); + if (indexOfObject != -1) { + counters[0] = applyGenSymIdentifier(labels, counters[0], + currentLine, indexOfObject, prefix); + --(counters[1]); + } + int indexOfSubject = checkSubject(currentLineWithoutComments); + if (indexOfSubject != -1) { + counters[0] = applyGenSymIdentifier(labels, counters[0], + currentLine, indexOfSubject, prefix); + --(counters[1]); + } + lines.set(i, currentLine.toString()); + } + Collections.sort(lines); + } + + private void checkForEndOfStream(int data) throws IOException { + if (data == -1) { + throw new IOException("Parsing Error!"); + } + } + + private int checkObject(String line) { + int index = -1; + if (line.charAt((index = line.length() - 3)) == '~') { + return index; + } + return -1; + } + + private int checkSubject(String line) { + if (line.charAt(0) == '~') { + return 0; + } + return -1; + } + + + private String stripComments(StringBuilder line) { + if 
(line.length() < 3) { + return ""; + } + return line.substring(0, line.lastIndexOf(" .") + 2); + } + + private String getMDHexString(TripleCollection tc, String algorithm) + throws NoSuchAlgorithmException { + + MessageDigest md = MessageDigest.getInstance(algorithm); + StringBuffer input = new StringBuffer(); + for (Triple t : tc) { + if (!(t.getSubject() instanceof BNode)) { + input.append(((UriRef) t.getSubject()).hashCode()); + } + input.append(t.getPredicate().hashCode()); + if (!(t.getObject() instanceof BNode)) { + if (t.getObject() instanceof Literal) { + input.append(((Literal) t.getObject()).getLexicalForm()); + } else { + input.append(((UriRef) t.getObject()).hashCode()); + } + } + } + + md.update(input.toString().getBytes()); + byte[] hash = md.digest(); + + StringBuffer hexString = new StringBuffer(); + for (int i = 0; i < hash.length; i++) { + String hex = Integer.toHexString(0xFF & hash[i]); + if (hex.length() == 1) { + hexString.append('0'); + } + + hexString.append(hex); + } + + return hexString.toString(); + } +} \ No newline at end of file Added: incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/main/resources/META-INF/services/org.apache.clerezza.rdf.core.serializedform.SerializingProvider URL: http://svn.apache.org/viewvc/incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/main/resources/META-INF/services/org.apache.clerezza.rdf.core.serializedform.SerializingProvider?rev=918082&view=auto ============================================================================== --- incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/main/resources/META-INF/services/org.apache.clerezza.rdf.core.serializedform.SerializingProvider (added) +++ incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/main/resources/META-INF/services/org.apache.clerezza.rdf.core.serializedform.SerializingProvider Tue Mar 2 15:46:53 2010 @@ -0,0 +1 @@ 
+org.apache.clerezza.rdf.jena.serializer.JenaSerializerProvider \ No newline at end of file Added: incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/test/java/org/apache/clerezza/rdf/stable/serializer/RandomGraph.java URL: http://svn.apache.org/viewvc/incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/test/java/org/apache/clerezza/rdf/stable/serializer/RandomGraph.java?rev=918082&view=auto ============================================================================== --- incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/test/java/org/apache/clerezza/rdf/stable/serializer/RandomGraph.java (added) +++ incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/test/java/org/apache/clerezza/rdf/stable/serializer/RandomGraph.java Tue Mar 2 15:46:53 2010 @@ -0,0 +1,178 @@ +/* + * Copyright 2010 mir. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * under the License. 
+ */ +package org.apache.clerezza.rdf.stable.serializer; + +import java.util.Iterator; +import java.util.UUID; +import org.apache.clerezza.rdf.core.BNode; +import org.apache.clerezza.rdf.core.NonLiteral; +import org.apache.clerezza.rdf.core.Resource; +import org.apache.clerezza.rdf.core.Triple; +import org.apache.clerezza.rdf.core.UriRef; +import org.apache.clerezza.rdf.core.impl.PlainLiteralImpl; +import org.apache.clerezza.rdf.core.impl.SimpleMGraph; +import org.apache.clerezza.rdf.core.impl.TripleImpl; +import org.apache.commons.lang.RandomStringUtils; + +/** + * + * @author mir + */ +class RandomGraph extends SimpleMGraph { + + public int growCount = 0; + public int removeCount =0; + public Triple evolve() { + Triple triple; + int random = rollDice(3); + if (random == 0 && size() != 0) { + triple = getRandomTriple(); + remove(triple); + removeCount++; + } else { + triple = createRandomTriple(); + add(triple); + growCount ++; + } + return triple; + } + + private Triple createRandomTriple() { + return new TripleImpl(getSubject(), getPredicate(), getObject()); + } + + private NonLiteral getSubject() { + int random = rollDice(2); + if (size() == 0) { + random = 0; + } + switch (random) { + case 0: // create new NonLiteral + Resource newResource; + do { + newResource = createRandomResource(); + } while (!(newResource instanceof NonLiteral)); + return (NonLiteral) newResource; + case 1: // get existing NonLiteral + Resource existingResource; + do { + existingResource = getExistingResource(); + if (existingResource == null) { + random = 0; + } + } while (!(existingResource instanceof NonLiteral)); + + return (NonLiteral) existingResource; + } + throw new RuntimeException("in getSubject()"); + } + + private UriRef getPredicate() { + int random = rollDice(2); + if (size() == 0) { + random = 0; + } + switch (random) { + case 0: // create new UriRef + return createRandomUriRef(); + case 1: // get existing UriRef + Resource existingResource; + do { + existingResource = 
getExistingResource(); + if (existingResource == null) { + random = 0; + } + } while (!(existingResource instanceof UriRef)); + return (UriRef) existingResource; + } + throw new RuntimeException("in getPredicate()"); + } + + private Resource getObject() { + int random = rollDice(2); + if (size() == 0) { + random = 0; + } + switch (random) { + case 0: // create new resource + return createRandomResource(); + case 1: // get existing resource + Resource existingResource = getExistingResource(); + if (existingResource == null) { + random = 0; + } + return existingResource; + } + throw new RuntimeException("in getObject()"); + } + + private static int rollDice(int faces) { + return Double.valueOf(Math.random() * faces).intValue(); + } + + private Resource createRandomResource() { + switch (rollDice(3)) { + case 0: + return new BNode(); + case 1: + return createRandomUriRef(); + case 2: + return new PlainLiteralImpl(RandomStringUtils.random(rollDice(100) + 1)); + } + throw new RuntimeException("in createRandomResource()"); + } + + private Resource getExistingResource() { + Triple triple = getRandomTriple(); + if (triple == null) { + return null; + } + switch (rollDice(3)) { + case 0: + return triple.getSubject(); + case 1: + return triple.getPredicate(); + case 2: + return triple.getObject(); + } + return null; + } + + private UriRef createRandomUriRef() { + return new UriRef("http://" + UUID.randomUUID().toString()); + } + + private Triple getRandomTriple() { + int size = this.size(); + if (size == 0) { + return null; + } + Iterator<Triple> iter = iterator(); + while (rollDice(size * 2) != 0) { + if (!iter.hasNext()) { + iter = iterator(); + } + } + Triple triple; + if (iter.hasNext()) { + triple = iter.next(); + } else { + triple = iterator().next(); + } + return triple; + } +} Added: incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/test/java/org/apache/clerezza/rdf/stable/serializer/StableSerializerProviderTest.java URL: 
http://svn.apache.org/viewvc/incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/test/java/org/apache/clerezza/rdf/stable/serializer/StableSerializerProviderTest.java?rev=918082&view=auto ============================================================================== --- incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/test/java/org/apache/clerezza/rdf/stable/serializer/StableSerializerProviderTest.java (added) +++ incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/test/java/org/apache/clerezza/rdf/stable/serializer/StableSerializerProviderTest.java Tue Mar 2 15:46:53 2010 @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.clerezza.rdf.stable.serializer; + +import java.io.ByteArrayOutputStream; +import junit.framework.Assert; +import org.apache.clerezza.rdf.core.Triple; +import org.apache.clerezza.rdf.core.serializedform.SupportedFormat; +import org.apache.clerezza.rdf.jena.serializer.JenaSerializerProvider; +import org.apache.commons.lang.StringUtils; +import org.junit.Test; + + +/** + * Serializes a Graph to different formats + * + * @author mir + */ +public class StableSerializerProviderTest { + + /* + * + */ + @Test + public void testTurtleSerializer() { + StableSerializerProvider ssp = new StableSerializerProvider(); + JenaSerializerProvider jsp = new JenaSerializerProvider(); + ssp.serializer = jsp; + int cycles = 50; + int totalJenaDistance = 0; + int totalStableDistance = 0; + RandomGraph rGraph = new RandomGraph(); + Triple triple = null; + String newJenaResult = "", oldJenaResult = "", + newStableResult = "", oldStableResult = ""; + for (int i = 0; i < cycles; i++) { + for (int j = 0; j < 3; j++) { + triple = rGraph.evolve(); + } + + int tripleSize = triple.toString().length(); + oldJenaResult = newJenaResult; + newJenaResult = serializeWithJena(jsp, rGraph); + + oldStableResult = newStableResult; + newStableResult = serializeWithStable(ssp, rGraph); + + int jenaDistance = StringUtils.getLevenshteinDistance(oldJenaResult, newJenaResult); + int stableDistance = StringUtils.getLevenshteinDistance(oldStableResult, newStableResult); + totalJenaDistance += jenaDistance; + totalStableDistance += stableDistance; + } + Assert.assertTrue((totalStableDistance/cycles) < (totalJenaDistance/cycles)); + } + + private String serializeWithJena(JenaSerializerProvider jsp, RandomGraph rGraph) { + String jenaResult; + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + jsp.serialize(baos, rGraph, SupportedFormat.N_TRIPLE); + jenaResult = new String(baos.toByteArray()); + return jenaResult; + } + + private String serializeWithStable(StableSerializerProvider 
ssp, RandomGraph rGraph) { + String stableResult; + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + ssp.serialize(baos, rGraph, SupportedFormat.N_TRIPLE); + stableResult = new String(baos.toByteArray()); + return stableResult; + } + +}