Author: dspicar
Date: Wed Nov 2 15:23:01 2011
New Revision: 1196625
URL: http://svn.apache.org/viewvc?rev=1196625&view=rev
Log:
CLEREZZA-643: applied partial patch by Rupert Westenthaler: increased
performance of RDF/JSON serializer.
Modified:
incubator/clerezza/trunk/parent/rdf.rdfjson/src/main/java/org/apache/clerezza/rdf/rdfjson/serializer/RdfJsonSerializingProvider.java
incubator/clerezza/trunk/parent/rdf.rdfjson/src/test/java/org/apache/clerezza/rdf/rdfjson/serializer/RdfJsonSerializerProviderTest.java
Modified:
incubator/clerezza/trunk/parent/rdf.rdfjson/src/main/java/org/apache/clerezza/rdf/rdfjson/serializer/RdfJsonSerializingProvider.java
URL:
http://svn.apache.org/viewvc/incubator/clerezza/trunk/parent/rdf.rdfjson/src/main/java/org/apache/clerezza/rdf/rdfjson/serializer/RdfJsonSerializingProvider.java?rev=1196625&r1=1196624&r2=1196625&view=diff
==============================================================================
---
incubator/clerezza/trunk/parent/rdf.rdfjson/src/main/java/org/apache/clerezza/rdf/rdfjson/serializer/RdfJsonSerializingProvider.java
(original)
+++
incubator/clerezza/trunk/parent/rdf.rdfjson/src/main/java/org/apache/clerezza/rdf/rdfjson/serializer/RdfJsonSerializingProvider.java
Wed Nov 2 15:23:01 2011
@@ -16,17 +16,17 @@
*/
package org.apache.clerezza.rdf.rdfjson.serializer;
+import java.io.BufferedWriter;
import java.io.IOException;
import java.io.OutputStream;
+import java.io.OutputStreamWriter;
+import java.io.UnsupportedEncodingException;
+import java.util.Arrays;
+import java.util.Comparator;
import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
import java.util.Map;
-import java.util.Set;
-import org.json.simple.JSONArray;
-import org.json.simple.JSONObject;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
+import java.util.Map.Entry;
+
import org.apache.clerezza.rdf.core.BNode;
import org.apache.clerezza.rdf.core.NonLiteral;
import org.apache.clerezza.rdf.core.PlainLiteral;
@@ -39,53 +39,96 @@ import org.apache.clerezza.rdf.core.seri
import org.apache.clerezza.rdf.core.serializedform.SupportedFormat;
import org.apache.felix.scr.annotations.Component;
import org.apache.felix.scr.annotations.Service;
+import org.json.simple.JSONArray;
+import org.json.simple.JSONObject;
/**
- * A {@link org.apache.clerezza.rdf.core.serializedform.SerializingProvider}
for rdf/json
+ * A {@link org.apache.clerezza.rdf.core.serializedform.SerializingProvider}
for rdf/json.
+ *
+ * This implementation is based on first sorting the triples within the passed
+ * {@link TripleCollection} based on the {@link #SUBJECT_COMPARATOR
subject}.<p>
+ * The serialization is done on a subject scope. Meaning that all triples for a
+ * subject are serialized and instantly written to the provided
+ * {@link OutputStream}.<p>
+ * 'UTF-8' is used as encoding to write the data.
*
- * @author tio, hasan
+ * @author tio, hasan, rwesten
*/
@Component(immediate=true)
@Service(SerializingProvider.class)
@SupportedFormat(SupportedFormat.RDF_JSON)
public class RdfJsonSerializingProvider implements SerializingProvider {
- private final Logger logger = LoggerFactory.getLogger(getClass());
-
+ @SuppressWarnings("unchecked")
@Override
public void serialize(OutputStream serializedGraph, TripleCollection
tc, String formatIdentifier) {
- JSONObject root = new JSONObject();
-
- Set<NonLiteral> processedSubject = new HashSet<NonLiteral>();
+ if (tc.isEmpty()) { //ensure writing an empty element in case
of an empty collection
+ try {
+ serializedGraph.write(new
JSONObject().toJSONString().getBytes("UTF-8"));
+ } catch (IOException e) {
+ throw new IllegalStateException("Exception
while writing to parsed OutputStream", e);
+ }
+ return;
+ }
BNodeManager bNodeMgr = new BNodeManager();
+ BufferedWriter out;
+ try {
+ out = new BufferedWriter(
+ new OutputStreamWriter(serializedGraph,
"UTF-8"));
+ } catch (UnsupportedEncodingException e) {
+ throw new IllegalStateException("Encoding 'UTF-8' is
not supported by this System", e);
+ }
+ Triple[] sortedTriples = tc.toArray(new Triple[tc.size()]);
+ Arrays.sort(sortedTriples, SUBJECT_COMPARATOR);
+ Triple triple;
NonLiteral subject = null;
String subjectStr = null;
- Iterator<Triple> triples = tc.iterator();
- while (triples.hasNext()) {
- subject = triples.next().getSubject();
- if (!processedSubject.contains(subject)) {
- if (subject instanceof BNode) {
- subjectStr =
bNodeMgr.getBNodeId((BNode)subject);
- } else { // if (subject instanceof UriRef)
- subjectStr =
((UriRef)subject).getUnicodeString();
+ UriRef predicate = null;
+ Map<UriRef, JSONArray> predicateValues = new HashMap<UriRef,
JSONArray>();
+ JSONObject jSubject = new JSONObject();
+ try {
+ out.write("{"); //start the root object
+ for (int i = 0; i < sortedTriples.length; i++) {
+ triple = sortedTriples[i];
+ boolean subjectChange =
!triple.getSubject().equals(subject);
+ if (subjectChange) {
+ if (subject != null) {
+ //write the predicate values
+ for (Entry<UriRef, JSONArray>
predicates : predicateValues.entrySet()) {
+
jSubject.put(predicates.getKey().getUnicodeString(), predicates.getValue());
+ }
+ //write subject
+
out.write(JSONObject.toString(subjectStr, jSubject));
+ out.write(",");
+ jSubject.clear(); //just clear
+ predicateValues.clear();
+ }
+ //init next subject
+ subject = triple.getSubject();
+ if (subject instanceof BNode) {
+ subjectStr =
bNodeMgr.getBNodeId((BNode) subject);
+ } else { // if (subject instanceof
UriRef)
+ subjectStr = ((UriRef)
subject).getUnicodeString();
+ }
}
- JSONObject predicatesAsJSONObjects = new
JSONObject();
- Iterator<Triple> triplesOfSubject =
tc.filter(subject, null, null);
- while (triplesOfSubject.hasNext()) {
- UriRef predicate =
triplesOfSubject.next().getPredicate();
- JSONArray jsonValues =
addValuesToJSONArray(tc, subject, predicate, bNodeMgr);
-
predicatesAsJSONObjects.put(predicate.getUnicodeString(), jsonValues);
+ predicate = triple.getPredicate();
+ JSONArray values =
predicateValues.get(predicate);
+ if (values == null) {
+ values = new JSONArray();
+ predicateValues.put(predicate, values);
}
- root.put(subjectStr, predicatesAsJSONObjects);
-
- processedSubject.add(subject);
+ values.add(writeObject(bNodeMgr,
triple.getObject()));
}
- }
- try {
-
serializedGraph.write(root.toJSONString().getBytes("UTF-8"));
- } catch (IOException ioe) {
- logger.error(ioe.getMessage());
- throw new RuntimeException(ioe.getMessage());
+ if (subjectStr != null) {
+ for (Entry<UriRef, JSONArray> predicates :
predicateValues.entrySet()) {
+
jSubject.put(predicates.getKey().getUnicodeString(), predicates.getValue());
+ }
+ out.write(JSONObject.toString(subjectStr,
jSubject));
+ }
+ out.write("}");//end the root object
+ out.flush();
+ } catch (IOException e) {
+ throw new IllegalStateException("Exception while
writing on the parsed OutputStream", e);
}
}
@@ -103,38 +146,51 @@ public class RdfJsonSerializingProvider
}
}
- private JSONArray addValuesToJSONArray(TripleCollection tc, NonLiteral
subject, UriRef predicate,
- BNodeManager bNodeMgr) {
-
- JSONArray jsonValues = new JSONArray();
-
- Iterator<Triple> objectsOfPredicate = tc.filter(subject,
predicate, null);
- while (objectsOfPredicate.hasNext()) {
- Resource object = objectsOfPredicate.next().getObject();
- JSONObject objectAsJSONObject = new JSONObject();
- if (object instanceof PlainLiteral) {
- PlainLiteral plainLiteral = (PlainLiteral)
object;
- objectAsJSONObject.put("value",
plainLiteral.getLexicalForm());
- objectAsJSONObject.put("type", "literal");
- if (plainLiteral.getLanguage() != null) {
- objectAsJSONObject.put("lang",
plainLiteral.getLanguage().toString());
- }
- } else if (object instanceof TypedLiteral) {
- TypedLiteral literal = (TypedLiteral) object;
- objectAsJSONObject.put("value",
literal.getLexicalForm());
- objectAsJSONObject.put("type", "literal");
- objectAsJSONObject.put("datatype",
literal.getDataType().getUnicodeString());
- } else if (object instanceof UriRef) {
- UriRef uriRef = (UriRef) object;
- objectAsJSONObject.put("value",
uriRef.getUnicodeString());
- objectAsJSONObject.put("type", "uri");
- } else if (object instanceof BNode) {
- String bNodeId =
bNodeMgr.getBNodeId((BNode)object);
- objectAsJSONObject.put("value", bNodeId);
- objectAsJSONObject.put("type", "bnode");
+ /**
+ * Converts the {@link Resource object} of a triple to JSON
+ *
+ * @param bNodeMgr used to lookup {@link BNode} instances
+ * @param object the object of the triple
+ * @return the JSON representation of the passed object
+ */
+ @SuppressWarnings("unchecked")
+ private JSONObject writeObject(BNodeManager bNodeMgr, Resource object) {
+ JSONObject jObject = new JSONObject();
+ if (object instanceof PlainLiteral) {
+ PlainLiteral plainLiteral = (PlainLiteral) object;
+ jObject.put("value", plainLiteral.getLexicalForm());
+ jObject.put("type", "literal");
+ if (plainLiteral.getLanguage() != null) {
+ jObject.put("lang",
plainLiteral.getLanguage().toString());
}
- jsonValues.add(objectAsJSONObject);
+ } else if (object instanceof TypedLiteral) {
+ TypedLiteral literal = (TypedLiteral) object;
+ jObject.put("value", literal.getLexicalForm());
+ jObject.put("type", "literal");
+ jObject.put("datatype",
literal.getDataType().getUnicodeString());
+ } else if (object instanceof UriRef) {
+ UriRef uriRef = (UriRef) object;
+ jObject.put("value", uriRef.getUnicodeString());
+ jObject.put("type", "uri");
+ } else if (object instanceof BNode) {
+ String bNodeId = bNodeMgr.getBNodeId((BNode) object);
+ jObject.put("value", bNodeId);
+ jObject.put("type", "bnode");
}
- return jsonValues;
+ return jObject;
}
+
+ /**
+ * Compares only the subjects of the triples. If they are equal
<code>0</code>
+ * is returned. This will ensure that all triples with the same
subjects are
+ * sorted correctly. However it does not sort predicates and objects!
+ */
+ public static final Comparator<Triple> SUBJECT_COMPARATOR = new
Comparator<Triple>() {
+
+ @Override
+ public int compare(Triple a, Triple b) {
+ return a.getSubject().equals(b.getSubject()) ? 0
+ :
a.getSubject().toString().compareTo(b.getSubject().toString());
+ }
+ };
}
Modified:
incubator/clerezza/trunk/parent/rdf.rdfjson/src/test/java/org/apache/clerezza/rdf/rdfjson/serializer/RdfJsonSerializerProviderTest.java
URL:
http://svn.apache.org/viewvc/incubator/clerezza/trunk/parent/rdf.rdfjson/src/test/java/org/apache/clerezza/rdf/rdfjson/serializer/RdfJsonSerializerProviderTest.java?rev=1196625&r1=1196624&r2=1196625&view=diff
==============================================================================
---
incubator/clerezza/trunk/parent/rdf.rdfjson/src/test/java/org/apache/clerezza/rdf/rdfjson/serializer/RdfJsonSerializerProviderTest.java
(original)
+++
incubator/clerezza/trunk/parent/rdf.rdfjson/src/test/java/org/apache/clerezza/rdf/rdfjson/serializer/RdfJsonSerializerProviderTest.java
Wed Nov 2 15:23:01 2011
@@ -18,12 +18,19 @@ package org.apache.clerezza.rdf.rdfjson.
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
import org.junit.Assert;
import org.junit.Test;
import org.apache.clerezza.rdf.core.BNode;
+import org.apache.clerezza.rdf.core.Language;
import org.apache.clerezza.rdf.core.LiteralFactory;
import org.apache.clerezza.rdf.core.MGraph;
+import org.apache.clerezza.rdf.core.NonLiteral;
+import org.apache.clerezza.rdf.core.PlainLiteral;
import org.apache.clerezza.rdf.core.TypedLiteral;
import org.apache.clerezza.rdf.core.UriRef;
import org.apache.clerezza.rdf.core.impl.PlainLiteralImpl;
@@ -31,7 +38,9 @@ import org.apache.clerezza.rdf.core.impl
import org.apache.clerezza.rdf.core.impl.TripleImpl;
import org.apache.clerezza.rdf.core.serializedform.ParsingProvider;
import org.apache.clerezza.rdf.core.serializedform.SerializingProvider;
+import org.apache.clerezza.rdf.ontologies.FOAF;
import org.apache.clerezza.rdf.ontologies.RDF;
+import org.apache.clerezza.rdf.ontologies.RDFS;
import org.apache.clerezza.rdf.rdfjson.parser.RdfJsonParsingProvider;
import org.junit.After;
import org.junit.AfterClass;
@@ -44,6 +53,7 @@ import org.junit.BeforeClass;
*/
public class RdfJsonSerializerProviderTest {
+ private final static LiteralFactory lf = LiteralFactory.getInstance();
private final static UriRef RDF_NIL = new
UriRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#nil");
private final static UriRef node1 = new
UriRef("http://example.org/node1");
private final static UriRef node2 = new
UriRef("http://example.org/node2");
@@ -58,7 +68,7 @@ public class RdfJsonSerializerProviderTe
private final static PlainLiteralImpl plainLiteralA = new
PlainLiteralImpl("A");
private final static PlainLiteralImpl plainLiteralB = new
PlainLiteralImpl("B");
private final static PlainLiteralImpl plainLiteralC = new
PlainLiteralImpl("C");
- private final static TypedLiteral typedLiteralA =
LiteralFactory.getInstance().createTypedLiteral("A");
+ private final static TypedLiteral typedLiteralA =
lf.createTypedLiteral("A");
private MGraph mGraph;
@@ -125,6 +135,7 @@ public class RdfJsonSerializerProviderTe
SerializingProvider provider = new RdfJsonSerializingProvider();
ByteArrayOutputStream serializedGraph = new
ByteArrayOutputStream();
provider.serialize(serializedGraph, mGraph,
"application/rdf+json");
+// System.out.println(serializedGraph.toString());
ParsingProvider parsingProvider = new RdfJsonParsingProvider();
ByteArrayInputStream jsonIn = new
ByteArrayInputStream(serializedGraph.toByteArray());
MGraph parsedMGraph = new SimpleMGraph();
@@ -133,4 +144,96 @@ public class RdfJsonSerializerProviderTe
Assert.assertEquals(6, parsedMGraph.size());
Assert.assertEquals(mGraph.getGraph(), parsedMGraph.getGraph());
}
+
+ /**
+ * For local performance testing
+ */
+ //@Test
+ public void testBigGraph() {
+ int NUM_TRIPLES = 100000;
+ //randoms are in the range [0..3]
+ double l = 1.0; //literal
+ double i = l / 3; //int
+ double d = l * 2 / 3;//double
+ double b = 2.0;//bNode
+ double nb = b - (l * 2 / 3); //create new bNode
+ double random;
+ NonLiteral subject = null;
+ UriRef predicate = null;
+ List<UriRef> predicateList = new ArrayList<UriRef>();
+ predicateList.add(RDF.first);
+ predicateList.add(RDF.rest);
+ predicateList.add(RDF.type);
+ predicateList.add(RDFS.label);
+ predicateList.add(RDFS.comment);
+ predicateList.add(RDFS.range);
+ predicateList.add(RDFS.domain);
+ predicateList.add(FOAF.name);
+ predicateList.add(FOAF.nick);
+ predicateList.add(FOAF.homepage);
+ predicateList.add(FOAF.age);
+ predicateList.add(FOAF.depiction);
+ String URI_PREFIX = "http://www.test.org/bigGraph/ref";
+ Language DE = new Language("de");
+ Language EN = new Language("en");
+ Iterator<UriRef> predicates = predicateList.iterator();
+ List<BNode> bNodes = new ArrayList<BNode>();
+ bNodes.add(new BNode());
+ for (int count = 0; count < NUM_TRIPLES; count++) {
+ random = Math.random() * 3;
+ if (random >= 2.5 || count == 0) {
+ if (random <= 2.75) {
+ subject = new UriRef(URI_PREFIX +
count);
+ } else {
+ int rndIndex = (int) ((random - 2.75) *
bNodes.size() / (3.0 - 2.75));
+ subject = bNodes.get(rndIndex);
+ }
+ }
+ if (random > 2.0 || count == 0) {
+ if (!predicates.hasNext()) {
+ Collections.shuffle(predicateList);
+ predicates = predicateList.iterator();
+ }
+ predicate = predicates.next();
+ }
+ if (random <= l) { //literal
+ if (random <= i) {
+ mGraph.add(new TripleImpl(subject,
predicate, lf.createTypedLiteral(count)));
+ } else if (random <= d) {
+ mGraph.add(new TripleImpl(subject,
predicate, lf.createTypedLiteral(random)));
+ } else {
+ PlainLiteral text;
+ if (random <= i) {
+ text = new
PlainLiteralImpl("Literal for " + count);
+ } else if (random <= d) {
+ text = new PlainLiteralImpl("An
English literal for " + count, EN);
+ } else {
+ text = new
PlainLiteralImpl("Ein Dutsches Literal für" + count, DE);
+ }
+ mGraph.add(new TripleImpl(subject,
predicate, text));
+ }
+ } else if (random <= b) { //bnode
+ BNode bnode;
+ if (random <= nb) {
+ bnode = new BNode();
+ bNodes.add(bnode);
+ } else { //>nb <b
+ int rndIndex = (int) ((random - nb) *
bNodes.size() / (b - nb));
+ bnode = bNodes.get(rndIndex);
+ }
+ mGraph.add(new TripleImpl(subject, predicate,
bnode));
+ } else { //UriRef
+ mGraph.add(new TripleImpl(subject, predicate,
+ new UriRef(URI_PREFIX + (int)
count * random)));
+ }
+ }
+ SerializingProvider provider = new RdfJsonSerializingProvider();
+ for (int count = 0; i < 10; i++) {
+ long start = System.currentTimeMillis();
+ ByteArrayOutputStream serializedGraph = new
ByteArrayOutputStream();
+
+ provider.serialize(serializedGraph, mGraph,
"application/rdf+json");
+ System.out.println("Serialized " + mGraph.size() +
"Triples in " + (System.currentTimeMillis() - start) + "ms");
+ }
+ }
}