Author: dspicar
Date: Wed Nov  2 15:23:01 2011
New Revision: 1196625

URL: http://svn.apache.org/viewvc?rev=1196625&view=rev
Log:
CLEREZZA-643: applied partial patch by Rupert Westenthaler: increased 
performance of RDF/JSON serializer. 

Modified:
    
incubator/clerezza/trunk/parent/rdf.rdfjson/src/main/java/org/apache/clerezza/rdf/rdfjson/serializer/RdfJsonSerializingProvider.java
    
incubator/clerezza/trunk/parent/rdf.rdfjson/src/test/java/org/apache/clerezza/rdf/rdfjson/serializer/RdfJsonSerializerProviderTest.java

Modified: 
incubator/clerezza/trunk/parent/rdf.rdfjson/src/main/java/org/apache/clerezza/rdf/rdfjson/serializer/RdfJsonSerializingProvider.java
URL: 
http://svn.apache.org/viewvc/incubator/clerezza/trunk/parent/rdf.rdfjson/src/main/java/org/apache/clerezza/rdf/rdfjson/serializer/RdfJsonSerializingProvider.java?rev=1196625&r1=1196624&r2=1196625&view=diff
==============================================================================
--- 
incubator/clerezza/trunk/parent/rdf.rdfjson/src/main/java/org/apache/clerezza/rdf/rdfjson/serializer/RdfJsonSerializingProvider.java
 (original)
+++ 
incubator/clerezza/trunk/parent/rdf.rdfjson/src/main/java/org/apache/clerezza/rdf/rdfjson/serializer/RdfJsonSerializingProvider.java
 Wed Nov  2 15:23:01 2011
@@ -16,17 +16,17 @@
  */
 package org.apache.clerezza.rdf.rdfjson.serializer;
 
+import java.io.BufferedWriter;
 import java.io.IOException;
 import java.io.OutputStream;
+import java.io.OutputStreamWriter;
+import java.io.UnsupportedEncodingException;
+import java.util.Arrays;
+import java.util.Comparator;
 import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
 import java.util.Map;
-import java.util.Set;
-import org.json.simple.JSONArray;
-import org.json.simple.JSONObject;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
+import java.util.Map.Entry;
+
 import org.apache.clerezza.rdf.core.BNode;
 import org.apache.clerezza.rdf.core.NonLiteral;
 import org.apache.clerezza.rdf.core.PlainLiteral;
@@ -39,53 +39,96 @@ import org.apache.clerezza.rdf.core.seri
 import org.apache.clerezza.rdf.core.serializedform.SupportedFormat;
 import org.apache.felix.scr.annotations.Component;
 import org.apache.felix.scr.annotations.Service;
+import org.json.simple.JSONArray;
+import org.json.simple.JSONObject;
 
 /**
- * A {@link org.apache.clerezza.rdf.core.serializedform.SerializingProvider} 
for rdf/json
+ * A {@link org.apache.clerezza.rdf.core.serializedform.SerializingProvider} 
for rdf/json.
+ * 
+ * This implementation is based on first sorting the triples within the parsed
+ * {@link TripleCollection} based on the {@link #SUBJECT_COMPARATOR 
subject}.<p>
+ * The serialization is done on a subject scope. Meaning that all triples for a
+ * subject are serialized and instantly written to the provided 
+ * {@link OutputStream}.<p>
+ * 'UTF-8' is used as encoding to write the data.
  * 
- * @author tio, hasan
+ * @author tio, hasan, rwesten
  */
 @Component(immediate=true)
 @Service(SerializingProvider.class)
 @SupportedFormat(SupportedFormat.RDF_JSON)
 public class RdfJsonSerializingProvider implements SerializingProvider {
 
-       private final Logger logger = LoggerFactory.getLogger(getClass());
-
+       @SuppressWarnings("unchecked")
        @Override
        public void serialize(OutputStream serializedGraph, TripleCollection 
tc, String formatIdentifier) {
-               JSONObject root = new JSONObject();
-
-               Set<NonLiteral> processedSubject = new HashSet<NonLiteral>();
+               if (tc.isEmpty()) { //ensure writing an empty element in case 
of an empty collection
+                       try {
+                               serializedGraph.write(new 
JSONObject().toJSONString().getBytes("UTF-8"));
+                       } catch (IOException e) {
+                               throw new IllegalStateException("Exception 
while writing to parsed OutputStream", e);
+                       }
+                       return;
+               }
                BNodeManager bNodeMgr = new BNodeManager();
+               BufferedWriter out;
+               try {
+                       out = new BufferedWriter(
+                                       new OutputStreamWriter(serializedGraph, 
"UTF-8"));
+               } catch (UnsupportedEncodingException e) {
+                       throw new IllegalStateException("Encoding 'UTF-8' is 
not supported by this System", e);
+               }
+               Triple[] sortedTriples = tc.toArray(new Triple[tc.size()]);
+               Arrays.sort(sortedTriples, SUBJECT_COMPARATOR);
+               Triple triple;
                NonLiteral subject = null;
                String subjectStr = null;
-               Iterator<Triple> triples = tc.iterator();
-               while (triples.hasNext()) {
-                       subject = triples.next().getSubject();
-                       if (!processedSubject.contains(subject)) {
-                               if (subject instanceof BNode) {
-                                       subjectStr = 
bNodeMgr.getBNodeId((BNode)subject);
-                               } else { // if (subject instanceof UriRef)
-                                       subjectStr = 
((UriRef)subject).getUnicodeString();
+               UriRef predicate = null;
+               Map<UriRef, JSONArray> predicateValues = new HashMap<UriRef, 
JSONArray>();
+               JSONObject jSubject = new JSONObject();
+               try {
+                       out.write("{"); //start the root object
+                       for (int i = 0; i < sortedTriples.length; i++) {
+                               triple = sortedTriples[i];
+                               boolean subjectChange = 
!triple.getSubject().equals(subject);
+                               if (subjectChange) {
+                                       if (subject != null) {
+                                               //write the predicate values
+                                               for (Entry<UriRef, JSONArray> 
predicates : predicateValues.entrySet()) {
+                                                       
jSubject.put(predicates.getKey().getUnicodeString(), predicates.getValue());
+                                               }
+                                               //write subject
+                                               
out.write(JSONObject.toString(subjectStr, jSubject));
+                                               out.write(",");
+                                               jSubject.clear(); //just clear
+                                               predicateValues.clear();
+                                       }
+                                       //init next subject
+                                       subject = triple.getSubject();
+                                       if (subject instanceof BNode) {
+                                               subjectStr = 
bNodeMgr.getBNodeId((BNode) subject);
+                                       } else { // if (subject instanceof 
UriRef)
+                                               subjectStr = ((UriRef) 
subject).getUnicodeString();
+                                       }
                                }
-                               JSONObject predicatesAsJSONObjects = new 
JSONObject();
-                               Iterator<Triple> triplesOfSubject = 
tc.filter(subject, null, null);
-                               while (triplesOfSubject.hasNext()) {
-                                       UriRef predicate = 
triplesOfSubject.next().getPredicate();
-                                       JSONArray jsonValues = 
addValuesToJSONArray(tc, subject, predicate, bNodeMgr);
-                                       
predicatesAsJSONObjects.put(predicate.getUnicodeString(), jsonValues);
+                               predicate = triple.getPredicate();
+                               JSONArray values = 
predicateValues.get(predicate);
+                               if (values == null) {
+                                       values = new JSONArray();
+                                       predicateValues.put(predicate, values);
                                }
-                               root.put(subjectStr, predicatesAsJSONObjects);
-
-                               processedSubject.add(subject);
+                               values.add(writeObject(bNodeMgr, 
triple.getObject()));
                        }
-               }
-               try {
-                       
serializedGraph.write(root.toJSONString().getBytes("UTF-8"));
-               } catch (IOException ioe) {
-                       logger.error(ioe.getMessage());
-                       throw new RuntimeException(ioe.getMessage());
+                       if (subjectStr != null) {
+                               for (Entry<UriRef, JSONArray> predicates : 
predicateValues.entrySet()) {
+                                       
jSubject.put(predicates.getKey().getUnicodeString(), predicates.getValue());
+                               }
+                               out.write(JSONObject.toString(subjectStr, 
jSubject));
+                       }
+                       out.write("}");//end the root object
+                       out.flush();
+               } catch (IOException e) {
+                       throw new IllegalStateException("Exception while 
writing on the parsed OutputStream", e);
                }
        }
 
@@ -103,38 +146,51 @@ public class RdfJsonSerializingProvider 
                }
        }
 
-       private JSONArray addValuesToJSONArray(TripleCollection tc, NonLiteral 
subject, UriRef predicate,
-                       BNodeManager bNodeMgr) {
-
-               JSONArray jsonValues = new JSONArray();
-
-               Iterator<Triple> objectsOfPredicate = tc.filter(subject, 
predicate, null);
-               while (objectsOfPredicate.hasNext()) {
-                       Resource object = objectsOfPredicate.next().getObject();
-                       JSONObject objectAsJSONObject = new JSONObject();
-                       if (object instanceof PlainLiteral) {
-                               PlainLiteral plainLiteral = (PlainLiteral) 
object;
-                               objectAsJSONObject.put("value", 
plainLiteral.getLexicalForm());
-                               objectAsJSONObject.put("type", "literal");
-                               if (plainLiteral.getLanguage() != null) {
-                                       objectAsJSONObject.put("lang", 
plainLiteral.getLanguage().toString());
-                               }
-                       } else if (object instanceof TypedLiteral) {
-                               TypedLiteral literal = (TypedLiteral) object;
-                               objectAsJSONObject.put("value", 
literal.getLexicalForm());
-                               objectAsJSONObject.put("type", "literal");
-                               objectAsJSONObject.put("datatype", 
literal.getDataType().getUnicodeString());
-                       } else if (object instanceof UriRef) {
-                               UriRef uriRef = (UriRef) object;
-                               objectAsJSONObject.put("value", 
uriRef.getUnicodeString());
-                               objectAsJSONObject.put("type", "uri");
-                       } else if (object instanceof BNode) {
-                               String bNodeId = 
bNodeMgr.getBNodeId((BNode)object);
-                               objectAsJSONObject.put("value", bNodeId);
-                               objectAsJSONObject.put("type", "bnode");
+       /**
+        * Converts the {@link Resource object} of a triple to JSON
+        * 
+        * @param bNodeMgr      used to lookup {@link BNode} instances
+        * @param object        the object of the triple
+        * @return      the JSON representation of parsed object
+        */
+       @SuppressWarnings("unchecked")
+       private JSONObject writeObject(BNodeManager bNodeMgr, Resource object) {
+               JSONObject jObject = new JSONObject();
+               if (object instanceof PlainLiteral) {
+                       PlainLiteral plainLiteral = (PlainLiteral) object;
+                       jObject.put("value", plainLiteral.getLexicalForm());
+                       jObject.put("type", "literal");
+                       if (plainLiteral.getLanguage() != null) {
+                               jObject.put("lang", 
plainLiteral.getLanguage().toString());
                        }
-                       jsonValues.add(objectAsJSONObject);
+               } else if (object instanceof TypedLiteral) {
+                       TypedLiteral literal = (TypedLiteral) object;
+                       jObject.put("value", literal.getLexicalForm());
+                       jObject.put("type", "literal");
+                       jObject.put("datatype", 
literal.getDataType().getUnicodeString());
+               } else if (object instanceof UriRef) {
+                       UriRef uriRef = (UriRef) object;
+                       jObject.put("value", uriRef.getUnicodeString());
+                       jObject.put("type", "uri");
+               } else if (object instanceof BNode) {
+                       String bNodeId = bNodeMgr.getBNodeId((BNode) object);
+                       jObject.put("value", bNodeId);
+                       jObject.put("type", "bnode");
                }
-               return jsonValues;
+               return jObject;
        }
+       
+       /**
+        * Compares only the subjects of the triples. If they are equal 
<code>0</code>
+        * is returned. This will ensure that all triples with the same 
subjects are
+        * sorted correctly. However it does not sort predicates and objects!
+        */
+       public static final Comparator<Triple> SUBJECT_COMPARATOR = new 
Comparator<Triple>() {
+
+               @Override
+               public int compare(Triple a, Triple b) {
+                       return a.getSubject().equals(b.getSubject()) ? 0
+                                       : 
a.getSubject().toString().compareTo(b.getSubject().toString());
+               }
+       };
 }

Modified: 
incubator/clerezza/trunk/parent/rdf.rdfjson/src/test/java/org/apache/clerezza/rdf/rdfjson/serializer/RdfJsonSerializerProviderTest.java
URL: 
http://svn.apache.org/viewvc/incubator/clerezza/trunk/parent/rdf.rdfjson/src/test/java/org/apache/clerezza/rdf/rdfjson/serializer/RdfJsonSerializerProviderTest.java?rev=1196625&r1=1196624&r2=1196625&view=diff
==============================================================================
--- 
incubator/clerezza/trunk/parent/rdf.rdfjson/src/test/java/org/apache/clerezza/rdf/rdfjson/serializer/RdfJsonSerializerProviderTest.java
 (original)
+++ 
incubator/clerezza/trunk/parent/rdf.rdfjson/src/test/java/org/apache/clerezza/rdf/rdfjson/serializer/RdfJsonSerializerProviderTest.java
 Wed Nov  2 15:23:01 2011
@@ -18,12 +18,19 @@ package org.apache.clerezza.rdf.rdfjson.
 
 import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
 
 import org.junit.Assert;
 import org.junit.Test;
 import org.apache.clerezza.rdf.core.BNode;
+import org.apache.clerezza.rdf.core.Language;
 import org.apache.clerezza.rdf.core.LiteralFactory;
 import org.apache.clerezza.rdf.core.MGraph;
+import org.apache.clerezza.rdf.core.NonLiteral;
+import org.apache.clerezza.rdf.core.PlainLiteral;
 import org.apache.clerezza.rdf.core.TypedLiteral;
 import org.apache.clerezza.rdf.core.UriRef;
 import org.apache.clerezza.rdf.core.impl.PlainLiteralImpl;
@@ -31,7 +38,9 @@ import org.apache.clerezza.rdf.core.impl
 import org.apache.clerezza.rdf.core.impl.TripleImpl;
 import org.apache.clerezza.rdf.core.serializedform.ParsingProvider;
 import org.apache.clerezza.rdf.core.serializedform.SerializingProvider;
+import org.apache.clerezza.rdf.ontologies.FOAF;
 import org.apache.clerezza.rdf.ontologies.RDF;
+import org.apache.clerezza.rdf.ontologies.RDFS;
 import org.apache.clerezza.rdf.rdfjson.parser.RdfJsonParsingProvider;
 import org.junit.After;
 import org.junit.AfterClass;
@@ -44,6 +53,7 @@ import org.junit.BeforeClass;
  */
 public class RdfJsonSerializerProviderTest {
 
+    private final static LiteralFactory lf = LiteralFactory.getInstance();
        private final static UriRef RDF_NIL = new 
UriRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#nil");
        private final static UriRef node1 = new 
UriRef("http://example.org/node1");
        private final static UriRef node2 = new 
UriRef("http://example.org/node2");
@@ -58,7 +68,7 @@ public class RdfJsonSerializerProviderTe
        private final static PlainLiteralImpl plainLiteralA = new 
PlainLiteralImpl("A");
        private final static PlainLiteralImpl plainLiteralB = new 
PlainLiteralImpl("B");
        private final static PlainLiteralImpl plainLiteralC = new 
PlainLiteralImpl("C");
-       private final static TypedLiteral typedLiteralA = 
LiteralFactory.getInstance().createTypedLiteral("A");
+       private final static TypedLiteral typedLiteralA = 
lf.createTypedLiteral("A");
 
        private MGraph mGraph;
 
@@ -125,6 +135,7 @@ public class RdfJsonSerializerProviderTe
                SerializingProvider provider = new RdfJsonSerializingProvider();
                ByteArrayOutputStream serializedGraph = new 
ByteArrayOutputStream();
                provider.serialize(serializedGraph, mGraph, 
"application/rdf+json");
+//        System.out.println(serializedGraph.toString());
                ParsingProvider parsingProvider = new RdfJsonParsingProvider();
                ByteArrayInputStream jsonIn = new 
ByteArrayInputStream(serializedGraph.toByteArray());
                MGraph parsedMGraph = new SimpleMGraph();
@@ -133,4 +144,96 @@ public class RdfJsonSerializerProviderTe
                Assert.assertEquals(6, parsedMGraph.size());
                Assert.assertEquals(mGraph.getGraph(), parsedMGraph.getGraph());
        }
+       
+       /**
+        * For local performance testing
+        */
+       //@Test
+       public void testBigGraph() {
+               int NUM_TRIPLES = 100000;
+               //randoms are in the range [0..3]
+               double l = 1.0; //literal
+               double i = l / 3; //int
+               double d = l * 2 / 3;//double
+               double b = 2.0;//bNode
+               double nb = b - (l * 2 / 3); //create new bNode
+               double random;
+               NonLiteral subject = null;
+               UriRef predicate = null;
+               List<UriRef> predicateList = new ArrayList<UriRef>();
+               predicateList.add(RDF.first);
+               predicateList.add(RDF.rest);
+               predicateList.add(RDF.type);
+               predicateList.add(RDFS.label);
+               predicateList.add(RDFS.comment);
+               predicateList.add(RDFS.range);
+               predicateList.add(RDFS.domain);
+               predicateList.add(FOAF.name);
+               predicateList.add(FOAF.nick);
+               predicateList.add(FOAF.homepage);
+               predicateList.add(FOAF.age);
+               predicateList.add(FOAF.depiction);
+               String URI_PREFIX = "http://www.test.org/bigGraph/ref";
+               Language DE = new Language("de");
+               Language EN = new Language("en");
+               Iterator<UriRef> predicates = predicateList.iterator();
+               List<BNode> bNodes = new ArrayList<BNode>();
+               bNodes.add(new BNode());
+               for (int count = 0; count < NUM_TRIPLES; count++) {
+                       random = Math.random() * 3;
+                       if (random >= 2.5 || count == 0) {
+                               if (random <= 2.75) {
+                                       subject = new UriRef(URI_PREFIX + 
count);
+                               } else {
+                                       int rndIndex = (int) ((random - 2.75) * 
bNodes.size() / (3.0 - 2.75));
+                                       subject = bNodes.get(rndIndex);
+                               }
+                       }
+                       if (random > 2.0 || count == 0) {
+                               if (!predicates.hasNext()) {
+                                       Collections.shuffle(predicateList);
+                                       predicates = predicateList.iterator();
+                               }
+                               predicate = predicates.next();
+                       }
+                       if (random <= l) { //literal
+                               if (random <= i) {
+                                       mGraph.add(new TripleImpl(subject, 
predicate, lf.createTypedLiteral(count)));
+                               } else if (random <= d) {
+                                       mGraph.add(new TripleImpl(subject, 
predicate, lf.createTypedLiteral(random)));
+                               } else {
+                                       PlainLiteral text;
+                                       if (random <= i) {
+                                               text = new 
PlainLiteralImpl("Literal for " + count);
+                                       } else if (random <= d) {
+                                               text = new PlainLiteralImpl("An 
English literal for " + count, EN);
+                                       } else {
+                                               text = new 
PlainLiteralImpl("Ein Dutsches Literal für" + count, DE);
+                                       }
+                                       mGraph.add(new TripleImpl(subject, 
predicate, text));
+                               }
+                       } else if (random <= b) { //bnode
+                               BNode bnode;
+                               if (random <= nb) {
+                                       bnode = new BNode();
+                                       bNodes.add(bnode);
+                               } else { //>nb <b
+                                       int rndIndex = (int) ((random - nb) * 
bNodes.size() / (b - nb));
+                                       bnode = bNodes.get(rndIndex);
+                               }
+                               mGraph.add(new TripleImpl(subject, predicate, 
bnode));
+                       } else { //UriRef
+                               mGraph.add(new TripleImpl(subject, predicate,
+                                               new UriRef(URI_PREFIX + (int) 
count * random)));
+                       }
+               }
+               SerializingProvider provider = new RdfJsonSerializingProvider();
+               for (int count = 0; i < 10; i++) {
+                       long start = System.currentTimeMillis();
+                       ByteArrayOutputStream serializedGraph = new 
ByteArrayOutputStream();
+
+                       provider.serialize(serializedGraph, mGraph, 
"application/rdf+json");
+                       System.out.println("Serialized " + mGraph.size() + 
"Triples in " + (System.currentTimeMillis() - start) + "ms");
+               }
+       }
 }


Reply via email to