On 04/09/15 11:26, François-Paul Servant wrote:
Hi Rob,

thanks.

Of course I can be mistaken but as far as I can tell, the servlet does consume 
the ResultSet and writes the result back to the client (and I check in the 
client that I get the same, non empty results with it and with fuseki - at 
least when there is no LIMIT clause in the query)

Here is the code of the servlet. On init it loads the RDF data (one dataset 
wrapping one memory model).
Note that the servlet doesn’t take care of content negotiation: it uses an “as” 
request parameter instead

fps

----

package test.test.sparql;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;

import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import org.apache.jena.query.Dataset;
import org.apache.jena.query.DatasetFactory;
import org.apache.jena.query.Query;
import org.apache.jena.query.QueryExecution;
import org.apache.jena.query.QueryExecutionFactory;
import org.apache.jena.query.QueryFactory;
import org.apache.jena.query.ResultSet;
import org.apache.jena.query.ResultSetFormatter;
import org.apache.jena.rdf.model.Model;
import org.apache.jena.rdf.model.ModelFactory;
import org.apache.log4j.Logger;

/** a simple sparql servlet loaded with one mem model */
public class SparqlServlet extends HttpServlet {

/** Filled in lazily by {@link #getDataset()}; go through that getter rather than reading the field. */
protected Dataset dataset;

/**
 * Base URI passed along when result models are serialized.
 *
 * @return always {@code null} in this class
 */
protected String getXMLBase() {
        return null;
}

//
// INIT
//

protected Dataset initDataset() {
        Model m = ModelFactory.createDefaultModel();
        InputStream in = 
getServletContext().getResourceAsStream("/files/tags.rdf");
        // use the same base as fuseki
        m.read(in, "http://127.0.0.1:8080/fuseki/ds/";);
        // m.write(System.out, "TTL");
        return DatasetFactory.create(m);
}

/**
 * Returns the single dataset backing this servlet, building it through
 * {@link #initDataset()} the first time it is requested.
 */
protected Dataset getDataset() {
        if (this.dataset != null) {
                return this.dataset;
        }
        this.dataset = initDataset();
        return this.dataset;
}

/**
 * Loads the dataset eagerly at servlet start-up and keeps it, so that the
 * first request does not have to parse the RDF file a second time.
 */
@Override public void init() {
        // keep the result: a bare initDataset() call would throw the loaded data away
        this.dataset = initDataset();
}

//
// DOGET
//

/**
 * Entry point for GET requests. Logs the call, hands the request to
 * {@link #doGetSparql} when the servlet path ends with "/sparql", and
 * finally logs the elapsed time in milliseconds. Requests on any other
 * path produce no output.
 */
@Override
public void doGet(HttpServletRequest req, HttpServletResponse res)
                throws IOException, ServletException {
        final Logger log = Logger.getLogger(getClass());
        log.info("SparqlServletTest.doGet");
        final long startMillis = System.currentTimeMillis();
        req.setCharacterEncoding("UTF-8");

        // /sparql/?query=...
        final boolean sparqlRequest = req.getServletPath().endsWith("/sparql");
        if (sparqlRequest) {
                final String rawQuery = req.getParameter("query");
                log.info(rawQuery);
                doGetSparql(req, res);
        }

        final long elapsedMillis = System.currentTimeMillis() - startMillis;
        log.info("SparqlServletTest.doGet DONE " + elapsedMillis);
}

/**
 * Runs the SPARQL query carried by the request against the dataset and
 * writes the outcome to the response.
 *
 * The optional "as" request parameter (compared in lower case) picks the
 * output format:
 *   - CONSTRUCT / DESCRIBE: "n3" or "ttl", "jsonld", anything else RDF/XML;
 *   - SELECT: "json", anything else the XML result format;
 *   - ASK: the boolean is written as the text "true" or "false".
 * Any other query type writes nothing.
 *
 * NOTE: the three unindented text lines inside the first branch are an
 * inline reply from the mail thread this listing was posted in; they are
 * not part of the program.
 */
protected void doGetSparql(HttpServletRequest req, HttpServletResponse res) 
throws ServletException, IOException {
        String queryString = getQueryString(req);
        // getQueryString already throws when the "query" parameter is absent
        if (queryString == null) throw new RuntimeException("no query");
        
        Query query = QueryFactory.create(queryString);
        QueryExecution qexec = QueryExecutionFactory.create(query, 
getDataset(req));
        
        // CONSTRUCT and DESCRIBE produce a Model; ASK and SELECT do not
        boolean isRDFQuery = (query.isDescribeType() || 
(query.isConstructType()));
        
        String as = req.getParameter("as");
        if (as != null) as = as.toLowerCase();
        
        res.setHeader("Access-Control-Allow-Origin", "*"); // CORS 2012-08

        if (isRDFQuery) {
                // close == true: getResultModel closes qexec before returning the model
                Model resultModel = getResultModel(query, qexec, true);
                if (("n3".equals(as) || ("ttl".equals(as)))) {
                        writeRDF(resultModel,res, "N3", getXMLBase());

Do not use N3.  Use Turtle.

If you want speed, use N-Triples/N-Quads.

                } else if ("jsonld".equals(as)) {
                        writeRDF(resultModel,res, "JSON-LD", getXMLBase() );
                } else {
                        writeRDF(resultModel,res, "RDF/XML", getXMLBase() );
                }
        } else if (query.isAskType()) {
                        // NOTE(review): qexec is never closed on this path, and no
                        // Content-Type is set on the response.
                        boolean b = qexec.execAsk();
                        String s = Boolean.toString(b);
                        // getBytes() without an argument uses the platform default charset
                        res.getOutputStream().write(s.getBytes());
        } else if (query.isSelectType()) {
                  // NOTE(review): qexec is never closed on this path either, and no
                  // Content-Type is set. The formatter calls below are what consume
                  // the result set.
                  ResultSet results = qexec.execSelect() ;
                  if ("json".equals(as)) {
                        ResultSetFormatter.outputAsJSON(res.getOutputStream(), 
results);
                  } else {
                        ResultSetFormatter.outputAsXML(res.getOutputStream(), 
results);
                  }
                
        }
}

/**
 * Resolves the dataset for a request. Every request is answered from the
 * servlet's one shared dataset; {@code req} is not consulted.
 */
protected Dataset getDataset(HttpServletRequest req) {
        final Dataset shared = getDataset();
        return shared;
}

public String getQueryString(HttpServletRequest req) {
        String queryString = req.getParameter("query");
        if (queryString == null) throw new RuntimeException("No query param in the 
request");
        
        // cf Tomcat "feature" wrt uri encoding
        try {
                queryString = java.net.URLEncoder.encode(queryString, 
"ISO-8859-1");
                queryString = java.net.URLDecoder.decode(queryString, "UTF-8");
        } catch (UnsupportedEncodingException e) { throw new 
RuntimeException(e); }
        return queryString;
}

/**
 * Executes a CONSTRUCT or DESCRIBE query and returns the resulting model.
 *
 * @param query the parsed query, used to choose between DESCRIBE and CONSTRUCT
 * @param qexec the execution to run
 * @param close when {@code true}, {@code qexec} is closed before this method
 *        exits, whether it returns normally or throws
 * @return the model produced by the query
 * @throws IllegalArgumentException if the query is neither DESCRIBE nor CONSTRUCT
 */
static Model getResultModel(Query query, QueryExecution qexec, boolean close) {
        try {
                Model resultModel = ModelFactory.createDefaultModel();
                if (query.isDescribeType()) {
                        return qexec.execDescribe(resultModel);
                }
                if (query.isConstructType()) {
                        return qexec.execConstruct(resultModel);
                }
                throw new IllegalArgumentException("not a rdf query");
        } finally {
                // release the execution on failure paths too, not only on success
                if (close) {
                        qexec.close();
                }
        }
}

/**
 * Serializes a model to the servlet response, setting the matching
 * Content-Type first.
 *
 * "N3" and "TURTLE" are both written as Turtle, so the payload agrees with
 * the advertised {@code text/turtle} media type. "JSON-LD" is written as
 * JSON-LD. Any other value falls back to RDF/XML.
 *
 * @param model the model to write
 * @param response the response whose content type and output stream are used
 * @param jenaLang requested Jena language name ("N3", "TURTLE", "JSON-LD", or other)
 * @param base base URI handed to the writer; may be {@code null}
 * @throws IOException if the response output stream cannot be obtained
 */
static void writeRDF(Model model, HttpServletResponse response, String jenaLang, String base)
                throws IOException, ServletException {
        final String writerLang;
        if ("N3".equals(jenaLang) || "TURTLE".equals(jenaLang)) {
                response.setContentType("text/turtle; charset=UTF-8");
                writerLang = "TURTLE";
        } else if ("JSON-LD".equals(jenaLang)) {
                response.setContentType("application/ld+json; charset=UTF-8");
                writerLang = "JSON-LD";
        } else {
                response.setContentType("application/rdf+xml; charset=UTF-8");
                // a null language lets Model.write use its default, RDF/XML
                writerLang = null;
        }
        model.write(response.getOutputStream(), writerLang, base);
}
}
Le 4 sept. 2015 à 10:21, Rob Vesse <[email protected]> a écrit :

You haven't shown your code so I can only guess at what may/may not be
going on

Firstly did you actually consume the result set in your servlet?

A ResultSet is typically streamed so the fact that execSelect() returned
doesn't mean the actual query was fully evaluated simply that the first
result is available.  So if you did something like the following:

long start = System.currentTimeMillis();
qe.execSelect()
long elapsed = System.currentTimeMillis() - start;

Then all you have measured is the time to first solution, not the time to
get all results so if this is the case you need to ensure you fully
consume the ResultSet somehow (whether by iterating over it, passing it to
some IO method that writes it out, call ResultSetFormatter.consume() on it
etc.) thus forcing ARQ to fully evaluate the query

On the point of IO, did your servlet actually write the results back to
the client since depending on the size of the results that can add
significant overhead relative to the actual query execution and Fuseki is
always going to do this.

Finally most of the queries exhibiting large differences are DESCRIBE
queries which are two pass evaluation, firstly the WHERE clause is
evaluated (via execSelect() internally) and then the description is built.
If your servlet is only calling execSelect() for those queries then it is
only timing the first pass of the WHERE clause (and possibly subject to
timing only the first result as noted above) rather than timing the full
query evaluation which Fuseki will be doing.

Rob

On 03/09/2015 23:19, "François-Paul Servant"
<[email protected]> wrote:

Hi,

shouldn’t we have the same level of performance with Fuseki and with a
simple servlet that calls ARQ?

I hadn’t tried Fuseki until now. Yesterday, I downloaded the 2.3.0 release,
started the server in a terminal window of my mac (osx 10.10.5) with:
./fuseki-server --mem /ds
I uploaded a rdf file (skos-like data, 21K triples), and I began to make
some queries. I’m used to playing with that data in Jena memory models, and
to query it. Getting results in Fuseki GUI seemed slow to me, I decided
to compare with a simple servlet that loads a memory model with the same
data on init, and calls ARQ in its doGet method.

I loaded both fuseki and my simple servlet in an instance of tomcat 8,
both loaded with the same data (default graph, memory model), and I
measured the time for some GET queries as seen by a client I wrote using
jersey.

Here are the results. For each sparql query, times with the simple
servlet, and with fuseki: the time for the first call, and the mean when
calling it 10 times (with the simple servlet, it is generally much faster
after the first call, but this is not related to HTTP caching: I paid
attention to that, and I verified, in the case of the simple servlet, that
its doGet method gets actually called)
Depending on the query, differences are small, or huge.

PREFIX tag: <http://127.0.0.1:8080/fuseki/ds/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
SELECT ?tag WHERE {
        ?tag skos:broader tag:semantic_web.
}
SIMPLE FIRST CALL: 0.039
SIMPLE MEAN: 0.0213
FUSEKI FIRST CALL: 0.025
FUSEKI MEAN: 0.0215

PREFIX tag: <http://127.0.0.1:8080/fuseki/ds/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
DESCRIBE ?tag WHERE {
        ?tag skos:broader tag:afrique.
}
SIMPLE FIRST CALL: 0.039
SIMPLE MEAN: 0.0216
FUSEKI FIRST CALL: 0.485
FUSEKI MEAN: 0.2284

PREFIX tag: <http://127.0.0.1:8080/fuseki/ds/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
SELECT ?tag WHERE {
        ?tag skos:broader* tag:science.
}
SIMPLE FIRST CALL: 0.172
SIMPLE MEAN: 0.0225
FUSEKI FIRST CALL: 3.981
FUSEKI MEAN: 3.1274

PREFIX tag: <http://127.0.0.1:8080/fuseki/ds/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
DESCRIBE ?tag WHERE {
        ?tag skos:broader* tag:linked_data.
}
SIMPLE FIRST CALL: 0.131
SIMPLE MEAN: 0.0417
FUSEKI FIRST CALL: 1.46
FUSEKI MEAN: 1.3244

PREFIX tag: <http://127.0.0.1:8080/fuseki/ds/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
SELECT ?tag WHERE {
        ?tag a <http://www.semanlink.net/2001/00/semanlink-schema#Tag>.
}
LIMIT 1000
SIMPLE FIRST CALL: 0.07
SIMPLE MEAN: 0.0269
FUSEKI FIRST CALL: 0.037
FUSEKI MEAN: 0.024399999999999998

PREFIX tag: <http://127.0.0.1:8080/fuseki/ds/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
DESCRIBE ?tag WHERE {
        ?tag a <http://www.semanlink.net/2001/00/semanlink-schema#Tag>.
}
LIMIT 1000
SIMPLE FIRST CALL: 0.181
SIMPLE MEAN: 0.13440000000000002
FUSEKI FIRST CALL: 6.471
FUSEKI MEAN: 5.497999999999999

Do you have an explanation?

Best Regards,

fps






Reply via email to