[
https://issues.apache.org/jira/browse/JENA-2325?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Jan Martin Keil updated JENA-2325:
----------------------------------
Description:
The execution times of equivalent SPARQL queries using [Path
Alternative|https://www.w3.org/TR/sparql11-query/#rPathAlternative] or
[UNION|https://www.w3.org/TR/sparql11-query/#rGroupOrUnionGraphPattern] differ
considerably. Example:
{code:sql}
SELECT * WHERE {?a
<http://example.org/property1>|<http://example.org/property2> ?b}
{code}
{code:sql}
SELECT * WHERE {{?a <http://example.org/property1> ?b } UNION { ?a
<http://example.org/property2> ?b}}
{code}
Java MWE with 100000 bindings for each alternative:
{code:java}
import org.apache.jena.query.Query;
import org.apache.jena.query.QueryExecutionFactory;
import org.apache.jena.query.QueryFactory;
import org.apache.jena.rdf.model.Model;
import org.apache.jena.rdf.model.ModelFactory;
import org.apache.jena.rdf.model.Property;
import org.apache.jena.rdf.model.ResourceFactory;
import org.junit.jupiter.api.Test;
public class PathAlternativeVsUnion {
@Test
public void compare() {
Model model = ModelFactory.createDefaultModel();
int scale = 100000;
Property property1 =
ResourceFactory.createProperty("http://example.org/property1");
Property property2 =
ResourceFactory.createProperty("http://example.org/property2");
for (int i = 0; i < scale; i++) {
model.createResource("http://example.org/r" +
i).addProperty(property1,
ResourceFactory.createResource("http://example.org/r" + (scale + i)));
model.createResource("http://example.org/r" + (scale *
2 + i)).addProperty(property2,
ResourceFactory.createResource("http://example.org/r" + (scale * 3 + i)));
}
Query pathAlternativeQuery = QueryFactory.create("SELECT *
WHERE {?a <" + property1 + ">|<" + property2 + "> ?b}");
Query unionQuery = QueryFactory
.create("SELECT * WHERE {{?a <" + property1 +
"> ?b } UNION { ?a <" + property2 + "> ?b}}");
// warm up pathAlternativeQuery
QueryExecutionFactory.create(pathAlternativeQuery,
model).execSelect().forEachRemaining(qs -> {
/* consume but do nothing */});
// measure pathAlternativeQuery
long start = System.currentTimeMillis();
QueryExecutionFactory.create(pathAlternativeQuery,
model).execSelect().forEachRemaining(qs -> {
/* consume but do nothing */});
long finish = System.currentTimeMillis();
System.out.println("Time pathAlternativeQuery: " + (finish -
start) + " ms");
// warm up unionQuery
QueryExecutionFactory.create(unionQuery,
model).execSelect().forEachRemaining(qs -> {
/* consume but do nothing */});
// measure unionQuery
start = System.currentTimeMillis();
QueryExecutionFactory.create(unionQuery,
model).execSelect().forEachRemaining(qs -> {
/* consume but do nothing */});
finish = System.currentTimeMillis();
System.out.println("Time unionQuery: " + (finish -
start) + " ms");
}
}
{code}
Result:
{code:java}
Time pathAlternativeQuery: 10940 ms
Time unionQuery: 145 ms
{code}
Is it possible to add some automatic execution plan optimization for that to
Apache Jena?
was:
The execution time of equivalent SPARQL queries using [Path
Alternative|https://www.w3.org/TR/sparql11-query/#rPathAlternative] or
[UNION|https://www.w3.org/TR/sparql11-query/#rGroupOrUnionGraphPattern] differ
considerably. Example:
Compared queries:
{code:sql}
SELECT * WHERE {?a
<http://example.org/property1>|<http://example.org/property2> ?b}
{code}
{code:sql}
SELECT * WHERE {{?a <http://example.org/property1> ?b } UNION { ?a
<http://example.org/property2> ?b}}
{code}
Both alternatives have 100000 bindings.
Java MWE:
{code:java}
import org.apache.jena.query.Query;
import org.apache.jena.query.QueryExecutionFactory;
import org.apache.jena.query.QueryFactory;
import org.apache.jena.rdf.model.Model;
import org.apache.jena.rdf.model.ModelFactory;
import org.apache.jena.rdf.model.Property;
import org.apache.jena.rdf.model.ResourceFactory;
import org.junit.jupiter.api.Test;
public class PathAlternativeVsUnion {
@Test
public void compare() {
Model model = ModelFactory.createDefaultModel();
int scale = 100000;
Property property1 =
ResourceFactory.createProperty("http://example.org/property1");
Property property2 =
ResourceFactory.createProperty("http://example.org/property2");
for (int i = 0; i < scale; i++) {
model.createResource("http://example.org/r" +
i).addProperty(property1,
ResourceFactory.createResource("http://example.org/r" + (scale + i)));
model.createResource("http://example.org/r" + (scale *
2 + i)).addProperty(property2,
ResourceFactory.createResource("http://example.org/r" + (scale * 3 + i)));
}
Query pathAlternativeQuery = QueryFactory.create("SELECT *
WHERE {?a <" + property1 + ">|<" + property2 + "> ?b}");
Query unionQuery = QueryFactory
.create("SELECT * WHERE {{?a <" + property1 +
"> ?b } UNION { ?a <" + property2 + "> ?b}}");
// warm up pathAlternativeQuery
QueryExecutionFactory.create(pathAlternativeQuery,
model).execSelect().forEachRemaining(qs -> {
/* consume but do nothing */});
// measure pathAlternativeQuery
long start = System.currentTimeMillis();
QueryExecutionFactory.create(pathAlternativeQuery,
model).execSelect().forEachRemaining(qs -> {
/* consume but do nothing */});
long finish = System.currentTimeMillis();
System.out.println("Time pathAlternativeQuery: " + (finish -
start) + " ms");
// warm up unionQuery
QueryExecutionFactory.create(unionQuery,
model).execSelect().forEachRemaining(qs -> {
/* consume but do nothing */});
// measure unionQuery
start = System.currentTimeMillis();
QueryExecutionFactory.create(unionQuery,
model).execSelect().forEachRemaining(qs -> {
/* consume but do nothing */});
finish = System.currentTimeMillis();
System.out.println("Time unionQuery: " + (finish -
start) + " ms");
}
}
{code}
Result:
{code}
Time pathAlternativeQuery: 10940 ms
Time unionQuery: 145 ms
{code}
Is it possible to add some automatic execution plan optimization for that to
Apache Jena?
> Performance optimization for SPARQL Path Alternative
> ----------------------------------------------------
>
> Key: JENA-2325
> URL: https://issues.apache.org/jira/browse/JENA-2325
> Project: Apache Jena
> Issue Type: Improvement
> Components: ARQ
> Affects Versions: Jena 4.4.0
> Reporter: Jan Martin Keil
> Priority: Major
> Labels: performance
>
> The execution times of equivalent SPARQL queries using [Path
> Alternative|https://www.w3.org/TR/sparql11-query/#rPathAlternative] or
> [UNION|https://www.w3.org/TR/sparql11-query/#rGroupOrUnionGraphPattern]
> differ considerably. Example:
> {code:sql}
> SELECT * WHERE {?a
> <http://example.org/property1>|<http://example.org/property2> ?b}
> {code}
> {code:sql}
> SELECT * WHERE {{?a <http://example.org/property1> ?b } UNION { ?a
> <http://example.org/property2> ?b}}
> {code}
> Java MWE with 100000 bindings for each alternative:
> {code:java}
> import org.apache.jena.query.Query;
> import org.apache.jena.query.QueryExecutionFactory;
> import org.apache.jena.query.QueryFactory;
> import org.apache.jena.rdf.model.Model;
> import org.apache.jena.rdf.model.ModelFactory;
> import org.apache.jena.rdf.model.Property;
> import org.apache.jena.rdf.model.ResourceFactory;
> import org.junit.jupiter.api.Test;
> /**
>  * Minimal working example that compares the execution time of a
>  * SPARQL path alternative (p1|p2) with the equivalent UNION query
>  * on the same in-memory model.
>  */
> public class PathAlternativeVsUnion {
>
>     /**
>      * Fills a model with {@code scale} statements per property,
>      * runs each query once as warm-up and once timed, and prints
>      * the elapsed time of each timed run.
>      */
>     @Test
>     public void compare() {
>         Model model = ModelFactory.createDefaultModel();
>         int scale = 100000;
>         String r = "http://example.org/r";
>         Property property1 = ResourceFactory
>                 .createProperty("http://example.org/property1");
>         Property property2 = ResourceFactory
>                 .createProperty("http://example.org/property2");
>         for (int i = 0; i < scale; i++) {
>             // Subjects and objects of both properties are disjoint.
>             model.createResource(r + i).addProperty(property1,
>                     ResourceFactory.createResource(r + (scale + i)));
>             model.createResource(r + (scale * 2 + i)).addProperty(
>                     property2,
>                     ResourceFactory.createResource(
>                             r + (scale * 3 + i)));
>         }
>
>         // Every string literal stays on a single source line; a
>         // literal broken by a line wrap does not compile.
>         Query pathAlternativeQuery = QueryFactory.create(
>                 "SELECT * WHERE {?a <" + property1 + ">|<"
>                         + property2 + "> ?b}");
>         Query unionQuery = QueryFactory.create(
>                 "SELECT * WHERE {{?a <" + property1 + "> ?b } "
>                         + "UNION { ?a <" + property2 + "> ?b}}");
>
>         // First call of each pair is the warm-up; its time is
>         // discarded.
>         runSelect(pathAlternativeQuery, model);
>         System.out.println("Time pathAlternativeQuery: "
>                 + runSelect(pathAlternativeQuery, model) + " ms");
>         runSelect(unionQuery, model);
>         System.out.println("Time unionQuery: "
>                 + runSelect(unionQuery, model) + " ms");
>     }
>
>     /**
>      * Executes a SELECT query against the model, consumes every
>      * solution and returns the elapsed time.
>      *
>      * @param query the SELECT query to execute
>      * @param model the model to query
>      * @return elapsed time in milliseconds
>      */
>     private static long runSelect(Query query, Model model) {
>         // nanoTime is monotonic; currentTimeMillis may jump when
>         // the system clock is adjusted.
>         long start = System.nanoTime();
>         // Fully qualified so that no additional import is required.
>         try (org.apache.jena.query.QueryExecution execution =
>                 QueryExecutionFactory.create(query, model)) {
>             execution.execSelect().forEachRemaining(qs -> {
>                 /* consume but do nothing */
>             });
>             // Evaluated before the execution is closed.
>             return (System.nanoTime() - start) / 1_000_000;
>         }
>     }
> }
> {code}
> Result:
> {code:java}
> Time pathAlternativeQuery: 10940 ms
> Time unionQuery: 145 ms
> {code}
> Is it possible to add some automatic execution plan optimization for that to
> Apache Jena?
--
This message was sent by Atlassian Jira
(v8.20.7#820007)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]