This is an automated email from the ASF dual-hosted git repository.
andy pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/jena.git
The following commit(s) were added to refs/heads/main by this push:
new ac26a8ad9a GH-2992: Service Enhancer: Fix bnode naming in bulk
requests.
ac26a8ad9a is described below
commit ac26a8ad9a61cbf7b100526cdc11c662bedad4cb
Author: Claus Stadler <[email protected]>
AuthorDate: Thu Feb 6 12:56:03 2025 +0100
GH-2992: Service Enhancer: Fix bnode naming in bulk requests.
---
.../service/enhancer/impl/BatchQueryRewriter.java | 19 +++++-
.../TestServiceEnhancerBatchQueryRewriter.java | 75 ++++++++++++++++++++--
2 files changed, 88 insertions(+), 6 deletions(-)
diff --git
a/jena-extras/jena-serviceenhancer/src/main/java/org/apache/jena/sparql/service/enhancer/impl/BatchQueryRewriter.java
b/jena-extras/jena-serviceenhancer/src/main/java/org/apache/jena/sparql/service/enhancer/impl/BatchQueryRewriter.java
index 95dee419e0..ba64add20a 100644
---
a/jena-extras/jena-serviceenhancer/src/main/java/org/apache/jena/sparql/service/enhancer/impl/BatchQueryRewriter.java
+++
b/jena-extras/jena-serviceenhancer/src/main/java/org/apache/jena/sparql/service/enhancer/impl/BatchQueryRewriter.java
@@ -31,17 +31,23 @@ import java.util.Optional;
import java.util.Set;
import org.apache.jena.atlas.logging.Log;
+import org.apache.jena.graph.Node;
+import org.apache.jena.graph.NodeFactory;
import org.apache.jena.query.Query;
import org.apache.jena.query.SortCondition;
import org.apache.jena.sparql.algebra.Op;
import org.apache.jena.sparql.algebra.OpAsQuery;
import org.apache.jena.sparql.algebra.OpLib;
-import org.apache.jena.sparql.algebra.op.*;
+import org.apache.jena.sparql.algebra.op.OpExtend;
+import org.apache.jena.sparql.algebra.op.OpOrder;
+import org.apache.jena.sparql.algebra.op.OpSlice;
+import org.apache.jena.sparql.algebra.op.OpUnion;
import org.apache.jena.sparql.core.Var;
import org.apache.jena.sparql.engine.binding.Binding;
import org.apache.jena.sparql.engine.main.QC;
import org.apache.jena.sparql.expr.ExprVar;
import org.apache.jena.sparql.expr.NodeValue;
+import org.apache.jena.sparql.graph.NodeTransformLib;
import org.apache.jena.sparql.service.enhancer.impl.util.BindingUtils;
/**
@@ -163,6 +169,9 @@ public class BatchQueryRewriter {
op = QC.substitute(op, normedBinding);
+ // Relabel any blank nodes
+ op = NodeTransformLib.transform(node -> relabelBnode(node, idx),
op);
+
long o = req.hasOffset() ? req.getOffset() : Query.NOLIMIT;
long l = req.hasLimit() ? req.getLimit() : Query.NOLIMIT;
@@ -188,4 +197,12 @@ public class BatchQueryRewriter {
renames.put(idxVar, idxVar);
return new BatchQueryRewriteResult(newOp, renames);
}
+
+ /**
+  * Appends {@code "_" + idx} to the name of a blank node variable or to the
+  * label of a blank node, so that the same term relabeled with different
+  * {@code idx} values yields different terms.
+  *
+  * @param node the node to inspect
+  * @param idx  the value used as the suffix (the caller passes the index of the
+  *             request currently being rewritten)
+  * @return a newly allocated variable or blank node carrying the suffix, or
+  *         {@code node} itself when it is neither a blank node variable nor a blank node
+  */
+ private static Node relabelBnode(Node node, long idx) {
+     String suffix = "_" + idx;
+
+     // Typically, only bnode vars should occur at this stage.
+     if (Var.isBlankNodeVar(node)) {
+         return Var.alloc(node.getName() + suffix);
+     }
+
+     // Conventional bnodes handled here for robustness.
+     if (node.isBlank()) {
+         return NodeFactory.createBlankNode(node.getBlankNodeLabel() + suffix);
+     }
+
+     // Everything else passes through untouched.
+     return node;
+ }
}
diff --git
a/jena-extras/jena-serviceenhancer/src/test/java/org/apache/jena/sparql/service/enhancer/impl/TestServiceEnhancerBatchQueryRewriter.java
b/jena-extras/jena-serviceenhancer/src/test/java/org/apache/jena/sparql/service/enhancer/impl/TestServiceEnhancerBatchQueryRewriter.java
index 7cfc6dafb8..f533eccadc 100644
---
a/jena-extras/jena-serviceenhancer/src/test/java/org/apache/jena/sparql/service/enhancer/impl/TestServiceEnhancerBatchQueryRewriter.java
+++
b/jena-extras/jena-serviceenhancer/src/test/java/org/apache/jena/sparql/service/enhancer/impl/TestServiceEnhancerBatchQueryRewriter.java
@@ -18,6 +18,12 @@
package org.apache.jena.sparql.service.enhancer.impl;
+import java.util.LinkedHashSet;
+import java.util.Map;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+import org.apache.jena.graph.Node;
import org.apache.jena.graph.NodeFactory;
import org.apache.jena.query.Query;
import org.apache.jena.query.QueryFactory;
@@ -25,10 +31,12 @@ import org.apache.jena.query.Syntax;
import org.apache.jena.sparql.algebra.Algebra;
import org.apache.jena.sparql.algebra.Op;
import org.apache.jena.sparql.algebra.OpAsQuery;
+import org.apache.jena.sparql.algebra.OpVars;
import org.apache.jena.sparql.algebra.op.OpService;
import org.apache.jena.sparql.core.Var;
import org.apache.jena.sparql.engine.binding.Binding;
import org.apache.jena.sparql.engine.binding.BindingFactory;
+import org.apache.jena.sparql.syntax.syntaxtransform.QueryTransformOps;
import org.junit.Assert;
import org.junit.Test;
@@ -51,11 +59,7 @@ public class TestServiceEnhancerBatchQueryRewriter {
batch.put(0, new PartitionRequest<>(0, BindingFactory.binding(o,
NodeFactory.createLiteralString("x1")), 1, 5));
batch.put(1, new PartitionRequest<>(1, BindingFactory.binding(o,
NodeFactory.createLiteralString("x2")), 2, 6));
- BatchQueryRewriter rewriter = new BatchQueryRewriter(new
OpServiceInfo(op), Var.alloc("idx"), false, false, false);
- BatchQueryRewriteResult rewrite = rewriter.rewrite(batch);
- Op resultOp = rewrite.getOp();
- Query actualQuery = OpAsQuery.asQuery(resultOp);
-
+ Query actualQuery = defaultRewrite(op, batch);
Query expectedQuery = QueryFactory.create(String.join("\n",
"SELECT *",
"WHERE",
@@ -86,4 +90,65 @@ public class TestServiceEnhancerBatchQueryRewriter {
Assert.assertEquals(expectedQuery, actualQuery);
}
+
+ /**
+  * GH-2992: Blank nodes must be relabeled when building batch unions.
+  * <p>
+  * The SERVICE body below contains a blank node (the {@code [ ... ]} syntax).
+  * The expected query uses a different blank node in each union member that
+  * carries a request ({@code _:b0} and {@code _:b1}).
+  */
+ @Test
+ public void testReport_01() {
+ // Compile a query whose SERVICE pattern contains a blank node.
+ OpService op = (OpService)Algebra.compile(QueryFactory.create("""
+ PREFIX sachem: <http://bioinfo.uochb.cas.cz/rdf/v1.0/sachem#>
+ SELECT * {
+ SERVICE <http://example.org/> { ?COMPOUND
sachem:substructureSearch [ sachem:query ?STRUCTURE ] }
+ }
+ """, Syntax.syntaxARQ).getQueryPattern());
+
+ // Two batch entries that differ only in the literal bound to ?STRUCTURE.
+ Batch<Integer, PartitionRequest<Binding>> batch =
BatchImpl.forInteger();
+ Var o = Var.alloc("STRUCTURE");
+ batch.put(0, new PartitionRequest<>(0, BindingFactory.binding(o,
NodeFactory.createLiteralString("[He]")), 0, Long.MAX_VALUE));
+ batch.put(1, new PartitionRequest<>(1, BindingFactory.binding(o,
NodeFactory.createLiteralString("[Ar]")), 0, Long.MAX_VALUE));
+
+ // Both the actual query (inside defaultRewrite) and the expected query are
+ // passed through harmonizeBnodes before they are compared.
+ Query actualQuery = defaultRewrite(op, batch);
+ Query expectedQuery = harmonizeBnodes(QueryFactory.create("""
+ SELECT *
+ WHERE
+ { { ?COMPOUND
<http://bioinfo.uochb.cas.cz/rdf/v1.0/sachem#substructureSearch> _:b0 .
+ _:b0
<http://bioinfo.uochb.cas.cz/rdf/v1.0/sachem#query> "[He]"
+ BIND(0 AS ?idx)
+ }
+ UNION
+ { { ?COMPOUND
<http://bioinfo.uochb.cas.cz/rdf/v1.0/sachem#substructureSearch> _:b1 .
+ _:b1
<http://bioinfo.uochb.cas.cz/rdf/v1.0/sachem#query> "[Ar]"
+ BIND(1 AS ?idx)
+ }
+ UNION
+ { BIND(1000000000 AS ?idx) }
+ }
+ }
+ ORDER BY ASC(?idx)
+ """));
+
+ Assert.assertEquals(expectedQuery, actualQuery);
+ }
+
+ /**
+  * Rewrites {@code batch} against {@code op} using a {@link BatchQueryRewriter}
+  * configured with the index variable {@code ?idx} and all three boolean
+  * options set to {@code false}, then converts the resulting algebra back to a
+  * query and passes it through {@link #harmonizeBnodes(Query)}.
+  *
+  * @param op    the service operation to rewrite
+  * @param batch the requests to combine into one query
+  * @return the rewritten query after blank node harmonization
+  */
+ private static Query defaultRewrite(OpService op, Batch<Integer, PartitionRequest<Binding>> batch) {
+     OpServiceInfo serviceInfo = new OpServiceInfo(op);
+     Var idxVar = Var.alloc("idx");
+     BatchQueryRewriter rewriter = new BatchQueryRewriter(serviceInfo, idxVar, false, false, false);
+
+     Op rewrittenOp = rewriter.rewrite(batch).getOp();
+     return harmonizeBnodes(OpAsQuery.asQuery(rewrittenOp));
+ }
+
+ /**
+  * Relabel blank nodes in the order of their occurrence in the query.
+  * <p>
+  * The query is compiled to algebra, its mentioned variables are gathered into
+  * an insertion-ordered set, and every blank node variable among them is
+  * replaced by a blank node labeled {@code b1}, {@code b2}, ... following the
+  * iteration order of that set.
+  *
+  * @param query the query whose blank node variables are to be replaced
+  * @return the result of substituting the blank node variables in {@code query}
+  */
+ private static Query harmonizeBnodes(Query query) {
+     // LinkedHashSet keeps the order in which OpVars reports the variables.
+     Set<Var> mentionedVars = new LinkedHashSet<>();
+     OpVars.mentionedVars(Algebra.compile(query), mentionedVars);
+
+     // Number the blank node variables consecutively, starting at 1.
+     Map<Var, Node> relabeling = new java.util.HashMap<>();
+     int counter = 0;
+     for (Var candidate : mentionedVars) {
+         if (Var.isBlankNodeVar(candidate)) {
+             relabeling.put(candidate, NodeFactory.createBlankNode("b" + (++counter)));
+         }
+     }
+
+     return QueryTransformOps.replaceVars(query, relabeling);
+ }
}