Github user afs commented on a diff in the pull request: https://github.com/apache/jena/pull/94#discussion_r44024970 --- Diff: jena-arq/src/main/java/org/apache/jena/sparql/core/assembler/InMemDatasetAssembler.java --- @@ -0,0 +1,67 @@ +package org.apache.jena.sparql.core.assembler; + +import static org.apache.jena.assembler.JA.MemoryDataset; +import static org.apache.jena.assembler.JA.data; +import static org.apache.jena.query.DatasetFactory.createTxnMem; +import static org.apache.jena.riot.RDFDataMgr.loadModel; +import static org.apache.jena.riot.RDFDataMgr.read; +import static org.apache.jena.sparql.core.assembler.AssemblerUtils.setContext; +import static org.apache.jena.sparql.core.assembler.DatasetAssemblerVocab.*; +import static org.apache.jena.sparql.util.FmtUtils.stringForRDFNode; +import static org.apache.jena.sparql.util.graph.GraphUtils.*; + +import java.util.function.Predicate; + +import org.apache.jena.assembler.Assembler; +import org.apache.jena.assembler.Mode; +import org.apache.jena.assembler.assemblers.AssemblerBase; +import org.apache.jena.query.Dataset; +import org.apache.jena.rdf.model.Model; +import org.apache.jena.rdf.model.RDFNode; +import org.apache.jena.rdf.model.Resource; + +/** + * An {@link Assembler} that creates in-memory {@link Dataset}s. + * + */ +public class InMemDatasetAssembler extends AssemblerBase { + + @Override + public Dataset open(final Assembler assembler, final Resource root, final Mode mode) { + checkType(root, MemoryDataset); + final Dataset dataset = createTxnMem(); + setContext(root, dataset.getContext()); + // Default graph can be defined with ja:graph or ja:defaultGraph + final Resource defaultGraphDef = root.hasProperty(pDefaultGraph) ? getResourceValue(root, + pDefaultGraph) : root.hasProperty(pGraph) ? 
getResourceValue(root, pGraph) : null; + if (defaultGraphDef != null) dataset.setDefaultModel(retrieve(defaultGraphDef, assembler, mode)); + // or with ja:data + final Predicate<RDFNode> isResource = n -> { + if (n.isResource()) return true; + throw new DatasetAssemblerException(root, "Not a resource: " + stringForRDFNode(n)); + }; + multiValue(root, data).parallelStream().filter(isResource) + .forEach(defaultGraphNode -> read(dataset, defaultGraphNode.asResource().getURI())); + --- End diff -- .parallelStream() does not make sense here. I think it needs to be sequential and all in the same transaction. The memory dataset does not support parallel updates. At best, it'll just get queued on the lock. But then error messages are going to be in random order.
--- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes to enable it, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---