[ https://issues.apache.org/jira/browse/RYA-303?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16120003#comment-16120003 ]
ASF GitHub Bot commented on RYA-303: ------------------------------------ Github user meiercaleb commented on a diff in the pull request: https://github.com/apache/incubator-rya/pull/172#discussion_r132192625 --- Diff: extras/indexing/src/main/java/org/apache/rya/indexing/mongodb/pcj/PcjQueryNode.java --- @@ -0,0 +1,184 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.rya.indexing.mongodb.pcj; + +import static com.google.common.base.Preconditions.checkNotNull; + +import java.util.Collection; +import java.util.Collections; +import java.util.HashSet; + +import org.apache.accumulo.core.client.AccumuloException; +import org.apache.accumulo.core.client.AccumuloSecurityException; +import org.apache.accumulo.core.client.TableNotFoundException; +import org.apache.accumulo.core.security.Authorizations; +import org.apache.hadoop.conf.Configuration; +import org.apache.rya.api.utils.IteratorWrapper; +import org.apache.rya.indexing.external.tupleSet.ExternalTupleSet; +import org.apache.rya.indexing.external.tupleSet.ParsedQueryUtil; +import org.apache.rya.indexing.pcj.matching.PCJOptimizerUtilities; +import org.apache.rya.indexing.pcj.storage.PcjException; +import org.apache.rya.indexing.pcj.storage.PrecomputedJoinStorage.CloseableIterator; +import org.apache.rya.indexing.pcj.storage.PrecomputedJoinStorage.PCJStorageException; +import org.apache.rya.indexing.pcj.storage.mongo.MongoPcjDocuments; +import org.apache.rya.rdftriplestore.evaluation.ExternalBatchingIterator; +import org.openrdf.query.BindingSet; +import org.openrdf.query.MalformedQueryException; +import org.openrdf.query.QueryEvaluationException; +import org.openrdf.query.algebra.Projection; +import org.openrdf.query.algebra.TupleExpr; +import org.openrdf.query.parser.ParsedTupleQuery; +import org.openrdf.query.parser.sparql.SPARQLParser; +import org.openrdf.sail.SailException; + +import com.google.common.base.Optional; +import com.google.common.base.Preconditions; + +import edu.umd.cs.findbugs.annotations.DefaultAnnotation; +import edu.umd.cs.findbugs.annotations.NonNull; +import info.aduna.iteration.CloseableIteration; + +/** + * Indexing Node for PCJs expressions to be inserted into execution plan to + * delegate entity portion of query to {@link MongoPrecomputedJoinIndexer}. 
+ */ +@DefaultAnnotation(NonNull.class) +public class PcjQueryNode extends ExternalTupleSet implements ExternalBatchingIterator { + private final String tablename; + private final PcjIndexer indexer; + private final MongoPcjDocuments pcjDocs; + + /** + * + * @param sparql - name of sparql query whose results will be stored in PCJ table + * @param conf - Rya Configuration + * @param tablename - name of an existing PCJ table + * @throws MalformedQueryException + * @throws SailException + * @throws QueryEvaluationException + * @throws TableNotFoundException + * @throws AccumuloSecurityException + * @throws AccumuloException + * @throws PCJStorageException + */ + public PcjQueryNode(final String sparql, final String tablename, final MongoPcjDocuments pcjDocs) + throws MalformedQueryException, SailException, QueryEvaluationException, TableNotFoundException, + AccumuloException, AccumuloSecurityException, PCJStorageException { + this.pcjDocs = checkNotNull(pcjDocs); + indexer = new MongoPrecomputedJoinIndexer(); + this.tablename = tablename; + final SPARQLParser sp = new SPARQLParser(); + final ParsedTupleQuery pq = (ParsedTupleQuery) sp.parseQuery(sparql, null); + final TupleExpr te = pq.getTupleExpr(); + Preconditions.checkArgument(PCJOptimizerUtilities.isPCJValid(te), "TupleExpr is an invalid PCJ."); + + final Optional<Projection> projection = new ParsedQueryUtil().findProjection(pq); + if (!projection.isPresent()) { + throw new MalformedQueryException("SPARQL query '" + sparql + "' does not contain a Projection."); + } + setProjectionExpr(projection.get()); + } + + /** + * + * @param accCon + * - connection to a valid Accumulo instance + * @param tablename + * - name of an existing PCJ table + * @throws PCJStorageException + * @throws MalformedQueryException + * @throws SailException + * @throws QueryEvaluationException + * @throws TableNotFoundException + * @throws AccumuloSecurityException + * @throws AccumuloException + */ + public PcjQueryNode(final Configuration 
conf, final String tablename) + throws PCJStorageException, MalformedQueryException { + indexer = new MongoPrecomputedJoinIndexer(); + pcjDocs = indexer.getPcjStorage(conf); + this.tablename = tablename; + } + + /** + * returns size of table for query planning + */ + @Override + public double cardinality() { + double cardinality = 0; + try { + cardinality = pcjDocs.getPcjMetadata(tablename).getCardinality(); + } catch (final PcjException e) { + e.printStackTrace(); + } + return cardinality; + } + + @Override + public CloseableIteration<BindingSet, QueryEvaluationException> evaluate(final BindingSet bindingset) + throws QueryEvaluationException { + return this.evaluate(Collections.singleton(bindingset)); + } + + /** + * Core evaluation method used during query evaluation - given a collection + * of binding set constraints, this method finds common binding labels + * between the constraints and table, uses those to build a prefix scan of + * the Accumulo table, and creates a solution binding set by iterating over + * the scan results. + * + * @param bindingset + * - collection of {@link BindingSet}s to be joined with PCJ + * @return - CloseableIteration over joined results + */ + @Override + public CloseableIteration<BindingSet, QueryEvaluationException> evaluate(final Collection<BindingSet> bindingset) + throws QueryEvaluationException { + + if (bindingset.isEmpty()) { + return new IteratorWrapper<BindingSet, QueryEvaluationException>(new HashSet<BindingSet>().iterator()); + } + final CloseableIterator<BindingSet> iter = pcjDocs.getResults(tablename, new Authorizations(), bindingset); + return new CloseableIteration<BindingSet, QueryEvaluationException>() { + @Override + public boolean hasNext() throws QueryEvaluationException { --- End diff -- You need to take more care with this iterator. I notice that your getResults method for pcjDocs takes in the BindingSet as a constraint. That's good. 
You are taking into account the common Bindings between the constraint BindingSet and the docs in PcjDocStorage. However, you are not joining the non-overlapping Bindings of the constraint BindingSets with the BindingSets returned by PcjDocumentStorage. For example, if you have the BindingSet [a = <uri:Joe>, b = <uri:Bob>], and all BindingSets stored in PcjDocStorage have the form [a=..., c=...], you want to return BindingSets of the form [a = <uri:Joe>, b = <uri:Bob>, c =...]. That is, you need to include the b and c Bindings. Where is this happening? > Mongo PCJ indexer support > ------------------------- > > Key: RYA-303 > URL: https://issues.apache.org/jira/browse/RYA-303 > Project: Rya > Issue Type: Improvement > Reporter: Andrew Smith > Assignee: Andrew Smith > -- This message was sent by Atlassian JIRA (v6.4.14#64029)