Github user meiercaleb commented on a diff in the pull request:

    https://github.com/apache/incubator-rya/pull/172#discussion_r132192625
  
    --- Diff: 
extras/indexing/src/main/java/org/apache/rya/indexing/mongodb/pcj/PcjQueryNode.java
 ---
    @@ -0,0 +1,184 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements.  See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership.  The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License.  You may obtain a copy of the License at
    + *
    + *     http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing,
    + * software distributed under the License is distributed on an
    + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
    + * KIND, either express or implied.  See the License for the
    + * specific language governing permissions and limitations
    + * under the License.
    + */
    +package org.apache.rya.indexing.mongodb.pcj;
    +
    +import static com.google.common.base.Preconditions.checkNotNull;
    +
    +import java.util.Collection;
    +import java.util.Collections;
    +import java.util.HashSet;
    +
    +import org.apache.accumulo.core.client.AccumuloException;
    +import org.apache.accumulo.core.client.AccumuloSecurityException;
    +import org.apache.accumulo.core.client.TableNotFoundException;
    +import org.apache.accumulo.core.security.Authorizations;
    +import org.apache.hadoop.conf.Configuration;
    +import org.apache.rya.api.utils.IteratorWrapper;
    +import org.apache.rya.indexing.external.tupleSet.ExternalTupleSet;
    +import org.apache.rya.indexing.external.tupleSet.ParsedQueryUtil;
    +import org.apache.rya.indexing.pcj.matching.PCJOptimizerUtilities;
    +import org.apache.rya.indexing.pcj.storage.PcjException;
    +import 
org.apache.rya.indexing.pcj.storage.PrecomputedJoinStorage.CloseableIterator;
    +import 
org.apache.rya.indexing.pcj.storage.PrecomputedJoinStorage.PCJStorageException;
    +import org.apache.rya.indexing.pcj.storage.mongo.MongoPcjDocuments;
    +import org.apache.rya.rdftriplestore.evaluation.ExternalBatchingIterator;
    +import org.openrdf.query.BindingSet;
    +import org.openrdf.query.MalformedQueryException;
    +import org.openrdf.query.QueryEvaluationException;
    +import org.openrdf.query.algebra.Projection;
    +import org.openrdf.query.algebra.TupleExpr;
    +import org.openrdf.query.parser.ParsedTupleQuery;
    +import org.openrdf.query.parser.sparql.SPARQLParser;
    +import org.openrdf.sail.SailException;
    +
    +import com.google.common.base.Optional;
    +import com.google.common.base.Preconditions;
    +
    +import edu.umd.cs.findbugs.annotations.DefaultAnnotation;
    +import edu.umd.cs.findbugs.annotations.NonNull;
    +import info.aduna.iteration.CloseableIteration;
    +
    +/**
    + * Indexing node for PCJ expressions, inserted into the execution plan to
    + * delegate the precomputed join (PCJ) portion of a query to
    + * {@link MongoPrecomputedJoinIndexer}.
    + */
    +@DefaultAnnotation(NonNull.class)
    +public class PcjQueryNode extends ExternalTupleSet implements 
ExternalBatchingIterator {
    +    private final String tablename;
    +    private final PcjIndexer indexer;
    +    private final MongoPcjDocuments pcjDocs;
    +
    +    /**
    +     *
    +     * @param sparql - name of sparql query whose results will be stored 
in PCJ table
    +     * @param conf - Rya Configuration
    +     * @param tablename - name of an existing PCJ table
    +     * @throws MalformedQueryException
    +     * @throws SailException
    +     * @throws QueryEvaluationException
    +     * @throws TableNotFoundException
    +     * @throws AccumuloSecurityException
    +     * @throws AccumuloException
    +     * @throws PCJStorageException
    +     */
    +    public PcjQueryNode(final String sparql, final String tablename, final 
MongoPcjDocuments pcjDocs)
    +            throws MalformedQueryException, SailException, 
QueryEvaluationException, TableNotFoundException,
    +            AccumuloException, AccumuloSecurityException, 
PCJStorageException {
    +        this.pcjDocs = checkNotNull(pcjDocs);
    +        indexer = new MongoPrecomputedJoinIndexer();
    +        this.tablename = tablename;
    +        final SPARQLParser sp = new SPARQLParser();
    +        final ParsedTupleQuery pq = (ParsedTupleQuery) 
sp.parseQuery(sparql, null);
    +        final TupleExpr te = pq.getTupleExpr();
    +        Preconditions.checkArgument(PCJOptimizerUtilities.isPCJValid(te), 
"TupleExpr is an invalid PCJ.");
    +
    +        final Optional<Projection> projection = new 
ParsedQueryUtil().findProjection(pq);
    +        if (!projection.isPresent()) {
    +            throw new MalformedQueryException("SPARQL query '" + sparql + 
"' does not contain a Projection.");
    +        }
    +        setProjectionExpr(projection.get());
    +    }
    +
    /**
     * Creates a PcjQueryNode whose PCJ document storage is derived from the
     * supplied configuration.
     *
     * @param conf - configuration used to obtain the Mongo-backed PCJ storage
     * @param tablename - name of an existing PCJ table
     * @throws PCJStorageException if the PCJ storage cannot be created from the
     *             configuration
     * @throws MalformedQueryException declared for signature compatibility with
     *             the other constructor; not thrown here — TODO confirm whether
     *             it can be removed without breaking callers
     */
    public PcjQueryNode(final Configuration conf, final String tablename)
            throws PCJStorageException, MalformedQueryException {
        indexer = new MongoPrecomputedJoinIndexer();
        pcjDocs = indexer.getPcjStorage(conf);
        this.tablename = checkNotNull(tablename);
    }
    +
    +    /**
    +     * returns size of table for query planning
    +     */
    +    @Override
    +    public double cardinality() {
    +        double cardinality = 0;
    +        try {
    +            cardinality = 
pcjDocs.getPcjMetadata(tablename).getCardinality();
    +        } catch (final PcjException e) {
    +            e.printStackTrace();
    +        }
    +        return cardinality;
    +    }
    +
    +    @Override
    +    public CloseableIteration<BindingSet, QueryEvaluationException> 
evaluate(final BindingSet bindingset)
    +            throws QueryEvaluationException {
    +        return this.evaluate(Collections.singleton(bindingset));
    +    }
    +
    +    /**
    +     * Core evaluation method used during query evaluation - given a collection
    +     * of binding set constraints, this method finds common binding labels
    +     * between the constraints and the table, uses those to build a prefix scan
    +     * of the underlying PCJ document storage, and creates a solution binding
    +     * set by iterating over the scan results.
    +     *
    +     * @param bindingset
    +     *            - collection of {@link BindingSet}s to be joined with PCJ
    +     * @return - CloseableIteration over joined results
    +     */
    +    @Override
    +    public CloseableIteration<BindingSet, QueryEvaluationException> 
evaluate(final Collection<BindingSet> bindingset)
    +            throws QueryEvaluationException {
    +
    +        if (bindingset.isEmpty()) {
    +            return new IteratorWrapper<BindingSet, 
QueryEvaluationException>(new HashSet<BindingSet>().iterator());
    +        }
    +        final CloseableIterator<BindingSet> iter = 
pcjDocs.getResults(tablename, new Authorizations(), bindingset);
    +        return new CloseableIteration<BindingSet, 
QueryEvaluationException>() {
    +            @Override
    +            public boolean hasNext() throws QueryEvaluationException {
    --- End diff --
    
    You need to take more care with this iterator.  I notice that your 
getResults method for pcjDocs takes in the BindingSet as a constraint.  That's 
good.  You are taking into account the common Bindings between the constraint 
BindingSet and the docs in PcjDocStorage.  However, you are not joining the 
non-overlapping Bindings of the constraint BindingSets with the BindingSets 
returned by PcjDocumentStorage.  For example, if you have the BindingSet [a = 
<uri:Joe>, b = <uri:Bob>], and all BindingSets stored in PcjDocStorage have the 
form [a=..., c=...], you want to return BindingSets of the form [a = <uri:Joe>, 
b = <uri:Bob>, c =...].  That is, you need to include the b and c Bindings.  
Where is this happening? 


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---

Reply via email to