[ https://issues.apache.org/jira/browse/TINKERPOP-962?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15135275#comment-15135275 ]
ASF GitHub Bot commented on TINKERPOP-962: ------------------------------------------ Github user twilmes commented on a diff in the pull request: https://github.com/apache/incubator-tinkerpop/pull/210#discussion_r52088164 --- Diff: gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/computer/GraphFilter.java --- @@ -0,0 +1,193 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.tinkerpop.gremlin.process.computer; + +import org.apache.tinkerpop.gremlin.process.traversal.Traversal; +import org.apache.tinkerpop.gremlin.process.traversal.step.filter.RangeGlobalStep; +import org.apache.tinkerpop.gremlin.process.traversal.step.map.VertexStep; +import org.apache.tinkerpop.gremlin.process.traversal.util.TraversalHelper; +import org.apache.tinkerpop.gremlin.process.traversal.util.TraversalUtil; +import org.apache.tinkerpop.gremlin.structure.Direction; +import org.apache.tinkerpop.gremlin.structure.Edge; +import org.apache.tinkerpop.gremlin.structure.Vertex; + +import java.io.Serializable; +import java.util.Arrays; +import java.util.HashSet; +import java.util.Iterator; +import java.util.Set; + +/** + * GraphFilter is used by {@link GraphComputer} implementations to prune the source graph data being loaded into the OLAP system. + * There are two types of filters: a {@link Vertex} filter and an {@link Edge} filter. + * The vertex filter is a {@link Traversal} that can only check the id, label, and properties of the vertex. + * The edge filter is a {@link Traversal} that starts at the vertex and emits all legal incident edges. + * If no vertex filter is provided, then no vertices are filtered. If no edge filter is provided, then no edges are filtered. + * The use of a GraphFilter can greatly reduce the amount of data processed by the {@link GraphComputer}. + * For instance, for {@code g.V().count()}, there is no reason to load edges, and thus, the edge filter can be {@code bothE().limit(0)}. + * + * @author Marko A. 
Rodriguez (http://markorodriguez.com) + */ +public final class GraphFilter implements Cloneable, Serializable { + + public enum Legal { + YES, NO, MAYBE; + + public boolean positive() { + return this != NO; + } + + public boolean negative() { + return this == NO; + } + } + + private Traversal.Admin<Vertex, Vertex> vertexFilter = null; + private Traversal.Admin<Vertex, Edge> edgeFilter = null; + + private boolean allowNoEdges = false; + private Direction allowedEdgeDirection = Direction.BOTH; + private Set<String> allowedEdgeLabels = new HashSet<>(); + //private boolean allowAllRemainingEdges = false; + + public void setVertexFilter(final Traversal<Vertex, Vertex> vertexFilter) { + if (!TraversalHelper.isLocalVertex(vertexFilter.asAdmin())) + throw GraphComputer.Exceptions.vertexFilterAccessesIncidentEdges(vertexFilter); + this.vertexFilter = vertexFilter.asAdmin().clone(); + } + + public void setEdgeFilter(final Traversal<Vertex, Edge> edgeFilter) { + if (!TraversalHelper.isLocalStarGraph(edgeFilter.asAdmin())) + throw GraphComputer.Exceptions.edgeFilterAccessesAdjacentVertices(edgeFilter); + this.edgeFilter = edgeFilter.asAdmin().clone(); + if (this.edgeFilter.getEndStep() instanceof RangeGlobalStep && 0 == ((RangeGlobalStep) this.edgeFilter.getEndStep()).getHighRange()) + this.allowNoEdges = true; + else if (this.edgeFilter.getStartStep() instanceof VertexStep) { + this.allowedEdgeLabels.clear(); + this.allowedEdgeLabels.addAll(Arrays.asList(((VertexStep) this.edgeFilter.getStartStep()).getEdgeLabels())); + this.allowedEdgeDirection = ((VertexStep) this.edgeFilter.getStartStep()).getDirection(); + //this.allowAllRemainingEdges = 1 == this.edgeFilter.getSteps().size(); + } + } + + /*public void compileFilters() { --- End diff -- Did you mean to comment this out and remove calls to it? Figure you meant to but I recognized it from a previous commit. > Provide "vertex query" selectivity when importing data in OLAP. 
> --------------------------------------------------------------- > > Key: TINKERPOP-962 > URL: https://issues.apache.org/jira/browse/TINKERPOP-962 > Project: TinkerPop > Issue Type: Improvement > Components: process > Affects Versions: 3.1.0-incubating > Reporter: Marko A. Rodriguez > Assignee: Marko A. Rodriguez > Labels: breaking > Fix For: 3.2.0-incubating > > > Currently, when you do: > {code} > graph.compute().program(PageRankVertexProgram).submit() > {code} > We are pulling the entire {{graph}} into the OLAP engine. We should allow the > user to limit the amount of data pulled via a "vertex query"-type filter. For > instance, we could support the following two new methods on {{GraphComputer}}. > {code} > graph.compute().program(PageRankVertexProgram).vertices(hasLabel('person')).edges(out, > hasLabel('knows','friend').has('weight',gt(0.8))).submit() > {code} > The two methods would be defined as: > {code} > public interface GraphComputer { > ... > GraphComputer vertices(final Traversal<Vertex,Vertex> vertexFilter) > GraphComputer edges(final Direction direction, final Traversal<Edge,Edge> > edgeFilter) > {code} > If the user does NOT provide a {{vertices()}} (or {{edges()}}) call, then the > {{Traversal}} is assumed to be {{IdentityTraversal}}. Finally, in terms of > execution order, first {{vertices()}} is called and if "false" then don't > call edges. Else, call edges on all the respective incoming and outgoing > edges. Don't really like {{Direction}} there and perhaps it's just: > {code} > GraphComputer edges(final Traversal<Vertex,Edge> edgeFilter) > {code} > And then all edges that pass through are added to the OLAP vertex. You don't want > {{both}}? Then it's {{outE('knows','friend').has('weight',gt(0.8))}}. -- This message was sent by Atlassian JIRA (v6.3.4#6332)