This is an automated email from the ASF dual-hosted git repository. spmallette pushed a commit to branch TINKERPOP-2601 in repository https://gitbox.apache.org/repos/asf/tinkerpop.git
commit dfec9eaece5c768467b7375925020b268ed2e1f4 Author: Stephen Mallette <[email protected]> AuthorDate: Fri Aug 20 14:42:36 2021 -0400 TINKERPOP-2601 Added gherkin tests to Spark. Refactored runner to better handle OLAP by not exiting the star graph when looking up edges for assertions/parameters. --- .../tinkerpop/gremlin/features/StepDefinition.java | 6 ++- .../gremlin/hadoop/HadoopGraphFeatureTest.java | 8 ---- spark-gremlin/pom.xml | 12 +++++ .../gremlin/spark/SparkGraphFeatureTest.java | 52 ++++++++++++---------- .../src/test/resources/cucumber.properties | 1 + 5 files changed, 46 insertions(+), 33 deletions(-) diff --git a/gremlin-test/src/main/java/org/apache/tinkerpop/gremlin/features/StepDefinition.java b/gremlin-test/src/main/java/org/apache/tinkerpop/gremlin/features/StepDefinition.java index e97c264..e5095e3 100644 --- a/gremlin-test/src/main/java/org/apache/tinkerpop/gremlin/features/StepDefinition.java +++ b/gremlin-test/src/main/java/org/apache/tinkerpop/gremlin/features/StepDefinition.java @@ -38,6 +38,7 @@ import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversalSo import org.apache.tinkerpop.gremlin.structure.Direction; import org.apache.tinkerpop.gremlin.structure.Edge; import org.apache.tinkerpop.gremlin.structure.T; +import org.apache.tinkerpop.gremlin.structure.Vertex; import org.apache.tinkerpop.gremlin.util.iterator.IteratorUtils; import org.apache.tinkerpop.shaded.jackson.databind.JsonNode; import org.apache.tinkerpop.shaded.jackson.databind.ObjectMapper; @@ -410,7 +411,10 @@ public final class StepDefinition { private static Edge getEdge(final GraphTraversalSource g, final String e) { final Triplet<String,String,String> t = getEdgeTriplet(e); - return g.V().has("name", t.getValue0()).outE(t.getValue1()).where(inV().has("name", t.getValue2())).next(); + + // make this OLAP proof since you can't leave the star graph + return g.V().has("name", t.getValue0()).outE(t.getValue1()).toStream(). 
+ filter(edge -> g.V(edge.inVertex().id()).has("name", t.getValue2()).hasNext()).findFirst().get(); } private static Object getEdgeId(final GraphTraversalSource g, final String e) { diff --git a/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/HadoopGraphFeatureTest.java b/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/HadoopGraphFeatureTest.java index e43b00b..8f8f6a4 100644 --- a/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/HadoopGraphFeatureTest.java +++ b/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/HadoopGraphFeatureTest.java @@ -96,7 +96,6 @@ public class HadoopGraphFeatureTest { private static final HadoopGraph crew = HadoopGraph.open(new MapConfiguration(getBaseConfiguration(GraphData.CREW))); private static final HadoopGraph sink = HadoopGraph.open(new MapConfiguration(getBaseConfiguration(GraphData.SINK))); private static final HadoopGraph grateful = HadoopGraph.open(new MapConfiguration(getBaseConfiguration(GraphData.GRATEFUL))); - private static final HadoopGraph empty = HadoopGraph.open(new MapConfiguration(getBaseConfiguration(null))); static { readIntoGraph(modern, GraphData.MODERN); @@ -130,13 +129,6 @@ public class HadoopGraphFeatureTest { filter(s -> s.getValue0().equals(scenario.getName())).findFirst(); if (skipped.isPresent()) throw new AssumptionViolatedException(skipped.get().getValue1()); - - cleanEmpty(); - } - - private void cleanEmpty() { - final GraphTraversalSource g = empty.traversal(); - g.V().drop().iterate(); } private static void readIntoGraph(final Graph graph, final GraphData graphData) { diff --git a/spark-gremlin/pom.xml b/spark-gremlin/pom.xml index 8eb7fba..089e99a 100644 --- a/spark-gremlin/pom.xml +++ b/spark-gremlin/pom.xml @@ -206,6 +206,18 @@ limitations under the License. 
</exclusions> </dependency> <dependency> + <groupId>com.google.inject</groupId> + <artifactId>guice</artifactId> + <version>4.2.3</version> + <scope>test</scope> + <exclusions> + <exclusion> + <groupId>com.google.guava</groupId> + <artifactId>guava</artifactId> + </exclusion> + </exclusions> + </dependency> + <dependency> <groupId>org.apache.tinkerpop</groupId> <artifactId>tinkergraph-gremlin</artifactId> <version>${project.version}</version> diff --git a/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/HadoopGraphFeatureTest.java b/spark-gremlin/src/test/java/org/apache/tinkerpop/gremlin/spark/SparkGraphFeatureTest.java similarity index 77% copy from hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/HadoopGraphFeatureTest.java copy to spark-gremlin/src/test/java/org/apache/tinkerpop/gremlin/spark/SparkGraphFeatureTest.java index e43b00b..9112448 100644 --- a/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/HadoopGraphFeatureTest.java +++ b/spark-gremlin/src/test/java/org/apache/tinkerpop/gremlin/spark/SparkGraphFeatureTest.java @@ -16,7 +16,7 @@ * specific language governing permissions and limitations * under the License. 
*/ -package org.apache.tinkerpop.gremlin.hadoop; +package org.apache.tinkerpop.gremlin.spark; import com.google.inject.AbstractModule; import com.google.inject.Guice; @@ -29,15 +29,19 @@ import io.cucumber.java.Scenario; import io.cucumber.junit.Cucumber; import io.cucumber.junit.CucumberOptions; import org.apache.commons.configuration2.MapConfiguration; -import org.apache.commons.lang3.RandomStringUtils; -import org.apache.tinkerpop.gremlin.LoadGraphWith; +import org.apache.spark.launcher.SparkLauncher; +import org.apache.spark.serializer.KryoSerializer; import org.apache.tinkerpop.gremlin.TestHelper; import org.apache.tinkerpop.gremlin.features.TestFiles; import org.apache.tinkerpop.gremlin.features.World; +import org.apache.tinkerpop.gremlin.hadoop.Constants; import org.apache.tinkerpop.gremlin.hadoop.structure.HadoopGraph; import org.apache.tinkerpop.gremlin.hadoop.structure.io.gryo.GryoInputFormat; import org.apache.tinkerpop.gremlin.hadoop.structure.io.gryo.GryoOutputFormat; +import org.apache.tinkerpop.gremlin.process.computer.Computer; import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversalSource; +import org.apache.tinkerpop.gremlin.spark.process.computer.SparkGraphComputer; +import org.apache.tinkerpop.gremlin.spark.structure.io.gryo.GryoRegistrator; import org.apache.tinkerpop.gremlin.structure.Graph; import org.javatuples.Pair; import org.junit.AssumptionViolatedException; @@ -45,7 +49,6 @@ import org.junit.runner.RunWith; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.File; import java.util.ArrayList; import java.util.HashMap; import java.util.List; @@ -61,13 +64,14 @@ import static org.apache.tinkerpop.gremlin.LoadGraphWith.GraphData; objectFactory = GuiceFactory.class, features = { "../gremlin-test/features" }, plugin = {"pretty", "junit:target/cucumber.xml"}) -public class HadoopGraphFeatureTest { - private static final Logger logger = LoggerFactory.getLogger(HadoopGraphFeatureTest.class); +public 
class SparkGraphFeatureTest { + private static final Logger logger = LoggerFactory.getLogger(SparkGraphFeatureTest.class); + private static final int AVAILABLE_PROCESSORS = Runtime.getRuntime().availableProcessors(); - private static final String skipReasonLength = "Hadoop-Gremlin is OLAP-oriented and for OLTP operations, linear-scan joins are required. This particular tests takes many minutes to execute."; + private static final String skipReasonLength = "Spark-Gremlin is OLAP-oriented and for OLTP operations, linear-scan joins are required. This particular tests takes many minutes to execute."; private static final List<Pair<String, String>> skip = new ArrayList<Pair<String,String>>() {{ - add(Pair.with("g_V_both_both_count", skipReasonLength)); + add(Pair.with("g_V_both_both_count", skipReasonLength)); add(Pair.with("g_V_repeatXoutX_timesX3X_count", skipReasonLength)); add(Pair.with("g_V_repeatXoutX_timesX8X_count", skipReasonLength)); add(Pair.with("g_V_repeatXoutX_timesX5X_asXaX_outXwrittenByX_asXbX_selectXa_bX_count", skipReasonLength)); @@ -85,18 +89,17 @@ public class HadoopGraphFeatureTest { public static final class ServiceModule extends AbstractModule { @Override protected void configure() { - bind(World.class).to(HadoopGraphWorld.class); + bind(World.class).to(SparkGraphWorld.class); } } - public static class HadoopGraphWorld implements World { + public static class SparkGraphWorld implements World { private static final HadoopGraph modern = HadoopGraph.open(new MapConfiguration(getBaseConfiguration(GraphData.MODERN))); private static final HadoopGraph classic = HadoopGraph.open(new MapConfiguration(getBaseConfiguration(GraphData.CLASSIC))); private static final HadoopGraph crew = HadoopGraph.open(new MapConfiguration(getBaseConfiguration(GraphData.CREW))); private static final HadoopGraph sink = HadoopGraph.open(new MapConfiguration(getBaseConfiguration(GraphData.SINK))); private static final HadoopGraph grateful = HadoopGraph.open(new 
MapConfiguration(getBaseConfiguration(GraphData.GRATEFUL))); - private static final HadoopGraph empty = HadoopGraph.open(new MapConfiguration(getBaseConfiguration(null))); static { readIntoGraph(modern, GraphData.MODERN); @@ -111,15 +114,15 @@ public class HadoopGraphFeatureTest { if (null == graphData) throw new AssumptionViolatedException("HadoopGraph does not support graph mutations"); else if (graphData == GraphData.CLASSIC) - return classic.traversal(); + return classic.traversal().withComputer(Computer.compute(SparkGraphComputer.class)); else if (graphData == GraphData.CREW) - return crew.traversal(); + return crew.traversal().withComputer(Computer.compute(SparkGraphComputer.class)); else if (graphData == GraphData.MODERN) - return modern.traversal(); + return modern.traversal().withComputer(Computer.compute(SparkGraphComputer.class)); else if (graphData == GraphData.SINK) - return sink.traversal(); + return sink.traversal().withComputer(Computer.compute(SparkGraphComputer.class)); else if (graphData == GraphData.GRATEFUL) - return grateful.traversal(); + return grateful.traversal().withComputer(Computer.compute(SparkGraphComputer.class)); else throw new UnsupportedOperationException("GraphData not supported: " + graphData.name()); } @@ -130,13 +133,6 @@ public class HadoopGraphFeatureTest { filter(s -> s.getValue0().equals(scenario.getName())).findFirst(); if (skipped.isPresent()) throw new AssumptionViolatedException(skipped.get().getValue1()); - - cleanEmpty(); - } - - private void cleanEmpty() { - final GraphTraversalSource g = empty.traversal(); - g.V().drop().iterate(); } private static void readIntoGraph(final Graph graph, final GraphData graphData) { @@ -144,7 +140,7 @@ public class HadoopGraphFeatureTest { } private static String getWorkingDirectory() { - return TestHelper.makeTestDataDirectory(HadoopGraphFeatureTest.class, "graph-provider-data"); + return TestHelper.makeTestDataDirectory(SparkGraphFeatureTest.class, "graph-provider-data"); } private 
static Map<String, Object> getBaseConfiguration(final GraphData graphData) { @@ -154,6 +150,14 @@ public class HadoopGraphFeatureTest { put(Constants.GREMLIN_HADOOP_GRAPH_WRITER, GryoOutputFormat.class.getCanonicalName()); put(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, getWorkingDirectory()); put(Constants.GREMLIN_HADOOP_JARS_IN_DISTRIBUTED_CACHE, false); + + put(Constants.GREMLIN_SPARK_PERSIST_CONTEXT, true); // this makes the test suite go really fast + + put(Constants.GREMLIN_HADOOP_DEFAULT_GRAPH_COMPUTER, SparkGraphComputer.class.getCanonicalName()); + put(SparkLauncher.SPARK_MASTER, "local[" + AVAILABLE_PROCESSORS + "]"); + put(Constants.SPARK_SERIALIZER, KryoSerializer.class.getCanonicalName()); + put(Constants.SPARK_KRYO_REGISTRATOR, GryoRegistrator.class.getCanonicalName()); + put(Constants.SPARK_KRYO_REGISTRATION_REQUIRED, true); }}; } } diff --git a/spark-gremlin/src/test/resources/cucumber.properties b/spark-gremlin/src/test/resources/cucumber.properties new file mode 100644 index 0000000..0180176 --- /dev/null +++ b/spark-gremlin/src/test/resources/cucumber.properties @@ -0,0 +1 @@ +guice.injector-source=org.apache.tinkerpop.gremlin.spark.SparkGraphFeatureTest$WorldInjectorSource \ No newline at end of file
