Repository: tinkerpop Updated Branches: refs/heads/TINKERPOP-1389 a0ce3178e -> 532ed59c2
UnshadedKryoShimService is now the default for SparkGraphComputer. Users no longer have to call System.setProperty() as it is all handled in the SparkGraphComputer constructor. I would really like to get rid of the 'service model' (cc @dalaro) as UnshadedKryoShimService only works with Spark and thus, we can't have it leak over to Giraph (and other Hadoop-based graph computers). It is leak-free right now, but in general, it would be good to not even have this as a priority/service thing. Trying to think how to do this.... Project: http://git-wip-us.apache.org/repos/asf/tinkerpop/repo Commit: http://git-wip-us.apache.org/repos/asf/tinkerpop/commit/532ed59c Tree: http://git-wip-us.apache.org/repos/asf/tinkerpop/tree/532ed59c Diff: http://git-wip-us.apache.org/repos/asf/tinkerpop/diff/532ed59c Branch: refs/heads/TINKERPOP-1389 Commit: 532ed59c2ed05d48e9a3eeed67105fb4b9ce32fa Parents: a0ce317 Author: Marko A. Rodriguez <[email protected]> Authored: Tue Oct 25 08:02:07 2016 -0600 Committer: Marko A. 
Rodriguez <[email protected]> Committed: Tue Oct 25 08:02:07 2016 -0600 ---------------------------------------------------------------------- CHANGELOG.asciidoc | 1 + docs/src/upgrade/release-3.3.x.asciidoc | 21 ++++++++ .../process/computer/GiraphGraphComputer.java | 3 ++ hadoop-gremlin/conf/hadoop-graphson.properties | 2 + .../conf/hadoop-grateful-gryo.properties | 25 +++++----- hadoop-gremlin/conf/hadoop-gryo.properties | 6 ++- hadoop-gremlin/conf/hadoop-script.properties | 5 +- .../tinkerpop/gremlin/hadoop/Constants.java | 2 + .../process/computer/SparkGraphComputer.java | 11 +++++ .../unshaded/UnshadedKryoShimService.java | 22 +++++---- ...tratorGraphComputerProcessIntegrateTest.java | 33 ------------- ...alizerGraphComputerProcessIntegrateTest.java | 33 +++++++++++++ ...SparkHadoopGraphGryoRegistratorProvider.java | 52 -------------------- .../SparkHadoopGraphGryoSerializerProvider.java | 45 +++++++++++++++++ .../computer/SparkHadoopGraphProvider.java | 17 +++++-- 15 files changed, 162 insertions(+), 116 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/tinkerpop/blob/532ed59c/CHANGELOG.asciidoc ---------------------------------------------------------------------- diff --git a/CHANGELOG.asciidoc b/CHANGELOG.asciidoc index 11d747e..b2db5c1 100644 --- a/CHANGELOG.asciidoc +++ b/CHANGELOG.asciidoc @@ -27,6 +27,7 @@ TinkerPop 3.3.0 (Release Date: NOT OFFICIALLY RELEASED YET) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * Bumped to support Spark 2.0.0. +* Added `UnshadedKryoShimService` as the new default serializer model for `SparkGraphComputer`. * Replaced term `REST` with `HTTP` to remove any confusion as to the design of the API. * Moved `gremlin-benchmark` under `gremlin-tools` module. * Added `gremlin-tools` and its submodule `gremlin-coverage`. 
http://git-wip-us.apache.org/repos/asf/tinkerpop/blob/532ed59c/docs/src/upgrade/release-3.3.x.asciidoc ---------------------------------------------------------------------- diff --git a/docs/src/upgrade/release-3.3.x.asciidoc b/docs/src/upgrade/release-3.3.x.asciidoc index a026771..d1a19d9 100644 --- a/docs/src/upgrade/release-3.3.x.asciidoc +++ b/docs/src/upgrade/release-3.3.x.asciidoc @@ -31,3 +31,24 @@ Please see the link:https://github.com/apache/tinkerpop/blob/3.3.3/CHANGELOG.asc Upgrading for Users ~~~~~~~~~~~~~~~~~~~ + +SparkGraphComputer GryoRegistrator +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Historically, `SparkGraphComputer` has used `GryoSerializer` to handle the serialization of objects in Spark. The reason +this exists is because TinkerPop uses a shaded version of Kryo and thus, couldn't use the standard `KryoSerializer`-model +provided by Spark. However, a "shim model" was created which allows for the shaded and unshaded versions of Kryo to +interact with one another. To this end, `KryoSerializer` can now be used with a `GryoRegistrator`. 
The properties file +for a `SparkGraphComputer` now looks as follows: + +``` +spark.serializer=org.apache.spark.serializer.KryoSerializer +spark.kryo.registrator=org.apache.tinkerpop.gremlin.spark.structure.io.gryo.GryoRegistrator +``` + +If the old `GryoSerializer` model is desired, then the properties file should simply look as before: + +``` +spark.serializer=org.apache.tinkerpop.gremlin.spark.structure.io.gryo.GryoSerializer +``` + http://git-wip-us.apache.org/repos/asf/tinkerpop/blob/532ed59c/giraph-gremlin/src/main/java/org/apache/tinkerpop/gremlin/giraph/process/computer/GiraphGraphComputer.java ---------------------------------------------------------------------- diff --git a/giraph-gremlin/src/main/java/org/apache/tinkerpop/gremlin/giraph/process/computer/GiraphGraphComputer.java b/giraph-gremlin/src/main/java/org/apache/tinkerpop/gremlin/giraph/process/computer/GiraphGraphComputer.java index b06b40a..6ffd5ea 100644 --- a/giraph-gremlin/src/main/java/org/apache/tinkerpop/gremlin/giraph/process/computer/GiraphGraphComputer.java +++ b/giraph-gremlin/src/main/java/org/apache/tinkerpop/gremlin/giraph/process/computer/GiraphGraphComputer.java @@ -44,6 +44,7 @@ import org.apache.tinkerpop.gremlin.hadoop.process.computer.util.MapReduceHelper import org.apache.tinkerpop.gremlin.hadoop.structure.HadoopGraph; import org.apache.tinkerpop.gremlin.hadoop.structure.io.FileSystemStorage; import org.apache.tinkerpop.gremlin.hadoop.structure.io.GraphFilterAware; +import org.apache.tinkerpop.gremlin.hadoop.structure.io.HadoopPoolShimService; import org.apache.tinkerpop.gremlin.hadoop.structure.io.InputOutputHelper; import org.apache.tinkerpop.gremlin.hadoop.structure.io.ObjectWritable; import org.apache.tinkerpop.gremlin.hadoop.structure.io.ObjectWritableIterator; @@ -57,6 +58,7 @@ import org.apache.tinkerpop.gremlin.process.computer.VertexProgram; import org.apache.tinkerpop.gremlin.process.computer.util.DefaultComputerResult; import 
org.apache.tinkerpop.gremlin.process.computer.util.MapMemory; import org.apache.tinkerpop.gremlin.structure.io.Storage; +import org.apache.tinkerpop.gremlin.structure.io.gryo.kryoshim.KryoShimServiceLoader; import org.apache.tinkerpop.gremlin.util.Gremlin; import org.apache.tinkerpop.gremlin.util.iterator.IteratorUtils; @@ -82,6 +84,7 @@ public final class GiraphGraphComputer extends AbstractHadoopGraphComputer imple public GiraphGraphComputer(final HadoopGraph hadoopGraph) { super(hadoopGraph); + System.setProperty(KryoShimServiceLoader.KRYO_SHIM_SERVICE, HadoopPoolShimService.class.getCanonicalName()); // HadoopPools only with Giraph final Configuration configuration = hadoopGraph.configuration(); configuration.getKeys().forEachRemaining(key -> this.giraphConfiguration.set(key, configuration.getProperty(key).toString())); this.giraphConfiguration.setMasterComputeClass(GiraphMemory.class); http://git-wip-us.apache.org/repos/asf/tinkerpop/blob/532ed59c/hadoop-gremlin/conf/hadoop-graphson.properties ---------------------------------------------------------------------- diff --git a/hadoop-gremlin/conf/hadoop-graphson.properties b/hadoop-gremlin/conf/hadoop-graphson.properties index 10025de..c37cf28 100644 --- a/hadoop-gremlin/conf/hadoop-graphson.properties +++ b/hadoop-gremlin/conf/hadoop-graphson.properties @@ -30,6 +30,8 @@ gremlin.hadoop.jarsInDistributedCache=true # SparkGraphComputer Configuration # #################################### spark.master=local[4] +spark.serializer=org.apache.spark.serializer.KryoSerializer +spark.kryo.registrator=org.apache.tinkerpop.gremlin.spark.structure.io.gryo.GryoRegistrator ##################################### # GiraphGraphComputer Configuration # http://git-wip-us.apache.org/repos/asf/tinkerpop/blob/532ed59c/hadoop-gremlin/conf/hadoop-grateful-gryo.properties ---------------------------------------------------------------------- diff --git a/hadoop-gremlin/conf/hadoop-grateful-gryo.properties 
b/hadoop-gremlin/conf/hadoop-grateful-gryo.properties index 17aeadd..92ed942 100644 --- a/hadoop-gremlin/conf/hadoop-grateful-gryo.properties +++ b/hadoop-gremlin/conf/hadoop-grateful-gryo.properties @@ -15,9 +15,6 @@ # specific language governing permissions and limitations # under the License. -# -# Hadoop Graph Configuration -# gremlin.graph=org.apache.tinkerpop.gremlin.hadoop.structure.HadoopGraph gremlin.hadoop.graphReader=org.apache.tinkerpop.gremlin.hadoop.structure.io.gryo.GryoInputFormat gremlin.hadoop.graphWriter=org.apache.tinkerpop.gremlin.hadoop.structure.io.gryo.GryoOutputFormat @@ -25,9 +22,17 @@ gremlin.hadoop.inputLocation=grateful-dead.kryo gremlin.hadoop.outputLocation=output gremlin.hadoop.jarsInDistributedCache=true -# -# GiraphGraphComputer Configuration -# +#################################### +# SparkGraphComputer Configuration # +#################################### +spark.master=local[1] +spark.executor.memory=1g +spark.serializer=org.apache.spark.serializer.KryoSerializer +spark.kryo.registrator=org.apache.tinkerpop.gremlin.spark.structure.io.gryo.GryoRegistrator + +##################################### +# GiraphGraphComputer Configuration # +##################################### giraph.minWorkers=1 giraph.maxWorkers=1 giraph.useOutOfCoreGraph=true @@ -37,11 +42,3 @@ mapred.reduce.child.java.opts=-Xmx1024m giraph.numInputThreads=4 giraph.numComputeThreads=4 giraph.maxMessagesInMemory=100000 - -# -# SparkGraphComputer Configuration -# -spark.master=local[1] -spark.executor.memory=1g -spark.serializer=org.apache.tinkerpop.gremlin.spark.structure.io.gryo.GryoSerializer - http://git-wip-us.apache.org/repos/asf/tinkerpop/blob/532ed59c/hadoop-gremlin/conf/hadoop-gryo.properties ---------------------------------------------------------------------- diff --git a/hadoop-gremlin/conf/hadoop-gryo.properties b/hadoop-gremlin/conf/hadoop-gryo.properties index aaab24d..c156a98 100644 --- a/hadoop-gremlin/conf/hadoop-gryo.properties +++ 
b/hadoop-gremlin/conf/hadoop-gryo.properties @@ -14,6 +14,7 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. + gremlin.graph=org.apache.tinkerpop.gremlin.hadoop.structure.HadoopGraph gremlin.hadoop.graphReader=org.apache.tinkerpop.gremlin.hadoop.structure.io.gryo.GryoInputFormat gremlin.hadoop.graphWriter=org.apache.tinkerpop.gremlin.hadoop.structure.io.gryo.GryoOutputFormat @@ -28,7 +29,9 @@ gremlin.hadoop.outputLocation=output #################################### spark.master=local[4] spark.executor.memory=1g -spark.serializer=org.apache.tinkerpop.gremlin.spark.structure.io.gryo.GryoSerializer +spark.serializer=org.apache.spark.serializer.KryoSerializer +spark.kryo.registrator=org.apache.tinkerpop.gremlin.spark.structure.io.gryo.GryoRegistrator +# spark.serializer=org.apache.tinkerpop.gremlin.spark.structure.io.gryo.GryoSerializer (3.2.x model) # gremlin.spark.graphStorageLevel=MEMORY_AND_DISK # gremlin.spark.persistContext=true # gremlin.spark.graphWriter=org.apache.tinkerpop.gremlin.spark.structure.io.PersistedOutputRDD @@ -39,7 +42,6 @@ spark.serializer=org.apache.tinkerpop.gremlin.spark.structure.io.gryo.GryoSerial # spark.eventLog.dir=/tmp/spark-event-logs # spark.ui.killEnabled=true - ##################################### # GiraphGraphComputer Configuration # ##################################### http://git-wip-us.apache.org/repos/asf/tinkerpop/blob/532ed59c/hadoop-gremlin/conf/hadoop-script.properties ---------------------------------------------------------------------- diff --git a/hadoop-gremlin/conf/hadoop-script.properties b/hadoop-gremlin/conf/hadoop-script.properties index 0c27dd7..88bf6d9 100644 --- a/hadoop-gremlin/conf/hadoop-script.properties +++ b/hadoop-gremlin/conf/hadoop-script.properties @@ -14,6 +14,7 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. 
+ gremlin.graph=org.apache.tinkerpop.gremlin.hadoop.structure.HadoopGraph gremlin.hadoop.graphReader=org.apache.tinkerpop.gremlin.hadoop.structure.io.script.ScriptInputFormat gremlin.hadoop.graphWriter=org.apache.tinkerpop.gremlin.hadoop.structure.io.graphson.GraphSONOutputFormat @@ -28,7 +29,9 @@ gremlin.hadoop.outputLocation=output #################################### spark.master=local[4] spark.executor.memory=1g -spark.serializer=org.apache.tinkerpop.gremlin.spark.structure.io.gryo.GryoSerializer +spark.serializer=org.apache.spark.serializer.KryoSerializer +spark.kryo.registrator=org.apache.tinkerpop.gremlin.spark.structure.io.gryo.GryoRegistrator +# spark.serializer=org.apache.tinkerpop.gremlin.spark.structure.io.gryo.GryoSerializer (3.2.x model) # spark.kryo.registrationRequired=true # spark.storage.memoryFraction=0.2 # spark.eventLog.enabled=true http://git-wip-us.apache.org/repos/asf/tinkerpop/blob/532ed59c/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/Constants.java ---------------------------------------------------------------------- diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/Constants.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/Constants.java index 9c79b53..3ff8e2a 100644 --- a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/Constants.java +++ b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/Constants.java @@ -61,6 +61,8 @@ public final class Constants { public static final String GREMLIN_SPARK_SKIP_PARTITIONER = "gremlin.spark.skipPartitioner"; // don't partition the loadedGraphRDD public static final String GREMLIN_SPARK_SKIP_GRAPH_CACHE = "gremlin.spark.skipGraphCache"; // don't cache the loadedGraphRDD (ignores graphStorageLevel) public static final String SPARK_SERIALIZER = "spark.serializer"; + public static final String SPARK_KRYO_REGISTRATOR = "spark.kryo.registrator"; + public static final String SPARK_KRYO_REGISTRATION_REQUIRED 
= "spark.kryo.registrationRequired"; public static String getGraphLocation(final String location) { return location.endsWith("/") ? location + Constants.HIDDEN_G : location + "/" + Constants.HIDDEN_G; http://git-wip-us.apache.org/repos/asf/tinkerpop/blob/532ed59c/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkGraphComputer.java ---------------------------------------------------------------------- diff --git a/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkGraphComputer.java b/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkGraphComputer.java index 36618a1..c7d0cfb 100644 --- a/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkGraphComputer.java +++ b/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkGraphComputer.java @@ -35,6 +35,7 @@ import org.apache.spark.SparkContext; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.launcher.SparkLauncher; +import org.apache.spark.serializer.KryoSerializer; import org.apache.spark.storage.StorageLevel; import org.apache.tinkerpop.gremlin.hadoop.Constants; import org.apache.tinkerpop.gremlin.hadoop.process.computer.AbstractHadoopGraphComputer; @@ -43,6 +44,7 @@ import org.apache.tinkerpop.gremlin.hadoop.structure.HadoopConfiguration; import org.apache.tinkerpop.gremlin.hadoop.structure.HadoopGraph; import org.apache.tinkerpop.gremlin.hadoop.structure.io.FileSystemStorage; import org.apache.tinkerpop.gremlin.hadoop.structure.io.GraphFilterAware; +import org.apache.tinkerpop.gremlin.hadoop.structure.io.HadoopPoolShimService; import org.apache.tinkerpop.gremlin.hadoop.structure.io.VertexWritable; import org.apache.tinkerpop.gremlin.hadoop.structure.util.ConfUtil; import org.apache.tinkerpop.gremlin.process.computer.ComputerResult; @@ -67,7 +69,9 @@ import 
org.apache.tinkerpop.gremlin.spark.structure.io.OutputRDD; import org.apache.tinkerpop.gremlin.spark.structure.io.PersistedInputRDD; import org.apache.tinkerpop.gremlin.spark.structure.io.PersistedOutputRDD; import org.apache.tinkerpop.gremlin.spark.structure.io.SparkContextStorage; +import org.apache.tinkerpop.gremlin.spark.structure.io.gryo.GryoRegistrator; import org.apache.tinkerpop.gremlin.spark.structure.io.gryo.GryoSerializer; +import org.apache.tinkerpop.gremlin.spark.structure.io.gryo.kryoshim.unshaded.UnshadedKryoShimService; import org.apache.tinkerpop.gremlin.structure.Direction; import org.apache.tinkerpop.gremlin.structure.io.Storage; import org.apache.tinkerpop.gremlin.structure.io.gryo.kryoshim.KryoShimServiceLoader; @@ -106,6 +110,13 @@ public final class SparkGraphComputer extends AbstractHadoopGraphComputer { super(hadoopGraph); this.sparkConfiguration = new HadoopConfiguration(); ConfigurationUtils.copy(this.hadoopGraph.configuration(), this.sparkConfiguration); + if (KryoSerializer.class.getCanonicalName().equals(this.sparkConfiguration.getString(Constants.SPARK_SERIALIZER, null)) && + GryoRegistrator.class.getCanonicalName().equals(this.sparkConfiguration.getString(Constants.SPARK_KRYO_REGISTRATOR, null))) { + System.setProperty(KryoShimServiceLoader.KRYO_SHIM_SERVICE, UnshadedKryoShimService.class.getCanonicalName()); + } else if (GryoSerializer.class.getCanonicalName().equals(this.sparkConfiguration.getString(Constants.SPARK_SERIALIZER, null)) && + !this.sparkConfiguration.containsKey(Constants.SPARK_KRYO_REGISTRATOR)) { + System.setProperty(KryoShimServiceLoader.KRYO_SHIM_SERVICE, HadoopPoolShimService.class.getCanonicalName()); + } if (null != System.getProperty(KryoShimServiceLoader.KRYO_SHIM_SERVICE, null)) { final String shimService = System.getProperty(KryoShimServiceLoader.KRYO_SHIM_SERVICE); this.sparkConfiguration.setProperty(SparkLauncher.EXECUTOR_EXTRA_JAVA_OPTIONS, 
http://git-wip-us.apache.org/repos/asf/tinkerpop/blob/532ed59c/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/structure/io/gryo/kryoshim/unshaded/UnshadedKryoShimService.java ---------------------------------------------------------------------- diff --git a/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/structure/io/gryo/kryoshim/unshaded/UnshadedKryoShimService.java b/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/structure/io/gryo/kryoshim/unshaded/UnshadedKryoShimService.java index 41e0001..0789d6a 100644 --- a/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/structure/io/gryo/kryoshim/unshaded/UnshadedKryoShimService.java +++ b/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/structure/io/gryo/kryoshim/unshaded/UnshadedKryoShimService.java @@ -30,6 +30,7 @@ import com.esotericsoftware.kryo.io.Output; import org.apache.commons.configuration.BaseConfiguration; import org.apache.commons.configuration.Configuration; import org.apache.spark.SparkConf; +import org.apache.tinkerpop.gremlin.hadoop.Constants; import org.apache.tinkerpop.gremlin.spark.structure.io.gryo.IoRegistryAwareKryoSerializer; import org.apache.tinkerpop.gremlin.structure.io.gryo.GryoPool; import org.apache.tinkerpop.gremlin.structure.io.gryo.kryoshim.KryoShimService; @@ -48,7 +49,8 @@ public class UnshadedKryoShimService implements KryoShimService { private static volatile boolean initialized; - public UnshadedKryoShimService() { } + public UnshadedKryoShimService() { + } @Override public Object readClassAndObject(final InputStream source) { @@ -123,23 +125,23 @@ public class UnshadedKryoShimService implements KryoShimService { sparkConf.set(GryoPool.CONFIG_IO_REGISTRY, regStr); } // Setting spark.serializer here almost certainly isn't necessary, but it doesn't hurt - sparkConf.set("spark.serializer", IoRegistryAwareKryoSerializer.class.getCanonicalName()); + sparkConf.set(Constants.SPARK_SERIALIZER, 
IoRegistryAwareKryoSerializer.class.getCanonicalName()); - final String registrator = conf.getString("spark.kryo.registrator"); + final String registrator = conf.getString(Constants.SPARK_KRYO_REGISTRATOR); if (null != registrator) { - sparkConf.set("spark.kryo.registrator", registrator); - log.info("Copied spark.kryo.registrator: {}", registrator); + sparkConf.set(Constants.SPARK_KRYO_REGISTRATOR, registrator); + log.info("Copied " + Constants.SPARK_KRYO_REGISTRATOR + ": {}", registrator); } else { - log.info("Not copying spark.kryo.registrator"); + log.info("Not copying " + Constants.SPARK_KRYO_REGISTRATOR); } - // Reuse Gryo poolsize for Kryo poolsize (no need to copy this to SparkConf) - final int poolSize = conf.getInt(GryoPool.CONFIG_IO_GRYO_POOL_SIZE, - GryoPool.CONFIG_IO_GRYO_POOL_SIZE_DEFAULT); // Instantiate the spark.serializer final IoRegistryAwareKryoSerializer ioReg = new IoRegistryAwareKryoSerializer(sparkConf); - // Setup a pool backed by our spark.serializer instance + // Setup a pool backed by our spark.serializer instance + // Reuse Gryo poolsize for Kryo poolsize (no need to copy this to SparkConf) + final int poolSize = conf.getInt(GryoPool.CONFIG_IO_GRYO_POOL_SIZE, + GryoPool.CONFIG_IO_GRYO_POOL_SIZE_DEFAULT); for (int i = 0; i < poolSize; i++) { KRYOS.add(ioReg.newKryo()); } http://git-wip-us.apache.org/repos/asf/tinkerpop/blob/532ed59c/spark-gremlin/src/test/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkGryoRegistratorGraphComputerProcessIntegrateTest.java ---------------------------------------------------------------------- diff --git a/spark-gremlin/src/test/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkGryoRegistratorGraphComputerProcessIntegrateTest.java b/spark-gremlin/src/test/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkGryoRegistratorGraphComputerProcessIntegrateTest.java deleted file mode 100644 index 29f627d..0000000 --- 
a/spark-gremlin/src/test/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkGryoRegistratorGraphComputerProcessIntegrateTest.java +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.tinkerpop.gremlin.spark.process.computer; - -import org.apache.tinkerpop.gremlin.GraphProviderClass; -import org.apache.tinkerpop.gremlin.hadoop.structure.HadoopGraph; -import org.apache.tinkerpop.gremlin.process.ProcessComputerSuite; -import org.junit.runner.RunWith; - -/** - * @author Marko A. 
Rodriguez (http://markorodriguez.com) - */ -@RunWith(ProcessComputerSuite.class) -@GraphProviderClass(provider = SparkHadoopGraphGryoRegistratorProvider.class, graph = HadoopGraph.class) -public class SparkGryoRegistratorGraphComputerProcessIntegrateTest { -} http://git-wip-us.apache.org/repos/asf/tinkerpop/blob/532ed59c/spark-gremlin/src/test/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkGryoSerializerGraphComputerProcessIntegrateTest.java ---------------------------------------------------------------------- diff --git a/spark-gremlin/src/test/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkGryoSerializerGraphComputerProcessIntegrateTest.java b/spark-gremlin/src/test/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkGryoSerializerGraphComputerProcessIntegrateTest.java new file mode 100644 index 0000000..b04513c --- /dev/null +++ b/spark-gremlin/src/test/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkGryoSerializerGraphComputerProcessIntegrateTest.java @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.tinkerpop.gremlin.spark.process.computer; + +import org.apache.tinkerpop.gremlin.GraphProviderClass; +import org.apache.tinkerpop.gremlin.hadoop.structure.HadoopGraph; +import org.apache.tinkerpop.gremlin.process.ProcessComputerSuite; +import org.junit.runner.RunWith; + +/** + * @author Marko A. Rodriguez (http://markorodriguez.com) + */ +@RunWith(ProcessComputerSuite.class) +@GraphProviderClass(provider = SparkHadoopGraphGryoSerializerProvider.class, graph = HadoopGraph.class) +public class SparkGryoSerializerGraphComputerProcessIntegrateTest { +} http://git-wip-us.apache.org/repos/asf/tinkerpop/blob/532ed59c/spark-gremlin/src/test/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkHadoopGraphGryoRegistratorProvider.java ---------------------------------------------------------------------- diff --git a/spark-gremlin/src/test/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkHadoopGraphGryoRegistratorProvider.java b/spark-gremlin/src/test/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkHadoopGraphGryoRegistratorProvider.java deleted file mode 100644 index fcebbd0..0000000 --- a/spark-gremlin/src/test/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkHadoopGraphGryoRegistratorProvider.java +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.tinkerpop.gremlin.spark.process.computer; - -import org.apache.spark.serializer.KryoSerializer; -import org.apache.tinkerpop.gremlin.LoadGraphWith; -import org.apache.tinkerpop.gremlin.hadoop.Constants; -import org.apache.tinkerpop.gremlin.spark.structure.Spark; -import org.apache.tinkerpop.gremlin.spark.structure.io.gryo.GryoRegistrator; -import org.apache.tinkerpop.gremlin.spark.structure.io.gryo.kryoshim.unshaded.UnshadedKryoShimService; -import org.apache.tinkerpop.gremlin.structure.io.gryo.kryoshim.KryoShimServiceLoader; - -import java.util.Map; - -import static org.apache.tinkerpop.gremlin.structure.io.gryo.kryoshim.KryoShimServiceLoader.KRYO_SHIM_SERVICE; - -/** - * @author Marko A. 
Rodriguez (http://markorodriguez.com) - */ -public final class SparkHadoopGraphGryoRegistratorProvider extends SparkHadoopGraphProvider { - - public Map<String, Object> getBaseConfiguration(final String graphName, final Class<?> test, final String testMethodName, final LoadGraphWith.GraphData loadGraphWith) { - Spark.close(); - final Map<String, Object> config = super.getBaseConfiguration(graphName, test, testMethodName, loadGraphWith); - // ensure the context doesn't stay open for the GryoSerializer tests to follow - // this is primarily to ensure that the KryoShimService loaded specifically in these tests don't leak to the other tests - config.put(Constants.GREMLIN_SPARK_PERSIST_CONTEXT, false); - config.put("spark.serializer", KryoSerializer.class.getCanonicalName()); - config.put("spark.kryo.registrator", GryoRegistrator.class.getCanonicalName()); - System.setProperty(KRYO_SHIM_SERVICE, UnshadedKryoShimService.class.getCanonicalName()); - KryoShimServiceLoader.load(true); - System.clearProperty(KRYO_SHIM_SERVICE); - return config; - } -} http://git-wip-us.apache.org/repos/asf/tinkerpop/blob/532ed59c/spark-gremlin/src/test/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkHadoopGraphGryoSerializerProvider.java ---------------------------------------------------------------------- diff --git a/spark-gremlin/src/test/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkHadoopGraphGryoSerializerProvider.java b/spark-gremlin/src/test/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkHadoopGraphGryoSerializerProvider.java new file mode 100644 index 0000000..9820b7b --- /dev/null +++ b/spark-gremlin/src/test/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkHadoopGraphGryoSerializerProvider.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.tinkerpop.gremlin.spark.process.computer; + +import org.apache.tinkerpop.gremlin.LoadGraphWith; +import org.apache.tinkerpop.gremlin.hadoop.Constants; +import org.apache.tinkerpop.gremlin.hadoop.structure.io.HadoopPoolShimService; +import org.apache.tinkerpop.gremlin.spark.structure.Spark; +import org.apache.tinkerpop.gremlin.spark.structure.io.gryo.GryoSerializer; +import org.apache.tinkerpop.gremlin.structure.io.gryo.kryoshim.KryoShimServiceLoader; + +import java.util.Map; + +/** + * @author Marko A. 
Rodriguez (http://markorodriguez.com) + */ +public final class SparkHadoopGraphGryoSerializerProvider extends SparkHadoopGraphProvider { + + public Map<String, Object> getBaseConfiguration(final String graphName, final Class<?> test, final String testMethodName, final LoadGraphWith.GraphData loadGraphWith) { + if (this.getClass().equals(SparkHadoopGraphGryoSerializerProvider.class) && + !HadoopPoolShimService.class.getCanonicalName().equals(System.getProperty(KryoShimServiceLoader.KRYO_SHIM_SERVICE, null))) + Spark.close(); + final Map<String, Object> config = super.getBaseConfiguration(graphName, test, testMethodName, loadGraphWith); + config.put(Constants.SPARK_SERIALIZER, GryoSerializer.class.getCanonicalName()); + config.remove(Constants.SPARK_KRYO_REGISTRATOR); + return config; + } +} http://git-wip-us.apache.org/repos/asf/tinkerpop/blob/532ed59c/spark-gremlin/src/test/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkHadoopGraphProvider.java ---------------------------------------------------------------------- diff --git a/spark-gremlin/src/test/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkHadoopGraphProvider.java b/spark-gremlin/src/test/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkHadoopGraphProvider.java index d4201b5..c5b5083 100644 --- a/spark-gremlin/src/test/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkHadoopGraphProvider.java +++ b/spark-gremlin/src/test/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkHadoopGraphProvider.java @@ -18,6 +18,8 @@ */ package org.apache.tinkerpop.gremlin.spark.process.computer; +import org.apache.spark.launcher.SparkLauncher; +import org.apache.spark.serializer.KryoSerializer; import org.apache.tinkerpop.gremlin.GraphProvider; import org.apache.tinkerpop.gremlin.LoadGraphWith; import org.apache.tinkerpop.gremlin.groovy.util.SugarTestHelper; @@ -39,8 +41,10 @@ import org.apache.tinkerpop.gremlin.spark.structure.Spark; import 
org.apache.tinkerpop.gremlin.spark.structure.io.PersistedOutputRDD; import org.apache.tinkerpop.gremlin.spark.structure.io.SparkContextStorageCheck; import org.apache.tinkerpop.gremlin.spark.structure.io.ToyGraphInputRDD; -import org.apache.tinkerpop.gremlin.spark.structure.io.gryo.GryoSerializer; +import org.apache.tinkerpop.gremlin.spark.structure.io.gryo.GryoRegistrator; +import org.apache.tinkerpop.gremlin.spark.structure.io.gryo.kryoshim.unshaded.UnshadedKryoShimService; import org.apache.tinkerpop.gremlin.structure.Graph; +import org.apache.tinkerpop.gremlin.structure.io.gryo.kryoshim.KryoShimServiceLoader; import java.util.Map; @@ -52,6 +56,10 @@ public class SparkHadoopGraphProvider extends HadoopGraphProvider { @Override public Map<String, Object> getBaseConfiguration(final String graphName, final Class<?> test, final String testMethodName, final LoadGraphWith.GraphData loadGraphWith) { + if (this.getClass().equals(SparkHadoopGraphProvider.class) && + !UnshadedKryoShimService.class.getCanonicalName().equals(System.getProperty(KryoShimServiceLoader.KRYO_SHIM_SERVICE, null))) + Spark.close(); + final Map<String, Object> config = super.getBaseConfiguration(graphName, test, testMethodName, loadGraphWith); config.put(Constants.GREMLIN_SPARK_PERSIST_CONTEXT, true); // this makes the test suite go really fast @@ -82,9 +90,10 @@ public class SparkHadoopGraphProvider extends HadoopGraphProvider { } config.put(Constants.GREMLIN_HADOOP_DEFAULT_GRAPH_COMPUTER, SparkGraphComputer.class.getCanonicalName()); - config.put("spark.master", "local[4]"); - config.put("spark.serializer", GryoSerializer.class.getCanonicalName()); - config.put("spark.kryo.registrationRequired", true); + config.put(SparkLauncher.SPARK_MASTER, "local[4]"); + config.put(Constants.SPARK_SERIALIZER, KryoSerializer.class.getCanonicalName()); + config.put(Constants.SPARK_KRYO_REGISTRATOR, GryoRegistrator.class.getCanonicalName()); + config.put(Constants.SPARK_KRYO_REGISTRATION_REQUIRED, true); return 
config; }
