[06/50] tinkerpop git commit: TINKERPOP-1113 Added spark configuration options as concrete methods CTR
TINKERPOP-1113 Added spark configuration options as concrete methods CTR Project: http://git-wip-us.apache.org/repos/asf/tinkerpop/repo Commit: http://git-wip-us.apache.org/repos/asf/tinkerpop/commit/bd85e5fe Tree: http://git-wip-us.apache.org/repos/asf/tinkerpop/tree/bd85e5fe Diff: http://git-wip-us.apache.org/repos/asf/tinkerpop/diff/bd85e5fe Branch: refs/heads/TINKERPOP-1643 Commit: bd85e5febee56434c4de4e7ab31e337a9f5e Parents: f36eb4f Author: Stephen Mallette Authored: Tue May 22 06:55:46 2018 -0400 Committer: Stephen Mallette Committed: Tue May 22 06:55:46 2018 -0400 -- CHANGELOG.asciidoc | 1 + .../process/computer/SparkGraphComputer.java| 90 +--- .../computer/SparkHadoopGraphProvider.java | 5 +- 3 files changed, 80 insertions(+), 16 deletions(-) -- http://git-wip-us.apache.org/repos/asf/tinkerpop/blob/bd85e5fe/CHANGELOG.asciidoc -- diff --git a/CHANGELOG.asciidoc b/CHANGELOG.asciidoc index 21fde2c..395bb55 100644 --- a/CHANGELOG.asciidoc +++ b/CHANGELOG.asciidoc @@ -26,6 +26,7 @@ image::https://raw.githubusercontent.com/apache/tinkerpop/master/docs/static/ima * Removed recursive handling of streaming results from Gremlin-Python driver to avoid max recursion depth errors. * Improved performance of `TraversalVertexProgram` and related infrastructure. * Fixed bug in `GroovyTranslator` that didn't properly handle empty `Map` objects. +* Added concrete configuration methods to `SparkGraphComputer` to make a more clear API for configuring it. 
[[release-3-2-9]] === TinkerPop 3.2.9 (Release Date: May 8, 2018) http://git-wip-us.apache.org/repos/asf/tinkerpop/blob/bd85e5fe/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkGraphComputer.java -- diff --git a/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkGraphComputer.java b/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkGraphComputer.java index 00a2e46..4c896cd 100644 --- a/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkGraphComputer.java +++ b/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkGraphComputer.java @@ -30,11 +30,10 @@ import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.spark.HashPartitioner; import org.apache.spark.Partitioner; -import org.apache.spark.SparkConf; -import org.apache.spark.SparkContext; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.launcher.SparkLauncher; +import org.apache.spark.serializer.Serializer; import org.apache.spark.storage.StorageLevel; import org.apache.tinkerpop.gremlin.hadoop.Constants; import org.apache.tinkerpop.gremlin.hadoop.process.computer.AbstractHadoopGraphComputer; @@ -79,7 +78,16 @@ import java.util.concurrent.Executors; import java.util.concurrent.Future; import java.util.concurrent.ThreadFactory; +import static org.apache.tinkerpop.gremlin.hadoop.Constants.GREMLIN_SPARK_GRAPH_STORAGE_LEVEL; +import static org.apache.tinkerpop.gremlin.hadoop.Constants.GREMLIN_SPARK_PERSIST_CONTEXT; +import static org.apache.tinkerpop.gremlin.hadoop.Constants.GREMLIN_SPARK_PERSIST_STORAGE_LEVEL; +import static org.apache.tinkerpop.gremlin.hadoop.Constants.GREMLIN_SPARK_SKIP_GRAPH_CACHE; +import static org.apache.tinkerpop.gremlin.hadoop.Constants.GREMLIN_SPARK_SKIP_PARTITIONER; 
+import static org.apache.tinkerpop.gremlin.hadoop.Constants.SPARK_SERIALIZER; + /** + * {@link GraphComputer} implementation for Apache Spark. + * * @author Marko A. Rodriguez (http://markorodriguez.com) */ public final class SparkGraphComputer extends AbstractHadoopGraphComputer { @@ -107,8 +115,12 @@ public final class SparkGraphComputer extends AbstractHadoopGraphComputer { ConfigurationUtils.copy(this.hadoopGraph.configuration(), this.sparkConfiguration); } +/** + * Sets the number of workers. If the {@code spark.master} configuration is configured with "local" then it will + * change that configuration to use the specified number of worker threads. + */ @Override -public GraphComputer workers(final int workers) { +public SparkGraphComputer workers(final int workers) { super.workers(workers); if (this.sparkConfiguration.containsKey(SparkLauncher.SPARK_MASTER) && this.sparkConfiguration.getString(SparkLauncher.SPARK_MASTER).startsWith("local")) { this.sparkConfiguration.setProperty(SparkLauncher.SPARK_MASTER, "local[" + this.workers + "]"); @@
[05/50] tinkerpop git commit: TINKERPOP-1113 Added spark configuration options as concrete methods CTR
TINKERPOP-1113 Added spark configuration options as concrete methods CTR Project: http://git-wip-us.apache.org/repos/asf/tinkerpop/repo Commit: http://git-wip-us.apache.org/repos/asf/tinkerpop/commit/bd85e5fe Tree: http://git-wip-us.apache.org/repos/asf/tinkerpop/tree/bd85e5fe Diff: http://git-wip-us.apache.org/repos/asf/tinkerpop/diff/bd85e5fe Branch: refs/heads/TINKERPOP-1836 Commit: bd85e5febee56434c4de4e7ab31e337a9f5e Parents: f36eb4f Author: Stephen Mallette Authored: Tue May 22 06:55:46 2018 -0400 Committer: Stephen Mallette Committed: Tue May 22 06:55:46 2018 -0400 -- CHANGELOG.asciidoc | 1 + .../process/computer/SparkGraphComputer.java| 90 +--- .../computer/SparkHadoopGraphProvider.java | 5 +- 3 files changed, 80 insertions(+), 16 deletions(-) -- http://git-wip-us.apache.org/repos/asf/tinkerpop/blob/bd85e5fe/CHANGELOG.asciidoc -- diff --git a/CHANGELOG.asciidoc b/CHANGELOG.asciidoc index 21fde2c..395bb55 100644 --- a/CHANGELOG.asciidoc +++ b/CHANGELOG.asciidoc @@ -26,6 +26,7 @@ image::https://raw.githubusercontent.com/apache/tinkerpop/master/docs/static/ima * Removed recursive handling of streaming results from Gremlin-Python driver to avoid max recursion depth errors. * Improved performance of `TraversalVertexProgram` and related infrastructure. * Fixed bug in `GroovyTranslator` that didn't properly handle empty `Map` objects. +* Added concrete configuration methods to `SparkGraphComputer` to make a more clear API for configuring it. 
[[release-3-2-9]] === TinkerPop 3.2.9 (Release Date: May 8, 2018) http://git-wip-us.apache.org/repos/asf/tinkerpop/blob/bd85e5fe/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkGraphComputer.java -- diff --git a/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkGraphComputer.java b/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkGraphComputer.java index 00a2e46..4c896cd 100644 --- a/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkGraphComputer.java +++ b/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkGraphComputer.java @@ -30,11 +30,10 @@ import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.spark.HashPartitioner; import org.apache.spark.Partitioner; -import org.apache.spark.SparkConf; -import org.apache.spark.SparkContext; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.launcher.SparkLauncher; +import org.apache.spark.serializer.Serializer; import org.apache.spark.storage.StorageLevel; import org.apache.tinkerpop.gremlin.hadoop.Constants; import org.apache.tinkerpop.gremlin.hadoop.process.computer.AbstractHadoopGraphComputer; @@ -79,7 +78,16 @@ import java.util.concurrent.Executors; import java.util.concurrent.Future; import java.util.concurrent.ThreadFactory; +import static org.apache.tinkerpop.gremlin.hadoop.Constants.GREMLIN_SPARK_GRAPH_STORAGE_LEVEL; +import static org.apache.tinkerpop.gremlin.hadoop.Constants.GREMLIN_SPARK_PERSIST_CONTEXT; +import static org.apache.tinkerpop.gremlin.hadoop.Constants.GREMLIN_SPARK_PERSIST_STORAGE_LEVEL; +import static org.apache.tinkerpop.gremlin.hadoop.Constants.GREMLIN_SPARK_SKIP_GRAPH_CACHE; +import static org.apache.tinkerpop.gremlin.hadoop.Constants.GREMLIN_SPARK_SKIP_PARTITIONER; 
+import static org.apache.tinkerpop.gremlin.hadoop.Constants.SPARK_SERIALIZER; + /** + * {@link GraphComputer} implementation for Apache Spark. + * * @author Marko A. Rodriguez (http://markorodriguez.com) */ public final class SparkGraphComputer extends AbstractHadoopGraphComputer { @@ -107,8 +115,12 @@ public final class SparkGraphComputer extends AbstractHadoopGraphComputer { ConfigurationUtils.copy(this.hadoopGraph.configuration(), this.sparkConfiguration); } +/** + * Sets the number of workers. If the {@code spark.master} configuration is configured with "local" then it will + * change that configuration to use the specified number of worker threads. + */ @Override -public GraphComputer workers(final int workers) { +public SparkGraphComputer workers(final int workers) { super.workers(workers); if (this.sparkConfiguration.containsKey(SparkLauncher.SPARK_MASTER) && this.sparkConfiguration.getString(SparkLauncher.SPARK_MASTER).startsWith("local")) { this.sparkConfiguration.setProperty(SparkLauncher.SPARK_MASTER, "local[" + this.workers + "]"); @@
[08/50] tinkerpop git commit: TINKERPOP-1113 Added spark configuration options as concrete methods CTR
TINKERPOP-1113 Added spark configuration options as concrete methods CTR Project: http://git-wip-us.apache.org/repos/asf/tinkerpop/repo Commit: http://git-wip-us.apache.org/repos/asf/tinkerpop/commit/bd85e5fe Tree: http://git-wip-us.apache.org/repos/asf/tinkerpop/tree/bd85e5fe Diff: http://git-wip-us.apache.org/repos/asf/tinkerpop/diff/bd85e5fe Branch: refs/heads/TINKERPOP-1967 Commit: bd85e5febee56434c4de4e7ab31e337a9f5e Parents: f36eb4f Author: Stephen Mallette Authored: Tue May 22 06:55:46 2018 -0400 Committer: Stephen Mallette Committed: Tue May 22 06:55:46 2018 -0400 -- CHANGELOG.asciidoc | 1 + .../process/computer/SparkGraphComputer.java| 90 +--- .../computer/SparkHadoopGraphProvider.java | 5 +- 3 files changed, 80 insertions(+), 16 deletions(-) -- http://git-wip-us.apache.org/repos/asf/tinkerpop/blob/bd85e5fe/CHANGELOG.asciidoc -- diff --git a/CHANGELOG.asciidoc b/CHANGELOG.asciidoc index 21fde2c..395bb55 100644 --- a/CHANGELOG.asciidoc +++ b/CHANGELOG.asciidoc @@ -26,6 +26,7 @@ image::https://raw.githubusercontent.com/apache/tinkerpop/master/docs/static/ima * Removed recursive handling of streaming results from Gremlin-Python driver to avoid max recursion depth errors. * Improved performance of `TraversalVertexProgram` and related infrastructure. * Fixed bug in `GroovyTranslator` that didn't properly handle empty `Map` objects. +* Added concrete configuration methods to `SparkGraphComputer` to make a more clear API for configuring it. 
[[release-3-2-9]] === TinkerPop 3.2.9 (Release Date: May 8, 2018) http://git-wip-us.apache.org/repos/asf/tinkerpop/blob/bd85e5fe/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkGraphComputer.java -- diff --git a/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkGraphComputer.java b/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkGraphComputer.java index 00a2e46..4c896cd 100644 --- a/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkGraphComputer.java +++ b/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkGraphComputer.java @@ -30,11 +30,10 @@ import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.spark.HashPartitioner; import org.apache.spark.Partitioner; -import org.apache.spark.SparkConf; -import org.apache.spark.SparkContext; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.launcher.SparkLauncher; +import org.apache.spark.serializer.Serializer; import org.apache.spark.storage.StorageLevel; import org.apache.tinkerpop.gremlin.hadoop.Constants; import org.apache.tinkerpop.gremlin.hadoop.process.computer.AbstractHadoopGraphComputer; @@ -79,7 +78,16 @@ import java.util.concurrent.Executors; import java.util.concurrent.Future; import java.util.concurrent.ThreadFactory; +import static org.apache.tinkerpop.gremlin.hadoop.Constants.GREMLIN_SPARK_GRAPH_STORAGE_LEVEL; +import static org.apache.tinkerpop.gremlin.hadoop.Constants.GREMLIN_SPARK_PERSIST_CONTEXT; +import static org.apache.tinkerpop.gremlin.hadoop.Constants.GREMLIN_SPARK_PERSIST_STORAGE_LEVEL; +import static org.apache.tinkerpop.gremlin.hadoop.Constants.GREMLIN_SPARK_SKIP_GRAPH_CACHE; +import static org.apache.tinkerpop.gremlin.hadoop.Constants.GREMLIN_SPARK_SKIP_PARTITIONER; 
+import static org.apache.tinkerpop.gremlin.hadoop.Constants.SPARK_SERIALIZER; + /** + * {@link GraphComputer} implementation for Apache Spark. + * * @author Marko A. Rodriguez (http://markorodriguez.com) */ public final class SparkGraphComputer extends AbstractHadoopGraphComputer { @@ -107,8 +115,12 @@ public final class SparkGraphComputer extends AbstractHadoopGraphComputer { ConfigurationUtils.copy(this.hadoopGraph.configuration(), this.sparkConfiguration); } +/** + * Sets the number of workers. If the {@code spark.master} configuration is configured with "local" then it will + * change that configuration to use the specified number of worker threads. + */ @Override -public GraphComputer workers(final int workers) { +public SparkGraphComputer workers(final int workers) { super.workers(workers); if (this.sparkConfiguration.containsKey(SparkLauncher.SPARK_MASTER) && this.sparkConfiguration.getString(SparkLauncher.SPARK_MASTER).startsWith("local")) { this.sparkConfiguration.setProperty(SparkLauncher.SPARK_MASTER, "local[" + this.workers + "]"); @@
[02/19] tinkerpop git commit: TINKERPOP-1113 Added spark configuration options as concrete methods CTR
TINKERPOP-1113 Added spark configuration options as concrete methods CTR Project: http://git-wip-us.apache.org/repos/asf/tinkerpop/repo Commit: http://git-wip-us.apache.org/repos/asf/tinkerpop/commit/bd85e5fe Tree: http://git-wip-us.apache.org/repos/asf/tinkerpop/tree/bd85e5fe Diff: http://git-wip-us.apache.org/repos/asf/tinkerpop/diff/bd85e5fe Branch: refs/heads/TINKERPOP-1968 Commit: bd85e5febee56434c4de4e7ab31e337a9f5e Parents: f36eb4f Author: Stephen Mallette Authored: Tue May 22 06:55:46 2018 -0400 Committer: Stephen Mallette Committed: Tue May 22 06:55:46 2018 -0400 -- CHANGELOG.asciidoc | 1 + .../process/computer/SparkGraphComputer.java| 90 +--- .../computer/SparkHadoopGraphProvider.java | 5 +- 3 files changed, 80 insertions(+), 16 deletions(-) -- http://git-wip-us.apache.org/repos/asf/tinkerpop/blob/bd85e5fe/CHANGELOG.asciidoc -- diff --git a/CHANGELOG.asciidoc b/CHANGELOG.asciidoc index 21fde2c..395bb55 100644 --- a/CHANGELOG.asciidoc +++ b/CHANGELOG.asciidoc @@ -26,6 +26,7 @@ image::https://raw.githubusercontent.com/apache/tinkerpop/master/docs/static/ima * Removed recursive handling of streaming results from Gremlin-Python driver to avoid max recursion depth errors. * Improved performance of `TraversalVertexProgram` and related infrastructure. * Fixed bug in `GroovyTranslator` that didn't properly handle empty `Map` objects. +* Added concrete configuration methods to `SparkGraphComputer` to make a more clear API for configuring it. 
[[release-3-2-9]] === TinkerPop 3.2.9 (Release Date: May 8, 2018) http://git-wip-us.apache.org/repos/asf/tinkerpop/blob/bd85e5fe/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkGraphComputer.java -- diff --git a/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkGraphComputer.java b/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkGraphComputer.java index 00a2e46..4c896cd 100644 --- a/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkGraphComputer.java +++ b/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkGraphComputer.java @@ -30,11 +30,10 @@ import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.spark.HashPartitioner; import org.apache.spark.Partitioner; -import org.apache.spark.SparkConf; -import org.apache.spark.SparkContext; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.launcher.SparkLauncher; +import org.apache.spark.serializer.Serializer; import org.apache.spark.storage.StorageLevel; import org.apache.tinkerpop.gremlin.hadoop.Constants; import org.apache.tinkerpop.gremlin.hadoop.process.computer.AbstractHadoopGraphComputer; @@ -79,7 +78,16 @@ import java.util.concurrent.Executors; import java.util.concurrent.Future; import java.util.concurrent.ThreadFactory; +import static org.apache.tinkerpop.gremlin.hadoop.Constants.GREMLIN_SPARK_GRAPH_STORAGE_LEVEL; +import static org.apache.tinkerpop.gremlin.hadoop.Constants.GREMLIN_SPARK_PERSIST_CONTEXT; +import static org.apache.tinkerpop.gremlin.hadoop.Constants.GREMLIN_SPARK_PERSIST_STORAGE_LEVEL; +import static org.apache.tinkerpop.gremlin.hadoop.Constants.GREMLIN_SPARK_SKIP_GRAPH_CACHE; +import static org.apache.tinkerpop.gremlin.hadoop.Constants.GREMLIN_SPARK_SKIP_PARTITIONER; 
+import static org.apache.tinkerpop.gremlin.hadoop.Constants.SPARK_SERIALIZER; + /** + * {@link GraphComputer} implementation for Apache Spark. + * * @author Marko A. Rodriguez (http://markorodriguez.com) */ public final class SparkGraphComputer extends AbstractHadoopGraphComputer { @@ -107,8 +115,12 @@ public final class SparkGraphComputer extends AbstractHadoopGraphComputer { ConfigurationUtils.copy(this.hadoopGraph.configuration(), this.sparkConfiguration); } +/** + * Sets the number of workers. If the {@code spark.master} configuration is configured with "local" then it will + * change that configuration to use the specified number of worker threads. + */ @Override -public GraphComputer workers(final int workers) { +public SparkGraphComputer workers(final int workers) { super.workers(workers); if (this.sparkConfiguration.containsKey(SparkLauncher.SPARK_MASTER) && this.sparkConfiguration.getString(SparkLauncher.SPARK_MASTER).startsWith("local")) { this.sparkConfiguration.setProperty(SparkLauncher.SPARK_MASTER, "local[" + this.workers + "]"); @@
[11/23] tinkerpop git commit: TINKERPOP-1113 Added spark configuration options as concrete methods CTR
TINKERPOP-1113 Added spark configuration options as concrete methods CTR Project: http://git-wip-us.apache.org/repos/asf/tinkerpop/repo Commit: http://git-wip-us.apache.org/repos/asf/tinkerpop/commit/bd85e5fe Tree: http://git-wip-us.apache.org/repos/asf/tinkerpop/tree/bd85e5fe Diff: http://git-wip-us.apache.org/repos/asf/tinkerpop/diff/bd85e5fe Branch: refs/heads/TINKERPOP-1897 Commit: bd85e5febee56434c4de4e7ab31e337a9f5e Parents: f36eb4f Author: Stephen Mallette Authored: Tue May 22 06:55:46 2018 -0400 Committer: Stephen Mallette Committed: Tue May 22 06:55:46 2018 -0400 -- CHANGELOG.asciidoc | 1 + .../process/computer/SparkGraphComputer.java| 90 +--- .../computer/SparkHadoopGraphProvider.java | 5 +- 3 files changed, 80 insertions(+), 16 deletions(-) -- http://git-wip-us.apache.org/repos/asf/tinkerpop/blob/bd85e5fe/CHANGELOG.asciidoc -- diff --git a/CHANGELOG.asciidoc b/CHANGELOG.asciidoc index 21fde2c..395bb55 100644 --- a/CHANGELOG.asciidoc +++ b/CHANGELOG.asciidoc @@ -26,6 +26,7 @@ image::https://raw.githubusercontent.com/apache/tinkerpop/master/docs/static/ima * Removed recursive handling of streaming results from Gremlin-Python driver to avoid max recursion depth errors. * Improved performance of `TraversalVertexProgram` and related infrastructure. * Fixed bug in `GroovyTranslator` that didn't properly handle empty `Map` objects. +* Added concrete configuration methods to `SparkGraphComputer` to make a more clear API for configuring it. 
[[release-3-2-9]] === TinkerPop 3.2.9 (Release Date: May 8, 2018) http://git-wip-us.apache.org/repos/asf/tinkerpop/blob/bd85e5fe/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkGraphComputer.java -- diff --git a/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkGraphComputer.java b/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkGraphComputer.java index 00a2e46..4c896cd 100644 --- a/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkGraphComputer.java +++ b/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkGraphComputer.java @@ -30,11 +30,10 @@ import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.spark.HashPartitioner; import org.apache.spark.Partitioner; -import org.apache.spark.SparkConf; -import org.apache.spark.SparkContext; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.launcher.SparkLauncher; +import org.apache.spark.serializer.Serializer; import org.apache.spark.storage.StorageLevel; import org.apache.tinkerpop.gremlin.hadoop.Constants; import org.apache.tinkerpop.gremlin.hadoop.process.computer.AbstractHadoopGraphComputer; @@ -79,7 +78,16 @@ import java.util.concurrent.Executors; import java.util.concurrent.Future; import java.util.concurrent.ThreadFactory; +import static org.apache.tinkerpop.gremlin.hadoop.Constants.GREMLIN_SPARK_GRAPH_STORAGE_LEVEL; +import static org.apache.tinkerpop.gremlin.hadoop.Constants.GREMLIN_SPARK_PERSIST_CONTEXT; +import static org.apache.tinkerpop.gremlin.hadoop.Constants.GREMLIN_SPARK_PERSIST_STORAGE_LEVEL; +import static org.apache.tinkerpop.gremlin.hadoop.Constants.GREMLIN_SPARK_SKIP_GRAPH_CACHE; +import static org.apache.tinkerpop.gremlin.hadoop.Constants.GREMLIN_SPARK_SKIP_PARTITIONER; 
+import static org.apache.tinkerpop.gremlin.hadoop.Constants.SPARK_SERIALIZER; + /** + * {@link GraphComputer} implementation for Apache Spark. + * * @author Marko A. Rodriguez (http://markorodriguez.com) */ public final class SparkGraphComputer extends AbstractHadoopGraphComputer { @@ -107,8 +115,12 @@ public final class SparkGraphComputer extends AbstractHadoopGraphComputer { ConfigurationUtils.copy(this.hadoopGraph.configuration(), this.sparkConfiguration); } +/** + * Sets the number of workers. If the {@code spark.master} configuration is configured with "local" then it will + * change that configuration to use the specified number of worker threads. + */ @Override -public GraphComputer workers(final int workers) { +public SparkGraphComputer workers(final int workers) { super.workers(workers); if (this.sparkConfiguration.containsKey(SparkLauncher.SPARK_MASTER) && this.sparkConfiguration.getString(SparkLauncher.SPARK_MASTER).startsWith("local")) {
tinkerpop git commit: TINKERPOP-1113 Added spark configuration options as concrete methods CTR
Repository: tinkerpop Updated Branches: refs/heads/tp32 f36eb4f34 -> bd85e5feb TINKERPOP-1113 Added spark configuration options as concrete methods CTR Project: http://git-wip-us.apache.org/repos/asf/tinkerpop/repo Commit: http://git-wip-us.apache.org/repos/asf/tinkerpop/commit/bd85e5fe Tree: http://git-wip-us.apache.org/repos/asf/tinkerpop/tree/bd85e5fe Diff: http://git-wip-us.apache.org/repos/asf/tinkerpop/diff/bd85e5fe Branch: refs/heads/tp32 Commit: bd85e5febee56434c4de4e7ab31e337a9f5e Parents: f36eb4f Author: Stephen Mallette Authored: Tue May 22 06:55:46 2018 -0400 Committer: Stephen Mallette Committed: Tue May 22 06:55:46 2018 -0400 -- CHANGELOG.asciidoc | 1 + .../process/computer/SparkGraphComputer.java| 90 +--- .../computer/SparkHadoopGraphProvider.java | 5 +- 3 files changed, 80 insertions(+), 16 deletions(-) -- http://git-wip-us.apache.org/repos/asf/tinkerpop/blob/bd85e5fe/CHANGELOG.asciidoc -- diff --git a/CHANGELOG.asciidoc b/CHANGELOG.asciidoc index 21fde2c..395bb55 100644 --- a/CHANGELOG.asciidoc +++ b/CHANGELOG.asciidoc @@ -26,6 +26,7 @@ image::https://raw.githubusercontent.com/apache/tinkerpop/master/docs/static/ima * Removed recursive handling of streaming results from Gremlin-Python driver to avoid max recursion depth errors. * Improved performance of `TraversalVertexProgram` and related infrastructure. * Fixed bug in `GroovyTranslator` that didn't properly handle empty `Map` objects. +* Added concrete configuration methods to `SparkGraphComputer` to make a more clear API for configuring it. 
[[release-3-2-9]] === TinkerPop 3.2.9 (Release Date: May 8, 2018) http://git-wip-us.apache.org/repos/asf/tinkerpop/blob/bd85e5fe/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkGraphComputer.java -- diff --git a/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkGraphComputer.java b/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkGraphComputer.java index 00a2e46..4c896cd 100644 --- a/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkGraphComputer.java +++ b/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkGraphComputer.java @@ -30,11 +30,10 @@ import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.spark.HashPartitioner; import org.apache.spark.Partitioner; -import org.apache.spark.SparkConf; -import org.apache.spark.SparkContext; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.launcher.SparkLauncher; +import org.apache.spark.serializer.Serializer; import org.apache.spark.storage.StorageLevel; import org.apache.tinkerpop.gremlin.hadoop.Constants; import org.apache.tinkerpop.gremlin.hadoop.process.computer.AbstractHadoopGraphComputer; @@ -79,7 +78,16 @@ import java.util.concurrent.Executors; import java.util.concurrent.Future; import java.util.concurrent.ThreadFactory; +import static org.apache.tinkerpop.gremlin.hadoop.Constants.GREMLIN_SPARK_GRAPH_STORAGE_LEVEL; +import static org.apache.tinkerpop.gremlin.hadoop.Constants.GREMLIN_SPARK_PERSIST_CONTEXT; +import static org.apache.tinkerpop.gremlin.hadoop.Constants.GREMLIN_SPARK_PERSIST_STORAGE_LEVEL; +import static org.apache.tinkerpop.gremlin.hadoop.Constants.GREMLIN_SPARK_SKIP_GRAPH_CACHE; +import static org.apache.tinkerpop.gremlin.hadoop.Constants.GREMLIN_SPARK_SKIP_PARTITIONER; 
+import static org.apache.tinkerpop.gremlin.hadoop.Constants.SPARK_SERIALIZER; + /** + * {@link GraphComputer} implementation for Apache Spark. + * * @author Marko A. Rodriguez (http://markorodriguez.com) */ public final class SparkGraphComputer extends AbstractHadoopGraphComputer { @@ -107,8 +115,12 @@ public final class SparkGraphComputer extends AbstractHadoopGraphComputer { ConfigurationUtils.copy(this.hadoopGraph.configuration(), this.sparkConfiguration); } +/** + * Sets the number of workers. If the {@code spark.master} configuration is configured with "local" then it will + * change that configuration to use the specified number of worker threads. + */ @Override -public GraphComputer workers(final int workers) { +public SparkGraphComputer workers(final int workers) { super.workers(workers); if (this.sparkConfiguration.containsKey(SparkLauncher.SPARK_MASTER) && this.sparkConfiguration.getString(SparkLauncher.SPARK_MASTER).startsWith("local"))
[1/2] tinkerpop git commit: TINKERPOP-1113 Added spark configuration options as concrete methods CTR
Repository: tinkerpop Updated Branches: refs/heads/tp33 a708cc3bd -> 3891777e4 TINKERPOP-1113 Added spark configuration options as concrete methods CTR Project: http://git-wip-us.apache.org/repos/asf/tinkerpop/repo Commit: http://git-wip-us.apache.org/repos/asf/tinkerpop/commit/bd85e5fe Tree: http://git-wip-us.apache.org/repos/asf/tinkerpop/tree/bd85e5fe Diff: http://git-wip-us.apache.org/repos/asf/tinkerpop/diff/bd85e5fe Branch: refs/heads/tp33 Commit: bd85e5febee56434c4de4e7ab31e337a9f5e Parents: f36eb4f Author: Stephen Mallette Authored: Tue May 22 06:55:46 2018 -0400 Committer: Stephen Mallette Committed: Tue May 22 06:55:46 2018 -0400 -- CHANGELOG.asciidoc | 1 + .../process/computer/SparkGraphComputer.java| 90 +--- .../computer/SparkHadoopGraphProvider.java | 5 +- 3 files changed, 80 insertions(+), 16 deletions(-) -- http://git-wip-us.apache.org/repos/asf/tinkerpop/blob/bd85e5fe/CHANGELOG.asciidoc -- diff --git a/CHANGELOG.asciidoc b/CHANGELOG.asciidoc index 21fde2c..395bb55 100644 --- a/CHANGELOG.asciidoc +++ b/CHANGELOG.asciidoc @@ -26,6 +26,7 @@ image::https://raw.githubusercontent.com/apache/tinkerpop/master/docs/static/ima * Removed recursive handling of streaming results from Gremlin-Python driver to avoid max recursion depth errors. * Improved performance of `TraversalVertexProgram` and related infrastructure. * Fixed bug in `GroovyTranslator` that didn't properly handle empty `Map` objects. +* Added concrete configuration methods to `SparkGraphComputer` to make a more clear API for configuring it. 
[[release-3-2-9]] === TinkerPop 3.2.9 (Release Date: May 8, 2018) http://git-wip-us.apache.org/repos/asf/tinkerpop/blob/bd85e5fe/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkGraphComputer.java -- diff --git a/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkGraphComputer.java b/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkGraphComputer.java index 00a2e46..4c896cd 100644 --- a/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkGraphComputer.java +++ b/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkGraphComputer.java @@ -30,11 +30,10 @@ import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.spark.HashPartitioner; import org.apache.spark.Partitioner; -import org.apache.spark.SparkConf; -import org.apache.spark.SparkContext; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.launcher.SparkLauncher; +import org.apache.spark.serializer.Serializer; import org.apache.spark.storage.StorageLevel; import org.apache.tinkerpop.gremlin.hadoop.Constants; import org.apache.tinkerpop.gremlin.hadoop.process.computer.AbstractHadoopGraphComputer; @@ -79,7 +78,16 @@ import java.util.concurrent.Executors; import java.util.concurrent.Future; import java.util.concurrent.ThreadFactory; +import static org.apache.tinkerpop.gremlin.hadoop.Constants.GREMLIN_SPARK_GRAPH_STORAGE_LEVEL; +import static org.apache.tinkerpop.gremlin.hadoop.Constants.GREMLIN_SPARK_PERSIST_CONTEXT; +import static org.apache.tinkerpop.gremlin.hadoop.Constants.GREMLIN_SPARK_PERSIST_STORAGE_LEVEL; +import static org.apache.tinkerpop.gremlin.hadoop.Constants.GREMLIN_SPARK_SKIP_GRAPH_CACHE; +import static org.apache.tinkerpop.gremlin.hadoop.Constants.GREMLIN_SPARK_SKIP_PARTITIONER; 
+import static org.apache.tinkerpop.gremlin.hadoop.Constants.SPARK_SERIALIZER; + /** + * {@link GraphComputer} implementation for Apache Spark. + * * @author Marko A. Rodriguez (http://markorodriguez.com) */ public final class SparkGraphComputer extends AbstractHadoopGraphComputer { @@ -107,8 +115,12 @@ public final class SparkGraphComputer extends AbstractHadoopGraphComputer { ConfigurationUtils.copy(this.hadoopGraph.configuration(), this.sparkConfiguration); } +/** + * Sets the number of workers. If the {@code spark.master} configuration is configured with "local" then it will + * change that configuration to use the specified number of worker threads. + */ @Override -public GraphComputer workers(final int workers) { +public SparkGraphComputer workers(final int workers) { super.workers(workers); if (this.sparkConfiguration.containsKey(SparkLauncher.SPARK_MASTER) && this.sparkConfiguration.getString(SparkLauncher.SPARK_MASTER).startsWith("local"))
[1/3] tinkerpop git commit: TINKERPOP-1113 Added spark configuration options as concrete methods CTR
Repository: tinkerpop Updated Branches: refs/heads/master 6a645c0ae -> 4f46095ba TINKERPOP-1113 Added spark configuration options as concrete methods CTR Project: http://git-wip-us.apache.org/repos/asf/tinkerpop/repo Commit: http://git-wip-us.apache.org/repos/asf/tinkerpop/commit/bd85e5fe Tree: http://git-wip-us.apache.org/repos/asf/tinkerpop/tree/bd85e5fe Diff: http://git-wip-us.apache.org/repos/asf/tinkerpop/diff/bd85e5fe Branch: refs/heads/master Commit: bd85e5febee56434c4de4e7ab31e337a9f5e Parents: f36eb4f Author: Stephen Mallette Authored: Tue May 22 06:55:46 2018 -0400 Committer: Stephen Mallette Committed: Tue May 22 06:55:46 2018 -0400 -- CHANGELOG.asciidoc | 1 + .../process/computer/SparkGraphComputer.java | 90 +--- .../computer/SparkHadoopGraphProvider.java | 5 +- 3 files changed, 80 insertions(+), 16 deletions(-) -- http://git-wip-us.apache.org/repos/asf/tinkerpop/blob/bd85e5fe/CHANGELOG.asciidoc -- diff --git a/CHANGELOG.asciidoc b/CHANGELOG.asciidoc index 21fde2c..395bb55 100644 --- a/CHANGELOG.asciidoc +++ b/CHANGELOG.asciidoc @@ -26,6 +26,7 @@ image::https://raw.githubusercontent.com/apache/tinkerpop/master/docs/static/ima * Removed recursive handling of streaming results from Gremlin-Python driver to avoid max recursion depth errors. * Improved performance of `TraversalVertexProgram` and related infrastructure. * Fixed bug in `GroovyTranslator` that didn't properly handle empty `Map` objects. +* Added concrete configuration methods to `SparkGraphComputer` to make a more clear API for configuring it. 
[[release-3-2-9]] === TinkerPop 3.2.9 (Release Date: May 8, 2018) http://git-wip-us.apache.org/repos/asf/tinkerpop/blob/bd85e5fe/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkGraphComputer.java -- diff --git a/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkGraphComputer.java b/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkGraphComputer.java index 00a2e46..4c896cd 100644 --- a/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkGraphComputer.java +++ b/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkGraphComputer.java @@ -30,11 +30,10 @@ import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.spark.HashPartitioner; import org.apache.spark.Partitioner; -import org.apache.spark.SparkConf; -import org.apache.spark.SparkContext; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.launcher.SparkLauncher; +import org.apache.spark.serializer.Serializer; import org.apache.spark.storage.StorageLevel; import org.apache.tinkerpop.gremlin.hadoop.Constants; import org.apache.tinkerpop.gremlin.hadoop.process.computer.AbstractHadoopGraphComputer; @@ -79,7 +78,16 @@ import java.util.concurrent.Executors; import java.util.concurrent.Future; import java.util.concurrent.ThreadFactory; +import static org.apache.tinkerpop.gremlin.hadoop.Constants.GREMLIN_SPARK_GRAPH_STORAGE_LEVEL; +import static org.apache.tinkerpop.gremlin.hadoop.Constants.GREMLIN_SPARK_PERSIST_CONTEXT; +import static org.apache.tinkerpop.gremlin.hadoop.Constants.GREMLIN_SPARK_PERSIST_STORAGE_LEVEL; +import static org.apache.tinkerpop.gremlin.hadoop.Constants.GREMLIN_SPARK_SKIP_GRAPH_CACHE; +import static org.apache.tinkerpop.gremlin.hadoop.Constants.GREMLIN_SPARK_SKIP_PARTITIONER; 
+import static org.apache.tinkerpop.gremlin.hadoop.Constants.SPARK_SERIALIZER; + /** + * {@link GraphComputer} implementation for Apache Spark. + * * @author Marko A. Rodriguez (http://markorodriguez.com) */ public final class SparkGraphComputer extends AbstractHadoopGraphComputer { @@ -107,8 +115,12 @@ public final class SparkGraphComputer extends AbstractHadoopGraphComputer { ConfigurationUtils.copy(this.hadoopGraph.configuration(), this.sparkConfiguration); } +/** + * Sets the number of workers. If the {@code spark.master} configuration is configured with "local" then it will + * change that configuration to use the specified number of worker threads. + */ @Override -public GraphComputer workers(final int workers) { +public SparkGraphComputer workers(final int workers) { super.workers(workers); if (this.sparkConfiguration.containsKey(SparkLauncher.SPARK_MASTER) &&