Repository: incubator-beam Updated Branches: refs/heads/master b21c35d1a -> 204678323
[BEAM-294] Rename dataflow references to beam Project: http://git-wip-us.apache.org/repos/asf/incubator-beam/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-beam/commit/e233e5f6 Tree: http://git-wip-us.apache.org/repos/asf/incubator-beam/tree/e233e5f6 Diff: http://git-wip-us.apache.org/repos/asf/incubator-beam/diff/e233e5f6 Branch: refs/heads/master Commit: e233e5f64d8bfeb5b4da7d96515e939c4bfd8b0e Parents: bfd810f Author: Jean-Baptiste Onofré <jbono...@apache.org> Authored: Thu Aug 25 14:32:20 2016 +0200 Committer: Jean-Baptiste Onofré <jbono...@apache.org> Committed: Fri Aug 26 09:12:36 2016 +0200 ---------------------------------------------------------------------- runners/spark/pom.xml | 2 +- .../main/java/org/apache/beam/runners/spark/SparkRunner.java | 2 +- .../java/org/apache/beam/runners/spark/TestSparkRunner.java | 2 +- .../beam/runners/spark/aggregators/NamedAggregators.java | 2 +- .../org/apache/beam/runners/spark/examples/WordCount.java | 2 +- .../runners/spark/io/hadoop/ShardNameTemplateHelper.java | 6 +++--- .../apache/beam/runners/spark/translation/DoFnFunction.java | 2 +- .../runners/spark/translation/SparkPipelineTranslator.java | 2 +- .../beam/runners/spark/translation/SparkRuntimeContext.java | 2 +- .../beam/runners/spark/translation/TransformTranslator.java | 4 ++-- .../translation/streaming/StreamingTransformTranslator.java | 8 ++++---- .../org/apache/beam/runners/spark/util/BroadcastHelper.java | 4 ++-- .../runners/spark/translation/TransformTranslatorTest.java | 2 +- .../spark/translation/streaming/KafkaStreamingTest.java | 2 +- 14 files changed, 21 insertions(+), 21 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/e233e5f6/runners/spark/pom.xml ---------------------------------------------------------------------- diff --git a/runners/spark/pom.xml b/runners/spark/pom.xml index a5e99a0..b924cb8 100644 --- a/runners/spark/pom.xml +++ b/runners/spark/pom.xml @@ -330,7 +330,7 @@ </goals> <configuration> <relocations> - <!-- relocate Guava used by Dataflow (v18) since it conflicts with + <!-- relocate Guava used by Beam (v18) since it conflicts with version used by Hadoop (v11) --> <relocation> <pattern>com.google.common</pattern> http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/e233e5f6/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkRunner.java ---------------------------------------------------------------------- diff --git a/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkRunner.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkRunner.java index 2ce1ff6..fa85a2e 100644 --- a/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkRunner.java +++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkRunner.java @@ -50,7 +50,7 @@ import org.slf4j.LoggerFactory; /** * The SparkRunner translate operations defined on a pipeline to a representation * executable by Spark, and then submitting the job to Spark to be executed. If we wanted to run - * a dataflow pipeline with the default options of a single threaded spark instance in local mode, + * a Beam pipeline with the default options of a single threaded spark instance in local mode, * we would do the following: * * {@code http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/e233e5f6/runners/spark/src/main/java/org/apache/beam/runners/spark/TestSparkRunner.java ---------------------------------------------------------------------- diff --git a/runners/spark/src/main/java/org/apache/beam/runners/spark/TestSparkRunner.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/TestSparkRunner.java index 50ed5f3..376b80f 100644 --- a/runners/spark/src/main/java/org/apache/beam/runners/spark/TestSparkRunner.java +++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/TestSparkRunner.java @@ -28,7 +28,7 @@ import org.apache.beam.sdk.values.POutput; /** * The SparkRunner translate operations defined on a pipeline to a representation executable - * by Spark, and then submitting the job to Spark to be executed. If we wanted to run a dataflow + * by Spark, and then submitting the job to Spark to be executed. If we wanted to run a Beam * pipeline with the default options of a single threaded spark instance in local mode, we would do * the following: * http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/e233e5f6/runners/spark/src/main/java/org/apache/beam/runners/spark/aggregators/NamedAggregators.java ---------------------------------------------------------------------- diff --git a/runners/spark/src/main/java/org/apache/beam/runners/spark/aggregators/NamedAggregators.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/aggregators/NamedAggregators.java index c15e276..e2cd963 100644 --- a/runners/spark/src/main/java/org/apache/beam/runners/spark/aggregators/NamedAggregators.java +++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/aggregators/NamedAggregators.java @@ -32,7 +32,7 @@ import org.apache.beam.sdk.transforms.Combine; /** * This class wraps a map of named aggregators. Spark expects that all accumulators be declared - * before a job is launched. Dataflow allows aggregators to be used and incremented on the fly. + * before a job is launched. Beam allows aggregators to be used and incremented on the fly. * We create a map of named aggregators and instantiate in the the spark context before the job * is launched. We can then add aggregators on the fly in Spark. */ http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/e233e5f6/runners/spark/src/main/java/org/apache/beam/runners/spark/examples/WordCount.java ---------------------------------------------------------------------- diff --git a/runners/spark/src/main/java/org/apache/beam/runners/spark/examples/WordCount.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/examples/WordCount.java index 0677030..1af84ad 100644 --- a/runners/spark/src/main/java/org/apache/beam/runners/spark/examples/WordCount.java +++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/examples/WordCount.java @@ -110,7 +110,7 @@ public class WordCount { */ public static interface WordCountOptions extends PipelineOptions { @Description("Path of the file to read from") - @Default.String("gs://dataflow-samples/shakespeare/kinglear.txt") + @Default.String("gs://beam-samples/shakespeare/kinglear.txt") String getInputFile(); void setInputFile(String value); http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/e233e5f6/runners/spark/src/main/java/org/apache/beam/runners/spark/io/hadoop/ShardNameTemplateHelper.java ---------------------------------------------------------------------- diff --git a/runners/spark/src/main/java/org/apache/beam/runners/spark/io/hadoop/ShardNameTemplateHelper.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/io/hadoop/ShardNameTemplateHelper.java index 7f8e297..4a7058b 100644 --- a/runners/spark/src/main/java/org/apache/beam/runners/spark/io/hadoop/ShardNameTemplateHelper.java +++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/io/hadoop/ShardNameTemplateHelper.java @@ -36,9 +36,9 @@ public final class ShardNameTemplateHelper { private static final Logger LOG = LoggerFactory.getLogger(ShardNameTemplateHelper.class); - public static final String OUTPUT_FILE_PREFIX = "spark.dataflow.fileoutputformat.prefix"; - public static final String OUTPUT_FILE_TEMPLATE = "spark.dataflow.fileoutputformat.template"; - public static final String OUTPUT_FILE_SUFFIX = "spark.dataflow.fileoutputformat.suffix"; + public static final String OUTPUT_FILE_PREFIX = "spark.beam.fileoutputformat.prefix"; + public static final String OUTPUT_FILE_TEMPLATE = "spark.beam.fileoutputformat.template"; + public static final String OUTPUT_FILE_SUFFIX = "spark.beam.fileoutputformat.suffix"; private ShardNameTemplateHelper() { } http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/e233e5f6/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/DoFnFunction.java ---------------------------------------------------------------------- diff --git a/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/DoFnFunction.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/DoFnFunction.java index 800d614..454b760 100644 --- a/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/DoFnFunction.java +++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/DoFnFunction.java @@ -31,7 +31,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** - * Dataflow's Do functions correspond to Spark's FlatMap functions. + * Beam's Do functions correspond to Spark's FlatMap functions. * * @param <InputT> Input element type. * @param <OutputT> Output element type. http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/e233e5f6/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/SparkPipelineTranslator.java ---------------------------------------------------------------------- diff --git a/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/SparkPipelineTranslator.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/SparkPipelineTranslator.java index 997940b..1f7ccf1 100644 --- a/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/SparkPipelineTranslator.java +++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/SparkPipelineTranslator.java @@ -20,7 +20,7 @@ package org.apache.beam.runners.spark.translation; import org.apache.beam.sdk.transforms.PTransform; /** - * Translator to support translation between Dataflow transformations and Spark transformations. + * Translator to support translation between Beam transformations and Spark transformations. */ public interface SparkPipelineTranslator { http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/e233e5f6/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/SparkRuntimeContext.java ---------------------------------------------------------------------- diff --git a/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/SparkRuntimeContext.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/SparkRuntimeContext.java index 4bc0c00..2634c65 100644 --- a/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/SparkRuntimeContext.java +++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/SparkRuntimeContext.java @@ -57,7 +57,7 @@ public class SparkRuntimeContext implements Serializable { private final String serializedPipelineOptions; /** - * Map fo names to dataflow aggregators. + * Map fo names to Beam aggregators. */ private final Map<String, Aggregator<?, ?>> aggregators = new HashMap<>(); private transient CoderRegistry coderRegistry; http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/e233e5f6/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/TransformTranslator.java ---------------------------------------------------------------------- diff --git a/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/TransformTranslator.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/TransformTranslator.java index 08e3fda..eaceb85 100644 --- a/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/TransformTranslator.java +++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/TransformTranslator.java @@ -94,7 +94,7 @@ import org.apache.spark.api.java.function.PairFunction; import scala.Tuple2; /** - * Supports translation between a DataFlow transform, and Spark's operations on RDDs. + * Supports translation between a Beam transform, and Spark's operations on RDDs. */ public final class TransformTranslator { @@ -895,7 +895,7 @@ public final class TransformTranslator { } /** - * Translator matches Dataflow transformation with the appropriate evaluator. + * Translator matches Beam transformation with the appropriate evaluator. */ public static class Translator implements SparkPipelineTranslator { http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/e233e5f6/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/streaming/StreamingTransformTranslator.java ---------------------------------------------------------------------- diff --git a/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/streaming/StreamingTransformTranslator.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/streaming/StreamingTransformTranslator.java index 5f35ebb..43dcef6 100644 --- a/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/streaming/StreamingTransformTranslator.java +++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/streaming/StreamingTransformTranslator.java @@ -68,7 +68,7 @@ import scala.Tuple2; /** - * Supports translation between a DataFlow transform, and Spark's operations on DStreams. + * Supports translation between a Beam transform, and Spark's operations on DStreams. */ public final class StreamingTransformTranslator { @@ -349,13 +349,13 @@ public final class StreamingTransformTranslator { (TransformEvaluator<TransformT>) EVALUATORS.get(clazz); if (transform == null) { if (UNSUPPORTED_EVALUATORS.contains(clazz)) { - throw new UnsupportedOperationException("Dataflow transformation " + clazz + throw new UnsupportedOperationException("Beam transformation " + clazz .getCanonicalName() + " is currently unsupported by the Spark streaming pipeline"); } // DStream transformations will transform an RDD into another RDD // Actions will create output - // In Dataflow it depends on the PTransform's Input and Output class + // In Beam it depends on the PTransform's Input and Output class Class<?> pTOutputClazz = getPTransformOutputClazz(clazz); if (PDone.class.equals(pTOutputClazz)) { return foreachRDD(rddTranslator); @@ -373,7 +373,7 @@ public final class StreamingTransformTranslator { } /** - * Translator matches Dataflow transformation with the appropriate Spark streaming evaluator. + * Translator matches Beam transformation with the appropriate Spark streaming evaluator. * rddTranslator uses Spark evaluators in transform/foreachRDD to evaluate the transformation */ public static class Translator implements SparkPipelineTranslator { http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/e233e5f6/runners/spark/src/main/java/org/apache/beam/runners/spark/util/BroadcastHelper.java ---------------------------------------------------------------------- diff --git a/runners/spark/src/main/java/org/apache/beam/runners/spark/util/BroadcastHelper.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/util/BroadcastHelper.java index 29c2dd9..5f0c795 100644 --- a/runners/spark/src/main/java/org/apache/beam/runners/spark/util/BroadcastHelper.java +++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/util/BroadcastHelper.java @@ -34,12 +34,12 @@ import org.slf4j.LoggerFactory; public abstract class BroadcastHelper<T> implements Serializable { /** - * If the property {@code dataflow.spark.directBroadcast} is set to + * If the property {@code beam.spark.directBroadcast} is set to * {@code true} then Spark serialization (Kryo) will be used to broadcast values * in View objects. By default this property is not set, and values are coded using * the appropriate {@link Coder}. */ - public static final String DIRECT_BROADCAST = "dataflow.spark.directBroadcast"; + public static final String DIRECT_BROADCAST = "beam.spark.directBroadcast"; private static final Logger LOG = LoggerFactory.getLogger(BroadcastHelper.class); http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/e233e5f6/runners/spark/src/test/java/org/apache/beam/runners/spark/translation/TransformTranslatorTest.java ---------------------------------------------------------------------- diff --git a/runners/spark/src/test/java/org/apache/beam/runners/spark/translation/TransformTranslatorTest.java b/runners/spark/src/test/java/org/apache/beam/runners/spark/translation/TransformTranslatorTest.java index f61ad1c..f72eba7 100644 --- a/runners/spark/src/test/java/org/apache/beam/runners/spark/translation/TransformTranslatorTest.java +++ b/runners/spark/src/test/java/org/apache/beam/runners/spark/translation/TransformTranslatorTest.java @@ -56,7 +56,7 @@ public class TransformTranslatorTest { /** * Builds a simple pipeline with TextIO.Read and TextIO.Write, runs the pipeline - * in DirectRunner and on SparkRunner, with the mapped dataflow-to-spark + * in DirectRunner and on SparkRunner, with the mapped beam-to-spark * transforms. Finally it makes sure that the results are the same for both runs. */ @Test http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/e233e5f6/runners/spark/src/test/java/org/apache/beam/runners/spark/translation/streaming/KafkaStreamingTest.java ---------------------------------------------------------------------- diff --git a/runners/spark/src/test/java/org/apache/beam/runners/spark/translation/streaming/KafkaStreamingTest.java b/runners/spark/src/test/java/org/apache/beam/runners/spark/translation/streaming/KafkaStreamingTest.java index 27d6f5e..ac77922 100644 --- a/runners/spark/src/test/java/org/apache/beam/runners/spark/translation/streaming/KafkaStreamingTest.java +++ b/runners/spark/src/test/java/org/apache/beam/runners/spark/translation/streaming/KafkaStreamingTest.java @@ -56,7 +56,7 @@ public class KafkaStreamingTest { new EmbeddedKafkaCluster.EmbeddedZookeeper(); private static final EmbeddedKafkaCluster EMBEDDED_KAFKA_CLUSTER = new EmbeddedKafkaCluster(EMBEDDED_ZOOKEEPER.getConnection(), new Properties()); - private static final String TOPIC = "kafka_dataflow_test_topic"; + private static final String TOPIC = "kafka_beam_test_topic"; private static final Map<String, String> KAFKA_MESSAGES = ImmutableMap.of( "k1", "v1", "k2", "v2", "k3", "v3", "k4", "v4" );