This is an automated email from the ASF dual-hosted git repository. echauchot pushed a commit to branch spark-runner_structured-streaming in repository https://gitbox.apache.org/repos/asf/beam.git
commit 0033f898efca4068e8e730363e133b08df9bedf3 Author: Etienne Chauchot <[email protected]> AuthorDate: Thu Nov 22 11:50:17 2018 +0100 Improve javadocs --- .../spark/structuredstreaming/translation/PipelineTranslator.java | 8 ++++---- .../structuredstreaming/translation/TransformTranslator.java | 2 +- .../spark/structuredstreaming/translation/TranslationContext.java | 4 ++++ .../translation/batch/BatchPipelineTranslator.java | 4 +++- .../translation/batch/BatchTranslationContext.java | 2 +- .../translation/streaming/StreamingPipelineTranslator.java | 5 +++++ .../translation/streaming/StreamingTranslationContext.java | 3 +++ 7 files changed, 21 insertions(+), 7 deletions(-) diff --git a/runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/PipelineTranslator.java b/runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/PipelineTranslator.java index 185879b..51d65ff 100644 --- a/runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/PipelineTranslator.java +++ b/runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/PipelineTranslator.java @@ -14,10 +14,10 @@ import org.apache.beam.sdk.values.PValue; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** - /** - * The role of this class is to detect the pipeline mode and to translate the Beam operators to their Spark counterparts. If we have - * a streaming job, this is instantiated as a {@link StreamingPipelineTranslator}. In other - * case, i.e. for a batch job, a {@link BatchPipelineTranslator} is created. Correspondingly, + * {@link Pipeline.PipelineVisitor} that translates the Beam operators to their Spark counterparts. + * It also does the pipeline preparation: mode detection, transforms replacement, classpath preparation. 
+ * If we have a streaming job, it is instantiated as a {@link StreamingPipelineTranslator}. + * If we have a batch job, it is instantiated as a {@link BatchPipelineTranslator}. */ public abstract class PipelineTranslator extends Pipeline.PipelineVisitor.Defaults{ diff --git a/runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/TransformTranslator.java b/runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/TransformTranslator.java index ebb8bf8..54b0a85 100644 --- a/runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/TransformTranslator.java +++ b/runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/TransformTranslator.java @@ -4,7 +4,7 @@ import org.apache.beam.sdk.transforms.PTransform; public interface TransformTranslator<TransformT extends PTransform> { - /** A translator of a {@link PTransform}. */ + /** Base class for translators of {@link PTransform}. 
*/ void translateTransform(TransformT transform, TranslationContext context); } diff --git a/runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/TranslationContext.java b/runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/TranslationContext.java index 341ed49..3dacde4 100644 --- a/runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/TranslationContext.java +++ b/runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/TranslationContext.java @@ -1,7 +1,11 @@ package org.apache.beam.runners.spark.structuredstreaming.translation; import org.apache.beam.sdk.runners.AppliedPTransform; +import org.apache.beam.sdk.transforms.PTransform; +/** + * Base class that gives a context for {@link PTransform} translation. + */ public class TranslationContext { private AppliedPTransform<?, ?, ?> currentTransform; diff --git a/runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/BatchPipelineTranslator.java b/runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/BatchPipelineTranslator.java index ff92d89..38324c0 100644 --- a/runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/BatchPipelineTranslator.java +++ b/runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/BatchPipelineTranslator.java @@ -11,7 +11,9 @@ import org.apache.beam.sdk.Pipeline; import org.apache.beam.sdk.runners.TransformHierarchy; import org.apache.beam.sdk.transforms.PTransform; -/** {@link Pipeline.PipelineVisitor} for executing a {@link Pipeline} as a Spark batch job. 
*/ +/** {@link PipelineTranslator} for executing a {@link Pipeline} in Spark in batch mode. + * This contains only the components specific to batch: {@link BatchTranslationContext}, + * registry of batch {@link TransformTranslator} and registry lookup code. */ public class BatchPipelineTranslator extends PipelineTranslator { diff --git a/runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/BatchTranslationContext.java b/runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/BatchTranslationContext.java index 71ef315..f08e33c 100644 --- a/runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/BatchTranslationContext.java +++ b/runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/BatchTranslationContext.java @@ -11,7 +11,7 @@ import org.apache.spark.sql.Dataset; import org.apache.spark.sql.SparkSession; /** - * Keeps track of the {@link Dataset} and the step the translation is in. + * Keeps track of context of the translation. 
*/ public class BatchTranslationContext extends TranslationContext { private final Map<PValue, Dataset<?>> datasets; diff --git a/runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/streaming/StreamingPipelineTranslator.java b/runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/streaming/StreamingPipelineTranslator.java index 9303d59..9cbfbed 100644 --- a/runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/streaming/StreamingPipelineTranslator.java +++ b/runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/streaming/StreamingPipelineTranslator.java @@ -3,8 +3,13 @@ package org.apache.beam.runners.spark.structuredstreaming.translation.streaming; import org.apache.beam.runners.spark.structuredstreaming.SparkPipelineOptions; import org.apache.beam.runners.spark.structuredstreaming.translation.PipelineTranslator; import org.apache.beam.runners.spark.structuredstreaming.translation.TransformTranslator; +import org.apache.beam.sdk.Pipeline; import org.apache.beam.sdk.runners.TransformHierarchy; +/** {@link PipelineTranslator} for executing a {@link Pipeline} in Spark in streaming mode. + * This contains only the components specific to streaming: {@link StreamingTranslationContext}, + * registry of streaming {@link TransformTranslator} and registry lookup code. 
*/ + public class StreamingPipelineTranslator extends PipelineTranslator { public StreamingPipelineTranslator(SparkPipelineOptions options) { diff --git a/runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/streaming/StreamingTranslationContext.java b/runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/streaming/StreamingTranslationContext.java index 460dbf6..f2ee34b 100644 --- a/runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/streaming/StreamingTranslationContext.java +++ b/runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/streaming/StreamingTranslationContext.java @@ -2,6 +2,9 @@ package org.apache.beam.runners.spark.structuredstreaming.translation.streaming; import org.apache.beam.runners.spark.structuredstreaming.translation.TranslationContext; +/** + * Keeps track of context of the translation. + */ public class StreamingTranslationContext extends TranslationContext { }
