This closes #37
Project: http://git-wip-us.apache.org/repos/asf/incubator-beam/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-beam/commit/b2b5f429 Tree: http://git-wip-us.apache.org/repos/asf/incubator-beam/tree/b2b5f429 Diff: http://git-wip-us.apache.org/repos/asf/incubator-beam/diff/b2b5f429 Branch: refs/heads/master Commit: b2b5f429fb390a8c6056accdda34607d7d5ac20c Parents: 0442a24 42fc8fc Author: Sela <ans...@paypal.com> Authored: Thu Mar 10 22:51:23 2016 +0200 Committer: Sela <ans...@paypal.com> Committed: Thu Mar 10 22:51:23 2016 +0200 ---------------------------------------------------------------------- runners/spark/.gitignore | 10 + runners/spark/.travis.yml | 22 + runners/spark/README.md | 113 + runners/spark/build-resources/checkstyle.xml | 222 + runners/spark/build-resources/header-file.txt | 14 + runners/spark/pom.xml | 469 ++ .../com/cloudera/dataflow/hadoop/HadoopIO.java | 202 + .../dataflow/hadoop/NullWritableCoder.java | 71 + .../cloudera/dataflow/hadoop/WritableCoder.java | 120 + .../com/cloudera/dataflow/io/ConsoleIO.java | 60 + .../com/cloudera/dataflow/io/CreateStream.java | 66 + .../java/com/cloudera/dataflow/io/KafkaIO.java | 128 + .../dataflow/spark/BroadcastHelper.java | 121 + .../com/cloudera/dataflow/spark/ByteArray.java | 52 + .../cloudera/dataflow/spark/CoderHelpers.java | 185 + .../cloudera/dataflow/spark/DoFnFunction.java | 93 + .../dataflow/spark/EvaluationContext.java | 283 + .../dataflow/spark/EvaluationResult.java | 62 + .../dataflow/spark/MultiDoFnFunction.java | 115 + .../dataflow/spark/ShardNameBuilder.java | 106 + .../dataflow/spark/ShardNameTemplateAware.java | 28 + .../dataflow/spark/ShardNameTemplateHelper.java | 58 + .../dataflow/spark/SparkContextFactory.java | 66 + .../dataflow/spark/SparkPipelineEvaluator.java | 52 + .../dataflow/spark/SparkPipelineOptions.java | 39 + .../spark/SparkPipelineOptionsFactory.java | 27 + .../spark/SparkPipelineOptionsRegistrar.java | 27 + .../dataflow/spark/SparkPipelineRunner.java | 252 + .../spark/SparkPipelineRunnerRegistrar.java | 27 + .../dataflow/spark/SparkPipelineTranslator.java | 27 + .../dataflow/spark/SparkProcessContext.java | 250 + .../dataflow/spark/SparkRuntimeContext.java | 212 + .../spark/TemplatedAvroKeyOutputFormat.java | 40 + .../TemplatedSequenceFileOutputFormat.java | 40 + .../spark/TemplatedTextOutputFormat.java | 40 + .../dataflow/spark/TransformEvaluator.java | 24 + .../dataflow/spark/TransformTranslator.java | 800 +++ .../dataflow/spark/WindowingHelpers.java | 59 + .../spark/aggregators/AggAccumParam.java | 35 + .../spark/aggregators/NamedAggregators.java | 202 + .../SparkStreamingPipelineOptions.java | 40 + .../SparkStreamingPipelineOptionsFactory.java | 27 + .../SparkStreamingPipelineOptionsRegistrar.java | 28 + .../streaming/StreamingEvaluationContext.java | 226 + .../streaming/StreamingTransformTranslator.java | 414 ++ .../StreamingWindowPipelineDetector.java | 100 + ...ataflow.sdk.options.PipelineOptionsRegistrar | 17 + ...dataflow.sdk.runners.PipelineRunnerRegistrar | 16 + .../dataflow/hadoop/WritableCoderTest.java | 42 + .../dataflow/spark/AvroPipelineTest.java | 103 + .../dataflow/spark/CombineGloballyTest.java | 87 + .../dataflow/spark/CombinePerKeyTest.java | 69 + .../com/cloudera/dataflow/spark/DeDupTest.java | 55 + .../cloudera/dataflow/spark/DoFnOutputTest.java | 57 + .../cloudera/dataflow/spark/EmptyInputTest.java | 64 + .../spark/HadoopFileFormatPipelineTest.java | 105 + .../spark/MultiOutputWordCountTest.java | 148 + .../cloudera/dataflow/spark/NumShardsTest.java | 89 + .../dataflow/spark/SerializationTest.java | 183 + .../dataflow/spark/ShardNameBuilderTest.java | 82 + .../dataflow/spark/SideEffectsTest.java | 77 + .../dataflow/spark/SimpleWordCountTest.java | 117 + .../spark/TestSparkPipelineOptionsFactory.java | 34 + .../com/cloudera/dataflow/spark/TfIdfTest.java | 60 + .../dataflow/spark/TransformTranslatorTest.java | 95 + .../dataflow/spark/WindowedWordCountTest.java | 63 + .../spark/streaming/FlattenStreamingTest.java | 84 + .../spark/streaming/KafkaStreamingTest.java | 133 + .../streaming/SimpleStreamingWordCountTest.java | 73 + .../utils/DataflowAssertStreaming.java | 39 + .../streaming/utils/EmbeddedKafkaCluster.java | 314 ++ runners/spark/src/test/resources/person.avsc | 23 + runners/spark/src/test/resources/pg1112.txt | 4853 ++++++++++++++++++ runners/spark/src/test/resources/pg2264.txt | 3667 +++++++++++++ runners/spark/src/test/resources/test_text.txt | 2 + 75 files changed, 16405 insertions(+) ----------------------------------------------------------------------