Jenkins build became unstable: beam_PostCommit_RunnableOnService_GoogleCloudDataflow #105

2016-04-13 Thread Apache Jenkins Server
See 




[21/74] [partial] incubator-beam git commit: Rename com/google/cloud/dataflow->org/apache/beam

2016-04-13 Thread davor
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/0393a791/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/coders/CustomCoder.java
--
diff --git 
a/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/coders/CustomCoder.java
 
b/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/coders/CustomCoder.java
deleted file mode 100644
index a43e95e..0000000
--- 
a/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/coders/CustomCoder.java
+++ /dev/null
@@ -1,138 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.google.cloud.dataflow.sdk.coders;
-
-import static com.google.cloud.dataflow.sdk.util.Structs.addString;
-import static com.google.cloud.dataflow.sdk.util.Structs.addStringList;
-import static com.google.common.base.Preconditions.checkNotNull;
-
-import com.google.cloud.dataflow.sdk.util.CloudObject;
-import com.google.cloud.dataflow.sdk.util.PropertyNames;
-import com.google.cloud.dataflow.sdk.util.SerializableUtils;
-import com.google.cloud.dataflow.sdk.util.StringUtils;
-import com.google.common.collect.Lists;
-
-import com.fasterxml.jackson.annotation.JsonCreator;
-import com.fasterxml.jackson.annotation.JsonProperty;
-
-import java.io.Serializable;
-import java.util.Collection;
-
-/**
- * An abstract base class for writing a {@link Coder} class that encodes itself via Java
- * serialization.
- *
- * <p>To complete an implementation, subclasses must implement {@link Coder#encode}
- * and {@link Coder#decode} methods. Anonymous subclasses must furthermore override
- * {@link #getEncodingId}.
- *
- * <p>Not to be confused with {@link SerializableCoder} that encodes objects that implement the
- * {@link Serializable} interface.
- *
- * @param <T> the type of elements handled by this coder
- */
-public abstract class CustomCoder<T> extends AtomicCoder<T>
-    implements Serializable {
-  @JsonCreator
-  public static CustomCoder<?> of(
-      // N.B. typeId is a required parameter here, since a field named "@type"
-      // is presented to the deserializer as an input.
-      //
-      // If this method did not consume the field, Jackson2 would observe an
-      // unconsumed field and a returned value of a derived type.  So Jackson2
-      // would attempt to update the returned value with the unconsumed field
-      // data. The standard JsonDeserializer does not implement a mechanism for
-      // updating constructed values, so it would throw an exception, causing
-      // deserialization to fail.
-      @JsonProperty(value = "@type", required = false) String typeId,
-      @JsonProperty(value = "encoding_id", required = false) String encodingId,
-      @JsonProperty("type") String type,
-      @JsonProperty("serialized_coder") String serializedCoder) {
-    return (CustomCoder<?>) SerializableUtils.deserializeFromByteArray(
-        StringUtils.jsonStringToByteArray(serializedCoder),
-        type);
-  }
-
-  /**
-   * {@inheritDoc}
-   *
-   * @return A thin {@link CloudObject} wrapping of the Java serialization of {@code this}.
-   */
-  @Override
-  public CloudObject asCloudObject() {
-    // N.B. We use the CustomCoder class, not the derived class, since during
-    // deserialization we will be using the CustomCoder's static factory method
-    // to construct an instance of the derived class.
-    CloudObject result = CloudObject.forClass(CustomCoder.class);
-    addString(result, "type", getClass().getName());
-    addString(result, "serialized_coder",
-        StringUtils.byteArrayToJsonString(
-            SerializableUtils.serializeToByteArray(this)));
-
-    String encodingId = getEncodingId();
-    checkNotNull(encodingId, "Coder.getEncodingId() must not return null.");
-    if (!encodingId.isEmpty()) {
-      addString(result, PropertyNames.ENCODING_ID, encodingId);
-    }
-
-    Collection<String> allowedEncodings = getAllowedEncodings();
-    if (!allowedEncodings.isEmpty()) {
-      addStringList(result, PropertyNames.ALLOWED_ENCODINGS, Lists.newArrayList(allowedEncodings));
-    }
-
-    return result;
-  }
-
-  /**
-   * {@inheritDoc}
-   *
-   * @throws NonDeterministicException a {@link CustomCoder} is presumed
-   * 

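For orientation, here is a minimal sketch of the kind of subclass this deleted base class expects: a named CustomCoder that round-trips one value type through an existing coder. Illustrative only (UriCoder is not part of the commit); it assumes the pre-rename packages and the 1.x-era encode/decode signatures that take a Coder.Context:

import com.google.cloud.dataflow.sdk.coders.CoderException;
import com.google.cloud.dataflow.sdk.coders.CustomCoder;
import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder;

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URI;

/** Hypothetical coder that serializes a URI via its string form. */
public class UriCoder extends CustomCoder<URI> {
  public static UriCoder of() {
    return new UriCoder();
  }

  @Override
  public void encode(URI value, OutputStream outStream, Context context)
      throws CoderException, IOException {
    // Delegate the actual byte layout to the stock string coder.
    StringUtf8Coder.of().encode(value.toString(), outStream, context);
  }

  @Override
  public URI decode(InputStream inStream, Context context)
      throws CoderException, IOException {
    return URI.create(StringUtf8Coder.of().decode(inStream, context));
  }
}

Java serialization of the coder instance itself, as the asCloudObject method above shows, is what lets the service reconstruct such a subclass at execution time.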
[43/74] [partial] incubator-beam git commit: Rename com/google/cloud/dataflow->org/apache/beam

2016-04-13 Thread davor
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/0393a791/examples/java/src/main/java/org/apache/beam/examples/cookbook/CombinePerKeyExamples.java
--
diff --git 
a/examples/java/src/main/java/org/apache/beam/examples/cookbook/CombinePerKeyExamples.java
 
b/examples/java/src/main/java/org/apache/beam/examples/cookbook/CombinePerKeyExamples.java
new file mode 100644
index 0000000..93304eb
--- /dev/null
+++ 
b/examples/java/src/main/java/org/apache/beam/examples/cookbook/CombinePerKeyExamples.java
@@ -0,0 +1,224 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.dataflow.examples.cookbook;
+
+import com.google.api.services.bigquery.model.TableFieldSchema;
+import com.google.api.services.bigquery.model.TableRow;
+import com.google.api.services.bigquery.model.TableSchema;
+import com.google.cloud.dataflow.sdk.Pipeline;
+import com.google.cloud.dataflow.sdk.io.BigQueryIO;
+import com.google.cloud.dataflow.sdk.options.Default;
+import com.google.cloud.dataflow.sdk.options.Description;
+import com.google.cloud.dataflow.sdk.options.PipelineOptions;
+import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
+import com.google.cloud.dataflow.sdk.options.Validation;
+import com.google.cloud.dataflow.sdk.transforms.Aggregator;
+import com.google.cloud.dataflow.sdk.transforms.Combine;
+import com.google.cloud.dataflow.sdk.transforms.DoFn;
+import com.google.cloud.dataflow.sdk.transforms.PTransform;
+import com.google.cloud.dataflow.sdk.transforms.ParDo;
+import com.google.cloud.dataflow.sdk.transforms.SerializableFunction;
+import com.google.cloud.dataflow.sdk.transforms.Sum;
+import com.google.cloud.dataflow.sdk.values.KV;
+import com.google.cloud.dataflow.sdk.values.PCollection;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * An example that reads the public 'Shakespeare' data, and for each word in
+ * the dataset that is over a given length, generates a string containing the
+ * list of play names in which that word appears, and saves this information
+ * to a BigQuery table.
+ *
+ * <p>Concepts: the Combine.perKey transform, which lets you combine the values in a
+ * key-grouped Collection, and how to use an Aggregator to track information in the
+ * Monitoring UI.
+ *
+ * <p>Note: Before running this example, you must create a BigQuery dataset to contain your output
+ * table.
+ *
+ * <p>To execute this pipeline locally, specify general pipeline configuration:
+ * <pre>{@code
+ *   --project=YOUR_PROJECT_ID
+ * }
+ * </pre>
+ * and the BigQuery table for the output:
+ * <pre>{@code
+ *   --output=YOUR_PROJECT_ID:DATASET_ID.TABLE_ID
+ * }
+ * </pre>
+ *
+ * <p>To execute this pipeline using the Dataflow service, specify pipeline configuration:
+ * <pre>{@code
+ *   --project=YOUR_PROJECT_ID
+ *   --stagingLocation=gs://
+ *   --runner=BlockingDataflowPipelineRunner
+ * }
+ * </pre>
+ * and the BigQuery table for the output:
+ * <pre>{@code
+ *   --output=YOUR_PROJECT_ID:DATASET_ID.TABLE_ID
+ * }
+ * </pre>
+ *
+ * <p>The BigQuery input table defaults to {@code publicdata:samples.shakespeare} and can
+ * be overridden with {@code --input}.
+ */
+public class CombinePerKeyExamples {
+  // Use the shakespeare public BigQuery sample
+  private static final String SHAKESPEARE_TABLE =
+      "publicdata:samples.shakespeare";
+  // We'll track words >= this word length across all plays in the table.
+  private static final int MIN_WORD_LENGTH = 9;
+
+  /**
+   * Examines each row in the input table. If the word is greater than or equal to MIN_WORD_LENGTH,
+   * outputs word, play_name.
+   */
+  static class ExtractLargeWordsFn extends DoFn<TableRow, KV<String, String>> {
+    private final Aggregator<Long, Long> smallerWords =
+        createAggregator("smallerWords", new Sum.SumLongFn());
+
+    @Override
+    public void processElement(ProcessContext c){
+      TableRow row = c.element();
+      String playName = (String) row.get("corpus");
+      String word = (String) row.get("word");
+      if (word.length() >= MIN_WORD_LENGTH) {
+        c.output(KV.of(word, playName));
+      } else {
+        // Track how many smaller words we're not including. This information will be
+        // 

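The Combine.perKey pattern this example is built around can be sketched as follows (illustrative only, not text from the commit; it assumes the pre-rename SDK imports listed above):

import com.google.cloud.dataflow.sdk.transforms.Combine;
import com.google.cloud.dataflow.sdk.transforms.SerializableFunction;
import com.google.cloud.dataflow.sdk.values.KV;
import com.google.cloud.dataflow.sdk.values.PCollection;

public class ConcatExample {
  /** Combines all play names recorded for each word into one comma-separated string. */
  static PCollection<KV<String, String>> concatPlayNames(
      PCollection<KV<String, String>> wordToPlay) {
    return wordToPlay.apply(Combine.<String, String>perKey(
        new SerializableFunction<Iterable<String>, String>() {
          @Override
          public String apply(Iterable<String> input) {
            StringBuilder all = new StringBuilder();
            for (String item : input) {
              if (all.length() > 0) {
                all.append(",");
              }
              all.append(item);
            }
            return all.toString();
          }
        }));
  }
}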
[51/74] [partial] incubator-beam git commit: Rename com/google/cloud/dataflow->org/apache/beam

2016-04-13 Thread davor
Rename com/google/cloud/dataflow->org/apache/beam

Applied this script:

mkdir -p $1/org/apache/beam;
git mv $1/com/google/cloud/dataflow/* $1/org/apache;
rmdir -p $1/com/google/cloud/dataflow;

To these directories:
  - sdks/java/core/src/{main,test}
  - sdks/java/java8tests/src/test
  - runners/google-cloud-dataflow-java/src/{main,test}
  - contrib/{join-library,hadoop}/src/{main,test}
  - examples/java{,8}/src/{main,test}


Project: http://git-wip-us.apache.org/repos/asf/incubator-beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-beam/commit/0393a791
Tree: http://git-wip-us.apache.org/repos/asf/incubator-beam/tree/0393a791
Diff: http://git-wip-us.apache.org/repos/asf/incubator-beam/diff/0393a791

Branch: refs/heads/master
Commit: 0393a7917318baaa1e580259a74bff2c1dcbe6b8
Parents: c4cbbb1
Author: bchambers 
Authored: Wed Apr 13 14:18:40 2016 -0700
Committer: bchambers 
Committed: Wed Apr 13 17:34:42 2016 -0700

--
 .../contrib/hadoop/HadoopFileSource.java|  486 ---
 .../dataflow/contrib/hadoop/WritableCoder.java  |  111 -
 .../apache/contrib/hadoop/HadoopFileSource.java |  486 +++
 .../apache/contrib/hadoop/WritableCoder.java|  111 +
 .../contrib/hadoop/HadoopFileSourceTest.java|  190 --
 .../contrib/hadoop/WritableCoderTest.java   |   37 -
 .../contrib/hadoop/HadoopFileSourceTest.java|  190 ++
 .../contrib/hadoop/WritableCoderTest.java   |   37 +
 .../dataflow/contrib/joinlibrary/Join.java  |  186 --
 .../org/apache/contrib/joinlibrary/Join.java|  186 ++
 .../contrib/joinlibrary/InnerJoinTest.java  |  143 -
 .../contrib/joinlibrary/OuterLeftJoinTest.java  |  153 -
 .../contrib/joinlibrary/OuterRightJoinTest.java |  153 -
 .../contrib/joinlibrary/InnerJoinTest.java  |  143 +
 .../contrib/joinlibrary/OuterLeftJoinTest.java  |  153 +
 .../contrib/joinlibrary/OuterRightJoinTest.java |  153 +
 .../dataflow/examples/DebuggingWordCount.java   |  199 --
 .../dataflow/examples/MinimalWordCount.java |  118 -
 .../dataflow/examples/WindowedWordCount.java|  270 --
 .../cloud/dataflow/examples/WordCount.java  |  207 --
 .../examples/common/DataflowExampleOptions.java |   37 -
 .../examples/common/DataflowExampleUtils.java   |  488 ---
 .../common/ExampleBigQueryTableOptions.java |   56 -
 ...xamplePubsubTopicAndSubscriptionOptions.java |   47 -
 .../common/ExamplePubsubTopicOptions.java   |   47 -
 .../examples/common/PubsubFileInjector.java |  154 -
 .../examples/complete/AutoComplete.java |  517 ---
 .../cloud/dataflow/examples/complete/README.md  |   44 -
 .../examples/complete/StreamingWordExtract.java |  164 -
 .../cloud/dataflow/examples/complete/TfIdf.java |  432 ---
 .../examples/complete/TopWikipediaSessions.java |  224 --
 .../examples/complete/TrafficMaxLaneFlow.java   |  426 ---
 .../examples/complete/TrafficRoutes.java|  460 ---
 .../examples/cookbook/BigQueryTornadoes.java|  180 --
 .../cookbook/CombinePerKeyExamples.java |  224 --
 .../examples/cookbook/DatastoreWordCount.java   |  270 --
 .../examples/cookbook/DeDupExample.java |  101 -
 .../examples/cookbook/FilterExamples.java   |  267 --
 .../examples/cookbook/JoinExamples.java |  186 --
 .../examples/cookbook/MaxPerKeyExamples.java|  174 -
 .../cloud/dataflow/examples/cookbook/README.md  |   55 -
 .../examples/cookbook/TriggerExample.java   |  565 
 .../beam/examples/DebuggingWordCount.java   |  199 ++
 .../apache/beam/examples/MinimalWordCount.java  |  118 +
 .../apache/beam/examples/WindowedWordCount.java |  270 ++
 .../org/apache/beam/examples/WordCount.java |  207 ++
 .../examples/common/DataflowExampleOptions.java |   37 +
 .../examples/common/DataflowExampleUtils.java   |  488 +++
 .../common/ExampleBigQueryTableOptions.java |   56 +
 ...xamplePubsubTopicAndSubscriptionOptions.java |   47 +
 .../common/ExamplePubsubTopicOptions.java   |   47 +
 .../examples/common/PubsubFileInjector.java |  154 +
 .../beam/examples/complete/AutoComplete.java|  517 +++
 .../org/apache/beam/examples/complete/README.md |   44 +
 .../examples/complete/StreamingWordExtract.java |  164 +
 .../apache/beam/examples/complete/TfIdf.java|  432 +++
 .../examples/complete/TopWikipediaSessions.java |  224 ++
 .../examples/complete/TrafficMaxLaneFlow.java   |  426 +++
 .../beam/examples/complete/TrafficRoutes.java   |  460 +++
 .../examples/cookbook/BigQueryTornadoes.java|  180 ++
 .../cookbook/CombinePerKeyExamples.java |  224 ++
 .../examples/cookbook/DatastoreWordCount.java   |  270 ++
 .../beam/examples/cookbook/DeDupExample.java|  101 +
 .../beam/examples/cookbook/FilterExamples.java  |  267 ++
 .../beam/examples/cookbook/JoinExamples.java|  186 ++
 .../examples/cookbook/MaxPerKeyExamples.java|  174 +
 .../org/apache/beam/examples/cookbook/README.md |  

[jira] [Commented] (BEAM-78) Rename Dataflow to Beam

2016-04-13 Thread ASF GitHub Bot (JIRA)

[ https://issues.apache.org/jira/browse/BEAM-78?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15240590#comment-15240590 ]

ASF GitHub Bot commented on BEAM-78:


Github user asfgit closed the pull request at:

https://github.com/apache/incubator-beam/pull/176


> Rename Dataflow to Beam 
> 
>
> Key: BEAM-78
> URL: https://issues.apache.org/jira/browse/BEAM-78
> Project: Beam
>  Issue Type: Task
>  Components: sdk-java-core
>Reporter: Frances Perry
>Assignee: Jean-Baptiste Onofré
>Priority: Blocker
>
> The initial code drop contains code that uses "Dataflow" to refer to the 
> SDK/model and Cloud Dataflow service. The first usage needs to be swapped to 
> Beam.
> This includes:
> - mentions throughout the javadoc
> - packages of classes that belong to the java sdk core
> And does not include:
> - the DataflowPipelineRunner
> We plan to postpone this rename until other code drops have been integrated 
> into the repository, and we have completed the refactoring that will separate 
> these two uses.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)


[40/74] [partial] incubator-beam git commit: Rename com/google/cloud/dataflow->org/apache/beam

2016-04-13 Thread davor
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/0393a791/examples/java8/src/main/java/com/google/cloud/dataflow/examples/complete/game/injector/Injector.java
--
diff --git 
a/examples/java8/src/main/java/com/google/cloud/dataflow/examples/complete/game/injector/Injector.java
 
b/examples/java8/src/main/java/com/google/cloud/dataflow/examples/complete/game/injector/Injector.java
deleted file mode 100644
index 18ff0a3..0000000
--- 
a/examples/java8/src/main/java/com/google/cloud/dataflow/examples/complete/game/injector/Injector.java
+++ /dev/null
@@ -1,416 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.google.cloud.dataflow.examples.complete.game.injector;
-
-import com.google.api.services.pubsub.Pubsub;
-import com.google.api.services.pubsub.model.PublishRequest;
-import com.google.api.services.pubsub.model.PubsubMessage;
-import com.google.common.collect.ImmutableMap;
-
-import org.joda.time.DateTimeZone;
-import org.joda.time.format.DateTimeFormat;
-import org.joda.time.format.DateTimeFormatter;
-
-import java.io.BufferedOutputStream;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.OutputStreamWriter;
-import java.io.PrintWriter;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-import java.util.Random;
-import java.util.TimeZone;
-
-
-/**
- * This is a generator that simulates usage data from a mobile game, and either publishes the data
- * to a pubsub topic or writes it to a file.
- *
- * <p>The general model used by the generator is the following. There is a set of teams with team
- * members. Each member is scoring points for their team. After some period, a team will dissolve
- * and a new one will be created in its place. There is also a set of 'Robots', or spammer users.
- * They hop from team to team. The robots are set to have a higher 'click rate' (generate more
- * events) than the regular team members.
- *
- * <p>Each generated line of data has the following form:
- * username,teamname,score,timestamp_in_ms,readable_time
- * e.g.:
- * user2_AsparagusPig,AsparagusPig,10,1445230923951,2015-11-02 09:09:28.224
- *
- * <p>The Injector writes either to a PubSub topic, or a file. It will use the PubSub topic if
- * specified. It takes the following arguments:
- * {@code Injector project-name (topic-name|none) (filename|none)}.
- *
- * <p>To run the Injector in the mode where it publishes to PubSub, you will need to authenticate
- * locally using project-based service account credentials to avoid running over PubSub
- * quota.
- * See https://developers.google.com/identity/protocols/application-default-credentials
- * for more information on using service account credentials. Set the GOOGLE_APPLICATION_CREDENTIALS
- * environment variable to point to your downloaded service account credentials before starting the
- * program, e.g.:
- * {@code export GOOGLE_APPLICATION_CREDENTIALS=/path/to/your/credentials-key.json}.
- * If you do not do this, then your injector will only run for a few minutes on your
- * 'user account' credentials before you will start to see quota error messages like:
- * "Request throttled due to user QPS limit being reached", and see this exception:
- * ".com.google.api.client.googleapis.json.GoogleJsonResponseException: 429 Too Many Requests".
- * Once you've set up your credentials, run the Injector like this:
- * <pre>{@code
- * Injector <project-name> <topic-name> none
- * }
- * </pre>
- * The pubsub topic will be created if it does not exist.
- *
- * <p>To run the injector in write-to-file-mode, set the topic name to "none" and specify the
- * filename:
- * <pre>{@code
- * Injector <project-name> none <filename>
- * }
- * </pre>
- */
-class Injector {
-  private static Pubsub pubsub;
-  private static Random random = new Random();
-  private static String topic;
-  private static String project;
-  private static final String TIMESTAMP_ATTRIBUTE = "timestamp_ms";
-
-  // QPS ranges from 800 to 1000.
-  private static final int MIN_QPS = 800;
-  private static final int QPS_RANGE = 200;
-  // How long to sleep, in ms, between creation of the threads that make API requests to PubSub.
-  private static final int 

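For context, the publish path suggested by the Pubsub imports above looks roughly like this hypothetical helper (a sketch, not code from the commit; the Pub/Sub v1 generated client calls are assumed):

import com.google.api.services.pubsub.Pubsub;
import com.google.api.services.pubsub.model.PublishRequest;
import com.google.api.services.pubsub.model.PubsubMessage;
import com.google.common.collect.ImmutableMap;

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Collections;

class PublishSketch {
  /** Publishes one generated line to the given fully-qualified topic. */
  static void publishLine(Pubsub pubsub, String topic, String line) throws IOException {
    PubsubMessage message = new PubsubMessage()
        .encodeData(line.getBytes(StandardCharsets.UTF_8))
        .setAttributes(ImmutableMap.of(
            "timestamp_ms", Long.toString(System.currentTimeMillis())));
    PublishRequest request = new PublishRequest()
        .setMessages(Collections.singletonList(message));
    pubsub.projects().topics().publish(topic, request).execute();
  }
}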
[57/74] incubator-beam git commit: Initial update of imports

2016-04-13 Thread davor
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/b9724454/sdks/java/core/src/test/java/org/apache/beam/sdk/coders/CustomCoderTest.java
--
diff --git 
a/sdks/java/core/src/test/java/org/apache/beam/sdk/coders/CustomCoderTest.java 
b/sdks/java/core/src/test/java/org/apache/beam/sdk/coders/CustomCoderTest.java
index 4997d9b..10259d9 100644
--- 
a/sdks/java/core/src/test/java/org/apache/beam/sdk/coders/CustomCoderTest.java
+++ 
b/sdks/java/core/src/test/java/org/apache/beam/sdk/coders/CustomCoderTest.java
@@ -15,12 +15,12 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package com.google.cloud.dataflow.sdk.coders;
+package org.apache.beam.sdk.coders;
 
-import com.google.cloud.dataflow.sdk.testing.CoderProperties;
-import com.google.cloud.dataflow.sdk.util.CoderUtils;
-import com.google.cloud.dataflow.sdk.util.SerializableUtils;
-import com.google.cloud.dataflow.sdk.values.KV;
+import org.apache.beam.sdk.testing.CoderProperties;
+import org.apache.beam.sdk.util.CoderUtils;
+import org.apache.beam.sdk.util.SerializableUtils;
+import org.apache.beam.sdk.values.KV;
 
 import org.junit.Assert;
 import org.junit.Rule;

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/b9724454/sdks/java/core/src/test/java/org/apache/beam/sdk/coders/DefaultCoderTest.java
--
diff --git 
a/sdks/java/core/src/test/java/org/apache/beam/sdk/coders/DefaultCoderTest.java 
b/sdks/java/core/src/test/java/org/apache/beam/sdk/coders/DefaultCoderTest.java
index 44571c7..1faf58f 100644
--- 
a/sdks/java/core/src/test/java/org/apache/beam/sdk/coders/DefaultCoderTest.java
+++ 
b/sdks/java/core/src/test/java/org/apache/beam/sdk/coders/DefaultCoderTest.java
@@ -15,14 +15,15 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package com.google.cloud.dataflow.sdk.coders;
+package org.apache.beam.sdk.coders;
 
 import static com.google.common.base.Preconditions.checkArgument;
+
 import static org.hamcrest.Matchers.equalTo;
 import static org.hamcrest.Matchers.instanceOf;
 import static org.junit.Assert.assertThat;
 
-import com.google.cloud.dataflow.sdk.values.TypeDescriptor;
+import org.apache.beam.sdk.values.TypeDescriptor;
 
 import org.junit.Before;
 import org.junit.Rule;

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/b9724454/sdks/java/core/src/test/java/org/apache/beam/sdk/coders/DelegateCoderTest.java
--
diff --git 
a/sdks/java/core/src/test/java/org/apache/beam/sdk/coders/DelegateCoderTest.java
 
b/sdks/java/core/src/test/java/org/apache/beam/sdk/coders/DelegateCoderTest.java
index 9494a56..b40457c 100644
--- 
a/sdks/java/core/src/test/java/org/apache/beam/sdk/coders/DelegateCoderTest.java
+++ 
b/sdks/java/core/src/test/java/org/apache/beam/sdk/coders/DelegateCoderTest.java
@@ -15,9 +15,10 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package com.google.cloud.dataflow.sdk.coders;
+package org.apache.beam.sdk.coders;
+
+import org.apache.beam.sdk.testing.CoderProperties;
 
-import com.google.cloud.dataflow.sdk.testing.CoderProperties;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Sets;
 

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/b9724454/sdks/java/core/src/test/java/org/apache/beam/sdk/coders/DoubleCoderTest.java
--
diff --git 
a/sdks/java/core/src/test/java/org/apache/beam/sdk/coders/DoubleCoderTest.java 
b/sdks/java/core/src/test/java/org/apache/beam/sdk/coders/DoubleCoderTest.java
index d978c3c..73fb464 100644
--- 
a/sdks/java/core/src/test/java/org/apache/beam/sdk/coders/DoubleCoderTest.java
+++ 
b/sdks/java/core/src/test/java/org/apache/beam/sdk/coders/DoubleCoderTest.java
@@ -15,10 +15,10 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package com.google.cloud.dataflow.sdk.coders;
+package org.apache.beam.sdk.coders;
 
-import com.google.cloud.dataflow.sdk.testing.CoderProperties;
-import com.google.cloud.dataflow.sdk.util.CoderUtils;
+import org.apache.beam.sdk.testing.CoderProperties;
+import org.apache.beam.sdk.util.CoderUtils;
 
 import org.junit.Rule;
 import org.junit.Test;
@@ -62,7 +62,7 @@ public class DoubleCoderTest {
 
  /**
   * Generated data to check that the wire format has not changed. To regenerate, see
-   * {@link com.google.cloud.dataflow.sdk.coders.PrintBase64Encodings}.
+   * {@link org.apache.beam.sdk.coders.PrintBase64Encodings}.
   */
  private static final List<String> TEST_ENCODINGS = Arrays.asList(
      "AAA",

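Alongside such wire-format constants, coder tests typically assert a decode/encode round trip via the renamed CoderProperties import; a small sketch (not taken from this diff):

import org.apache.beam.sdk.coders.DoubleCoder;
import org.apache.beam.sdk.testing.CoderProperties;

import org.junit.Test;

public class RoundTripSketch {
  @Test
  public void testDecodeEncodeRoundTrip() throws Exception {
    // Asserts that decode(encode(x)) equals x for this value.
    CoderProperties.coderDecodeEncodeEqual(DoubleCoder.of(), 13.37);
  }
}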

[27/74] [partial] incubator-beam git commit: Rename com/google/cloud/dataflow->org/apache/beam

2016-04-13 Thread davor
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/0393a791/runners/google-cloud-dataflow-java/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslatorTest.java
--
diff --git 
a/runners/google-cloud-dataflow-java/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslatorTest.java
 
b/runners/google-cloud-dataflow-java/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslatorTest.java
deleted file mode 100644
index dd1b3c8..0000000
--- 
a/runners/google-cloud-dataflow-java/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslatorTest.java
+++ /dev/null
@@ -1,890 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.google.cloud.dataflow.sdk.runners;
-
-import static com.google.cloud.dataflow.sdk.util.Structs.addObject;
-import static com.google.cloud.dataflow.sdk.util.Structs.getDictionary;
-import static com.google.cloud.dataflow.sdk.util.Structs.getString;
-import static org.hamcrest.Matchers.containsString;
-import static org.hamcrest.Matchers.hasKey;
-import static org.hamcrest.core.IsInstanceOf.instanceOf;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNull;
-import static org.junit.Assert.assertThat;
-import static org.junit.Assert.assertTrue;
-import static org.mockito.Matchers.any;
-import static org.mockito.Matchers.anyString;
-import static org.mockito.Matchers.argThat;
-import static org.mockito.Matchers.eq;
-import static org.mockito.Mockito.mock;
-import static org.mockito.Mockito.when;
-
-import com.google.api.services.dataflow.Dataflow;
-import com.google.api.services.dataflow.model.DataflowPackage;
-import com.google.api.services.dataflow.model.Job;
-import com.google.api.services.dataflow.model.Step;
-import com.google.api.services.dataflow.model.WorkerPool;
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder;
-import com.google.cloud.dataflow.sdk.coders.VarIntCoder;
-import com.google.cloud.dataflow.sdk.coders.VoidCoder;
-import com.google.cloud.dataflow.sdk.io.TextIO;
-import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions;
-import com.google.cloud.dataflow.sdk.options.DataflowPipelineWorkerPoolOptions;
-import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
-import com.google.cloud.dataflow.sdk.runners.DataflowPipelineTranslator.TranslationContext;
-import com.google.cloud.dataflow.sdk.transforms.Count;
-import com.google.cloud.dataflow.sdk.transforms.Create;
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.transforms.ParDo;
-import com.google.cloud.dataflow.sdk.transforms.Sum;
-import com.google.cloud.dataflow.sdk.transforms.View;
-import com.google.cloud.dataflow.sdk.transforms.display.DisplayData;
-import com.google.cloud.dataflow.sdk.util.GcsUtil;
-import com.google.cloud.dataflow.sdk.util.OutputReference;
-import com.google.cloud.dataflow.sdk.util.PropertyNames;
-import com.google.cloud.dataflow.sdk.util.Structs;
-import com.google.cloud.dataflow.sdk.util.TestCredential;
-import com.google.cloud.dataflow.sdk.util.WindowingStrategy;
-import com.google.cloud.dataflow.sdk.util.gcsfs.GcsPath;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.cloud.dataflow.sdk.values.PCollectionTuple;
-import com.google.cloud.dataflow.sdk.values.PDone;
-import com.google.cloud.dataflow.sdk.values.TupleTag;
-import com.google.common.collect.ImmutableList;
-import com.google.common.collect.ImmutableMap;
-import com.google.common.collect.Iterables;
-
-import org.hamcrest.Matchers;
-import org.junit.Assert;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.internal.matchers.ThrowableMessageMatcher;
-import org.junit.rules.ExpectedException;
-import org.junit.runner.RunWith;
-import org.junit.runners.JUnit4;
-import org.mockito.ArgumentMatcher;
-import org.mockito.invocation.InvocationOnMock;
-import org.mockito.stubbing.Answer;
-
-import java.io.IOException;

[64/74] incubator-beam git commit: Initial update of imports

2016-04-13 Thread davor
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/b9724454/sdks/java/core/src/main/java/org/apache/beam/sdk/io/TextIO.java
--
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/TextIO.java 
b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/TextIO.java
index ed25926..b95fe15 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/TextIO.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/TextIO.java
@@ -15,23 +15,24 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package com.google.cloud.dataflow.sdk.io;
+package org.apache.beam.sdk.io;
 
 import static com.google.common.base.Preconditions.checkState;
 
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.coders.Coder.Context;
-import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder;
-import com.google.cloud.dataflow.sdk.coders.VoidCoder;
-import com.google.cloud.dataflow.sdk.io.Read.Bounded;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.util.IOChannelUtils;
-import com.google.cloud.dataflow.sdk.util.MimeTypes;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.cloud.dataflow.sdk.values.PDone;
-import com.google.cloud.dataflow.sdk.values.PInput;
+import org.apache.beam.sdk.coders.Coder;
+import org.apache.beam.sdk.coders.Coder.Context;
+import org.apache.beam.sdk.coders.StringUtf8Coder;
+import org.apache.beam.sdk.coders.VoidCoder;
+import org.apache.beam.sdk.io.Read.Bounded;
+import org.apache.beam.sdk.options.PipelineOptions;
+import org.apache.beam.sdk.runners.DirectPipelineRunner;
+import org.apache.beam.sdk.transforms.PTransform;
+import org.apache.beam.sdk.util.IOChannelUtils;
+import org.apache.beam.sdk.util.MimeTypes;
+import org.apache.beam.sdk.values.PCollection;
+import org.apache.beam.sdk.values.PDone;
+import org.apache.beam.sdk.values.PInput;
+
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Preconditions;
 import com.google.protobuf.ByteString;
@@ -310,20 +311,20 @@ public class TextIO {
 final Bounded read;
 switch(compressionType) {
   case UNCOMPRESSED:
-read = com.google.cloud.dataflow.sdk.io.Read.from(
+read = org.apache.beam.sdk.io.Read.from(
 new TextSource(filepattern, coder));
 break;
   case AUTO:
-read = com.google.cloud.dataflow.sdk.io.Read.from(
+read = org.apache.beam.sdk.io.Read.from(
 CompressedSource.from(new TextSource(filepattern, coder)));
 break;
   case BZIP2:
-read = com.google.cloud.dataflow.sdk.io.Read.from(
+read = org.apache.beam.sdk.io.Read.from(
 CompressedSource.from(new TextSource(filepattern, coder))
        .withDecompression(CompressedSource.CompressionMode.BZIP2));
 break;
   case GZIP:
-read = com.google.cloud.dataflow.sdk.io.Read.from(
+read = org.apache.beam.sdk.io.Read.from(
 CompressedSource.from(new TextSource(filepattern, coder))
        .withDecompression(CompressedSource.CompressionMode.GZIP));
 break;
@@ -625,7 +626,7 @@ public class TextIO {
    // Note that custom sinks currently do not expose sharding controls.
    // Thus pipeline runner writers need to individually add support internally to
    // apply user requested sharding limits.
-return input.apply("Write", com.google.cloud.dataflow.sdk.io.Write.to(
+return input.apply("Write", org.apache.beam.sdk.io.Write.to(
 new TextSink<>(
 filenamePrefix, filenameSuffix, shardTemplate, coder)));
   }
@@ -769,7 +770,7 @@ public class TextIO {
 }
 
 /**
- * A {@link com.google.cloud.dataflow.sdk.io.FileBasedSource.FileBasedReader FileBasedReader}
+ * A {@link org.apache.beam.sdk.io.FileBasedSource.FileBasedReader FileBasedReader}
  * which can decode records delimited by new line characters.
  *
  * See {@link TextSource} for further details.
@@ -944,7 +945,7 @@ public class TextIO {
 }
 
 /**
- * A {@link com.google.cloud.dataflow.sdk.io.FileBasedSink.FileBasedWriteOperation
+ * A {@link org.apache.beam.sdk.io.FileBasedSink.FileBasedWriteOperation
  * FileBasedWriteOperation} for text files.
  */
 private static class TextWriteOperation<T> extends FileBasedWriteOperation<T> {
@@ -962,7 +963,7 @@ public class TextIO {
 }
 
 /**
- * A {@link com.google.cloud.dataflow.sdk.io.FileBasedSink.FileBasedWriter FileBasedWriter}
+ * A {@link 

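A sketch of how the compression switch above surfaces in user code, written against the post-rename imports this diff introduces (the bucket path is a placeholder):

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.TextIO;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.values.PCollection;

public class ReadGzippedText {
  public static void main(String[] args) {
    Pipeline p = Pipeline.create(PipelineOptionsFactory.fromArgs(args).create());
    // Forces the GZIP branch of the switch instead of AUTO detection.
    PCollection<String> lines = p.apply(
        TextIO.Read.from("gs://YOUR_BUCKET/input/*.gz")
            .withCompressionType(TextIO.CompressionType.GZIP));
    p.run();
  }
}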
[18/74] [partial] incubator-beam git commit: Rename com/google/cloud/dataflow->org/apache/beam

2016-04-13 Thread davor
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/0393a791/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/io/BigQueryIO.java
--
diff --git 
a/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/io/BigQueryIO.java 
b/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/io/BigQueryIO.java
deleted file mode 100644
index c37d6c7..0000000
--- 
a/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/io/BigQueryIO.java
+++ /dev/null
@@ -1,1685 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.google.cloud.dataflow.sdk.io;
-
-import static com.google.common.base.Preconditions.checkArgument;
-import static com.google.common.base.Preconditions.checkNotNull;
-
-import com.google.api.client.json.JsonFactory;
-import com.google.api.services.bigquery.Bigquery;
-import com.google.api.services.bigquery.model.JobConfigurationLoad;
-import com.google.api.services.bigquery.model.QueryRequest;
-import com.google.api.services.bigquery.model.TableReference;
-import com.google.api.services.bigquery.model.TableRow;
-import com.google.api.services.bigquery.model.TableSchema;
-import com.google.cloud.dataflow.sdk.coders.AtomicCoder;
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.coders.Coder.Context;
-import com.google.cloud.dataflow.sdk.coders.CoderException;
-import com.google.cloud.dataflow.sdk.coders.KvCoder;
-import com.google.cloud.dataflow.sdk.coders.StandardCoder;
-import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder;
-import com.google.cloud.dataflow.sdk.coders.TableRowJsonCoder;
-import com.google.cloud.dataflow.sdk.coders.VarIntCoder;
-import com.google.cloud.dataflow.sdk.coders.VoidCoder;
-import com.google.cloud.dataflow.sdk.io.BigQueryIO.Write.CreateDisposition;
-import com.google.cloud.dataflow.sdk.io.BigQueryIO.Write.WriteDisposition;
-import com.google.cloud.dataflow.sdk.options.BigQueryOptions;
-import com.google.cloud.dataflow.sdk.options.GcpOptions;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner;
-import com.google.cloud.dataflow.sdk.runners.PipelineRunner;
-import com.google.cloud.dataflow.sdk.transforms.Aggregator;
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.transforms.ParDo;
-import com.google.cloud.dataflow.sdk.transforms.SerializableFunction;
-import com.google.cloud.dataflow.sdk.transforms.Sum;
-import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
-import com.google.cloud.dataflow.sdk.util.BigQueryServices;
-import com.google.cloud.dataflow.sdk.util.BigQueryServices.LoadService;
-import com.google.cloud.dataflow.sdk.util.BigQueryServicesImpl;
-import com.google.cloud.dataflow.sdk.util.BigQueryTableInserter;
-import com.google.cloud.dataflow.sdk.util.BigQueryTableRowIterator;
-import com.google.cloud.dataflow.sdk.util.MimeTypes;
-import com.google.cloud.dataflow.sdk.util.PropertyNames;
-import com.google.cloud.dataflow.sdk.util.Reshuffle;
-import com.google.cloud.dataflow.sdk.util.SystemDoFnInternal;
-import com.google.cloud.dataflow.sdk.util.Transport;
-import com.google.cloud.dataflow.sdk.util.WindowingStrategy;
-import com.google.cloud.dataflow.sdk.util.gcsfs.GcsPath;
-import com.google.cloud.dataflow.sdk.values.KV;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.cloud.dataflow.sdk.values.PCollection.IsBounded;
-import com.google.cloud.dataflow.sdk.values.PDone;
-import com.google.cloud.dataflow.sdk.values.PInput;
-import com.google.cloud.hadoop.util.ApiErrorExtractor;
-import com.google.common.annotations.VisibleForTesting;
-import com.google.common.base.MoreObjects;
-import com.google.common.base.Strings;
-import com.google.common.collect.Lists;
-
-import com.fasterxml.jackson.annotation.JsonCreator;
-import com.fasterxml.jackson.annotation.JsonProperty;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import 

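For orientation, the CreateDisposition/WriteDisposition imports above belong to the classic write pattern of this deleted file's API; a hedged sketch with placeholder table name and schema:

import com.google.api.services.bigquery.model.TableFieldSchema;
import com.google.api.services.bigquery.model.TableRow;
import com.google.api.services.bigquery.model.TableSchema;
import com.google.cloud.dataflow.sdk.io.BigQueryIO;
import com.google.cloud.dataflow.sdk.io.BigQueryIO.Write.CreateDisposition;
import com.google.cloud.dataflow.sdk.io.BigQueryIO.Write.WriteDisposition;
import com.google.cloud.dataflow.sdk.values.PCollection;
import com.google.cloud.dataflow.sdk.values.PDone;

import java.util.Collections;

class WriteSketch {
  /** Writes rows to a placeholder table, creating it if needed and appending on rerun. */
  static PDone writeRows(PCollection<TableRow> rows) {
    TableSchema schema = new TableSchema().setFields(Collections.singletonList(
        new TableFieldSchema().setName("word").setType("STRING")));
    return rows.apply(BigQueryIO.Write
        .to("YOUR_PROJECT:YOUR_DATASET.YOUR_TABLE")
        .withSchema(schema)
        .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
        .withWriteDisposition(WriteDisposition.WRITE_APPEND));
  }
}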
[14/74] [partial] incubator-beam git commit: Rename com/google/cloud/dataflow->org/apache/beam

2016-04-13 Thread davor
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/0393a791/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/io/PubsubIO.java
--
diff --git 
a/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/io/PubsubIO.java 
b/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/io/PubsubIO.java
deleted file mode 100644
index deed9ab..0000000
--- 
a/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/io/PubsubIO.java
+++ /dev/null
@@ -1,1048 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.google.cloud.dataflow.sdk.io;
-
-import static com.google.common.base.MoreObjects.firstNonNull;
-import static com.google.common.base.Preconditions.checkArgument;
-
-import com.google.api.client.util.Clock;
-import com.google.api.client.util.DateTime;
-import com.google.api.services.pubsub.Pubsub;
-import com.google.api.services.pubsub.model.AcknowledgeRequest;
-import com.google.api.services.pubsub.model.PublishRequest;
-import com.google.api.services.pubsub.model.PubsubMessage;
-import com.google.api.services.pubsub.model.PullRequest;
-import com.google.api.services.pubsub.model.PullResponse;
-import com.google.api.services.pubsub.model.ReceivedMessage;
-import com.google.api.services.pubsub.model.Subscription;
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder;
-import com.google.cloud.dataflow.sdk.coders.VoidCoder;
-import com.google.cloud.dataflow.sdk.options.PubsubOptions;
-import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner;
-import com.google.cloud.dataflow.sdk.runners.PipelineRunner;
-import com.google.cloud.dataflow.sdk.transforms.Create;
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.transforms.ParDo;
-import com.google.cloud.dataflow.sdk.transforms.windowing.AfterWatermark;
-import com.google.cloud.dataflow.sdk.util.CoderUtils;
-import com.google.cloud.dataflow.sdk.util.Transport;
-import com.google.cloud.dataflow.sdk.util.WindowingStrategy;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.cloud.dataflow.sdk.values.PCollection.IsBounded;
-import com.google.cloud.dataflow.sdk.values.PDone;
-import com.google.cloud.dataflow.sdk.values.PInput;
-import com.google.common.annotations.VisibleForTesting;
-import com.google.common.base.Throwables;
-import com.google.common.collect.ImmutableMap;
-
-import org.joda.time.Duration;
-import org.joda.time.Instant;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.IOException;
-import java.io.Serializable;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Random;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-import javax.annotation.Nullable;
-
-/**
- * Read and Write {@link PTransform}s for Cloud Pub/Sub streams. These transforms create
- * and consume unbounded {@link PCollection PCollections}.
- *
- * <h3>Permissions</h3>
- * Permission requirements depend on the {@link PipelineRunner} that is used to execute the
- * Dataflow job. Please refer to the documentation of corresponding
- * {@link PipelineRunner PipelineRunners} for more details.
- */
-public class PubsubIO {
-  private static final Logger LOG = LoggerFactory.getLogger(PubsubIO.class);
-
-  /** The default {@link Coder} used to translate to/from Cloud Pub/Sub messages. */
-  public static final Coder<String> DEFAULT_PUBSUB_CODER = StringUtf8Coder.of();
-
-  /**
-   * Project IDs must contain 6-63 lowercase letters, digits, or dashes.
-   * IDs must start with a letter and may not end with a dash.
-   * This regex isn't exact - this allows for patterns that would be rejected by
-   * the service, but this is sufficient for basic parsing of table references.
-   */
-  private static final Pattern PROJECT_ID_REGEXP =
-  Pattern.compile("[a-z][-a-z0-9:.]{4,61}[a-z0-9]");
-
-  private static final Pattern SUBSCRIPTION_REGEXP =
-  Pattern.compile("projects/([^/]+)/subscriptions/(.+)");
-
-  private static final Pattern 

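A quick self-contained check (not from the file) of what the documented PROJECT_ID_REGEXP accepts and rejects:

import java.util.regex.Pattern;

class ProjectIdCheck {
  private static final Pattern PROJECT_ID_REGEXP =
      Pattern.compile("[a-z][-a-z0-9:.]{4,61}[a-z0-9]");

  public static void main(String[] args) {
    // Starts with a letter, ends with a letter or digit: accepted.
    System.out.println(PROJECT_ID_REGEXP.matcher("my-project-123").matches()); // true
    // Starts with a digit: rejected.
    System.out.println(PROJECT_ID_REGEXP.matcher("123-no-leading-letter").matches()); // false
  }
}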
[60/74] incubator-beam git commit: Initial update of imports

2016-04-13 Thread davor
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/b9724454/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/windowing/Trigger.java
--
diff --git 
a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/windowing/Trigger.java
 
b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/windowing/Trigger.java
index 7c86d04..e97d3bd 100644
--- 
a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/windowing/Trigger.java
+++ 
b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/windowing/Trigger.java
@@ -15,13 +15,14 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package com.google.cloud.dataflow.sdk.transforms.windowing;
+package org.apache.beam.sdk.transforms.windowing;
 
 import org.apache.beam.sdk.annotations.Experimental;
-import com.google.cloud.dataflow.sdk.util.ExecutableTrigger;
-import com.google.cloud.dataflow.sdk.util.TimeDomain;
-import com.google.cloud.dataflow.sdk.util.state.MergingStateAccessor;
-import com.google.cloud.dataflow.sdk.util.state.StateAccessor;
+import org.apache.beam.sdk.util.ExecutableTrigger;
+import org.apache.beam.sdk.util.TimeDomain;
+import org.apache.beam.sdk.util.state.MergingStateAccessor;
+import org.apache.beam.sdk.util.state.StateAccessor;
+
 import com.google.common.base.Joiner;
 
 import org.joda.time.Instant;
@@ -39,7 +40,7 @@ import javax.annotation.Nullable;
  * {@link WindowFn}, and then passed to the associated {@code Trigger} to determine if the
  * {@code Window}s contents should be output.
  *
- * See {@link com.google.cloud.dataflow.sdk.transforms.GroupByKey} and {@link Window}
+ * See {@link org.apache.beam.sdk.transforms.GroupByKey} and {@link Window}
  * for more information about how grouping with windows works.
  *
  * The elements that are assigned to a window since the last time it was fired (or since the

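A sketch of how a Trigger is attached to a windowing strategy in user code, using the post-rename packages this diff moves to (the input collection and durations are placeholders):

import org.apache.beam.sdk.transforms.windowing.AfterWatermark;
import org.apache.beam.sdk.transforms.windowing.FixedWindows;
import org.apache.beam.sdk.transforms.windowing.Window;
import org.apache.beam.sdk.values.PCollection;

import org.joda.time.Duration;

class WindowingSketch {
  /** Windows the input into one-minute fixed windows that fire at the watermark. */
  static PCollection<String> windowByMinute(PCollection<String> input) {
    return input.apply(
        Window.<String>into(FixedWindows.of(Duration.standardMinutes(1)))
            .triggering(AfterWatermark.pastEndOfWindow())
            .withAllowedLateness(Duration.ZERO)
            .discardingFiredPanes());
  }
}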
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/b9724454/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/windowing/TriggerBuilder.java
--
diff --git 
a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/windowing/TriggerBuilder.java
 
b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/windowing/TriggerBuilder.java
index 017fd87..e8bd52b 100644
--- 
a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/windowing/TriggerBuilder.java
+++ 
b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/windowing/TriggerBuilder.java
@@ -15,7 +15,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package com.google.cloud.dataflow.sdk.transforms.windowing;
+package org.apache.beam.sdk.transforms.windowing;
 
 /**
  * Anything that can be used to create an instance of a {@code Trigger} implements this interface.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/b9724454/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/windowing/Window.java
--
diff --git 
a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/windowing/Window.java
 
b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/windowing/Window.java
index e643339..2c162f2 100644
--- 
a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/windowing/Window.java
+++ 
b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/windowing/Window.java
@@ -15,19 +15,19 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package com.google.cloud.dataflow.sdk.transforms.windowing;
+package org.apache.beam.sdk.transforms.windowing;
 
 import org.apache.beam.sdk.annotations.Experimental;
 import org.apache.beam.sdk.annotations.Experimental.Kind;
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.coders.Coder.NonDeterministicException;
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-import com.google.cloud.dataflow.sdk.transforms.GroupByKey;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.transforms.ParDo;
-import com.google.cloud.dataflow.sdk.util.WindowingStrategy;
-import com.google.cloud.dataflow.sdk.util.WindowingStrategy.AccumulationMode;
-import com.google.cloud.dataflow.sdk.values.PCollection;
+import org.apache.beam.sdk.coders.Coder;
+import org.apache.beam.sdk.coders.Coder.NonDeterministicException;
+import org.apache.beam.sdk.transforms.DoFn;
+import org.apache.beam.sdk.transforms.GroupByKey;
+import org.apache.beam.sdk.transforms.PTransform;
+import org.apache.beam.sdk.transforms.ParDo;
+import org.apache.beam.sdk.util.WindowingStrategy;
+import org.apache.beam.sdk.util.WindowingStrategy.AccumulationMode;
+import org.apache.beam.sdk.values.PCollection;
 
 import 

[35/74] [partial] incubator-beam git commit: Rename com/google/cloud/dataflow->org/apache/beam

2016-04-13 Thread davor
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/0393a791/runners/google-cloud-dataflow-java/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunnerHooks.java
--
diff --git 
a/runners/google-cloud-dataflow-java/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunnerHooks.java
 
b/runners/google-cloud-dataflow-java/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunnerHooks.java
deleted file mode 100644
index 8f8c653..0000000
--- 
a/runners/google-cloud-dataflow-java/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunnerHooks.java
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.google.cloud.dataflow.sdk.runners;
-
-import org.apache.beam.sdk.annotations.Experimental;
-
-import com.google.api.services.dataflow.model.Environment;
-
-/**
- * An instance of this class can be passed to the
- * {@link DataflowPipelineRunner} to add user defined hooks to be
- * invoked at various times during pipeline execution.
- */
-@Experimental
-public class DataflowPipelineRunnerHooks {
-  /**
-   * Allows the user to modify the environment of their job before their job is submitted
-   * to the service for execution.
-   *
-   * @param environment The environment of the job. Users can make changes to this instance in
-   *     order to change the environment with which their job executes on the service.
-   */
-  public void modifyEnvironmentBeforeSubmission(Environment environment) {}
-}

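A sketch of a subclass fulfilling the hook contract described above; illustrative only, and the experiment name is a made-up placeholder:

import com.google.api.services.dataflow.model.Environment;
import com.google.cloud.dataflow.sdk.runners.DataflowPipelineRunnerHooks;

import java.util.Arrays;

public class ExperimentHooks extends DataflowPipelineRunnerHooks {
  @Override
  public void modifyEnvironmentBeforeSubmission(Environment environment) {
    // Attach an experiment flag to the job before it is submitted.
    environment.setExperiments(Arrays.asList("my_example_experiment"));
  }
}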
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/0393a791/runners/google-cloud-dataflow-java/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslator.java
--
diff --git 
a/runners/google-cloud-dataflow-java/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslator.java
 
b/runners/google-cloud-dataflow-java/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslator.java
deleted file mode 100644
index fa7067e..0000000
--- 
a/runners/google-cloud-dataflow-java/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslator.java
+++ /dev/null
@@ -1,1100 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.google.cloud.dataflow.sdk.runners;
-
-import static com.google.cloud.dataflow.sdk.util.CoderUtils.encodeToByteArray;
-import static com.google.cloud.dataflow.sdk.util.SerializableUtils.serializeToByteArray;
-import static com.google.cloud.dataflow.sdk.util.StringUtils.byteArrayToJsonString;
-import static com.google.cloud.dataflow.sdk.util.StringUtils.jsonStringToByteArray;
-import static com.google.cloud.dataflow.sdk.util.Structs.addBoolean;
-import static com.google.cloud.dataflow.sdk.util.Structs.addDictionary;
-import static com.google.cloud.dataflow.sdk.util.Structs.addList;
-import static com.google.cloud.dataflow.sdk.util.Structs.addLong;
-import static com.google.cloud.dataflow.sdk.util.Structs.addObject;
-import static com.google.cloud.dataflow.sdk.util.Structs.addString;
-import static com.google.cloud.dataflow.sdk.util.Structs.getString;
-import static com.google.common.base.Preconditions.checkArgument;
-
-import com.google.api.services.dataflow.model.AutoscalingSettings;
-import com.google.api.services.dataflow.model.DataflowPackage;

[32/74] [partial] incubator-beam git commit: Rename com/google/cloud/dataflow->org/apache/beam

2016-04-13 Thread davor
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/0393a791/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/sdk/runners/DataflowPipelineRunner.java
--
diff --git 
a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/sdk/runners/DataflowPipelineRunner.java
 
b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/sdk/runners/DataflowPipelineRunner.java
new file mode 100644
index 0000000..2818251
--- /dev/null
+++ 
b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/sdk/runners/DataflowPipelineRunner.java
@@ -0,0 +1,3009 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.dataflow.sdk.runners;
+
+import static com.google.cloud.dataflow.sdk.util.StringUtils.approximatePTransformName;
+import static com.google.cloud.dataflow.sdk.util.StringUtils.approximateSimpleName;
+import static com.google.cloud.dataflow.sdk.util.WindowedValue.valueInEmptyWindows;
+import static com.google.common.base.Preconditions.checkArgument;
+import static com.google.common.base.Preconditions.checkState;
+
+import org.apache.beam.sdk.annotations.Experimental;
+
+import com.google.api.client.googleapis.json.GoogleJsonResponseException;
+import com.google.api.services.clouddebugger.v2.Clouddebugger;
+import com.google.api.services.clouddebugger.v2.model.Debuggee;
+import com.google.api.services.clouddebugger.v2.model.RegisterDebuggeeRequest;
+import com.google.api.services.clouddebugger.v2.model.RegisterDebuggeeResponse;
+import com.google.api.services.dataflow.Dataflow;
+import com.google.api.services.dataflow.model.DataflowPackage;
+import com.google.api.services.dataflow.model.Job;
+import com.google.api.services.dataflow.model.ListJobsResponse;
+import com.google.api.services.dataflow.model.WorkerPool;
+import com.google.cloud.dataflow.sdk.Pipeline;
+import com.google.cloud.dataflow.sdk.Pipeline.PipelineVisitor;
+import com.google.cloud.dataflow.sdk.PipelineResult.State;
+import com.google.cloud.dataflow.sdk.coders.AvroCoder;
+import com.google.cloud.dataflow.sdk.coders.BigEndianLongCoder;
+import com.google.cloud.dataflow.sdk.coders.CannotProvideCoderException;
+import com.google.cloud.dataflow.sdk.coders.Coder;
+import com.google.cloud.dataflow.sdk.coders.Coder.NonDeterministicException;
+import com.google.cloud.dataflow.sdk.coders.CoderException;
+import com.google.cloud.dataflow.sdk.coders.CoderRegistry;
+import com.google.cloud.dataflow.sdk.coders.IterableCoder;
+import com.google.cloud.dataflow.sdk.coders.KvCoder;
+import com.google.cloud.dataflow.sdk.coders.ListCoder;
+import com.google.cloud.dataflow.sdk.coders.MapCoder;
+import com.google.cloud.dataflow.sdk.coders.SerializableCoder;
+import com.google.cloud.dataflow.sdk.coders.StandardCoder;
+import com.google.cloud.dataflow.sdk.coders.VarIntCoder;
+import com.google.cloud.dataflow.sdk.coders.VarLongCoder;
+import com.google.cloud.dataflow.sdk.io.AvroIO;
+import com.google.cloud.dataflow.sdk.io.BigQueryIO;
+import com.google.cloud.dataflow.sdk.io.FileBasedSink;
+import com.google.cloud.dataflow.sdk.io.PubsubIO;
+import com.google.cloud.dataflow.sdk.io.Read;
+import com.google.cloud.dataflow.sdk.io.ShardNameTemplate;
+import com.google.cloud.dataflow.sdk.io.TextIO;
+import com.google.cloud.dataflow.sdk.io.UnboundedSource;
+import com.google.cloud.dataflow.sdk.io.Write;
+import com.google.cloud.dataflow.sdk.options.DataflowPipelineDebugOptions;
+import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions;
+import com.google.cloud.dataflow.sdk.options.DataflowPipelineWorkerPoolOptions;
+import com.google.cloud.dataflow.sdk.options.PipelineOptions;
+import com.google.cloud.dataflow.sdk.options.PipelineOptionsValidator;
+import com.google.cloud.dataflow.sdk.options.StreamingOptions;
+import com.google.cloud.dataflow.sdk.runners.DataflowPipelineTranslator.JobSpecification;
+import com.google.cloud.dataflow.sdk.runners.DataflowPipelineTranslator.TransformTranslator;
+import com.google.cloud.dataflow.sdk.runners.DataflowPipelineTranslator.TranslationContext;
+import com.google.cloud.dataflow.sdk.runners.dataflow.AssignWindows;
+import 

[02/74] [partial] incubator-beam git commit: Rename com/google/cloud/dataflow->org/apache/beam

2016-04-13 Thread davor
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/0393a791/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/testing/SerializableMatchers.java
--
diff --git a/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/testing/SerializableMatchers.java b/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/testing/SerializableMatchers.java
deleted file mode 100644
index 689c387..000
--- a/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/testing/SerializableMatchers.java
+++ /dev/null
@@ -1,1181 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.google.cloud.dataflow.sdk.testing;
-
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.coders.CoderException;
-import com.google.cloud.dataflow.sdk.coders.ListCoder;
-import com.google.cloud.dataflow.sdk.util.CoderUtils;
-import com.google.cloud.dataflow.sdk.util.UserCodeException;
-import com.google.cloud.dataflow.sdk.values.KV;
-import com.google.common.base.MoreObjects;
-
-import org.hamcrest.BaseMatcher;
-import org.hamcrest.Description;
-import org.hamcrest.Matcher;
-import org.hamcrest.Matchers;
-
-import java.io.Serializable;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.List;
-
-import javax.annotation.Nullable;
-
-/**
- * Static class for building and using {@link SerializableMatcher} instances.
- *
- * Most matchers are wrappers for hamcrest's {@link Matchers}. Please be familiar with the
- * documentation there. Values retained by a {@link SerializableMatcher} are required to be
- * serializable, either via Java serialization or via a provided {@link Coder}.
- *
- * The following matchers are novel to Dataflow:
- * <ul>
- * <li>{@link #kvWithKey} for matching just the key of a {@link KV}.
- * <li>{@link #kvWithValue} for matching just the value of a {@link KV}.
- * <li>{@link #kv} for matching the key and value of a {@link KV}.
- * </ul>
- *
- * For example, to match a group from
- * {@link com.google.cloud.dataflow.sdk.transforms.GroupByKey}, which has type
- * {@code KV<K, Iterable<V>>} for some {@code K} and {@code V} and where the order of the iterable
- * is undefined, use a matcher like
- * {@code kv(equalTo("some key"), containsInAnyOrder(1, 2, 3))}.
- */
-class SerializableMatchers implements Serializable {
-
-  // Serializable only because of capture by anonymous inner classes
-  private SerializableMatchers() { } // not instantiable
-
-  /**
-   * A {@link SerializableMatcher} with identical criteria to {@link Matchers#allOf(Iterable)}.
-   */
-  public static <T> SerializableMatcher<T>
-  allOf(Iterable<SerializableMatcher<? super T>> serializableMatchers) {
-
-    @SuppressWarnings({"rawtypes", "unchecked"}) // safe covariant cast
-    final Iterable<Matcher<? super T>> matchers = (Iterable) serializableMatchers;
-
-    return fromSupplier(new SerializableSupplier<Matcher<T>>() {
-      @Override
-      public Matcher<T> get() {
-        return Matchers.allOf(matchers);
-      }
-    });
-  }
-
-  /**
-   * A {@link SerializableMatcher} with identical criteria to {@link Matchers#allOf(Matcher[])}.
-   */
-  @SafeVarargs
-  public static <T> SerializableMatcher<T> allOf(final SerializableMatcher<T>... matchers) {
-    return fromSupplier(new SerializableSupplier<Matcher<T>>() {
-      @Override
-      public Matcher<T> get() {
-        return Matchers.allOf(matchers);
-      }
-    });
-  }
-
-  /**
-   * A {@link SerializableMatcher} with identical criteria to {@link Matchers#anyOf(Iterable)}.
-   */
-  public static <T> SerializableMatcher<T>
-  anyOf(Iterable<SerializableMatcher<? super T>> serializableMatchers) {
-
-    @SuppressWarnings({"rawtypes", "unchecked"}) // safe covariant cast
-    final Iterable<Matcher<? super T>> matchers = (Iterable) serializableMatchers;
-
-    return fromSupplier(new SerializableSupplier<Matcher<T>>() {
-      @Override
-      public Matcher<T> get() {
-        return Matchers.anyOf(matchers);
-      }
-    });
-  }
-
-  /**
-   * A {@link SerializableMatcher} with identical criteria to {@link Matchers#anyOf(Matcher[])}.
-   */
-  @SafeVarargs
-  public static <T> SerializableMatcher<T> anyOf(final SerializableMatcher<T>... matchers) {
-    return 
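
All of these overloads lean on the same trick: a hamcrest Matcher is not Serializable, so the class stores a serializable supplier and rebuilds the matcher on demand. A minimal sketch of that trick outside the SDK, assuming only hamcrest on the classpath; MatcherFactory is a made-up stand-in for the file's SerializableSupplier/fromSupplier pair:

import org.hamcrest.Matcher;
import org.hamcrest.Matchers;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;

public class SerializableMatcherSketch {

  // The factory, not the matcher itself, crosses the serialization boundary.
  interface MatcherFactory<T> extends Serializable {
    Matcher<T> create();
  }

  public static void main(String[] args) throws Exception {
    // Serializable automatically: the lambda's target type is Serializable
    // and the body captures nothing.
    MatcherFactory<Integer> factory = () -> Matchers.lessThan(10);

    // Round-trip through Java serialization, as happens when an assertion
    // is shipped to remote workers.
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    try (ObjectOutputStream out = new ObjectOutputStream(bytes)) {
      out.writeObject(factory);
    }
    MatcherFactory<?> revived;
    try (ObjectInputStream in =
        new ObjectInputStream(new ByteArrayInputStream(bytes.toByteArray()))) {
      revived = (MatcherFactory<?>) in.readObject();
    }

    System.out.println(revived.create().matches(7));  // prints true
  }
}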

[34/74] [partial] incubator-beam git commit: Rename com/google/cloud/dataflow->org/apache/beam

2016-04-13 Thread davor
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/0393a791/runners/google-cloud-dataflow-java/src/main/java/com/google/cloud/dataflow/sdk/testing/TestDataflowPipelineRunner.java
--
diff --git a/runners/google-cloud-dataflow-java/src/main/java/com/google/cloud/dataflow/sdk/testing/TestDataflowPipelineRunner.java b/runners/google-cloud-dataflow-java/src/main/java/com/google/cloud/dataflow/sdk/testing/TestDataflowPipelineRunner.java
deleted file mode 100644
index e961066..000
--- a/runners/google-cloud-dataflow-java/src/main/java/com/google/cloud/dataflow/sdk/testing/TestDataflowPipelineRunner.java
+++ /dev/null
@@ -1,256 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.google.cloud.dataflow.sdk.testing;
-
-import com.google.api.services.dataflow.model.JobMessage;
-import com.google.api.services.dataflow.model.JobMetrics;
-import com.google.api.services.dataflow.model.MetricUpdate;
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.PipelineResult.State;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-import com.google.cloud.dataflow.sdk.runners.DataflowJobExecutionException;
-import com.google.cloud.dataflow.sdk.runners.DataflowPipelineJob;
-import com.google.cloud.dataflow.sdk.runners.DataflowPipelineRunner;
-import com.google.cloud.dataflow.sdk.runners.PipelineRunner;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.util.MonitoringUtil;
-import com.google.cloud.dataflow.sdk.util.MonitoringUtil.JobMessagesHandler;
-import com.google.cloud.dataflow.sdk.values.PInput;
-import com.google.cloud.dataflow.sdk.values.POutput;
-import com.google.common.base.Optional;
-import com.google.common.base.Throwables;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.IOException;
-import java.math.BigDecimal;
-import java.util.List;
-import java.util.concurrent.Callable;
-import java.util.concurrent.ExecutionException;
-import java.util.concurrent.Future;
-import java.util.concurrent.TimeUnit;
-
-/**
- * {@link TestDataflowPipelineRunner} is a pipeline runner that wraps a
- * {@link DataflowPipelineRunner} when running tests against the {@link TestPipeline}.
- *
- * @see TestPipeline
- */
-public class TestDataflowPipelineRunner extends PipelineRunner<DataflowPipelineJob> {
-  private static final String TENTATIVE_COUNTER = "tentative";
-  private static final Logger LOG = LoggerFactory.getLogger(TestDataflowPipelineRunner.class);
-
-  private final TestDataflowPipelineOptions options;
-  private final DataflowPipelineRunner runner;
-  private int expectedNumberOfAssertions = 0;
-
-  TestDataflowPipelineRunner(TestDataflowPipelineOptions options) {
-    this.options = options;
-    this.runner = DataflowPipelineRunner.fromOptions(options);
-  }
-
-  /**
-   * Constructs a runner from the provided options.
-   */
-  public static TestDataflowPipelineRunner fromOptions(
-  PipelineOptions options) {
-    TestDataflowPipelineOptions dataflowOptions = options.as(TestDataflowPipelineOptions.class);
-
-    return new TestDataflowPipelineRunner(dataflowOptions);
-  }
-
-  @Override
-  public DataflowPipelineJob run(Pipeline pipeline) {
-    return run(pipeline, runner);
-  }
-
-  DataflowPipelineJob run(Pipeline pipeline, DataflowPipelineRunner runner) {
-
-    final DataflowPipelineJob job;
-    try {
-      job = runner.run(pipeline);
-    } catch (DataflowJobExecutionException ex) {
-      throw new IllegalStateException("The dataflow failed.");
-    }
-
-    LOG.info("Running Dataflow job {} with {} expected assertions.",
-        job.getJobId(), expectedNumberOfAssertions);
-
-    CancelWorkflowOnError messageHandler = new CancelWorkflowOnError(
-        job, new MonitoringUtil.PrintHandler(options.getJobMessageOutput()));
-
-    try {
-      final Optional<Boolean> result;
-
-      if (options.isStreaming()) {
-        Future<Optional<Boolean>> resultFuture = options.getExecutorService().submit(
-            new Callable<Optional<Boolean>>() {
-              @Override
-              public Optional<Boolean> call() throws Exception {
-                try {
-                  for (;;) {
-
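
The streaming branch that is cut off here submits a polling loop to an executor and bounds it with a timeout. A stripped-down sketch of that wrap-and-watch pattern, with Job and Runner as hypothetical stand-ins for DataflowPipelineJob and DataflowPipelineRunner:

import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;

public class WatchingRunnerSketch {

  interface Job { boolean isDone(); boolean succeeded(); }
  interface Runner { Job run(); }

  // Run the delegate, then poll the job off-thread until it finishes or
  // the timeout elapses, mirroring the shape of the run() method above.
  static boolean runAndWatch(Runner delegate, long timeoutSeconds) throws Exception {
    final Job job = delegate.run();
    ExecutorService executor = Executors.newSingleThreadExecutor();
    try {
      Future<Boolean> result = executor.submit(new Callable<Boolean>() {
        @Override
        public Boolean call() throws Exception {
          for (;;) {                // the same unbounded poll loop
            if (job.isDone()) {
              return job.succeeded();
            }
            Thread.sleep(1000L);    // back off between polls
          }
        }
      });
      return result.get(timeoutSeconds, TimeUnit.SECONDS);
    } finally {
      executor.shutdownNow();
    }
  }
}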

[66/74] incubator-beam git commit: Initial update of imports

2016-04-13 Thread davor
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/b9724454/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/SparkRuntimeContext.java
--
diff --git a/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/SparkRuntimeContext.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/SparkRuntimeContext.java
index bf618c4..ea125de 100644
--- a/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/SparkRuntimeContext.java
+++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/SparkRuntimeContext.java
@@ -26,23 +26,21 @@ import java.util.Map;
 
 import com.fasterxml.jackson.core.JsonProcessingException;
 import com.fasterxml.jackson.databind.ObjectMapper;
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.coders.CannotProvideCoderException;
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.coders.CoderRegistry;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-import com.google.cloud.dataflow.sdk.runners.AggregatorValues;
-import com.google.cloud.dataflow.sdk.transforms.Aggregator;
-import com.google.cloud.dataflow.sdk.transforms.Combine;
-import com.google.cloud.dataflow.sdk.transforms.Max;
-import com.google.cloud.dataflow.sdk.transforms.Min;
-import com.google.cloud.dataflow.sdk.transforms.Sum;
-import com.google.cloud.dataflow.sdk.values.TypeDescriptor;
 import com.google.common.collect.ImmutableList;
-
 import org.apache.beam.runners.spark.aggregators.AggAccumParam;
 import org.apache.beam.runners.spark.aggregators.NamedAggregators;
-
+import org.apache.beam.sdk.Pipeline;
+import org.apache.beam.sdk.coders.CannotProvideCoderException;
+import org.apache.beam.sdk.coders.Coder;
+import org.apache.beam.sdk.coders.CoderRegistry;
+import org.apache.beam.sdk.options.PipelineOptions;
+import org.apache.beam.sdk.runners.AggregatorValues;
+import org.apache.beam.sdk.transforms.Aggregator;
+import org.apache.beam.sdk.transforms.Combine;
+import org.apache.beam.sdk.transforms.Max;
+import org.apache.beam.sdk.transforms.Min;
+import org.apache.beam.sdk.transforms.Sum;
+import org.apache.beam.sdk.values.TypeDescriptor;
 import org.apache.spark.Accumulator;
 import org.apache.spark.api.java.JavaSparkContext;
 

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/b9724454/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/TransformEvaluator.java
--
diff --git a/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/TransformEvaluator.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/TransformEvaluator.java
index d8481bf..30ab076 100644
--- a/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/TransformEvaluator.java
+++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/TransformEvaluator.java
@@ -20,7 +20,7 @@ package org.apache.beam.runners.spark.translation;
 
 import java.io.Serializable;
 
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
+import org.apache.beam.sdk.transforms.PTransform;
 
 public interface TransformEvaluator<PT extends PTransform<?, ?>> extends Serializable {
   void evaluate(PT transform, EvaluationContext context);

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/b9724454/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/TransformTranslator.java
--
diff --git a/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/TransformTranslator.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/TransformTranslator.java
index 8fe3b24..0366856 100644
--- a/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/TransformTranslator.java
+++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/TransformTranslator.java
@@ -18,6 +18,11 @@
 
 package org.apache.beam.runners.spark.translation;
 
+import static org.apache.beam.runners.spark.io.hadoop.ShardNameBuilder.getOutputDirectory;
+import static org.apache.beam.runners.spark.io.hadoop.ShardNameBuilder.getOutputFilePrefix;
+import static org.apache.beam.runners.spark.io.hadoop.ShardNameBuilder.getOutputFileTemplate;
+import static org.apache.beam.runners.spark.io.hadoop.ShardNameBuilder.replaceShardCount;
+
 import java.io.IOException;
 import java.lang.reflect.Field;
 import java.util.Arrays;
@@ -25,40 +30,9 @@ import java.util.Collections;
 import java.util.List;
 import java.util.Map;
 
-import static org.apache.beam.runners.spark.io.hadoop.ShardNameBuilder.getOutputDirectory;
-import static org.apache.beam.runners.spark.io.hadoop.ShardNameBuilder.getOutputFilePrefix;
-import static 

[42/74] [partial] incubator-beam git commit: Rename com/google/cloud/dataflow->org/apache/beam

2016-04-13 Thread davor
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/0393a791/examples/java/src/test/java/com/google/cloud/dataflow/examples/WordCountTest.java
--
diff --git a/examples/java/src/test/java/com/google/cloud/dataflow/examples/WordCountTest.java b/examples/java/src/test/java/com/google/cloud/dataflow/examples/WordCountTest.java
deleted file mode 100644
index b1f4f27..000
--- a/examples/java/src/test/java/com/google/cloud/dataflow/examples/WordCountTest.java
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.google.cloud.dataflow.examples;
-
-import com.google.cloud.dataflow.examples.WordCount.CountWords;
-import com.google.cloud.dataflow.examples.WordCount.ExtractWordsFn;
-import com.google.cloud.dataflow.examples.WordCount.FormatAsTextFn;
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder;
-import com.google.cloud.dataflow.sdk.testing.PAssert;
-import com.google.cloud.dataflow.sdk.testing.RunnableOnService;
-import com.google.cloud.dataflow.sdk.testing.TestPipeline;
-import com.google.cloud.dataflow.sdk.transforms.Create;
-import com.google.cloud.dataflow.sdk.transforms.DoFnTester;
-import com.google.cloud.dataflow.sdk.transforms.MapElements;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-
-import org.hamcrest.CoreMatchers;
-import org.junit.Assert;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.junit.runner.RunWith;
-import org.junit.runners.JUnit4;
-
-import java.util.Arrays;
-import java.util.List;
-
-/**
- * Tests of WordCount.
- */
-@RunWith(JUnit4.class)
-public class WordCountTest {
-
-  /** Example test that tests a specific DoFn. */
-  @Test
-  public void testExtractWordsFn() {
-    DoFnTester<String, String> extractWordsFn =
-        DoFnTester.of(new ExtractWordsFn());
-
-    Assert.assertThat(extractWordsFn.processBatch(" some  input  words "),
-                      CoreMatchers.hasItems("some", "input", "words"));
-    Assert.assertThat(extractWordsFn.processBatch(" "),
-                      CoreMatchers.<String>hasItems());
-    Assert.assertThat(extractWordsFn.processBatch(" some ", " input", " words"),
-                      CoreMatchers.hasItems("some", "input", "words"));
-  }
-
-  static final String[] WORDS_ARRAY = new String[] {
-    "hi there", "hi", "hi sue bob",
-    "hi sue", "", "bob hi"};
-
-  static final List<String> WORDS = Arrays.asList(WORDS_ARRAY);
-
-  static final String[] COUNTS_ARRAY = new String[] {
-  "hi: 5", "there: 1", "sue: 2", "bob: 2"};
-
-  /** Example test that tests a PTransform by using an in-memory input and inspecting the output. */
-  @Test
-  @Category(RunnableOnService.class)
-  public void testCountWords() throws Exception {
-    Pipeline p = TestPipeline.create();
-
-    PCollection<String> input = p.apply(Create.of(WORDS).withCoder(StringUtf8Coder.of()));
-
-    PCollection<String> output = input.apply(new CountWords())
-      .apply(MapElements.via(new FormatAsTextFn()));
-
-    PAssert.that(output).containsInAnyOrder(COUNTS_ARRAY);
-    p.run();
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/0393a791/examples/java/src/test/java/com/google/cloud/dataflow/examples/complete/AutoCompleteTest.java
--
diff --git a/examples/java/src/test/java/com/google/cloud/dataflow/examples/complete/AutoCompleteTest.java b/examples/java/src/test/java/com/google/cloud/dataflow/examples/complete/AutoCompleteTest.java
deleted file mode 100644
index a70aee1..000
--- a/examples/java/src/test/java/com/google/cloud/dataflow/examples/complete/AutoCompleteTest.java
+++ /dev/null
@@ -1,182 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the 

[63/74] incubator-beam git commit: Initial update of imports

2016-04-13 Thread davor
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/b9724454/sdks/java/core/src/main/java/org/apache/beam/sdk/runners/inprocess/EncodabilityEnforcementFactory.java
--
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/runners/inprocess/EncodabilityEnforcementFactory.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/runners/inprocess/EncodabilityEnforcementFactory.java
index 63a43b5..02a36cf 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/runners/inprocess/EncodabilityEnforcementFactory.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/runners/inprocess/EncodabilityEnforcementFactory.java
@@ -15,18 +15,18 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package com.google.cloud.dataflow.sdk.runners.inprocess;
+package org.apache.beam.sdk.runners.inprocess;
 
 import static com.google.common.base.Preconditions.checkArgument;
 
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.runners.inprocess.InProcessPipelineRunner.CommittedBundle;
-import com.google.cloud.dataflow.sdk.transforms.AppliedPTransform;
-import com.google.cloud.dataflow.sdk.util.CoderUtils;
-import com.google.cloud.dataflow.sdk.util.SerializableUtils;
-import com.google.cloud.dataflow.sdk.util.UserCodeException;
-import com.google.cloud.dataflow.sdk.util.WindowedValue;
-import com.google.cloud.dataflow.sdk.values.PCollection;
+import org.apache.beam.sdk.coders.Coder;
+import org.apache.beam.sdk.runners.inprocess.InProcessPipelineRunner.CommittedBundle;
+import org.apache.beam.sdk.transforms.AppliedPTransform;
+import org.apache.beam.sdk.util.CoderUtils;
+import org.apache.beam.sdk.util.SerializableUtils;
+import org.apache.beam.sdk.util.UserCodeException;
+import org.apache.beam.sdk.util.WindowedValue;
+import org.apache.beam.sdk.values.PCollection;
 
 /**
  * Enforces that all elements in a {@link PCollection} can be encoded using that
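
The enforcement named by this (truncated) javadoc amounts to round-tripping every element through its coder and failing fast if that throws. A self-contained sketch of the check, with a toy Coder interface standing in for the SDK's org.apache.beam.sdk.coders.Coder:

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;

public class EncodabilitySketch {

  // Toy stand-in for org.apache.beam.sdk.coders.Coder<T>.
  interface Coder<T> {
    void encode(T value, OutputStream out) throws IOException;
    T decode(InputStream in) throws IOException;
  }

  // The enforcement: every element must survive an encode/decode round trip.
  static <T> void enforceEncodable(T element, Coder<T> coder) {
    try {
      ByteArrayOutputStream bytes = new ByteArrayOutputStream();
      coder.encode(element, bytes);
      coder.decode(new ByteArrayInputStream(bytes.toByteArray()));
    } catch (Exception e) {
      throw new IllegalArgumentException(
          "Unable to encode element '" + element + "' with the given coder", e);
    }
  }

  public static void main(String[] args) {
    Coder<String> utf8 = new Coder<String>() {
      @Override public void encode(String value, OutputStream out) throws IOException {
        out.write(value.getBytes(StandardCharsets.UTF_8));
      }
      @Override public String decode(InputStream in) throws IOException {
        ByteArrayOutputStream buf = new ByteArrayOutputStream();
        int b;
        while ((b = in.read()) != -1) {
          buf.write(b);
        }
        return new String(buf.toByteArray(), StandardCharsets.UTF_8);
      }
    };
    enforceEncodable("hello", utf8);  // passes silently
  }
}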

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/b9724454/sdks/java/core/src/main/java/org/apache/beam/sdk/runners/inprocess/EvaluatorKey.java
--
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/runners/inprocess/EvaluatorKey.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/runners/inprocess/EvaluatorKey.java
index c32d820..9d8fc43 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/runners/inprocess/EvaluatorKey.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/runners/inprocess/EvaluatorKey.java
@@ -15,9 +15,9 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package com.google.cloud.dataflow.sdk.runners.inprocess;
+package org.apache.beam.sdk.runners.inprocess;
 
-import com.google.cloud.dataflow.sdk.transforms.AppliedPTransform;
+import org.apache.beam.sdk.transforms.AppliedPTransform;
 
 import java.util.Objects;
 

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/b9724454/sdks/java/core/src/main/java/org/apache/beam/sdk/runners/inprocess/ExecutorServiceFactory.java
--
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/runners/inprocess/ExecutorServiceFactory.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/runners/inprocess/ExecutorServiceFactory.java
index 044402c..cfbf7b4 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/runners/inprocess/ExecutorServiceFactory.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/runners/inprocess/ExecutorServiceFactory.java
@@ -15,7 +15,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package com.google.cloud.dataflow.sdk.runners.inprocess;
+package org.apache.beam.sdk.runners.inprocess;
 
 import java.util.concurrent.ExecutorService;
 

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/b9724454/sdks/java/core/src/main/java/org/apache/beam/sdk/runners/inprocess/ExecutorServiceParallelExecutor.java
--
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/runners/inprocess/ExecutorServiceParallelExecutor.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/runners/inprocess/ExecutorServiceParallelExecutor.java
index a17292f..3463d08 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/runners/inprocess/ExecutorServiceParallelExecutor.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/runners/inprocess/ExecutorServiceParallelExecutor.java
@@ -15,20 +15,21 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package com.google.cloud.dataflow.sdk.runners.inprocess;
-
-import 

[10/74] [partial] incubator-beam git commit: Rename com/google/cloud/dataflow->org/apache/beam

2016-04-13 Thread davor
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/0393a791/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptions.java
--
diff --git a/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptions.java b/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptions.java
deleted file mode 100644
index 2ca845d..000
--- a/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptions.java
+++ /dev/null
@@ -1,264 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.google.cloud.dataflow.sdk.options;
-
-import com.google.auto.service.AutoService;
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.options.GoogleApiDebugOptions.GoogleApiTracer;
-import com.google.cloud.dataflow.sdk.options.ProxyInvocationHandler.Deserializer;
-import com.google.cloud.dataflow.sdk.options.ProxyInvocationHandler.Serializer;
-import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner;
-import com.google.cloud.dataflow.sdk.runners.PipelineRunner;
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-import com.google.cloud.dataflow.sdk.transforms.DoFn.Context;
-
-import com.fasterxml.jackson.annotation.JsonIgnore;
-import com.fasterxml.jackson.databind.ObjectMapper;
-import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
-import com.fasterxml.jackson.databind.annotation.JsonSerialize;
-
-import java.lang.reflect.Proxy;
-import java.util.ServiceLoader;
-
-import javax.annotation.concurrent.ThreadSafe;
-
-/**
- * PipelineOptions are used to configure Pipelines. You can extend {@link PipelineOptions}
- * to create custom configuration options specific to your {@link Pipeline},
- * for both local execution and execution via a {@link PipelineRunner}.
- *
- * {@link PipelineOptions} and their subinterfaces represent a collection of properties
- * which can be manipulated in a type safe manner. {@link PipelineOptions} is backed by a
- * dynamic {@link Proxy} which allows for type safe manipulation of properties in an extensible
- * fashion through plain old Java interfaces.
- *
- * {@link PipelineOptions} can be created with {@link PipelineOptionsFactory#create()}
- * and {@link PipelineOptionsFactory#as(Class)}. They can be created
- * from command-line arguments with {@link PipelineOptionsFactory#fromArgs(String[])}.
- * They can be converted to another type by invoking {@link PipelineOptions#as(Class)} and
- * can be accessed from within a {@link DoFn} by invoking
- * {@link Context#getPipelineOptions()}.
- *
- * For example:
- * {@code
- * // The most common way to construct PipelineOptions is via command-line argument parsing:
- * public static void main(String[] args) {
- *   // Will parse the arguments passed into the application and construct a PipelineOptions
- *   // Note that --help will print registered options, and --help=PipelineOptionsClassName
- *   // will print out usage for the specific class.
- *   PipelineOptions options =
- *   PipelineOptionsFactory.fromArgs(args).create();
- *
- *   Pipeline p = Pipeline.create(options);
- *   ...
- *   p.run();
- * }
- *
- * // To create options for the DirectPipeline:
- * DirectPipelineOptions directPipelineOptions =
- * PipelineOptionsFactory.as(DirectPipelineOptions.class);
- * directPipelineOptions.setStreaming(true);
- *
- * // To cast from one type to another using the as(Class) method:
- * DataflowPipelineOptions dataflowPipelineOptions =
- * directPipelineOptions.as(DataflowPipelineOptions.class);
- *
- * // Options for the same property are shared between types
- * // The statement below will print out "true"
- * System.out.println(dataflowPipelineOptions.isStreaming());
- *
- * // Prints out registered options.
- * PipelineOptionsFactory.printHelp(System.out);
- *
- * // Prints out options which are available to be set on DataflowPipelineOptions
- * PipelineOptionsFactory.printHelp(System.out, DataflowPipelineOptions.class);
- * }
- *
- * Defining Your Own PipelineOptions
- *
- * Defining your own {@link PipelineOptions} is the way for you to 
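
The fragment cuts off just as it begins "Defining Your Own PipelineOptions". The pattern it goes on to describe is an interface of getter/setter pairs served by the dynamic proxy; a sketch under that convention, with MyOptions and its single property as made-up names:

import com.google.cloud.dataflow.sdk.options.Default;
import com.google.cloud.dataflow.sdk.options.Description;
import com.google.cloud.dataflow.sdk.options.PipelineOptions;

// Each property is a getter/setter pair; the PipelineOptions proxy supplies
// the storage and the command-line binding (--input=... sets this one).
public interface MyOptions extends PipelineOptions {
  @Description("Path of the file to read from")
  @Default.String("gs://my-bucket/input.txt")
  String getInput();
  void setInput(String value);
}

// Typical use, mirroring the main() example above:
//   MyOptions options = PipelineOptionsFactory.fromArgs(args).as(MyOptions.class);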

[67/74] incubator-beam git commit: Initial update of imports

2016-04-13 Thread davor
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/b9724454/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/sdk/util/DataflowTransport.java
--
diff --git a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/sdk/util/DataflowTransport.java b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/sdk/util/DataflowTransport.java
index 18e6654..8fcfccf 100644
--- a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/sdk/util/DataflowTransport.java
+++ b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/sdk/util/DataflowTransport.java
@@ -15,16 +15,17 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package com.google.cloud.dataflow.sdk.util;
+package org.apache.beam.sdk.util;
 
-import static com.google.cloud.dataflow.sdk.util.Transport.getJsonFactory;
-import static com.google.cloud.dataflow.sdk.util.Transport.getTransport;
+import static org.apache.beam.sdk.util.Transport.getJsonFactory;
+import static org.apache.beam.sdk.util.Transport.getTransport;
+
+import org.apache.beam.sdk.options.DataflowPipelineOptions;
 
 import com.google.api.client.auth.oauth2.Credential;
 import com.google.api.client.http.HttpRequestInitializer;
 import com.google.api.services.clouddebugger.v2.Clouddebugger;
 import com.google.api.services.dataflow.Dataflow;
-import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions;
 import com.google.cloud.hadoop.util.ChainingHttpRequestInitializer;
 import com.google.common.collect.ImmutableList;
 

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/b9724454/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/sdk/util/GcsStager.java
--
diff --git a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/sdk/util/GcsStager.java b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/sdk/util/GcsStager.java
index 7307e83..4f1f673 100644
--- a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/sdk/util/GcsStager.java
+++ b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/sdk/util/GcsStager.java
@@ -15,12 +15,13 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package com.google.cloud.dataflow.sdk.util;
+package org.apache.beam.sdk.util;
+
+import org.apache.beam.sdk.options.DataflowPipelineDebugOptions;
+import org.apache.beam.sdk.options.DataflowPipelineOptions;
+import org.apache.beam.sdk.options.PipelineOptions;
 
 import com.google.api.services.dataflow.model.DataflowPackage;
-import com.google.cloud.dataflow.sdk.options.DataflowPipelineDebugOptions;
-import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
 import com.google.common.base.Preconditions;
 
 import java.util.List;

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/b9724454/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/sdk/util/MonitoringUtil.java
--
diff --git a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/sdk/util/MonitoringUtil.java b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/sdk/util/MonitoringUtil.java
index 2c06a92..5afca52 100644
--- a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/sdk/util/MonitoringUtil.java
+++ b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/sdk/util/MonitoringUtil.java
@@ -15,16 +15,17 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package com.google.cloud.dataflow.sdk.util;
+package org.apache.beam.sdk.util;
 
-import static com.google.cloud.dataflow.sdk.util.TimeUtil.fromCloudTime;
+import static org.apache.beam.sdk.util.TimeUtil.fromCloudTime;
+
+import org.apache.beam.sdk.PipelineResult.State;
+import org.apache.beam.sdk.options.DataflowPipelineOptions;
 
 import com.google.api.services.dataflow.Dataflow;
 import com.google.api.services.dataflow.Dataflow.Projects.Jobs.Messages;
 import com.google.api.services.dataflow.model.JobMessage;
 import com.google.api.services.dataflow.model.ListJobMessagesResponse;
-import com.google.cloud.dataflow.sdk.PipelineResult.State;
-import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions;
 import com.google.common.base.MoreObjects;
 import com.google.common.collect.ImmutableMap;
 

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/b9724454/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/sdk/util/PackageUtil.java
--
diff --git 

[24/74] [partial] incubator-beam git commit: Rename com/google/cloud/dataflow->org/apache/beam

2016-04-13 Thread davor
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/0393a791/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/sdk/runners/DataflowPipelineTranslatorTest.java
--
diff --git a/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/sdk/runners/DataflowPipelineTranslatorTest.java b/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/sdk/runners/DataflowPipelineTranslatorTest.java
new file mode 100644
index 000..dd1b3c8
--- /dev/null
+++ b/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/sdk/runners/DataflowPipelineTranslatorTest.java
@@ -0,0 +1,890 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.dataflow.sdk.runners;
+
+import static com.google.cloud.dataflow.sdk.util.Structs.addObject;
+import static com.google.cloud.dataflow.sdk.util.Structs.getDictionary;
+import static com.google.cloud.dataflow.sdk.util.Structs.getString;
+import static org.hamcrest.Matchers.containsString;
+import static org.hamcrest.Matchers.hasKey;
+import static org.hamcrest.core.IsInstanceOf.instanceOf;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertThat;
+import static org.junit.Assert.assertTrue;
+import static org.mockito.Matchers.any;
+import static org.mockito.Matchers.anyString;
+import static org.mockito.Matchers.argThat;
+import static org.mockito.Matchers.eq;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+import com.google.api.services.dataflow.Dataflow;
+import com.google.api.services.dataflow.model.DataflowPackage;
+import com.google.api.services.dataflow.model.Job;
+import com.google.api.services.dataflow.model.Step;
+import com.google.api.services.dataflow.model.WorkerPool;
+import com.google.cloud.dataflow.sdk.Pipeline;
+import com.google.cloud.dataflow.sdk.coders.Coder;
+import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder;
+import com.google.cloud.dataflow.sdk.coders.VarIntCoder;
+import com.google.cloud.dataflow.sdk.coders.VoidCoder;
+import com.google.cloud.dataflow.sdk.io.TextIO;
+import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions;
+import com.google.cloud.dataflow.sdk.options.DataflowPipelineWorkerPoolOptions;
+import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
+import com.google.cloud.dataflow.sdk.runners.DataflowPipelineTranslator.TranslationContext;
+import com.google.cloud.dataflow.sdk.transforms.Count;
+import com.google.cloud.dataflow.sdk.transforms.Create;
+import com.google.cloud.dataflow.sdk.transforms.DoFn;
+import com.google.cloud.dataflow.sdk.transforms.PTransform;
+import com.google.cloud.dataflow.sdk.transforms.ParDo;
+import com.google.cloud.dataflow.sdk.transforms.Sum;
+import com.google.cloud.dataflow.sdk.transforms.View;
+import com.google.cloud.dataflow.sdk.transforms.display.DisplayData;
+import com.google.cloud.dataflow.sdk.util.GcsUtil;
+import com.google.cloud.dataflow.sdk.util.OutputReference;
+import com.google.cloud.dataflow.sdk.util.PropertyNames;
+import com.google.cloud.dataflow.sdk.util.Structs;
+import com.google.cloud.dataflow.sdk.util.TestCredential;
+import com.google.cloud.dataflow.sdk.util.WindowingStrategy;
+import com.google.cloud.dataflow.sdk.util.gcsfs.GcsPath;
+import com.google.cloud.dataflow.sdk.values.PCollection;
+import com.google.cloud.dataflow.sdk.values.PCollectionTuple;
+import com.google.cloud.dataflow.sdk.values.PDone;
+import com.google.cloud.dataflow.sdk.values.TupleTag;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Iterables;
+
+import org.hamcrest.Matchers;
+import org.junit.Assert;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.internal.matchers.ThrowableMessageMatcher;
+import org.junit.rules.ExpectedException;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+import org.mockito.ArgumentMatcher;
+import org.mockito.invocation.InvocationOnMock;
+import org.mockito.stubbing.Answer;
+
+import java.io.IOException;
+import java.io.Serializable;
+import 

[47/74] [partial] incubator-beam git commit: Rename com/google/cloud/dataflow->org/apache/beam

2016-04-13 Thread davor
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/0393a791/examples/java/src/main/java/com/google/cloud/dataflow/examples/complete/TopWikipediaSessions.java
--
diff --git a/examples/java/src/main/java/com/google/cloud/dataflow/examples/complete/TopWikipediaSessions.java b/examples/java/src/main/java/com/google/cloud/dataflow/examples/complete/TopWikipediaSessions.java
deleted file mode 100644
index f1d8d1a..000
--- a/examples/java/src/main/java/com/google/cloud/dataflow/examples/complete/TopWikipediaSessions.java
+++ /dev/null
@@ -1,224 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.google.cloud.dataflow.examples.complete;
-
-import com.google.api.services.bigquery.model.TableRow;
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.coders.TableRowJsonCoder;
-import com.google.cloud.dataflow.sdk.io.TextIO;
-import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions;
-import com.google.cloud.dataflow.sdk.options.Default;
-import com.google.cloud.dataflow.sdk.options.Description;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
-import com.google.cloud.dataflow.sdk.options.Validation;
-import com.google.cloud.dataflow.sdk.transforms.Count;
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-import com.google.cloud.dataflow.sdk.transforms.DoFn.RequiresWindowAccess;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.transforms.ParDo;
-import com.google.cloud.dataflow.sdk.transforms.SerializableComparator;
-import com.google.cloud.dataflow.sdk.transforms.Top;
-import com.google.cloud.dataflow.sdk.transforms.windowing.CalendarWindows;
-import com.google.cloud.dataflow.sdk.transforms.windowing.IntervalWindow;
-import com.google.cloud.dataflow.sdk.transforms.windowing.Sessions;
-import com.google.cloud.dataflow.sdk.transforms.windowing.Window;
-import com.google.cloud.dataflow.sdk.values.KV;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-
-import org.joda.time.Duration;
-import org.joda.time.Instant;
-
-import java.util.List;
-
-/**
- * An example that reads Wikipedia edit data from Cloud Storage and computes the user with
- * the longest string of edits separated by no more than an hour within each month.
- *
- * Concepts: Using Windowing to perform time-based aggregations of data.
- *
- * It is not recommended to execute this pipeline locally, given the size of the default input
- * data.
- *
- * To execute this pipeline using the Dataflow service, specify pipeline configuration:
- * {@code
- *   --project=YOUR_PROJECT_ID
- *   --stagingLocation=gs://YOUR_STAGING_DIRECTORY
- *   --runner=BlockingDataflowPipelineRunner
- * }
- * 
- * and an output prefix on GCS:
- * {@code
- *   --output=gs://YOUR_OUTPUT_PREFIX
- * }
- *
- * The default input is {@code gs://dataflow-samples/wikipedia_edits/*.json} and can be
- * overridden with {@code --input}.
- *
- * The input for this example is large enough that it's a good place to enable (experimental)
- * autoscaling:
- * {@code
- *   --autoscalingAlgorithm=BASIC
- *   --maxNumWorkers=20
- * }
- * 
- * This will automatically scale the number of workers up over time until the job completes.
- */
-public class TopWikipediaSessions {
-  private static final String EXPORTED_WIKI_TABLE =
-      "gs://dataflow-samples/wikipedia_edits/*.json";
-
-  /**
-   * Extracts user and timestamp from a TableRow representing a Wikipedia edit.
-   */
-  static class ExtractUserAndTimestamp extends DoFn<TableRow, String> {
-    @Override
-    public void processElement(ProcessContext c) {
-      TableRow row = c.element();
-      int timestamp = (Integer) row.get("timestamp");
-      String userName = (String) row.get("contributor_username");
-      if (userName != null) {
-        // Sets the implicit timestamp field to be used in windowing.
-        c.outputWithTimestamp(userName, new Instant(timestamp * 1000L));
-      }
-    }
-  }
-
-  /**
-   * Computes the number of edits in each user session.  A session is defined as
-   * a 
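
The session computation this javadoc starts to describe is, at heart, a Sessions window followed by a per-element count. A sketch of that shape using the windowing types the deleted file imports; the class name is made up, and the one-hour gap follows the example's description:

import com.google.cloud.dataflow.sdk.transforms.Count;
import com.google.cloud.dataflow.sdk.transforms.PTransform;
import com.google.cloud.dataflow.sdk.transforms.windowing.Sessions;
import com.google.cloud.dataflow.sdk.transforms.windowing.Window;
import com.google.cloud.dataflow.sdk.values.KV;
import com.google.cloud.dataflow.sdk.values.PCollection;

import org.joda.time.Duration;

// Sessionize user names by a one-hour gap, then count the edits each user
// made within each session.
class ComputeSessionsSketch
    extends PTransform<PCollection<String>, PCollection<KV<String, Long>>> {
  @Override
  public PCollection<KV<String, Long>> apply(PCollection<String> userEdits) {
    return userEdits
        .apply(Window.<String>into(Sessions.withGapDuration(Duration.standardHours(1))))
        .apply(Count.<String>perElement());
  }
}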

[25/74] [partial] incubator-beam git commit: Rename com/google/cloud/dataflow->org/apache/beam

2016-04-13 Thread davor
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/0393a791/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/sdk/runners/DataflowPipelineRunnerTest.java
--
diff --git a/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/sdk/runners/DataflowPipelineRunnerTest.java b/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/sdk/runners/DataflowPipelineRunnerTest.java
new file mode 100644
index 000..c6dca2d
--- /dev/null
+++ b/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/sdk/runners/DataflowPipelineRunnerTest.java
@@ -0,0 +1,1369 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.dataflow.sdk.runners;
+
+import static com.google.cloud.dataflow.sdk.util.WindowedValue.valueInGlobalWindow;
+import static org.hamcrest.Matchers.containsInAnyOrder;
+import static org.hamcrest.Matchers.containsString;
+import static org.hamcrest.Matchers.instanceOf;
+import static org.hamcrest.Matchers.startsWith;
+import static org.hamcrest.collection.IsIterableContainingInOrder.contains;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertThat;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+import static org.mockito.Matchers.any;
+import static org.mockito.Matchers.anyString;
+import static org.mockito.Matchers.eq;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+import com.google.api.services.dataflow.Dataflow;
+import com.google.api.services.dataflow.model.DataflowPackage;
+import com.google.api.services.dataflow.model.Job;
+import com.google.api.services.dataflow.model.ListJobsResponse;
+import com.google.cloud.dataflow.sdk.Pipeline;
+import com.google.cloud.dataflow.sdk.Pipeline.PipelineVisitor;
+import com.google.cloud.dataflow.sdk.coders.BigEndianIntegerCoder;
+import com.google.cloud.dataflow.sdk.coders.BigEndianLongCoder;
+import com.google.cloud.dataflow.sdk.coders.Coder;
+import com.google.cloud.dataflow.sdk.coders.VarLongCoder;
+import com.google.cloud.dataflow.sdk.io.AvroIO;
+import com.google.cloud.dataflow.sdk.io.AvroSource;
+import com.google.cloud.dataflow.sdk.io.BigQueryIO;
+import com.google.cloud.dataflow.sdk.io.Read;
+import com.google.cloud.dataflow.sdk.io.TextIO;
+import com.google.cloud.dataflow.sdk.options.DataflowPipelineDebugOptions;
+import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions;
+import com.google.cloud.dataflow.sdk.options.PipelineOptions;
+import com.google.cloud.dataflow.sdk.options.PipelineOptions.CheckEnabled;
+import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
+import com.google.cloud.dataflow.sdk.runners.DataflowPipelineRunner.BatchViewAsList;
+import com.google.cloud.dataflow.sdk.runners.DataflowPipelineRunner.BatchViewAsMap;
+import com.google.cloud.dataflow.sdk.runners.DataflowPipelineRunner.BatchViewAsMultimap;
+import com.google.cloud.dataflow.sdk.runners.DataflowPipelineRunner.TransformedMap;
+import com.google.cloud.dataflow.sdk.runners.dataflow.TestCountingSource;
+import com.google.cloud.dataflow.sdk.runners.worker.IsmFormat;
+import com.google.cloud.dataflow.sdk.runners.worker.IsmFormat.IsmRecord;
+import com.google.cloud.dataflow.sdk.runners.worker.IsmFormat.IsmRecordCoder;
+import com.google.cloud.dataflow.sdk.runners.worker.IsmFormat.MetadataKeyCoder;
+import com.google.cloud.dataflow.sdk.transforms.Create;
+import com.google.cloud.dataflow.sdk.transforms.DoFnTester;
+import com.google.cloud.dataflow.sdk.transforms.PTransform;
+import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow;
+import com.google.cloud.dataflow.sdk.transforms.windowing.IntervalWindow;
+import com.google.cloud.dataflow.sdk.transforms.windowing.PaneInfo;
+import com.google.cloud.dataflow.sdk.util.CoderUtils;
+import com.google.cloud.dataflow.sdk.util.DataflowReleaseInfo;
+import com.google.cloud.dataflow.sdk.util.GcsUtil;
+import com.google.cloud.dataflow.sdk.util.NoopPathValidator;
+import 

[19/74] [partial] incubator-beam git commit: Rename com/google/cloud/dataflow->org/apache/beam

2016-04-13 Thread davor
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/0393a791/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/coders/protobuf/package-info.java
--
diff --git a/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/coders/protobuf/package-info.java b/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/coders/protobuf/package-info.java
deleted file mode 100644
index 8242fad..000
--- a/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/coders/protobuf/package-info.java
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/**
- * Defines a {@link com.google.cloud.dataflow.sdk.coders.Coder}
- * for Protocol Buffers messages, {@code ProtoCoder}.
- *
- * @see com.google.cloud.dataflow.sdk.coders.protobuf.ProtoCoder
- */
-package com.google.cloud.dataflow.sdk.coders.protobuf;

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/0393a791/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/io/AvroIO.java
--
diff --git a/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/io/AvroIO.java b/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/io/AvroIO.java
deleted file mode 100644
index 145804d..000
--- a/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/io/AvroIO.java
+++ /dev/null
@@ -1,811 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.google.cloud.dataflow.sdk.io;
-
-import static com.google.common.base.Preconditions.checkState;
-
-import com.google.cloud.dataflow.sdk.coders.AvroCoder;
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.coders.VoidCoder;
-import com.google.cloud.dataflow.sdk.io.Read.Bounded;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-import com.google.cloud.dataflow.sdk.runners.PipelineRunner;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.util.IOChannelUtils;
-import com.google.cloud.dataflow.sdk.util.MimeTypes;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.cloud.dataflow.sdk.values.PDone;
-import com.google.cloud.dataflow.sdk.values.PInput;
-import com.google.common.annotations.VisibleForTesting;
-import com.google.common.base.Preconditions;
-
-import org.apache.avro.Schema;
-import org.apache.avro.file.DataFileWriter;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.avro.reflect.ReflectData;
-
-import java.io.IOException;
-import java.nio.channels.Channels;
-import java.nio.channels.WritableByteChannel;
-import java.util.regex.Pattern;
-
-import javax.annotation.Nullable;
-
-/**
- * {@link PTransform}s for reading and writing Avro files.
- *
- * To read a {@link PCollection} from one or more Avro files, use
- * {@link AvroIO.Read}, specifying {@link AvroIO.Read#from} to specify
- * the path of the file(s) to read from (e.g., a local filename or
- * filename pattern if running locally, or a Google Cloud Storage
- * filename or filename pattern of the form
- * {@code "gs:///"}), and optionally
- * {@link AvroIO.Read#named} to specify the name of the pipeline step.
- *
- * It is required to specify {@link AvroIO.Read#withSchema}. To
- * read specific records, such as Avro-generated classes, provide an
- * Avro-generated class type. To read {@link 

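For reference, a minimal usage sketch of the Read API described in the
Javadoc above; the record class AvroAutoGenClass, the options variable, and
the path are illustrative placeholders, not part of this commit:

    Pipeline p = Pipeline.create(options);
    PCollection<AvroAutoGenClass> records = p.apply(
        AvroIO.Read.named("ReadFromAvro")
                   .from("gs://my-bucket/path/to/records-*.avro")
                   .withSchema(AvroAutoGenClass.class));
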
[48/74] [partial] incubator-beam git commit: Rename com/google/cloud/dataflow->org/apache/beam

2016-04-13 Thread davor
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/0393a791/examples/java/src/main/java/com/google/cloud/dataflow/examples/common/DataflowExampleUtils.java
--
diff --git 
a/examples/java/src/main/java/com/google/cloud/dataflow/examples/common/DataflowExampleUtils.java
 
b/examples/java/src/main/java/com/google/cloud/dataflow/examples/common/DataflowExampleUtils.java
deleted file mode 100644
index 5b98170..000
--- 
a/examples/java/src/main/java/com/google/cloud/dataflow/examples/common/DataflowExampleUtils.java
+++ /dev/null
@@ -1,488 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.google.cloud.dataflow.examples.common;
-
-import com.google.api.client.googleapis.json.GoogleJsonResponseException;
-import com.google.api.client.googleapis.services.AbstractGoogleClientRequest;
-import com.google.api.client.util.BackOff;
-import com.google.api.client.util.BackOffUtils;
-import com.google.api.client.util.Sleeper;
-import com.google.api.services.bigquery.Bigquery;
-import com.google.api.services.bigquery.Bigquery.Datasets;
-import com.google.api.services.bigquery.Bigquery.Tables;
-import com.google.api.services.bigquery.model.Dataset;
-import com.google.api.services.bigquery.model.DatasetReference;
-import com.google.api.services.bigquery.model.Table;
-import com.google.api.services.bigquery.model.TableReference;
-import com.google.api.services.bigquery.model.TableSchema;
-import com.google.api.services.dataflow.Dataflow;
-import com.google.api.services.pubsub.Pubsub;
-import com.google.api.services.pubsub.model.Subscription;
-import com.google.api.services.pubsub.model.Topic;
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.PipelineResult;
-import com.google.cloud.dataflow.sdk.io.TextIO;
-import com.google.cloud.dataflow.sdk.options.BigQueryOptions;
-import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions;
-import com.google.cloud.dataflow.sdk.runners.DataflowPipelineJob;
-import com.google.cloud.dataflow.sdk.runners.DataflowPipelineRunner;
-import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner;
-import com.google.cloud.dataflow.sdk.transforms.IntraBundleParallelization;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.util.AttemptBoundedExponentialBackOff;
-import com.google.cloud.dataflow.sdk.util.MonitoringUtil;
-import com.google.cloud.dataflow.sdk.util.Transport;
-import com.google.cloud.dataflow.sdk.values.PBegin;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.common.base.Strings;
-import com.google.common.base.Throwables;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Sets;
-import com.google.common.util.concurrent.Uninterruptibles;
-
-import java.io.IOException;
-import java.util.Collection;
-import java.util.List;
-import java.util.Set;
-import java.util.concurrent.TimeUnit;
-
-import javax.servlet.http.HttpServletResponse;
-
-/**
- * The utility class that sets up and tears down external resources, starts 
the Google Cloud Pub/Sub
- * injector, and cancels the streaming and the injector pipelines once the 
program terminates.
- *
- * It is used to run Dataflow examples, such as TrafficMaxLaneFlow and 
TrafficRoutes.
- */
-public class DataflowExampleUtils {
-
-  private final DataflowPipelineOptions options;
-  private Bigquery bigQueryClient = null;
-  private Pubsub pubsubClient = null;
-  private Dataflow dataflowClient = null;
-  private Set<DataflowPipelineJob> jobsToCancel = Sets.newHashSet();
-  private List<String> pendingMessages = Lists.newArrayList();
-
-  public DataflowExampleUtils(DataflowPipelineOptions options) {
-this.options = options;
-  }
-
-  /**
-   * Do resources and runner options setup.
-   */
-  public DataflowExampleUtils(DataflowPipelineOptions options, boolean 
isUnbounded)
-  throws IOException {
-this.options = options;
-setupResourcesAndRunner(isUnbounded);
-  }
-
-  /**
-   * Sets up external resources that are required by the example,
-   * such as Pub/Sub topics and BigQuery tables.
-   *
-   * @throws IOException if there is a problem setting up the 

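A sketch of how these constructors are typically driven from an example's
main method; the argument values are illustrative placeholders:

    // Parse and validate command-line flags into Dataflow pipeline options.
    DataflowPipelineOptions options = PipelineOptionsFactory.fromArgs(args)
        .withValidation().as(DataflowPipelineOptions.class);
    // The two-argument constructor shown above also provisions external
    // resources (Pub/Sub topics, BigQuery tables) for unbounded pipelines.
    DataflowExampleUtils exampleUtils = new DataflowExampleUtils(options, true);
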
[71/74] incubator-beam git commit: Initial update of imports

2016-04-13 Thread davor
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/b9724454/examples/java/src/main/java/org/apache/beam/examples/common/DataflowExampleUtils.java
--
diff --git 
a/examples/java/src/main/java/org/apache/beam/examples/common/DataflowExampleUtils.java
 
b/examples/java/src/main/java/org/apache/beam/examples/common/DataflowExampleUtils.java
index 5b98170..93c2358 100644
--- 
a/examples/java/src/main/java/org/apache/beam/examples/common/DataflowExampleUtils.java
+++ 
b/examples/java/src/main/java/org/apache/beam/examples/common/DataflowExampleUtils.java
@@ -15,7 +15,23 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package com.google.cloud.dataflow.examples.common;
+package org.apache.beam.examples.common;
+
+import org.apache.beam.sdk.Pipeline;
+import org.apache.beam.sdk.PipelineResult;
+import org.apache.beam.sdk.io.TextIO;
+import org.apache.beam.sdk.options.BigQueryOptions;
+import org.apache.beam.sdk.options.DataflowPipelineOptions;
+import org.apache.beam.sdk.runners.DataflowPipelineJob;
+import org.apache.beam.sdk.runners.DataflowPipelineRunner;
+import org.apache.beam.sdk.runners.DirectPipelineRunner;
+import org.apache.beam.sdk.transforms.IntraBundleParallelization;
+import org.apache.beam.sdk.transforms.PTransform;
+import org.apache.beam.sdk.util.AttemptBoundedExponentialBackOff;
+import org.apache.beam.sdk.util.MonitoringUtil;
+import org.apache.beam.sdk.util.Transport;
+import org.apache.beam.sdk.values.PBegin;
+import org.apache.beam.sdk.values.PCollection;
 
 import com.google.api.client.googleapis.json.GoogleJsonResponseException;
 import com.google.api.client.googleapis.services.AbstractGoogleClientRequest;
@@ -34,21 +50,6 @@ import com.google.api.services.dataflow.Dataflow;
 import com.google.api.services.pubsub.Pubsub;
 import com.google.api.services.pubsub.model.Subscription;
 import com.google.api.services.pubsub.model.Topic;
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.PipelineResult;
-import com.google.cloud.dataflow.sdk.io.TextIO;
-import com.google.cloud.dataflow.sdk.options.BigQueryOptions;
-import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions;
-import com.google.cloud.dataflow.sdk.runners.DataflowPipelineJob;
-import com.google.cloud.dataflow.sdk.runners.DataflowPipelineRunner;
-import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner;
-import com.google.cloud.dataflow.sdk.transforms.IntraBundleParallelization;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.util.AttemptBoundedExponentialBackOff;
-import com.google.cloud.dataflow.sdk.util.MonitoringUtil;
-import com.google.cloud.dataflow.sdk.util.Transport;
-import com.google.cloud.dataflow.sdk.values.PBegin;
-import com.google.cloud.dataflow.sdk.values.PCollection;
 import com.google.common.base.Strings;
 import com.google.common.base.Throwables;
 import com.google.common.collect.Lists;

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/b9724454/examples/java/src/main/java/org/apache/beam/examples/common/ExampleBigQueryTableOptions.java
--
diff --git 
a/examples/java/src/main/java/org/apache/beam/examples/common/ExampleBigQueryTableOptions.java
 
b/examples/java/src/main/java/org/apache/beam/examples/common/ExampleBigQueryTableOptions.java
index a026f3e..647d508 100644
--- 
a/examples/java/src/main/java/org/apache/beam/examples/common/ExampleBigQueryTableOptions.java
+++ 
b/examples/java/src/main/java/org/apache/beam/examples/common/ExampleBigQueryTableOptions.java
@@ -15,14 +15,15 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package com.google.cloud.dataflow.examples.common;
+package org.apache.beam.examples.common;
+
+import org.apache.beam.sdk.options.DataflowPipelineOptions;
+import org.apache.beam.sdk.options.Default;
+import org.apache.beam.sdk.options.DefaultValueFactory;
+import org.apache.beam.sdk.options.Description;
+import org.apache.beam.sdk.options.PipelineOptions;
 
 import com.google.api.services.bigquery.model.TableSchema;
-import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions;
-import com.google.cloud.dataflow.sdk.options.Default;
-import com.google.cloud.dataflow.sdk.options.DefaultValueFactory;
-import com.google.cloud.dataflow.sdk.options.Description;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
 
 /**
  * Options that can be used to configure BigQuery tables in Dataflow examples.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/b9724454/examples/java/src/main/java/org/apache/beam/examples/common/ExamplePubsubTopicAndSubscriptionOptions.java
--
diff --git 

[45/74] [partial] incubator-beam git commit: Rename com/google/cloud/dataflow->org/apache/beam

2016-04-13 Thread davor
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/0393a791/examples/java/src/main/java/org/apache/beam/examples/WindowedWordCount.java
--
diff --git 
a/examples/java/src/main/java/org/apache/beam/examples/WindowedWordCount.java 
b/examples/java/src/main/java/org/apache/beam/examples/WindowedWordCount.java
new file mode 100644
index 000..8f49dd2
--- /dev/null
+++ 
b/examples/java/src/main/java/org/apache/beam/examples/WindowedWordCount.java
@@ -0,0 +1,270 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.dataflow.examples;
+
+import com.google.api.services.bigquery.model.TableFieldSchema;
+import com.google.api.services.bigquery.model.TableReference;
+import com.google.api.services.bigquery.model.TableRow;
+import com.google.api.services.bigquery.model.TableSchema;
+import com.google.cloud.dataflow.examples.common.DataflowExampleOptions;
+import com.google.cloud.dataflow.examples.common.DataflowExampleUtils;
+import com.google.cloud.dataflow.examples.common.ExampleBigQueryTableOptions;
+import com.google.cloud.dataflow.examples.common.ExamplePubsubTopicOptions;
+import com.google.cloud.dataflow.sdk.Pipeline;
+import com.google.cloud.dataflow.sdk.PipelineResult;
+import com.google.cloud.dataflow.sdk.io.BigQueryIO;
+import com.google.cloud.dataflow.sdk.io.PubsubIO;
+import com.google.cloud.dataflow.sdk.io.TextIO;
+import com.google.cloud.dataflow.sdk.options.Default;
+import com.google.cloud.dataflow.sdk.options.Description;
+import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
+import com.google.cloud.dataflow.sdk.transforms.DoFn;
+import com.google.cloud.dataflow.sdk.transforms.ParDo;
+import com.google.cloud.dataflow.sdk.transforms.windowing.FixedWindows;
+import com.google.cloud.dataflow.sdk.transforms.windowing.Window;
+import com.google.cloud.dataflow.sdk.values.KV;
+import com.google.cloud.dataflow.sdk.values.PCollection;
+
+import org.joda.time.Duration;
+import org.joda.time.Instant;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+
+/**
+ * An example that counts words in text, and can run over either unbounded or 
bounded input
+ * collections.
+ *
+ * This class, {@link WindowedWordCount}, is the last in a series of four 
successively more
+ * detailed 'word count' examples. First take a look at {@link 
MinimalWordCount},
+ * {@link WordCount}, and {@link DebuggingWordCount}.
+ *
+ * Basic concepts, also in the MinimalWordCount, WordCount, and 
DebuggingWordCount examples:
+ * Reading text files; counting a PCollection; writing to GCS; executing a 
Pipeline both locally
+ * and using the Dataflow service; defining DoFns; creating a custom 
aggregator;
+ * user-defined PTransforms; defining PipelineOptions.
+ *
+ * New Concepts:
+ * 
+ *   1. Unbounded and bounded pipeline input modes
+ *   2. Adding timestamps to data
+ *   3. PubSub topics as sources
+ *   4. Windowing
+ *   5. Re-using PTransforms over windowed PCollections
+ *   6. Writing to BigQuery
+ * 
+ *
+ * To execute this pipeline locally, specify general pipeline configuration:
+ * {@code
+ *   --project=YOUR_PROJECT_ID
+ * }
+ * 
+ *
+ * To execute this pipeline using the Dataflow service, specify pipeline 
configuration:
+ * {@code
+ *   --project=YOUR_PROJECT_ID
+ *   --stagingLocation=gs://YOUR_STAGING_DIRECTORY
+ *   --runner=BlockingDataflowPipelineRunner
+ * }
+ * 
+ *
+ * Optionally specify the input file path via:
+ * {@code --inputFile=gs://INPUT_PATH},
+ * which defaults to {@code gs://dataflow-samples/shakespeare/kinglear.txt}.
+ *
+ * Specify an output BigQuery dataset and optionally, a table for the 
output. If you don't
+ * specify the table, one will be created for you using the job name. If you 
don't specify the
+ * dataset, a dataset called {@code dataflow-examples} must already exist in 
your project.
+ * {@code --bigQueryDataset=YOUR-DATASET --bigQueryTable=YOUR-NEW-TABLE-NAME}.
+ *
+ * Decide whether you want your pipeline to run with 'bounded' (such as 
files in GCS) or
+ * 'unbounded' input (such as a PubSub topic). To run with 

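The windowing concept called out above amounts to a single transform
application. A sketch, where input is the upstream PCollection of lines and
windowSize is a fixed window size in minutes (both placeholders here):

    PCollection<String> windowedWords = input.apply(
        Window.<String>into(FixedWindows.of(Duration.standardMinutes(windowSize))));
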
[53/74] incubator-beam git commit: Initial update of imports

2016-04-13 Thread davor
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/b9724454/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/windowing/WindowTest.java
--
diff --git 
a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/windowing/WindowTest.java
 
b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/windowing/WindowTest.java
index 39a33cc..359ad23 100644
--- 
a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/windowing/WindowTest.java
+++ 
b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/windowing/WindowTest.java
@@ -15,7 +15,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package com.google.cloud.dataflow.sdk.transforms.windowing;
+package org.apache.beam.sdk.transforms.windowing;
 
 import static org.hamcrest.Matchers.equalTo;
 import static org.junit.Assert.assertEquals;
@@ -23,20 +23,20 @@ import static org.junit.Assert.assertThat;
 import static org.junit.Assert.assertTrue;
 import static org.mockito.Mockito.when;
 
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.coders.Coder.NonDeterministicException;
-import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder;
-import com.google.cloud.dataflow.sdk.testing.RunnableOnService;
-import com.google.cloud.dataflow.sdk.testing.TestPipeline;
-import com.google.cloud.dataflow.sdk.transforms.Create;
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-import com.google.cloud.dataflow.sdk.transforms.GroupByKey;
-import com.google.cloud.dataflow.sdk.transforms.ParDo;
-import com.google.cloud.dataflow.sdk.util.WindowingStrategy;
-import com.google.cloud.dataflow.sdk.util.WindowingStrategy.AccumulationMode;
-import com.google.cloud.dataflow.sdk.values.KV;
-import com.google.cloud.dataflow.sdk.values.TimestampedValue;
+import org.apache.beam.sdk.Pipeline;
+import org.apache.beam.sdk.coders.Coder;
+import org.apache.beam.sdk.coders.Coder.NonDeterministicException;
+import org.apache.beam.sdk.coders.StringUtf8Coder;
+import org.apache.beam.sdk.testing.RunnableOnService;
+import org.apache.beam.sdk.testing.TestPipeline;
+import org.apache.beam.sdk.transforms.Create;
+import org.apache.beam.sdk.transforms.DoFn;
+import org.apache.beam.sdk.transforms.GroupByKey;
+import org.apache.beam.sdk.transforms.ParDo;
+import org.apache.beam.sdk.util.WindowingStrategy;
+import org.apache.beam.sdk.util.WindowingStrategy.AccumulationMode;
+import org.apache.beam.sdk.values.KV;
+import org.apache.beam.sdk.values.TimestampedValue;
 
 import org.hamcrest.Matchers;
 import org.joda.time.Duration;

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/b9724454/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/windowing/WindowingTest.java
--
diff --git 
a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/windowing/WindowingTest.java
 
b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/windowing/WindowingTest.java
index 1df0d96..5cbf044 100644
--- 
a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/windowing/WindowingTest.java
+++ 
b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/windowing/WindowingTest.java
@@ -15,25 +15,25 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package com.google.cloud.dataflow.sdk.transforms.windowing;
-
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder;
-import com.google.cloud.dataflow.sdk.io.TextIO;
-import com.google.cloud.dataflow.sdk.testing.PAssert;
-import com.google.cloud.dataflow.sdk.testing.RunnableOnService;
-import com.google.cloud.dataflow.sdk.testing.TestPipeline;
-import com.google.cloud.dataflow.sdk.transforms.Count;
-import com.google.cloud.dataflow.sdk.transforms.Create;
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-import com.google.cloud.dataflow.sdk.transforms.DoFn.RequiresWindowAccess;
-import com.google.cloud.dataflow.sdk.transforms.Flatten;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.transforms.ParDo;
-import com.google.cloud.dataflow.sdk.values.KV;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.cloud.dataflow.sdk.values.PCollectionList;
-import com.google.cloud.dataflow.sdk.values.TimestampedValue;
+package org.apache.beam.sdk.transforms.windowing;
+
+import org.apache.beam.sdk.Pipeline;
+import org.apache.beam.sdk.coders.StringUtf8Coder;
+import org.apache.beam.sdk.io.TextIO;
+import org.apache.beam.sdk.testing.PAssert;
+import org.apache.beam.sdk.testing.RunnableOnService;
+import org.apache.beam.sdk.testing.TestPipeline;
+import org.apache.beam.sdk.transforms.Count;
+import org.apache.beam.sdk.transforms.Create;

[33/74] [partial] incubator-beam git commit: Rename com/google/cloud/dataflow->org/apache/beam

2016-04-13 Thread davor
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/0393a791/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/sdk/options/DataflowWorkerLoggingOptions.java
--
diff --git 
a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/sdk/options/DataflowWorkerLoggingOptions.java
 
b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/sdk/options/DataflowWorkerLoggingOptions.java
new file mode 100644
index 000..ebd42d9
--- /dev/null
+++ 
b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/sdk/options/DataflowWorkerLoggingOptions.java
@@ -0,0 +1,155 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.dataflow.sdk.options;
+
+import com.google.common.base.Preconditions;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Options that are used to control logging configuration on the Dataflow 
worker.
+ */
+@Description("Options that are used to control logging configuration on the 
Dataflow worker.")
+public interface DataflowWorkerLoggingOptions extends PipelineOptions {
+  /**
+   * The set of log levels that can be used on the Dataflow worker.
+   */
+  public enum Level {
+DEBUG, ERROR, INFO, TRACE, WARN
+  }
+
+  /**
+   * This option controls the default log level of all loggers without a log 
level override.
+   */
+  @Description("Controls the default log level of all loggers without a log 
level override.")
+  @Default.Enum("INFO")
+  Level getDefaultWorkerLogLevel();
+  void setDefaultWorkerLogLevel(Level level);
+
+  /**
+   * This option controls the log levels for specifically named loggers.
+   *
+   * Later options with equivalent names override earlier options.
+   *
+   * See {@link WorkerLogLevelOverrides} for more information on how to 
configure logging
+   * on a per {@link Class}, {@link Package}, or name basis. If used from the 
command line,
+   * the expected format is {"Name":"Level",...}; see
+   * {@link WorkerLogLevelOverrides#from} for further details.
+   */
+  @Description("This option controls the log levels for specifically named 
loggers. "
+  + "The expected format is {\"Name\":\"Level\",...}. The Dataflow worker 
uses "
+  + "java.util.logging, which supports a logging hierarchy based off of 
names that are '.' "
+  + "separated. For example, by specifying the value 
{\"a.b.c.Foo\":\"DEBUG\"}, the logger "
+  + "for the class 'a.b.c.Foo' will be configured to output logs at the 
DEBUG level. "
+  + "Similarly, by specifying the value {\"a.b.c\":\"WARN\"}, all loggers 
underneath the "
+  + "'a.b.c' package will be configured to output logs at the WARN level. 
Also, note that "
+  + "when multiple overrides are specified, the exact name followed by the 
closest parent "
+  + "takes precedence.")
+  WorkerLogLevelOverrides getWorkerLogLevelOverrides();
+  void setWorkerLogLevelOverrides(WorkerLogLevelOverrides value);
+
+  /**
+   * Defines a log level override for a specific class, package, or name.
+   *
+   * {@code java.util.logging} is used on the Dataflow worker harness and 
supports
+   * a logging hierarchy based off of names that are "." separated. It is a 
common
+   * pattern to have the logger for a given class share the same name as the 
class itself.
+   * Given the classes {@code a.b.c.Foo}, {@code a.b.c.Xyz}, and {@code 
a.b.Bar}, with
+   * loggers named {@code "a.b.c.Foo"}, {@code "a.b.c.Xyz"}, and {@code 
"a.b.Bar"} respectively,
+   * we can override the log levels:
+   * 
+   *for {@code Foo} by specifying the name {@code "a.b.c.Foo"} or the 
{@link Class}
+   *representing {@code a.b.c.Foo}.
+   *for {@code Foo}, {@code Xyz}, and {@code Bar} by specifying the 
name {@code "a.b"} or
+   *the {@link Package} representing {@code a.b}.
+   *for {@code Foo} and {@code Bar} by specifying both of their names 
or classes.
+   * 
+   * Note that by specifying multiple overrides, the exact name followed by 
the closest parent
+   * takes precedence.
+   */
+  public static class WorkerLogLevelOverrides extends 

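A sketch of setting these options programmatically; the logger name is a
placeholder, and addOverrideForName is assumed from the (truncated)
WorkerLogLevelOverrides class above:

    DataflowWorkerLoggingOptions loggingOptions =
        PipelineOptionsFactory.create().as(DataflowWorkerLoggingOptions.class);
    // Quiet everything to WARN by default...
    loggingOptions.setDefaultWorkerLogLevel(DataflowWorkerLoggingOptions.Level.WARN);
    // ...but keep DEBUG logs for one logger of interest (assumed API).
    loggingOptions.setWorkerLogLevelOverrides(
        new DataflowWorkerLoggingOptions.WorkerLogLevelOverrides()
            .addOverrideForName("com.example.Foo", DataflowWorkerLoggingOptions.Level.DEBUG));
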
[08/74] [partial] incubator-beam git commit: Rename com/google/cloud/dataflow->org/apache/beam

2016-04-13 Thread davor
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/0393a791/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/runners/DirectPipelineRunner.java
--
diff --git 
a/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/runners/DirectPipelineRunner.java
 
b/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/runners/DirectPipelineRunner.java
deleted file mode 100644
index 57e6116..000
--- 
a/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/runners/DirectPipelineRunner.java
+++ /dev/null
@@ -1,1320 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.google.cloud.dataflow.sdk.runners;
-
-import static com.google.cloud.dataflow.sdk.util.CoderUtils.encodeToByteArray;
-import static com.google.common.base.Preconditions.checkArgument;
-import static com.google.common.base.Preconditions.checkState;
-
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.Pipeline.PipelineVisitor;
-import com.google.cloud.dataflow.sdk.PipelineResult;
-import com.google.cloud.dataflow.sdk.coders.CannotProvideCoderException;
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.coders.CoderException;
-import com.google.cloud.dataflow.sdk.coders.ListCoder;
-import com.google.cloud.dataflow.sdk.io.AvroIO;
-import com.google.cloud.dataflow.sdk.io.FileBasedSink;
-import com.google.cloud.dataflow.sdk.io.TextIO;
-import com.google.cloud.dataflow.sdk.options.DirectPipelineOptions;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions.CheckEnabled;
-import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
-import com.google.cloud.dataflow.sdk.options.PipelineOptionsValidator;
-import com.google.cloud.dataflow.sdk.transforms.Aggregator;
-import com.google.cloud.dataflow.sdk.transforms.AppliedPTransform;
-import com.google.cloud.dataflow.sdk.transforms.Combine;
-import com.google.cloud.dataflow.sdk.transforms.Combine.KeyedCombineFn;
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-import com.google.cloud.dataflow.sdk.transforms.GroupByKey;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.transforms.ParDo;
-import com.google.cloud.dataflow.sdk.transforms.Partition;
-import com.google.cloud.dataflow.sdk.transforms.Partition.PartitionFn;
-import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
-import com.google.cloud.dataflow.sdk.transforms.windowing.Window;
-import com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn;
-import com.google.cloud.dataflow.sdk.util.AppliedCombineFn;
-import com.google.cloud.dataflow.sdk.util.AssignWindows;
-import com.google.cloud.dataflow.sdk.util.GroupByKeyViaGroupByKeyOnly;
-import 
com.google.cloud.dataflow.sdk.util.GroupByKeyViaGroupByKeyOnly.GroupByKeyOnly;
-import com.google.cloud.dataflow.sdk.util.IOChannelUtils;
-import com.google.cloud.dataflow.sdk.util.MapAggregatorValues;
-import com.google.cloud.dataflow.sdk.util.PerKeyCombineFnRunner;
-import com.google.cloud.dataflow.sdk.util.PerKeyCombineFnRunners;
-import com.google.cloud.dataflow.sdk.util.SerializableUtils;
-import com.google.cloud.dataflow.sdk.util.TestCredential;
-import com.google.cloud.dataflow.sdk.util.WindowedValue;
-import com.google.cloud.dataflow.sdk.util.WindowingStrategy;
-import com.google.cloud.dataflow.sdk.util.common.Counter;
-import com.google.cloud.dataflow.sdk.util.common.CounterSet;
-import com.google.cloud.dataflow.sdk.values.KV;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.cloud.dataflow.sdk.values.PCollectionList;
-import com.google.cloud.dataflow.sdk.values.PCollectionView;
-import com.google.cloud.dataflow.sdk.values.PDone;
-import com.google.cloud.dataflow.sdk.values.PInput;
-import com.google.cloud.dataflow.sdk.values.POutput;
-import com.google.cloud.dataflow.sdk.values.PValue;
-import com.google.cloud.dataflow.sdk.values.TypedPValue;
-import com.google.common.base.Function;
-import com.google.common.collect.Lists;
-
-import org.joda.time.Instant;
-import org.slf4j.Logger;
-import 

[15/74] [partial] incubator-beam git commit: Rename com/google/cloud/dataflow->org/apache/beam

2016-04-13 Thread davor
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/0393a791/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/io/FileBasedSource.java
--
diff --git 
a/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/io/FileBasedSource.java
 
b/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/io/FileBasedSource.java
deleted file mode 100644
index 3ad32b4..000
--- 
a/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/io/FileBasedSource.java
+++ /dev/null
@@ -1,663 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.google.cloud.dataflow.sdk.io;
-
-import static com.google.common.base.Preconditions.checkArgument;
-import static com.google.common.base.Preconditions.checkState;
-
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-import com.google.cloud.dataflow.sdk.util.IOChannelFactory;
-import com.google.cloud.dataflow.sdk.util.IOChannelUtils;
-import com.google.common.collect.ImmutableList;
-import com.google.common.collect.Iterables;
-import com.google.common.util.concurrent.Futures;
-import com.google.common.util.concurrent.ListenableFuture;
-import com.google.common.util.concurrent.ListeningExecutorService;
-import com.google.common.util.concurrent.MoreExecutors;
-
-import org.joda.time.Instant;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.IOException;
-import java.nio.channels.ReadableByteChannel;
-import java.nio.channels.SeekableByteChannel;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.List;
-import java.util.ListIterator;
-import java.util.NoSuchElementException;
-import java.util.concurrent.Callable;
-import java.util.concurrent.ExecutionException;
-import java.util.concurrent.Executors;
-
-/**
- * A common base class for all file-based {@link Source}s. Extend this class 
to implement your own
- * file-based custom source.
- *
- * A file-based {@code Source} is a {@code Source} backed by a file pattern 
defined as a Java
- * glob, a single file, or an offset range for a single file. See {@link 
OffsetBasedSource} and
- * {@link com.google.cloud.dataflow.sdk.io.range.RangeTracker} for semantics 
of offset ranges.
- *
- * This source stores a {@code String} that is an {@link IOChannelFactory} 
specification for a
- * file or file pattern. There should be an {@code IOChannelFactory} defined 
for the file
- * specification provided. Please refer to {@link IOChannelUtils} and {@link 
IOChannelFactory} for
- * more information on this.
- *
- * In addition to the methods left abstract from {@code BoundedSource}, 
subclasses must implement
- * methods to create a sub-source and a reader for a range of a single file -
- * {@link #createForSubrangeOfFile} and {@link #createSingleFileReader}. 
Please refer to
- * {@link XmlSource} for an example implementation of {@code FileBasedSource}.
- *
- * @param <T> Type of records represented by the source.
- */
-public abstract class FileBasedSource<T> extends OffsetBasedSource<T> {
-  private static final Logger LOG = 
LoggerFactory.getLogger(FileBasedSource.class);
-  private static final float FRACTION_OF_FILES_TO_STAT = 0.01f;
-
-  // Package-private for testing
-  static final int MAX_NUMBER_OF_FILES_FOR_AN_EXACT_STAT = 100;
-
-  // Size of the thread pool to be used for performing file operations in 
parallel.
-  // Package-private for testing.
-  static final int THREAD_POOL_SIZE = 128;
-
-  private final String fileOrPatternSpec;
-  private final Mode mode;
-
-  /**
-   * A given {@code FileBasedSource} represents a file resource of one of 
these types.
-   */
-  public enum Mode {
-FILEPATTERN,
-SINGLE_FILE_OR_SUBRANGE
-  }
-
-  /**
-   * Create a {@code FileBasedSource} based on a file or a file pattern 
specification. This
-   * constructor must be used when creating a new {@code FileBasedSource} for 
a file pattern.
-   *
-   * See {@link OffsetBasedSource} for a detailed description of {@code 
minBundleSize}.
-   *
-   * @param fileOrPatternSpec {@link IOChannelFactory} specification of file 
or file pattern
-   *represented by the {@link FileBasedSource}.
-   * 

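To make the contract above concrete, here is a skeletal subclass written
against the signatures implied by this Javadoc (assumed, since the file is
cut off here); the bodies are placeholders, not a working source:

    // Skeleton only; see XmlSource for a real FileBasedSource implementation.
    class MyRecordSource extends FileBasedSource<String> {
      MyRecordSource(String fileOrPatternSpec, long minBundleSize) {
        super(fileOrPatternSpec, minBundleSize);  // file-pattern mode
      }

      @Override
      public FileBasedSource<String> createForSubrangeOfFile(
          String fileName, long start, long end) {
        // Return a copy of this source restricted to [start, end) of fileName.
        throw new UnsupportedOperationException("skeleton only");
      }

      @Override
      public FileBasedReader<String> createSingleFileReader(PipelineOptions options) {
        // Return a reader that parses records from one file or subrange.
        throw new UnsupportedOperationException("skeleton only");
      }

      @Override
      public Coder<String> getDefaultOutputCoder() {
        return StringUtf8Coder.of();
      }

      @Override
      public boolean producesSortedKeys(PipelineOptions options) {
        return false;
      }
    }
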
[09/74] [partial] incubator-beam git commit: Rename com/google/cloud/dataflow->org/apache/beam

2016-04-13 Thread davor
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/0393a791/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/options/ProxyInvocationHandler.java
--
diff --git 
a/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/options/ProxyInvocationHandler.java
 
b/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/options/ProxyInvocationHandler.java
deleted file mode 100644
index 9e7c16e..000
--- 
a/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/options/ProxyInvocationHandler.java
+++ /dev/null
@@ -1,442 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.google.cloud.dataflow.sdk.options;
-
-import 
com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory.JsonIgnorePredicate;
-import 
com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory.Registration;
-import com.google.cloud.dataflow.sdk.util.InstanceBuilder;
-import com.google.cloud.dataflow.sdk.util.common.ReflectHelpers;
-import com.google.common.base.Defaults;
-import com.google.common.base.Function;
-import com.google.common.base.Preconditions;
-import com.google.common.collect.ClassToInstanceMap;
-import com.google.common.collect.FluentIterable;
-import com.google.common.collect.ImmutableMap;
-import com.google.common.collect.Maps;
-import com.google.common.collect.MutableClassToInstanceMap;
-
-import com.fasterxml.jackson.annotation.JsonIgnore;
-import com.fasterxml.jackson.core.JsonGenerator;
-import com.fasterxml.jackson.core.JsonParser;
-import com.fasterxml.jackson.core.JsonProcessingException;
-import com.fasterxml.jackson.databind.DeserializationContext;
-import com.fasterxml.jackson.databind.JavaType;
-import com.fasterxml.jackson.databind.JsonDeserializer;
-import com.fasterxml.jackson.databind.JsonNode;
-import com.fasterxml.jackson.databind.JsonSerializer;
-import com.fasterxml.jackson.databind.ObjectMapper;
-import com.fasterxml.jackson.databind.SerializerProvider;
-import com.fasterxml.jackson.databind.node.ObjectNode;
-
-import java.beans.PropertyDescriptor;
-import java.io.IOException;
-import java.lang.annotation.Annotation;
-import java.lang.reflect.InvocationHandler;
-import java.lang.reflect.Method;
-import java.lang.reflect.Proxy;
-import java.lang.reflect.Type;
-import java.util.Arrays;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.SortedMap;
-import java.util.TreeMap;
-
-import javax.annotation.concurrent.ThreadSafe;
-
-/**
- * Represents an {@link InvocationHandler} for a {@link Proxy}. The 
invocation handler uses bean
- * introspection of the proxy class to store and retrieve values based off of 
the property name.
- *
- * Unset properties use the {@code @Default} metadata on the getter to return values.
- * If there is no {@code @Default} annotation on the getter, then a
- * <a href="https://docs.oracle.com/javase/tutorial/java/nutsandbolts/datatypes.html">default
- * as per the Java Language Specification</a> for the expected return type is returned.
- *
- * In addition to the getter/setter pairs, this proxy invocation handler 
supports
- * {@link Object#equals(Object)}, {@link Object#hashCode()}, {@link 
Object#toString()} and
- * {@link PipelineOptions#as(Class)}.
- */
-@ThreadSafe
-class ProxyInvocationHandler implements InvocationHandler {
-  private static final ObjectMapper MAPPER = new ObjectMapper();
-  /**
-   * No two instances of this class are considered equivalent hence we 
generate a random hash code
-   * between 0 and {@link Integer#MAX_VALUE}.
-   */
-  private final int hashCode = (int) (Math.random() * Integer.MAX_VALUE);
-  private final Set<Class<? extends PipelineOptions>> knownInterfaces;
-  private final ClassToInstanceMap<PipelineOptions> interfaceToProxyCache;
-  private final Map<String, Object> options;
-  private final Map<String, JsonNode> jsonOptions;
-  private final Map<String, String> gettersToPropertyNames;
-  private final Map<String, String> settersToPropertyNames;
-
-  ProxyInvocationHandler(Map<String, Object> options) {
-this(options, Maps.newHashMap());
-  }
-
-  private ProxyInvocationHandler(Map<String, Object> options, 

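The mechanism this Javadoc describes is ordinary JDK dynamic proxying; a
self-contained toy version of the same idea (this is not the SDK class,
just an illustration):

    import java.lang.reflect.InvocationHandler;
    import java.lang.reflect.Method;
    import java.lang.reflect.Proxy;
    import java.util.HashMap;
    import java.util.Map;

    // Toy handler: backs getFoo()/setFoo(x) bean pairs with a map keyed by
    // property name, mirroring the storage strategy described above.
    class MapBackedHandler implements InvocationHandler {
      private final Map<String, Object> values = new HashMap<>();

      @Override
      public Object invoke(Object proxy, Method method, Object[] args) {
        String name = method.getName();
        if (name.startsWith("set") && args != null && args.length == 1) {
          values.put(name.substring(3), args[0]);
          return null;
        }
        if (name.startsWith("get")) {
          return values.get(name.substring(3));
        }
        throw new UnsupportedOperationException(name);
      }

      @SuppressWarnings("unchecked")
      static <T> T proxyFor(Class<T> iface) {
        return (T) Proxy.newProxyInstance(
            iface.getClassLoader(), new Class<?>[] {iface}, new MapBackedHandler());
      }
    }
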
[04/74] [partial] incubator-beam git commit: Rename com/google/cloud/dataflow->org/apache/beam

2016-04-13 Thread davor
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/0393a791/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/ParDoMultiEvaluatorFactory.java
--
diff --git 
a/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/ParDoMultiEvaluatorFactory.java
 
b/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/ParDoMultiEvaluatorFactory.java
deleted file mode 100644
index 1d6563d..000
--- 
a/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/ParDoMultiEvaluatorFactory.java
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.google.cloud.dataflow.sdk.runners.inprocess;
-
-import 
com.google.cloud.dataflow.sdk.runners.inprocess.InProcessExecutionContext.InProcessStepContext;
-import 
com.google.cloud.dataflow.sdk.runners.inprocess.InProcessPipelineRunner.CommittedBundle;
-import 
com.google.cloud.dataflow.sdk.runners.inprocess.InProcessPipelineRunner.UncommittedBundle;
-import 
com.google.cloud.dataflow.sdk.runners.inprocess.ParDoInProcessEvaluator.BundleOutputManager;
-import com.google.cloud.dataflow.sdk.transforms.AppliedPTransform;
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.transforms.ParDo.BoundMulti;
-import com.google.cloud.dataflow.sdk.util.DoFnRunner;
-import com.google.cloud.dataflow.sdk.util.DoFnRunners;
-import com.google.cloud.dataflow.sdk.util.common.CounterSet;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.cloud.dataflow.sdk.values.PCollectionTuple;
-import com.google.cloud.dataflow.sdk.values.TupleTag;
-
-import java.util.HashMap;
-import java.util.Map;
-
-/**
- * The {@link InProcessPipelineRunner} {@link TransformEvaluatorFactory} for 
the
- * {@link BoundMulti} primitive {@link PTransform}.
- */
-class ParDoMultiEvaluatorFactory implements TransformEvaluatorFactory {
-  @Override
-  public <T> TransformEvaluator<T> forApplication(
-      AppliedPTransform<?, ?, ?> application,
-      CommittedBundle<?> inputBundle,
-      InProcessEvaluationContext evaluationContext) {
-    @SuppressWarnings({"cast", "unchecked", "rawtypes"})
-    TransformEvaluator<T> evaluator = (TransformEvaluator<T>) createMultiEvaluator(
-            (AppliedPTransform) application, inputBundle, evaluationContext);
-    return evaluator;
-  }
-
-  private static <InT, OuT> ParDoInProcessEvaluator<InT> createMultiEvaluator(
-      AppliedPTransform<PCollection<InT>, PCollectionTuple, BoundMulti<InT, OuT>> application,
-      CommittedBundle<InT> inputBundle,
-      InProcessEvaluationContext evaluationContext) {
-    PCollectionTuple output = application.getOutput();
-    Map<TupleTag<?>, PCollection<?>> outputs = output.getAll();
-    Map<TupleTag<?>, UncommittedBundle<?>> outputBundles = new HashMap<>();
-    for (Map.Entry<TupleTag<?>, PCollection<?>> outputEntry : outputs.entrySet()) {
-  outputBundles.put(
-  outputEntry.getKey(),
-  evaluationContext.createBundle(inputBundle, outputEntry.getValue()));
-}
-InProcessExecutionContext executionContext =
-evaluationContext.getExecutionContext(application, 
inputBundle.getKey());
-String stepName = evaluationContext.getStepName(application);
-InProcessStepContext stepContext =
-executionContext.getOrCreateStepContext(stepName, stepName, null);
-
-CounterSet counters = evaluationContext.createCounterSet();
-
-    DoFn<InT, OuT> fn = application.getTransform().getFn();
-    DoFnRunner<InT, OuT> runner =
-DoFnRunners.createDefault(
-evaluationContext.getPipelineOptions(),
-fn,
-
evaluationContext.createSideInputReader(application.getTransform().getSideInputs()),
-BundleOutputManager.create(outputBundles),
-application.getTransform().getMainOutputTag(),
-application.getTransform().getSideOutputTags().getAll(),
-stepContext,
-counters.getAddCounterMutator(),
-application.getInput().getWindowingStrategy());
-
-runner.startBundle();
-
-return new 

[68/74] incubator-beam git commit: Initial update of imports

2016-04-13 Thread davor
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/b9724454/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/streaming/StateSerializationTest.java
--
diff --git 
a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/streaming/StateSerializationTest.java
 
b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/streaming/StateSerializationTest.java
index 77a8de6..44f4ecb 100644
--- 
a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/streaming/StateSerializationTest.java
+++ 
b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/streaming/StateSerializationTest.java
@@ -17,20 +17,32 @@
  */
 package org.apache.beam.runners.flink.streaming;
 
+import static org.junit.Assert.assertEquals;
+
 import 
org.apache.beam.runners.flink.translation.wrappers.streaming.state.FlinkStateInternals;
 import 
org.apache.beam.runners.flink.translation.wrappers.streaming.state.StateCheckpointReader;
 import 
org.apache.beam.runners.flink.translation.wrappers.streaming.state.StateCheckpointUtils;
 import 
org.apache.beam.runners.flink.translation.wrappers.streaming.state.StateCheckpointWriter;
-import com.google.cloud.dataflow.sdk.coders.*;
-import com.google.cloud.dataflow.sdk.transforms.Combine;
-import com.google.cloud.dataflow.sdk.transforms.CombineWithContext;
-import com.google.cloud.dataflow.sdk.transforms.Sum;
-import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
-import com.google.cloud.dataflow.sdk.transforms.windowing.IntervalWindow;
-import com.google.cloud.dataflow.sdk.transforms.windowing.OutputTimeFns;
-import com.google.cloud.dataflow.sdk.util.TimeDomain;
-import com.google.cloud.dataflow.sdk.util.TimerInternals;
-import com.google.cloud.dataflow.sdk.util.state.*;
+import org.apache.beam.sdk.coders.CannotProvideCoderException;
+import org.apache.beam.sdk.coders.Coder;
+import org.apache.beam.sdk.coders.DelegateCoder;
+import org.apache.beam.sdk.coders.StringUtf8Coder;
+import org.apache.beam.sdk.coders.VarIntCoder;
+import org.apache.beam.sdk.transforms.CombineWithContext;
+import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
+import org.apache.beam.sdk.transforms.windowing.IntervalWindow;
+import org.apache.beam.sdk.transforms.windowing.OutputTimeFns;
+import org.apache.beam.sdk.util.TimeDomain;
+import org.apache.beam.sdk.util.TimerInternals;
+import org.apache.beam.sdk.util.state.AccumulatorCombiningState;
+import org.apache.beam.sdk.util.state.BagState;
+import org.apache.beam.sdk.util.state.StateNamespace;
+import org.apache.beam.sdk.util.state.StateNamespaces;
+import org.apache.beam.sdk.util.state.StateTag;
+import org.apache.beam.sdk.util.state.StateTags;
+import org.apache.beam.sdk.util.state.ValueState;
+import org.apache.beam.sdk.util.state.WatermarkHoldState;
+
 import org.apache.flink.core.memory.DataInputView;
 import org.apache.flink.runtime.state.AbstractStateBackend;
 import org.apache.flink.runtime.state.memory.MemoryStateBackend;
@@ -38,10 +50,11 @@ import org.apache.flink.runtime.util.DataInputDeserializer;
 import org.joda.time.Instant;
 import org.junit.Test;
 
-import java.nio.ByteBuffer;
-import java.util.*;
-
-import static org.junit.Assert.assertEquals;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Set;
 
 public class StateSerializationTest {
 

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/b9724454/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/streaming/TopWikipediaSessionsITCase.java
--
diff --git 
a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/streaming/TopWikipediaSessionsITCase.java
 
b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/streaming/TopWikipediaSessionsITCase.java
index 83c1661..1efb42f 100644
--- 
a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/streaming/TopWikipediaSessionsITCase.java
+++ 
b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/streaming/TopWikipediaSessionsITCase.java
@@ -18,18 +18,20 @@
 package org.apache.beam.runners.flink.streaming;
 
 import org.apache.beam.runners.flink.FlinkTestPipeline;
+import org.apache.beam.sdk.Pipeline;
+import org.apache.beam.sdk.io.TextIO;
+import org.apache.beam.sdk.transforms.Count;
+import org.apache.beam.sdk.transforms.Create;
+import org.apache.beam.sdk.transforms.DoFn;
+import org.apache.beam.sdk.transforms.ParDo;
+import org.apache.beam.sdk.transforms.windowing.Sessions;
+import org.apache.beam.sdk.transforms.windowing.Window;
+import org.apache.beam.sdk.values.KV;
+import org.apache.beam.sdk.values.PCollection;
+
 import com.google.api.services.bigquery.model.TableRow;
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.io.TextIO;
-import 

[05/74] [partial] incubator-beam git commit: Rename com/google/cloud/dataflow->org/apache/beam

2016-04-13 Thread davor
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/0393a791/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/InProcessEvaluationContext.java
--
diff --git 
a/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/InProcessEvaluationContext.java
 
b/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/InProcessEvaluationContext.java
deleted file mode 100644
index 078827d..000
--- 
a/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/InProcessEvaluationContext.java
+++ /dev/null
@@ -1,411 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.google.cloud.dataflow.sdk.runners.inprocess;
-
-import static com.google.common.base.Preconditions.checkNotNull;
-
-import com.google.cloud.dataflow.sdk.Pipeline;
-import 
com.google.cloud.dataflow.sdk.runners.inprocess.GroupByKeyEvaluatorFactory.InProcessGroupByKeyOnly;
-import 
com.google.cloud.dataflow.sdk.runners.inprocess.InMemoryWatermarkManager.FiredTimers;
-import 
com.google.cloud.dataflow.sdk.runners.inprocess.InMemoryWatermarkManager.TransformWatermarks;
-import 
com.google.cloud.dataflow.sdk.runners.inprocess.InProcessPipelineRunner.CommittedBundle;
-import 
com.google.cloud.dataflow.sdk.runners.inprocess.InProcessPipelineRunner.PCollectionViewWriter;
-import 
com.google.cloud.dataflow.sdk.runners.inprocess.InProcessPipelineRunner.UncommittedBundle;
-import com.google.cloud.dataflow.sdk.transforms.AppliedPTransform;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
-import com.google.cloud.dataflow.sdk.transforms.windowing.Trigger;
-import com.google.cloud.dataflow.sdk.util.ExecutionContext;
-import com.google.cloud.dataflow.sdk.util.SideInputReader;
-import com.google.cloud.dataflow.sdk.util.TimerInternals.TimerData;
-import com.google.cloud.dataflow.sdk.util.WindowedValue;
-import com.google.cloud.dataflow.sdk.util.WindowingStrategy;
-import com.google.cloud.dataflow.sdk.util.common.CounterSet;
-import 
com.google.cloud.dataflow.sdk.util.state.CopyOnAccessInMemoryStateInternals;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.cloud.dataflow.sdk.values.PCollection.IsBounded;
-import com.google.cloud.dataflow.sdk.values.PCollectionView;
-import com.google.cloud.dataflow.sdk.values.PValue;
-import com.google.common.collect.ImmutableList;
-import com.google.common.collect.Iterables;
-
-import java.util.Collection;
-import java.util.List;
-import java.util.Map;
-import java.util.concurrent.ConcurrentHashMap;
-import java.util.concurrent.ConcurrentMap;
-
-import javax.annotation.Nullable;
-
-/**
- * The evaluation context for a specific pipeline being executed by the
- * {@link InProcessPipelineRunner}. Contains state shared within the execution 
across all
- * transforms.
- *
- * {@link InProcessEvaluationContext} contains shared state for an 
execution of the
- * {@link InProcessPipelineRunner} that can be used while evaluating a {@link 
PTransform}. This
- * consists of views into underlying state and watermark implementations, 
access to read and write
- * {@link PCollectionView PCollectionViews}, and constructing {@link 
CounterSet CounterSets} and
- * {@link ExecutionContext ExecutionContexts}. This includes executing 
callbacks asynchronously when
- * state changes to the appropriate point (e.g. when a {@link PCollectionView} 
is requested and
- * known to be empty).
- *
- * {@link InProcessEvaluationContext} also handles results by committing 
finalizing bundles based
- * on the current global state and updating the global state appropriately. 
This includes updating
- * the per-{@link StepAndKey} state, updating global watermarks, and executing 
any callbacks that
- * can be executed.
- */
-class InProcessEvaluationContext {
-  /** The step name for each {@link AppliedPTransform} in the {@link 
Pipeline}. */
-  private final Map<AppliedPTransform<?, ?, ?>, String> stepNames;
-
-  /** The options that were used to create this {@link Pipeline}. */
-  private final InProcessPipelineOptions options;
-
- 

[06/74] [partial] incubator-beam git commit: Rename com/google/cloud/dataflow->org/apache/beam

2016-04-13 Thread davor
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/0393a791/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/InMemoryWatermarkManager.java
--
diff --git 
a/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/InMemoryWatermarkManager.java
 
b/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/InMemoryWatermarkManager.java
deleted file mode 100644
index 193d6a4..000
--- 
a/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/InMemoryWatermarkManager.java
+++ /dev/null
@@ -1,1313 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.google.cloud.dataflow.sdk.runners.inprocess;
-
-import com.google.cloud.dataflow.sdk.Pipeline;
-import 
com.google.cloud.dataflow.sdk.runners.inprocess.InProcessPipelineRunner.CommittedBundle;
-import com.google.cloud.dataflow.sdk.transforms.AppliedPTransform;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
-import com.google.cloud.dataflow.sdk.util.TimeDomain;
-import com.google.cloud.dataflow.sdk.util.TimerInternals;
-import com.google.cloud.dataflow.sdk.util.TimerInternals.TimerData;
-import com.google.cloud.dataflow.sdk.util.WindowedValue;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.cloud.dataflow.sdk.values.PValue;
-import com.google.common.annotations.VisibleForTesting;
-import com.google.common.base.MoreObjects;
-import com.google.common.base.Preconditions;
-import com.google.common.collect.ComparisonChain;
-import com.google.common.collect.ImmutableList;
-import com.google.common.collect.ImmutableSet;
-import com.google.common.collect.Iterables;
-import com.google.common.collect.Ordering;
-import com.google.common.collect.SortedMultiset;
-import com.google.common.collect.TreeMultiset;
-
-import org.joda.time.Instant;
-
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.EnumMap;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.NavigableSet;
-import java.util.Objects;
-import java.util.PriorityQueue;
-import java.util.Set;
-import java.util.TreeSet;
-import java.util.concurrent.atomic.AtomicReference;
-
-import javax.annotation.Nullable;
-
-/**
- * Manages watermarks of {@link PCollection PCollections} and input and output watermarks of
- * {@link AppliedPTransform AppliedPTransforms} to provide event-time and completion tracking for
- * in-memory execution. {@link InMemoryWatermarkManager} is designed to update and return a
- * consistent view of watermarks in the presence of concurrent updates.
- *
- * <p>An {@link InMemoryWatermarkManager} is provided with the collection of root
- * {@link AppliedPTransform AppliedPTransforms} and a map of {@link PCollection PCollections} to
- * all the {@link AppliedPTransform AppliedPTransforms} that consume them at construction time.
- *
- * <p>Whenever a root {@link AppliedPTransform transform} produces elements, the
- * {@link InMemoryWatermarkManager} is provided with the produced elements and the output watermark
- * of the producing {@link AppliedPTransform transform}. The
- * {@link InMemoryWatermarkManager watermark manager} is responsible for computing the watermarks
- * of all {@link AppliedPTransform transforms} that consume one or more
- * {@link PCollection PCollections}.
- *
- * <p>Whenever a non-root {@link AppliedPTransform} finishes processing one or more in-flight
- * elements (referred to as the input {@link CommittedBundle bundle}), the following occurs
- * atomically:
- * <ul>
- *  <li>All of the in-flight elements are removed from the collection of pending elements for the
- *      {@link AppliedPTransform}.</li>
- *  <li>All of the elements produced by the {@link AppliedPTransform} are added to the collection
- *      of pending elements for each {@link AppliedPTransform} that consumes them.</li>
- *  <li>The input watermark for the {@link AppliedPTransform} becomes the maximum value of
- *    <ul>
- *      <li>the previous input watermark</li>
- *      <li>
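
As an aside for readers following this refactoring: the input-watermark
update described in the javadoc above can be sketched roughly as follows.
This is a simplified, hypothetical illustration (the method and parameter
names are invented), using the Guava and Joda-Time types already imported
by the file; it is not the actual InMemoryWatermarkManager code:

    // Sketch: the new input watermark is the maximum of the previous input
    // watermark and the minimum of the producers' output watermarks and the
    // timestamps of still-pending elements, so it only ever advances.
    private Instant refreshInputWatermark(
        Instant previousInputWatermark,
        Collection<Instant> producerOutputWatermarks,
        SortedMultiset<Instant> pendingElementTimestamps) {
      Instant minProducerWatermark = producerOutputWatermarks.isEmpty()
          ? BoundedWindow.TIMESTAMP_MAX_VALUE
          : Ordering.natural().min(producerOutputWatermarks);
      Instant earliestPendingElement = pendingElementTimestamps.isEmpty()
          ? BoundedWindow.TIMESTAMP_MAX_VALUE
          : pendingElementTimestamps.firstEntry().getElement();
      Instant candidate =
          Ordering.natural().min(minProducerWatermark, earliestPendingElement);
      return Ordering.natural().max(previousInputWatermark, candidate);
    }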

[07/74] [partial] incubator-beam git commit: Rename com/google/cloud/dataflow->org/apache/beam

2016-04-13 Thread davor
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/0393a791/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/CachedThreadPoolExecutorServiceFactory.java
--
diff --git a/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/CachedThreadPoolExecutorServiceFactory.java b/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/CachedThreadPoolExecutorServiceFactory.java
deleted file mode 100644
index 0989fb5..0000000
--- a/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/CachedThreadPoolExecutorServiceFactory.java
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.google.cloud.dataflow.sdk.runners.inprocess;
-
-import com.google.cloud.dataflow.sdk.options.DefaultValueFactory;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-
-/**
- * A {@link ExecutorServiceFactory} that produces cached thread pools via
- * {@link Executors#newCachedThreadPool()}.
- */
-class CachedThreadPoolExecutorServiceFactory
-    implements DefaultValueFactory<ExecutorServiceFactory>, ExecutorServiceFactory {
-  private static final CachedThreadPoolExecutorServiceFactory INSTANCE =
-  new CachedThreadPoolExecutorServiceFactory();
-
-  @Override
-  public ExecutorServiceFactory create(PipelineOptions options) {
-return INSTANCE;
-  }
-
-  @Override
-  public ExecutorService create() {
-return Executors.newCachedThreadPool();
-  }
-}
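
For reviewers unfamiliar with DefaultValueFactory: a factory like this is
typically wired to a PipelineOptions getter via @Default.InstanceFactory,
roughly as in the hypothetical sketch below (the options interface and
method names are invented for illustration):

    import com.google.cloud.dataflow.sdk.options.Default;
    import com.google.cloud.dataflow.sdk.options.PipelineOptions;

    interface ExampleExecutorOptions extends PipelineOptions {
      // The factory's create(PipelineOptions) supplies the default value the
      // first time the getter is called without an explicit setting.
      @Default.InstanceFactory(CachedThreadPoolExecutorServiceFactory.class)
      ExecutorServiceFactory getExecutorServiceFactory();

      void setExecutorServiceFactory(ExecutorServiceFactory factory);
    }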

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/0393a791/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/Clock.java
--
diff --git a/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/Clock.java b/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/Clock.java
deleted file mode 100644
index 31c34ad..0000000
--- a/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/Clock.java
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.google.cloud.dataflow.sdk.runners.inprocess;
-
-import org.joda.time.Instant;
-
-/**
- * Access to the current time.
- */
-public interface Clock {
-  /**
-   * Returns the current time as an {@link Instant}.
-   */
-  Instant now();
-}
-
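
Two minimal implementations of this interface are easy to sketch; the class
names below are illustrative only and not part of this commit:

    // System-time clock for normal execution (Joda-Time Instant).
    class SystemClock implements Clock {
      @Override
      public Instant now() {
        return Instant.now();
      }
    }

    // Fixed clock, convenient for deterministic tests.
    class FixedClock implements Clock {
      private final Instant time;

      FixedClock(Instant time) {
        this.time = time;
      }

      @Override
      public Instant now() {
        return time;
      }
    }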

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/0393a791/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/CompletionCallback.java
--
diff --git a/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/CompletionCallback.java b/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/CompletionCallback.java
deleted file mode 100644
index b581616..0000000
--- a/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/CompletionCallback.java
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under 

[70/74] incubator-beam git commit: Initial update of imports

2016-04-13 Thread davor
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/b9724454/examples/java8/src/main/java/org/apache/beam/examples/complete/game/LeaderBoard.java
--
diff --git a/examples/java8/src/main/java/org/apache/beam/examples/complete/game/LeaderBoard.java b/examples/java8/src/main/java/org/apache/beam/examples/complete/game/LeaderBoard.java
index cedd696..594d2b8 100644
--- a/examples/java8/src/main/java/org/apache/beam/examples/complete/game/LeaderBoard.java
+++ b/examples/java8/src/main/java/org/apache/beam/examples/complete/game/LeaderBoard.java
@@ -15,30 +15,30 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package com.google.cloud.dataflow.examples.complete.game;
-
-import com.google.cloud.dataflow.examples.common.DataflowExampleOptions;
-import com.google.cloud.dataflow.examples.common.DataflowExampleUtils;
-import com.google.cloud.dataflow.examples.complete.game.utils.WriteToBigQuery;
-import com.google.cloud.dataflow.examples.complete.game.utils.WriteWindowedToBigQuery;
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.PipelineResult;
-import com.google.cloud.dataflow.sdk.io.PubsubIO;
-import com.google.cloud.dataflow.sdk.options.Default;
-import com.google.cloud.dataflow.sdk.options.Description;
-import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
-import com.google.cloud.dataflow.sdk.options.Validation;
-import com.google.cloud.dataflow.sdk.runners.DataflowPipelineRunner;
-import com.google.cloud.dataflow.sdk.transforms.ParDo;
-import com.google.cloud.dataflow.sdk.transforms.windowing.AfterProcessingTime;
-import com.google.cloud.dataflow.sdk.transforms.windowing.AfterWatermark;
-import com.google.cloud.dataflow.sdk.transforms.windowing.FixedWindows;
-import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindows;
-import com.google.cloud.dataflow.sdk.transforms.windowing.IntervalWindow;
-import com.google.cloud.dataflow.sdk.transforms.windowing.Repeatedly;
-import com.google.cloud.dataflow.sdk.transforms.windowing.Window;
-import com.google.cloud.dataflow.sdk.values.KV;
-import com.google.cloud.dataflow.sdk.values.PCollection;
+package org.apache.beam.examples.complete.game;
+
+import org.apache.beam.examples.common.DataflowExampleOptions;
+import org.apache.beam.examples.common.DataflowExampleUtils;
+import org.apache.beam.examples.complete.game.utils.WriteToBigQuery;
+import org.apache.beam.examples.complete.game.utils.WriteWindowedToBigQuery;
+import org.apache.beam.sdk.Pipeline;
+import org.apache.beam.sdk.PipelineResult;
+import org.apache.beam.sdk.io.PubsubIO;
+import org.apache.beam.sdk.options.Default;
+import org.apache.beam.sdk.options.Description;
+import org.apache.beam.sdk.options.PipelineOptionsFactory;
+import org.apache.beam.sdk.options.Validation;
+import org.apache.beam.sdk.runners.DataflowPipelineRunner;
+import org.apache.beam.sdk.transforms.ParDo;
+import org.apache.beam.sdk.transforms.windowing.AfterProcessingTime;
+import org.apache.beam.sdk.transforms.windowing.AfterWatermark;
+import org.apache.beam.sdk.transforms.windowing.FixedWindows;
+import org.apache.beam.sdk.transforms.windowing.GlobalWindows;
+import org.apache.beam.sdk.transforms.windowing.IntervalWindow;
+import org.apache.beam.sdk.transforms.windowing.Repeatedly;
+import org.apache.beam.sdk.transforms.windowing.Window;
+import org.apache.beam.sdk.values.KV;
+import org.apache.beam.sdk.values.PCollection;
 
 import org.joda.time.DateTimeZone;
 import org.joda.time.Duration;

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/b9724454/examples/java8/src/main/java/org/apache/beam/examples/complete/game/UserScore.java
--
diff --git a/examples/java8/src/main/java/org/apache/beam/examples/complete/game/UserScore.java b/examples/java8/src/main/java/org/apache/beam/examples/complete/game/UserScore.java
index 7babc6e..6a6c1cf 100644
--- a/examples/java8/src/main/java/org/apache/beam/examples/complete/game/UserScore.java
+++ b/examples/java8/src/main/java/org/apache/beam/examples/complete/game/UserScore.java
@@ -15,27 +15,27 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package com.google.cloud.dataflow.examples.complete.game;
-
-import com.google.cloud.dataflow.examples.complete.game.utils.WriteToBigQuery;
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.coders.AvroCoder;
-import com.google.cloud.dataflow.sdk.coders.DefaultCoder;
-import com.google.cloud.dataflow.sdk.io.TextIO;
-import com.google.cloud.dataflow.sdk.options.Default;
-import com.google.cloud.dataflow.sdk.options.Description;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
-import 

[72/74] incubator-beam git commit: Initial update of imports

2016-04-13 Thread davor
Initial update of imports

Performed using the following script

find . -iname "*.java" -exec sed -i -e \
"s/package com.google.cloud.dataflow/package org.apache.beam/" {} +

fixup! Rewrite import statements

Performed using the following command:

find . -iname "*.java" -exec sed -i -e \
"s/com.google.cloud.dataflow/org.apache.beam/" {} +

Revert changes to imports of proto2 coders (deployed to maven)

fixup! Update mentions of RunnableOnService

fixup! Organize imports using Eclipse

fixup! move sdk.properties

fixup! update package generated protos will be in

fixup! Move the PipelineOptionsFactoryJava8Test

fixup! Update user.avsc

fixup! Update runner and core pom.xml

fixup! Update dependencies in contrib pom.xmls

fixup! Update ApiSurface

fixup! Fix class names in test strings.

fixup! Update travis/test_wordcount.sh

fixup! Organize imports.

fixup! javadoc beam


Project: http://git-wip-us.apache.org/repos/asf/incubator-beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-beam/commit/b9724454
Tree: http://git-wip-us.apache.org/repos/asf/incubator-beam/tree/b9724454
Diff: http://git-wip-us.apache.org/repos/asf/incubator-beam/diff/b9724454

Branch: refs/heads/master
Commit: b972445412d4c7c563e57a2260e99b9b5e3c7a32
Parents: 0393a79
Author: bchambers 
Authored: Wed Apr 13 14:39:02 2016 -0700
Committer: Davor Bonaci 
Committed: Wed Apr 13 21:23:06 2016 -0700

--
 contrib/hadoop/pom.xml  |   8 +-
 .../apache/contrib/hadoop/HadoopFileSource.java |  26 +--
 .../apache/contrib/hadoop/WritableCoder.java|  12 +-
 .../contrib/hadoop/HadoopFileSourceTest.java|  16 +-
 .../contrib/hadoop/WritableCoderTest.java   |   4 +-
 contrib/join-library/pom.xml|   8 +-
 .../org/apache/contrib/joinlibrary/Join.java|  22 +-
 .../contrib/joinlibrary/InnerJoinTest.java  |  16 +-
 .../contrib/joinlibrary/OuterLeftJoinTest.java  |  16 +-
 .../contrib/joinlibrary/OuterRightJoinTest.java |  16 +-
 examples/java/pom.xml   |   4 +-
 .../beam/examples/DebuggingWordCount.java   |  34 +--
 .../apache/beam/examples/MinimalWordCount.java  |  24 +-
 .../apache/beam/examples/WindowedWordCount.java |  39 ++--
 .../org/apache/beam/examples/WordCount.java |  42 ++--
 .../examples/common/DataflowExampleOptions.java |   8 +-
 .../examples/common/DataflowExampleUtils.java   |  33 +--
 .../common/ExampleBigQueryTableOptions.java |  13 +-
 ...xamplePubsubTopicAndSubscriptionOptions.java |  12 +-
 .../common/ExamplePubsubTopicOptions.java   |  12 +-
 .../examples/common/PubsubFileInjector.java |  23 +-
 .../beam/examples/complete/AutoComplete.java|  69 +++---
 .../examples/complete/StreamingWordExtract.java |  29 +--
 .../apache/beam/examples/complete/TfIdf.java|  72 +++---
 .../examples/complete/TopWikipediaSessions.java |  47 ++--
 .../examples/complete/TrafficMaxLaneFlow.java   |  51 +++--
 .../beam/examples/complete/TrafficRoutes.java   |  49 ++--
 .../examples/cookbook/BigQueryTornadoes.java|  29 +--
 .../cookbook/CombinePerKeyExamples.java |  35 +--
 .../examples/cookbook/DatastoreWordCount.java   |  29 +--
 .../beam/examples/cookbook/DeDupExample.java|  26 +--
 .../beam/examples/cookbook/FilterExamples.java  |  31 +--
 .../beam/examples/cookbook/JoinExamples.java|  33 +--
 .../examples/cookbook/MaxPerKeyExamples.java|  29 +--
 .../beam/examples/cookbook/TriggerExample.java  |  68 +++---
 .../beam/examples/DebuggingWordCountTest.java   |   2 +-
 .../org/apache/beam/examples/WordCountTest.java |  26 +--
 .../examples/complete/AutoCompleteTest.java |  36 +--
 .../beam/examples/complete/TfIdfTest.java   |  22 +-
 .../complete/TopWikipediaSessionsTest.java  |  15 +-
 .../cookbook/BigQueryTornadoesTest.java |  11 +-
 .../cookbook/CombinePerKeyExamplesTest.java |  11 +-
 .../examples/cookbook/DeDupExampleTest.java |  18 +-
 .../examples/cookbook/FilterExamplesTest.java   |   9 +-
 .../examples/cookbook/JoinExamplesTest.java |  23 +-
 .../cookbook/MaxPerKeyExamplesTest.java |  11 +-
 .../examples/cookbook/TriggerExampleTest.java   |  33 +--
 .../beam/examples/MinimalWordCountJava8.java|  24 +-
 .../beam/examples/complete/game/GameStats.java  |  64 +++---
 .../examples/complete/game/HourlyTeamScore.java |  31 ++-
 .../examples/complete/game/LeaderBoard.java |  48 ++--
 .../beam/examples/complete/game/UserScore.java  |  42 ++--
 .../complete/game/injector/Injector.java|   2 +-
 .../complete/game/injector/InjectorUtils.java   |   3 +-
 .../injector/RetryHttpInitializerWrapper.java   |   2 +-
 .../complete/game/utils/WriteToBigQuery.java|  29 +--
 .../game/utils/WriteWindowedToBigQuery.java |  21 +-
 .../examples/MinimalWordCountJava8Test.java |  27 +--
 .../examples/complete/game/GameStatsTest.java   | 

[73/74] incubator-beam git commit: Fix test ordering in DataflowPipelineTranslatorTest

2016-04-13 Thread davor
Fix test ordering in DataflowPipelineTranslatorTest


Project: http://git-wip-us.apache.org/repos/asf/incubator-beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-beam/commit/75a1905e
Tree: http://git-wip-us.apache.org/repos/asf/incubator-beam/tree/75a1905e
Diff: http://git-wip-us.apache.org/repos/asf/incubator-beam/diff/75a1905e

Branch: refs/heads/master
Commit: 75a1905ebe09755da5c464a72aafc67bc982b0bc
Parents: b972445
Author: bchambers 
Authored: Wed Apr 13 17:39:34 2016 -0700
Committer: Davor Bonaci 
Committed: Wed Apr 13 21:24:28 2016 -0700

--
 .../runners/DataflowPipelineTranslatorTest.java | 71 +++-
 1 file changed, 38 insertions(+), 33 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/75a1905e/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/sdk/runners/DataflowPipelineTranslatorTest.java
--
diff --git a/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/sdk/runners/DataflowPipelineTranslatorTest.java b/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/sdk/runners/DataflowPipelineTranslatorTest.java
index 97d1a50..1429e5a 100644
--- a/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/sdk/runners/DataflowPipelineTranslatorTest.java
+++ b/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/sdk/runners/DataflowPipelineTranslatorTest.java
@@ -74,6 +74,7 @@ import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.Iterables;
 
+import org.hamcrest.Matcher;
 import org.hamcrest.Matchers;
 import org.junit.Assert;
 import org.junit.Rule;
@@ -854,39 +855,43 @@ public class DataflowPipelineTranslatorTest implements Serializable {
     Map<String, Object> parDo2Properties = steps.get(2).getProperties();
     assertThat(parDo1Properties, hasKey("display_data"));
 
-    Collection<Map<String, Object>> fn1displayData =
-        (Collection<Map<String, Object>>) parDo1Properties.get("display_data");
-    Collection<Map<String, Object>> fn2displayData =
-        (Collection<Map<String, Object>>) parDo2Properties.get("display_data");
-
-    ImmutableList<ImmutableMap<String, Object>> expectedFn1DisplayData = ImmutableList.of(
-        ImmutableMap.<String, Object>builder()
-          .put("namespace", fn1.getClass().getName())
-          .put("key", "foo")
-          .put("type", "STRING")
-          .put("value", "bar")
-          .build(),
-        ImmutableMap.<String, Object>builder()
-          .put("namespace", fn1.getClass().getName())
-          .put("key", "foo2")
-          .put("type", "JAVA_CLASS")
-          .put("value", DataflowPipelineTranslatorTest.class.getName())
-          .put("shortValue", DataflowPipelineTranslatorTest.class.getSimpleName())
-          .put("label", "Test Class")
-          .put("linkUrl", "http://www.google.com")
-          .build()
-    );
-
-    ImmutableList<ImmutableMap<String, Object>> expectedFn2DisplayData = ImmutableList.of(
+
+    @SuppressWarnings("unchecked")
+    Collection<Map<String, Object>> fn1displayData =
+        (Collection<Map<String, Object>>) parDo1Properties.get("display_data");
+    @SuppressWarnings("unchecked")
+    Collection<Map<String, Object>> fn2displayData =
+        (Collection<Map<String, Object>>) parDo2Properties.get("display_data");
+
+    @SuppressWarnings("unchecked")
+    Matcher<Iterable<? extends Map<String, Object>>> fn1expectedData =
+        Matchers.<Map<String, Object>>containsInAnyOrder(
+            ImmutableMap.<String, Object>builder()
+                .put("namespace", fn1.getClass().getName())
+                .put("key", "foo")
+                .put("type", "STRING")
+                .put("value", "bar")
+                .build(),
+            ImmutableMap.<String, Object>builder()
+                .put("namespace", fn1.getClass().getName())
+                .put("key", "foo2")
+                .put("type", "JAVA_CLASS")
+                .put("value", DataflowPipelineTranslatorTest.class.getName())
+                .put("shortValue", DataflowPipelineTranslatorTest.class.getSimpleName())
+                .put("label", "Test Class")
+                .put("linkUrl", "http://www.google.com")
+                .build());
+
+    @SuppressWarnings("unchecked")
+    Matcher<Iterable<? extends Map<String, Object>>> fn2expectedData =
+        Matchers.<Map<String, Object>>contains(
         ImmutableMap.<String, Object>builder()
-            .put("namespace", fn2.getClass().getName())
-            .put("key", "foo3")
-            .put("type", "INTEGER")
-            .put("value", 1234L)
-            .build()
-    );
-
-    assertEquals(expectedFn1DisplayData, fn1displayData);
- 

[jira] [Commented] (BEAM-151) Create Dataflow Runner Package

2016-04-13 Thread Luke Cwik (JIRA)

[ 
https://issues.apache.org/jira/browse/BEAM-151?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15240134#comment-15240134
 ] 

Luke Cwik commented on BEAM-151:


No, I still need to update package-info.java and make another pass over the 
remaining classes to see if there are any stragglers.

> Create Dataflow Runner Package
> --
>
> Key: BEAM-151
> URL: https://issues.apache.org/jira/browse/BEAM-151
> Project: Beam
>  Issue Type: Improvement
>  Components: runner-dataflow
>Reporter: Luke Cwik
>Assignee: Luke Cwik
>
> Move Dataflow runner out of SDK core and into new Dataflow runner maven 
> module.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)


[jira] [Updated] (BEAM-190) Dead-letter drop for bad BigQuery records

2016-04-13 Thread Davor Bonaci (JIRA)

 [ 
https://issues.apache.org/jira/browse/BEAM-190?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Davor Bonaci updated BEAM-190:
--
Assignee: Davor Bonaci  (was: Frances Perry)

> Dead-letter drop for bad BigQuery records
> -
>
> Key: BEAM-190
> URL: https://issues.apache.org/jira/browse/BEAM-190
> Project: Beam
>  Issue Type: Bug
>  Components: runner-core
>Reporter: Mark Shields
>Assignee: Davor Bonaci
>
> If a BigQuery insert fails for data-specific rather than structural reasons 
> (eg cannot parse a date) then the bundle will be retried indefinitely, first 
> by BigQueryTableInserter.insertAll then by the overall production retry logic 
> of the underlying runner.
> Better would be to allow customer to specify a dead-letter store for records 
> such as those so that overall processing can continue while bad records are 
> quarantined.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)


[jira] [Commented] (BEAM-190) Dead-letter drop for bad BigQuery records

2016-04-13 Thread Davor Bonaci (JIRA)

[ 
https://issues.apache.org/jira/browse/BEAM-190?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15240120#comment-15240120
 ] 

Davor Bonaci commented on BEAM-190:
---

Quarantining failing elements was proposed several times.

It is somewhat orthogonal to losing data -- we can re-process quarantined 
elements at a later point (e.g., periodically retry, after an update, etc.). In 
a general case, however, the pipeline is unlikely to make much progress with 
quarantined elements, unless we figure out something "smart" how to unblock 
progress. It is unclear what the value would be without that.

I would probably try to treat this as a backend-specific feature, as opposed to 
part of Beam. Thoughts?
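
For concreteness, the dead-letter pattern under discussion is usually
sketched with side outputs in the current SDK, along these lines. This is a
rough, hypothetical illustration (input is assumed to be a
PCollection<String>, and parse() is an invented helper that turns a record
into a TableRow), not a proposed API:

    final TupleTag<TableRow> parsedTag = new TupleTag<TableRow>() {};
    final TupleTag<String> badRecordsTag = new TupleTag<String>() {};

    PCollectionTuple results = input.apply(
        ParDo.withOutputTags(parsedTag, TupleTagList.of(badRecordsTag))
            .of(new DoFn<String, TableRow>() {
              @Override
              public void processElement(ProcessContext c) {
                try {
                  c.output(parse(c.element()));
                } catch (Exception e) {
                  // Route the raw record to the dead-letter output instead
                  // of failing the bundle.
                  c.sideOutput(badRecordsTag, c.element());
                }
              }
            }));

    PCollection<TableRow> parsed = results.get(parsedTag);
    PCollection<String> deadLetters = results.get(badRecordsTag);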

> Dead-letter drop for bad BigQuery records
> -
>
> Key: BEAM-190
> URL: https://issues.apache.org/jira/browse/BEAM-190
> Project: Beam
>  Issue Type: Bug
>  Components: runner-core
>Reporter: Mark Shields
>Assignee: Frances Perry
>
> If a BigQuery insert fails for data-specific rather than structural reasons 
> (eg cannot parse a date) then the bundle will be retried indefinitely, first 
> by BigQueryTableInserter.insertAll then by the overall production retry logic 
> of the underlying runner.
> Better would be to allow customer to specify a dead-letter store for records 
> such as those so that overall processing can continue while bad records are 
> quarantined.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)


[jira] [Commented] (BEAM-151) Create Dataflow Runner Package

2016-04-13 Thread Davor Bonaci (JIRA)

[ 
https://issues.apache.org/jira/browse/BEAM-151?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15240107#comment-15240107
 ] 

Davor Bonaci commented on BEAM-151:
---

Done?

> Create Dataflow Runner Package
> --
>
> Key: BEAM-151
> URL: https://issues.apache.org/jira/browse/BEAM-151
> Project: Beam
>  Issue Type: Improvement
>  Components: runner-dataflow
>Reporter: Luke Cwik
>Assignee: Luke Cwik
>
> Move Dataflow runner out of SDK core and into new Dataflow runner maven 
> module.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)


[GitHub] incubator-beam pull request: Update Spark Test to use more standar...

2016-04-13 Thread tgroh
GitHub user tgroh opened a pull request:

https://github.com/apache/incubator-beam/pull/177

Update Spark Test to use more standard Pipeline Running

Be sure to do all of the following to help us incorporate your contribution
quickly and easily:

 - [ ] Make sure the PR title is formatted like:
   `[BEAM-<Jira issue #>] Description of pull request`
 - [ ] Make sure tests pass via `mvn clean verify`. (Even better, enable
   Travis-CI on your fork and ensure the whole test matrix passes).
 - [ ] Replace `<Jira issue #>` in the title with the actual Jira issue
   number, if there is one.
 - [ ] If this contribution is large, please file an Apache
   [Individual Contributor License Agreement](https://www.apache.org/licenses/icla.txt).

---

Ensures that the Pipeline uses the appropriate runner while constructing 
the graph.
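
For context, the "more standard" pattern referred to is roughly the
following sketch, using the usual options-driven runner selection (the
Spark runner class names here are assumptions about that module, not taken
from this PR):

    SparkPipelineOptions options =
        PipelineOptionsFactory.as(SparkPipelineOptions.class);
    options.setRunner(SparkPipelineRunner.class);
    // The Pipeline now knows its runner while the graph is constructed.
    Pipeline p = Pipeline.create(options);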

You can merge this pull request into a Git repository by running:

$ git pull https://github.com/tgroh/incubator-beam 
spark_transform_translator_test_fix

Alternatively you can review and apply these changes as the patch at:

https://github.com/apache/incubator-beam/pull/177.patch

To close this pull request, make a commit to your master/trunk branch
with (at least) the following in the commit message:

This closes #177


commit 7591da4161bfb00233a8554179712e10def64ff2
Author: Thomas Groh 
Date:   2016-04-13T16:43:49Z

Update Spark Test to use more standard Pipeline Running




---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] incubator-beam pull request: Rename packages

2016-04-13 Thread bjchambers
GitHub user bjchambers opened a pull request:

https://github.com/apache/incubator-beam/pull/176

Rename packages

Be sure to do all of the following to help us incorporate your contribution
quickly and easily:

 - [ ] Make sure the PR title is formatted like:
   `[BEAM-<Jira issue #>] Description of pull request`
 - [ ] Make sure tests pass via `mvn clean verify`. (Even better, enable
   Travis-CI on your fork and ensure the whole test matrix passes).
 - [ ] Replace `<Jira issue #>` in the title with the actual Jira issue
   number, if there is one.
 - [ ] If this contribution is large, please file an Apache
   [Individual Contributor License Agreement](https://www.apache.org/licenses/icla.txt).

---

I'm verifying this now so there may be a few more fixups, but I wanted to 
give you a chance to get started on the review.

R: @davorbonaci 

You can merge this pull request into a Git repository by running:

$ git pull https://github.com/bjchambers/incubator-beam rename-packages

Alternatively you can review and apply these changes as the patch at:

https://github.com/apache/incubator-beam/pull/176.patch

To close this pull request, make a commit to your master/trunk branch
with (at least) the following in the commit message:

This closes #176


commit de03fd8a9e812add6d1e35afa437db28164b5c9d
Author: bchambers 
Date:   2016-04-13T16:17:37Z

Apply the package rename and organize imports

commit 083dcebb43bef33a1a2c4352b455e9a0815e09ac
Author: bchambers 
Date:   2016-04-13T17:00:42Z

fixup! Update main pom.xml files.

commit e0807b22a329d02f7f3891c1e4eba48d47507cbc
Author: bchambers 
Date:   2016-04-13T16:52:05Z

fixup! Get the SDK tests passing.

commit b9c3b534ceaaf94325143e08b12d37fceb81315a
Author: bchambers 
Date:   2016-04-13T16:59:45Z

fixup! Move the files in the join-library and update the pom.

commit 3e96cc51395ac322b036084559d979ef8be331c1
Author: bchambers 
Date:   2016-04-13T17:00:07Z

fixup! Move the files in the hadoop contrib directory, and update pom.

commit a5acb43a8674ca96b3e76d0cb0f569adea5ed553
Author: bchambers 
Date:   2016-04-13T17:00:52Z

fixup! Update test_wordcount.sh

commit e1819c64b267ff499bb01457dace9c45d9662eb8
Author: bchambers 
Date:   2016-04-13T17:03:59Z

fixup! move sdk.properties file

commit aa4adc377838ebb5fff78fbf7de977bc824f24a4
Author: bchambers 
Date:   2016-04-13T20:33:35Z

fixup! Update the archetypes




---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[jira] [Commented] (BEAM-190) Dead-letter drop for bad BigQuery records

2016-04-13 Thread Luke Cwik (JIRA)

[ 
https://issues.apache.org/jira/browse/BEAM-190?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15239815#comment-15239815
 ] 

Luke Cwik commented on BEAM-190:


From Dan Halperin:
I thought that we were under the impression that rather than losing data it's 
likely better to update your pipeline to handle these?

> Dead-letter drop for bad BigQuery records
> -
>
> Key: BEAM-190
> URL: https://issues.apache.org/jira/browse/BEAM-190
> Project: Beam
>  Issue Type: Bug
>  Components: runner-core
>Reporter: Mark Shields
>Assignee: Frances Perry
>
> If a BigQuery insert fails for data-specific rather than structural reasons 
> (eg cannot parse a date) then the bundle will be retried indefinitely, first 
> by BigQueryTableInserter.insertAll then by the overall production retry logic 
> of the underlying runner.
> Better would be to allow customer to specify a dead-letter store for records 
> such as those so that overall processing can continue while bad records are 
> quarantined.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)


[jira] [Commented] (BEAM-121) Publish DisplayData from common PTransforms

2016-04-13 Thread ASF GitHub Bot (JIRA)

[ 
https://issues.apache.org/jira/browse/BEAM-121?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15239741#comment-15239741
 ] 

ASF GitHub Bot commented on BEAM-121:
-

Github user asfgit closed the pull request at:

https://github.com/apache/incubator-beam/pull/166


> Publish DisplayData from common PTransforms
> ---
>
> Key: BEAM-121
> URL: https://issues.apache.org/jira/browse/BEAM-121
> Project: Beam
>  Issue Type: Sub-task
>  Components: sdk-java-core
>Reporter: Scott Wegner
>Assignee: Scott Wegner
>




--
This message was sent by Atlassian JIRA
(v6.3.4#6332)


[GitHub] incubator-beam pull request: [BEAM-121] DisplayData API tweaks

2016-04-13 Thread asfgit
Github user asfgit closed the pull request at:

https://github.com/apache/incubator-beam/pull/166


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[1/2] incubator-beam git commit: DisplayData API tweaks

2016-04-13 Thread bchambers
Repository: incubator-beam
Updated Branches:
  refs/heads/master 920f14dbe -> 1915503b9


DisplayData API tweaks

- Add additional overloads for conditionally registering null display data
- Serialize DisplayData using JSON primitives
- Fix checkstyle error
- Bind generic type parameter in DataflowPipelineTranslator#addDisplayData
- Add additional tests for new APIs
- Improve readability of DisplayDataTest#populateDisplayData


Project: http://git-wip-us.apache.org/repos/asf/incubator-beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-beam/commit/dba82fea
Tree: http://git-wip-us.apache.org/repos/asf/incubator-beam/tree/dba82fea
Diff: http://git-wip-us.apache.org/repos/asf/incubator-beam/diff/dba82fea

Branch: refs/heads/master
Commit: dba82feaca316e4f78357957c67059039589da36
Parents: 920f14d
Author: Scott Wegner 
Authored: Mon Apr 11 09:08:23 2016 -0700
Committer: bchambers 
Committed: Wed Apr 13 11:14:14 2016 -0700

--
 .../sdk/runners/DataflowPipelineTranslator.java |   9 +-
 .../runners/DataflowPipelineTranslatorTest.java |  12 +-
 .../sdk/transforms/display/DisplayData.java | 103 +++---
 .../transforms/display/DisplayDataMatchers.java |  10 +-
 .../sdk/transforms/display/DisplayDataTest.java | 201 +--
 5 files changed, 275 insertions(+), 60 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/dba82fea/runners/google-cloud-dataflow-java/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslator.java
--
diff --git a/runners/google-cloud-dataflow-java/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslator.java b/runners/google-cloud-dataflow-java/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslator.java
index 5bff46c..fa7067e 100644
--- a/runners/google-cloud-dataflow-java/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslator.java
+++ b/runners/google-cloud-dataflow-java/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslator.java
@@ -81,7 +81,6 @@ import com.google.cloud.dataflow.sdk.values.TupleTag;
 import com.google.cloud.dataflow.sdk.values.TypedPValue;
 import com.google.common.base.Preconditions;
 import com.google.common.base.Strings;
-import com.google.common.collect.Lists;
 
 import com.fasterxml.jackson.core.JsonProcessingException;
 import com.fasterxml.jackson.databind.ObjectMapper;
@@ -730,12 +729,8 @@ public class DataflowPipelineTranslator {
 }
 
 private void addDisplayData(String name, DisplayData displayData) {
-      List<Map<String, Object>> serializedItems = Lists.newArrayList();
-      for (DisplayData.Item item : displayData.items()) {
-        serializedItems.add(MAPPER.convertValue(item, Map.class));
-      }
-
-      addList(getProperties(), name, serializedItems);
+      List<Map<String, Object>> list = MAPPER.convertValue(displayData, List.class);
+      addList(getProperties(), name, list);
 }
 
 @Override

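For context on the Jackson call introduced above: ObjectMapper.convertValue
runs a value through its registered serializer directly into JSON-primitive
containers (Maps, Lists, Strings, numbers) without writing out text and
re-parsing it. A standalone sketch, independent of the Dataflow code:

    import com.fasterxml.jackson.databind.ObjectMapper;

    import java.util.Arrays;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;

    public class ConvertValueDemo {
      public static void main(String[] args) {
        ObjectMapper mapper = new ObjectMapper();

        Map<String, Object> item = new HashMap<>();
        item.put("key", "foo");
        item.put("value", "bar");

        // Convert a List of Maps straight to JSON-primitive form.
        @SuppressWarnings("unchecked")
        List<Map<String, Object>> converted = (List<Map<String, Object>>)
            mapper.convertValue(Arrays.asList(item), List.class);
        System.out.println(converted);  // [{key=foo, value=bar}]
      }
    }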
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/dba82fea/runners/google-cloud-dataflow-java/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslatorTest.java
--
diff --git a/runners/google-cloud-dataflow-java/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslatorTest.java b/runners/google-cloud-dataflow-java/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslatorTest.java
index 1b32b73..dd1b3c8 100644
--- a/runners/google-cloud-dataflow-java/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslatorTest.java
+++ b/runners/google-cloud-dataflow-java/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslatorTest.java
@@ -818,8 +818,8 @@ public class DataflowPipelineTranslatorTest implements Serializable {
   @Override
   public void populateDisplayData(DisplayData.Builder builder) {
 builder
-.add("foo", "bar")
-.add("foo2", DataflowPipelineTranslatorTest.class)
+.add("foo", "bar")
+.add("foo2", DataflowPipelineTranslatorTest.class)
 .withLabel("Test Class")
 .withLinkUrl("http://www.google.com");
   }
@@ -833,7 +833,7 @@ public class DataflowPipelineTranslatorTest implements Serializable {
 
   @Override
   public void populateDisplayData(DisplayData.Builder builder) {
-builder.add("foo3", "barge");
+builder.add("foo3", 1234);
   }
 };
 
@@ -876,11 +876,11 @@ public class DataflowPipelineTranslatorTest implements Serializable {
 );
 
 ImmutableList expectedFn2DisplayData = ImmutableList.of(
-   

Re: [jira] [Commented] (BEAM-190) Dead-letter drop for bad BigQuery records

2016-04-13 Thread Dan Halperin
I thought that we were under the impression that rather than losing data
it's likely better to update your pipeline to handle these?

On Wed, Apr 13, 2016 at 10:59 AM, Luke Cwik (JIRA)  wrote:

>
> [
> https://issues.apache.org/jira/browse/BEAM-190?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15239701#comment-15239701
> ]
>
> Luke Cwik commented on BEAM-190:
> 
>
> I believe this can easily extend beyond BigQuery to having a dead letter
> feature for failing DoFns of any kind.
>
> > Dead-letter drop for bad BigQuery records
> > -
> >
> > Key: BEAM-190
> > URL: https://issues.apache.org/jira/browse/BEAM-190
> > Project: Beam
> >  Issue Type: Bug
> >  Components: runner-core
> >Reporter: Mark Shields
> >Assignee: Frances Perry
> >
> > If a BigQuery insert fails for data-specific rather than structural
> reasons (eg cannot parse a date) then the bundle will be retried
> indefinitely, first by BigQueryTableInserter.insertAll then by the overall
> production retry logic of the underlying runner.
> > Better would be to allow customer to specify a dead-letter store for
> records such as those so that overall processing can continue while bad
> records are quarantined.
>
>
>
> --
> This message was sent by Atlassian JIRA
> (v6.3.4#6332)
>


[jira] [Updated] (BEAM-189) The Spark runner uses valueInEmptyWindow which causes values to be dropped

2016-04-13 Thread Amit Sela (JIRA)

 [ 
https://issues.apache.org/jira/browse/BEAM-189?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Amit Sela updated BEAM-189:
---
Description: 
Values in empty windows may be dropped at any time, so the default windowing
should be with the GlobalWindow.
 

  was:
In case the Pipeline source is TextIO.Read.Bound it seems that the elements are 
"ValuesInEmptyWindow" which causes all Combine implementations to fail.
 


> The Spark runner uses valueInEmptyWindow which causes values to be dropped
> --
>
> Key: BEAM-189
> URL: https://issues.apache.org/jira/browse/BEAM-189
> Project: Beam
>  Issue Type: Bug
>  Components: runner-spark
>Reporter: Amit Sela
>Assignee: Amit Sela
>
> Values in empty windows may be dropped at any time, so the default
> windowing should be with the GlobalWindow.
>  



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)


[jira] [Updated] (BEAM-189) The Spark runner uses valueInEmptyWindow which causes values to be dropped

2016-04-13 Thread Amit Sela (JIRA)

 [ 
https://issues.apache.org/jira/browse/BEAM-189?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Amit Sela updated BEAM-189:
---
Summary: The Spark runner uses valueInEmptyWindow which causes values to be 
dropped  (was: Combine not working if source is TextIO)

> The Spark runner uses valueInEmptyWindow which causes values to be dropped
> --
>
> Key: BEAM-189
> URL: https://issues.apache.org/jira/browse/BEAM-189
> Project: Beam
>  Issue Type: Bug
>  Components: runner-spark
>Reporter: Amit Sela
>Assignee: Amit Sela
>
> In case the Pipeline source is TextIO.Read.Bound it seems that the elements 
> are "ValuesInEmptyWindow" which causes all Combine implementations to fail.
>  



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)


[2/2] incubator-beam git commit: This closes #175

2016-04-13 Thread lcwik
This closes #175


Project: http://git-wip-us.apache.org/repos/asf/incubator-beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-beam/commit/920f14db
Tree: http://git-wip-us.apache.org/repos/asf/incubator-beam/tree/920f14db
Diff: http://git-wip-us.apache.org/repos/asf/incubator-beam/diff/920f14db

Branch: refs/heads/master
Commit: 920f14dbe6c17b92af299165acee74ed4ee5955c
Parents: d597389 ccaa376
Author: Luke Cwik 
Authored: Wed Apr 13 10:54:10 2016 -0700
Committer: Luke Cwik 
Committed: Wed Apr 13 10:54:10 2016 -0700

--
 .github/PULL_REQUEST_TEMPLATE.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--




[1/2] incubator-beam git commit: Fix third checkbox in Github PR template

2016-04-13 Thread lcwik
Repository: incubator-beam
Updated Branches:
  refs/heads/master d597389f9 -> 920f14dbe


Fix third checkbox in Github PR template

Currently "" doesn't display in the rendered markdown.
Using backticks allows it to.


Project: http://git-wip-us.apache.org/repos/asf/incubator-beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-beam/commit/ccaa376b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-beam/tree/ccaa376b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-beam/diff/ccaa376b

Branch: refs/heads/master
Commit: ccaa376bb9597c7bdd58f40ec4d9c46118d8ce12
Parents: d597389
Author: Thomas Groh 
Authored: Wed Apr 13 10:42:39 2016 -0700
Committer: Luke Cwik 
Committed: Wed Apr 13 10:53:47 2016 -0700

--
 .github/PULL_REQUEST_TEMPLATE.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/ccaa376b/.github/PULL_REQUEST_TEMPLATE.md
--
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
index edd5dd9..b17f620 100644
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -5,7 +5,7 @@ quickly and easily:
     `[BEAM-<Jira issue #>] Description of pull request`
  - [ ] Make sure tests pass via `mvn clean verify`. (Even better, enable
    Travis-CI on your fork and ensure the whole test matrix passes).
- - [ ] Replace "<Jira issue #>" in the title with the actual Jira issue
+ - [ ] Replace `<Jira issue #>` in the title with the actual Jira issue
    number, if there is one.
  - [ ] If this contribution is large, please file an Apache
    [Individual Contributor License Agreement](https://www.apache.org/licenses/icla.txt).



[GitHub] incubator-beam pull request: Fix third checkbox in Github PR templ...

2016-04-13 Thread asfgit
Github user asfgit closed the pull request at:

https://github.com/apache/incubator-beam/pull/175


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] incubator-beam pull request: Fix third checkbox in Github PR templ...

2016-04-13 Thread tgroh
GitHub user tgroh opened a pull request:

https://github.com/apache/incubator-beam/pull/175

Fix third checkbox in Github PR template

Be sure to do all of the following to help us incorporate your contribution
quickly and easily:

 - [ ] Make sure the PR title is formatted like:
   `[BEAM-<Jira issue #>] Description of pull request`
 - [ ] Make sure tests pass via `mvn clean verify`. (Even better, enable
   Travis-CI on your fork and ensure the whole test matrix passes).
 - [ ] Replace "<Jira issue #>" in the title with the actual Jira issue
   number, if there is one.
 - [ ] If this contribution is large, please file an Apache
   [Individual Contributor License Agreement](https://www.apache.org/licenses/icla.txt).

---


You can merge this pull request into a Git repository by running:

$ git pull https://github.com/tgroh/incubator-beam fix_pr_template

Alternatively you can review and apply these changes as the patch at:

https://github.com/apache/incubator-beam/pull/175.patch

To close this pull request, make a commit to your master/trunk branch
with (at least) the following in the commit message:

This closes #175


commit 149d68a1db1199e7e24e024f2bd6ed2d455079b2
Author: Thomas Groh 
Date:   2016-04-13T17:42:39Z

Fix third checkbox in Github PR template

Currently "" doesn't display in the rendered markdown.
Using backticks allows it to.




---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[jira] [Created] (BEAM-190) Dead-letter drop for bad BigQuery records

2016-04-13 Thread Mark Shields (JIRA)
Mark Shields created BEAM-190:
-

 Summary: Dead-letter drop for bad BigQuery records
 Key: BEAM-190
 URL: https://issues.apache.org/jira/browse/BEAM-190
 Project: Beam
  Issue Type: Bug
  Components: runner-core
Reporter: Mark Shields
Assignee: Frances Perry


If a BigQuery insert fails for data-specific rather than structural reasons (eg 
cannot parse a date) then the bundle will be retried indefinitely, first by 
BigQueryTableInserter.insertAll then by the overall production retry logic of 
the underlying runner.

Better would be to allow customer to specify a dead-letter store for records 
such as those so that overall processing can continue while bad records are 
quarantined.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)


[2/3] incubator-beam git commit: Update 'starter' archetype

2016-04-13 Thread davor
Update 'starter' archetype

This makes the test run in the 'install' phase, as opposed to the
'integration-test' phase.


Project: http://git-wip-us.apache.org/repos/asf/incubator-beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-beam/commit/8afd3483
Tree: http://git-wip-us.apache.org/repos/asf/incubator-beam/tree/8afd3483
Diff: http://git-wip-us.apache.org/repos/asf/incubator-beam/diff/8afd3483

Branch: refs/heads/master
Commit: 8afd3483bd4a4734dca141ef2d93e3d6a9db9331
Parents: b2978a9
Author: Davor Bonaci 
Authored: Wed Apr 13 10:11:45 2016 -0700
Committer: Davor Bonaci 
Committed: Wed Apr 13 10:11:45 2016 -0700

--
 sdks/java/maven-archetypes/starter/pom.xml | 15 +++
 1 file changed, 15 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/8afd3483/sdks/java/maven-archetypes/starter/pom.xml
--
diff --git a/sdks/java/maven-archetypes/starter/pom.xml b/sdks/java/maven-archetypes/starter/pom.xml
index 2c7b6b9..d5357c7 100644
--- a/sdks/java/maven-archetypes/starter/pom.xml
+++ b/sdks/java/maven-archetypes/starter/pom.xml
@@ -50,6 +50,21 @@
         <plugin>
           <artifactId>maven-archetype-plugin</artifactId>
           <version>2.4</version>
+          <executions>
+            <execution>
+              <id>default-integration-test</id>
+              <phase>install</phase>
+              <goals>
+                <goal>integration-test</goal>
+              </goals>
+            </execution>
+          </executions>
         </plugin>
       </plugins>
     </build>



[1/3] incubator-beam git commit: Update 'examples' archetype

2016-04-13 Thread davor
Repository: incubator-beam
Updated Branches:
  refs/heads/master 243ae8651 -> d597389f9


Update 'examples' archetype

This makes the test run in the 'install' phase, as opposed to the
'integration-test' phase.


Project: http://git-wip-us.apache.org/repos/asf/incubator-beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-beam/commit/b2978a95
Tree: http://git-wip-us.apache.org/repos/asf/incubator-beam/tree/b2978a95
Diff: http://git-wip-us.apache.org/repos/asf/incubator-beam/diff/b2978a95

Branch: refs/heads/master
Commit: b2978a95e8fb3afdfe01fdfd0d967e92c403834e
Parents: 243ae86
Author: Davor Bonaci 
Authored: Tue Apr 12 21:43:32 2016 -0700
Committer: Davor Bonaci 
Committed: Tue Apr 12 21:43:32 2016 -0700

--
 sdks/java/maven-archetypes/examples/pom.xml | 15 +++
 1 file changed, 15 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/b2978a95/sdks/java/maven-archetypes/examples/pom.xml
--
diff --git a/sdks/java/maven-archetypes/examples/pom.xml b/sdks/java/maven-archetypes/examples/pom.xml
index 3b513da..1d82664 100644
--- a/sdks/java/maven-archetypes/examples/pom.xml
+++ b/sdks/java/maven-archetypes/examples/pom.xml
@@ -49,6 +49,21 @@
         <plugin>
           <artifactId>maven-archetype-plugin</artifactId>
           <version>2.4</version>
+          <executions>
+            <execution>
+              <id>default-integration-test</id>
+              <phase>install</phase>
+              <goals>
+                <goal>integration-test</goal>
+              </goals>
+            </execution>
+          </executions>
         </plugin>
       </plugins>
     </build>



[3/3] incubator-beam git commit: This closes #174

2016-04-13 Thread davor
This closes #174


Project: http://git-wip-us.apache.org/repos/asf/incubator-beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-beam/commit/d597389f
Tree: http://git-wip-us.apache.org/repos/asf/incubator-beam/tree/d597389f
Diff: http://git-wip-us.apache.org/repos/asf/incubator-beam/diff/d597389f

Branch: refs/heads/master
Commit: d597389f97b04445e76f3543c07579cd88801093
Parents: 243ae86 8afd348
Author: Davor Bonaci 
Authored: Wed Apr 13 10:14:29 2016 -0700
Committer: Davor Bonaci 
Committed: Wed Apr 13 10:14:29 2016 -0700

--
 sdks/java/maven-archetypes/examples/pom.xml | 15 +++
 sdks/java/maven-archetypes/starter/pom.xml  | 15 +++
 2 files changed, 30 insertions(+)
--




[jira] [Created] (BEAM-189) Combine not working if source is TextIO

2016-04-13 Thread Amit Sela (JIRA)
Amit Sela created BEAM-189:
--

 Summary: Combine not working if source is TextIO
 Key: BEAM-189
 URL: https://issues.apache.org/jira/browse/BEAM-189
 Project: Beam
  Issue Type: Bug
  Components: runner-spark
Reporter: Amit Sela
Assignee: Amit Sela


In case the Pipeline source is TextIO.Read.Bound it seems that the elements are 
"ValuesInEmptyWindow" which causes all Combine implementations to fail.
 



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)