[3/3] beam git commit: This closes #2468

2017-04-10 Thread jkff
This closes #2468


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/c58f4f89
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/c58f4f89
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/c58f4f89

Branch: refs/heads/master
Commit: c58f4f89b3026e130b63dc14731a8ee8615e92f5
Parents: 0a0b1c8 159ac58
Author: Eugene Kirpichov 
Authored: Mon Apr 10 22:51:54 2017 -0700
Committer: Eugene Kirpichov 
Committed: Mon Apr 10 22:51:54 2017 -0700

--
 runners/google-cloud-dataflow-java/pom.xml| 2 +-
 .../src/main/java/org/apache/beam/sdk/transforms/DoFn.java| 4 
 .../sdk/transforms/reflect/ByteBuddyDoFnInvokerFactory.java   | 7 ---
 .../org/apache/beam/sdk/transforms/reflect/DoFnInvoker.java   | 4 +---
 4 files changed, 2 insertions(+), 15 deletions(-)
--




[2/3] beam git commit: Removes DoFn.ProcessContinuation completely

2017-04-10 Thread jkff
Removes DoFn.ProcessContinuation completely


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/62e23b71
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/62e23b71
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/62e23b71

Branch: refs/heads/master
Commit: 62e23b71e3b964033c15707629b560dfc8204654
Parents: 0a0b1c8
Author: Eugene Kirpichov 
Authored: Fri Apr 7 17:54:18 2017 -0700
Committer: Eugene Kirpichov 
Committed: Mon Apr 10 22:51:34 2017 -0700

--
 .../src/main/java/org/apache/beam/sdk/transforms/DoFn.java| 4 
 .../sdk/transforms/reflect/ByteBuddyDoFnInvokerFactory.java   | 7 ---
 .../org/apache/beam/sdk/transforms/reflect/DoFnInvoker.java   | 4 +---
 3 files changed, 1 insertion(+), 14 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/beam/blob/62e23b71/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/DoFn.java
--
diff --git 
a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/DoFn.java 
b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/DoFn.java
index e35457c..74a1348 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/DoFn.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/DoFn.java
@@ -698,10 +698,6 @@ public abstract class DoFn<InputT, OutputT> implements Serializable, HasDisplayData
   @Experimental(Kind.SPLITTABLE_DO_FN)
   public @interface UnboundedPerElement {}
 
-  /** Do not use. See https://issues.apache.org/jira/browse/BEAM-1904 */
-  @Deprecated
-  public class ProcessContinuation {}
-
   /**
* Returns an {@link Aggregator} with aggregation logic specified by the 
{@link CombineFn}
* argument. The name provided must be unique across {@link Aggregator}s 
created within the {@link

http://git-wip-us.apache.org/repos/asf/beam/blob/62e23b71/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/reflect/ByteBuddyDoFnInvokerFactory.java
--
diff --git 
a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/reflect/ByteBuddyDoFnInvokerFactory.java
 
b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/reflect/ByteBuddyDoFnInvokerFactory.java
index 4b0cbf7..6bef4df 100644
--- 
a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/reflect/ByteBuddyDoFnInvokerFactory.java
+++ 
b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/reflect/ByteBuddyDoFnInvokerFactory.java
@@ -50,7 +50,6 @@ import net.bytebuddy.implementation.bytecode.Throw;
 import net.bytebuddy.implementation.bytecode.assign.Assigner;
 import net.bytebuddy.implementation.bytecode.assign.Assigner.Typing;
 import net.bytebuddy.implementation.bytecode.assign.TypeCasting;
-import net.bytebuddy.implementation.bytecode.constant.NullConstant;
 import net.bytebuddy.implementation.bytecode.constant.TextConstant;
 import net.bytebuddy.implementation.bytecode.member.FieldAccess;
 import net.bytebuddy.implementation.bytecode.member.MethodInvocation;
@@ -659,12 +658,6 @@ public class ByteBuddyDoFnInvokerFactory implements 
DoFnInvokerFactory {
   }
   return new StackManipulation.Compound(pushParameters);
 }
-
-@Override
-protected StackManipulation afterDelegation(MethodDescription 
instrumentedMethod) {
-  return new StackManipulation.Compound(
-  NullConstant.INSTANCE, MethodReturn.REFERENCE);
-}
   }
 
   private static class UserCodeMethodInvocation implements StackManipulation {

http://git-wip-us.apache.org/repos/asf/beam/blob/62e23b71/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/reflect/DoFnInvoker.java
--
diff --git 
a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/reflect/DoFnInvoker.java
 
b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/reflect/DoFnInvoker.java
index cc06e70..0fbcc84 100644
--- 
a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/reflect/DoFnInvoker.java
+++ 
b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/reflect/DoFnInvoker.java
@@ -53,10 +53,8 @@ public interface DoFnInvoker {
* Invoke the {@link DoFn.ProcessElement} method on the bound {@link DoFn}.
*
* @param extra Factory for producing extra parameter objects (such as 
window), if necessary.
-   * @return {@code null} - see <a href="https://issues.apache.org/jira/browse/BEAM-1904">JIRA</a>
-   *     tracking the complete removal of {@link DoFn.ProcessContinuation}.
*/
-  DoFn.ProcessContinuation invokeProcessElement(ArgumentProvider extra);
+  void invokeProcessElement(ArgumentProvider extra);
 
   /** Invoke the 

[1/3] beam git commit: Bump Dataflow worker to 20170410

2017-04-10 Thread jkff
Repository: beam
Updated Branches:
  refs/heads/master 0a0b1c80c -> c58f4f89b


Bump Dataflow worker to 20170410


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/159ac581
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/159ac581
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/159ac581

Branch: refs/heads/master
Commit: 159ac58122258e42b8faa94dad20e9fb82a4fe34
Parents: 62e23b7
Author: Eugene Kirpichov <kirpic...@google.com>
Authored: Mon Apr 10 15:44:14 2017 -0700
Committer: Eugene Kirpichov <kirpic...@google.com>
Committed: Mon Apr 10 22:51:34 2017 -0700

--
 runners/google-cloud-dataflow-java/pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/beam/blob/159ac581/runners/google-cloud-dataflow-java/pom.xml
--
diff --git a/runners/google-cloud-dataflow-java/pom.xml 
b/runners/google-cloud-dataflow-java/pom.xml
index 2e3dc8a..05ae8bc 100644
--- a/runners/google-cloud-dataflow-java/pom.xml
+++ b/runners/google-cloud-dataflow-java/pom.xml
@@ -33,7 +33,7 @@
   jar
 
   
-
beam-master-20170405
+    
beam-master-20170410
 
1
 
6
   



[GitHub] beam pull request #2468: Removes DoFn.ProcessContinuation completely

2017-04-10 Thread asfgit
Github user asfgit closed the pull request at:

https://github.com/apache/beam/pull/2468


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[jira] [Commented] (BEAM-1891) Java @Autovalue: currently doesn't have a good coder

2017-04-10 Thread Raghavendra Nayak M (JIRA)

[ 
https://issues.apache.org/jira/browse/BEAM-1891?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15963833#comment-15963833
 ] 

Raghavendra Nayak M commented on BEAM-1891:
---

Thanks Stephen. Yes, I was looking for that.

> Java @Autovalue: currently doesn't have a good coder
> 
>
> Key: BEAM-1891
> URL: https://issues.apache.org/jira/browse/BEAM-1891
> Project: Beam
>  Issue Type: Improvement
>  Components: sdk-java-extensions
>Reporter: Stephen Sisk
>Priority: Minor
>  Labels: newbie, starter
>
> In Java, @AutoValue classes are something that we would like developers to be
> able to use in PCollections in Beam.
> However, there doesn't appear to be a good existing Beam Coder for
> AutoValue-generated classes:
> * AvroCoder doesn't work
> * SerializableCoder works, but has other problems (larger/less efficient)
> This is discussed fully at 
> https://lists.apache.org/thread.html/29617096819824d5c12247a246d316b763d9e583a21fff2f1c430077@%3Cdev.beam.apache.org%3E
>  
> We may need to implement an AutoValueCoder.
> cc [~pabloem] [~bchambers]
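
A minimal sketch (not from the thread) of the SerializableCoder workaround mentioned
above, assuming a hypothetical AutoValue class named UserEvent; a dedicated
AutoValueCoder would avoid the Java-serialization overhead:

{code}
import java.io.Serializable;
import com.google.auto.value.AutoValue;

// Hypothetical AutoValue type, used only for illustration.
@AutoValue
public abstract class UserEvent implements Serializable {
  public abstract String userId();
  public abstract long timestampMillis();

  public static UserEvent of(String userId, long timestampMillis) {
    return new AutoValue_UserEvent(userId, timestampMillis);
  }
}

// Elsewhere in the pipeline, set the coder explicitly, since automatic coder
// inference for the generated AutoValue_UserEvent subclass is unreliable:
//   events.setCoder(SerializableCoder.of(UserEvent.class));
{code}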



--
This message was sent by Atlassian JIRA
(v6.3.15#6346)


[jira] [Commented] (BEAM-463) BoundedHeapCoder should be a StandardCoder and not a CustomCoder

2017-04-10 Thread Luke Cwik (JIRA)

[ 
https://issues.apache.org/jira/browse/BEAM-463?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15963790#comment-15963790
 ] 

Luke Cwik commented on BEAM-463:


The motivation was about getting introspection into the coder components: even
if a wrapping coder is only understood by some SDKs, the component coders could
be understood by a runner and/or reused in other places. I can see the argument
for treating this as an opaque coder that doesn't encode to anything that makes
sense outside the context of a specific SDK.

This also raises the question of whether all component coders should always be
listed, even for CustomCoder-like things.

> BoundedHeapCoder should be a StandardCoder and not a CustomCoder
> 
>
> Key: BEAM-463
> URL: https://issues.apache.org/jira/browse/BEAM-463
> Project: Beam
>  Issue Type: Bug
>  Components: sdk-java-core
>Reporter: Luke Cwik
>Priority: Minor
>  Labels: backward-incompatible
> Fix For: First stable release
>
>
> The issue is that BoundedHeapCoder does not report component encodings which 
> prevents effective runner inspection of the components.
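
A rough sketch (not from the thread) of what reporting component coders buys: the
wrapper itself may be opaque to a runner, but getCoderArguments() still exposes the
element coder for inspection or reuse. It assumes the current Beam Java Coder API
(no Context argument in encode/decode); HeapLikeCoder is a made-up name, not the
actual BoundedHeapCoder:

{code}
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.Collections;
import java.util.List;
import org.apache.beam.sdk.coders.Coder;
import org.apache.beam.sdk.coders.ListCoder;

// Hypothetical wrapper coder that delegates to ListCoder but, unlike a
// CustomCoder, reports its component coder so runners can inspect it.
public class HeapLikeCoder<T> extends Coder<List<T>> {
  private final Coder<T> elementCoder;
  private final ListCoder<T> delegate;

  public HeapLikeCoder(Coder<T> elementCoder) {
    this.elementCoder = elementCoder;
    this.delegate = ListCoder.of(elementCoder);
  }

  @Override
  public void encode(List<T> value, OutputStream outStream) throws IOException {
    delegate.encode(value, outStream);
  }

  @Override
  public List<T> decode(InputStream inStream) throws IOException {
    return delegate.decode(inStream);
  }

  @Override
  public List<? extends Coder<?>> getCoderArguments() {
    // Reporting the component coder is what makes the structure visible to runners.
    return Collections.singletonList(elementCoder);
  }

  @Override
  public void verifyDeterministic() throws NonDeterministicException {
    elementCoder.verifyDeterministic();
  }
}
{code}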



--
This message was sent by Atlassian JIRA
(v6.3.15#6346)


[jira] [Updated] (BEAM-1491) HDFSFileSource should be able to read the HADOOP_CONF_DIR(YARN_CONF_DIR) environment variable

2017-04-10 Thread yangping wu (JIRA)

 [ 
https://issues.apache.org/jira/browse/BEAM-1491?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

yangping wu updated BEAM-1491:
--
Summary: HDFSFileSource should be able to read the 
HADOOP_CONF_DIR(YARN_CONF_DIR) environment variable  (was: HDFSFileSource 
should be able to read the HADOOP_CONF_DIR(YARN_CONF_DIR) environmen variable)

> HDFSFileSource should be able to read the HADOOP_CONF_DIR(YARN_CONF_DIR) 
> environment variable
> -
>
> Key: BEAM-1491
> URL: https://issues.apache.org/jira/browse/BEAM-1491
> Project: Beam
>  Issue Type: Improvement
>  Components: sdk-java-core
>Affects Versions: 0.6.0
>Reporter: yangping wu
>Assignee: Jean-Baptiste Onofré
>
> Currently, if we want to read a file stored on HDFS, we do it as follows:
> {code}
> Read.Bounded<KV<LongWritable, Text>> from =
>     Read.from(HDFSFileSource.from("hdfs://hadoopserver:8020/tmp/data.txt",
>         TextInputFormat.class, LongWritable.class, Text.class));
> PCollection<KV<LongWritable, Text>> data = p.apply(from);
> {code}
> or
> {code}
> Configuration conf = new Configuration();
> conf.set("fs.default.name", "hdfs://hadoopserver:8020");
> Read.Bounded<KV<LongWritable, Text>> from =
>     Read.from(HDFSFileSource.from("/tmp/data.txt", TextInputFormat.class,
>         LongWritable.class, Text.class).withConfiguration(conf));
> PCollection<KV<LongWritable, Text>> data = p.apply(from);
> {code}
> As we have seen above, we must set {{hdfs://hadoopserver:8020}} in the file path.
> If we could initialize {{conf}} by reading the {{HADOOP_CONF_DIR}}
> ({{YARN_CONF_DIR}}) environment variable, then we could read an HDFS file like this:
> {code}
> Read.Bounded<KV<LongWritable, Text>> from =
>     Read.from(HDFSFileSource.from("/tmp/data.txt", TextInputFormat.class,
>         LongWritable.class, Text.class));
> PCollection<KV<LongWritable, Text>> data = p.apply(from);
> {code}
> Note that we don't specify the {{hdfs://hadoopserver:8020}} prefix: the program
> reads it from the {{HADOOP_CONF_DIR}} ({{YARN_CONF_DIR}}) environment and then
> reads the file from HDFS.
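
A sketch (not from the issue) of how a default Configuration could be seeded from
those environment variables before handing it to withConfiguration(conf); the
helper name is hypothetical:

{code}
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

// Hypothetical helper, independent of HDFSFileSource internals.
class HadoopConfigurations {
  static Configuration fromEnvironment() {
    Configuration conf = new Configuration();
    String confDir = System.getenv("HADOOP_CONF_DIR");
    if (confDir == null) {
      confDir = System.getenv("YARN_CONF_DIR");
    }
    if (confDir != null) {
      // core-site.xml carries fs.defaultFS (e.g. hdfs://hadoopserver:8020),
      // so paths like "/tmp/data.txt" resolve against the right namenode.
      conf.addResource(new Path(confDir, "core-site.xml"));
      conf.addResource(new Path(confDir, "hdfs-site.xml"));
    }
    return conf;
  }
}
{code}

The source could then fall back to such a Configuration when none is supplied
explicitly.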



--
This message was sent by Atlassian JIRA
(v6.3.15#6346)


Jenkins build became unstable: beam_PostCommit_Java_MavenInstall #3247

2017-04-10 Thread Apache Jenkins Server
See 




Jenkins build is back to stable : beam_PostCommit_Java_ValidatesRunner_Apex #1046

2017-04-10 Thread Apache Jenkins Server
See 




[jira] [Resolved] (BEAM-1053) ApexGroupByKeyOperator serialization issues

2017-04-10 Thread Thomas Weise (JIRA)

 [ 
https://issues.apache.org/jira/browse/BEAM-1053?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Thomas Weise resolved BEAM-1053.

   Resolution: Fixed
Fix Version/s: First stable release

> ApexGroupByKeyOperator serialization issues
> ---
>
> Key: BEAM-1053
> URL: https://issues.apache.org/jira/browse/BEAM-1053
> Project: Beam
>  Issue Type: Bug
>  Components: runner-apex
>Affects Versions: 0.3.0-incubating
>Reporter: Sandeep Deshmukh
>Assignee: Thomas Weise
> Fix For: First stable release
>
>
> While trying to use the Apex runner for the wordcount program, the
> ApexGroupByKeyOperator fails with the following exception.
> 2016-11-28 20:54:47,696 INFO com.datatorrent.stram.engine.StreamingContainer: 
> Deploy request: 
> [OperatorDeployInfo[id=9,name=ExtractPayload,type=GENERIC,checkpoint={583c4b1100b3,
>  0, 
> 0},inputs=[OperatorDeployInfo.InputDeployInfo[portName=input,streamId=stream14,sourceNodeId=8,sourcePortName=output,locality=CONTAINER_LOCAL,partitionMask=0,partitionKeys=]],outputs=[OperatorDeployInfo.OutputDeployInfo[portName=output,streamId=stream0,bufferServer=]]],
>  
> OperatorDeployInfo[id=8,name=ReadFromHDFS,type=INPUT,checkpoint={583c4b1100b3,
>  0, 
> 0},inputs=[],outputs=[OperatorDeployInfo.OutputDeployInfo[portName=output,streamId=stream14,bufferServer=]]],
>  
> OperatorDeployInfo[id=11,name=Application.CountWords/Count.PerElement/Init/Map,type=GENERIC,checkpoint={583c4b1100b3,
>  0, 
> 0},inputs=[OperatorDeployInfo.InputDeployInfo[portName=input,streamId=stream17,sourceNodeId=10,sourcePortName=output,locality=CONTAINER_LOCAL,partitionMask=0,partitionKeys=]],outputs=[OperatorDeployInfo.OutputDeployInfo[portName=output,streamId=stream18,bufferServer=]]],
>  
> OperatorDeployInfo[id=10,name=Application.CountWords/ParDo(ExtractWords),type=GENERIC,checkpoint={583c4b1100b3,
>  0, 
> 0},inputs=[OperatorDeployInfo.InputDeployInfo[portName=input,streamId=stream0,sourceNodeId=9,sourcePortName=output,locality=CONTAINER_LOCAL,partitionMask=0,partitionKeys=]],outputs=[OperatorDeployInfo.OutputDeployInfo[portName=output,streamId=stream17,bufferServer=]]],
>  
> OperatorDeployInfo[id=13,name=Application.CountWords/Count.PerElement/Count.PerKey/Combine.GroupedValues/AnonymousParDo,type=GENERIC,checkpoint={583c4b1100b3,
>  0, 
> 0},inputs=[OperatorDeployInfo.InputDeployInfo[portName=input,streamId=stream12,sourceNodeId=12,sourcePortName=output,locality=CONTAINER_LOCAL,partitionMask=0,partitionKeys=]],outputs=[OperatorDeployInfo.OutputDeployInfo[portName=output,streamId=stream3,bufferServer=]]],
>  
> OperatorDeployInfo[id=14,name=WriteToHDFS/Window.Into()/ApexRunner.AssignWindowsAndSetStrategy/AssignWindows/AssignWindows,type=GENERIC,checkpoint={583c4b1100b3,
>  0, 
> 0},inputs=[OperatorDeployInfo.InputDeployInfo[portName=input,streamId=stream3,sourceNodeId=13,sourcePortName=output,locality=CONTAINER_LOCAL,partitionMask=0,partitionKeys=]],outputs=[OperatorDeployInfo.OutputDeployInfo[portName=output,streamId=stream10,bufferServer=goofy]]],
>  
> OperatorDeployInfo[id=12,name=Application.CountWords/Count.PerElement/Count.PerKey/GroupByKey,type=GENERIC,checkpoint={583c4b1100b3,
>  0, 
> 0},inputs=[OperatorDeployInfo.InputDeployInfo[portName=input,streamId=stream18,sourceNodeId=11,sourcePortName=output,locality=CONTAINER_LOCAL,partitionMask=0,partitionKeys=]],outputs=[OperatorDeployInfo.OutputDeployInfo[portName=output,streamId=stream12,bufferServer=
> 2016-11-28 20:54:48,922 ERROR 
> com.datatorrent.stram.engine.StreamingContainer: deploy request failed
> com.esotericsoftware.kryo.KryoException: Class cannot be created (missing 
> no-arg constructor): java.nio.HeapByteBuffer
> Serialization trace:
> activeTimers 
> (org.apache.beam.runners.apex.translation.operators.ApexGroupByKeyOperator)
> at 
> com.esotericsoftware.kryo.Kryo$DefaultInstantiatorStrategy.newInstantiatorOf(Kryo.java:1228)
> at com.esotericsoftware.kryo.Kryo.newInstantiator(Kryo.java:1049)
> at com.esotericsoftware.kryo.Kryo.newInstance(Kryo.java:1058)
> at 
> com.esotericsoftware.kryo.serializers.FieldSerializer.create(FieldSerializer.java:547)
> at 
> com.esotericsoftware.kryo.serializers.FieldSerializer.read(FieldSerializer.java:523)
> at com.esotericsoftware.kryo.Kryo.readClassAndObject(Kryo.java:761)
> at 
> com.esotericsoftware.kryo.serializers.MapSerializer.read(MapSerializer.java:135)
> at 
> com.esotericsoftware.kryo.serializers.MapSerializer.read(MapSerializer.java:21)
> at com.esotericsoftware.kryo.Kryo.readObject(Kryo.java:679)
> at 
> com.esotericsoftware.kryo.serializers.ObjectField.read(ObjectField.java:106)
> at 
> com.esotericsoftware.kryo.serializers.FieldSerializer.read(FieldSerializer.java:528)
>  

[jira] [Commented] (BEAM-1832) Potentially unclosed OutputStream in ApexYarnLauncher

2017-04-10 Thread Thomas Weise (JIRA)

[ 
https://issues.apache.org/jira/browse/BEAM-1832?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15963765#comment-15963765
 ] 

Thomas Weise commented on BEAM-1832:


[~davor] can you please add Rekha as contributor?


> Potentially unclosed OutputStream in ApexYarnLauncher
> -
>
> Key: BEAM-1832
> URL: https://issues.apache.org/jira/browse/BEAM-1832
> Project: Beam
>  Issue Type: Bug
>  Components: runner-apex
>Reporter: Ted Yu
>Priority: Minor
> Fix For: First stable release
>
>
> Here is an example from createJar():
> {code}
>   final OutputStream out = 
> Files.newOutputStream(zipfs.getPath(JarFile.MANIFEST_NAME));
>   if (!manifestFile.exists()) {
> new Manifest().write(out);
>   } else {
> FileUtils.copyFile(manifestFile, out);
>   }
>   out.close();
> {code}
> If FileUtils.copyFile throws IOException, out would be left unclosed.
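
A sketch of one possible fix (not taken from the actual patch), using
try-with-resources so the stream is closed even when copyFile or write throws; it
assumes the same zipfs and manifestFile variables as the snippet above:

{code}
try (OutputStream out =
    Files.newOutputStream(zipfs.getPath(JarFile.MANIFEST_NAME))) {
  if (!manifestFile.exists()) {
    new Manifest().write(out);
  } else {
    FileUtils.copyFile(manifestFile, out);
  }
}  // out is closed here on both the normal and the exceptional path.
{code}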



--
This message was sent by Atlassian JIRA
(v6.3.15#6346)


[jira] [Resolved] (BEAM-1832) Potentially unclosed OutputStream in ApexYarnLauncher

2017-04-10 Thread Thomas Weise (JIRA)

 [ 
https://issues.apache.org/jira/browse/BEAM-1832?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Thomas Weise resolved BEAM-1832.

   Resolution: Fixed
Fix Version/s: First stable release

> Potentially unclosed OutputStream in ApexYarnLauncher
> -
>
> Key: BEAM-1832
> URL: https://issues.apache.org/jira/browse/BEAM-1832
> Project: Beam
>  Issue Type: Bug
>  Components: runner-apex
>Reporter: Ted Yu
>Priority: Minor
> Fix For: First stable release
>
>
> Here is an example from createJar():
> {code}
>   final OutputStream out = 
> Files.newOutputStream(zipfs.getPath(JarFile.MANIFEST_NAME));
>   if (!manifestFile.exists()) {
> new Manifest().write(out);
>   } else {
> FileUtils.copyFile(manifestFile, out);
>   }
>   out.close();
> {code}
> If FileUtils.copyFile throws IOException, out would be left unclosed.



--
This message was sent by Atlassian JIRA
(v6.3.15#6346)


Build failed in Jenkins: beam_PostCommit_Java_ValidatesRunner_Flink #2272

2017-04-10 Thread Apache Jenkins Server
See 


--
[...truncated 230.29 KB...]
 x [deleted] (none) -> origin/pr/942/head
 x [deleted] (none) -> origin/pr/942/merge
 x [deleted] (none) -> origin/pr/943/head
 x [deleted] (none) -> origin/pr/943/merge
 x [deleted] (none) -> origin/pr/944/head
 x [deleted] (none) -> origin/pr/945/head
 x [deleted] (none) -> origin/pr/945/merge
 x [deleted] (none) -> origin/pr/946/head
 x [deleted] (none) -> origin/pr/946/merge
 x [deleted] (none) -> origin/pr/947/head
 x [deleted] (none) -> origin/pr/947/merge
 x [deleted] (none) -> origin/pr/948/head
 x [deleted] (none) -> origin/pr/948/merge
 x [deleted] (none) -> origin/pr/949/head
 x [deleted] (none) -> origin/pr/949/merge
 x [deleted] (none) -> origin/pr/95/head
 x [deleted] (none) -> origin/pr/95/merge
 x [deleted] (none) -> origin/pr/950/head
 x [deleted] (none) -> origin/pr/951/head
 x [deleted] (none) -> origin/pr/951/merge
 x [deleted] (none) -> origin/pr/952/head
 x [deleted] (none) -> origin/pr/952/merge
 x [deleted] (none) -> origin/pr/953/head
 x [deleted] (none) -> origin/pr/954/head
 x [deleted] (none) -> origin/pr/954/merge
 x [deleted] (none) -> origin/pr/955/head
 x [deleted] (none) -> origin/pr/955/merge
 x [deleted] (none) -> origin/pr/956/head
 x [deleted] (none) -> origin/pr/957/head
 x [deleted] (none) -> origin/pr/958/head
 x [deleted] (none) -> origin/pr/959/head
 x [deleted] (none) -> origin/pr/959/merge
 x [deleted] (none) -> origin/pr/96/head
 x [deleted] (none) -> origin/pr/96/merge
 x [deleted] (none) -> origin/pr/960/head
 x [deleted] (none) -> origin/pr/960/merge
 x [deleted] (none) -> origin/pr/961/head
 x [deleted] (none) -> origin/pr/962/head
 x [deleted] (none) -> origin/pr/962/merge
 x [deleted] (none) -> origin/pr/963/head
 x [deleted] (none) -> origin/pr/963/merge
 x [deleted] (none) -> origin/pr/964/head
 x [deleted] (none) -> origin/pr/965/head
 x [deleted] (none) -> origin/pr/965/merge
 x [deleted] (none) -> origin/pr/966/head
 x [deleted] (none) -> origin/pr/967/head
 x [deleted] (none) -> origin/pr/967/merge
 x [deleted] (none) -> origin/pr/968/head
 x [deleted] (none) -> origin/pr/968/merge
 x [deleted] (none) -> origin/pr/969/head
 x [deleted] (none) -> origin/pr/969/merge
 x [deleted] (none) -> origin/pr/97/head
 x [deleted] (none) -> origin/pr/97/merge
 x [deleted] (none) -> origin/pr/970/head
 x [deleted] (none) -> origin/pr/970/merge
 x [deleted] (none) -> origin/pr/971/head
 x [deleted] (none) -> origin/pr/971/merge
 x [deleted] (none) -> origin/pr/972/head
 x [deleted] (none) -> origin/pr/973/head
 x [deleted] (none) -> origin/pr/974/head
 x [deleted] (none) -> origin/pr/974/merge
 x [deleted] (none) -> origin/pr/975/head
 x [deleted] (none) -> origin/pr/975/merge
 x [deleted] (none) -> origin/pr/976/head
 x [deleted] (none) -> origin/pr/976/merge
 x [deleted] (none) -> origin/pr/977/head
 x [deleted] (none) -> origin/pr/977/merge
 x [deleted] (none) -> origin/pr/978/head
 x [deleted] (none) -> origin/pr/978/merge
 x [deleted] (none) -> origin/pr/979/head
 x [deleted] (none) -> origin/pr/979/merge
 x [deleted] (none) -> origin/pr/98/head
 x [deleted] (none) -> origin/pr/980/head
 x [deleted] (none) -> origin/pr/980/merge
 x [deleted] (none) -> origin/pr/981/head
 x [deleted] (none) -> origin/pr/982/head
 x [deleted] (none) -> origin/pr/982/merge
 x [deleted] (none) -> origin/pr/983/head
 x [deleted] (none) -> origin/pr/983/merge
 x [deleted] (none) -> origin/pr/984/head
 x [deleted] (none) -> origin/pr/984/merge
 x [deleted] (none) -> origin/pr/985/head
 x [deleted] (none) -> origin/pr/985/merge
 x [deleted] (none) -> origin/pr/986/head
 x [deleted] (none) -> origin/pr/986/merge
 x [deleted] (none) -> origin/pr/987/head
 x [deleted] (none) -> origin/pr/988/head
 x [deleted] (none) -> origin/pr/988/merge
 x [deleted] (none) -> 

[GitHub] beam pull request #2486: [BEAM-1928] Move WindowingStrategies to runners-cor...

2017-04-10 Thread asfgit
Github user asfgit closed the pull request at:

https://github.com/apache/beam/pull/2486


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[jira] [Commented] (BEAM-1928) Populate Runner API Components from the Java SDK

2017-04-10 Thread ASF GitHub Bot (JIRA)

[ 
https://issues.apache.org/jira/browse/BEAM-1928?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15963749#comment-15963749
 ] 

ASF GitHub Bot commented on BEAM-1928:
--

Github user asfgit closed the pull request at:

https://github.com/apache/beam/pull/2486


> Populate Runner API Components from the Java SDK
> 
>
> Key: BEAM-1928
> URL: https://issues.apache.org/jira/browse/BEAM-1928
> Project: Beam
>  Issue Type: New Feature
>  Components: beam-model-runner-api, runner-core
>Reporter: Thomas Groh
>Assignee: Thomas Groh
>
> Permit conversion of Java SDK components to runner API protocol buffers, and 
> the extraction of those SDK components from the protocol buffers.



--
This message was sent by Atlassian JIRA
(v6.3.15#6346)


[2/2] beam git commit: This closes #2486

2017-04-10 Thread tgroh
This closes #2486


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/0a0b1c80
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/0a0b1c80
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/0a0b1c80

Branch: refs/heads/master
Commit: 0a0b1c80c97949954add4b4357fe29409a08197d
Parents: 7fd9c65 40b3668
Author: Thomas Groh 
Authored: Mon Apr 10 19:24:52 2017 -0700
Committer: Thomas Groh 
Committed: Mon Apr 10 19:24:52 2017 -0700

--
 runners/core-construction-java/pom.xml  |  17 ++
 .../core/construction/WindowingStrategies.java  | 268 +++
 .../construction/WindowingStrategiesTest.java   |  92 +++
 .../beam/sdk/util/WindowingStrategies.java  | 267 --
 .../beam/sdk/util/WindowingStrategiesTest.java  |  91 ---
 5 files changed, 377 insertions(+), 358 deletions(-)
--




[1/2] beam git commit: Move WindowingStrategies to runners-core-construction

2017-04-10 Thread tgroh
Repository: beam
Updated Branches:
  refs/heads/master 7fd9c6516 -> 0a0b1c80c


Move WindowingStrategies to runners-core-construction


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/40b36686
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/40b36686
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/40b36686

Branch: refs/heads/master
Commit: 40b36686bac65995d606fe5cea7aa957d0ef3f9d
Parents: 7fd9c65
Author: Thomas Groh 
Authored: Mon Apr 10 14:35:59 2017 -0700
Committer: Thomas Groh 
Committed: Mon Apr 10 16:44:23 2017 -0700

--
 runners/core-construction-java/pom.xml  |  17 ++
 .../core/construction/WindowingStrategies.java  | 268 +++
 .../construction/WindowingStrategiesTest.java   |  92 +++
 .../beam/sdk/util/WindowingStrategies.java  | 267 --
 .../beam/sdk/util/WindowingStrategiesTest.java  |  91 ---
 5 files changed, 377 insertions(+), 358 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/beam/blob/40b36686/runners/core-construction-java/pom.xml
--
diff --git a/runners/core-construction-java/pom.xml 
b/runners/core-construction-java/pom.xml
index ee64f91..3f323dd 100644
--- a/runners/core-construction-java/pom.xml
+++ b/runners/core-construction-java/pom.xml
@@ -58,17 +58,28 @@
       <groupId>org.apache.beam</groupId>
       <artifactId>beam-sdks-common-runner-api</artifactId>
     </dependency>
+
     <dependency>
       <groupId>org.apache.beam</groupId>
       <artifactId>beam-sdks-java-core</artifactId>
     </dependency>
 
     <dependency>
+      <groupId>com.google.protobuf</groupId>
+      <artifactId>protobuf-java</artifactId>
+    </dependency>
+
+    <dependency>
       <groupId>com.fasterxml.jackson.core</groupId>
       <artifactId>jackson-annotations</artifactId>
     </dependency>
 
     <dependency>
+      <groupId>com.fasterxml.jackson.core</groupId>
+      <artifactId>jackson-databind</artifactId>
+    </dependency>
+
+    <dependency>
       <groupId>com.google.code.findbugs</groupId>
       <artifactId>jsr305</artifactId>
     </dependency>
@@ -88,6 +99,12 @@
       <artifactId>slf4j-api</artifactId>
     </dependency>
 
+    <dependency>
+      <groupId>com.google.auto.value</groupId>
+      <artifactId>auto-value</artifactId>
+      <scope>provided</scope>
+    </dependency>
+
 
 
 

http://git-wip-us.apache.org/repos/asf/beam/blob/40b36686/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/WindowingStrategies.java
--
diff --git 
a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/WindowingStrategies.java
 
b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/WindowingStrategies.java
new file mode 100644
index 000..353be05
--- /dev/null
+++ 
b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/WindowingStrategies.java
@@ -0,0 +1,268 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.core.construction;
+
+import static com.google.common.base.Preconditions.checkArgument;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.google.protobuf.Any;
+import com.google.protobuf.ByteString;
+import com.google.protobuf.BytesValue;
+import com.google.protobuf.InvalidProtocolBufferException;
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.UUID;
+import org.apache.beam.sdk.coders.Coder;
+import org.apache.beam.sdk.common.runner.v1.RunnerApi;
+import org.apache.beam.sdk.common.runner.v1.RunnerApi.Components;
+import org.apache.beam.sdk.common.runner.v1.RunnerApi.FunctionSpec;
+import org.apache.beam.sdk.common.runner.v1.RunnerApi.SdkFunctionSpec;
+import org.apache.beam.sdk.transforms.windowing.OutputTimeFn;
+import org.apache.beam.sdk.transforms.windowing.OutputTimeFns;
+import org.apache.beam.sdk.transforms.windowing.Trigger;
+import org.apache.beam.sdk.transforms.windowing.Triggers;
+import org.apache.beam.sdk.transforms.windowing.Window.ClosingBehavior;
+import org.apache.beam.sdk.transforms.windowing.WindowFn;
+import org.apache.beam.sdk.util.SerializableUtils;
+import org.apache.beam.sdk.util.WindowingStrategy;
+import org.apache.beam.sdk.util.WindowingStrategy.AccumulationMode;
+import org.apache.beam.sdk.util.WindowingStrategy.CombineWindowFnOutputTimes;
+import 

Jenkins build is still unstable: beam_PostCommit_Java_ValidatesRunner_Apex #1045

2017-04-10 Thread Apache Jenkins Server
See 




[GitHub] beam-site pull request #206: [BEAM-1452] Add composite triggers section to p...

2017-04-10 Thread melap
GitHub user melap opened a pull request:

https://github.com/apache/beam-site/pull/206

[BEAM-1452] Add composite triggers section to programming guide



You can merge this pull request into a Git repository by running:

$ git pull https://github.com/melap/beam-site composite

Alternatively you can review and apply these changes as the patch at:

https://github.com/apache/beam-site/pull/206.patch

To close this pull request, make a commit to your master/trunk branch
with (at least) the following in the commit message:

This closes #206


commit 3e14a14fcf34c38e2afd753e4a327ca773ad1327
Author: melissa 
Date:   2017-04-11T00:08:50Z

[BEAM-1452] Add composite triggers section to programming guide




---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


Jenkins build became unstable: beam_PostCommit_Java_MavenInstall #3241

2017-04-10 Thread Apache Jenkins Server
See 




[jira] [Commented] (BEAM-1891) Java @Autovalue: currently doesn't have a good coder

2017-04-10 Thread Stephen Sisk (JIRA)

[ 
https://issues.apache.org/jira/browse/BEAM-1891?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15963586#comment-15963586
 ] 

Stephen Sisk commented on BEAM-1891:


You'll need to learn about coders, which can be a bit tricky, but this is a
pretty independent work item and I think very doable for a newbie to Beam. I'd
suggest starting with a design doc for what you plan on doing and sending it
out to the mailing list for folks to take a look at.

> Java @Autovalue: currently doesn't have a good coder
> 
>
> Key: BEAM-1891
> URL: https://issues.apache.org/jira/browse/BEAM-1891
> Project: Beam
>  Issue Type: Improvement
>  Components: sdk-java-extensions
>Reporter: Stephen Sisk
>Priority: Minor
>  Labels: newbie, starter
>
> In Java, @AutoValue classes are something that we would like developers to be
> able to use in PCollections in Beam.
> However, there doesn't appear to be a good existing Beam Coder for
> AutoValue-generated classes:
> * AvroCoder doesn't work
> * SerializableCoder works, but has other problems (larger/less efficient)
> This is discussed fully at 
> https://lists.apache.org/thread.html/29617096819824d5c12247a246d316b763d9e583a21fff2f1c430077@%3Cdev.beam.apache.org%3E
>  
> We may need to implement an AutoValueCoder.
> cc [~pabloem] [~bchambers]
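
To make the "learn about coders" step concrete: a sketch of a hand-written coder
for the hypothetical UserEvent AutoValue type sketched earlier in this digest,
written against the current Coder API (pre-2.0 signatures also took a Context
argument). A real AutoValueCoder would generate this field-by-field logic instead:

{code}
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import org.apache.beam.sdk.coders.CustomCoder;
import org.apache.beam.sdk.coders.StringUtf8Coder;
import org.apache.beam.sdk.coders.VarLongCoder;

// Hypothetical coder: encodes each AutoValue property with an existing coder.
public class UserEventCoder extends CustomCoder<UserEvent> {
  private static final StringUtf8Coder STRING_CODER = StringUtf8Coder.of();
  private static final VarLongCoder LONG_CODER = VarLongCoder.of();

  @Override
  public void encode(UserEvent value, OutputStream outStream) throws IOException {
    STRING_CODER.encode(value.userId(), outStream);
    LONG_CODER.encode(value.timestampMillis(), outStream);
  }

  @Override
  public UserEvent decode(InputStream inStream) throws IOException {
    String userId = STRING_CODER.decode(inStream);
    long timestampMillis = LONG_CODER.decode(inStream);
    return UserEvent.of(userId, timestampMillis);
  }
}
{code}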



--
This message was sent by Atlassian JIRA
(v6.3.15#6346)


[jira] [Commented] (BEAM-1865) Input Coder of GroupByKey should be a KV Coder in the Python SDK

2017-04-10 Thread ASF GitHub Bot (JIRA)

[ 
https://issues.apache.org/jira/browse/BEAM-1865?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15963584#comment-15963584
 ] 

ASF GitHub Bot commented on BEAM-1865:
--

Github user asfgit closed the pull request at:

https://github.com/apache/beam/pull/2470


> Input Coder of GroupByKey should be a KV Coder in the Python SDK
> 
>
> Key: BEAM-1865
> URL: https://issues.apache.org/jira/browse/BEAM-1865
> Project: Beam
>  Issue Type: Bug
>  Components: sdk-py
>Reporter: Vikas Kedigehalli
>Assignee: Vikas Kedigehalli
>
> The `Any` type is consistent with `KV` in Python. The coder for the `Any` type is
> a fallback coder or a `FastPrimitivesCoder`, but for a `GroupByKey` operation
> this needs to be a `TupleCoder` to ensure that the generated pipeline
> representation is runnable on a runner in a different language (in the Fn API
> world).



--
This message was sent by Atlassian JIRA
(v6.3.15#6346)


[GitHub] beam pull request #2470: [BEAM-1865]: Fix GroupByKeyInputVisitor for Direct ...

2017-04-10 Thread asfgit
Github user asfgit closed the pull request at:

https://github.com/apache/beam/pull/2470


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[2/2] beam git commit: This closes #2470

2017-04-10 Thread altay
This closes #2470


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/7fd9c651
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/7fd9c651
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/7fd9c651

Branch: refs/heads/master
Commit: 7fd9c6516ea5f3d290fd95507559b0c99940
Parents: 1761d1c 9e453fa
Author: Ahmet Altay 
Authored: Mon Apr 10 15:28:12 2017 -0700
Committer: Ahmet Altay 
Committed: Mon Apr 10 15:28:12 2017 -0700

--
 .../apache_beam/runners/direct/direct_runner.py | 18 ++---
 sdks/python/apache_beam/runners/runner.py   | 71 
 sdks/python/apache_beam/runners/runner_test.py  | 41 +++
 3 files changed, 93 insertions(+), 37 deletions(-)
--




[1/2] beam git commit: Fix GroupByKeyInputVisitor for Direct Runner

2017-04-10 Thread altay
Repository: beam
Updated Branches:
  refs/heads/master 1761d1cab -> 7fd9c6516


Fix GroupByKeyInputVisitor for Direct Runner


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/9e453fab
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/9e453fab
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/9e453fab

Branch: refs/heads/master
Commit: 9e453fabe2bf448552ab5706130495e5ea4cf1c2
Parents: 1761d1c
Author: Vikas Kedigehalli 
Authored: Fri Apr 7 19:22:27 2017 -0700
Committer: Ahmet Altay 
Committed: Mon Apr 10 15:28:00 2017 -0700

--
 .../apache_beam/runners/direct/direct_runner.py | 18 ++---
 sdks/python/apache_beam/runners/runner.py   | 71 
 sdks/python/apache_beam/runners/runner_test.py  | 41 +++
 3 files changed, 93 insertions(+), 37 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/beam/blob/9e453fab/sdks/python/apache_beam/runners/direct/direct_runner.py
--
diff --git a/sdks/python/apache_beam/runners/direct/direct_runner.py 
b/sdks/python/apache_beam/runners/direct/direct_runner.py
index 1a5775f..9b4e1ac 100644
--- a/sdks/python/apache_beam/runners/direct/direct_runner.py
+++ b/sdks/python/apache_beam/runners/direct/direct_runner.py
@@ -32,6 +32,7 @@ from apache_beam.runners.runner import PipelineResult
 from apache_beam.runners.runner import PipelineRunner
 from apache_beam.runners.runner import PipelineState
 from apache_beam.runners.runner import PValueCache
+from apache_beam.runners.runner import group_by_key_input_visitor
 from apache_beam.utils.pipeline_options import DirectOptions
 from apache_beam.utils.value_provider import RuntimeValueProvider
 
@@ -68,21 +69,22 @@ class DirectRunner(PipelineRunner):
 
 MetricsEnvironment.set_metrics_supported(True)
 logging.info('Running pipeline with DirectRunner.')
-self.visitor = ConsumerTrackingPipelineVisitor()
-pipeline.visit(self.visitor)
+self.consumer_tracking_visitor = ConsumerTrackingPipelineVisitor()
+pipeline.visit(group_by_key_input_visitor())
+pipeline.visit(self.consumer_tracking_visitor)
 
 evaluation_context = EvaluationContext(
 pipeline.options,
 BundleFactory(stacked=pipeline.options.view_as(DirectOptions)
   .direct_runner_use_stacked_bundle),
-self.visitor.root_transforms,
-self.visitor.value_to_consumers,
-self.visitor.step_names,
-self.visitor.views)
+self.consumer_tracking_visitor.root_transforms,
+self.consumer_tracking_visitor.value_to_consumers,
+self.consumer_tracking_visitor.step_names,
+self.consumer_tracking_visitor.views)
 
 evaluation_context.use_pvalue_cache(self._cache)
 
-executor = Executor(self.visitor.value_to_consumers,
+executor = Executor(self.consumer_tracking_visitor.value_to_consumers,
 TransformEvaluatorRegistry(evaluation_context),
 evaluation_context)
 # Start the executor. This is a non-blocking call, it will start the
@@ -90,7 +92,7 @@ class DirectRunner(PipelineRunner):
 
 if pipeline.options:
   RuntimeValueProvider.set_runtime_options(pipeline.options._options_id, 
{})
-executor.start(self.visitor.root_transforms)
+executor.start(self.consumer_tracking_visitor.root_transforms)
 result = DirectPipelineResult(executor, evaluation_context)
 
 if self._cache:

http://git-wip-us.apache.org/repos/asf/beam/blob/9e453fab/sdks/python/apache_beam/runners/runner.py
--
diff --git a/sdks/python/apache_beam/runners/runner.py 
b/sdks/python/apache_beam/runners/runner.py
index 528b03f..de9c892 100644
--- a/sdks/python/apache_beam/runners/runner.py
+++ b/sdks/python/apache_beam/runners/runner.py
@@ -86,6 +86,47 @@ def create_runner(runner_name):
 runner_name, ', '.join(_ALL_KNOWN_RUNNERS)))
 
 
+def group_by_key_input_visitor():
+  # Imported here to avoid circular dependencies.
+  from apache_beam.pipeline import PipelineVisitor
+
+  class GroupByKeyInputVisitor(PipelineVisitor):
+"""A visitor that replaces `Any` element type for input `PCollection` of
+a `GroupByKey` or `GroupByKeyOnly` with a `KV` type.
+
+TODO(BEAM-115): Once Python SDk is compatible with the new Runner API,
+we could directly replace the coder instead of mutating the element type.
+"""
+
+def visit_transform(self, transform_node):
+  # Imported here to avoid circular dependencies.
+  # pylint: disable=wrong-import-order, wrong-import-position
+  from apache_beam import GroupByKey, GroupByKeyOnly
+  from apache_beam import typehints
+  if 

[jira] [Commented] (BEAM-802) Support Dynamic PipelineOptions for python

2017-04-10 Thread ASF GitHub Bot (JIRA)

[ 
https://issues.apache.org/jira/browse/BEAM-802?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15963578#comment-15963578
 ] 

ASF GitHub Bot commented on BEAM-802:
-

Github user asfgit closed the pull request at:

https://github.com/apache/beam/pull/2475


> Support Dynamic PipelineOptions for python
> --
>
> Key: BEAM-802
> URL: https://issues.apache.org/jira/browse/BEAM-802
> Project: Beam
>  Issue Type: New Feature
>  Components: sdk-py
>Reporter: María GH
>Assignee: María GH
>Priority: Minor
>   Original Estimate: 1,680h
>  Remaining Estimate: 1,680h
>
> Goal: Enable users to run pipelines from templates filled via the command line
> (pipeline options).
> Background: Currently, the runner creates the JSON pipeline description, which
> can be sent to the worker as is, since everything is already defined there
> (with links to gs:// for input and binaries). With the parametrized approach,
> those descriptions are empty and are filled by the user or defaulted, so the
> pipeline needs to be stored somewhere first until the values become available.
> Tasks:
> 1. Create a template-style pipeline description (TemplateRunner).
> The graph description is now a template (some parts are not filled) that
> needs to be saved.
> 2. Define values to inject into the template (ValueProviders API).
> The placeholders can be filled with default values (static) or with key/value
> pairs provided at runtime (dynamic).
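
For reference, the Java SDK exposes a similar pattern through ValueProvider; a
sketch with illustrative option names (this issue is about bringing the
equivalent to the Python SDK):

{code}
import org.apache.beam.sdk.options.Description;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.ValueProvider;

// Illustrative options interface: at template creation time the value may be
// unset, and ValueProvider.get() resolves it when the job actually runs.
public interface TemplateWordCountOptions extends PipelineOptions {
  @Description("Path of the file to read from")
  ValueProvider<String> getInputFile();

  void setInputFile(ValueProvider<String> value);
}
{code}

DoFns and sources hold the ValueProvider and call get() only at execution time,
which is what lets the same stored template be launched repeatedly with
different values.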



--
This message was sent by Atlassian JIRA
(v6.3.15#6346)


[1/2] beam git commit: Skip query metrics when creating a template

2017-04-10 Thread altay
Repository: beam
Updated Branches:
  refs/heads/master 1c7a974ab -> 1761d1cab


Skip query metrics when creating a template


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/67883309
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/67883309
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/67883309

Branch: refs/heads/master
Commit: 678833096550cf81da0fe026ab978626e395ef11
Parents: 1c7a974
Author: Maria Garcia Herrero 
Authored: Sat Apr 8 23:08:44 2017 -0700
Committer: Ahmet Altay 
Committed: Mon Apr 10 15:23:18 2017 -0700

--
 sdks/python/apache_beam/examples/wordcount.py   | 16 ++--
 .../runners/dataflow/internal/apiclient.py  |  2 ++
 2 files changed, 12 insertions(+), 6 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/beam/blob/67883309/sdks/python/apache_beam/examples/wordcount.py
--
diff --git a/sdks/python/apache_beam/examples/wordcount.py 
b/sdks/python/apache_beam/examples/wordcount.py
index 27b9dcb..a155148 100644
--- a/sdks/python/apache_beam/examples/wordcount.py
+++ b/sdks/python/apache_beam/examples/wordcount.py
@@ -108,12 +108,16 @@ def run(argv=None):
   # Actually run the pipeline (all operations above are deferred).
   result = p.run()
   result.wait_until_finish()
-  empty_lines_filter = MetricsFilter().with_name('empty_lines')
-  query_result = result.metrics().query(empty_lines_filter)
-  if query_result['counters']:
-empty_lines_counter = query_result['counters'][0]
-logging.info('number of empty lines: %d', empty_lines_counter.committed)
-  # TODO(pabloem)(BEAM-1366): Add querying of MEAN metrics.
+
+  # Do not query metrics when creating a template which doesn't run
+  if (not hasattr(result, 'has_job')# direct runner
+  or result.has_job):   # not just a template creation
+empty_lines_filter = MetricsFilter().with_name('empty_lines')
+query_result = result.metrics().query(empty_lines_filter)
+if query_result['counters']:
+  empty_lines_counter = query_result['counters'][0]
+  logging.info('number of empty lines: %d', empty_lines_counter.committed)
+# TODO(pabloem)(BEAM-1366): Add querying of MEAN metrics.
 
 if __name__ == '__main__':
   logging.getLogger().setLevel(logging.INFO)

http://git-wip-us.apache.org/repos/asf/beam/blob/67883309/sdks/python/apache_beam/runners/dataflow/internal/apiclient.py
--
diff --git a/sdks/python/apache_beam/runners/dataflow/internal/apiclient.py 
b/sdks/python/apache_beam/runners/dataflow/internal/apiclient.py
index 6d4e538..2b6f3fd 100644
--- a/sdks/python/apache_beam/runners/dataflow/internal/apiclient.py
+++ b/sdks/python/apache_beam/runners/dataflow/internal/apiclient.py
@@ -438,6 +438,8 @@ class DataflowApplicationClient(object):
 if not template_location:
   return self.submit_job_description(job)
 else:
+  logging.info('A template was just created at location %s',
+   template_location)
   return None
 
   def create_job_description(self, job):



[GitHub] beam pull request #2475: [BEAM-802] Skip query metrics for template creation

2017-04-10 Thread asfgit
Github user asfgit closed the pull request at:

https://github.com/apache/beam/pull/2475


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[2/2] beam git commit: This closes #2475

2017-04-10 Thread altay
This closes #2475


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/1761d1ca
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/1761d1ca
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/1761d1ca

Branch: refs/heads/master
Commit: 1761d1cabf47e34fd9107c5673339aa966148f0e
Parents: 1c7a974 6788330
Author: Ahmet Altay 
Authored: Mon Apr 10 15:23:21 2017 -0700
Committer: Ahmet Altay 
Committed: Mon Apr 10 15:23:21 2017 -0700

--
 sdks/python/apache_beam/examples/wordcount.py   | 16 ++--
 .../runners/dataflow/internal/apiclient.py  |  2 ++
 2 files changed, 12 insertions(+), 6 deletions(-)
--




[jira] [Commented] (BEAM-1928) Populate Runner API Components from the Java SDK

2017-04-10 Thread ASF GitHub Bot (JIRA)

[ 
https://issues.apache.org/jira/browse/BEAM-1928?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15963575#comment-15963575
 ] 

ASF GitHub Bot commented on BEAM-1928:
--

GitHub user tgroh opened a pull request:

https://github.com/apache/beam/pull/2487

[BEAM-1928] Add Coder utilities for Proto conversions

Be sure to do all of the following to help us incorporate your contribution
quickly and easily:

 - [ ] Make sure the PR title is formatted like:
   `[BEAM-<Jira issue #>] Description of pull request`
 - [ ] Make sure tests pass via `mvn clean verify`. (Even better, enable
   Travis-CI on your fork and ensure the whole test matrix passes).
 - [ ] Replace `<Jira issue #>` in the title with the actual Jira issue
   number, if there is one.
 - [ ] If this contribution is large, please file an Apache
   [Individual Contributor License 
Agreement](https://www.apache.org/licenses/icla.pdf).

---


You can merge this pull request into a Git repository by running:

$ git pull https://github.com/tgroh/beam coders_utilities

Alternatively you can review and apply these changes as the patch at:

https://github.com/apache/beam/pull/2487.patch

To close this pull request, make a commit to your master/trunk branch
with (at least) the following in the commit message:

This closes #2487


commit 9bd7dcf0a1d836fb09e485bf4b9af65b57a1fb08
Author: Thomas Groh 
Date:   2017-04-07T18:46:24Z

Add Coder utilities for Proto conversions




> Populate Runner API Components from the Java SDK
> 
>
> Key: BEAM-1928
> URL: https://issues.apache.org/jira/browse/BEAM-1928
> Project: Beam
>  Issue Type: New Feature
>  Components: beam-model-runner-api, runner-core
>Reporter: Thomas Groh
>Assignee: Thomas Groh
>
> Permit conversion of Java SDK components to runner API protocol buffers, and 
> the extraction of those SDK components from the protocol buffers.



--
This message was sent by Atlassian JIRA
(v6.3.15#6346)


[GitHub] beam pull request #2484: Upgrade dependencies

2017-04-10 Thread asfgit
Github user asfgit closed the pull request at:

https://github.com/apache/beam/pull/2484


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] beam-site pull request #205: Add to in-progress IO list and alphabetize

2017-04-10 Thread asfgit
Github user asfgit closed the pull request at:

https://github.com/apache/beam-site/pull/205


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[1/2] beam git commit: Upgrade dependencies.

2017-04-10 Thread altay
Repository: beam
Updated Branches:
  refs/heads/master 8afa62398 -> 1c7a974ab


Upgrade dependencies.

Minor version changes to use latest versions of
avro, pyyaml, google-apitools, googledatastore.


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/11a3af84
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/11a3af84
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/11a3af84

Branch: refs/heads/master
Commit: 11a3af8474998bd2b874cfeede82f353999ae3e3
Parents: 8afa623
Author: Ahmet Altay 
Authored: Mon Apr 10 14:13:00 2017 -0700
Committer: Ahmet Altay 
Committed: Mon Apr 10 15:18:33 2017 -0700

--
 sdks/python/setup.py | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/beam/blob/11a3af84/sdks/python/setup.py
--
diff --git a/sdks/python/setup.py b/sdks/python/setup.py
index 4e1c67d..6c3f426 100644
--- a/sdks/python/setup.py
+++ b/sdks/python/setup.py
@@ -87,14 +87,14 @@ else:
 
 
 REQUIRED_PACKAGES = [
-'avro>=1.7.7,<2.0.0',
+'avro>=1.8.1,<2.0.0',
 'crcmod>=1.7,<2.0',
 'dill==0.2.6',
 'httplib2>=0.8,<0.10',
 'mock>=1.0.1,<3.0.0',
 'oauth2client>=2.0.1,<4.0.0',
 'protobuf==3.2.0',
-'pyyaml>=3.10,<4.0.0',
+'pyyaml>=3.12,<4.0.0',
 ]
 
 REQUIRED_TEST_PACKAGES = [
@@ -102,9 +102,9 @@ REQUIRED_TEST_PACKAGES = [
 ]
 
 GCP_REQUIREMENTS = [
-  'google-apitools>=0.5.6,<1.0.0',
+  'google-apitools>=0.5.8,<1.0.0',
   'proto-google-cloud-datastore-v1==0.90.0',
-  'googledatastore==7.0.0',
+  'googledatastore==7.0.1',
   # GCP packages required by tests
   'google-cloud-bigquery>=0.23.0,<0.24.0',
 ]



[2/2] beam git commit: This closes #2484

2017-04-10 Thread altay
This closes #2484


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/1c7a974a
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/1c7a974a
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/1c7a974a

Branch: refs/heads/master
Commit: 1c7a974ab00b8f4a8c089520aeb15ec29e128977
Parents: 8afa623 11a3af8
Author: Ahmet Altay 
Authored: Mon Apr 10 15:18:37 2017 -0700
Committer: Ahmet Altay 
Committed: Mon Apr 10 15:18:37 2017 -0700

--
 sdks/python/setup.py | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)
--




[2/3] beam-site git commit: Regenerate website

2017-04-10 Thread iemejia
Regenerate website


Project: http://git-wip-us.apache.org/repos/asf/beam-site/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam-site/commit/4810a749
Tree: http://git-wip-us.apache.org/repos/asf/beam-site/tree/4810a749
Diff: http://git-wip-us.apache.org/repos/asf/beam-site/diff/4810a749

Branch: refs/heads/asf-site
Commit: 4810a749ad56574fa8fb829c8d43be9e4f46a032
Parents: 732378d
Author: Ismaël Mejía 
Authored: Tue Apr 11 00:16:49 2017 +0200
Committer: Ismaël Mejía 
Committed: Tue Apr 11 00:16:49 2017 +0200

--
 content/documentation/io/built-in/index.html | 40 ---
 1 file changed, 28 insertions(+), 12 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/beam-site/blob/4810a749/content/documentation/io/built-in/index.html
--
diff --git a/content/documentation/io/built-in/index.html 
b/content/documentation/io/built-in/index.html
index 5eed2ea..0cb0338 100644
--- a/content/documentation/io/built-in/index.html
+++ b/content/documentation/io/built-in/index.html
@@ -215,36 +215,52 @@
 NameLanguageJIRA
   
   
+AMQPJava
+https://issues.apache.org/jira/browse/BEAM-1237;>BEAM-1237
+  
+  
 Apache CassandraJava
 https://issues.apache.org/jira/browse/BEAM-245;>BEAM-245
   
   
-Apache ParquetJava
-https://issues.apache.org/jira/browse/BEAM-214;>BEAM-214
+Apache DistributedLogJava
+https://issues.apache.org/jira/browse/BEAM-607;>BEAM-607
   
   
-RedisJava
-https://issues.apache.org/jira/browse/BEAM-1017;>BEAM-1017
+Apache HiveJava
+https://issues.apache.org/jira/browse/BEAM-1158;>BEAM-1158
   
   
-MemcachedJava
-https://issues.apache.org/jira/browse/BEAM-1678;>BEAM-1678
+Apache ParquetJava
+https://issues.apache.org/jira/browse/BEAM-214;>BEAM-214
   
   
 Apache SolrJava
 https://issues.apache.org/jira/browse/BEAM-1236;>BEAM-1236
   
   
-RabbitMQJava
-https://issues.apache.org/jira/browse/BEAM-1240;>BEAM-1240
+Apache SqoopJava
+https://issues.apache.org/jira/browse/BEAM-67;>BEAM-67
   
   
-AMQPJava
-https://issues.apache.org/jira/browse/BEAM-1237;>BEAM-1237
+CouchbaseJava
+https://issues.apache.org/jira/browse/BEAM-1893;>BEAM-1893
   
   
-Apache HiveJava
-https://issues.apache.org/jira/browse/BEAM-1158;>BEAM-1158
+MemcachedJava
+https://issues.apache.org/jira/browse/BEAM-1678;>BEAM-1678
+  
+  
+Neo4jJava
+https://issues.apache.org/jira/browse/BEAM-1857;>BEAM-1857
+  
+  
+RedisJava
+https://issues.apache.org/jira/browse/BEAM-1017;>BEAM-1017
+  
+  
+RabbitMQJava
+https://issues.apache.org/jira/browse/BEAM-1240;>BEAM-1240
   
 
 



[1/3] beam-site git commit: Add to in-progress IO list and alphabetize

2017-04-10 Thread iemejia
Repository: beam-site
Updated Branches:
  refs/heads/asf-site fd7fe0627 -> 0542df3af


Add to in-progress IO list and alphabetize


Project: http://git-wip-us.apache.org/repos/asf/beam-site/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam-site/commit/732378da
Tree: http://git-wip-us.apache.org/repos/asf/beam-site/tree/732378da
Diff: http://git-wip-us.apache.org/repos/asf/beam-site/diff/732378da

Branch: refs/heads/asf-site
Commit: 732378da1dbc66d3ae8b72ffb6893230350390fe
Parents: fd7fe06
Author: Stephen Sisk 
Authored: Fri Apr 7 13:11:51 2017 -0700
Committer: Stephen Sisk 
Committed: Fri Apr 7 13:11:51 2017 -0700

--
 src/documentation/io/built-in.md | 40 ---
 1 file changed, 28 insertions(+), 12 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/beam-site/blob/732378da/src/documentation/io/built-in.md
--
diff --git a/src/documentation/io/built-in.md b/src/documentation/io/built-in.md
index 4d23a4d..7d9d778 100644
--- a/src/documentation/io/built-in.md
+++ b/src/documentation/io/built-in.md
@@ -69,35 +69,51 @@ This table contains I/O transforms that are currently 
planned or in-progress. St
 NameLanguageJIRA
   
   
+AMQPJava
+https://issues.apache.org/jira/browse/BEAM-1237;>BEAM-1237
+  
+  
 Apache CassandraJava
 https://issues.apache.org/jira/browse/BEAM-245;>BEAM-245
   
   
-Apache ParquetJava
-https://issues.apache.org/jira/browse/BEAM-214;>BEAM-214
+Apache DistributedLogJava
+https://issues.apache.org/jira/browse/BEAM-607;>BEAM-607
   
   
-RedisJava
-https://issues.apache.org/jira/browse/BEAM-1017;>BEAM-1017
+Apache HiveJava
+https://issues.apache.org/jira/browse/BEAM-1158;>BEAM-1158
   
   
-MemcachedJava
-https://issues.apache.org/jira/browse/BEAM-1678;>BEAM-1678
+Apache ParquetJava
+https://issues.apache.org/jira/browse/BEAM-214;>BEAM-214
   
   
 Apache SolrJava
 https://issues.apache.org/jira/browse/BEAM-1236;>BEAM-1236
   
   
-RabbitMQJava
-https://issues.apache.org/jira/browse/BEAM-1240;>BEAM-1240
+Apache SqoopJava
+https://issues.apache.org/jira/browse/BEAM-67;>BEAM-67
   
   
-AMQPJava
-https://issues.apache.org/jira/browse/BEAM-1237;>BEAM-1237
+CouchbaseJava
+https://issues.apache.org/jira/browse/BEAM-1893;>BEAM-1893
   
   
-Apache HiveJava
-https://issues.apache.org/jira/browse/BEAM-1158;>BEAM-1158
+MemcachedJava
+https://issues.apache.org/jira/browse/BEAM-1678;>BEAM-1678
+  
+  
+Neo4jJava
+https://issues.apache.org/jira/browse/BEAM-1857;>BEAM-1857
+  
+  
+RedisJava
+https://issues.apache.org/jira/browse/BEAM-1017;>BEAM-1017
+  
+  
+RabbitMQJava
+https://issues.apache.org/jira/browse/BEAM-1240;>BEAM-1240
   
 



[3/3] beam-site git commit: This closes #205

2017-04-10 Thread iemejia
This closes #205


Project: http://git-wip-us.apache.org/repos/asf/beam-site/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam-site/commit/0542df3a
Tree: http://git-wip-us.apache.org/repos/asf/beam-site/tree/0542df3a
Diff: http://git-wip-us.apache.org/repos/asf/beam-site/diff/0542df3a

Branch: refs/heads/asf-site
Commit: 0542df3af9bc4f444f67c5c48156f17803a0c186
Parents: fd7fe06 4810a74
Author: Ismaël Mejía 
Authored: Tue Apr 11 00:17:17 2017 +0200
Committer: Ismaël Mejía 
Committed: Tue Apr 11 00:17:17 2017 +0200

--
 content/documentation/io/built-in/index.html | 40 ---
 src/documentation/io/built-in.md | 40 ---
 2 files changed, 56 insertions(+), 24 deletions(-)
--




[jira] [Commented] (BEAM-1928) Populate Runner API Components from the Java SDK

2017-04-10 Thread ASF GitHub Bot (JIRA)

[ 
https://issues.apache.org/jira/browse/BEAM-1928?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15963572#comment-15963572
 ] 

ASF GitHub Bot commented on BEAM-1928:
--

Github user asfgit closed the pull request at:

https://github.com/apache/beam/pull/2459


> Populate Runner API Components from the Java SDK
> 
>
> Key: BEAM-1928
> URL: https://issues.apache.org/jira/browse/BEAM-1928
> Project: Beam
>  Issue Type: New Feature
>  Components: beam-model-runner-api, runner-core
>Reporter: Thomas Groh
>Assignee: Thomas Groh
>
> Permit conversion of Java SDK components to runner API protocol buffers, and 
> the extraction of those SDK components from the protocol buffers.



--
This message was sent by Atlassian JIRA
(v6.3.15#6346)


[GitHub] beam pull request #2459: [BEAM-1928] Add SdkComponents

2017-04-10 Thread asfgit
Github user asfgit closed the pull request at:

https://github.com/apache/beam/pull/2459


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[1/2] beam git commit: This closes #2459

2017-04-10 Thread tgroh
Repository: beam
Updated Branches:
  refs/heads/master fc1006500 -> 8afa62398


This closes #2459


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/8afa6239
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/8afa6239
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/8afa6239

Branch: refs/heads/master
Commit: 8afa62398d51e3b11215970b139ab839ccb2e91a
Parents: fc10065 b7d7adc
Author: Thomas Groh 
Authored: Mon Apr 10 15:14:34 2017 -0700
Committer: Thomas Groh 
Committed: Mon Apr 10 15:14:34 2017 -0700

--
 runners/core-construction-java/pom.xml  |   4 +
 .../core/construction/SdkComponents.java| 152 +++
 .../core/construction/SdkComponentsTest.java| 131 
 .../beam/sdk/transforms/AppliedPTransform.java  |   2 +
 4 files changed, 289 insertions(+)
--




[2/2] beam git commit: Add SdkComponents

2017-04-10 Thread tgroh
Add SdkComponents

This takes SDK objects and assigns IDs to them. It is effectively a
ComponentsBuilder context where a component is referred to by the
Java object which is being translated, rather than by an opaque
string or protocol buffer.
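
A rough sketch of the ID-assignment pattern this commit message describes, using hypothetical names (the real implementation is the SdkComponents class in the diff below): each SDK object is interned in a map so that the same object always resolves to the same stable ID, and that ID is what later appears in the Runner API Components proto.

{code}
import java.util.HashMap;
import java.util.Map;

/**
 * Minimal sketch of an ID-assigning registry in the spirit of SdkComponents:
 * the same in-memory object always resolves to the same stable ID, and two
 * different objects never share an ID. Names here are hypothetical.
 */
public class ComponentIdRegistry<T> {
  private final Map<T, String> ids = new HashMap<>();      // object -> id
  private final Map<String, T> objects = new HashMap<>();  // id -> object
  private final String prefix;
  private int counter = 0;

  public ComponentIdRegistry(String prefix) {
    this.prefix = prefix;
  }

  /** Returns the existing ID for {@code component}, or assigns and records a new one. */
  public String getOrAssignId(T component) {
    String existing = ids.get(component);
    if (existing != null) {
      return existing;
    }
    String id = prefix + "_" + counter++;
    ids.put(component, id);
    objects.put(id, component);
    return id;
  }

  public static void main(String[] args) {
    ComponentIdRegistry<String> pcollections = new ComponentIdRegistry<>("pcollection");
    String words = "words";   // stand-ins for the PCollection objects being translated
    String counts = "counts";
    System.out.println(pcollections.getOrAssignId(words));   // pcollection_0
    System.out.println(pcollections.getOrAssignId(counts));  // pcollection_1
    System.out.println(pcollections.getOrAssignId(words));   // pcollection_0 again
  }
}
{code}

The real class additionally keys coders by identity equivalence, so equal-but-distinct coder instances receive distinct IDs, as the field comment in the diff notes.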


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/b7d7adc8
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/b7d7adc8
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/b7d7adc8

Branch: refs/heads/master
Commit: b7d7adc879694cf5b22f80a26a46982730a483ec
Parents: fc10065
Author: Thomas Groh 
Authored: Fri Apr 7 09:17:19 2017 -0700
Committer: Thomas Groh 
Committed: Mon Apr 10 15:14:34 2017 -0700

--
 runners/core-construction-java/pom.xml  |   4 +
 .../core/construction/SdkComponents.java| 152 +++
 .../core/construction/SdkComponentsTest.java| 131 
 .../beam/sdk/transforms/AppliedPTransform.java  |   2 +
 4 files changed, 289 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/beam/blob/b7d7adc8/runners/core-construction-java/pom.xml
--
diff --git a/runners/core-construction-java/pom.xml 
b/runners/core-construction-java/pom.xml
index 78b6819..ee64f91 100644
--- a/runners/core-construction-java/pom.xml
+++ b/runners/core-construction-java/pom.xml
@@ -56,6 +56,10 @@
     </dependency>
     <dependency>
       <groupId>org.apache.beam</groupId>
+      <artifactId>beam-sdks-common-runner-api</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.beam</groupId>
       <artifactId>beam-sdks-java-core</artifactId>
     </dependency>
     <dependency>

http://git-wip-us.apache.org/repos/asf/beam/blob/b7d7adc8/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/SdkComponents.java
--
diff --git 
a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/SdkComponents.java
 
b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/SdkComponents.java
new file mode 100644
index 000..c4b8cf1
--- /dev/null
+++ 
b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/SdkComponents.java
@@ -0,0 +1,152 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.beam.runners.core.construction;
+
+import com.google.common.base.Equivalence;
+import com.google.common.collect.BiMap;
+import com.google.common.collect.HashBiMap;
+import java.util.Set;
+import org.apache.beam.sdk.annotations.Experimental;
+import org.apache.beam.sdk.coders.Coder;
+import org.apache.beam.sdk.common.runner.v1.RunnerApi;
+import org.apache.beam.sdk.common.runner.v1.RunnerApi.Components;
+import org.apache.beam.sdk.transforms.AppliedPTransform;
+import org.apache.beam.sdk.transforms.PTransform;
+import org.apache.beam.sdk.util.NameUtils;
+import org.apache.beam.sdk.util.WindowingStrategy;
+import org.apache.beam.sdk.values.PCollection;
+
+/** SDK objects that will be represented at some later point within a {@link 
Components} object. */
+class SdkComponents {
+  private final RunnerApi.Components.Builder componentsBuilder;
+
+  private final BiMap<AppliedPTransform<?, ?, ?>, String> transformIds;
+  private final BiMap<PCollection<?>, String> pCollectionIds;
+  private final BiMap<WindowingStrategy<?, ?>, String> windowingStrategyIds;
+
+  /** A map of Coder to IDs. Coders are stored here with identity equivalence. */
+  private final BiMap<Equivalence.Wrapper<? extends Coder<?>>, String> coderIds;
+  // TODO: Specify environments
+
+  /** Create a new {@link SdkComponents} with no components. */
+  static SdkComponents create() {
+return new SdkComponents();
+  }
+
+  private SdkComponents() {
+this.componentsBuilder = RunnerApi.Components.newBuilder();
+this.transformIds = HashBiMap.create();
+this.pCollectionIds = HashBiMap.create();
+this.windowingStrategyIds = HashBiMap.create();
+this.coderIds = HashBiMap.create();
+  }
+
+  /**
+   * Registers the provided {@link AppliedPTransform} into this {@link 
SdkComponents}, 

[jira] [Created] (BEAM-1928) Populate Runner API Components from the Java SDK

2017-04-10 Thread Thomas Groh (JIRA)
Thomas Groh created BEAM-1928:
-

 Summary: Populate Runner API Components from the Java SDK
 Key: BEAM-1928
 URL: https://issues.apache.org/jira/browse/BEAM-1928
 Project: Beam
  Issue Type: New Feature
  Components: beam-model-runner-api, runner-core
Reporter: Thomas Groh
Assignee: Thomas Groh


Permit conversion of Java SDK components to runner API protocol buffers, and 
the extraction of those SDK components from the protocol buffers.



--
This message was sent by Atlassian JIRA
(v6.3.15#6346)


[GitHub] beam pull request #2486: Move WindowingStrategies to runners-core-constructi...

2017-04-10 Thread tgroh
GitHub user tgroh opened a pull request:

https://github.com/apache/beam/pull/2486

Move WindowingStrategies to runners-core-construction

Be sure to do all of the following to help us incorporate your contribution
quickly and easily:

 - [ ] Make sure the PR title is formatted like:
   `[BEAM-] Description of pull request`
 - [ ] Make sure tests pass via `mvn clean verify`. (Even better, enable
   Travis-CI on your fork and ensure the whole test matrix passes).
 - [ ] Replace `` in the title with the actual Jira issue
   number, if there is one.
 - [ ] If this contribution is large, please file an Apache
   [Individual Contributor License 
Agreement](https://www.apache.org/licenses/icla.pdf).

---


You can merge this pull request into a Git repository by running:

$ git pull https://github.com/tgroh/beam 
windowing_strategies_core_construction

Alternatively you can review and apply these changes as the patch at:

https://github.com/apache/beam/pull/2486.patch

To close this pull request, make a commit to your master/trunk branch
with (at least) the following in the commit message:

This closes #2486


commit 7935452efceed0f1a6a203070ca640df3cba68b3
Author: Thomas Groh 
Date:   2017-04-10T21:35:59Z

Move WindowingStrategies to runners-core-construction




---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[jira] [Commented] (BEAM-1891) Java @Autovalue: currently doesn't have a good coder

2017-04-10 Thread Raghavendra Nayak M (JIRA)

[ 
https://issues.apache.org/jira/browse/BEAM-1891?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15963558#comment-15963558
 ] 

Raghavendra Nayak M commented on BEAM-1891:
---

I see this improvement is listed as newbie/starter. Is this a good task 
to start contributing to Beam with? Thanks.

> Java @Autovalue: currently doesn't have a good coder
> 
>
> Key: BEAM-1891
> URL: https://issues.apache.org/jira/browse/BEAM-1891
> Project: Beam
>  Issue Type: Improvement
>  Components: sdk-java-extensions
>Reporter: Stephen Sisk
>Priority: Minor
>  Labels: newbie, starter
>
> In Java, @AutoValue classes are something that we would like developers to be 
> able to use in PCollections in Beam.
> However, there doesn't appear to be a good existing Beam Coder for 
> AutoValue-generated classes:
> * AvroCoder doesn't work
> * SerializableCoder works, but has other problems (larger/less efficient encodings)
> This is discussed fully at 
> https://lists.apache.org/thread.html/29617096819824d5c12247a246d316b763d9e583a21fff2f1c430077@%3Cdev.beam.apache.org%3E
>  
> We may need to implement an AutoValueCoder.
> cc [~pabloem] [~bchambers]



--
This message was sent by Atlassian JIRA
(v6.3.15#6346)
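
Until a dedicated AutoValueCoder exists, a common interim approach consistent with the issue above is to make the generated class Serializable and set SerializableCoder explicitly. A hedged sketch (the UserEvent type and its values are hypothetical; the efficiency caveats from the issue still apply):

{code}
import com.google.auto.value.AutoValue;
import java.io.Serializable;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.coders.SerializableCoder;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.values.PCollection;

public class AutoValueCoderExample {

  /** Hypothetical @AutoValue type; implementing Serializable lets SerializableCoder handle it. */
  @AutoValue
  public abstract static class UserEvent implements Serializable {
    public abstract String userId();
    public abstract long timestampMillis();

    public static UserEvent of(String userId, long timestampMillis) {
      return new AutoValue_AutoValueCoderExample_UserEvent(userId, timestampMillis);
    }
  }

  public static void main(String[] args) {
    Pipeline p = Pipeline.create(PipelineOptionsFactory.create());

    // Explicitly pick the (working but less efficient) coder for the AutoValue type.
    PCollection<UserEvent> events =
        p.apply(
            Create.of(UserEvent.of("alice", 1L), UserEvent.of("bob", 2L))
                .withCoder(SerializableCoder.of(UserEvent.class)));

    p.run();
  }
}
{code}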


[jira] [Created] (BEAM-1927) Add tfrecord io to built in IO list

2017-04-10 Thread Ahmet Altay (JIRA)
Ahmet Altay created BEAM-1927:
-

 Summary: Add tfrecord io to built in IO list
 Key: BEAM-1927
 URL: https://issues.apache.org/jira/browse/BEAM-1927
 Project: Beam
  Issue Type: Bug
  Components: sdk-py, website
Reporter: Ahmet Altay
Priority: Minor


Add tfrecordio 
(https://github.com/apache/beam/blob/master/sdks/python/apache_beam/io/tfrecordio.py)
 to built-in io list (https://beam.apache.org/documentation/io/built-in/).

cc: [~melap]



--
This message was sent by Atlassian JIRA
(v6.3.15#6346)


[jira] [Commented] (BEAM-1708) Better error messages when GCP features are not installed

2017-04-10 Thread ASF GitHub Bot (JIRA)

[ 
https://issues.apache.org/jira/browse/BEAM-1708?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15963522#comment-15963522
 ] 

ASF GitHub Bot commented on BEAM-1708:
--

GitHub user sb2nov opened a pull request:

https://github.com/apache/beam/pull/2485

[BEAM-1708] Improve error message when GCP not installed

Be sure to do all of the following to help us incorporate your contribution
quickly and easily:

 - [ ] Make sure the PR title is formatted like:
   `[BEAM-] Description of pull request`
 - [ ] Make sure tests pass via `mvn clean verify`. (Even better, enable
   Travis-CI on your fork and ensure the whole test matrix passes).
 - [ ] Replace `` in the title with the actual Jira issue
   number, if there is one.
 - [ ] If this contribution is large, please file an Apache
   [Individual Contributor License 
Agreement](https://www.apache.org/licenses/icla.pdf).

---

R: @chamikaramj @aaltay PTAL


You can merge this pull request into a Git repository by running:

$ git pull https://github.com/sb2nov/beam 
BEAM-1708-better_error_messages_gcp

Alternatively you can review and apply these changes as the patch at:

https://github.com/apache/beam/pull/2485.patch

To close this pull request, make a commit to your master/trunk branch
with (at least) the following in the commit message:

This closes #2485


commit 6718ff86d3e1a3c8bb58b02a685bf9d080cfaf5e
Author: Sourabh Bajaj 
Date:   2017-04-10T21:45:01Z

[BEAM-1708] Improve error message when GCP not installed




> Better error messages when GCP features are not installed 
> --
>
> Key: BEAM-1708
> URL: https://issues.apache.org/jira/browse/BEAM-1708
> Project: Beam
>  Issue Type: Improvement
>  Components: sdk-py
>Reporter: Sourabh Bajaj
>Assignee: Sourabh Bajaj
>




--
This message was sent by Atlassian JIRA
(v6.3.15#6346)


[GitHub] beam pull request #2485: [BEAM-1708] Improve error message when GCP not inst...

2017-04-10 Thread sb2nov
GitHub user sb2nov opened a pull request:

https://github.com/apache/beam/pull/2485

[BEAM-1708] Improve error message when GCP not installed

Be sure to do all of the following to help us incorporate your contribution
quickly and easily:

 - [ ] Make sure the PR title is formatted like:
   `[BEAM-] Description of pull request`
 - [ ] Make sure tests pass via `mvn clean verify`. (Even better, enable
   Travis-CI on your fork and ensure the whole test matrix passes).
 - [ ] Replace `` in the title with the actual Jira issue
   number, if there is one.
 - [ ] If this contribution is large, please file an Apache
   [Individual Contributor License 
Agreement](https://www.apache.org/licenses/icla.pdf).

---

R: @chamikaramj @aaltay PTAL


You can merge this pull request into a Git repository by running:

$ git pull https://github.com/sb2nov/beam 
BEAM-1708-better_error_messages_gcp

Alternatively you can review and apply these changes as the patch at:

https://github.com/apache/beam/pull/2485.patch

To close this pull request, make a commit to your master/trunk branch
with (at least) the following in the commit message:

This closes #2485


commit 6718ff86d3e1a3c8bb58b02a685bf9d080cfaf5e
Author: Sourabh Bajaj 
Date:   2017-04-10T21:45:01Z

[BEAM-1708] Improve error message when GCP not installed




---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[jira] [Commented] (BEAM-1895) Create transform in python sdk should be a custom source

2017-04-10 Thread ASF GitHub Bot (JIRA)

[ 
https://issues.apache.org/jira/browse/BEAM-1895?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15963491#comment-15963491
 ] 

ASF GitHub Bot commented on BEAM-1895:
--

Github user asfgit closed the pull request at:

https://github.com/apache/beam/pull/2446


> Create transform in python sdk should be a custom source
> ---
>
> Key: BEAM-1895
> URL: https://issues.apache.org/jira/browse/BEAM-1895
> Project: Beam
>  Issue Type: Improvement
>  Components: sdk-py
>Reporter: Vikas Kedigehalli
>Assignee: Vikas Kedigehalli
>
> This allows the Create transform to be runner-independent. 



--
This message was sent by Atlassian JIRA
(v6.3.15#6346)


[GitHub] beam pull request #2446: [BEAM-1895] Python Create as a custom source

2017-04-10 Thread asfgit
Github user asfgit closed the pull request at:

https://github.com/apache/beam/pull/2446


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[1/2] beam git commit: Create as custom source

2017-04-10 Thread chamikara
Repository: beam
Updated Branches:
  refs/heads/master 836e8e4aa -> fc1006500


Create as custom source


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/33d4a02b
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/33d4a02b
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/33d4a02b

Branch: refs/heads/master
Commit: 33d4a02bb28e5a1c09513dc3e7701b30df148943
Parents: 836e8e4
Author: Vikas Kedigehalli 
Authored: Mon Apr 3 10:01:45 2017 -0700
Committer: Chamikara Jayalath 
Committed: Mon Apr 10 14:17:15 2017 -0700

--
 sdks/python/apache_beam/internal/pickler.py |  22 ++--
 sdks/python/apache_beam/pipeline.py |   4 +-
 sdks/python/apache_beam/pipeline_test.py|  13 +-
 .../runners/dataflow/dataflow_runner.py |  24 
 .../runners/dataflow/dataflow_runner_test.py|   2 +-
 .../consumer_tracking_pipeline_visitor_test.py  |  22 ++--
 .../runners/direct/transform_evaluator.py   |  31 -
 sdks/python/apache_beam/transforms/core.py  |  92 +-
 .../apache_beam/transforms/create_test.py   | 121 +++
 .../apache_beam/transforms/ptransform_test.py   |  14 +--
 10 files changed, 254 insertions(+), 91 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/beam/blob/33d4a02b/sdks/python/apache_beam/internal/pickler.py
--
diff --git a/sdks/python/apache_beam/internal/pickler.py 
b/sdks/python/apache_beam/internal/pickler.py
index 67f9fc3..a4ab7b9 100644
--- a/sdks/python/apache_beam/internal/pickler.py
+++ b/sdks/python/apache_beam/internal/pickler.py
@@ -181,12 +181,15 @@ logging.getLogger('dill').setLevel(logging.WARN)
 # TODO(ccy): Currently, there are still instances of pickler.dumps() and
 # pickler.loads() being used for data, which results in an unnecessary base64
 # encoding.  This should be cleaned up.
-def dumps(o):
+def dumps(o, enable_trace=True):
   try:
 s = dill.dumps(o)
-  except Exception:  # pylint: disable=broad-except
-dill.dill._trace(True)   # pylint: disable=protected-access
-s = dill.dumps(o)
+  except Exception as e:  # pylint: disable=broad-except
+if enable_trace:
+  dill.dill._trace(True)  # pylint: disable=protected-access
+  s = dill.dumps(o)
+else:
+  raise e
   finally:
 dill.dill._trace(False)  # pylint: disable=protected-access
 
@@ -199,7 +202,7 @@ def dumps(o):
   return base64.b64encode(c)
 
 
-def loads(encoded):
+def loads(encoded, enable_trace=True):
   c = base64.b64decode(encoded)
 
   s = zlib.decompress(c)
@@ -207,9 +210,12 @@ def loads(encoded):
 
   try:
 return dill.loads(s)
-  except Exception:  # pylint: disable=broad-except
-dill.dill._trace(True)   # pylint: disable=protected-access
-return dill.loads(s)
+  except Exception as e:  # pylint: disable=broad-except
+if enable_trace:
+  dill.dill._trace(True)   # pylint: disable=protected-access
+  return dill.loads(s)
+else:
+  raise e
   finally:
 dill.dill._trace(False)  # pylint: disable=protected-access
 

http://git-wip-us.apache.org/repos/asf/beam/blob/33d4a02b/sdks/python/apache_beam/pipeline.py
--
diff --git a/sdks/python/apache_beam/pipeline.py 
b/sdks/python/apache_beam/pipeline.py
index b93167d..2ff9eb3 100644
--- a/sdks/python/apache_beam/pipeline.py
+++ b/sdks/python/apache_beam/pipeline.py
@@ -315,7 +315,9 @@ class Pipeline(object):
   Visitor.ok = False
 try:
   # Transforms must be picklable.
-  pickler.loads(pickler.dumps(transform_node.transform))
+  pickler.loads(pickler.dumps(transform_node.transform,
+  enable_trace=False),
+enable_trace=False)
 except Exception:
   Visitor.ok = False
 

http://git-wip-us.apache.org/repos/asf/beam/blob/33d4a02b/sdks/python/apache_beam/pipeline_test.py
--
diff --git a/sdks/python/apache_beam/pipeline_test.py 
b/sdks/python/apache_beam/pipeline_test.py
index ba219bf..6314609 100644
--- a/sdks/python/apache_beam/pipeline_test.py
+++ b/sdks/python/apache_beam/pipeline_test.py
@@ -173,9 +173,9 @@ class PipelineTest(unittest.TestCase):
  set(visitor.visited))
 self.assertEqual(set(visitor.enter_composite),
  set(visitor.leave_composite))
-self.assertEqual(2, len(visitor.enter_composite))
-self.assertEqual(visitor.enter_composite[1].transform, transform)
-self.assertEqual(visitor.leave_composite[0].transform, transform)
+self.assertEqual(3, len(visitor.enter_composite))
+

[2/2] beam git commit: This closes #2446

2017-04-10 Thread chamikara
This closes #2446


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/fc100650
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/fc100650
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/fc100650

Branch: refs/heads/master
Commit: fc1006500d14afd01a3a34527c0ea9dc94ac09e9
Parents: 836e8e4 33d4a02
Author: Chamikara Jayalath 
Authored: Mon Apr 10 14:18:30 2017 -0700
Committer: Chamikara Jayalath 
Committed: Mon Apr 10 14:18:30 2017 -0700

--
 sdks/python/apache_beam/internal/pickler.py |  22 ++--
 sdks/python/apache_beam/pipeline.py |   4 +-
 sdks/python/apache_beam/pipeline_test.py|  13 +-
 .../runners/dataflow/dataflow_runner.py |  24 
 .../runners/dataflow/dataflow_runner_test.py|   2 +-
 .../consumer_tracking_pipeline_visitor_test.py  |  22 ++--
 .../runners/direct/transform_evaluator.py   |  31 -
 sdks/python/apache_beam/transforms/core.py  |  92 +-
 .../apache_beam/transforms/create_test.py   | 121 +++
 .../apache_beam/transforms/ptransform_test.py   |  14 +--
 10 files changed, 254 insertions(+), 91 deletions(-)
--




[GitHub] beam pull request #2484: Upgrade dependencies

2017-04-10 Thread aaltay
GitHub user aaltay opened a pull request:

https://github.com/apache/beam/pull/2484

Upgrade dependencies

Minor version changes to use latest versions of
avro, pyyaml, google-apitools, googledatastore.

R: @vikkyrk @eddavisson

You can merge this pull request into a Git repository by running:

$ git pull https://github.com/aaltay/beam ds

Alternatively you can review and apply these changes as the patch at:

https://github.com/apache/beam/pull/2484.patch

To close this pull request, make a commit to your master/trunk branch
with (at least) the following in the commit message:

This closes #2484


commit 3628afc3ba7c5b1d779ec0cfccc29921c0f1d01c
Author: Ahmet Altay 
Date:   2017-04-10T21:13:00Z

Upgrade dependencies.

Minor version changes to use latest versions of
avro, pyyaml, google-apitools, googledatastore.




---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] beam pull request #2483: [BEAM-1222] Chunk size should be FS dependent

2017-04-10 Thread sb2nov
GitHub user sb2nov opened a pull request:

https://github.com/apache/beam/pull/2483

[BEAM-1222] Chunk size should be FS dependent

Be sure to do all of the following to help us incorporate your contribution
quickly and easily:

 - [ ] Make sure the PR title is formatted like:
   `[BEAM-] Description of pull request`
 - [ ] Make sure tests pass via `mvn clean verify`. (Even better, enable
   Travis-CI on your fork and ensure the whole test matrix passes).
 - [ ] Replace `` in the title with the actual Jira issue
   number, if there is one.
 - [ ] If this contribution is large, please file an Apache
   [Individual Contributor License 
Agreement](https://www.apache.org/licenses/icla.pdf).

---

R: @chamikaramj PTAL


You can merge this pull request into a Git repository by running:

$ git pull https://github.com/sb2nov/beam BEAM-1222-rename-batch-consistency

Alternatively you can review and apply these changes as the patch at:

https://github.com/apache/beam/pull/2483.patch

To close this pull request, make a commit to your master/trunk branch
with (at least) the following in the commit message:

This closes #2483


commit 48bb983aa3d216356abc60f5919f5e7b914e58b5
Author: Sourabh Bajaj 
Date:   2017-04-10T21:09:48Z

[BEAM-1222] Chunk size should be FS dependent




---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[jira] [Commented] (BEAM-1053) ApexGroupByKeyOperator serialization issues

2017-04-10 Thread ASF GitHub Bot (JIRA)

[ 
https://issues.apache.org/jira/browse/BEAM-1053?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15963410#comment-15963410
 ] 

ASF GitHub Bot commented on BEAM-1053:
--

Github user asfgit closed the pull request at:

https://github.com/apache/beam/pull/2473


> ApexGroupByKeyOperator serialization issues
> ---
>
> Key: BEAM-1053
> URL: https://issues.apache.org/jira/browse/BEAM-1053
> Project: Beam
>  Issue Type: Bug
>  Components: runner-apex
>Affects Versions: 0.3.0-incubating
>Reporter: Sandeep Deshmukh
>Assignee: Thomas Weise
>
> While trying to use the Apex Runner for the wordcount program, the 
> ApexGroupByKeyOperator fails with the following exception. 
> 2016-11-28 20:54:47,696 INFO com.datatorrent.stram.engine.StreamingContainer: 
> Deploy request: 
> [OperatorDeployInfo[id=9,name=ExtractPayload,type=GENERIC,checkpoint={583c4b1100b3,
>  0, 
> 0},inputs=[OperatorDeployInfo.InputDeployInfo[portName=input,streamId=stream14,sourceNodeId=8,sourcePortName=output,locality=CONTAINER_LOCAL,partitionMask=0,partitionKeys=]],outputs=[OperatorDeployInfo.OutputDeployInfo[portName=output,streamId=stream0,bufferServer=]]],
>  
> OperatorDeployInfo[id=8,name=ReadFromHDFS,type=INPUT,checkpoint={583c4b1100b3,
>  0, 
> 0},inputs=[],outputs=[OperatorDeployInfo.OutputDeployInfo[portName=output,streamId=stream14,bufferServer=]]],
>  
> OperatorDeployInfo[id=11,name=Application.CountWords/Count.PerElement/Init/Map,type=GENERIC,checkpoint={583c4b1100b3,
>  0, 
> 0},inputs=[OperatorDeployInfo.InputDeployInfo[portName=input,streamId=stream17,sourceNodeId=10,sourcePortName=output,locality=CONTAINER_LOCAL,partitionMask=0,partitionKeys=]],outputs=[OperatorDeployInfo.OutputDeployInfo[portName=output,streamId=stream18,bufferServer=]]],
>  
> OperatorDeployInfo[id=10,name=Application.CountWords/ParDo(ExtractWords),type=GENERIC,checkpoint={583c4b1100b3,
>  0, 
> 0},inputs=[OperatorDeployInfo.InputDeployInfo[portName=input,streamId=stream0,sourceNodeId=9,sourcePortName=output,locality=CONTAINER_LOCAL,partitionMask=0,partitionKeys=]],outputs=[OperatorDeployInfo.OutputDeployInfo[portName=output,streamId=stream17,bufferServer=]]],
>  
> OperatorDeployInfo[id=13,name=Application.CountWords/Count.PerElement/Count.PerKey/Combine.GroupedValues/AnonymousParDo,type=GENERIC,checkpoint={583c4b1100b3,
>  0, 
> 0},inputs=[OperatorDeployInfo.InputDeployInfo[portName=input,streamId=stream12,sourceNodeId=12,sourcePortName=output,locality=CONTAINER_LOCAL,partitionMask=0,partitionKeys=]],outputs=[OperatorDeployInfo.OutputDeployInfo[portName=output,streamId=stream3,bufferServer=]]],
>  
> OperatorDeployInfo[id=14,name=WriteToHDFS/Window.Into()/ApexRunner.AssignWindowsAndSetStrategy/AssignWindows/AssignWindows,type=GENERIC,checkpoint={583c4b1100b3,
>  0, 
> 0},inputs=[OperatorDeployInfo.InputDeployInfo[portName=input,streamId=stream3,sourceNodeId=13,sourcePortName=output,locality=CONTAINER_LOCAL,partitionMask=0,partitionKeys=]],outputs=[OperatorDeployInfo.OutputDeployInfo[portName=output,streamId=stream10,bufferServer=goofy]]],
>  
> OperatorDeployInfo[id=12,name=Application.CountWords/Count.PerElement/Count.PerKey/GroupByKey,type=GENERIC,checkpoint={583c4b1100b3,
>  0, 
> 0},inputs=[OperatorDeployInfo.InputDeployInfo[portName=input,streamId=stream18,sourceNodeId=11,sourcePortName=output,locality=CONTAINER_LOCAL,partitionMask=0,partitionKeys=]],outputs=[OperatorDeployInfo.OutputDeployInfo[portName=output,streamId=stream12,bufferServer=
> 2016-11-28 20:54:48,922 ERROR 
> com.datatorrent.stram.engine.StreamingContainer: deploy request failed
> com.esotericsoftware.kryo.KryoException: Class cannot be created (missing 
> no-arg constructor): java.nio.HeapByteBuffer
> Serialization trace:
> activeTimers 
> (org.apache.beam.runners.apex.translation.operators.ApexGroupByKeyOperator)
> at 
> com.esotericsoftware.kryo.Kryo$DefaultInstantiatorStrategy.newInstantiatorOf(Kryo.java:1228)
> at com.esotericsoftware.kryo.Kryo.newInstantiator(Kryo.java:1049)
> at com.esotericsoftware.kryo.Kryo.newInstance(Kryo.java:1058)
> at 
> com.esotericsoftware.kryo.serializers.FieldSerializer.create(FieldSerializer.java:547)
> at 
> com.esotericsoftware.kryo.serializers.FieldSerializer.read(FieldSerializer.java:523)
> at com.esotericsoftware.kryo.Kryo.readClassAndObject(Kryo.java:761)
> at 
> com.esotericsoftware.kryo.serializers.MapSerializer.read(MapSerializer.java:135)
> at 
> com.esotericsoftware.kryo.serializers.MapSerializer.read(MapSerializer.java:21)
> at com.esotericsoftware.kryo.Kryo.readObject(Kryo.java:679)
> at 
> com.esotericsoftware.kryo.serializers.ObjectField.read(ObjectField.java:106)
> at 
> 

[GitHub] beam pull request #2473: [BEAM-1053] ApexGroupByKeyOperator serialization is...

2017-04-10 Thread asfgit
Github user asfgit closed the pull request at:

https://github.com/apache/beam/pull/2473


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[1/2] beam git commit: BEAM-1053 ApexGroupByKeyOperator serialization issues

2017-04-10 Thread jkff
Repository: beam
Updated Branches:
  refs/heads/master 80d2548f2 -> 836e8e4aa


BEAM-1053 ApexGroupByKeyOperator serialization issues


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/74e31c35
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/74e31c35
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/74e31c35

Branch: refs/heads/master
Commit: 74e31c350986d093be1a0b53d001b3376def8b69
Parents: 80d2548
Author: Thomas Weise 
Authored: Sat Apr 8 13:01:01 2017 -0700
Committer: Eugene Kirpichov 
Committed: Mon Apr 10 10:55:31 2017 -0700

--
 .../operators/ApexGroupByKeyOperator.java   | 26 ++--
 1 file changed, 13 insertions(+), 13 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/beam/blob/74e31c35/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/operators/ApexGroupByKeyOperator.java
--
diff --git 
a/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/operators/ApexGroupByKeyOperator.java
 
b/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/operators/ApexGroupByKeyOperator.java
index 4551c9c..230082e 100644
--- 
a/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/operators/ApexGroupByKeyOperator.java
+++ 
b/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/operators/ApexGroupByKeyOperator.java
@@ -25,12 +25,12 @@ import com.datatorrent.api.DefaultOutputPort;
 import com.datatorrent.api.Operator;
 import com.datatorrent.api.StreamCodec;
 import com.datatorrent.api.annotation.OutputPortFieldAnnotation;
+import com.datatorrent.netlet.util.Slice;
 import com.esotericsoftware.kryo.serializers.FieldSerializer.Bind;
 import com.esotericsoftware.kryo.serializers.JavaSerializer;
 import com.google.common.base.Throwables;
 import com.google.common.collect.HashMultimap;
 import com.google.common.collect.Multimap;
-import java.nio.ByteBuffer;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.HashMap;
@@ -96,7 +96,7 @@ public class ApexGroupByKeyOperator<K, V> implements Operator {
   private final SerializablePipelineOptions serializedOptions;
   @Bind(JavaSerializer.class)
   private final StateInternalsFactory<K> stateInternalsFactory;
-  private Map<ByteBuffer, Set<TimerInternals.TimerData>> activeTimers = new HashMap<>();
+  private Map<Slice, Set<TimerInternals.TimerData>> activeTimers = new HashMap<>();
 
   private transient ProcessContext context;
   private transient OldDoFn, KV> fn;
@@ -177,18 +177,18 @@ public class ApexGroupByKeyOperator implements 
Operator {
* We keep these timers in a Set, so that they are deduplicated, as the same
* timer can be registered multiple times.
*/
-  private Multimap<ByteBuffer, TimerInternals.TimerData> getTimersReadyToProcess(
+  private Multimap<Slice, TimerInternals.TimerData> getTimersReadyToProcess(
       long currentWatermark) {
 
     // we keep the timers to return in a different list and launch them later
     // because we cannot prevent a trigger from registering another trigger,
     // which would lead to concurrent modification exception.
-    Multimap<ByteBuffer, TimerInternals.TimerData> toFire = HashMultimap.create();
+    Multimap<Slice, TimerInternals.TimerData> toFire = HashMultimap.create();
 
-    Iterator<Map.Entry<ByteBuffer, Set<TimerInternals.TimerData>>> it =
+    Iterator<Map.Entry<Slice, Set<TimerInternals.TimerData>>> it =
         activeTimers.entrySet().iterator();
     while (it.hasNext()) {
-      Map.Entry<ByteBuffer, Set<TimerInternals.TimerData>> keyWithTimers = it.next();
+      Map.Entry<Slice, Set<TimerInternals.TimerData>> keyWithTimers = it.next();
 
       Iterator<TimerInternals.TimerData> timerIt = keyWithTimers.getValue().iterator();
   while (timerIt.hasNext()) {
@@ -226,9 +226,9 @@ public class ApexGroupByKeyOperator implements 
Operator {
   }
 
   private void registerActiveTimer(K key, TimerInternals.TimerData timer) {
-final ByteBuffer keyBytes;
+final Slice keyBytes;
 try {
-  keyBytes = ByteBuffer.wrap(CoderUtils.encodeToByteArray(keyCoder, key));
+  keyBytes = new Slice(CoderUtils.encodeToByteArray(keyCoder, key));
 } catch (CoderException e) {
   throw new RuntimeException(e);
 }
@@ -241,9 +241,9 @@ public class ApexGroupByKeyOperator implements 
Operator {
   }
 
   private void unregisterActiveTimer(K key, TimerInternals.TimerData timer) {
-final ByteBuffer keyBytes;
+final Slice keyBytes;
 try {
-  keyBytes = ByteBuffer.wrap(CoderUtils.encodeToByteArray(keyCoder, key));
+  keyBytes = new Slice(CoderUtils.encodeToByteArray(keyCoder, key));
 } catch (CoderException e) {
   throw new RuntimeException(e);
 }
@@ -260,11 +260,11 @@ public class ApexGroupByKeyOperator implements 
Operator {
 
   private 

[2/2] beam git commit: This closes #2473

2017-04-10 Thread jkff
This closes #2473


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/836e8e4a
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/836e8e4a
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/836e8e4a

Branch: refs/heads/master
Commit: 836e8e4aab239dd41f81de2f1553850b3fd3d716
Parents: 80d2548 74e31c3
Author: Eugene Kirpichov 
Authored: Mon Apr 10 10:55:50 2017 -0700
Committer: Eugene Kirpichov 
Committed: Mon Apr 10 10:55:50 2017 -0700

--
 .../operators/ApexGroupByKeyOperator.java   | 26 ++--
 1 file changed, 13 insertions(+), 13 deletions(-)
--
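
For context on the fix above: Kryo's FieldSerializer creates field values reflectively, and java.nio.HeapByteBuffer has no no-arg constructor, so a ByteBuffer-keyed activeTimers map cannot be restored. Keying by a simple byte-array holder (Slice in the actual change) avoids this. A minimal standalone sketch, with a hypothetical BytesKey standing in for Slice:

{code}
import com.esotericsoftware.kryo.Kryo;
import com.esotericsoftware.kryo.io.Input;
import com.esotericsoftware.kryo.io.Output;
import java.util.HashMap;
import java.util.Map;

public class KryoKeyExample {

  /** Kryo-friendly key: plain fields plus a no-arg constructor for FieldSerializer. */
  static class BytesKey {
    byte[] bytes;

    BytesKey() {}  // required so Kryo can create instances reflectively

    BytesKey(byte[] bytes) {
      this.bytes = bytes;
    }
  }

  public static void main(String[] args) {
    Kryo kryo = new Kryo();
    kryo.setRegistrationRequired(false);

    Map<BytesKey, String> timers = new HashMap<>();
    timers.put(new BytesKey(new byte[] {1, 2, 3}), "timer-for-key-123");

    // Round-trips cleanly. Keying the map by java.nio.ByteBuffer instead fails on
    // deserialization with "Class cannot be created (missing no-arg constructor)".
    Output output = new Output(1024, -1);
    kryo.writeClassAndObject(output, timers);
    Object restored = kryo.readClassAndObject(new Input(output.toBytes()));
    System.out.println(restored.getClass());
  }
}
{code}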




[jira] [Created] (BEAM-1926) Need 3 Python snippets for composite transforms section in programming guide

2017-04-10 Thread Melissa Pashniak (JIRA)
Melissa Pashniak created BEAM-1926:
--

 Summary: Need 3 Python snippets for composite transforms section 
in programming guide
 Key: BEAM-1926
 URL: https://issues.apache.org/jira/browse/BEAM-1926
 Project: Beam
  Issue Type: Bug
  Components: website
Reporter: Melissa Pashniak
Assignee: Charles Chen
Priority: Minor
 Fix For: First stable release


(A PR will be out later today; it will have a note in the 3 needed Python 
blocks pointing to this JIRA for ease of finding.)
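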




--
This message was sent by Atlassian JIRA
(v6.3.15#6346)


[jira] [Commented] (BEAM-1922) DataSource in JdbcIO is not closed

2017-04-10 Thread ASF GitHub Bot (JIRA)

[ 
https://issues.apache.org/jira/browse/BEAM-1922?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15963293#comment-15963293
 ] 

ASF GitHub Bot commented on BEAM-1922:
--

GitHub user XuMingmin opened a pull request:

https://github.com/apache/beam/pull/2482

[BEAM-1922] DataSource in JdbcIO is not closed

add `close()` for `BasicDataSource`. When a user calls 
`DataSourceConfiguration.create(dataSource)`, closing is not available because 
`DataSource` has no `close()` method.

You can merge this pull request into a Git repository by running:

$ git pull https://github.com/XuMingmin/beam BEAM1922

Alternatively you can review and apply these changes as the patch at:

https://github.com/apache/beam/pull/2482.patch

To close this pull request, make a commit to your master/trunk branch
with (at least) the following in the commit message:

This closes #2482


commit b0bce28b90743336233be89b645c4f8028b36538
Author: mingmxu 
Date:   2017-04-10T18:19:02Z

add close() for BasicDataSource




> DataSource in JdbcIO is not closed
> --
>
> Key: BEAM-1922
> URL: https://issues.apache.org/jira/browse/BEAM-1922
> Project: Beam
>  Issue Type: Bug
>  Components: sdk-java-extensions
>Reporter: Xu Mingmin
>Assignee: Xu Mingmin
>
> The line below is not closed properly, 
> {code}
> BasicDataSource basicDataSource = new BasicDataSource();
> {code}
> It's a potential issue for the other usage, although {{DataSource}} doesn't 
> have a {{close()}} method.
> {code}
> public static DataSourceConfiguration create(DataSource dataSource)
> {code}



--
This message was sent by Atlassian JIRA
(v6.3.15#6346)
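
A hedged sketch of the teardown pattern the PR above is aiming for (the closeQuietly helper is hypothetical): keep the javax.sql.DataSource abstraction, but close the concrete instance when it supports it, as commons-dbcp2's BasicDataSource does.

{code}
import javax.sql.DataSource;
import org.apache.commons.dbcp2.BasicDataSource;

public class DataSourceCloseExample {

  /** Closes the pooled connections if the concrete DataSource supports it. */
  static void closeQuietly(DataSource dataSource) {
    if (dataSource instanceof AutoCloseable) {
      try {
        ((AutoCloseable) dataSource).close();
      } catch (Exception e) {
        // Log and move on; teardown should not fail the pipeline.
        System.err.println("Failed to close DataSource: " + e);
      }
    }
    // A plain DataSource has no close() method, which is exactly the gap BEAM-1922 describes.
  }

  public static void main(String[] args) {
    BasicDataSource ds = new BasicDataSource();
    ds.setDriverClassName("org.postgresql.Driver");     // hypothetical driver and URL
    ds.setUrl("jdbc:postgresql://localhost:5432/test");
    try {
      // ... use the DataSource through the javax.sql.DataSource interface ...
    } finally {
      closeQuietly(ds);
    }
  }
}
{code}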


Jenkins build is back to stable : beam_PostCommit_Java_MavenInstall #3237

2017-04-10 Thread Apache Jenkins Server
See 




Build failed in Jenkins: beam_PerformanceTests_Dataflow #292

2017-04-10 Thread Apache Jenkins Server
See 


Changes:

[altay] better log message for bigquery temp tables

[dhalperi] BEAM-1390 Update top level README.md to include Apex Runner

[dhalperi] [BEAM-386] Move UnboundedReadFromBoundedSource to 
core-construction-java

[altay] enable test_multi_valued_singleton_side_input test

--
[...truncated 251.79 KB...]
 ! c666b42...5c504b9 refs/pull/2380/merge -> origin/pr/2380/merge  (unable to 
update local ref)
error: unable to resolve reference refs/remotes/origin/pr/2381/merge: No such 
file or directory
 ! 6c67869...fc1ce54 refs/pull/2381/merge -> origin/pr/2381/merge  (unable to 
update local ref)
error: unable to resolve reference refs/remotes/origin/pr/2382/merge: No such 
file or directory
 ! b642536...e9bfd19 refs/pull/2382/merge -> origin/pr/2382/merge  (unable to 
update local ref)
error: unable to resolve reference refs/remotes/origin/pr/2385/merge: No such 
file or directory
 ! 3caf915...5e05e82 refs/pull/2385/merge -> origin/pr/2385/merge  (unable to 
update local ref)
error: unable to resolve reference refs/remotes/origin/pr/2387/merge: No such 
file or directory
 ! 345486b...d9f176e refs/pull/2387/merge -> origin/pr/2387/merge  (unable to 
update local ref)
error: unable to resolve reference refs/remotes/origin/pr/2389/merge: No such 
file or directory
 ! 836c0be...614baf7 refs/pull/2389/merge -> origin/pr/2389/merge  (unable to 
update local ref)
error: unable to resolve reference refs/remotes/origin/pr/2401/merge: No such 
file or directory
 ! 27d2beb...22cbe49 refs/pull/2401/merge -> origin/pr/2401/merge  (unable to 
update local ref)
error: unable to resolve reference refs/remotes/origin/pr/2403/merge: No such 
file or directory
 ! 580df8b...cd78b7b refs/pull/2403/merge -> origin/pr/2403/merge  (unable to 
update local ref)
error: unable to resolve reference refs/remotes/origin/pr/2406/merge: No such 
file or directory
 ! 382cd34...bfea969 refs/pull/2406/merge -> origin/pr/2406/merge  (unable to 
update local ref)
error: unable to resolve reference refs/remotes/origin/pr/2407/merge: No such 
file or directory
 ! 94750e5...928d4e7 refs/pull/2407/merge -> origin/pr/2407/merge  (unable to 
update local ref)
error: unable to resolve reference refs/remotes/origin/pr/2409/merge: No such 
file or directory
 ! 3b743c1...63f8ac5 refs/pull/2409/merge -> origin/pr/2409/merge  (unable to 
update local ref)
error: unable to resolve reference refs/remotes/origin/pr/2410/merge: No such 
file or directory
 ! a3e4c50...93c22ac refs/pull/2410/merge -> origin/pr/2410/merge  (unable to 
update local ref)
error: unable to resolve reference refs/remotes/origin/pr/2412/merge: No such 
file or directory
 ! 2800ff1...ae52d64 refs/pull/2412/merge -> origin/pr/2412/merge  (unable to 
update local ref)
error: unable to resolve reference refs/remotes/origin/pr/2414/merge: No such 
file or directory
 ! fb3db3b...fec945f refs/pull/2414/merge -> origin/pr/2414/merge  (unable to 
update local ref)
error: unable to resolve reference refs/remotes/origin/pr/2417/merge: No such 
file or directory
 ! 6d079a4...b08de25 refs/pull/2417/merge -> origin/pr/2417/merge  (unable to 
update local ref)
error: unable to resolve reference refs/remotes/origin/pr/2420/merge: No such 
file or directory
 ! 1bea85f...1fde8b4 refs/pull/2420/merge -> origin/pr/2420/merge  (unable to 
update local ref)
error: unable to resolve reference refs/remotes/origin/pr/2421/merge: No such 
file or directory
 ! 272146e...6aabe68 refs/pull/2421/merge -> origin/pr/2421/merge  (unable to 
update local ref)
error: unable to resolve reference refs/remotes/origin/pr/2423/head: No such 
file or directory
 ! 7ae9488..9f7f850  refs/pull/2423/head -> origin/pr/2423/head  (unable to 
update local ref)
error: unable to resolve reference refs/remotes/origin/pr/2423/merge: No such 
file or directory
 ! 5b1da5f...eae8214 refs/pull/2423/merge -> origin/pr/2423/merge  (unable to 
update local ref)
error: unable to resolve reference refs/remotes/origin/pr/2424/merge: No such 
file or directory
 ! 31e480e...6de35a8 refs/pull/2424/merge -> origin/pr/2424/merge  (unable to 
update local ref)
error: unable to resolve reference refs/remotes/origin/pr/2426/merge: No such 
file or directory
 ! 720cf4e...1874e21 refs/pull/2426/merge -> origin/pr/2426/merge  (unable to 
update local ref)
error: unable to resolve reference refs/remotes/origin/pr/2427/merge: No such 
file or directory
 ! 4586537...7de910f refs/pull/2427/merge -> origin/pr/2427/merge  (unable to 
update local ref)
error: unable to resolve reference refs/remotes/origin/pr/2431/merge: No such 
file or directory
 ! 11fd22c...87049e0 refs/pull/2431/merge -> origin/pr/2431/merge  (unable to 
update local ref)
error: unable to resolve reference refs/remotes/origin/pr/2434/merge: No such 
file or directory
 ! 87d000f...dea7fc9 refs/pull/2434/merge -> origin/pr/2434/merge  (unable to 

Jenkins build is back to stable : beam_PostCommit_Java_ValidatesRunner_Apex #1039

2017-04-10 Thread Apache Jenkins Server
See 




[jira] [Commented] (BEAM-1612) Support real Bundle in Flink runner

2017-04-10 Thread Aljoscha Krettek (JIRA)

[ 
https://issues.apache.org/jira/browse/BEAM-1612?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15963264#comment-15963264
 ] 

Aljoscha Krettek commented on BEAM-1612:


I'll see if we can still get it into Flink 1.3. Then yes, that is something I would 
very much like to have.

> Support real Bundle in Flink runner
> ---
>
> Key: BEAM-1612
> URL: https://issues.apache.org/jira/browse/BEAM-1612
> Project: Beam
>  Issue Type: Improvement
>  Components: runner-flink
>Reporter: Jingsong Lee
>Assignee: Jingsong Lee
>
> The bundle is very important in the Beam model. Users can use the bundle to 
> flush buffers and can reuse many heavyweight resources within a bundle. Most IO 
> plugins use the bundle to flush. 
> Moreover, FlinkRunner could also use the bundle to reduce access to Flink state: 
> for example, elements could first be buffered on the Java heap and flushed into 
> RocksDB state when finishBundle is invoked, which reduces the number of 
> serializations.
> But today FlinkRunner calls finishBundle after every processElement. We need to 
> support real bundles.
> I think we can have the following implementations:
> 1. Invoke finishBundle and the next startBundle in {{snapshot}} of Flink. But 
> sometimes this "bundle" may be too big; it depends on the user's checkpoint 
> configuration.
> 2. Manually control the size of the bundle. A half-bundle is flushed into a 
> full-bundle by count, eventTime, processTime, or {{snapshot}}. We do not need 
> to wait; we just call startBundle and finishBundle at the right time.



--
This message was sent by Atlassian JIRA
(v6.3.15#6346)
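
A minimal sketch of why real bundles matter to IO authors (BufferingWriteFn is hypothetical): elements accumulate across processElement calls and are flushed once in finishBundle, so a runner that finishes a bundle after every element defeats the batching the issue describes.

{code}
import java.util.ArrayList;
import java.util.List;
import org.apache.beam.sdk.transforms.DoFn;

/** Hypothetical DoFn that batches writes per bundle. */
public class BufferingWriteFn extends DoFn<String, Void> {

  private transient List<String> buffer;

  @StartBundle
  public void startBundle() {
    buffer = new ArrayList<>();
  }

  @ProcessElement
  public void processElement(ProcessContext c) {
    buffer.add(c.element());
    if (buffer.size() >= 500) {
      flush();  // optional size-based flush inside a bundle
    }
  }

  @FinishBundle
  public void finishBundle() {
    flush();  // one flush per bundle; cheap only if bundles contain many elements
  }

  private void flush() {
    // A real sink would issue one batched write or RPC for the whole buffer here.
    buffer.clear();
  }
}
{code}

If the runner ends a bundle after every element, the buffer never holds more than one item and every element pays the full flush cost.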


[jira] [Assigned] (BEAM-464) HolderCoder should be a StandardCoder and not a CustomCoder

2017-04-10 Thread Luke Cwik (JIRA)

 [ 
https://issues.apache.org/jira/browse/BEAM-464?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Luke Cwik reassigned BEAM-464:
--

Assignee: (was: Luke Cwik)

> HolderCoder should be a StandardCoder and not a CustomCoder
> ---
>
> Key: BEAM-464
> URL: https://issues.apache.org/jira/browse/BEAM-464
> Project: Beam
>  Issue Type: Bug
>  Components: sdk-java-core
>Affects Versions: First stable release
>Reporter: Luke Cwik
>Priority: Minor
>  Labels: backward-incompatible
>
> The issue is that the coder does not report component encodings which 
> prevents effective runner inspection of the components.



--
This message was sent by Atlassian JIRA
(v6.3.15#6346)
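
For context on why reporting components matters, a small hedged sketch of the runner-side inspection that component-aware coders enable (printCoderTree is a hypothetical helper; the coders used are stock Beam coders):

{code}
import java.util.List;
import org.apache.beam.sdk.coders.Coder;
import org.apache.beam.sdk.coders.KvCoder;
import org.apache.beam.sdk.coders.ListCoder;
import org.apache.beam.sdk.coders.StringUtf8Coder;
import org.apache.beam.sdk.coders.VarIntCoder;

public class CoderInspectionExample {

  /** Recursively prints a coder and its component coders, as a runner might walk them. */
  static void printCoderTree(Coder<?> coder, int depth) {
    StringBuilder indent = new StringBuilder();
    for (int i = 0; i < depth; i++) {
      indent.append("  ");
    }
    System.out.println(indent + coder.getClass().getSimpleName());
    List<? extends Coder<?>> components = coder.getCoderArguments();
    if (components == null) {
      return;  // a coder that hides its components cuts the inspection short
    }
    for (Coder<?> component : components) {
      printCoderTree(component, depth + 1);
    }
  }

  public static void main(String[] args) {
    // KvCoder and ListCoder expose their components, so the whole tree is visible.
    printCoderTree(KvCoder.of(StringUtf8Coder.of(), ListCoder.of(VarIntCoder.of())), 0);
    // A coder that reports no components hides its structure, which is the
    // inspection gap BEAM-464 calls out for HolderCoder.
  }
}
{code}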


[jira] [Commented] (BEAM-1924) pip download failed in post commit

2017-04-10 Thread Sourabh Bajaj (JIRA)

[ 
https://issues.apache.org/jira/browse/BEAM-1924?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15963254#comment-15963254
 ] 

Sourabh Bajaj commented on BEAM-1924:
-

One improvement that comes to mind is that we should pass the subprocess.PIPE 
options in this call similar to how we do it in the juliaset example

> pip download failed in post commit
> --
>
> Key: BEAM-1924
> URL: https://issues.apache.org/jira/browse/BEAM-1924
> Project: Beam
>  Issue Type: Bug
>  Components: sdk-py
>Reporter: Ahmet Altay
>
> A post commit failed with a pip failure:
> https://builds.apache.org/job/beam_PostCommit_Python_Verify/1801/consoleFull
> Captured output is not clear enough to tell what went wrong:
> ==
> ERROR: test_par_do_with_multiple_outputs_and_using_return 
> (apache_beam.transforms.ptransform_test.PTransformTest)
> --
> Traceback (most recent call last):
>   File 
> "/home/jenkins/jenkins-slave/workspace/beam_PostCommit_Python_Verify/sdks/python/apache_beam/transforms/ptransform_test.py",
>  line 207, in test_par_do_with_multiple_outputs_and_using_return
> pipeline.run()
>   File 
> "/home/jenkins/jenkins-slave/workspace/beam_PostCommit_Python_Verify/sdks/python/apache_beam/test_pipeline.py",
>  line 91, in run
> result = super(TestPipeline, self).run()
>   File 
> "/home/jenkins/jenkins-slave/workspace/beam_PostCommit_Python_Verify/sdks/python/apache_beam/pipeline.py",
>  line 160, in run
> self.to_runner_api(), self.runner, self.options).run(False)
>   File 
> "/home/jenkins/jenkins-slave/workspace/beam_PostCommit_Python_Verify/sdks/python/apache_beam/pipeline.py",
>  line 169, in run
> return self.runner.run(self)
>   File 
> "/home/jenkins/jenkins-slave/workspace/beam_PostCommit_Python_Verify/sdks/python/apache_beam/runners/dataflow/test_dataflow_runner.py",
>  line 39, in run
> self.result = super(TestDataflowRunner, self).run(pipeline)
>   File 
> "/home/jenkins/jenkins-slave/workspace/beam_PostCommit_Python_Verify/sdks/python/apache_beam/runners/dataflow/dataflow_runner.py",
>  line 175, in run
> self.dataflow_client.create_job(self.job), self)
>   File 
> "/home/jenkins/jenkins-slave/workspace/beam_PostCommit_Python_Verify/sdks/python/apache_beam/utils/retry.py",
>  line 174, in wrapper
> return fun(*args, **kwargs)
>   File 
> "/home/jenkins/jenkins-slave/workspace/beam_PostCommit_Python_Verify/sdks/python/apache_beam/runners/dataflow/internal/apiclient.py",
>  line 425, in create_job
> self.create_job_description(job)
>   File 
> "/home/jenkins/jenkins-slave/workspace/beam_PostCommit_Python_Verify/sdks/python/apache_beam/runners/dataflow/internal/apiclient.py",
>  line 446, in create_job_description
> job.options, file_copy=self._gcs_file_copy)
>   File 
> "/home/jenkins/jenkins-slave/workspace/beam_PostCommit_Python_Verify/sdks/python/apache_beam/runners/dataflow/internal/dependency.py",
>  line 306, in stage_job_resources
> setup_options.requirements_file, requirements_cache_path)
>   File 
> "/home/jenkins/jenkins-slave/workspace/beam_PostCommit_Python_Verify/sdks/python/apache_beam/runners/dataflow/internal/dependency.py",
>  line 242, in _populate_requirements_cache
> processes.check_call(cmd_args)
>   File 
> "/home/jenkins/jenkins-slave/workspace/beam_PostCommit_Python_Verify/sdks/python/apache_beam/utils/processes.py",
>  line 40, in check_call
> return subprocess.check_call(*args, **kwargs)
>   File "/usr/lib/python2.7/subprocess.py", line 540, in check_call
> raise CalledProcessError(retcode, cmd)
> CalledProcessError: Command 
> '['/home/jenkins/jenkins-slave/workspace/beam_PostCommit_Python_Verify/sdks/python/bin/python',
>  '-m', 'pip', 'install', '--download', '/tmp/dataflow-requirements-cache', 
> '-r', 'postcommit_requirements.txt', '--no-binary', ':all:']' returned 
> non-zero exit status 2
> Any ideas on how we can debug this failure or improve for the future? (cc:
>  [~markflyhigh] [~sb2nov])



--
This message was sent by Atlassian JIRA
(v6.3.15#6346)


[jira] [Resolved] (BEAM-1124) Python ValidateRunner Test test_multi_valued_singleton_side_input Break Postcommit

2017-04-10 Thread Ahmet Altay (JIRA)

 [ 
https://issues.apache.org/jira/browse/BEAM-1124?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Ahmet Altay resolved BEAM-1124.
---
   Resolution: Fixed
Fix Version/s: Not applicable

> Python ValidateRunner Test test_multi_valued_singleton_side_input Break 
> Postcommit
> --
>
> Key: BEAM-1124
> URL: https://issues.apache.org/jira/browse/BEAM-1124
> Project: Beam
>  Issue Type: Bug
>  Components: runner-dataflow, sdk-py, testing
>Reporter: Mark Liu
>Assignee: Ahmet Altay
> Fix For: Not applicable
>
>
> The Python test_multi_valued_singleton_side_input test, a ValidatesRunner test 
> that runs on the Dataflow service, failed and broke the 
> postcommit (https://builds.apache.org/view/Beam/job/beam_PostCommit_Python_Verify/853/).
> Here is the stack trace:
> {code}
> Traceback (most recent call last):
>   File 
> "/home/jenkins/jenkins-slave/workspace/beam_PostCommit_Python_Verify/sdks/python/apache_beam/dataflow_test.py",
>  line 186, in test_multi_valued_singleton_side_input
> pipeline.run()
>   File 
> "/home/jenkins/jenkins-slave/workspace/beam_PostCommit_Python_Verify/sdks/python/apache_beam/pipeline.py",
>  line 159, in run
> return self.runner.run(self)
>   File 
> "/home/jenkins/jenkins-slave/workspace/beam_PostCommit_Python_Verify/sdks/python/apache_beam/runners/dataflow_runner.py",
>  line 195, in run
> % getattr(self, 'last_error_msg', None), self.result)
> DataflowRuntimeException: Dataflow pipeline failed:
> (99aeafa7a8dffcc7): Traceback (most recent call last):
>   File 
> "/usr/local/lib/python2.7/dist-packages/dataflow_worker/batchworker.py", line 
> 514, in do_work
> work_executor.execute()
>   File "dataflow_worker/executor.py", line 892, in 
> dataflow_worker.executor.MapTaskExecutor.execute 
> (dataflow_worker/executor.c:24008)
> op.start()
>   File "dataflow_worker/executor.py", line 456, in 
> dataflow_worker.executor.DoOperation.start (dataflow_worker/executor.c:13870)
> def start(self):
>   File "dataflow_worker/executor.py", line 483, in 
> dataflow_worker.executor.DoOperation.start (dataflow_worker/executor.c:13685)
> self.dofn_runner = common.DoFnRunner(
>   File "apache_beam/runners/common.py", line 89, in 
> apache_beam.runners.common.DoFnRunner.__init__ 
> (apache_beam/runners/common.c:3469)
> args, kwargs, [side_input[global_window]
>   File 
> "/usr/local/lib/python2.7/dist-packages/apache_beam/transforms/sideinputs.py",
>  line 192, in __getitem__
> _FilteringIterable(self._iterable, target_window), self._view_options)
>   File "/usr/local/lib/python2.7/dist-packages/apache_beam/pvalue.py", line 
> 279, in _from_runtime_iterable
> 'PCollection with more than one element accessed as '
> ValueError: PCollection with more than one element accessed as a singleton 
> view.
> {code}
> Worker logs in here:
> https://builds.apache.org/view/Beam/job/beam_PostCommit_Python_Verify/853/console
> In order to temporarily ignore this test in postcommit, we can comment out 
> annotation "@attr('ValidatesRunner')" of this test. Then it will only run as 
> a unit test (execute by DirectRunner), but not run as a ValidatesRunner test.



--
This message was sent by Atlassian JIRA
(v6.3.15#6346)


[jira] [Commented] (BEAM-1124) Python ValidateRunner Test test_multi_valued_singleton_side_input Break Postcommit

2017-04-10 Thread ASF GitHub Bot (JIRA)

[ 
https://issues.apache.org/jira/browse/BEAM-1124?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15963251#comment-15963251
 ] 

ASF GitHub Bot commented on BEAM-1124:
--

Github user asfgit closed the pull request at:

https://github.com/apache/beam/pull/2467


> Python ValidateRunner Test test_multi_valued_singleton_side_input Break 
> Postcommit
> --
>
> Key: BEAM-1124
> URL: https://issues.apache.org/jira/browse/BEAM-1124
> Project: Beam
>  Issue Type: Bug
>  Components: runner-dataflow, sdk-py, testing
>Reporter: Mark Liu
>Assignee: Ahmet Altay
>
> The Python test_multi_valued_singleton_side_input test, a ValidatesRunner test 
> that runs on the Dataflow service, failed and broke the 
> postcommit (https://builds.apache.org/view/Beam/job/beam_PostCommit_Python_Verify/853/).
> Here is the stack trace:
> {code}
> Traceback (most recent call last):
>   File 
> "/home/jenkins/jenkins-slave/workspace/beam_PostCommit_Python_Verify/sdks/python/apache_beam/dataflow_test.py",
>  line 186, in test_multi_valued_singleton_side_input
> pipeline.run()
>   File 
> "/home/jenkins/jenkins-slave/workspace/beam_PostCommit_Python_Verify/sdks/python/apache_beam/pipeline.py",
>  line 159, in run
> return self.runner.run(self)
>   File 
> "/home/jenkins/jenkins-slave/workspace/beam_PostCommit_Python_Verify/sdks/python/apache_beam/runners/dataflow_runner.py",
>  line 195, in run
> % getattr(self, 'last_error_msg', None), self.result)
> DataflowRuntimeException: Dataflow pipeline failed:
> (99aeafa7a8dffcc7): Traceback (most recent call last):
>   File 
> "/usr/local/lib/python2.7/dist-packages/dataflow_worker/batchworker.py", line 
> 514, in do_work
> work_executor.execute()
>   File "dataflow_worker/executor.py", line 892, in 
> dataflow_worker.executor.MapTaskExecutor.execute 
> (dataflow_worker/executor.c:24008)
> op.start()
>   File "dataflow_worker/executor.py", line 456, in 
> dataflow_worker.executor.DoOperation.start (dataflow_worker/executor.c:13870)
> def start(self):
>   File "dataflow_worker/executor.py", line 483, in 
> dataflow_worker.executor.DoOperation.start (dataflow_worker/executor.c:13685)
> self.dofn_runner = common.DoFnRunner(
>   File "apache_beam/runners/common.py", line 89, in 
> apache_beam.runners.common.DoFnRunner.__init__ 
> (apache_beam/runners/common.c:3469)
> args, kwargs, [side_input[global_window]
>   File 
> "/usr/local/lib/python2.7/dist-packages/apache_beam/transforms/sideinputs.py",
>  line 192, in __getitem__
> _FilteringIterable(self._iterable, target_window), self._view_options)
>   File "/usr/local/lib/python2.7/dist-packages/apache_beam/pvalue.py", line 
> 279, in _from_runtime_iterable
> 'PCollection with more than one element accessed as '
> ValueError: PCollection with more than one element accessed as a singleton 
> view.
> {code}
> Worker logs in here:
> https://builds.apache.org/view/Beam/job/beam_PostCommit_Python_Verify/853/console
> In order to temporarily ignore this test in postcommit, we can comment out 
> annotation "@attr('ValidatesRunner')" of this test. Then it will only run as 
> a unit test (execute by DirectRunner), but not run as a ValidatesRunner test.



--
This message was sent by Atlassian JIRA
(v6.3.15#6346)


[GitHub] beam pull request #2467: [BEAM-1124] enable test_multi_valued_singleton_side...

2017-04-10 Thread asfgit
Github user asfgit closed the pull request at:

https://github.com/apache/beam/pull/2467


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[1/2] beam git commit: enable test_multi_valued_singleton_side_input test

2017-04-10 Thread altay
Repository: beam
Updated Branches:
  refs/heads/master e0e39a975 -> 80d2548f2


enable test_multi_valued_singleton_side_input test


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/a3a15a53
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/a3a15a53
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/a3a15a53

Branch: refs/heads/master
Commit: a3a15a5370467db72f8be2fa031483349e49f6ab
Parents: e0e39a9
Author: Ahmet Altay 
Authored: Fri Apr 7 17:54:14 2017 -0700
Committer: Ahmet Altay 
Committed: Mon Apr 10 10:49:12 2017 -0700

--
 sdks/python/apache_beam/transforms/sideinputs_test.py | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/beam/blob/a3a15a53/sdks/python/apache_beam/transforms/sideinputs_test.py
--
diff --git a/sdks/python/apache_beam/transforms/sideinputs_test.py 
b/sdks/python/apache_beam/transforms/sideinputs_test.py
index 53669de..bf9aeff 100644
--- a/sdks/python/apache_beam/transforms/sideinputs_test.py
+++ b/sdks/python/apache_beam/transforms/sideinputs_test.py
@@ -145,16 +145,14 @@ class SideInputsTest(unittest.TestCase):
 assert_that(result, equal_to([(1, 'empty'), (2, 'empty')]))
 pipeline.run()
 
-  # @attr('ValidatesRunner')
-  # TODO(BEAM-1124): Temporarily disable it due to test failed running on
-  # Dataflow service.
+  @attr('ValidatesRunner')
   def test_multi_valued_singleton_side_input(self):
 pipeline = self.create_pipeline()
 pcol = pipeline | 'start' >> beam.Create([1, 2])
 side = pipeline | 'side' >> beam.Create([3, 4])  # 2 values in side input.
 pcol | 'compute' >> beam.FlatMap(  # pylint: 
disable=expression-not-assigned
 lambda x, s: [x * s], beam.pvalue.AsSingleton(side))
-with self.assertRaises(ValueError):
+with self.assertRaises(Exception):
   pipeline.run()
 
   @attr('ValidatesRunner')



[jira] [Commented] (BEAM-1925) Make DoFn invocation logic of Python SDK more extensible

2017-04-10 Thread Chamikara Jayalath (JIRA)

[ 
https://issues.apache.org/jira/browse/BEAM-1925?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15963243#comment-15963243
 ] 

Chamikara Jayalath commented on BEAM-1925:
--

cc: [~sb2nov] [~robertwb] [~jkff]

> Make DoFn invocation logic of Python SDK more extensible
> 
>
> Key: BEAM-1925
> URL: https://issues.apache.org/jira/browse/BEAM-1925
> Project: Beam
>  Issue Type: Improvement
>  Components: sdk-py
>Reporter: Chamikara Jayalath
>Assignee: Chamikara Jayalath
>
> DoFn invocation logic of Python SDK is currently in DoFnRunner class.
> https://github.com/apache/beam/blob/master/sdks/python/apache_beam/runners/common.py#L54
> At initialization, we parse a DoFn and create local state. We use this state 
> when invoking the DoFn methods process, start_bundle, and finish_bundle. For 
> example, we store a list of ArgPlaceholder objects within the state of 
> DoFnRunner to facilitate invocation of the process method.
> We will need to extend this functionality when adding new features to the 
> DoFn class (for example, to support Splittable DoFn [1]), so I think it's 
> good to refactor this code to be more extensible.
> I think a good approach for this is to add DoFnInvoker and DoFnSignature 
> classes similar to the Java SDK [2].
> In this approach:
> A DoFnSignature captures the signature of a DoFn, including its methods and 
> arguments.
> A DoFnInvoker implements a particular way in which DoFn methods will be 
> executed (initially we'll have simple and per-window invokers [3]).
> A runner uses DoFnRunner to execute the methods of a given DoFn. At 
> initialization, DoFnRunner creates a DoFnSignature and a DoFnInvoker for the 
> given DoFn.
> DoFnSignature and DoFnInvoker methods will be used by the SplittableDoFn 
> implementation as well.
> [1] 
> https://docs.google.com/document/d/1h_zprJrOilivK2xfvl4L42vaX4DMYGfH1YDmi-s_ozM/edit#heading=h.e6patunrpiql
> [2]https://github.com/apache/beam/blob/master/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/reflect/DoFnSignature.java
> [3] 
> https://github.com/apache/beam/blob/master/sdks/python/apache_beam/runners/common.py#L200



--
This message was sent by Atlassian JIRA
(v6.3.15#6346)


[jira] [Updated] (BEAM-1924) pip download failed in post commit

2017-04-10 Thread Ahmet Altay (JIRA)

 [ 
https://issues.apache.org/jira/browse/BEAM-1924?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Ahmet Altay updated BEAM-1924:
--
Description: 
A post commit failed with a pip failure:
https://builds.apache.org/job/beam_PostCommit_Python_Verify/1801/consoleFull

Captured output is not clear enough to tell what went wrong:

==
ERROR: test_par_do_with_multiple_outputs_and_using_return 
(apache_beam.transforms.ptransform_test.PTransformTest)
--
Traceback (most recent call last):
  File 
"/home/jenkins/jenkins-slave/workspace/beam_PostCommit_Python_Verify/sdks/python/apache_beam/transforms/ptransform_test.py",
 line 207, in test_par_do_with_multiple_outputs_and_using_return
pipeline.run()
  File 
"/home/jenkins/jenkins-slave/workspace/beam_PostCommit_Python_Verify/sdks/python/apache_beam/test_pipeline.py",
 line 91, in run
result = super(TestPipeline, self).run()
  File 
"/home/jenkins/jenkins-slave/workspace/beam_PostCommit_Python_Verify/sdks/python/apache_beam/pipeline.py",
 line 160, in run
self.to_runner_api(), self.runner, self.options).run(False)
  File 
"/home/jenkins/jenkins-slave/workspace/beam_PostCommit_Python_Verify/sdks/python/apache_beam/pipeline.py",
 line 169, in run
return self.runner.run(self)
  File 
"/home/jenkins/jenkins-slave/workspace/beam_PostCommit_Python_Verify/sdks/python/apache_beam/runners/dataflow/test_dataflow_runner.py",
 line 39, in run
self.result = super(TestDataflowRunner, self).run(pipeline)
  File 
"/home/jenkins/jenkins-slave/workspace/beam_PostCommit_Python_Verify/sdks/python/apache_beam/runners/dataflow/dataflow_runner.py",
 line 175, in run
self.dataflow_client.create_job(self.job), self)
  File 
"/home/jenkins/jenkins-slave/workspace/beam_PostCommit_Python_Verify/sdks/python/apache_beam/utils/retry.py",
 line 174, in wrapper
return fun(*args, **kwargs)
  File 
"/home/jenkins/jenkins-slave/workspace/beam_PostCommit_Python_Verify/sdks/python/apache_beam/runners/dataflow/internal/apiclient.py",
 line 425, in create_job
self.create_job_description(job)
  File 
"/home/jenkins/jenkins-slave/workspace/beam_PostCommit_Python_Verify/sdks/python/apache_beam/runners/dataflow/internal/apiclient.py",
 line 446, in create_job_description
job.options, file_copy=self._gcs_file_copy)
  File 
"/home/jenkins/jenkins-slave/workspace/beam_PostCommit_Python_Verify/sdks/python/apache_beam/runners/dataflow/internal/dependency.py",
 line 306, in stage_job_resources
setup_options.requirements_file, requirements_cache_path)
  File 
"/home/jenkins/jenkins-slave/workspace/beam_PostCommit_Python_Verify/sdks/python/apache_beam/runners/dataflow/internal/dependency.py",
 line 242, in _populate_requirements_cache
processes.check_call(cmd_args)
  File 
"/home/jenkins/jenkins-slave/workspace/beam_PostCommit_Python_Verify/sdks/python/apache_beam/utils/processes.py",
 line 40, in check_call
return subprocess.check_call(*args, **kwargs)
  File "/usr/lib/python2.7/subprocess.py", line 540, in check_call
raise CalledProcessError(retcode, cmd)
CalledProcessError: Command 
'['/home/jenkins/jenkins-slave/workspace/beam_PostCommit_Python_Verify/sdks/python/bin/python',
 '-m', 'pip', 'install', '--download', '/tmp/dataflow-requirements-cache', 
'-r', 'postcommit_requirements.txt', '--no-binary', ':all:']' returned non-zero 
exit status 2

Any ideas on how we can debug this failure or improve for the future? (cc:
 [~markflyhigh] [~sb2nov])

[jira] [Created] (BEAM-1924) pip download failed in post commit

2017-04-10 Thread Ahmet Altay (JIRA)
Ahmet Altay created BEAM-1924:
-

 Summary: pip download failed in post commit
 Key: BEAM-1924
 URL: https://issues.apache.org/jira/browse/BEAM-1924
 Project: Beam
  Issue Type: Bug
  Components: sdk-py
Reporter: Ahmet Altay


A post commit failed with a pip failure:
https://builds.apache.org/job/beam_PostCommit_Python_Verify/1801/consoleFull

Captured output is not clear enough to tell what went wrong:

==
ERROR: test_par_do_with_multiple_outputs_and_using_return 
(apache_beam.transforms.ptransform_test.PTransformTest)
--
Traceback (most recent call last):
  File 
"/home/jenkins/jenkins-slave/workspace/beam_PostCommit_Python_Verify/sdks/python/apache_beam/transforms/ptransform_test.py",
 line 207, in test_par_do_with_multiple_outputs_and_using_return
pipeline.run()
  File 
"/home/jenkins/jenkins-slave/workspace/beam_PostCommit_Python_Verify/sdks/python/apache_beam/test_pipeline.py",
 line 91, in run
result = super(TestPipeline, self).run()
  File 
"/home/jenkins/jenkins-slave/workspace/beam_PostCommit_Python_Verify/sdks/python/apache_beam/pipeline.py",
 line 160, in run
self.to_runner_api(), self.runner, self.options).run(False)
  File 
"/home/jenkins/jenkins-slave/workspace/beam_PostCommit_Python_Verify/sdks/python/apache_beam/pipeline.py",
 line 169, in run
return self.runner.run(self)
  File 
"/home/jenkins/jenkins-slave/workspace/beam_PostCommit_Python_Verify/sdks/python/apache_beam/runners/dataflow/test_dataflow_runner.py",
 line 39, in run
self.result = super(TestDataflowRunner, self).run(pipeline)
  File 
"/home/jenkins/jenkins-slave/workspace/beam_PostCommit_Python_Verify/sdks/python/apache_beam/runners/dataflow/dataflow_runner.py",
 line 175, in run
self.dataflow_client.create_job(self.job), self)
  File 
"/home/jenkins/jenkins-slave/workspace/beam_PostCommit_Python_Verify/sdks/python/apache_beam/utils/retry.py",
 line 174, in wrapper
return fun(*args, **kwargs)
  File 
"/home/jenkins/jenkins-slave/workspace/beam_PostCommit_Python_Verify/sdks/python/apache_beam/runners/dataflow/internal/apiclient.py",
 line 425, in create_job
self.create_job_description(job)
  File 
"/home/jenkins/jenkins-slave/workspace/beam_PostCommit_Python_Verify/sdks/python/apache_beam/runners/dataflow/internal/apiclient.py",
 line 446, in create_job_description
job.options, file_copy=self._gcs_file_copy)
  File 
"/home/jenkins/jenkins-slave/workspace/beam_PostCommit_Python_Verify/sdks/python/apache_beam/runners/dataflow/internal/dependency.py",
 line 306, in stage_job_resources
setup_options.requirements_file, requirements_cache_path)
  File 
"/home/jenkins/jenkins-slave/workspace/beam_PostCommit_Python_Verify/sdks/python/apache_beam/runners/dataflow/internal/dependency.py",
 line 242, in _populate_requirements_cache
processes.check_call(cmd_args)
  File 
"/home/jenkins/jenkins-slave/workspace/beam_PostCommit_Python_Verify/sdks/python/apache_beam/utils/processes.py",
 line 40, in check_call
return subprocess.check_call(*args, **kwargs)
  File "/usr/lib/python2.7/subprocess.py", line 540, in check_call
raise CalledProcessError(retcode, cmd)
CalledProcessError: Command 
'['/home/jenkins/jenkins-slave/workspace/beam_PostCommit_Python_Verify/sdks/python/bin/python',
 '-m', 'pip', 'install', '--download', '/tmp/dataflow-requirements-cache', 
'-r', 'postcommit_requirements.txt', '--no-binary', ':all:']' returned non-zero 
exit status 2

Any ideas on how we can debug this failure or improve for the future? (cc:
 [~markflyhigh] [~sb2nov])



--
This message was sent by Atlassian JIRA
(v6.3.15#6346)


[jira] [Created] (BEAM-1925) Make DoFn invocation logic of Python SDK more extensible

2017-04-10 Thread Chamikara Jayalath (JIRA)
Chamikara Jayalath created BEAM-1925:


 Summary: Make DoFn invocation logic of Python SDK more extensible
 Key: BEAM-1925
 URL: https://issues.apache.org/jira/browse/BEAM-1925
 Project: Beam
  Issue Type: Improvement
  Components: sdk-py
Reporter: Chamikara Jayalath
Assignee: Chamikara Jayalath


DoFn invocation logic of Python SDK is currently in DoFnRunner class.

https://github.com/apache/beam/blob/master/sdks/python/apache_beam/runners/common.py#L54

At initialization, we parse a DoFn and create local state. We use this state 
when invoking the DoFn methods process, start_bundle, and finish_bundle. For 
example, we store a list of ArgPlaceholder objects within the state of 
DoFnRunner to facilitate invocation of the process method.

We will need to extend this functionality when adding new features to the DoFn 
class (for example, to support Splittable DoFn [1]), so I think it's good to 
refactor this code to be more extensible.

I think a good approach for this is to add DoFnInvoker and DoFnSignature 
classes similar to the Java SDK [2].

In this approach:
A DoFnSignature captures the signature of a DoFn, including its methods and 
arguments.
A DoFnInvoker implements a particular way in which DoFn methods will be 
executed (initially we'll have simple and per-window invokers [3]).

A runner uses DoFnRunner to execute the methods of a given DoFn. At 
initialization, DoFnRunner creates a DoFnSignature and a DoFnInvoker for the 
given DoFn.

DoFnSignature and DoFnInvoker methods will be used by the SplittableDoFn 
implementation as well.


[1] 
https://docs.google.com/document/d/1h_zprJrOilivK2xfvl4L42vaX4DMYGfH1YDmi-s_ozM/edit#heading=h.e6patunrpiql

[2]https://github.com/apache/beam/blob/master/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/reflect/DoFnSignature.java

[3] 
https://github.com/apache/beam/blob/master/sdks/python/apache_beam/runners/common.py#L200
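
As a rough illustration of this direction only (the class and method names below are hypothetical, not the code that was eventually committed), the split might look like:

{code}
class DoFnSignature(object):
  """Captures the methods and arguments of a DoFn once, at construction time."""

  def __init__(self, do_fn):
    self.do_fn = do_fn
    self.process_method = do_fn.process
    self.start_bundle_method = getattr(do_fn, 'start_bundle', None)
    self.finish_bundle_method = getattr(do_fn, 'finish_bundle', None)


class DoFnInvoker(object):
  """One strategy for invoking the methods described by a DoFnSignature."""

  def __init__(self, signature):
    self.signature = signature

  def invoke_start_bundle(self):
    if self.signature.start_bundle_method is not None:
      self.signature.start_bundle_method()

  def invoke_process(self, element):
    return self.signature.process_method(element)

  def invoke_finish_bundle(self):
    if self.signature.finish_bundle_method is not None:
      self.signature.finish_bundle_method()


class SimpleInvoker(DoFnInvoker):
  """Invoker for DoFns whose arguments need no per-window re-evaluation."""


class PerWindowInvoker(DoFnInvoker):
  """Invoker that re-resolves windowed side inputs before each process call."""
{code}

A DoFnRunner would then build one DoFnSignature, pick an invoker, and delegate all method calls to it, so a SplittableDoFn implementation could plug in its own invoker without changing DoFnRunner.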



--
This message was sent by Atlassian JIRA
(v6.3.15#6346)


Jenkins build became unstable: beam_PostCommit_Java_MavenInstall #3236

2017-04-10 Thread Apache Jenkins Server
See 




[jira] [Commented] (BEAM-1612) Support real Bundle in Flink runner

2017-04-10 Thread Kenneth Knowles (JIRA)

[ 
https://issues.apache.org/jira/browse/BEAM-1612?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15963239#comment-15963239
 ] 

Kenneth Knowles commented on BEAM-1612:
---

Do you think this is possible for the first stable release?

> Support real Bundle in Flink runner
> ---
>
> Key: BEAM-1612
> URL: https://issues.apache.org/jira/browse/BEAM-1612
> Project: Beam
>  Issue Type: Improvement
>  Components: runner-flink
>Reporter: Jingsong Lee
>Assignee: Jingsong Lee
>
> The Bundle is very important in the Beam model. Users can use the bundle to 
> flush buffers and can reuse many heavyweight resources within a bundle. Most 
> IO plugins use the bundle to flush.
> Moreover, the FlinkRunner can also use the Bundle to reduce access to Flink 
> state: elements can first be placed on the Java heap and flushed into the 
> RocksDB state when finishBundle is invoked, which reduces the number of 
> serializations.
> But right now the FlinkRunner calls finishBundle on every processElement. We 
> need to support real Bundles.
> I think we can have the following implementations:
> 1. Invoke finishBundle and the next startBundle in Flink's {{snapshot}}. But 
> sometimes this "Bundle" may be too big; this depends on the user's checkpoint 
> configuration.
> 2. Manually control the size of the bundle. The half-bundle is flushed into a 
> full-bundle by count, event time, processing time, or {{snapshot}}. We do not 
> need to wait; just call startBundle and finishBundle at the right time.
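
To make the motivation concrete, here is an illustrative Python sketch (the issue itself concerns the Flink runner's Java code; write_batch below is a hypothetical stand-in for a real sink call) of a DoFn that relies on bundle boundaries to batch writes. If a runner calls finishBundle after every element, the batching is lost:

{code}
import apache_beam as beam


def write_batch(records):
  """Hypothetical stand-in for a real batched sink call."""
  print('writing %d records' % len(records))


class BatchingWriteFn(beam.DoFn):
  """Buffers elements within a bundle and flushes them in finish_bundle."""

  MAX_BUFFER_SIZE = 500

  def start_bundle(self):
    self._buffer = []

  def process(self, element):
    self._buffer.append(element)
    if len(self._buffer) >= self.MAX_BUFFER_SIZE:
      self._flush()

  def finish_bundle(self):
    # Only useful if the runner provides real, multi-element bundles.
    self._flush()

  def _flush(self):
    if self._buffer:
      write_batch(self._buffer)
      self._buffer = []
{code}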



--
This message was sent by Atlassian JIRA
(v6.3.15#6346)


[jira] [Closed] (BEAM-1905) ParDoTranslation should translate a ParDo more simple?

2017-04-10 Thread Aljoscha Krettek (JIRA)

 [ 
https://issues.apache.org/jira/browse/BEAM-1905?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Aljoscha Krettek closed BEAM-1905.
--
   Resolution: Won't Fix
Fix Version/s: Not applicable

This was changed in 
https://github.com/apache/beam/commit/c6cad209faf67b799d8c44c786a0ac9d45fcbbf2 
where we made Multi-Output ParDo the only required primitive.

The complicated translation is temporary and will be resolved in BEAM-1498.

Please re-open if you disagree.

> ParDoTranslation should translate a ParDo more simple?
> --
>
> Key: BEAM-1905
> URL: https://issues.apache.org/jira/browse/BEAM-1905
> Project: Beam
>  Issue Type: Improvement
>  Components: runner-flink
>Reporter: huangxiaofeng
>Assignee: Aljoscha Krettek
> Fix For: Not applicable
>
>
> Currently, each ParDo is translated to a ParDo operator + select(tag) + 
> flatMap, even if it doesn't have a side input and only has one output.
> This makes the transformations more complicated.
> for (TaggedPValue output : outputs) {
>   final int outputTag = tagsToLabels.get(output.getTag());
>   TypeInformation outputTypeInfo =
>       context.getTypeInfo((PCollection) output.getValue());
>   @SuppressWarnings("unchecked")
>   DataStream unwrapped = splitStream.select(String.valueOf(outputTag))
>       .flatMap(new FlatMapFunction<RawUnionValue, Object>() {
>         @Override
>         public void flatMap(RawUnionValue value, Collector<Object> out)
>             throws Exception {
>           out.collect(value.getValue());
>         }
>       }).returns(outputTypeInfo);
>   context.setOutputDataStream(output.getValue(), unwrapped);
> }



--
This message was sent by Atlassian JIRA
(v6.3.15#6346)


[jira] [Commented] (BEAM-1886) Remove TextIO override in Flink runner

2017-04-10 Thread Kenneth Knowles (JIRA)

[ 
https://issues.apache.org/jira/browse/BEAM-1886?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15963232#comment-15963232
 ] 

Kenneth Knowles commented on BEAM-1886:
---

CC: [~reuvenlax] who is making changes in that area of the core SDK right now.

> Remove TextIO override in Flink runner
> --
>
> Key: BEAM-1886
> URL: https://issues.apache.org/jira/browse/BEAM-1886
> Project: Beam
>  Issue Type: Bug
>  Components: runner-flink
>Reporter: Kenneth Knowles
> Fix For: First stable release
>
>
> Today, the Flink runner replaces TextIO with a customized version. I believe 
> this is related to adequate support for files in HDFS.
> However, its capabilities are fewer; in particular, it lacks the recent 
> support for window-and-pane sharded writes of unbounded collections.
> Concretely, we have had to remove WindowedWordCountIT from the precommit 
> Jenkins run.



--
This message was sent by Atlassian JIRA
(v6.3.15#6346)


Jenkins build is back to normal : beam_PostCommit_Python_Verify #1802

2017-04-10 Thread Apache Jenkins Server
See 




[jira] [Assigned] (BEAM-1105) Adding Beam's pico Wordcount to the existing examples.

2017-04-10 Thread Ahmet Altay (JIRA)

 [ 
https://issues.apache.org/jira/browse/BEAM-1105?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Ahmet Altay reassigned BEAM-1105:
-

Assignee: (was: Neelesh Srinivas Salian)

> Adding Beam's pico Wordcount to the existing examples. 
> ---
>
> Key: BEAM-1105
> URL: https://issues.apache.org/jira/browse/BEAM-1105
> Project: Beam
>  Issue Type: Wish
>  Components: examples-java
>Reporter: Neelesh Srinivas Salian
>Priority: Minor
>  Labels: examples
>
> http://www.jesse-anderson.com/2016/12/beams-pico-wordcount/
> is a good explanation of the WordCount that would encourage users.
> Adding this to the examples and subsequently the docs is a good step to help 
> new users start from a good foundation.



--
This message was sent by Atlassian JIRA
(v6.3.15#6346)


[jira] [Resolved] (BEAM-1753) ImportError (cannot import name descriptor) in new venv after 'python setup.py install'

2017-04-10 Thread Ahmet Altay (JIRA)

 [ 
https://issues.apache.org/jira/browse/BEAM-1753?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Ahmet Altay resolved BEAM-1753.
---
   Resolution: Cannot Reproduce
Fix Version/s: Not applicable

> ImportError (cannot import name descriptor) in new venv after 'python 
> setup.py install'
> ---
>
> Key: BEAM-1753
> URL: https://issues.apache.org/jira/browse/BEAM-1753
> Project: Beam
>  Issue Type: Bug
>  Components: sdk-py
>Reporter: María GH
>Assignee: María GH
> Fix For: Not applicable
>
>
> After 'python setup.py install' in a clean virtual environment, I get the 
> following when running nosetests:
> (dataflow) mariagh (ppp_inmaster *) python $ nosetests --logging-level=INFO 
> apache_beam/io/fileio_test.py
> /Users/mariagh/Documents/venvs/dataflow/lib/python2.7/site-packages/nose/plugins/manager.py:395:
>  RuntimeWarning: Unable to load plugin beam_test_plugin = 
> test_config:BeamTestPlugin: (dill 0.2.5 
> (/Users/mariagh/Documents/venvs/dataflow/lib/python2.7/site-packages), 
> Requirement.parse('dill==0.2.6'))
>   RuntimeWarning)
> Failure: ImportError (cannot import name descriptor) ... ERROR
> ==
> ERROR: Failure: ImportError (cannot import name descriptor)
> --
> Traceback (most recent call last):
>   File 
> "/Users/mariagh/Documents/venvs/dataflow/lib/python2.7/site-packages/nose/loader.py",
>  line 418, in loadTestsFromName
> addr.filename, addr.module)
>   File 
> "/Users/mariagh/Documents/venvs/dataflow/lib/python2.7/site-packages/nose/importer.py",
>  line 47, in importFromPath
> return self.importFromDir(dir_path, fqname)
>   File 
> "/Users/mariagh/Documents/venvs/dataflow/lib/python2.7/site-packages/nose/importer.py",
>  line 94, in importFromDir
> mod = load_module(part_fqname, fh, filename, desc)
>   File 
> "/Users/mariagh/Documents/beam/incubator-beam/sdks/python/apache_beam/__init__.py",
>  line 77, in 
> from apache_beam import coders
>   File 
> "/Users/mariagh/Documents/beam/incubator-beam/sdks/python/apache_beam/coders/__init__.py",
>  line 18, in 
> from apache_beam.coders.coders import *
>   File 
> "/Users/mariagh/Documents/beam/incubator-beam/sdks/python/apache_beam/coders/coders.py",
>  line 26, in 
> from apache_beam.utils import proto_utils
>   File 
> "/Users/mariagh/Documents/beam/incubator-beam/sdks/python/apache_beam/utils/proto_utils.py",
>  line 18, in 
> from google.protobuf import any_pb2
>   File "build/bdist.macosx-10.11-x86_64/egg/google/protobuf/any_pb2.py", line 
> 6, in 
> ImportError: cannot import name descriptor
> --
> Ran 1 test in 0.001s
> FAILED (errors=1)



--
This message was sent by Atlassian JIRA
(v6.3.15#6346)


Jenkins build is back to stable : beam_PostCommit_Java_ValidatesRunner_Spark #1578

2017-04-10 Thread Apache Jenkins Server
See 




[jira] [Commented] (BEAM-1886) Remove TextIO override in Flink runner

2017-04-10 Thread Aljoscha Krettek (JIRA)

[ 
https://issues.apache.org/jira/browse/BEAM-1886?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15963221#comment-15963221
 ] 

Aljoscha Krettek commented on BEAM-1886:


+1, I think we should remove these overrides. They are leftovers from an 
earlier lack of support in Beam itself.

> Remove TextIO override in Flink runner
> --
>
> Key: BEAM-1886
> URL: https://issues.apache.org/jira/browse/BEAM-1886
> Project: Beam
>  Issue Type: Bug
>  Components: runner-flink
>Reporter: Kenneth Knowles
> Fix For: First stable release
>
>
> Today, the Flink runner replaces TextIO with a customized version. I believe 
> this is related to adequate support for files in HDFS.
> However, its capabilities are fewer; in particular, it lacks the recent 
> support for window-and-pane sharded writes of unbounded collections.
> Concretely, we have had to remove WindowedWordCountIT from the precommit 
> Jenkins run.



--
This message was sent by Atlassian JIRA
(v6.3.15#6346)


[jira] [Commented] (BEAM-1612) Support real Bundle in Flink runner

2017-04-10 Thread Aljoscha Krettek (JIRA)

[ 
https://issues.apache.org/jira/browse/BEAM-1612?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15963220#comment-15963220
 ] 

Aljoscha Krettek commented on BEAM-1612:


Yes, it would also allow multiple buffers to be considered as a bundle.

> Support real Bundle in Flink runner
> ---
>
> Key: BEAM-1612
> URL: https://issues.apache.org/jira/browse/BEAM-1612
> Project: Beam
>  Issue Type: Improvement
>  Components: runner-flink
>Reporter: Jingsong Lee
>Assignee: Jingsong Lee
>
> The Bundle is very important in the Beam model. Users can use the bundle to 
> flush buffers and can reuse many heavyweight resources within a bundle. Most 
> IO plugins use the bundle to flush.
> Moreover, the FlinkRunner can also use the Bundle to reduce access to Flink 
> state: elements can first be placed on the Java heap and flushed into the 
> RocksDB state when finishBundle is invoked, which reduces the number of 
> serializations.
> But right now the FlinkRunner calls finishBundle on every processElement. We 
> need to support real Bundles.
> I think we can have the following implementations:
> 1. Invoke finishBundle and the next startBundle in Flink's {{snapshot}}. But 
> sometimes this "Bundle" may be too big; this depends on the user's checkpoint 
> configuration.
> 2. Manually control the size of the bundle. The half-bundle is flushed into a 
> full-bundle by count, event time, processing time, or {{snapshot}}. We do not 
> need to wait; just call startBundle and finishBundle at the right time.



--
This message was sent by Atlassian JIRA
(v6.3.15#6346)


[jira] [Comment Edited] (BEAM-1105) Adding Beam's pico Wordcount to the existing examples.

2017-04-10 Thread Neelesh Srinivas Salian (JIRA)

[ 
https://issues.apache.org/jira/browse/BEAM-1105?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15963215#comment-15963215
 ] 

Neelesh Srinivas Salian edited comment on BEAM-1105 at 4/10/17 5:18 PM:


Not working on it. Feel free to grab [~altay]


was (Author: nssalian):
Feel free to grab [~altay]

> Adding Beam's pico Wordcount to the existing examples. 
> ---
>
> Key: BEAM-1105
> URL: https://issues.apache.org/jira/browse/BEAM-1105
> Project: Beam
>  Issue Type: Wish
>  Components: examples-java
>Reporter: Neelesh Srinivas Salian
>Assignee: Neelesh Srinivas Salian
>Priority: Minor
>  Labels: examples
>
> http://www.jesse-anderson.com/2016/12/beams-pico-wordcount/
> is a good explanation of the WordCount that would encourage users.
> Adding this to the examples and subsequently the docs is a good step to help 
> new users start from a good foundation.



--
This message was sent by Atlassian JIRA
(v6.3.15#6346)


[jira] [Commented] (BEAM-1105) Adding Beam's pico Wordcount to the existing examples.

2017-04-10 Thread Neelesh Srinivas Salian (JIRA)

[ 
https://issues.apache.org/jira/browse/BEAM-1105?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15963215#comment-15963215
 ] 

Neelesh Srinivas Salian commented on BEAM-1105:
---

Feel free to grab [~altay]

> Adding Beam's pico Wordcount to the existing examples. 
> ---
>
> Key: BEAM-1105
> URL: https://issues.apache.org/jira/browse/BEAM-1105
> Project: Beam
>  Issue Type: Wish
>  Components: examples-java
>Reporter: Neelesh Srinivas Salian
>Assignee: Neelesh Srinivas Salian
>Priority: Minor
>  Labels: examples
>
> http://www.jesse-anderson.com/2016/12/beams-pico-wordcount/
> is a good explanation of the WordCount that would encourage users.
> Adding this to the examples and subsequently the docs is a good step to help 
> new users start from a good foundation.



--
This message was sent by Atlassian JIRA
(v6.3.15#6346)


Jenkins build became unstable: beam_PostCommit_Java_ValidatesRunner_Apex #1038

2017-04-10 Thread Apache Jenkins Server
See 




[GitHub] beam pull request #2481: [BEAM-386] Move UnboundedReadFromBoundedSource to c...

2017-04-10 Thread asfgit
Github user asfgit closed the pull request at:

https://github.com/apache/beam/pull/2481


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[jira] [Commented] (BEAM-386) Dataflow runner to support Read.Bounded in streaming mode.

2017-04-10 Thread ASF GitHub Bot (JIRA)

[ 
https://issues.apache.org/jira/browse/BEAM-386?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15963214#comment-15963214
 ] 

ASF GitHub Bot commented on BEAM-386:
-

Github user asfgit closed the pull request at:

https://github.com/apache/beam/pull/2481


> Dataflow runner to support Read.Bounded in streaming mode.
> --
>
> Key: BEAM-386
> URL: https://issues.apache.org/jira/browse/BEAM-386
> Project: Beam
>  Issue Type: New Feature
>  Components: runner-dataflow
>Reporter: Pei He
>Assignee: Pei He
> Fix For: 0.2.0-incubating
>
>
> UnboundedReadFromBoundedSource is done.
> Make Dataflow runner use it.



--
This message was sent by Atlassian JIRA
(v6.3.15#6346)


[1/3] beam git commit: [BEAM-386] Move UnboundedReadFromBoundedSource to core-construction-java

2017-04-10 Thread dhalperi
Repository: beam
Updated Branches:
  refs/heads/master e53f959f9 -> e0e39a975


http://git-wip-us.apache.org/repos/asf/beam/blob/66229142/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowUnboundedReadFromBoundedSource.java
--
diff --git 
a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowUnboundedReadFromBoundedSource.java
 
b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowUnboundedReadFromBoundedSource.java
deleted file mode 100644
index cfb5ebc..000
--- 
a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowUnboundedReadFromBoundedSource.java
+++ /dev/null
@@ -1,547 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.dataflow;
-
-import static com.google.common.base.Preconditions.checkArgument;
-import static com.google.common.base.Preconditions.checkNotNull;
-
-import com.fasterxml.jackson.annotation.JsonCreator;
-import com.fasterxml.jackson.annotation.JsonProperty;
-import com.google.common.annotations.VisibleForTesting;
-import com.google.common.base.Function;
-import com.google.common.collect.ImmutableList;
-import com.google.common.collect.Lists;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.Iterator;
-import java.util.List;
-import java.util.NoSuchElementException;
-import javax.annotation.Nullable;
-import org.apache.beam.sdk.coders.Coder;
-import org.apache.beam.sdk.coders.CoderException;
-import org.apache.beam.sdk.coders.ListCoder;
-import org.apache.beam.sdk.coders.NullableCoder;
-import org.apache.beam.sdk.coders.SerializableCoder;
-import org.apache.beam.sdk.coders.StandardCoder;
-import org.apache.beam.sdk.io.BoundedSource;
-import org.apache.beam.sdk.io.BoundedSource.BoundedReader;
-import org.apache.beam.sdk.io.Read;
-import org.apache.beam.sdk.io.UnboundedSource;
-import org.apache.beam.sdk.options.PipelineOptions;
-import org.apache.beam.sdk.transforms.PTransform;
-import org.apache.beam.sdk.transforms.display.DisplayData;
-import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
-import org.apache.beam.sdk.util.NameUtils;
-import org.apache.beam.sdk.util.PropertyNames;
-import org.apache.beam.sdk.values.PBegin;
-import org.apache.beam.sdk.values.PCollection;
-import org.apache.beam.sdk.values.TimestampedValue;
-import org.joda.time.Instant;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * {@link PTransform} that converts a {@link BoundedSource} as an {@link 
UnboundedSource}.
- *
- * {@link BoundedSource} is read directly without calling {@link 
BoundedSource#splitIntoBundles},
- * and element timestamps are propagated. While any elements remain, the 
watermark is the beginning
- * of time {@link BoundedWindow#TIMESTAMP_MIN_VALUE}, and after all elements 
have been produced
- * the watermark goes to the end of time {@link 
BoundedWindow#TIMESTAMP_MAX_VALUE}.
- *
- * Checkpoints are created by calling {@link BoundedReader#splitAtFraction} 
on inner
- * {@link BoundedSource}.
- * Sources that cannot be split are read entirely into memory, so this 
transform does not work well
- * with large, unsplittable sources.
- *
- * This transform is intended to be used by a runner during pipeline 
translation to convert
- * a Read.Bounded into a Read.Unbounded.
- *
- * @deprecated This class is copied from beam runners core in order to avoid 
pipeline construction
- * time dependency. It should be replaced in the dataflow worker as an 
execution time dependency.
- */
-@Deprecated
-class DataflowUnboundedReadFromBoundedSource<T> extends PTransform<PBegin, PCollection<T>> {
-
-  private static final Logger LOG =
-  LoggerFactory.getLogger(DataflowUnboundedReadFromBoundedSource.class);
-
-  private final BoundedSource<T> source;
-
-  /**
-   * Constructs a {@link PTransform} that performs an unbounded read from a 
{@link BoundedSource}.
-   */
-  public DataflowUnboundedReadFromBoundedSource(BoundedSource<T> source) {
-this.source = 

[3/3] beam git commit: This closes #2481

2017-04-10 Thread dhalperi
This closes #2481


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/e0e39a97
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/e0e39a97
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/e0e39a97

Branch: refs/heads/master
Commit: e0e39a97560933f99eaee98549877fd5f8f49a52
Parents: e53f959 6622914
Author: Dan Halperin 
Authored: Mon Apr 10 10:16:06 2017 -0700
Committer: Dan Halperin 
Committed: Mon Apr 10 10:16:06 2017 -0700

--
 .../translation/ApexPipelineTranslator.java |   2 +-
 runners/core-construction-java/pom.xml  |  15 +
 .../UnboundedReadFromBoundedSource.java | 542 ++
 .../UnboundedReadFromBoundedSourceTest.java | 373 +
 runners/core-java/pom.xml   |   5 -
 .../core/UnboundedReadFromBoundedSource.java| 542 --
 .../UnboundedReadFromBoundedSourceTest.java | 373 -
 .../beam/runners/dataflow/DataflowRunner.java   |   3 +-
 .../DataflowUnboundedReadFromBoundedSource.java | 547 ---
 ...aflowUnboundedReadFromBoundedSourceTest.java |  79 ---
 .../beam/runners/spark/TestSparkRunner.java |   2 +-
 11 files changed, 934 insertions(+), 1549 deletions(-)
--




[jira] [Updated] (BEAM-976) Update examples README.md to fix instructions to run pipelines

2017-04-10 Thread Ahmet Altay (JIRA)

 [ 
https://issues.apache.org/jira/browse/BEAM-976?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Ahmet Altay updated BEAM-976:
-
Fix Version/s: First stable release

> Update examples README.md to fix instructions to run pipelines
> --
>
> Key: BEAM-976
> URL: https://issues.apache.org/jira/browse/BEAM-976
> Project: Beam
>  Issue Type: Task
>  Components: examples-java
>Reporter: Vikas Kedigehalli
>Assignee: Vikas Kedigehalli
>Priority: Minor
> Fix For: First stable release
>
>




--
This message was sent by Atlassian JIRA
(v6.3.15#6346)


[jira] [Commented] (BEAM-1105) Adding Beam's pico Wordcount to the existing examples.

2017-04-10 Thread Ahmet Altay (JIRA)

[ 
https://issues.apache.org/jira/browse/BEAM-1105?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15963208#comment-15963208
 ] 

Ahmet Altay commented on BEAM-1105:
---

Hey [~nssalian], are you working on this?

> Adding Beam's pico Wordcount to the existing examples. 
> ---
>
> Key: BEAM-1105
> URL: https://issues.apache.org/jira/browse/BEAM-1105
> Project: Beam
>  Issue Type: Wish
>  Components: examples-java
>Reporter: Neelesh Srinivas Salian
>Assignee: Neelesh Srinivas Salian
>Priority: Minor
>  Labels: examples
>
> http://www.jesse-anderson.com/2016/12/beams-pico-wordcount/
> is a good explanation of the WordCount that would encourage users.
> Adding this to the examples and subsequently the docs is a good step to help 
> new users start from a good foundation.



--
This message was sent by Atlassian JIRA
(v6.3.15#6346)


[jira] [Resolved] (BEAM-1056) Reference beam version only once in Archetypes

2017-04-10 Thread Ahmet Altay (JIRA)

 [ 
https://issues.apache.org/jira/browse/BEAM-1056?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Ahmet Altay resolved BEAM-1056.
---
   Resolution: Fixed
Fix Version/s: Not applicable

> Reference beam version only once in Archetypes
> --
>
> Key: BEAM-1056
> URL: https://issues.apache.org/jira/browse/BEAM-1056
> Project: Beam
>  Issue Type: Improvement
>  Components: examples-java
>Reporter: Thomas Groh
>Assignee: Tim Taschke
>Priority: Trivial
>  Labels: easyfix, starter
> Fix For: Not applicable
>
>
> Instead of referring to the version of the SDK inline, reference it once in a 
> beam.version property and use that to determine SDK and runner versions. This 
> reduces the maintenance burden of updating archetypes.



--
This message was sent by Atlassian JIRA
(v6.3.15#6346)


[jira] [Assigned] (BEAM-933) Findbugs doesn't pass in Java Examples

2017-04-10 Thread Ahmet Altay (JIRA)

 [ 
https://issues.apache.org/jira/browse/BEAM-933?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Ahmet Altay reassigned BEAM-933:


Assignee: (was: Davor Bonaci)

> Findbugs doesn't pass in Java Examples
> --
>
> Key: BEAM-933
> URL: https://issues.apache.org/jira/browse/BEAM-933
> Project: Beam
>  Issue Type: Bug
>  Components: examples-java
>Reporter: Daniel Halperin
>  Labels: newbie, starter
>
> {code}
> [INFO] --- findbugs-maven-plugin:3.0.1:check (default) @ beam-examples-java 
> ---
> [INFO] BugInstance size is 4
> [INFO] Error size is 0
> [INFO] Total bugs: 4
> [INFO] Possible null pointer dereference in 
> org.apache.beam.examples.complete.TfIdf.listInputDocuments(TfIdf$Options) due 
> to return value of called method [org.apache.beam.examples.complete.TfIdf, 
> org.apache.beam.examples.complete.TfIdf] Dereferenced at TfIdf.java:[line 
> 124]Known null at TfIdf.java:[line 124]
> [INFO] Bad attempt to compute absolute value of signed 32-bit hashcode in 
> org.apache.beam.examples.complete.TopWikipediaSessions$ComputeTopSessions$1.processElement(DoFn$ProcessContext)
>  
> [org.apache.beam.examples.complete.TopWikipediaSessions$ComputeTopSessions$1] 
> At TopWikipediaSessions.java:[line 165]
> [INFO] org.apache.beam.examples.complete.TrafficRoutes$StationSpeed defines 
> compareTo(TrafficRoutes$StationSpeed) and uses Object.equals() 
> [org.apache.beam.examples.complete.TrafficRoutes$StationSpeed] At 
> TrafficRoutes.java:[line 113]
> [INFO] 
> org.apache.beam.examples.cookbook.TriggerExample$InsertDelays.processElement(DoFn$ProcessContext)
>  uses the nextDouble method of Random to generate a random integer; using 
> nextInt is more efficient 
> [org.apache.beam.examples.cookbook.TriggerExample$InsertDelays] At 
> TriggerExample.java:[line 479]
> [INFO] 
> {code}



--
This message was sent by Atlassian JIRA
(v6.3.15#6346)


[jira] [Updated] (BEAM-933) Findbugs doesn't pass in Java Examples

2017-04-10 Thread Ahmet Altay (JIRA)

 [ 
https://issues.apache.org/jira/browse/BEAM-933?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Ahmet Altay updated BEAM-933:
-
Labels: newbie starter  (was: )

> Findbugs doesn't pass in Java Examples
> --
>
> Key: BEAM-933
> URL: https://issues.apache.org/jira/browse/BEAM-933
> Project: Beam
>  Issue Type: Bug
>  Components: examples-java
>Reporter: Daniel Halperin
>  Labels: newbie, starter
>
> {code}
> [INFO] --- findbugs-maven-plugin:3.0.1:check (default) @ beam-examples-java 
> ---
> [INFO] BugInstance size is 4
> [INFO] Error size is 0
> [INFO] Total bugs: 4
> [INFO] Possible null pointer dereference in 
> org.apache.beam.examples.complete.TfIdf.listInputDocuments(TfIdf$Options) due 
> to return value of called method [org.apache.beam.examples.complete.TfIdf, 
> org.apache.beam.examples.complete.TfIdf] Dereferenced at TfIdf.java:[line 
> 124]Known null at TfIdf.java:[line 124]
> [INFO] Bad attempt to compute absolute value of signed 32-bit hashcode in 
> org.apache.beam.examples.complete.TopWikipediaSessions$ComputeTopSessions$1.processElement(DoFn$ProcessContext)
>  
> [org.apache.beam.examples.complete.TopWikipediaSessions$ComputeTopSessions$1] 
> At TopWikipediaSessions.java:[line 165]
> [INFO] org.apache.beam.examples.complete.TrafficRoutes$StationSpeed defines 
> compareTo(TrafficRoutes$StationSpeed) and uses Object.equals() 
> [org.apache.beam.examples.complete.TrafficRoutes$StationSpeed] At 
> TrafficRoutes.java:[line 113]
> [INFO] 
> org.apache.beam.examples.cookbook.TriggerExample$InsertDelays.processElement(DoFn$ProcessContext)
>  uses the nextDouble method of Random to generate a random integer; using 
> nextInt is more efficient 
> [org.apache.beam.examples.cookbook.TriggerExample$InsertDelays] At 
> TriggerExample.java:[line 479]
> [INFO] 
> {code}



--
This message was sent by Atlassian JIRA
(v6.3.15#6346)


[jira] [Updated] (BEAM-934) Findbugs doesn't pass in Java8 Examples

2017-04-10 Thread Ahmet Altay (JIRA)

 [ 
https://issues.apache.org/jira/browse/BEAM-934?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Ahmet Altay updated BEAM-934:
-
Labels: newbie starter  (was: )

> Findbugs doesn't pass in Java8 Examples
> ---
>
> Key: BEAM-934
> URL: https://issues.apache.org/jira/browse/BEAM-934
> Project: Beam
>  Issue Type: Bug
>  Components: examples-java
>Reporter: Daniel Halperin
>  Labels: newbie, starter
>
> {code}
> [INFO] --- findbugs-maven-plugin:3.0.1:check (default) @ beam-examples-java8 
> ---
> [INFO] BugInstance size is 2
> [INFO] Error size is 0
> [INFO] Total bugs: 2
> [INFO] Result of integer multiplication cast to long in 
> org.apache.beam.examples.complete.game.injector.Injector$TeamInfo.getEndTimeInMillis()
>  [org.apache.beam.examples.complete.game.injector.Injector$TeamInfo] At 
> Injector.java:[line 170]
> [INFO] Format string should use %n rather than \n in 
> org.apache.beam.examples.complete.game.injector.InjectorUtils.createTopic(Pubsub,
>  String) [org.apache.beam.examples.complete.game.injector.InjectorUtils] At 
> InjectorUtils.java:[line 96]
> [INFO]  
> {code}



--
This message was sent by Atlassian JIRA
(v6.3.15#6346)


[jira] [Assigned] (BEAM-934) Findbugs doesn't pass in Java8 Examples

2017-04-10 Thread Ahmet Altay (JIRA)

 [ 
https://issues.apache.org/jira/browse/BEAM-934?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Ahmet Altay reassigned BEAM-934:


Assignee: (was: Davor Bonaci)

> Findbugs doesn't pass in Java8 Examples
> ---
>
> Key: BEAM-934
> URL: https://issues.apache.org/jira/browse/BEAM-934
> Project: Beam
>  Issue Type: Bug
>  Components: examples-java
>Reporter: Daniel Halperin
>
> {code}
> [INFO] --- findbugs-maven-plugin:3.0.1:check (default) @ beam-examples-java8 
> ---
> [INFO] BugInstance size is 2
> [INFO] Error size is 0
> [INFO] Total bugs: 2
> [INFO] Result of integer multiplication cast to long in 
> org.apache.beam.examples.complete.game.injector.Injector$TeamInfo.getEndTimeInMillis()
>  [org.apache.beam.examples.complete.game.injector.Injector$TeamInfo] At 
> Injector.java:[line 170]
> [INFO] Format string should use %n rather than \n in 
> org.apache.beam.examples.complete.game.injector.InjectorUtils.createTopic(Pubsub,
>  String) [org.apache.beam.examples.complete.game.injector.InjectorUtils] At 
> InjectorUtils.java:[line 96]
> [INFO]  
> {code}



--
This message was sent by Atlassian JIRA
(v6.3.15#6346)


[jira] [Closed] (BEAM-1864) Shorten combining state names: "CombiningValue" and "AccumulatorCombiningState" to Combining (as appropriate)

2017-04-10 Thread Daniel Halperin (JIRA)

 [ 
https://issues.apache.org/jira/browse/BEAM-1864?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Daniel Halperin closed BEAM-1864.
-
Resolution: Fixed

> Shorten combining state names: "CombiningValue" and 
> "AccumulatorCombiningState" to Combining (as appropriate)
> -
>
> Key: BEAM-1864
> URL: https://issues.apache.org/jira/browse/BEAM-1864
> Project: Beam
>  Issue Type: Improvement
>  Components: sdk-java-core
>Reporter: Kenneth Knowles
>Assignee: Kenneth Knowles
>  Labels: backward-incompatible
> Fix For: First stable release
>
>
> This will clean up potential confusion around different kinds of state, 
> making it very clear which type is actually analogous to the {{Combine}} 
> transform and takes a {{CombineFn}}.



--
This message was sent by Atlassian JIRA
(v6.3.15#6346)


[jira] [Updated] (BEAM-1864) Shorten combining state names: "CombiningValue" and "AccumulatorCombiningState" to Combining (as appropriate)

2017-04-10 Thread Daniel Halperin (JIRA)

 [ 
https://issues.apache.org/jira/browse/BEAM-1864?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Daniel Halperin updated BEAM-1864:
--
Labels: backward-incompatible  (was: )

> Shorten combining state names: "CombiningValue" and 
> "AccumulatorCombiningState" to Combining (as appropriate)
> -
>
> Key: BEAM-1864
> URL: https://issues.apache.org/jira/browse/BEAM-1864
> Project: Beam
>  Issue Type: Improvement
>  Components: sdk-java-core
>Reporter: Kenneth Knowles
>Assignee: Kenneth Knowles
>  Labels: backward-incompatible
> Fix For: First stable release
>
>
> This will clean up potential confusion around different kinds of state, 
> making it very clear which type is actually analogous to the {{Combine}} 
> transform and takes a {{CombineFn}}.



--
This message was sent by Atlassian JIRA
(v6.3.15#6346)


[jira] [Updated] (BEAM-539) Error when writing to the root of a GCS location

2017-04-10 Thread Ahmet Altay (JIRA)

 [ 
https://issues.apache.org/jira/browse/BEAM-539?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Ahmet Altay updated BEAM-539:
-
Labels: newbie starter  (was: )

> Error when writing to the root of a GCS location
> 
>
> Key: BEAM-539
> URL: https://issues.apache.org/jira/browse/BEAM-539
> Project: Beam
>  Issue Type: Bug
>  Components: sdk-py
>Reporter: Ahmet Altay
>Assignee: Chamikara Jayalath
>Priority: Minor
>  Labels: newbie, starter
> Fix For: First stable release
>
>
> User issue: 
> http://stackoverflow.com/questions/38811152/google-dataflow-python-pipeline-write-failure
> Reproduction: use a TextFileSink and set the output location to gs://mybucket 
> and it fails. Change it to gs://mybucket/ and it works.
> The final output path is generated here:
> https://github.com/apache/incubator-beam/blob/python-sdk/sdks/python/apache_beam/io/fileio.py#L495
> And this seemingly works in the Java SDK.
> Stack:
>   File "/usr/local/lib/python2.7/dist-packages/apache_beam/io/iobase.py", 
> line 1058, in finish_bundle
> yield window.TimestampedValue(self.writer.close(), window.MAX_TIMESTAMP)
>   File "/usr/local/lib/python2.7/dist-packages/apache_beam/io/fileio.py", 
> line 601, in close
> self.sink.close(self.temp_handle)
>   File "/usr/local/lib/python2.7/dist-packages/apache_beam/io/fileio.py", 
> line 687, in close
> file_handle.close()
>   File "/usr/local/lib/python2.7/dist-packages/apache_beam/io/gcsio.py", line 
> 617, in close
> self._flush_write_buffer()
>   File "/usr/local/lib/python2.7/dist-packages/apache_beam/io/gcsio.py", line 
> 647, in _flush_write_buffer
> raise self.upload_thread.last_error  # pylint: disable=raising-bad-type
> HttpError: HttpError accessing 
> :
>  response: <{'status': '404', 'alternate-protocol': '443:quic', 
> 'content-length': '165', 'vary': 'Origin, X-Origin', 'server': 
> 'UploadServer', 'x-guploader-uploadid': 
> 'AEnB2Uq6ZGb_CsrMVxozv6aL48k4OMMiRgYVeVGmJrM-sMQWRGeGMkesOQg5F0W7HZuaqTBog_d4ml-DlIars_ZvJTejdfcbAUr4gswZWVieq82ufc3WR2g',
>  'date': 'Mon, 08 Aug 2016 21:29:46 GMT', 'alt-svc': 'quic=":443"; 
> ma=2592000; v="36,35,34,33,32,31,30"', 'content-type': 'application/json; 
> charset=UTF-8'}>, content <{
>  "error": {
>   "errors": [
>{
> "domain": "global",
> "reason": "notFound",
> "message": "Not Found"
>}
>   ],
>   "code": 404,
>   "message": "Not Found"
>  }
> }
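
A minimal sketch of the reproduction (the bucket name is hypothetical, and WriteToText stands in for the TextFileSink usage in the report):

{code}
import apache_beam as beam

p = beam.Pipeline()
lines = p | beam.Create(['a', 'b', 'c'])

# Fails at upload time with the 404 above: the bucket root has no object path.
# lines | beam.io.WriteToText('gs://mybucket')

# Works once the location includes a path (or at least a trailing slash)
# inside the bucket.
lines | beam.io.WriteToText('gs://mybucket/output')

p.run()
{code}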



--
This message was sent by Atlassian JIRA
(v6.3.15#6346)

