[jira] [Work logged] (BEAM-3418) Python Fnapi - Support Multiple SDK workers on a single VM
[ https://issues.apache.org/jira/browse/BEAM-3418?focusedWorklogId=87171&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-87171 ] ASF GitHub Bot logged work on BEAM-3418: Author: ASF GitHub Bot Created on: 03/Apr/18 17:25 Start Date: 03/Apr/18 17:25 Worklog Time Spent: 10m Work Description: tgroh closed pull request #4980: [BEAM-3418] Support multiple SDKHarness in RunnerHarness URL: https://github.com/apache/beam/pull/4980 This is a PR merged from a forked repository. As GitHub hides the original diff on merge, it is displayed below for the sake of provenance: As this is a foreign pull request (from a fork), the diff is supplied below (as it won't show otherwise due to GitHub magic): diff --git a/pom.xml b/pom.xml index 228b593c1d8..135be3737bb 100644 --- a/pom.xml +++ b/pom.xml @@ -845,6 +845,12 @@ ${grpc.version} + +io.grpc +grpc-context +${grpc.version} + + io.grpc grpc-core diff --git a/runners/java-fn-execution/pom.xml b/runners/java-fn-execution/pom.xml index dd82908a2f6..515801538f7 100644 --- a/runners/java-fn-execution/pom.xml +++ b/runners/java-fn-execution/pom.xml @@ -63,6 +63,11 @@ beam-sdks-java-core + + io.grpc + grpc-context + + io.grpc grpc-core diff --git a/runners/java-fn-execution/src/main/java/org/apache/beam/runners/fnexecution/GrpcContextHeaderAccessorProvider.java b/runners/java-fn-execution/src/main/java/org/apache/beam/runners/fnexecution/GrpcContextHeaderAccessorProvider.java new file mode 100644 index 000..71089e33f5b --- /dev/null +++ b/runners/java-fn-execution/src/main/java/org/apache/beam/runners/fnexecution/GrpcContextHeaderAccessorProvider.java @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.beam.runners.fnexecution; + +import io.grpc.Context; +import io.grpc.Contexts; +import io.grpc.Metadata; +import io.grpc.Metadata.Key; +import io.grpc.ServerCall; +import io.grpc.ServerCall.Listener; +import io.grpc.ServerCallHandler; +import io.grpc.ServerInterceptor; + +/** + * A HeaderAccessorProvider which intercept the header in a GRPC request and expose the relevant + * fields. + */ +public class GrpcContextHeaderAccessorProvider { + + private static final Key WORKER_ID_KEY = + Key.of("worker_id", Metadata.ASCII_STRING_MARSHALLER); + private static final Context.Key SDK_WORKER_CONTEXT_KEY = Context.key("worker_id"); + private static final GrpcHeaderAccessor HEADER_ACCESSOR = new GrpcHeaderAccessor(); + private static final ServerInterceptor INTERCEPTOR = + new ServerInterceptor() { +@Override +public Listener interceptCall( +ServerCall call, +Metadata requestHeaders, +ServerCallHandler next) { + String workerId = requestHeaders.get(WORKER_ID_KEY); + Context context = Context.current().withValue(SDK_WORKER_CONTEXT_KEY, workerId); + return Contexts.interceptCall(context, call, requestHeaders, next); +} + }; + + public static ServerInterceptor interceptor() { +return INTERCEPTOR; + } + + public static HeaderAccessor getHeaderAccessor() { +return HEADER_ACCESSOR; + } + + private static class GrpcHeaderAccessor implements HeaderAccessor { + +@Override +/** This 
method should be called from the request method. */ +public String getSdkWorkerId() { + return SDK_WORKER_CONTEXT_KEY.get(); +} + } +} diff --git a/runners/java-fn-execution/src/main/java/org/apache/beam/runners/fnexecution/HeaderAccessor.java b/runners/java-fn-execution/src/main/java/org/apache/beam/runners/fnexecution/HeaderAccessor.java new file mode 100644 index 000..cde9044434b --- /dev/null +++ b/runners/java-fn-execution/src/main/java/org/apache/beam/runners/fnexecution/HeaderAccessor.java @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional in
[jira] [Work logged] (BEAM-3418) Python Fnapi - Support Multiple SDK workers on a single VM
[ https://issues.apache.org/jira/browse/BEAM-3418?focusedWorklogId=86886&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-86886 ] ASF GitHub Bot logged work on BEAM-3418: Author: ASF GitHub Bot Created on: 03/Apr/18 00:54 Start Date: 03/Apr/18 00:54 Worklog Time Spent: 10m Work Description: angoenka commented on issue #4980: [BEAM-3418] Support multiple SDKHarness in RunnerHarness URL: https://github.com/apache/beam/pull/4980#issuecomment-378093576 retest this please This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 86886) Time Spent: 6h 40m (was: 6.5h) > Python Fnapi - Support Multiple SDK workers on a single VM > -- > > Key: BEAM-3418 > URL: https://issues.apache.org/jira/browse/BEAM-3418 > Project: Beam > Issue Type: Improvement > Components: sdk-py-harness >Reporter: Ankur Goenka >Assignee: Ankur Goenka >Priority: Major > Labels: performance, portability > Time Spent: 6h 40m > Remaining Estimate: 0h > > Support multiple python SDK process on a VM to fully utilize a machine. > Each SDK Process will work in isolation and interact with Runner Harness > independently. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-3418) Python Fnapi - Support Multiple SDK workers on a single VM
[ https://issues.apache.org/jira/browse/BEAM-3418?focusedWorklogId=86885&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-86885 ] ASF GitHub Bot logged work on BEAM-3418: Author: ASF GitHub Bot Created on: 03/Apr/18 00:52 Start Date: 03/Apr/18 00:52 Worklog Time Spent: 10m Work Description: angoenka commented on issue #4980: [BEAM-3418] Support multiple SDKHarness in RunnerHarness URL: https://github.com/apache/beam/pull/4980#issuecomment-378093351 retest please. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 86885) Time Spent: 6.5h (was: 6h 20m) > Python Fnapi - Support Multiple SDK workers on a single VM > -- > > Key: BEAM-3418 > URL: https://issues.apache.org/jira/browse/BEAM-3418 > Project: Beam > Issue Type: Improvement > Components: sdk-py-harness >Reporter: Ankur Goenka >Assignee: Ankur Goenka >Priority: Major > Labels: performance, portability > Time Spent: 6.5h > Remaining Estimate: 0h > > Support multiple python SDK process on a VM to fully utilize a machine. > Each SDK Process will work in isolation and interact with Runner Harness > independently. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-3418) Python Fnapi - Support Multiple SDK workers on a single VM
[ https://issues.apache.org/jira/browse/BEAM-3418?focusedWorklogId=86884&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-86884 ] ASF GitHub Bot logged work on BEAM-3418: Author: ASF GitHub Bot Created on: 03/Apr/18 00:52 Start Date: 03/Apr/18 00:52 Worklog Time Spent: 10m Work Description: angoenka commented on issue #4980: [BEAM-3418] Support multiple SDKHarness in RunnerHarness URL: https://github.com/apache/beam/pull/4980#issuecomment-378093351 retest please. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 86884) Time Spent: 6h 20m (was: 6h 10m) > Python Fnapi - Support Multiple SDK workers on a single VM > -- > > Key: BEAM-3418 > URL: https://issues.apache.org/jira/browse/BEAM-3418 > Project: Beam > Issue Type: Improvement > Components: sdk-py-harness >Reporter: Ankur Goenka >Assignee: Ankur Goenka >Priority: Major > Labels: performance, portability > Time Spent: 6h 20m > Remaining Estimate: 0h > > Support multiple python SDK process on a VM to fully utilize a machine. > Each SDK Process will work in isolation and interact with Runner Harness > independently. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-3418) Python Fnapi - Support Multiple SDK workers on a single VM
[ https://issues.apache.org/jira/browse/BEAM-3418?focusedWorklogId=86830&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-86830 ] ASF GitHub Bot logged work on BEAM-3418: Author: ASF GitHub Bot Created on: 02/Apr/18 23:10 Start Date: 02/Apr/18 23:10 Worklog Time Spent: 10m Work Description: tgroh commented on issue #4980: [BEAM-3418] Support multiple SDKHarness in RunnerHarness URL: https://github.com/apache/beam/pull/4980#issuecomment-378077141 2018-04-02T22:53:45.120 [WARNING] Used undeclared dependencies found: 2018-04-02T22:53:45.120 [WARNING]io.grpc:grpc-context:jar:1.2.0:compile This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 86830) Time Spent: 6h 10m (was: 6h) > Python Fnapi - Support Multiple SDK workers on a single VM > -- > > Key: BEAM-3418 > URL: https://issues.apache.org/jira/browse/BEAM-3418 > Project: Beam > Issue Type: Improvement > Components: sdk-py-harness >Reporter: Ankur Goenka >Assignee: Ankur Goenka >Priority: Major > Labels: performance, portability > Time Spent: 6h 10m > Remaining Estimate: 0h > > Support multiple python SDK process on a VM to fully utilize a machine. > Each SDK Process will work in isolation and interact with Runner Harness > independently. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-3418) Python Fnapi - Support Multiple SDK workers on a single VM
[ https://issues.apache.org/jira/browse/BEAM-3418?focusedWorklogId=86781&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-86781 ] ASF GitHub Bot logged work on BEAM-3418: Author: ASF GitHub Bot Created on: 02/Apr/18 21:13 Start Date: 02/Apr/18 21:13 Worklog Time Spent: 10m Work Description: angoenka commented on a change in pull request #4980: [BEAM-3418] Support multiple SDKHarness in RunnerHarness URL: https://github.com/apache/beam/pull/4980#discussion_r178652219 ## File path: runners/java-fn-execution/src/main/java/org/apache/beam/runners/fnexecution/control/FnApiControlClient.java ## @@ -51,23 +54,25 @@ private final ResponseStreamObserver responseObserver = new ResponseStreamObserver(); private final ConcurrentMap> outstandingRequests; + private final Set> onCloseListeners = ConcurrentHashMap.newKeySet(); Review comment: Yup, even ConcurrentHashMap.newKeySet() is a new addition to java8. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 86781) Time Spent: 6h (was: 5h 50m) > Python Fnapi - Support Multiple SDK workers on a single VM > -- > > Key: BEAM-3418 > URL: https://issues.apache.org/jira/browse/BEAM-3418 > Project: Beam > Issue Type: Improvement > Components: sdk-py-harness >Reporter: Ankur Goenka >Assignee: Ankur Goenka >Priority: Major > Labels: performance, portability > Time Spent: 6h > Remaining Estimate: 0h > > Support multiple python SDK process on a VM to fully utilize a machine. > Each SDK Process will work in isolation and interact with Runner Harness > independently. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-3418) Python Fnapi - Support Multiple SDK workers on a single VM
[ https://issues.apache.org/jira/browse/BEAM-3418?focusedWorklogId=86746&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-86746 ] ASF GitHub Bot logged work on BEAM-3418: Author: ASF GitHub Bot Created on: 02/Apr/18 20:14 Start Date: 02/Apr/18 20:14 Worklog Time Spent: 10m Work Description: tgroh commented on a change in pull request #4980: [BEAM-3418] Support multiple SDKHarness in RunnerHarness URL: https://github.com/apache/beam/pull/4980#discussion_r178637182 ## File path: runners/java-fn-execution/src/main/java/org/apache/beam/runners/fnexecution/control/FnApiControlClient.java ## @@ -51,23 +54,25 @@ private final ResponseStreamObserver responseObserver = new ResponseStreamObserver(); private final ConcurrentMap> outstandingRequests; + private final Set> onCloseListeners = ConcurrentHashMap.newKeySet(); Review comment: This seems like it'll do the right thing, but gosh it's really unfortunate that Java doesn't have a cleaner way to create a Concurrent Set This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 86746) Time Spent: 5h 50m (was: 5h 40m) > Python Fnapi - Support Multiple SDK workers on a single VM > -- > > Key: BEAM-3418 > URL: https://issues.apache.org/jira/browse/BEAM-3418 > Project: Beam > Issue Type: Improvement > Components: sdk-py-harness >Reporter: Ankur Goenka >Assignee: Ankur Goenka >Priority: Major > Labels: performance, portability > Time Spent: 5h 50m > Remaining Estimate: 0h > > Support multiple python SDK process on a VM to fully utilize a machine. > Each SDK Process will work in isolation and interact with Runner Harness > independently. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-3418) Python Fnapi - Support Multiple SDK workers on a single VM
[ https://issues.apache.org/jira/browse/BEAM-3418?focusedWorklogId=86672&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-86672 ] ASF GitHub Bot logged work on BEAM-3418: Author: ASF GitHub Bot Created on: 02/Apr/18 17:52 Start Date: 02/Apr/18 17:52 Worklog Time Spent: 10m Work Description: angoenka commented on a change in pull request #4980: [BEAM-3418] Support multiple SDKHarness in RunnerHarness URL: https://github.com/apache/beam/pull/4980#discussion_r178603200 ## File path: runners/java-fn-execution/src/main/java/org/apache/beam/runners/fnexecution/control/FnApiControlClient.java ## @@ -53,9 +55,9 @@ private final ResponseStreamObserver responseObserver = new ResponseStreamObserver(); private final ConcurrentMap> outstandingRequests; + private final Collection> onCloseListeners = new ArrayList<>(); Review comment: Made the changes. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 86672) Time Spent: 5h 40m (was: 5.5h) > Python Fnapi - Support Multiple SDK workers on a single VM > -- > > Key: BEAM-3418 > URL: https://issues.apache.org/jira/browse/BEAM-3418 > Project: Beam > Issue Type: Improvement > Components: sdk-py-harness >Reporter: Ankur Goenka >Assignee: Ankur Goenka >Priority: Major > Labels: performance, portability > Time Spent: 5h 40m > Remaining Estimate: 0h > > Support multiple python SDK process on a VM to fully utilize a machine. > Each SDK Process will work in isolation and interact with Runner Harness > independently. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-3418) Python Fnapi - Support Multiple SDK workers on a single VM
[ https://issues.apache.org/jira/browse/BEAM-3418?focusedWorklogId=86629&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-86629 ] ASF GitHub Bot logged work on BEAM-3418: Author: ASF GitHub Bot Created on: 02/Apr/18 16:25 Start Date: 02/Apr/18 16:25 Worklog Time Spent: 10m Work Description: tgroh commented on a change in pull request #4980: [BEAM-3418] Support multiple SDKHarness in RunnerHarness URL: https://github.com/apache/beam/pull/4980#discussion_r178581369 ## File path: runners/java-fn-execution/src/main/java/org/apache/beam/runners/fnexecution/control/FnApiControlClient.java ## @@ -53,9 +55,9 @@ private final ResponseStreamObserver responseObserver = new ResponseStreamObserver(); private final ConcurrentMap> outstandingRequests; + private final Collection> onCloseListeners = new ArrayList<>(); Review comment: This needs to be a thread-safe collection, as the rest of the class is written to handle concurrent access. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 86629) Time Spent: 5.5h (was: 5h 20m) > Python Fnapi - Support Multiple SDK workers on a single VM > -- > > Key: BEAM-3418 > URL: https://issues.apache.org/jira/browse/BEAM-3418 > Project: Beam > Issue Type: Improvement > Components: sdk-py-harness >Reporter: Ankur Goenka >Assignee: Ankur Goenka >Priority: Major > Labels: performance, portability > Time Spent: 5.5h > Remaining Estimate: 0h > > Support multiple python SDK process on a VM to fully utilize a machine. > Each SDK Process will work in isolation and interact with Runner Harness > independently. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-3418) Python Fnapi - Support Multiple SDK workers on a single VM
[ https://issues.apache.org/jira/browse/BEAM-3418?focusedWorklogId=86232&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-86232 ] ASF GitHub Bot logged work on BEAM-3418: Author: ASF GitHub Bot Created on: 31/Mar/18 00:28 Start Date: 31/Mar/18 00:28 Worklog Time Spent: 10m Work Description: angoenka commented on issue #4980: [BEAM-3418] Support multiple SDKHarness in RunnerHarness URL: https://github.com/apache/beam/pull/4980#issuecomment-377651884 @tgroh Can you please take another look? This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 86232) Time Spent: 5h 20m (was: 5h 10m) > Python Fnapi - Support Multiple SDK workers on a single VM > -- > > Key: BEAM-3418 > URL: https://issues.apache.org/jira/browse/BEAM-3418 > Project: Beam > Issue Type: Improvement > Components: sdk-py-harness >Reporter: Ankur Goenka >Assignee: Ankur Goenka >Priority: Major > Labels: performance, portability > Time Spent: 5h 20m > Remaining Estimate: 0h > > Support multiple python SDK process on a VM to fully utilize a machine. > Each SDK Process will work in isolation and interact with Runner Harness > independently. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-3418) Python Fnapi - Support Multiple SDK workers on a single VM
[ https://issues.apache.org/jira/browse/BEAM-3418?focusedWorklogId=86223&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-86223 ] ASF GitHub Bot logged work on BEAM-3418: Author: ASF GitHub Bot Created on: 31/Mar/18 00:10 Start Date: 31/Mar/18 00:10 Worklog Time Spent: 10m Work Description: angoenka commented on issue #4980: [BEAM-3418] Support multiple SDKHarness in RunnerHarness URL: https://github.com/apache/beam/pull/4980#issuecomment-377650057 Applied the review comments. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 86223) Time Spent: 5h 10m (was: 5h) > Python Fnapi - Support Multiple SDK workers on a single VM > -- > > Key: BEAM-3418 > URL: https://issues.apache.org/jira/browse/BEAM-3418 > Project: Beam > Issue Type: Improvement > Components: sdk-py-harness >Reporter: Ankur Goenka >Assignee: Ankur Goenka >Priority: Major > Labels: performance, portability > Time Spent: 5h 10m > Remaining Estimate: 0h > > Support multiple python SDK process on a VM to fully utilize a machine. > Each SDK Process will work in isolation and interact with Runner Harness > independently. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-3418) Python Fnapi - Support Multiple SDK workers on a single VM
[ https://issues.apache.org/jira/browse/BEAM-3418?focusedWorklogId=86215&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-86215 ] ASF GitHub Bot logged work on BEAM-3418: Author: ASF GitHub Bot Created on: 30/Mar/18 23:53 Start Date: 30/Mar/18 23:53 Worklog Time Spent: 10m Work Description: angoenka commented on a change in pull request #4980: [BEAM-3418] Support multiple SDKHarness in RunnerHarness URL: https://github.com/apache/beam/pull/4980#discussion_r178409445 ## File path: runners/java-fn-execution/src/main/java/org/apache/beam/runners/fnexecution/GrpcContextHeaderAccessorProvider.java ## @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.beam.runners.fnexecution; + +import io.grpc.Context; +import io.grpc.Contexts; +import io.grpc.Metadata; +import io.grpc.Metadata.Key; +import io.grpc.ServerCall; +import io.grpc.ServerCall.Listener; +import io.grpc.ServerCallHandler; +import io.grpc.ServerInterceptor; + +/** + * A HeaderAccessorProvider which intercept the header in a GRPC request and expose the relevant + * fields. + */ +public class GrpcContextHeaderAccessorProvider { Review comment: Makes sense. 
This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 86215) Time Spent: 5h (was: 4h 50m) > Python Fnapi - Support Multiple SDK workers on a single VM > -- > > Key: BEAM-3418 > URL: https://issues.apache.org/jira/browse/BEAM-3418 > Project: Beam > Issue Type: Improvement > Components: sdk-py-harness >Reporter: Ankur Goenka >Assignee: Ankur Goenka >Priority: Major > Labels: performance, portability > Time Spent: 5h > Remaining Estimate: 0h > > Support multiple python SDK process on a VM to fully utilize a machine. > Each SDK Process will work in isolation and interact with Runner Harness > independently. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-3418) Python Fnapi - Support Multiple SDK workers on a single VM
[ https://issues.apache.org/jira/browse/BEAM-3418?focusedWorklogId=86214&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-86214 ] ASF GitHub Bot logged work on BEAM-3418: Author: ASF GitHub Bot Created on: 30/Mar/18 23:52 Start Date: 30/Mar/18 23:52 Worklog Time Spent: 10m Work Description: angoenka commented on a change in pull request #4980: [BEAM-3418] Support multiple SDKHarness in RunnerHarness URL: https://github.com/apache/beam/pull/4980#discussion_r178409410 ## File path: runners/java-fn-execution/src/main/java/org/apache/beam/runners/fnexecution/control/FnApiControlClient.java ## @@ -88,34 +92,48 @@ public void close() { closeAndTerminateOutstandingRequests(new IllegalStateException("Runner closed connection")); } + public String getWorkerId() { +return workerId; + } + /** Closes this client and terminates any outstanding requests exceptionally. */ private void closeAndTerminateOutstandingRequests(Throwable cause) { if (isClosed.getAndSet(true)) { return; } -// Make a copy of the map to make the view of the outstanding requests consistent. -Map> outstandingRequestsCopy = -new ConcurrentHashMap<>(outstandingRequests); -outstandingRequests.clear(); +try { + // Make a copy of the map to make the view of the outstanding requests consistent. 
+ Map> outstandingRequestsCopy = + new ConcurrentHashMap<>(outstandingRequests); + outstandingRequests.clear(); -if (outstandingRequestsCopy.isEmpty()) { - requestReceiver.onCompleted(); - return; -} -requestReceiver.onError( -new StatusRuntimeException(Status.CANCELLED.withDescription(cause.getMessage()))); - -LOG.error( -"{} closed, clearing outstanding requests {}", -FnApiControlClient.class.getSimpleName(), -outstandingRequestsCopy); -for (CompletableFuture outstandingRequest : -outstandingRequestsCopy.values()) { - outstandingRequest.completeExceptionally(cause); + if (outstandingRequestsCopy.isEmpty()) { +requestReceiver.onCompleted(); +return; + } + requestReceiver.onError( + new StatusRuntimeException(Status.CANCELLED.withDescription(cause.getMessage()))); + + LOG.error( + "{} closed, clearing outstanding requests {}", + FnApiControlClient.class.getSimpleName(), + outstandingRequestsCopy); + for (CompletableFuture outstandingRequest : + outstandingRequestsCopy.values()) { +outstandingRequest.completeExceptionally(cause); + } +} finally { + if (onCloseListener != null) { +onCloseListener.accept(this); + } } } + public void onClose(Consumer onCloseListener) { Review comment: Changed to call all the registered listeners on close. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 86214) Time Spent: 4h 50m (was: 4h 40m) > Python Fnapi - Support Multiple SDK workers on a single VM > -- > > Key: BEAM-3418 > URL: https://issues.apache.org/jira/browse/BEAM-3418 > Project: Beam > Issue Type: Improvement > Components: sdk-py-harness >Reporter: Ankur Goenka >Assignee: Ankur Goenka >Priority: Major > Labels: performance, portability > Time Spent: 4h 50m > Remaining Estimate: 0h > > Support multiple python SDK process on a VM to fully utilize a machine. > Each SDK Process will work in isolation and interact with Runner Harness > independently. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-3418) Python Fnapi - Support Multiple SDK workers on a single VM
[ https://issues.apache.org/jira/browse/BEAM-3418?focusedWorklogId=86115&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-86115 ] ASF GitHub Bot logged work on BEAM-3418: Author: ASF GitHub Bot Created on: 30/Mar/18 18:31 Start Date: 30/Mar/18 18:31 Worklog Time Spent: 10m Work Description: tgroh commented on a change in pull request #4980: [BEAM-3418] Support multiple SDKHarness in RunnerHarness URL: https://github.com/apache/beam/pull/4980#discussion_r178345082 ## File path: runners/java-fn-execution/src/main/java/org/apache/beam/runners/fnexecution/GrpcContextHeaderAccessorProvider.java ## @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.beam.runners.fnexecution; + +import io.grpc.Context; +import io.grpc.Contexts; +import io.grpc.Metadata; +import io.grpc.Metadata.Key; +import io.grpc.ServerCall; +import io.grpc.ServerCall.Listener; +import io.grpc.ServerCallHandler; +import io.grpc.ServerInterceptor; + +/** + * A HeaderAccessorProvider which intercept the header in a GRPC request and expose the relevant + * fields. + */ +public class GrpcContextHeaderAccessorProvider { Review comment: Tests? 
You should be able to use an `InProcessChannel` and some stub implementation of a random service. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 86115) Time Spent: 4h 40m (was: 4.5h) > Python Fnapi - Support Multiple SDK workers on a single VM > -- > > Key: BEAM-3418 > URL: https://issues.apache.org/jira/browse/BEAM-3418 > Project: Beam > Issue Type: Improvement > Components: sdk-py-harness >Reporter: Ankur Goenka >Assignee: Ankur Goenka >Priority: Major > Labels: performance, portability > Time Spent: 4h 40m > Remaining Estimate: 0h > > Support multiple python SDK process on a VM to fully utilize a machine. > Each SDK Process will work in isolation and interact with Runner Harness > independently. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-3418) Python Fnapi - Support Multiple SDK workers on a single VM
[ https://issues.apache.org/jira/browse/BEAM-3418?focusedWorklogId=86114&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-86114 ] ASF GitHub Bot logged work on BEAM-3418: Author: ASF GitHub Bot Created on: 30/Mar/18 18:31 Start Date: 30/Mar/18 18:31 Worklog Time Spent: 10m Work Description: tgroh commented on a change in pull request #4980: [BEAM-3418] Support multiple SDKHarness in RunnerHarness URL: https://github.com/apache/beam/pull/4980#discussion_r178345470 ## File path: runners/java-fn-execution/src/main/java/org/apache/beam/runners/fnexecution/control/FnApiControlClient.java ## @@ -88,34 +92,48 @@ public void close() { closeAndTerminateOutstandingRequests(new IllegalStateException("Runner closed connection")); } + public String getWorkerId() { +return workerId; + } + /** Closes this client and terminates any outstanding requests exceptionally. */ private void closeAndTerminateOutstandingRequests(Throwable cause) { if (isClosed.getAndSet(true)) { return; } -// Make a copy of the map to make the view of the outstanding requests consistent. -Map> outstandingRequestsCopy = -new ConcurrentHashMap<>(outstandingRequests); -outstandingRequests.clear(); +try { + // Make a copy of the map to make the view of the outstanding requests consistent. 
+ Map> outstandingRequestsCopy = + new ConcurrentHashMap<>(outstandingRequests); + outstandingRequests.clear(); -if (outstandingRequestsCopy.isEmpty()) { - requestReceiver.onCompleted(); - return; -} -requestReceiver.onError( -new StatusRuntimeException(Status.CANCELLED.withDescription(cause.getMessage(; - -LOG.error( -"{} closed, clearing outstanding requests {}", -FnApiControlClient.class.getSimpleName(), -outstandingRequestsCopy); -for (CompletableFuture outstandingRequest : -outstandingRequestsCopy.values()) { - outstandingRequest.completeExceptionally(cause); + if (outstandingRequestsCopy.isEmpty()) { +requestReceiver.onCompleted(); +return; + } + requestReceiver.onError( + new StatusRuntimeException(Status.CANCELLED.withDescription(cause.getMessage(; + + LOG.error( + "{} closed, clearing outstanding requests {}", + FnApiControlClient.class.getSimpleName(), + outstandingRequestsCopy); + for (CompletableFuture outstandingRequest : + outstandingRequestsCopy.values()) { +outstandingRequest.completeExceptionally(cause); + } +} finally { + if (onCloseListener != null) { +onCloseListener.accept(this); + } } } + public void onClose(Consumer onCloseListener) { Review comment: What happens if multiple layers try to attach a listener? I think this should either call both of them, or fail loudly. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 86114) Time Spent: 4.5h (was: 4h 20m) > Python Fnapi - Support Multiple SDK workers on a single VM > -- > > Key: BEAM-3418 > URL: https://issues.apache.org/jira/browse/BEAM-3418 > Project: Beam > Issue Type: Improvement > Components: sdk-py-harness >Reporter: Ankur Goenka >Assignee: Ankur Goenka >Priority: Major > Labels: performance, portability > Time Spent: 4.5h > Remaining Estimate: 0h > > Support multiple python SDK process on a VM to fully utilize a machine. > Each SDK Process will work in isolation and interact with Runner Harness > independently. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-3418) Python Fnapi - Support Multiple SDK workers on a single VM
[ https://issues.apache.org/jira/browse/BEAM-3418?focusedWorklogId=85884&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-85884 ] ASF GitHub Bot logged work on BEAM-3418: Author: ASF GitHub Bot Created on: 30/Mar/18 03:09 Start Date: 30/Mar/18 03:09 Worklog Time Spent: 10m Work Description: angoenka commented on issue #4980: [BEAM-3418] Support multiple SDKHarness in RunnerHarness URL: https://github.com/apache/beam/pull/4980#issuecomment-377433424 @tgroh Can you please have a look? This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 85884) Time Spent: 4h 20m (was: 4h 10m) > Python Fnapi - Support Multiple SDK workers on a single VM > -- > > Key: BEAM-3418 > URL: https://issues.apache.org/jira/browse/BEAM-3418 > Project: Beam > Issue Type: Improvement > Components: sdk-py-harness >Reporter: Ankur Goenka >Assignee: Ankur Goenka >Priority: Major > Labels: performance, portability > Time Spent: 4h 20m > Remaining Estimate: 0h > > Support multiple python SDK process on a VM to fully utilize a machine. > Each SDK Process will work in isolation and interact with Runner Harness > independently. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-3418) Python Fnapi - Support Multiple SDK workers on a single VM
[ https://issues.apache.org/jira/browse/BEAM-3418?focusedWorklogId=85883&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-85883 ] ASF GitHub Bot logged work on BEAM-3418: Author: ASF GitHub Bot Created on: 30/Mar/18 03:07 Start Date: 30/Mar/18 03:07 Worklog Time Spent: 10m Work Description: angoenka opened a new pull request #4980: [BEAM-3418] Support multiple SDKHarness in RunnerHarness URL: https://github.com/apache/beam/pull/4980 To support multiple SDKHarness instances on a single RunnerHarness, we introduced a worker_id in the gRPC header for the ControlChannel, DataChannel, LoggingChannel and StateChannel. Follow this checklist to help us incorporate your contribution quickly and easily: - [ ] Make sure there is a [JIRA issue](https://issues.apache.org/jira/projects/BEAM/issues/) filed for the change (usually before you start working on it). Trivial changes like typos do not require a JIRA issue. Your pull request should address just this issue, without pulling in other changes. - [ ] Format the pull request title like `[BEAM-XXX] Fixes bug in ApproximateQuantiles`, where you replace `BEAM-XXX` with the appropriate JIRA issue. - [ ] Write a pull request description that is detailed enough to understand: - [ ] What the pull request does - [ ] Why it does it - [ ] How it does it - [ ] Why this approach - [ ] Each commit in the pull request should have a meaningful subject line and body. - [ ] Run `mvn clean verify` to make sure basic checks pass. A more thorough check will be performed on your pull request automatically. - [ ] If this contribution is large, please file an Apache [Individual Contributor License Agreement](https://www.apache.org/licenses/icla.pdf). This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 85883) Time Spent: 4h 10m (was: 4h) > Python Fnapi - Support Multiple SDK workers on a single VM > -- > > Key: BEAM-3418 > URL: https://issues.apache.org/jira/browse/BEAM-3418 > Project: Beam > Issue Type: Improvement > Components: sdk-py-harness >Reporter: Ankur Goenka >Assignee: Ankur Goenka >Priority: Major > Labels: performance, portability > Time Spent: 4h 10m > Remaining Estimate: 0h > > Support multiple python SDK process on a VM to fully utilize a machine. > Each SDK Process will work in isolation and interact with Runner Harness > independently. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-3418) Python Fnapi - Support Multiple SDK workers on a single VM
[ https://issues.apache.org/jira/browse/BEAM-3418?focusedWorklogId=83388&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-83388 ] ASF GitHub Bot logged work on BEAM-3418: Author: ASF GitHub Bot Created on: 22/Mar/18 23:45 Start Date: 22/Mar/18 23:45 Worklog Time Spent: 10m Work Description: aaltay closed pull request #4587: [BEAM-3418] Send worker_id in all grpc channels to runner harness URL: https://github.com/apache/beam/pull/4587 This is a PR merged from a forked repository. As GitHub hides the original diff on merge, it is displayed below for the sake of provenance: As this is a foreign pull request (from a fork), the diff is supplied below (as it won't show otherwise due to GitHub magic): diff --git a/sdks/python/apache_beam/runners/dataflow/internal/apiclient.py b/sdks/python/apache_beam/runners/dataflow/internal/apiclient.py index e503da9eb5b..9db1cab96f4 100644 --- a/sdks/python/apache_beam/runners/dataflow/internal/apiclient.py +++ b/sdks/python/apache_beam/runners/dataflow/internal/apiclient.py @@ -169,10 +169,17 @@ def __init__(self, packages, options, environment_version, pipeline_url): if job_type.startswith('FNAPI_'): runner_harness_override = ( dependency.get_runner_harness_container_image()) + self.debug_options.experiments = self.debug_options.experiments or [] if runner_harness_override: -self.debug_options.experiments = self.debug_options.experiments or [] self.debug_options.experiments.append( 'runner_harness_container_image=' + runner_harness_override) + # Add use_multiple_sdk_containers flag if its not already present. Do not + # add the flag if 'no_use_multiple_sdk_containers' is present. + # TODO: Cleanup use_multiple_sdk_containers once we deprecate Python SDK + # till version 2.4. 
+ if ('use_multiple_sdk_containers' not in self.proto.experiments and + 'no_use_multiple_sdk_containers' not in self.proto.experiments): +self.debug_options.experiments.append('use_multiple_sdk_containers') # Experiments if self.debug_options.experiments: for experiment in self.debug_options.experiments: diff --git a/sdks/python/apache_beam/runners/worker/data_plane.py b/sdks/python/apache_beam/runners/worker/data_plane.py index f554646c659..7c79c4cc7a7 100644 --- a/sdks/python/apache_beam/runners/worker/data_plane.py +++ b/sdks/python/apache_beam/runners/worker/data_plane.py @@ -34,6 +34,7 @@ from apache_beam.coders import coder_impl from apache_beam.portability.api import beam_fn_api_pb2 from apache_beam.portability.api import beam_fn_api_pb2_grpc +from apache_beam.runners.worker.worker_id_interceptor import WorkerIdInterceptor # This module is experimental. No backwards-compatibility guarantees. @@ -311,6 +312,9 @@ def create_data_channel(self, remote_grpc_port): # controlled in a layer above. options=[("grpc.max_receive_message_length", -1), ("grpc.max_send_message_length", -1)]) + # Add workerId to the grpc channel + grpc_channel = grpc.intercept_channel(grpc_channel, +WorkerIdInterceptor()) self._data_channel_cache[url] = GrpcClientDataChannel( beam_fn_api_pb2_grpc.BeamFnDataStub(grpc_channel)) return self._data_channel_cache[url] diff --git a/sdks/python/apache_beam/runners/worker/log_handler.py b/sdks/python/apache_beam/runners/worker/log_handler.py index 6d8a1d92671..152659e0a3f 100644 --- a/sdks/python/apache_beam/runners/worker/log_handler.py +++ b/sdks/python/apache_beam/runners/worker/log_handler.py @@ -25,6 +25,7 @@ from apache_beam.portability.api import beam_fn_api_pb2 from apache_beam.portability.api import beam_fn_api_pb2_grpc +from apache_beam.runners.worker.worker_id_interceptor import WorkerIdInterceptor # This module is experimental. No backwards-compatibility guarantees. 
@@ -48,7 +49,9 @@ class FnApiLogRecordHandler(logging.Handler): def __init__(self, log_service_descriptor): super(FnApiLogRecordHandler, self).__init__() -self._log_channel = grpc.insecure_channel(log_service_descriptor.url) +self._log_channel = grpc.intercept_channel( +grpc.insecure_channel(log_service_descriptor.url), +WorkerIdInterceptor()) self._logging_stub = beam_fn_api_pb2_grpc.BeamFnLoggingStub( self._log_channel) self._log_entry_queue = queue.Queue() diff --git a/sdks/python/apache_beam/runners/worker/sdk_worker.py b/sdks/python/apache_beam/runners/worker/sdk_worker.py index 1988490013c..c77659b3479 100644 --- a/sdks/python/apache_beam/runners/worker/sdk_worker.py +++ b/sdks/python/apache_beam/runners/worker/sdk_worker.py @@ -34,6 +34,7 @@ from apache_beam.portability.api import beam_fn_api_pb2_grpc from apache_beam.r
[jira] [Work logged] (BEAM-3418) Python Fnapi - Support Multiple SDK workers on a single VM
[ https://issues.apache.org/jira/browse/BEAM-3418?focusedWorklogId=83314&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-83314 ] ASF GitHub Bot logged work on BEAM-3418: Author: ASF GitHub Bot Created on: 22/Mar/18 20:01 Start Date: 22/Mar/18 20:01 Worklog Time Spent: 10m Work Description: angoenka commented on issue #4587: [BEAM-3418] Send worker_id in all grpc channels to runner harness URL: https://github.com/apache/beam/pull/4587#issuecomment-375438660 Sure, I will squash them. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 83314) Time Spent: 3h 50m (was: 3h 40m) > Python Fnapi - Support Multiple SDK workers on a single VM > -- > > Key: BEAM-3418 > URL: https://issues.apache.org/jira/browse/BEAM-3418 > Project: Beam > Issue Type: Improvement > Components: sdk-py-harness >Reporter: Ankur Goenka >Assignee: Ankur Goenka >Priority: Major > Labels: performance, portability > Time Spent: 3h 50m > Remaining Estimate: 0h > > Support multiple python SDK process on a VM to fully utilize a machine. > Each SDK Process will work in isolation and interact with Runner Harness > independently. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-3418) Python Fnapi - Support Multiple SDK workers on a single VM
[ https://issues.apache.org/jira/browse/BEAM-3418?focusedWorklogId=83313&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-83313 ] ASF GitHub Bot logged work on BEAM-3418: Author: ASF GitHub Bot Created on: 22/Mar/18 19:59 Start Date: 22/Mar/18 19:59 Worklog Time Spent: 10m Work Description: aaltay commented on issue #4587: [BEAM-3418] Send worker_id in all grpc channels to runner harness URL: https://github.com/apache/beam/pull/4587#issuecomment-375438254 Could you squash your commits? This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 83313) Time Spent: 3h 40m (was: 3.5h) > Python Fnapi - Support Multiple SDK workers on a single VM > -- > > Key: BEAM-3418 > URL: https://issues.apache.org/jira/browse/BEAM-3418 > Project: Beam > Issue Type: Improvement > Components: sdk-py-harness >Reporter: Ankur Goenka >Assignee: Ankur Goenka >Priority: Major > Labels: performance, portability > Time Spent: 3h 40m > Remaining Estimate: 0h > > Support multiple python SDK process on a VM to fully utilize a machine. > Each SDK Process will work in isolation and interact with Runner Harness > independently. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-3418) Python Fnapi - Support Multiple SDK workers on a single VM
[ https://issues.apache.org/jira/browse/BEAM-3418?focusedWorklogId=83287&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-83287 ] ASF GitHub Bot logged work on BEAM-3418: Author: ASF GitHub Bot Created on: 22/Mar/18 18:51 Start Date: 22/Mar/18 18:51 Worklog Time Spent: 10m Work Description: angoenka commented on a change in pull request #4587: [BEAM-3418] Send worker_id in all grpc channels to runner harness URL: https://github.com/apache/beam/pull/4587#discussion_r176207958 ## File path: sdks/python/apache_beam/runners/worker/worker_id_interceptor.py ## @@ -0,0 +1,54 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +#http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Client Interceptor to inject worker_id""" +from __future__ import absolute_import +from __future__ import division Review comment: Sure! This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 83287) Time Spent: 3.5h (was: 3h 20m) > Python Fnapi - Support Multiple SDK workers on a single VM > -- > > Key: BEAM-3418 > URL: https://issues.apache.org/jira/browse/BEAM-3418 > Project: Beam > Issue Type: Improvement > Components: sdk-py-harness >Reporter: Ankur Goenka >Assignee: Ankur Goenka >Priority: Major > Labels: performance, portability > Time Spent: 3.5h > Remaining Estimate: 0h > > Support multiple python SDK process on a VM to fully utilize a machine. > Each SDK Process will work in isolation and interact with Runner Harness > independently. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-3418) Python Fnapi - Support Multiple SDK workers on a single VM
[ https://issues.apache.org/jira/browse/BEAM-3418?focusedWorklogId=83286&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-83286 ] ASF GitHub Bot logged work on BEAM-3418: Author: ASF GitHub Bot Created on: 22/Mar/18 18:51 Start Date: 22/Mar/18 18:51 Worklog Time Spent: 10m Work Description: angoenka commented on a change in pull request #4587: [BEAM-3418] Send worker_id in all grpc channels to runner harness URL: https://github.com/apache/beam/pull/4587#discussion_r176203438 ## File path: sdks/python/apache_beam/runners/worker/worker_id_interceptor.py ## @@ -0,0 +1,54 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +#http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Client Interceptor to inject worker_id""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections +import os +import uuid + +import grpc + + +class _ClientCallDetails( +collections.namedtuple('_ClientCallDetails', + ('method', 'timeout', 'metadata', 'credentials')), +grpc.ClientCallDetails): + pass + + +class WorkerIdInterceptor(grpc.StreamStreamClientInterceptor): + + # Unique worker Id for this worker. 
+ _worker_id = os.environ['WORKER_ID'] if os.environ.has_key( Review comment: Created a JIRA issue, BEAM-3904, to clean it up. I want to keep it around to decouple SDK changes from internal container changes. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 83286) Time Spent: 3.5h (was: 3h 20m) > Python Fnapi - Support Multiple SDK workers on a single VM > -- > > Key: BEAM-3418 > URL: https://issues.apache.org/jira/browse/BEAM-3418 > Project: Beam > Issue Type: Improvement > Components: sdk-py-harness >Reporter: Ankur Goenka >Assignee: Ankur Goenka >Priority: Major > Labels: performance, portability > Time Spent: 3.5h > Remaining Estimate: 0h > > Support multiple python SDK process on a VM to fully utilize a machine. > Each SDK Process will work in isolation and interact with Runner Harness > independently. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-3418) Python Fnapi - Support Multiple SDK workers on a single VM
[ https://issues.apache.org/jira/browse/BEAM-3418?focusedWorklogId=82843&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-82843 ] ASF GitHub Bot logged work on BEAM-3418: Author: ASF GitHub Bot Created on: 21/Mar/18 18:21 Start Date: 21/Mar/18 18:21 Worklog Time Spent: 10m Work Description: aaltay commented on a change in pull request #4587: [BEAM-3418] Send worker_id in all grpc channels to runner harness URL: https://github.com/apache/beam/pull/4587#discussion_r176190223 ## File path: sdks/python/apache_beam/runners/worker/worker_id_interceptor.py ## @@ -0,0 +1,54 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +#http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Client Interceptor to inject worker_id""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections +import os +import uuid + +import grpc + + +class _ClientCallDetails( +collections.namedtuple('_ClientCallDetails', + ('method', 'timeout', 'metadata', 'credentials')), +grpc.ClientCallDetails): + pass + + +class WorkerIdInterceptor(grpc.StreamStreamClientInterceptor): + + # Unique worker Id for this worker. 
+ _worker_id = os.environ['WORKER_ID'] if os.environ.has_key( Review comment: Do we really need to be backward compatible? This is mostly new code with no production usage. I would prefer to not have it succeed like this. But if you think this is necessary in the interim, we can add a TODO to remove the UUID generation. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 82843) Time Spent: 3h 20m (was: 3h 10m) > Python Fnapi - Support Multiple SDK workers on a single VM > -- > > Key: BEAM-3418 > URL: https://issues.apache.org/jira/browse/BEAM-3418 > Project: Beam > Issue Type: Improvement > Components: sdk-py-harness >Reporter: Ankur Goenka >Assignee: Ankur Goenka >Priority: Major > Labels: performance, portability > Time Spent: 3h 20m > Remaining Estimate: 0h > > Support multiple python SDK process on a VM to fully utilize a machine. > Each SDK Process will work in isolation and interact with Runner Harness > independently. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-3418) Python Fnapi - Support Multiple SDK workers on a single VM
[ https://issues.apache.org/jira/browse/BEAM-3418?focusedWorklogId=82842&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-82842 ] ASF GitHub Bot logged work on BEAM-3418: Author: ASF GitHub Bot Created on: 21/Mar/18 18:19 Start Date: 21/Mar/18 18:19 Worklog Time Spent: 10m Work Description: aaltay commented on a change in pull request #4587: [BEAM-3418] Send worker_id in all grpc channels to runner harness URL: https://github.com/apache/beam/pull/4587#discussion_r176189482 ## File path: sdks/python/apache_beam/runners/worker/worker_id_interceptor.py ## @@ -0,0 +1,54 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +#http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Client Interceptor to inject worker_id""" +from __future__ import absolute_import +from __future__ import division Review comment: Let's remove them, if they are not needed now. I do not see print() or / being used here. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 82842) Time Spent: 3h 10m (was: 3h) > Python Fnapi - Support Multiple SDK workers on a single VM > -- > > Key: BEAM-3418 > URL: https://issues.apache.org/jira/browse/BEAM-3418 > Project: Beam > Issue Type: Improvement > Components: sdk-py-harness >Reporter: Ankur Goenka >Assignee: Ankur Goenka >Priority: Major > Labels: performance, portability > Time Spent: 3h 10m > Remaining Estimate: 0h > > Support multiple python SDK process on a VM to fully utilize a machine. > Each SDK Process will work in isolation and interact with Runner Harness > independently. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-3418) Python Fnapi - Support Multiple SDK workers on a single VM
[ https://issues.apache.org/jira/browse/BEAM-3418?focusedWorklogId=82513&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-82513 ] ASF GitHub Bot logged work on BEAM-3418: Author: ASF GitHub Bot Created on: 20/Mar/18 22:17 Start Date: 20/Mar/18 22:17 Worklog Time Spent: 10m Work Description: angoenka commented on a change in pull request #4587: [BEAM-3418] Send worker_id in all grpc channels to runner harness URL: https://github.com/apache/beam/pull/4587#discussion_r175920015 ## File path: sdks/python/apache_beam/runners/dataflow/internal/apiclient.py ## @@ -177,6 +177,14 @@ def __init__(self, packages, options, environment_version, pipeline_url): if self.debug_options.experiments: for experiment in self.debug_options.experiments: self.proto.experiments.append(experiment) +# Add use_multiple_sdk_containers flag if its not already present. Do not +# add the flag if 'no_use_multiple_sdk_containers' is present. +# TODO: Cleanup use_multiple_sdk_containers once we deprecate Python SDK +# till version 2.4. +if (job_type.startswith('FNAPI_') and +'use_multiple_sdk_containers' not in self.proto.experiments and +'no_use_multiple_sdk_containers' not in self.proto.experiments): + self.proto.experiments.append('use_multiple_sdk_containers') Review comment: Makes Sense! This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 82513) Time Spent: 2h 40m (was: 2.5h) > Python Fnapi - Support Multiple SDK workers on a single VM > -- > > Key: BEAM-3418 > URL: https://issues.apache.org/jira/browse/BEAM-3418 > Project: Beam > Issue Type: Improvement > Components: sdk-py-harness >Reporter: Ankur Goenka >Assignee: Ankur Goenka >Priority: Major > Labels: performance, portability > Time Spent: 2h 40m > Remaining Estimate: 0h > > Support multiple python SDK process on a VM to fully utilize a machine. > Each SDK Process will work in isolation and interact with Runner Harness > independently. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-3418) Python Fnapi - Support Multiple SDK workers on a single VM
[ https://issues.apache.org/jira/browse/BEAM-3418?focusedWorklogId=82515&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-82515 ] ASF GitHub Bot logged work on BEAM-3418: Author: ASF GitHub Bot Created on: 20/Mar/18 22:17 Start Date: 20/Mar/18 22:17 Worklog Time Spent: 10m Work Description: angoenka commented on a change in pull request #4587: [BEAM-3418] Send worker_id in all grpc channels to runner harness URL: https://github.com/apache/beam/pull/4587#discussion_r175918327 ## File path: sdks/python/apache_beam/runners/worker/sdk_worker.py ## @@ -43,6 +44,8 @@ def __init__(self, control_address, worker_count): self._worker_count = worker_count self._worker_index = 0 self._control_channel = grpc.insecure_channel(control_address) +self._control_channel = grpc.intercept_channel(self._control_channel, Review comment: Sure! This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 82515) Time Spent: 3h (was: 2h 50m) > Python Fnapi - Support Multiple SDK workers on a single VM > -- > > Key: BEAM-3418 > URL: https://issues.apache.org/jira/browse/BEAM-3418 > Project: Beam > Issue Type: Improvement > Components: sdk-py-harness >Reporter: Ankur Goenka >Assignee: Ankur Goenka >Priority: Major > Labels: performance, portability > Time Spent: 3h > Remaining Estimate: 0h > > Support multiple python SDK process on a VM to fully utilize a machine. > Each SDK Process will work in isolation and interact with Runner Harness > independently. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-3418) Python Fnapi - Support Multiple SDK workers on a single VM
[ https://issues.apache.org/jira/browse/BEAM-3418?focusedWorklogId=82514&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-82514 ] ASF GitHub Bot logged work on BEAM-3418: Author: ASF GitHub Bot Created on: 20/Mar/18 22:17 Start Date: 20/Mar/18 22:17 Worklog Time Spent: 10m Work Description: angoenka commented on a change in pull request #4587: [BEAM-3418] Send worker_id in all grpc channels to runner harness URL: https://github.com/apache/beam/pull/4587#discussion_r175919043 ## File path: sdks/python/apache_beam/runners/worker/data_plane.py ## @@ -311,6 +312,9 @@ def create_data_channel(self, remote_grpc_port): # controlled in a layer above. options=[("grpc.max_receive_message_length", -1), ("grpc.max_send_message_length", -1)]) + # Add workerId to the grpc channel + grpc_channel = grpc.intercept_channel(grpc_channel, Review comment: Not Simplifying to keep readability. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 82514) Time Spent: 2h 50m (was: 2h 40m) > Python Fnapi - Support Multiple SDK workers on a single VM > -- > > Key: BEAM-3418 > URL: https://issues.apache.org/jira/browse/BEAM-3418 > Project: Beam > Issue Type: Improvement > Components: sdk-py-harness >Reporter: Ankur Goenka >Assignee: Ankur Goenka >Priority: Major > Labels: performance, portability > Time Spent: 2h 50m > Remaining Estimate: 0h > > Support multiple python SDK process on a VM to fully utilize a machine. > Each SDK Process will work in isolation and interact with Runner Harness > independently. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-3418) Python Fnapi - Support Multiple SDK workers on a single VM
[ https://issues.apache.org/jira/browse/BEAM-3418?focusedWorklogId=82511&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-82511 ] ASF GitHub Bot logged work on BEAM-3418: Author: ASF GitHub Bot Created on: 20/Mar/18 22:17 Start Date: 20/Mar/18 22:17 Worklog Time Spent: 10m Work Description: angoenka commented on a change in pull request #4587: [BEAM-3418] Send worker_id in all grpc channels to runner harness URL: https://github.com/apache/beam/pull/4587#discussion_r175917370 ## File path: sdks/python/apache_beam/runners/worker/worker_id_interceptor.py ## @@ -0,0 +1,54 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +#http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Client Interceptor to inject worker_id""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections +import os +import uuid + +import grpc + + +class _ClientCallDetails( +collections.namedtuple('_ClientCallDetails', + ('method', 'timeout', 'metadata', 'credentials')), +grpc.ClientCallDetails): + pass + + +class WorkerIdInterceptor(grpc.StreamStreamClientInterceptor): + + # Unique worker Id for this worker. 
+ _worker_id = os.environ['WORKER_ID'] if os.environ.has_key( Review comment: For backward compatibility of containers, I would like to assign a UUID if worker_id is not provided. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 82511) Time Spent: 2.5h (was: 2h 20m) > Python Fnapi - Support Multiple SDK workers on a single VM > -- > > Key: BEAM-3418 > URL: https://issues.apache.org/jira/browse/BEAM-3418 > Project: Beam > Issue Type: Improvement > Components: sdk-py-harness >Reporter: Ankur Goenka >Assignee: Ankur Goenka >Priority: Major > Labels: performance, portability > Time Spent: 2.5h > Remaining Estimate: 0h > > Support multiple python SDK process on a VM to fully utilize a machine. > Each SDK Process will work in isolation and interact with Runner Harness > independently. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-3418) Python Fnapi - Support Multiple SDK workers on a single VM
[ https://issues.apache.org/jira/browse/BEAM-3418?focusedWorklogId=82510&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-82510 ] ASF GitHub Bot logged work on BEAM-3418: Author: ASF GitHub Bot Created on: 20/Mar/18 22:17 Start Date: 20/Mar/18 22:17 Worklog Time Spent: 10m Work Description: angoenka commented on a change in pull request #4587: [BEAM-3418] Send worker_id in all grpc channels to runner harness URL: https://github.com/apache/beam/pull/4587#discussion_r175917786 ## File path: sdks/python/apache_beam/runners/worker/worker_id_interceptor.py ## @@ -0,0 +1,54 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +#http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Client Interceptor to inject worker_id""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections +import os +import uuid + +import grpc + + +class _ClientCallDetails( +collections.namedtuple('_ClientCallDetails', + ('method', 'timeout', 'metadata', 'credentials')), +grpc.ClientCallDetails): + pass + + +class WorkerIdInterceptor(grpc.StreamStreamClientInterceptor): + + # Unique worker Id for this worker. 
+ _worker_id = os.environ['WORKER_ID'] if os.environ.has_key( + 'WORKER_ID') else str(uuid.uuid4()) + + def __init__(self): +pass + + def intercept_stream_stream(self, continuation, client_call_details, + request_iterator): +metadata = [] +if client_call_details.metadata is not None: + metadata = list(client_call_details.metadata) +metadata.append(('worker_id', self._worker_id)) Review comment: It should be an error. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 82510) Time Spent: 2h 20m (was: 2h 10m) > Python Fnapi - Support Multiple SDK workers on a single VM > -- > > Key: BEAM-3418 > URL: https://issues.apache.org/jira/browse/BEAM-3418 > Project: Beam > Issue Type: Improvement > Components: sdk-py-harness >Reporter: Ankur Goenka >Assignee: Ankur Goenka >Priority: Major > Labels: performance, portability > Time Spent: 2h 20m > Remaining Estimate: 0h > > Support multiple python SDK process on a VM to fully utilize a machine. > Each SDK Process will work in isolation and interact with Runner Harness > independently. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-3418) Python Fnapi - Support Multiple SDK workers on a single VM
[ https://issues.apache.org/jira/browse/BEAM-3418?focusedWorklogId=82512&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-82512 ] ASF GitHub Bot logged work on BEAM-3418: Author: ASF GitHub Bot Created on: 20/Mar/18 22:17 Start Date: 20/Mar/18 22:17 Worklog Time Spent: 10m Work Description: angoenka commented on a change in pull request #4587: [BEAM-3418] Send worker_id in all grpc channels to runner harness URL: https://github.com/apache/beam/pull/4587#discussion_r175918509 ## File path: sdks/python/apache_beam/runners/worker/log_handler.py ## @@ -49,6 +50,8 @@ class FnApiLogRecordHandler(logging.Handler): def __init__(self, log_service_descriptor): super(FnApiLogRecordHandler, self).__init__() self._log_channel = grpc.insecure_channel(log_service_descriptor.url) +self._log_channel = grpc.intercept_channel(self._log_channel, Review comment: Sure! This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 82512) Time Spent: 2.5h (was: 2h 20m) > Python Fnapi - Support Multiple SDK workers on a single VM > -- > > Key: BEAM-3418 > URL: https://issues.apache.org/jira/browse/BEAM-3418 > Project: Beam > Issue Type: Improvement > Components: sdk-py-harness >Reporter: Ankur Goenka >Assignee: Ankur Goenka >Priority: Major > Labels: performance, portability > Time Spent: 2.5h > Remaining Estimate: 0h > > Support multiple python SDK process on a VM to fully utilize a machine. > Each SDK Process will work in isolation and interact with Runner Harness > independently. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-3418) Python Fnapi - Support Multiple SDK workers on a single VM
[ https://issues.apache.org/jira/browse/BEAM-3418?focusedWorklogId=82475&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-82475 ] ASF GitHub Bot logged work on BEAM-3418: Author: ASF GitHub Bot Created on: 20/Mar/18 20:45 Start Date: 20/Mar/18 20:45 Worklog Time Spent: 10m Work Description: angoenka commented on a change in pull request #4587: [BEAM-3418] Send worker_id in all grpc channels to runner harness URL: https://github.com/apache/beam/pull/4587#discussion_r175916247 ## File path: sdks/python/apache_beam/runners/worker/worker_id_interceptor.py ## @@ -0,0 +1,54 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +#http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Client Interceptor to inject worker_id""" +from __future__ import absolute_import +from __future__ import division Review comment: No, we don't need these imports. I added them to resolve the compatibility issue between Python 2 and 3, following https://docs.python.org/3/howto/pyporting.html -- should I remove them? This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 82475) Time Spent: 2h 10m (was: 2h) > Python Fnapi - Support Multiple SDK workers on a single VM > -- > > Key: BEAM-3418 > URL: https://issues.apache.org/jira/browse/BEAM-3418 > Project: Beam > Issue Type: Improvement > Components: sdk-py-harness >Reporter: Ankur Goenka >Assignee: Ankur Goenka >Priority: Major > Labels: performance, portability > Time Spent: 2h 10m > Remaining Estimate: 0h > > Support multiple Python SDK processes on a VM to fully utilize a machine. > Each SDK process will work in isolation and interact with the Runner Harness > independently. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-3418) Python Fnapi - Support Multiple SDK workers on a single VM
[ https://issues.apache.org/jira/browse/BEAM-3418?focusedWorklogId=82474&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-82474 ] ASF GitHub Bot logged work on BEAM-3418: Author: ASF GitHub Bot Created on: 20/Mar/18 20:39 Start Date: 20/Mar/18 20:39 Worklog Time Spent: 10m Work Description: aaltay commented on a change in pull request #4587: [BEAM-3418] Send worker_id in all grpc channels to runner harness URL: https://github.com/apache/beam/pull/4587#discussion_r175912872 ## File path: sdks/python/apache_beam/runners/worker/log_handler.py ## @@ -49,6 +50,8 @@ class FnApiLogRecordHandler(logging.Handler): def __init__(self, log_service_descriptor): super(FnApiLogRecordHandler, self).__init__() self._log_channel = grpc.insecure_channel(log_service_descriptor.url) +self._log_channel = grpc.intercept_channel(self._log_channel, Review comment: (Same simplification comment applies here.) This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 82474) Time Spent: 2h (was: 1h 50m) > Python Fnapi - Support Multiple SDK workers on a single VM > -- > > Key: BEAM-3418 > URL: https://issues.apache.org/jira/browse/BEAM-3418 > Project: Beam > Issue Type: Improvement > Components: sdk-py-harness >Reporter: Ankur Goenka >Assignee: Ankur Goenka >Priority: Major > Labels: performance, portability > Time Spent: 2h > Remaining Estimate: 0h > > Support multiple python SDK process on a VM to fully utilize a machine. > Each SDK Process will work in isolation and interact with Runner Harness > independently. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-3418) Python Fnapi - Support Multiple SDK workers on a single VM
[ https://issues.apache.org/jira/browse/BEAM-3418?focusedWorklogId=82469&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-82469 ] ASF GitHub Bot logged work on BEAM-3418: Author: ASF GitHub Bot Created on: 20/Mar/18 20:39 Start Date: 20/Mar/18 20:39 Worklog Time Spent: 10m Work Description: aaltay commented on a change in pull request #4587: [BEAM-3418] Send worker_id in all grpc channels to runner harness URL: https://github.com/apache/beam/pull/4587#discussion_r175910440 ## File path: sdks/python/apache_beam/runners/worker/worker_id_interceptor.py ## @@ -0,0 +1,54 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +#http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Client Interceptor to inject worker_id""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections +import os +import uuid + +import grpc + + +class _ClientCallDetails( +collections.namedtuple('_ClientCallDetails', + ('method', 'timeout', 'metadata', 'credentials')), +grpc.ClientCallDetails): + pass + + +class WorkerIdInterceptor(grpc.StreamStreamClientInterceptor): + + # Unique worker Id for this worker. 
+ _worker_id = os.environ['WORKER_ID'] if os.environ.has_key( Review comment: Do we want to fail if WORKER_ID is not found? This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 82469) Time Spent: 1h 20m (was: 1h 10m) > Python Fnapi - Support Multiple SDK workers on a single VM > -- > > Key: BEAM-3418 > URL: https://issues.apache.org/jira/browse/BEAM-3418 > Project: Beam > Issue Type: Improvement > Components: sdk-py-harness >Reporter: Ankur Goenka >Assignee: Ankur Goenka >Priority: Major > Labels: performance, portability > Time Spent: 1h 20m > Remaining Estimate: 0h > > Support multiple python SDK process on a VM to fully utilize a machine. > Each SDK Process will work in isolation and interact with Runner Harness > independently. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-3418) Python Fnapi - Support Multiple SDK workers on a single VM
[ https://issues.apache.org/jira/browse/BEAM-3418?focusedWorklogId=82473&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-82473 ] ASF GitHub Bot logged work on BEAM-3418: Author: ASF GitHub Bot Created on: 20/Mar/18 20:39 Start Date: 20/Mar/18 20:39 Worklog Time Spent: 10m Work Description: aaltay commented on a change in pull request #4587: [BEAM-3418] Send worker_id in all grpc channels to runner harness URL: https://github.com/apache/beam/pull/4587#discussion_r175914311 ## File path: sdks/python/apache_beam/runners/dataflow/internal/apiclient.py ## @@ -177,6 +177,14 @@ def __init__(self, packages, options, environment_version, pipeline_url): if self.debug_options.experiments: for experiment in self.debug_options.experiments: self.proto.experiments.append(experiment) +# Add use_multiple_sdk_containers flag if its not already present. Do not +# add the flag if 'no_use_multiple_sdk_containers' is present. +# TODO: Cleanup use_multiple_sdk_containers once we deprecate Python SDK +# till version 2.4. +if (job_type.startswith('FNAPI_') and +'use_multiple_sdk_containers' not in self.proto.experiments and +'no_use_multiple_sdk_containers' not in self.proto.experiments): + self.proto.experiments.append('use_multiple_sdk_containers') Review comment: It is preferable to modify debug_options.experiments (as done above for `runner_harness_override`). This also properly updates the user-visible pipeline options in the UI, and the experiment will be automatically added to the proto by the loop above. It would also help combine everything related to `if job_type.startswith('FNAPI_'):` in a single place. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 82473) Time Spent: 1h 50m (was: 1h 40m) > Python Fnapi - Support Multiple SDK workers on a single VM > -- > > Key: BEAM-3418 > URL: https://issues.apache.org/jira/browse/BEAM-3418 > Project: Beam > Issue Type: Improvement > Components: sdk-py-harness >Reporter: Ankur Goenka >Assignee: Ankur Goenka >Priority: Major > Labels: performance, portability > Time Spent: 1h 50m > Remaining Estimate: 0h > > Support multiple Python SDK processes on a VM to fully utilize a machine. > Each SDK process will work in isolation and interact with the Runner Harness > independently. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-3418) Python Fnapi - Support Multiple SDK workers on a single VM
[ https://issues.apache.org/jira/browse/BEAM-3418?focusedWorklogId=82472&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-82472 ] ASF GitHub Bot logged work on BEAM-3418: Author: ASF GitHub Bot Created on: 20/Mar/18 20:39 Start Date: 20/Mar/18 20:39 Worklog Time Spent: 10m Work Description: aaltay commented on a change in pull request #4587: [BEAM-3418] Send worker_id in all grpc channels to runner harness URL: https://github.com/apache/beam/pull/4587#discussion_r175912611 ## File path: sdks/python/apache_beam/runners/worker/sdk_worker.py ## @@ -43,6 +44,8 @@ def __init__(self, control_address, worker_count): self._worker_count = worker_count self._worker_index = 0 self._control_channel = grpc.insecure_channel(control_address) +self._control_channel = grpc.intercept_channel(self._control_channel, Review comment: Should we simplify this as: `self._control_channel = grpc.intercept_channel(grpc.insecure_channel(control_address), WorkerIdInterceptor())` This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 82472) > Python Fnapi - Support Multiple SDK workers on a single VM > -- > > Key: BEAM-3418 > URL: https://issues.apache.org/jira/browse/BEAM-3418 > Project: Beam > Issue Type: Improvement > Components: sdk-py-harness >Reporter: Ankur Goenka >Assignee: Ankur Goenka >Priority: Major > Labels: performance, portability > Time Spent: 1h 40m > Remaining Estimate: 0h > > Support multiple python SDK process on a VM to fully utilize a machine. > Each SDK Process will work in isolation and interact with Runner Harness > independently. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-3418) Python Fnapi - Support Multiple SDK workers on a single VM
[ https://issues.apache.org/jira/browse/BEAM-3418?focusedWorklogId=82471&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-82471 ] ASF GitHub Bot logged work on BEAM-3418: Author: ASF GitHub Bot Created on: 20/Mar/18 20:39 Start Date: 20/Mar/18 20:39 Worklog Time Spent: 10m Work Description: aaltay commented on a change in pull request #4587: [BEAM-3418] Send worker_id in all grpc channels to runner harness URL: https://github.com/apache/beam/pull/4587#discussion_r175912118 ## File path: sdks/python/apache_beam/runners/worker/worker_id_interceptor.py ## @@ -0,0 +1,54 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +#http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Client Interceptor to inject worker_id""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections +import os +import uuid + +import grpc + + +class _ClientCallDetails( +collections.namedtuple('_ClientCallDetails', + ('method', 'timeout', 'metadata', 'credentials')), +grpc.ClientCallDetails): + pass + + +class WorkerIdInterceptor(grpc.StreamStreamClientInterceptor): + + # Unique worker Id for this worker. 
+ _worker_id = os.environ['WORKER_ID'] if os.environ.has_key( + 'WORKER_ID') else str(uuid.uuid4()) + + def __init__(self): +pass + + def intercept_stream_stream(self, continuation, client_call_details, + request_iterator): +metadata = [] +if client_call_details.metadata is not None: + metadata = list(client_call_details.metadata) +metadata.append(('worker_id', self._worker_id)) Review comment: Would it be an error (or expected) for client_call_details to already have worker_id? This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 82471) Time Spent: 1h 40m (was: 1.5h) > Python Fnapi - Support Multiple SDK workers on a single VM > -- > > Key: BEAM-3418 > URL: https://issues.apache.org/jira/browse/BEAM-3418 > Project: Beam > Issue Type: Improvement > Components: sdk-py-harness >Reporter: Ankur Goenka >Assignee: Ankur Goenka >Priority: Major > Labels: performance, portability > Time Spent: 1h 40m > Remaining Estimate: 0h > > Support multiple python SDK process on a VM to fully utilize a machine. > Each SDK Process will work in isolation and interact with Runner Harness > independently. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-3418) Python Fnapi - Support Multiple SDK workers on a single VM
[ https://issues.apache.org/jira/browse/BEAM-3418?focusedWorklogId=82470&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-82470 ] ASF GitHub Bot logged work on BEAM-3418: Author: ASF GitHub Bot Created on: 20/Mar/18 20:39 Start Date: 20/Mar/18 20:39 Worklog Time Spent: 10m Work Description: aaltay commented on a change in pull request #4587: [BEAM-3418] Send worker_id in all grpc channels to runner harness URL: https://github.com/apache/beam/pull/4587#discussion_r175911147 ## File path: sdks/python/apache_beam/runners/worker/worker_id_interceptor.py ## @@ -0,0 +1,54 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +#http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Client Interceptor to inject worker_id""" +from __future__ import absolute_import +from __future__ import division Review comment: Do we need `print_function` and `division` imports? This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 82470) Time Spent: 1.5h (was: 1h 20m) > Python Fnapi - Support Multiple SDK workers on a single VM > -- > > Key: BEAM-3418 > URL: https://issues.apache.org/jira/browse/BEAM-3418 > Project: Beam > Issue Type: Improvement > Components: sdk-py-harness >Reporter: Ankur Goenka >Assignee: Ankur Goenka >Priority: Major > Labels: performance, portability > Time Spent: 1.5h > Remaining Estimate: 0h > > Support multiple Python SDK processes on a VM to fully utilize a machine. > Each SDK process will work in isolation and interact with the Runner Harness > independently. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-3418) Python Fnapi - Support Multiple SDK workers on a single VM
[ https://issues.apache.org/jira/browse/BEAM-3418?focusedWorklogId=82432&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-82432 ] ASF GitHub Bot logged work on BEAM-3418: Author: ASF GitHub Bot Created on: 20/Mar/18 19:40 Start Date: 20/Mar/18 19:40 Worklog Time Spent: 10m Work Description: angoenka commented on issue #4587: [BEAM-3418] Send worker_id in all grpc channels to runner harness URL: https://github.com/apache/beam/pull/4587#issuecomment-374730490 @aaltay Can you please take a look? This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 82432) Time Spent: 1h 10m (was: 1h) > Python Fnapi - Support Multiple SDK workers on a single VM > -- > > Key: BEAM-3418 > URL: https://issues.apache.org/jira/browse/BEAM-3418 > Project: Beam > Issue Type: Improvement > Components: sdk-py-harness >Reporter: Ankur Goenka >Assignee: Ankur Goenka >Priority: Major > Labels: performance, portability > Time Spent: 1h 10m > Remaining Estimate: 0h > > Support multiple python SDK process on a VM to fully utilize a machine. > Each SDK Process will work in isolation and interact with Runner Harness > independently. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-3418) Python Fnapi - Support Multiple SDK workers on a single VM
[ https://issues.apache.org/jira/browse/BEAM-3418?focusedWorklogId=81235&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-81235 ] ASF GitHub Bot logged work on BEAM-3418: Author: ASF GitHub Bot Created on: 16/Mar/18 17:18 Start Date: 16/Mar/18 17:18 Worklog Time Spent: 10m Work Description: lukecwik commented on a change in pull request #4587: [BEAM-3418] Send worker_id in all grpc channels to runner harness URL: https://github.com/apache/beam/pull/4587#discussion_r175159017 ## File path: sdks/python/apache_beam/runners/dataflow/internal/apiclient.py ## @@ -176,6 +176,13 @@ def __init__(self, packages, options, environment_version, pipeline_url): if self.debug_options.experiments: for experiment in self.debug_options.experiments: self.proto.experiments.append(experiment) +# Add MULTIPLE_SDK_CONTAINERS flag if its not already present. Do not add +# the flag if 'NO_MULTIPLE_SDK_CONTAINERS' is present. +# TODO: Cleanup MULTIPLE_SDK_CONTAINERS once we depricate Python SDK till Review comment: `depricate` -> `deprecate` This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 81235) Time Spent: 1h (was: 50m) > Python Fnapi - Support Multiple SDK workers on a single VM > -- > > Key: BEAM-3418 > URL: https://issues.apache.org/jira/browse/BEAM-3418 > Project: Beam > Issue Type: Improvement > Components: sdk-py-harness >Reporter: Ankur Goenka >Assignee: Ankur Goenka >Priority: Major > Labels: performance, portability > Time Spent: 1h > Remaining Estimate: 0h > > Support multiple python SDK process on a VM to fully utilize a machine. > Each SDK Process will work in isolation and interact with Runner Harness > independently. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-3418) Python Fnapi - Support Multiple SDK workers on a single VM
[ https://issues.apache.org/jira/browse/BEAM-3418?focusedWorklogId=81030&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-81030 ] ASF GitHub Bot logged work on BEAM-3418: Author: ASF GitHub Bot Created on: 16/Mar/18 00:20 Start Date: 16/Mar/18 00:20 Worklog Time Spent: 10m Work Description: angoenka commented on a change in pull request #4587: [BEAM-3418] Send worker_id in all grpc channels to runner harness URL: https://github.com/apache/beam/pull/4587#discussion_r174969852 ## File path: sdks/python/apache_beam/runners/dataflow/internal/apiclient.py ## @@ -176,6 +176,13 @@ def __init__(self, packages, options, environment_version, pipeline_url): if self.debug_options.experiments: for experiment in self.debug_options.experiments: self.proto.experiments.append(experiment) +# Add MULTIPLE_SDK_CONTAINERS flag if its not already present. Do not add +# the flag if 'NO_MULTIPLE_SDK_CONTAINERS' is present. +# TODO: Cleanup MULTIPLE_SDK_CONTAINERS once we depricate Python SDK till +# version 2.4. +if ('MULTIPLE_SDK_CONTAINERS' not in self.proto.experiments and Review comment: @robertwb @aaltay I am planning to make this feature opt out for new SDKs. Instead should we keep it opt in? This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 81030) Time Spent: 50m (was: 40m) > Python Fnapi - Support Multiple SDK workers on a single VM > -- > > Key: BEAM-3418 > URL: https://issues.apache.org/jira/browse/BEAM-3418 > Project: Beam > Issue Type: Improvement > Components: sdk-py-harness >Reporter: Ankur Goenka >Assignee: Ankur Goenka >Priority: Major > Labels: performance, portability > Time Spent: 50m > Remaining Estimate: 0h > > Support multiple python SDK process on a VM to fully utilize a machine. 
> Each SDK Process will work in isolation and interact with Runner Harness > independently. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-3418) Python Fnapi - Support Multiple SDK workers on a single VM
[ https://issues.apache.org/jira/browse/BEAM-3418?focusedWorklogId=81028&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-81028 ] ASF GitHub Bot logged work on BEAM-3418: Author: ASF GitHub Bot Created on: 16/Mar/18 00:15 Start Date: 16/Mar/18 00:15 Worklog Time Spent: 10m Work Description: angoenka commented on a change in pull request #4587: [BEAM-3418] Send worker_id in all grpc channels to runner harness URL: https://github.com/apache/beam/pull/4587#discussion_r174969164 ## File path: sdks/python/apache_beam/runners/dataflow/internal/apiclient.py ## @@ -176,6 +176,13 @@ def __init__(self, packages, options, environment_version, pipeline_url): if self.debug_options.experiments: for experiment in self.debug_options.experiments: self.proto.experiments.append(experiment) +# Add MULTIPLE_SDK_CONTAINERS flag if its not already present. Do not add +# the flag if 'NO_MULTIPLE_SDK_CONTAINERS' is present. +# TODO: Cleanup MULTIPLE_SDK_CONTAINERS once we depricate Python SDK till +# version 2.4. +if ('MULTIPLE_SDK_CONTAINERS' not in self.proto.experiments and Review comment: I expect this CL to get into 2.5. In a way, this flag is required to help the router distinguish between old SDKs (up to 2.4) and new SDKs (2.5 onward). So once no SDK older than 2.5 is in use, we will no longer need to distinguish between SDKs, at least for the MultiSdk functionality, and hence it automatically becomes the default feature. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 81028) Time Spent: 40m (was: 0.5h) > Python Fnapi - Support Multiple SDK workers on a single VM > -- > > Key: BEAM-3418 > URL: https://issues.apache.org/jira/browse/BEAM-3418 > Project: Beam > Issue Type: Improvement > Components: sdk-py-harness >Reporter: Ankur Goenka >Assignee: Ankur Goenka >Priority: Major > Labels: performance, portability > Time Spent: 40m > Remaining Estimate: 0h > > Support multiple python SDK process on a VM to fully utilize a machine. > Each SDK Process will work in isolation and interact with Runner Harness > independently. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-3418) Python Fnapi - Support Multiple SDK workers on a single VM
[ https://issues.apache.org/jira/browse/BEAM-3418?focusedWorklogId=81027&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-81027 ] ASF GitHub Bot logged work on BEAM-3418: Author: ASF GitHub Bot Created on: 16/Mar/18 00:09 Start Date: 16/Mar/18 00:09 Worklog Time Spent: 10m Work Description: tvalentyn commented on a change in pull request #4587: [BEAM-3418] Send worker_id in all grpc channels to runner harness URL: https://github.com/apache/beam/pull/4587#discussion_r174968455 ## File path: sdks/python/apache_beam/runners/dataflow/internal/apiclient.py ## @@ -176,6 +176,13 @@ def __init__(self, packages, options, environment_version, pipeline_url): if self.debug_options.experiments: for experiment in self.debug_options.experiments: self.proto.experiments.append(experiment) +# Add MULTIPLE_SDK_CONTAINERS flag if its not already present. Do not add +# the flag if 'NO_MULTIPLE_SDK_CONTAINERS' is present. +# TODO: Cleanup MULTIPLE_SDK_CONTAINERS once we depricate Python SDK till +# version 2.4. +if ('MULTIPLE_SDK_CONTAINERS' not in self.proto.experiments and Review comment: How confident are we to make this a default behavior for 2.5? This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 81027) Time Spent: 0.5h (was: 20m) > Python Fnapi - Support Multiple SDK workers on a single VM > -- > > Key: BEAM-3418 > URL: https://issues.apache.org/jira/browse/BEAM-3418 > Project: Beam > Issue Type: Improvement > Components: sdk-py-harness >Reporter: Ankur Goenka >Assignee: Ankur Goenka >Priority: Major > Labels: performance, portability > Time Spent: 0.5h > Remaining Estimate: 0h > > Support multiple python SDK process on a VM to fully utilize a machine. 
> Each SDK Process will work in isolation and interact with Runner Harness > independently. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-3418) Python Fnapi - Support Multiple SDK workers on a single VM
[ https://issues.apache.org/jira/browse/BEAM-3418?focusedWorklogId=80055&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-80055 ] ASF GitHub Bot logged work on BEAM-3418: Author: ASF GitHub Bot Created on: 13/Mar/18 20:57 Start Date: 13/Mar/18 20:57 Worklog Time Spent: 10m Work Description: angoenka commented on issue #4587: [BEAM-3418] Send worker_id in all grpc channels to runner harness URL: https://github.com/apache/beam/pull/4587#issuecomment-372815857 retest this please This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 80055) Time Spent: 20m (was: 10m) > Python Fnapi - Support Multiple SDK workers on a single VM > -- > > Key: BEAM-3418 > URL: https://issues.apache.org/jira/browse/BEAM-3418 > Project: Beam > Issue Type: Improvement > Components: sdk-py-harness >Reporter: Ankur Goenka >Assignee: Ankur Goenka >Priority: Major > Labels: performance, portability > Time Spent: 20m > Remaining Estimate: 0h > > Support multiple python SDK process on a VM to fully utilize a machine. > Each SDK Process will work in isolation and interact with Runner Harness > independently. -- This message was sent by Atlassian JIRA (v7.6.3#76005)