[
https://issues.apache.org/jira/browse/BEAM-8624?focusedWorklogId=369497&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-369497
]
ASF GitHub Bot logged work on BEAM-8624:
----------------------------------------
Author: ASF GitHub Bot
Created on: 10/Jan/20 00:54
Start Date: 10/Jan/20 00:54
Worklog Time Spent: 10m
Work Description: y1chi commented on pull request #10115: [BEAM-8624]
Implement Worker Status FnService in Dataflow runner
URL: https://github.com/apache/beam/pull/10115#discussion_r365030515
##########
File path:
runners/java-fn-execution/src/main/java/org/apache/beam/runners/fnexecution/status/BeamWorkerStatusGrpcService.java
##########
@@ -0,0 +1,207 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.fnexecution.status;
+
+import java.io.IOException;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.ConcurrentSkipListMap;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
+import org.apache.beam.model.fnexecution.v1.BeamFnApi.WorkerStatusRequest;
+import org.apache.beam.model.fnexecution.v1.BeamFnApi.WorkerStatusResponse;
+import
org.apache.beam.model.fnexecution.v1.BeamFnWorkerStatusGrpc.BeamFnWorkerStatusImplBase;
+import org.apache.beam.model.pipeline.v1.Endpoints.ApiServiceDescriptor;
+import org.apache.beam.runners.fnexecution.FnService;
+import org.apache.beam.runners.fnexecution.HeaderAccessor;
+import org.apache.beam.vendor.grpc.v1p21p0.io.grpc.stub.StreamObserver;
+import
org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting;
+import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Strings;
+import
org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableSet;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * A Fn Status service which can collect run-time status information from SDK
harnesses for
+ * debugging purposes.
+ */
+public class BeamWorkerStatusGrpcService extends BeamFnWorkerStatusImplBase
implements FnService {
+
+ private static final Logger LOG =
LoggerFactory.getLogger(BeamWorkerStatusGrpcService.class);
+ private static final String DEFAULT_EXCEPTION_RESPONSE =
+ "Error: exception encountered getting status from SDK harness";
+
+ private final HeaderAccessor headerAccessor;
+ private final Map<String, CompletableFuture<WorkerStatusClient>>
connectedClient =
+ Collections.synchronizedMap(new HashMap<>());
+
+ private BeamWorkerStatusGrpcService(
+ ApiServiceDescriptor apiServiceDescriptor, HeaderAccessor
headerAccessor) {
+ this.headerAccessor = headerAccessor;
+ LOG.info("Launched Beam Fn Status service at {}", apiServiceDescriptor);
+ }
+
+ /**
+ * Create new instance of {@link BeamWorkerStatusGrpcService}.
+ *
+ * @param apiServiceDescriptor describes the configuration for the endpoint
the server will
+ * expose.
+ * @param headerAccessor gRPC header accessor used to obtain the
SDK harness worker id.
+ * @return {@link BeamWorkerStatusGrpcService}
+ */
+ public static BeamWorkerStatusGrpcService create(
+ ApiServiceDescriptor apiServiceDescriptor, HeaderAccessor
headerAccessor) {
+ return new BeamWorkerStatusGrpcService(apiServiceDescriptor,
headerAccessor);
+ }
+
+ @Override
+ public void close() throws Exception {
+ synchronized (connectedClient) {
+ for (CompletableFuture<WorkerStatusClient> clientFuture :
connectedClient.values()) {
+ if (clientFuture.isDone()) {
+ clientFuture.get().close();
+ }
+ }
+ connectedClient.clear();
+ }
+ }
+
+ @Override
+ public StreamObserver<WorkerStatusResponse> workerStatus(
+ StreamObserver<WorkerStatusRequest> requestObserver) {
+ String workerId = headerAccessor.getSdkWorkerId();
+ LOG.info("Beam Fn Status client connected with id {}", workerId);
+
+ WorkerStatusClient fnApiStatusClient =
+ WorkerStatusClient.forRequestObserver(workerId, requestObserver);
+ connectedClient.compute(
+ workerId,
+ (k, existingClientFuture) -> {
+ if (existingClientFuture != null) {
+ try {
+ if (existingClientFuture.isDone()) {
+ LOG.info(
+ "SDK Worker {} was connected to status server previously,
disconnecting old client",
+ workerId);
+ existingClientFuture.get().close();
+ } else {
+ existingClientFuture.complete(fnApiStatusClient);
+ return existingClientFuture;
+ }
+ } catch (IOException | InterruptedException | ExecutionException
e) {
+ LOG.warn("Error closing worker status client", e);
+ }
+ }
+ return CompletableFuture.completedFuture(fnApiStatusClient);
+ });
+ return fnApiStatusClient.getResponseObserver();
+ }
+
+ /**
+ * Get the latest SDK worker status from the client's corresponding SDK
harness.
+ *
+ * @param workerId worker id of the SDK harness.
+ * @return the worker status string obtained from the SDK harness, or an
error response string if waiting for it is interrupted, fails, or times out.
+ */
+ public String getSingleWorkerStatus(String workerId, long timeout, TimeUnit
timeUnit) {
+ try {
+ return getWorkerStatus(workerId).get(timeout, timeUnit);
+ } catch (InterruptedException | ExecutionException | TimeoutException e) {
+ return handleAndReturnExceptionResponse(e);
+ }
+ }
+
+ /**
+ * Get all the statuses from all connected SDK harnesses within specified
timeout. Any errors
+ * getting status from the SDK harnesses will be returned in the map.
+ *
+ * @param timeout max time waiting for the response from each SDK harness.
+ * @param timeUnit timeout time unit.
+ * @return All the statuses in a map keyed by the SDK harness id.
+ */
+ public Map<String, String> getAllWorkerStatuses(long timeout, TimeUnit
timeUnit) {
+ // return result in worker id sorted map.
+ Map<String, String> allStatuses = new
ConcurrentSkipListMap<>(Comparator.naturalOrder());
+
+ Set<String> connectedClientIdsCopy =
ImmutableSet.copyOf(connectedClient.keySet());
Review comment:
hmm right, I hesitated on this, thanks.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
Issue Time Tracking
-------------------
Worklog Id: (was: 369497)
Time Spent: 15h 50m (was: 15h 40m)
> Implement FnService for status api in Dataflow runner
> -----------------------------------------------------
>
> Key: BEAM-8624
> URL: https://issues.apache.org/jira/browse/BEAM-8624
> Project: Beam
> Issue Type: Sub-task
> Components: runner-dataflow
> Reporter: Yichi Zhang
> Assignee: Yichi Zhang
> Priority: Major
> Time Spent: 15h 50m
> Remaining Estimate: 0h
>
--
This message was sent by Atlassian Jira
(v8.3.4#803005)