y1chi commented on code in PR #26085:
URL: https://github.com/apache/beam/pull/26085#discussion_r1222094540


##########
runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/GrpcWindmillServer.java:
##########
@@ -862,6 +885,110 @@ public final Instant startTime() {
     }
   }
 
+  static class GetWorkTimingInfosTracker {
+    private final Map<State, Duration> getWorkStreamLatencies;
+    private Instant workItemCreationStartTime = 
Instant.ofEpochMilli(Long.MAX_VALUE);
+    private Instant workItemLastChunkReceivedByWorkerTime = Instant.EPOCH;
+
+    public GetWorkTimingInfosTracker() {
+      this.getWorkStreamLatencies = new EnumMap<>(State.class);
+    }
+
+    public void addTimingInfo(Collection<GetWorkStreamTimingInfo> infos) {
+      // We want to record the duration of each stage and also reflect the total work item
+      // processing time. This can be tricky because the timings of different
+      // StreamingGetWorkResponseChunks can be interleaved. The current strategy is to record the
+      // summed duration of each stage across different chunks, then divide the total duration
+      // (from the creation of the first chunk in the windmill worker to the reception of the
+      // last chunk by the user worker) proportionally according to the summed durations across
+      // the stages. This should allow us to identify the slow stage while avoiding confusion
+      // when comparing the stage durations to the total elapsed wall time.
+      Map<Event, Instant> getWorkStreamTimings = new HashMap<>();
+      for (GetWorkStreamTimingInfo info : infos) {
+        getWorkStreamTimings.putIfAbsent(
+            info.getEvent(), Instant.ofEpochMilli(info.getTimestampUsec() / 
1000));
+      }
+
+      for (Cell<Event, Event, State> cell : EVENT_STATE_TABLE.cellSet()) {
+        Event start = cell.getRowKey();
+        Event end = cell.getColumnKey();
+        State state = cell.getValue();
+        Instant startTiming = getWorkStreamTimings.get(start);
+        Instant endTiming = getWorkStreamTimings.get(end);
+        if (startTiming != null && endTiming != null) {
+          getWorkStreamLatencies.compute(
+              state,
+              (state_key, duration) -> {
+                Duration newDuration = new Duration(startTiming, endTiming);
+                if (duration == null) {
+                  return newDuration;
+                }
+                return duration.plus(newDuration);
+              });
+        }
+      }
+      Instant getWorkCreationStartTime = 
getWorkStreamTimings.get(Event.GET_WORK_CREATION_START);
+      if (getWorkCreationStartTime != null
+          && getWorkCreationStartTime.isBefore(workItemCreationStartTime)) {
+        workItemCreationStartTime = getWorkCreationStartTime;
+      }
+      Instant receivedByDispatcherTiming =
+          getWorkStreamTimings.get(Event.GET_WORK_RECEIVED_BY_DISPATCHER);
+      Instant now = Instant.now();
+      if (receivedByDispatcherTiming != null) {
+        getWorkStreamLatencies.compute(
+            State.GET_WORK_IN_TRANSIT_TO_USER_WORKER,
+            (state_key, duration) -> {
+              Duration newDuration = new Duration(receivedByDispatcherTiming, 
now);
+              if (duration == null) {
+                return newDuration;
+              }
+              return duration.plus(newDuration);
+            });
+      }
+      workItemLastChunkReceivedByWorkerTime = now;
+    }
+
+    List<LatencyAttribution> getLatencyAttributions() {
+      if (getWorkStreamLatencies.size() == 0) {
+        return new ArrayList<>();

Review Comment:
   Done.



##########
runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/GrpcWindmillServer.java:
##########
@@ -862,6 +885,110 @@ public final Instant startTime() {
     }
   }
 
+  static class GetWorkTimingInfosTracker {

Review Comment:
   Done.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to