lukecwik commented on code in PR #25065:
URL: https://github.com/apache/beam/pull/25065#discussion_r1085726016


##########
model/fn-execution/src/main/proto/org/apache/beam/model/fn_execution/v1/beam_fn_api.proto:
##########
@@ -137,12 +138,78 @@ message InstructionResponse {
     FinalizeBundleResponse finalize_bundle = 1004;
     MonitoringInfosMetadataResponse monitoring_infos = 1005;
     HarnessMonitoringInfosResponse harness_monitoring_infos = 1006;
+    SampleDataResponse sample = 1007;
 
     // DEPRECATED
     RegisterResponse register = 1000;
   }
 }
 
+// If supported, the `SampleDataRequest` will respond with a
+// `SampleDataResponse`. The SDK being queried must have the
+// "beam:protocol:data_sampling:v1" capability. Samples are taken from all
+// given ProcessBundleDescriptor ids and filtered on PCollection ids. An empty
+// list will match everything.

Review Comment:
   ```suggestion
   // If supported, the `SampleDataRequest` will respond with a
   // `SampleDataResponse`. The SDK being queried must have the
   // "beam:protocol:data_sampling:v1" capability. Samples are taken from all
   // specified ProcessBundleDescriptor ids and all specified PCollection ids 
(equivalent to OR clause). An empty
   // list will match everything.
   ```



##########
model/fn-execution/src/main/proto/org/apache/beam/model/fn_execution/v1/beam_fn_api.proto:
##########
@@ -137,12 +138,78 @@ message InstructionResponse {
     FinalizeBundleResponse finalize_bundle = 1004;
     MonitoringInfosMetadataResponse monitoring_infos = 1005;
     HarnessMonitoringInfosResponse harness_monitoring_infos = 1006;
+    SampleDataResponse sample = 1007;
 
     // DEPRECATED
     RegisterResponse register = 1000;
   }
 }
 
+// If supported, the `SampleDataRequest` will respond with a
+// `SampleDataResponse`. The SDK being queried must have the
+// "beam:protocol:data_sampling:v1" capability. Samples are taken from all
+// given ProcessBundleDescriptor ids and filtered on PCollection ids. An empty
+// list will match everything.
+message SampleDataRequest {
+  // (Optional) The ProcessBundleDescriptor ids to filter for.
+  repeated string process_bundle_descriptor_ids = 1;
+
+  // (Optional) The PCollection ids to filter for.
+  repeated string pcollection_ids = 2;
+}
+
+
+// An element sampled when the SDK is processing a bundle. This is a proto
+// message to allow for additional per-element metadata.
+message SampledElement {
+  // Required. Sampled raw bytes for an element. This is a
+  // single encoded element in the nested context.
+  bytes element = 1;
+
+  // FUTURE WORK: Capture lull detections and exceptions.
+  //
+  // Optional. Present if there was an exception
+  // processing the above element.
+  //
+  // LogEntry exception_entry = 2;
+}
+
+// If supported, the `SampleDataResponse` will contain samples from 
PCollections
+// based upon the filters specified in the request.
+message SampleDataResponse {
+  message ElementList {
+    // Required. The individual elements sampled from a PCollection.
+    repeated SampledElement elements = 1;
+  }
+
+  // Map from PCollection id to sample elements.
+  map<string, ElementList> element_samples = 1;
+
+  // FUTURE WORK: Investigate ways of storing multiple interesting types of
+  // sampled elements. There are two ways of accomplishing this:
+  // 1) Maps of typed elements: include multiple maps here with typed element
+  // proto messages, ex.
+  //
+  // message SlowElement {...}
+  // message ErroredElement {...}
+  // map<string, SlowElement> slow_elements
+  // map<string, ErroredElement> errored_elements
+  //
+  // However, this forces an element into a single category. It disallows
+  // classification across multiple characteristics (like a slow and errored
+  // element).
+  //
+  // 2) Compositional types: allow for URN and payloads on the base
+  // SampledElement message for interesting characteristics. The base class
+  // can then be queried for specific URNs. This allows for multiple
+  // attributes on the same element.
+  //
+  // message SlowElement {...}
+  // extend SampledElement { optional SlowElement slow_element = ... }

Review Comment:
   ```suggestion
   ```



##########
model/fn-execution/src/main/proto/org/apache/beam/model/fn_execution/v1/beam_fn_api.proto:
##########
@@ -137,12 +138,78 @@ message InstructionResponse {
     FinalizeBundleResponse finalize_bundle = 1004;
     MonitoringInfosMetadataResponse monitoring_infos = 1005;
     HarnessMonitoringInfosResponse harness_monitoring_infos = 1006;
+    SampleDataResponse sample = 1007;
 
     // DEPRECATED
     RegisterResponse register = 1000;
   }
 }
 
+// If supported, the `SampleDataRequest` will respond with a
+// `SampleDataResponse`. The SDK being queried must have the
+// "beam:protocol:data_sampling:v1" capability. Samples are taken from all
+// given ProcessBundleDescriptor ids and filtered on PCollection ids. An empty
+// list will match everything.
+message SampleDataRequest {
+  // (Optional) The ProcessBundleDescriptor ids to filter for.
+  repeated string process_bundle_descriptor_ids = 1;
+
+  // (Optional) The PCollection ids to filter for.
+  repeated string pcollection_ids = 2;
+}
+
+
+// An element sampled when the SDK is processing a bundle. This is a proto
+// message to allow for additional per-element metadata.
+message SampledElement {
+  // Required. Sampled raw bytes for an element. This is a
+  // single encoded element in the nested context.
+  bytes element = 1;
+
+  // FUTURE WORK: Capture lull detections and exceptions.
+  //
+  // Optional. Present if there was an exception
+  // processing the above element.
+  //
+  // LogEntry exception_entry = 2;
+}
+
+// If supported, the `SampleDataResponse` will contain samples from 
PCollections
+// based upon the filters specified in the request.
+message SampleDataResponse {
+  message ElementList {
+    // Required. The individual elements sampled from a PCollection.
+    repeated SampledElement elements = 1;
+  }
+
+  // Map from PCollection id to sample elements.

Review Comment:
   ```suggestion
     // Map from PCollection id to sampled elements.
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to