rohdesamuel commented on code in PR #25065:
URL: https://github.com/apache/beam/pull/25065#discussion_r1085673374
##########
model/fn-execution/src/main/proto/org/apache/beam/model/fn_execution/v1/beam_fn_api.proto:
##########
@@ -137,12 +138,79 @@ message InstructionResponse {
FinalizeBundleResponse finalize_bundle = 1004;
MonitoringInfosMetadataResponse monitoring_infos = 1005;
HarnessMonitoringInfosResponse harness_monitoring_infos = 1006;
+ SampleDataResponse sample = 1007;
// DEPRECATED
RegisterResponse register = 1000;
}
}
+// If supported, the SDK will respond to a `SampleDataRequest` with a
+// `SampleDataResponse`. The SDK being queried must have the
+// "beam:protocol:data_sampling:v1" capability. The samples in the
+// `SampleDataResponse` will be filtered to those matching both the
+// `process_bundle_descriptor_ids` AND the `pcollection_ids`. An empty list can
+// match anything.
+message SampleDataRequest {
+ // (Optional) The ProcessBundleDescriptor ids to filter for.
+ repeated string process_bundle_descriptor_ids = 1;
+
+ // (Optional) The PCollection ids to filter for.
+ repeated string pcollection_ids = 2;
+}
+
+
+// An element sampled when the SDK is processing a bundle. This is a proto
+// message to allow for additional per-element metadata.
+message SampledElement {
+ // Required. Sampled raw bytes for an element. This is a
+ // single encoded element in the nested context.
+ bytes element = 1;
+
+ // FUTURE WORK: Capture lull detections and exceptions.
+ //
+ // Optional. Present if there was an exception
+ // processing the above element.
+ //
+ // LogEntry exception_entry = 2;
+}
+
+// If supported, the `SampleDataResponse` will contain samples from all
+// ProcessBundleDescriptors.
+message SampleDataResponse {
+ message ElementList {
+ // Required. The individual elements sampled from a PCollection.
+ repeated SampledElement elements = 1;
+ }
+
+ // Map from PCollection id to sample elements.
+ map<string, ElementList> element_samples = 1;
+
+ // FUTURE WORK: Investigate ways of storing multiple interesting types of
+ // sampled elements. There are two ways of accomplishing this:
+ // 1) Maps of typed elements: include multiple maps here with typed element
+ // proto messages, ex.
+ //
+ // message SlowElement {...}
+ // message ErroredElement {...}
+ // map<string, SlowElement> slow_elements
+ // map<string, ErroredElement> errored_elements
+ //
+ // However, this forces an element into a single category. It disallows
+ // classification across multiple characteristics (like a slow and errored
+ // element).
+ //
+ // 2) Compositional types: allow for Protobuf Extensions on the base
Review Comment:
Gotcha, didn't know that this was discouraged. Changed to URN + payload.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]