This is an automated email from the ASF dual-hosted git repository.
abhijain pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/gobblin.git
The following commit(s) were added to refs/heads/master by this push:
new b535daf554 Add data quality field in GaaSJobObservabilityEventSchema
(#4114)
b535daf554 is described below
commit b535daf55455cd0260d5af06fd21462046bb1727
Author: vsinghal85 <[email protected]>
AuthorDate: Fri Apr 25 15:00:56 2025 +0530
Add data quality field in GaaSJobObservabilityEventSchema (#4114)
* Add data quality field in GaaSJobObservabilityEventSchema
---------
Co-authored-by: Vaibhav Singhal <[email protected]>
---
.../gobblin/qualitychecker/task/TaskLevelPolicyChecker.java | 10 ++++++++++
.../src/main/avro/GaaSJobObservabilityEvent.avsc | 6 ++++++
.../java/org/apache/gobblin/runtime/DatasetTaskSummary.java | 4 +++-
3 files changed, 19 insertions(+), 1 deletion(-)
diff --git
a/gobblin-core/src/main/java/org/apache/gobblin/qualitychecker/task/TaskLevelPolicyChecker.java
b/gobblin-core/src/main/java/org/apache/gobblin/qualitychecker/task/TaskLevelPolicyChecker.java
index 0a0001c30a..909248a4df 100644
---
a/gobblin-core/src/main/java/org/apache/gobblin/qualitychecker/task/TaskLevelPolicyChecker.java
+++
b/gobblin-core/src/main/java/org/apache/gobblin/qualitychecker/task/TaskLevelPolicyChecker.java
@@ -29,6 +29,16 @@ import org.slf4j.LoggerFactory;
* in a PolicyCheckResults object
*/
public class TaskLevelPolicyChecker {
+ /**
+ * Possible statuses of a data quality check: PASSED or FAILED when the
+ * check has been evaluated, and NOT_EVALUATED when data quality
+ * evaluation was not performed for the job.
+ */
+ public enum DataQualityStatus {
+ PASSED,
+ FAILED,
+ NOT_EVALUATED
+ }
private final List<TaskLevelPolicy> list;
private static final Logger LOG =
LoggerFactory.getLogger(TaskLevelPolicyChecker.class);
diff --git
a/gobblin-metrics-libs/gobblin-metrics-base/src/main/avro/GaaSJobObservabilityEvent.avsc
b/gobblin-metrics-libs/gobblin-metrics-base/src/main/avro/GaaSJobObservabilityEvent.avsc
index 1f3bf0a096..53671f8417 100644
---
a/gobblin-metrics-libs/gobblin-metrics-base/src/main/avro/GaaSJobObservabilityEvent.avsc
+++
b/gobblin-metrics-libs/gobblin-metrics-base/src/main/avro/GaaSJobObservabilityEvent.avsc
@@ -258,6 +258,12 @@
"name": "successfullyCommitted",
"type": "boolean",
"doc": "Whether the dataset was successfully committed by
Gobblin and fully successful, versus when users configure partial failures or
non-atomic writes"
+ },
+ {
+ "name": "dataQualityStatus",
+ "type": ["null","string"],
+ "doc": "Overall data quality check status of the dataset (PASSED, FAILED
+ or NOT_EVALUATED)",
+ "default": null
}
]
}
diff --git
a/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/DatasetTaskSummary.java
b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/DatasetTaskSummary.java
index 6c513d4b9a..1aebbc2f86 100644
---
a/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/DatasetTaskSummary.java
+++
b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/DatasetTaskSummary.java
@@ -25,6 +25,7 @@ import lombok.RequiredArgsConstructor;
import lombok.Setter;
import org.apache.gobblin.metrics.DatasetMetric;
+import org.apache.gobblin.qualitychecker.task.TaskLevelPolicyChecker;
/**
@@ -45,6 +46,7 @@ public class DatasetTaskSummary {
* Convert a {@link DatasetTaskSummary} to a {@link DatasetMetric}.
*/
public static DatasetMetric toDatasetMetric(DatasetTaskSummary
datasetTaskSummary) {
- return new DatasetMetric(datasetTaskSummary.getDatasetUrn(),
datasetTaskSummary.getBytesWritten(), datasetTaskSummary.getRecordsWritten(),
datasetTaskSummary.isSuccessfullyCommitted());
+ return new DatasetMetric(datasetTaskSummary.getDatasetUrn(),
datasetTaskSummary.getBytesWritten(), datasetTaskSummary.getRecordsWritten(),
datasetTaskSummary.isSuccessfullyCommitted(),
+ TaskLevelPolicyChecker.DataQualityStatus.NOT_EVALUATED.name());
}
}