VladaZakharova commented on code in PR #32256:
URL: https://github.com/apache/airflow/pull/32256#discussion_r1288362420


##########
airflow/providers/google/cloud/hooks/dataplex.py:
##########
@@ -361,3 +374,434 @@ def get_lake(
             metadata=metadata,
         )
         return result
+
+    @GoogleBaseHook.fallback_to_default_project_id
+    def create_zone(
+        self,
+        project_id: str,
+        region: str,
+        lake_id: str,
+        zone_id: str,
+        body: dict[str, Any] | Zone,
+        retry: Retry | _MethodDefault = DEFAULT,
+        timeout: float | None = None,
+        metadata: Sequence[tuple[str, str]] = (),
+    ) -> Any:
+        """
+        Creates a zone resource within a lake.
+
+        :param project_id: Required. The ID of the Google Cloud project that the lake belongs to.
+        :param region: Required. The ID of the Google Cloud region that the lake belongs to.
+        :param lake_id: Required. The ID of the Google Cloud lake to be retrieved.
+        :param body: Required. The Request body contains an instance of Zone.
+        :param zone_id: Required. Zone identifier.
+        :param retry: A retry object used  to retry requests. If `None` is specified, requests
+            will not be retried.
+        :param timeout: The amount of time, in seconds, to wait for the request to complete.
+            Note that if `retry` is specified, the timeout applies to each individual attempt.
+        :param metadata: Additional metadata that is provided to the method.
+        """
+        client = self.get_dataplex_client()
+
+        name = f"projects/{project_id}/locations/{region}/lakes/{lake_id}"
+        result = client.create_zone(
+            request={
+                "parent": name,
+                "zone": body,
+                "zone_id": zone_id,
+            },
+            retry=retry,
+            timeout=timeout,
+            metadata=metadata,
+        )
+        return result
+
+    @GoogleBaseHook.fallback_to_default_project_id
+    def delete_zone(
+        self,
+        project_id: str,
+        region: str,
+        lake_id: str,
+        zone_id: str,
+        retry: Retry | _MethodDefault = DEFAULT,
+        timeout: float | None = None,
+        metadata: Sequence[tuple[str, str]] = (),
+    ) -> Any:
+        """
+        Deletes a zone resource. All assets within a zone must be deleted before the zone can be deleted.
+
+        :param project_id: Required. The ID of the Google Cloud project that the lake belongs to.
+        :param region: Required. The ID of the Google Cloud region that the lake belongs to.
+        :param lake_id: Required. The ID of the Google Cloud lake to be retrieved.
+        :param zone_id: Required. Zone identifier.
+        :param retry: A retry object used  to retry requests. If `None` is specified, requests
+            will not be retried.
+        :param timeout: The amount of time, in seconds, to wait for the request to complete.
+            Note that if `retry` is specified, the timeout applies to each individual attempt.
+        :param metadata: Additional metadata that is provided to the method.
+        """
+        client = self.get_dataplex_client()
+
+        name = f"projects/{project_id}/locations/{region}/lakes/{lake_id}/zones/{zone_id}"
+        operation = client.delete_zone(
+            request={"name": name},
+            retry=retry,
+            timeout=timeout,
+            metadata=metadata,
+        )
+        return operation
+
+    @GoogleBaseHook.fallback_to_default_project_id
+    def create_asset(
+        self,
+        project_id: str,
+        region: str,
+        lake_id: str,
+        zone_id: str,
+        asset_id: str,
+        body: dict[str, Any] | Asset,
+        retry: Retry | _MethodDefault = DEFAULT,
+        timeout: float | None = None,
+        metadata: Sequence[tuple[str, str]] = (),
+    ) -> Any:
+        """
+        Creates an asset resource.
+
+        :param project_id: Required. The ID of the Google Cloud project that the lake belongs to.
+        :param region: Required. The ID of the Google Cloud region that the lake belongs to.
+        :param lake_id: Required. The ID of the Google Cloud lake to be retrieved.
+        :param zone_id: Required. Zone identifier.
+        :param asset_id: Required. Asset identifier.
+        :param body: Required. The Request body contains an instance of Asset.
+        :param retry: A retry object used  to retry requests. If `None` is specified, requests
+            will not be retried.
+        :param timeout: The amount of time, in seconds, to wait for the request to complete.
+            Note that if `retry` is specified, the timeout applies to each individual attempt.
+        :param metadata: Additional metadata that is provided to the method.
+        """
+        client = self.get_dataplex_client()
+
+        name = f"projects/{project_id}/locations/{region}/lakes/{lake_id}/zones/{zone_id}"
+        result = client.create_asset(
+            request={
+                "parent": name,
+                "asset": body,
+                "asset_id": asset_id,
+            },
+            retry=retry,
+            timeout=timeout,
+            metadata=metadata,
+        )
+        return result
+
+    @GoogleBaseHook.fallback_to_default_project_id
+    def delete_asset(
+        self,
+        project_id: str,
+        region: str,
+        lake_id: str,
+        asset_id: str,
+        zone_id: str,
+        retry: Retry | _MethodDefault = DEFAULT,
+        timeout: float | None = None,
+        metadata: Sequence[tuple[str, str]] = (),
+    ) -> Any:
+        """
+        Deletes an asset resource.
+
+        :param project_id: Required. The ID of the Google Cloud project that the lake belongs to.
+        :param region: Required. The ID of the Google Cloud region that the lake belongs to.
+        :param lake_id: Required. The ID of the Google Cloud lake to be retrieved.
+        :param zone_id: Required. Zone identifier.
+        :param asset_id: Required. Asset identifier.
+        :param retry: A retry object used  to retry requests. If `None` is specified, requests
+            will not be retried.
+        :param timeout: The amount of time, in seconds, to wait for the request to complete.
+            Note that if `retry` is specified, the timeout applies to each individual attempt.
+        :param metadata: Additional metadata that is provided to the method.
+        """
+        client = self.get_dataplex_client()
+
+        name = f"projects/{project_id}/locations/{region}/lakes/{lake_id}/zones/{zone_id}/assets/{asset_id}"
+        result = client.delete_asset(
+            request={"name": name},
+            retry=retry,
+            timeout=timeout,
+            metadata=metadata,
+        )
+        return result
+
+    @GoogleBaseHook.fallback_to_default_project_id
+    def create_data_scan(
+        self,
+        project_id: str,
+        region: str,
+        body: dict[str, Any] | DataScan,
+        data_scan_id: str | None = None,
+        retry: Retry | _MethodDefault = DEFAULT,
+        timeout: float | None = None,
+        metadata: Sequence[tuple[str, str]] = (),
+    ) -> Any:
+        """
+        Creates a DataScan resource.
+
+        :param project_id: Required. The ID of the Google Cloud project that the lake belongs to.
+        :param region: Required. The ID of the Google Cloud region that the lake belongs to.
+        :param data_scan_id: Required. Data Quality scan identifier.
+        :param body: Required. The Request body contains an instance of DataScan.
+        :param retry: A retry object used  to retry requests. If `None` is specified, requests
+            will not be retried.
+        :param timeout: The amount of time, in seconds, to wait for the request to complete.
+            Note that if `retry` is specified, the timeout applies to each individual attempt.
+        :param metadata: Additional metadata that is provided to the method.
+        """
+        client = self.get_dataplex_data_scan_client()
+
+        parent = f"projects/{project_id}/locations/{region}"
+        result = client.create_data_scan(
+            request={
+                "parent": parent,
+                "data_scan": body,
+                "data_scan_id": data_scan_id,
+            },
+            retry=retry,
+            timeout=timeout,
+            metadata=metadata,
+        )
+        return result
+
+    @GoogleBaseHook.fallback_to_default_project_id
+    def run_data_scan(
+        self,
+        project_id: str,
+        region: str,
+        data_scan_id: str,
+        retry: Retry | _MethodDefault = DEFAULT,
+        timeout: float | None = None,
+        metadata: Sequence[tuple[str, str]] = (),
+    ) -> Any:
+        """
+        Runs an on-demand execution of a DataScan.
+
+        :param project_id: Required. The ID of the Google Cloud project that the lake belongs to.
+        :param region: Required. The ID of the Google Cloud region that the lake belongs to.
+        :param data_scan_id: Required. Data Quality scan identifier.
+        :param retry: A retry object used  to retry requests. If `None` is specified, requests
+            will not be retried.
+        :param timeout: The amount of time, in seconds, to wait for the request to complete.
+            Note that if `retry` is specified, the timeout applies to each individual attempt.
+        :param metadata: Additional metadata that is provided to the method.
+        """
+        client = self.get_dataplex_data_scan_client()
+
+        name = PATH_DATA_SCAN.format(project_id=project_id, region=region, data_scan_id=data_scan_id)
+        result = client.run_data_scan(
+            request={
+                "name": name,
+            },
+            retry=retry,
+            timeout=timeout,
+            metadata=metadata,
+        )
+        return result
+
+    @GoogleBaseHook.fallback_to_default_project_id
+    def get_data_scan_job(
+        self,
+        project_id: str,
+        region: str,
+        data_scan_id: str | None = None,
+        job_id: str | None = None,
+        retry: Retry | _MethodDefault = DEFAULT,
+        timeout: float | None = None,
+        metadata: Sequence[tuple[str, str]] = (),
+    ) -> Any:
+        """
+        Gets a DataScan Job resource.
+
+        :param project_id: Required. The ID of the Google Cloud project that the lake belongs to.
+        :param region: Required. The ID of the Google Cloud region that the lake belongs to.
+        :param data_scan_id: Required. Data Quality scan identifier.
+        :param job_id: Required. The resource name of the DataScanJob:
+            projects/{project_id}/locations/{region}/dataScans/{data_scan_id}/jobs/{data_scan_job_id}
+        :param retry: A retry object used  to retry requests. If `None` is specified, requests
+            will not be retried.
+        :param timeout: The amount of time, in seconds, to wait for the request to complete.
+            Note that if `retry` is specified, the timeout applies to each individual attempt.
+        :param metadata: Additional metadata that is provided to the method.
+        """
+        client = self.get_dataplex_data_scan_client()
+
+        name = f"projects/{project_id}/locations/{region}/dataScans/{data_scan_id}/jobs/{job_id}"
+        result = client.get_data_scan_job(
+            request={"name": name, "view": "FULL"},
+            retry=retry,
+            timeout=timeout,
+            metadata=metadata,
+        )
+        return result
+
+    def wait_for_data_scan_job(
+        self,
+        data_scan_id: str,
+        fail_on_dq_failure: bool,
+        job_id: str | None = None,
+        fail_on_job_failure: bool = True,
+        project_id: str | None = None,
+        region: str | None = None,
+        wait_time: int = 10,
+        timeout: float | None = None,
+        fail_on_timeout: bool = True,
+    ) -> None:
+        """
+        Wait for Dataplex data scan job.
+
+        :param job_id: Required. The job_id to wait for.
+        :param data_scan_id: Required. Data Quality scan identifier.
+        :param region: Required. The ID of the Google Cloud region that the lake belongs to.
+        :param project_id: Optional. Google Cloud project ID.
+        :param wait_time: Number of seconds between checks.
+        :param timeout: The amount of time, in seconds, to wait for an execution job the result.
+        :param fail_on_dq_failure: If set to true and not all Data Quality scan rules have been passed,
+            an exception is thrown. If set to false and not all Data Quality scan rules have been passed,
+            execution will finish with success.
+        :param fail_on_job_failure: If set to true and job fails, an exception is thrown.
+        :param fail_on_timeout: If set to true and timeout, an exception is thrown.
+        """
+        start = time.monotonic()
+        state = None
+        while state not in (
+            DataScanJob.State.CANCELLED,
+            DataScanJob.State.FAILED,
+            DataScanJob.State.SUCCEEDED,
+        ):
+            if timeout and start + timeout < time.monotonic():
+                if fail_on_timeout:
+                    raise AirflowException(
+                        f"Timeout: data quality scan {job_id} is not ready after {timeout}s"
+                    )
+                break
+            time.sleep(wait_time)
+            try:
+                job = self.get_data_scan_job(
+                    job_id=job_id,
+                    data_scan_id=data_scan_id,
+                    project_id=project_id,
+                    region=region,
+                )
+                state = job.state
+            except Exception as err:
+                self.log.info("Retrying. Dataplex API returned error when waiting for job: %s", err)
+
+            if fail_on_dq_failure:
+                if state == DataScanJob.State.SUCCEEDED and not job.data_quality_result.passed:
+                    raise AirflowException(f"Data Quality scan failed: {data_scan_id}")
+            if state == DataScanJob.State.FAILED:
+                if fail_on_job_failure:
+                    raise AirflowException(f"Job failed:\n{job_id}")
+                break
+            if state == DataScanJob.State.CANCELLED:
+                raise AirflowException(f"Job was cancelled:\n{job_id}")

Review Comment:
   got it, thanks!



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to