bkossakowska commented on code in PR #32256:
URL: https://github.com/apache/airflow/pull/32256#discussion_r1290960888


##########
airflow/providers/google/cloud/hooks/dataplex.py:
##########
@@ -361,3 +390,470 @@ def get_lake(
             metadata=metadata,
         )
         return result
+
+    @GoogleBaseHook.fallback_to_default_project_id
+    def create_zone(
+        self,
+        project_id: str,
+        region: str,
+        lake_id: str,
+        zone_id: str,
+        body: dict[str, Any] | Zone,
+        retry: Retry | _MethodDefault = DEFAULT,
+        timeout: float | None = None,
+        metadata: Sequence[tuple[str, str]] = (),
+    ) -> Any:
+        """
+        Creates a zone resource within a lake.
+
+        :param project_id: Required. The ID of the Google Cloud project that the lake belongs to.
+        :param region: Required. The ID of the Google Cloud region that the lake belongs to.
+        :param lake_id: Required. The ID of the Google Cloud lake to be retrieved.
+        :param body: Required. The Request body contains an instance of Zone.
+        :param zone_id: Required. Zone identifier.
+        :param retry: A retry object used  to retry requests. If `None` is specified, requests
+            will not be retried.
+        :param timeout: The amount of time, in seconds, to wait for the request to complete.
+            Note that if `retry` is specified, the timeout applies to each individual attempt.
+        :param metadata: Additional metadata that is provided to the method.
+        """
+        client = self.get_dataplex_client()
+
+        name = f"projects/{project_id}/locations/{region}/lakes/{lake_id}"
+        result = client.create_zone(
+            request={
+                "parent": name,
+                "zone": body,
+                "zone_id": zone_id,
+            },
+            retry=retry,
+            timeout=timeout,
+            metadata=metadata,
+        )
+        return result
+
+    @GoogleBaseHook.fallback_to_default_project_id
+    def delete_zone(
+        self,
+        project_id: str,
+        region: str,
+        lake_id: str,
+        zone_id: str,
+        retry: Retry | _MethodDefault = DEFAULT,
+        timeout: float | None = None,
+        metadata: Sequence[tuple[str, str]] = (),
+    ) -> Any:
+        """
+        Deletes a zone resource. All assets within a zone must be deleted before the zone can be deleted.
+
+        :param project_id: Required. The ID of the Google Cloud project that the lake belongs to.
+        :param region: Required. The ID of the Google Cloud region that the lake belongs to.
+        :param lake_id: Required. The ID of the Google Cloud lake to be retrieved.
+        :param zone_id: Required. Zone identifier.
+        :param retry: A retry object used  to retry requests. If `None` is specified, requests
+            will not be retried.
+        :param timeout: The amount of time, in seconds, to wait for the request to complete.
+            Note that if `retry` is specified, the timeout applies to each individual attempt.
+        :param metadata: Additional metadata that is provided to the method.
+        """
+        client = self.get_dataplex_client()
+
+        name = f"projects/{project_id}/locations/{region}/lakes/{lake_id}/zones/{zone_id}"
+        operation = client.delete_zone(
+            request={"name": name},
+            retry=retry,
+            timeout=timeout,
+            metadata=metadata,
+        )
+        return operation
+
+    @GoogleBaseHook.fallback_to_default_project_id
+    def create_asset(
+        self,
+        project_id: str,
+        region: str,
+        lake_id: str,
+        zone_id: str,
+        asset_id: str,
+        body: dict[str, Any] | Asset,
+        retry: Retry | _MethodDefault = DEFAULT,
+        timeout: float | None = None,
+        metadata: Sequence[tuple[str, str]] = (),
+    ) -> Any:
+        """
+        Creates an asset resource.
+
+        :param project_id: Required. The ID of the Google Cloud project that the lake belongs to.
+        :param region: Required. The ID of the Google Cloud region that the lake belongs to.
+        :param lake_id: Required. The ID of the Google Cloud lake to be retrieved.
+        :param zone_id: Required. Zone identifier.
+        :param asset_id: Required. Asset identifier.
+        :param body: Required. The Request body contains an instance of Asset.
+        :param retry: A retry object used  to retry requests. If `None` is specified, requests
+            will not be retried.
+        :param timeout: The amount of time, in seconds, to wait for the request to complete.
+            Note that if `retry` is specified, the timeout applies to each individual attempt.
+        :param metadata: Additional metadata that is provided to the method.
+        """
+        client = self.get_dataplex_client()
+
+        name = f"projects/{project_id}/locations/{region}/lakes/{lake_id}/zones/{zone_id}"
+        result = client.create_asset(
+            request={
+                "parent": name,
+                "asset": body,
+                "asset_id": asset_id,
+            },
+            retry=retry,
+            timeout=timeout,
+            metadata=metadata,
+        )
+        return result
+
+    @GoogleBaseHook.fallback_to_default_project_id
+    def delete_asset(
+        self,
+        project_id: str,
+        region: str,
+        lake_id: str,
+        asset_id: str,
+        zone_id: str,
+        retry: Retry | _MethodDefault = DEFAULT,
+        timeout: float | None = None,
+        metadata: Sequence[tuple[str, str]] = (),
+    ) -> Any:
+        """
+        Deletes an asset resource.
+
+        :param project_id: Required. The ID of the Google Cloud project that the lake belongs to.
+        :param region: Required. The ID of the Google Cloud region that the lake belongs to.
+        :param lake_id: Required. The ID of the Google Cloud lake to be retrieved.
+        :param zone_id: Required. Zone identifier.
+        :param asset_id: Required. Asset identifier.
+        :param retry: A retry object used  to retry requests. If `None` is specified, requests
+            will not be retried.
+        :param timeout: The amount of time, in seconds, to wait for the request to complete.
+            Note that if `retry` is specified, the timeout applies to each individual attempt.
+        :param metadata: Additional metadata that is provided to the method.
+        """
+        client = self.get_dataplex_client()
+
+        name = f"projects/{project_id}/locations/{region}/lakes/{lake_id}/zones/{zone_id}/assets/{asset_id}"
+        result = client.delete_asset(
+            request={"name": name},
+            retry=retry,
+            timeout=timeout,
+            metadata=metadata,
+        )
+        return result
+
+    @GoogleBaseHook.fallback_to_default_project_id
+    def create_data_scan(
+        self,
+        project_id: str,
+        region: str,
+        body: dict[str, Any] | DataScan,
+        data_scan_id: str | None = None,
+        retry: Retry | _MethodDefault = DEFAULT,
+        timeout: float | None = None,
+        metadata: Sequence[tuple[str, str]] = (),
+    ) -> Any:
+        """
+        Creates a DataScan resource.
+
+        :param project_id: Required. The ID of the Google Cloud project that the lake belongs to.
+        :param region: Required. The ID of the Google Cloud region that the lake belongs to.
+        :param data_scan_id: Required. Data Quality scan identifier.
+        :param body: Required. The Request body contains an instance of DataScan.
+        :param retry: A retry object used  to retry requests. If `None` is specified, requests
+            will not be retried.
+        :param timeout: The amount of time, in seconds, to wait for the request to complete.
+            Note that if `retry` is specified, the timeout applies to each individual attempt.
+        :param metadata: Additional metadata that is provided to the method.
+        """
+        client = self.get_dataplex_data_scan_client()
+
+        parent = f"projects/{project_id}/locations/{region}"
+        result = client.create_data_scan(
+            request={
+                "parent": parent,
+                "data_scan": body,
+                "data_scan_id": data_scan_id,
+            },
+            retry=retry,
+            timeout=timeout,
+            metadata=metadata,
+        )
+        return result
+
+    @GoogleBaseHook.fallback_to_default_project_id
+    def run_data_scan(
+        self,
+        project_id: str,
+        region: str,
+        data_scan_id: str,
+        retry: Retry | _MethodDefault = DEFAULT,
+        timeout: float | None = None,
+        metadata: Sequence[tuple[str, str]] = (),
+    ) -> Any:
+        """
+        Runs an on-demand execution of a DataScan.
+
+        :param project_id: Required. The ID of the Google Cloud project that the lake belongs to.
+        :param region: Required. The ID of the Google Cloud region that the lake belongs to.
+        :param data_scan_id: Required. Data Quality scan identifier.
+        :param retry: A retry object used  to retry requests. If `None` is specified, requests
+            will not be retried.
+        :param timeout: The amount of time, in seconds, to wait for the request to complete.
+            Note that if `retry` is specified, the timeout applies to each individual attempt.
+        :param metadata: Additional metadata that is provided to the method.
+        """
+        client = self.get_dataplex_data_scan_client()
+
+        name = PATH_DATA_SCAN.format(project_id=project_id, region=region, data_scan_id=data_scan_id)
+        result = client.run_data_scan(
+            request={
+                "name": name,
+            },
+            retry=retry,
+            timeout=timeout,
+            metadata=metadata,
+        )
+        return result
+
+    @GoogleBaseHook.fallback_to_default_project_id
+    def get_data_scan_job(
+        self,
+        project_id: str,
+        region: str,
+        data_scan_id: str | None = None,
+        job_id: str | None = None,
+        retry: Retry | _MethodDefault = DEFAULT,
+        timeout: float | None = None,
+        metadata: Sequence[tuple[str, str]] = (),
+    ) -> Any:
+        """
+        Gets a DataScan Job resource.
+
+        :param project_id: Required. The ID of the Google Cloud project that the lake belongs to.
+        :param region: Required. The ID of the Google Cloud region that the lake belongs to.
+        :param data_scan_id: Required. Data Quality scan identifier.
+        :param job_id: Required. The resource name of the DataScanJob:
+            projects/{project_id}/locations/{region}/dataScans/{data_scan_id}/jobs/{data_scan_job_id}
+        :param retry: A retry object used  to retry requests. If `None` is specified, requests
+            will not be retried.
+        :param timeout: The amount of time, in seconds, to wait for the request to complete.
+            Note that if `retry` is specified, the timeout applies to each individual attempt.
+        :param metadata: Additional metadata that is provided to the method.
+        """
+        client = self.get_dataplex_data_scan_client()
+
+        name = f"projects/{project_id}/locations/{region}/dataScans/{data_scan_id}/jobs/{job_id}"
+        result = client.get_data_scan_job(
+            request={"name": name, "view": "FULL"},
+            retry=retry,
+            timeout=timeout,
+            metadata=metadata,
+        )
+        return result
+
+    def wait_for_data_scan_job(
+        self,
+        data_scan_id: str,
+        job_id: str | None = None,
+        project_id: str | None = None,
+        region: str | None = None,
+        wait_time: int = 10,
+        result_timeout: float | None = None,
+    ) -> Any:
+        """
+        Wait for Dataplex data scan job.
+
+        :param job_id: Required. The job_id to wait for.
+        :param data_scan_id: Required. Data Quality scan identifier.
+        :param region: Required. The ID of the Google Cloud region that the lake belongs to.
+        :param project_id: Optional. Google Cloud project ID.
+        :param wait_time: Number of seconds between checks.
+        :param result_timeout: Value in seconds for which operator will wait for the Data Quality scan result.
+            Throws exception if there is no result found after specified amount of seconds.
+        """
+        start = time.monotonic()
+        state = None
+        while state not in (
+            DataScanJob.State.CANCELLED,
+            DataScanJob.State.FAILED,
+            DataScanJob.State.SUCCEEDED,
+        ):
+            if result_timeout and start + result_timeout < time.monotonic():
+                raise AirflowDataQualityScanResultTimeoutException(
+                    f"Timeout: Data Quality scan {job_id} is not ready after {result_timeout}s"
+                )
+            time.sleep(wait_time)
+            try:
+                job = self.get_data_scan_job(
+                    job_id=job_id,
+                    data_scan_id=data_scan_id,
+                    project_id=project_id,
+                    region=region,
+                )
+                state = job.state
+            except Exception as err:
+                self.log.info("Retrying. Dataplex API returned error when waiting for job: %s", err)
+        return job
+
+    @GoogleBaseHook.fallback_to_default_project_id
+    def get_data_scan(
+        self,
+        project_id: str,
+        region: str,
+        data_scan_id: str,
+        retry: Retry | _MethodDefault = DEFAULT,
+        timeout: float | None = None,
+        metadata: Sequence[tuple[str, str]] = (),
+    ) -> Any:
+        """
+        Gets a DataScan resource.
+
+        :param project_id: Required. The ID of the Google Cloud project that the lake belongs to.
+        :param region: Required. The ID of the Google Cloud region that the lake belongs to.
+        :param data_scan_id: Required. Data Quality scan identifier.
+        :param retry: A retry object used  to retry requests. If `None` is specified, requests
+            will not be retried.
+        :param timeout: The amount of time, in seconds, to wait for the request to complete.
+            Note that if `retry` is specified, the timeout applies to each individual attempt.
+        :param metadata: Additional metadata that is provided to the method.
+        """
+        client = self.get_dataplex_data_scan_client()
+
+        name = PATH_DATA_SCAN.format(project_id=project_id, region=region, data_scan_id=data_scan_id)
+        result = client.get_data_scan(
+            request={"name": name, "view": "FULL"},
+            retry=retry,
+            timeout=timeout,
+            metadata=metadata,
+        )
+        return result
+
+    @GoogleBaseHook.fallback_to_default_project_id
+    def update_data_scan(
+        self,
+        project_id: str,
+        region: str,
+        data_scan_id: str,
+        body: dict[str, Any] | DataScan,
+        update_mask: dict | FieldMask | None = None,
+        retry: Retry | _MethodDefault = DEFAULT,
+        timeout: float | None = None,
+        metadata: Sequence[tuple[str, str]] = (),
+    ) -> Any:
+        """
+        Updates a DataScan resource.
+
+        :param project_id: Required. The ID of the Google Cloud project that the lake belongs to.
+        :param region: Required. The ID of the Google Cloud region that the lake belongs to.
+        :param data_scan_id: Required. Data Quality scan identifier.
+        :param body: Required. The Request body contains an instance of DataScan.
+        :param update_mask: Required. Mask of fields to update.
+        :param retry: A retry object used  to retry requests. If `None` is specified, requests
+            will not be retried.
+        :param timeout: The amount of time, in seconds, to wait for the request to complete.
+            Note that if `retry` is specified, the timeout applies to each individual attempt.
+        :param metadata: Additional metadata that is provided to the method.
+        """
+        client = self.get_dataplex_data_scan_client()
+
+        full_scan_name = f"projects/{project_id}/locations/{region}/dataScans/{data_scan_id}"
+
+        if body:
+            if isinstance(body, DataScan):
+                body.name = full_scan_name
+            elif isinstance(body, dict):
+                body["name"] = full_scan_name
+            else:
+                raise AirflowException("Unable to set scan_name.")
+
+        if not update_mask:
+            update_mask = FieldMask(
+                paths=["data", "data_quality_spec", "labels", "description", "displayName", "executionSpec"]

Review Comment:
   Changed, thanks for catching that.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to