bkossakowska commented on code in PR #32256:
URL: https://github.com/apache/airflow/pull/32256#discussion_r1277649716


##########
airflow/providers/google/cloud/triggers/dataplex.py:
##########
@@ -0,0 +1,104 @@
+#

Review Comment:
   deleted



##########
airflow/providers/google/cloud/hooks/dataplex.py:
##########
@@ -361,3 +374,467 @@ def get_lake(
             metadata=metadata,
         )
         return result
+
+    @GoogleBaseHook.fallback_to_default_project_id
+    def create_zone(
+        self,
+        project_id: str,
+        region: str,
+        lake_id: str,
+        zone_id: str,
+        zone: dict[str, Any] | Zone,
+        retry: Retry | _MethodDefault = DEFAULT,
+        timeout: float | None = None,
+        metadata: Sequence[tuple[str, str]] = (),
+    ) -> Any:
+        """
+        Creates a zone resource within a lake.
+
+        :param project_id: Required. The ID of the Google Cloud project that 
the lake belongs to.
+        :param region: Required. The ID of the Google Cloud region that the 
lake belongs to.
+        :param lake_id: Required. The ID of the Google Cloud lake in which the zone is created.
+        :param zone: Required. Zone resource.
+        :param zone_id: Required. Zone identifier.
+        :param retry: A retry object used  to retry requests. If `None` is 
specified, requests
+            will not be retried.
+        :param timeout: The amount of time, in seconds, to wait for the 
request to complete.
+            Note that if `retry` is specified, the timeout applies to each 
individual attempt.
+        :param metadata: Additional metadata that is provided to the method.
+        """
+        client = self.get_dataplex_client()
+
+        name = f"projects/{project_id}/locations/{region}/lakes/{lake_id}"
+        result = client.create_zone(
+            request={
+                "parent": name,
+                "zone": zone,
+                "zone_id": zone_id,
+            },
+            retry=retry,
+            timeout=timeout,
+            metadata=metadata,
+        )
+        return result
+
+    @GoogleBaseHook.fallback_to_default_project_id
+    def delete_zone(
+        self,
+        project_id: str,
+        region: str,
+        lake_id: str,
+        zone_id: str,
+        retry: Retry | _MethodDefault = DEFAULT,
+        timeout: float | None = None,
+        metadata: Sequence[tuple[str, str]] = (),
+    ) -> Any:
+        """
+        Deletes a zone resource. All assets within a zone must be deleted 
before the zone can be deleted.
+
+        :param project_id: Required. The ID of the Google Cloud project that 
the lake belongs to.
+        :param region: Required. The ID of the Google Cloud region that the 
lake belongs to.
+        :param lake_id: Required. The ID of the Google Cloud lake that contains the zone.
+        :param zone_id: Required. Zone identifier.
+        :param retry: A retry object used  to retry requests. If `None` is 
specified, requests
+            will not be retried.
+        :param timeout: The amount of time, in seconds, to wait for the 
request to complete.
+            Note that if `retry` is specified, the timeout applies to each 
individual attempt.
+        :param metadata: Additional metadata that is provided to the method.
+        """
+        client = self.get_dataplex_client()
+
+        name = 
f"projects/{project_id}/locations/{region}/lakes/{lake_id}/zones/{zone_id}"
+        operation = client.delete_zone(
+            request={"name": name},
+            retry=retry,
+            timeout=timeout,
+            metadata=metadata,
+        )
+        return operation
+
+    @GoogleBaseHook.fallback_to_default_project_id
+    def create_asset(
+        self,
+        project_id: str,
+        region: str,
+        lake_id: str,
+        zone_id: str,
+        asset_id: str,
+        asset: dict[str, Any] | Zone,
+        retry: Retry | _MethodDefault = DEFAULT,
+        timeout: float | None = None,
+        metadata: Sequence[tuple[str, str]] = (),
+    ) -> Any:
+        """
+        Creates an asset resource.
+
+        :param project_id: Required. The ID of the Google Cloud project that 
the lake belongs to.
+        :param region: Required. The ID of the Google Cloud region that the 
lake belongs to.
+        :param lake_id: Required. The ID of the Google Cloud lake that contains the zone.
+        :param zone_id: Required. Zone identifier.
+        :param asset_id: Required. Asset identifier.
+        :param asset: Required. Asset resource.
+        :param retry: A retry object used  to retry requests. If `None` is 
specified, requests
+            will not be retried.
+        :param timeout: The amount of time, in seconds, to wait for the 
request to complete.
+            Note that if `retry` is specified, the timeout applies to each 
individual attempt.
+        :param metadata: Additional metadata that is provided to the method.
+        """
+        client = self.get_dataplex_client()
+
+        name = 
f"projects/{project_id}/locations/{region}/lakes/{lake_id}/zones/{zone_id}"
+        result = client.create_asset(
+            request={
+                "parent": name,
+                "asset": asset,
+                "asset_id": asset_id,
+            },
+            retry=retry,
+            timeout=timeout,
+            metadata=metadata,
+        )
+        return result
+
+    @GoogleBaseHook.fallback_to_default_project_id
+    def delete_asset(
+        self,
+        project_id: str,
+        region: str,
+        lake_id: str,
+        asset_id: str,
+        zone_id: str,
+        retry: Retry | _MethodDefault = DEFAULT,
+        timeout: float | None = None,
+        metadata: Sequence[tuple[str, str]] = (),
+    ) -> Any:
+        """
+        Deletes an asset resource.
+
+        :param project_id: Required. The ID of the Google Cloud project that 
the lake belongs to.
+        :param region: Required. The ID of the Google Cloud region that the 
lake belongs to.
+        :param lake_id: Required. The ID of the Google Cloud lake that contains the zone.
+        :param zone_id: Required. Zone identifier.
+        :param asset_id: Required. Asset identifier.
+        :param retry: A retry object used  to retry requests. If `None` is 
specified, requests
+            will not be retried.
+        :param timeout: The amount of time, in seconds, to wait for the 
request to complete.
+            Note that if `retry` is specified, the timeout applies to each 
individual attempt.
+        :param metadata: Additional metadata that is provided to the method.
+        """
+        client = self.get_dataplex_client()
+
+        name = 
f"projects/{project_id}/locations/{region}/lakes/{lake_id}/zones/{zone_id}/assets/{asset_id}"
+        result = client.delete_asset(
+            request={"name": name},
+            retry=retry,
+            timeout=timeout,
+            metadata=metadata,
+        )
+        return result
+
+    @GoogleBaseHook.fallback_to_default_project_id
+    def create_data_scan(
+        self,
+        project_id: str,
+        region: str,
+        data_scan_id: str | None = None,
+        data_scan: dict[str, Any] | None = None,
+        retry: Retry | _MethodDefault = DEFAULT,
+        timeout: float | None = None,
+        metadata: Sequence[tuple[str, str]] = (),
+    ) -> Any:
+        """
+        Creates a DataScan resource.
+
+        :param project_id: Required. The ID of the Google Cloud project that 
the lake belongs to.
+        :param region: Required. The ID of the Google Cloud region that the 
lake belongs to.
+        :param data_scan_id: Required. DataScan identifier.
+        :param data_scan: Required. DataScan resource.
+        :param retry: A retry object used  to retry requests. If `None` is 
specified, requests
+            will not be retried.
+        :param timeout: The amount of time, in seconds, to wait for the 
request to complete.
+            Note that if `retry` is specified, the timeout applies to each 
individual attempt.
+        :param metadata: Additional metadata that is provided to the method.
+        """
+        client = self.get_dataplex_data_scan_client()
+
+        parent = f"projects/{project_id}/locations/{region}"
+        result = client.create_data_scan(
+            request={
+                "parent": parent,
+                "data_scan": data_scan,
+                "data_scan_id": data_scan_id,
+            },
+            retry=retry,
+            timeout=timeout,
+            metadata=metadata,
+        )
+        return result
+
+    @GoogleBaseHook.fallback_to_default_project_id
+    def run_data_scan(
+        self,
+        project_id: str,
+        region: str,
+        data_scan_id: str,
+        retry: Retry | _MethodDefault = DEFAULT,
+        timeout: float | None = None,
+        metadata: Sequence[tuple[str, str]] = (),
+    ) -> Any:
+        """
+        Runs an on-demand execution of a DataScan.
+
+        :param project_id: Required. The ID of the Google Cloud project that 
the lake belongs to.
+        :param region: Required. The ID of the Google Cloud region that the 
lake belongs to.
+        :param data_scan_id: Required. Data scan identifier.
+        :param retry: A retry object used  to retry requests. If `None` is 
specified, requests
+            will not be retried.
+        :param timeout: The amount of time, in seconds, to wait for the 
request to complete.
+            Note that if `retry` is specified, the timeout applies to each 
individual attempt.
+        :param metadata: Additional metadata that is provided to the method.
+        """
+        client = self.get_dataplex_data_scan_client()
+
+        name = PATH_DATA_SCAN.format(project_id=project_id, region=region, 
data_scan_id=data_scan_id)
+        result = client.run_data_scan(
+            request={
+                "name": name,
+            },
+            retry=retry,
+            timeout=timeout,
+            metadata=metadata,
+        )
+        return result
+
+    @GoogleBaseHook.fallback_to_default_project_id
+    def get_data_scan_job(
+        self,
+        project_id: str,
+        region: str,
+        data_scan_id: str | None = None,
+        job_id: str | None = None,
+        name: str | None = None,
+        retry: Retry | _MethodDefault = DEFAULT,
+        timeout: float | None = None,
+        metadata: Sequence[tuple[str, str]] = (),
+    ) -> Any:
+        """
+        Gets a DataScan Job resource.
+
+        :param project_id: Required. The ID of the Google Cloud project that 
the lake belongs to.
+        :param region: Required. The ID of the Google Cloud region that the 
lake belongs to.
+        :param data_scan_id: Required. DataScan identifier.
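+        :param job_id: Required unless `name` is given. DataScanJob identifier.
+        :param name: Optional. The full resource name of the DataScanJob:
+            projects/{project_id}/locations/{region}/dataScans/{data_scan_id}/jobs/{data_scan_job_id}
+            If omitted, it is built from `project_id`, `region`, `data_scan_id` and `job_id`.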
+        :param retry: A retry object used  to retry requests. If `None` is 
specified, requests
+            will not be retried.
+        :param timeout: The amount of time, in seconds, to wait for the 
request to complete.
+            Note that if `retry` is specified, the timeout applies to each 
individual attempt.
+        :param metadata: Additional metadata that is provided to the method.
+        """
+        client = self.get_dataplex_data_scan_client()
+        if not name:
+            name = 
f"projects/{project_id}/locations/{region}/dataScans/{data_scan_id}/jobs/{job_id}"
+        result = client.get_data_scan_job(
+            request={"name": name, "view": "FULL"},
+            retry=retry,
+            timeout=timeout,
+            metadata=metadata,
+        )
+        return result
+
+    def wait_for_job(
+        self,
+        job: DataScanJob,
+        project_id: str | None = None,
+        region: str | None = None,
+        wait_time: int = 10,
+        timeout: float | None = None,
+    ):
+        """
+        Wait for a Dataplex data scan job to reach the SUCCEEDED state.
+
+        :param job: Required. The DataScanJob to wait for.
+        :param region: Optional. The ID of the Google Cloud region that the lake belongs to.
+        :param project_id: Optional. Google Cloud project ID.
+        :param wait_time: Number of seconds to sleep between checks while the job is running or pending.
+        :param timeout: Maximum number of seconds to wait for the job to succeed; also passed as the
+            request timeout of each status check.
+        """
+        start = time.monotonic()
+
+        while job.state != DataScanJob.State.SUCCEEDED:
+            if timeout and start + timeout < time.monotonic():
+                raise AirflowException(f"Timeout: data quality scan {job.name} 
is not ready after {timeout}s")
+            if job.state == DataScanJob.State.RUNNING or job.state == 
DataScanJob.State.PENDING:
+                time.sleep(wait_time)
+            job = self.get_data_scan_job(name=job.name, project_id=project_id, 
region=region, timeout=timeout)
+        return job
+
+    @GoogleBaseHook.fallback_to_default_project_id
+    def get_data_scan(
+        self,
+        project_id: str,
+        region: str,
+        data_scan_id: str,
+        retry: Retry | _MethodDefault = DEFAULT,
+        timeout: float | None = None,
+        metadata: Sequence[tuple[str, str]] = (),
+    ) -> Any:
+        """
+        Gets a DataScan resource.
+
+        :param project_id: Required. The ID of the Google Cloud project that 
the lake belongs to.
+        :param region: Required. The ID of the Google Cloud region that the 
lake belongs to.
+        :param data_scan_id: Required. DataScan identifier.
+        :param retry: A retry object used  to retry requests. If `None` is 
specified, requests
+            will not be retried.
+        :param timeout: The amount of time, in seconds, to wait for the 
request to complete.
+            Note that if `retry` is specified, the timeout applies to each 
individual attempt.
+        :param metadata: Additional metadata that is provided to the method.
+        """
+        client = self.get_dataplex_data_scan_client()
+
+        name = PATH_DATA_SCAN.format(project_id=project_id, region=region, 
data_scan_id=data_scan_id)
+        result = client.get_data_scan(
+            request={"name": name, "view": "FULL"},
+            retry=retry,
+            timeout=timeout,
+            metadata=metadata,
+        )
+        return result
+
+    @GoogleBaseHook.fallback_to_default_project_id
+    def delete_data_scan(
+        self,
+        project_id: str,
+        region: str,
+        data_scan_id: str,
+        retry: Retry | _MethodDefault = DEFAULT,
+        timeout: float | None = None,
+        metadata: Sequence[tuple[str, str]] = (),
+    ) -> Any:
+        """
+        Deletes a DataScan resource.
+
+        :param project_id: Required. The ID of the Google Cloud project that 
the lake belongs to.
+        :param region: Required. The ID of the Google Cloud region that the 
lake belongs to.
+        :param data_scan_id: Required. DataScan identifier.
+        :param retry: A retry object used  to retry requests. If `None` is 
specified, requests
+            will not be retried.
+        :param timeout: The amount of time, in seconds, to wait for the 
request to complete.
+            Note that if `retry` is specified, the timeout applies to each 
individual attempt.
+        :param metadata: Additional metadata that is provided to the method.
+        """
+        client = self.get_dataplex_data_scan_client()
+
+        name = PATH_DATA_SCAN.format(project_id=project_id, region=region, 
data_scan_id=data_scan_id)
+        result = client.delete_data_scan(
+            request={
+                "name": name,
+            },
+            retry=retry,
+            timeout=timeout,
+            metadata=metadata,
+        )
+        return result
+
+    @GoogleBaseHook.fallback_to_default_project_id
+    def list_data_scan_jobs(
+        self,
+        project_id: str,
+        region: str,
+        data_scan_id: str,
+        retry: Retry | _MethodDefault = DEFAULT,
+        timeout: float | None = None,
+        metadata: Sequence[tuple[str, str]] = (),
+    ) -> Any:
+        """
+        Lists DataScanJobs under the given DataScan.
+
+        :param project_id: Required. The ID of the Google Cloud project that 
the lake belongs to.
+        :param region: Required. The ID of the Google Cloud region that the 
lake belongs to.
+        :param data_scan_id: Required. DataScan identifier.
+        :param retry: A retry object used  to retry requests. If `None` is 
specified, requests
+            will not be retried.
+        :param timeout: The amount of time, in seconds, to wait for the 
request to complete.
+            Note that if `retry` is specified, the timeout applies to each 
individual attempt.
+        :param metadata: Additional metadata that is provided to the method.
+        """
+        client = self.get_dataplex_data_scan_client()
+
+        name = PATH_DATA_SCAN.format(project_id=project_id, region=region, 
data_scan_id=data_scan_id)
+        result = client.list_data_scan_jobs(
+            request={
+                "parent": name,
+            },
+            retry=retry,
+            timeout=timeout,
+            metadata=metadata,
+        )
+        return result
+
+
+class DataplexAsyncHook(GoogleBaseAsyncHook):

Review Comment:
   removed



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to