This is an automated email from the ASF dual-hosted git repository.
shahar pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push:
new d7343816b3 Mark VertexAI AutoMLText deprecation (#42251)
d7343816b3 is described below
commit d7343816b3406470dd7f963e1959171c32ef956f
Author: olegkachur-e <[email protected]>
AuthorDate: Thu Sep 19 21:27:22 2024 +0200
Mark VertexAI AutoMLText deprecation (#42251)
- Label deprecated items with the deprecated decorator.
- Refactor vertex_ai AutoMLHook.create_auto_ml_text_training_job so that it no
  longer calls get_auto_ml_text_training_job, which would trigger mypy
  errors once @deprecated is applied to the hook method.
- Update docs.
- Delete irrelevant system tests.
Co-authored-by: Oleg Kachur <[email protected]>
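
For context, the deprecation mechanism this commit applies is the provider's
`deprecated` helper, which wraps a callable and emits a warning when it is
invoked. Below is a minimal sketch of the pattern, reusing the decorator
arguments from the hunks further down; the stand-in function and the warning
check are illustrative, not the real hook method:

    import warnings

    from airflow.exceptions import AirflowProviderDeprecationWarning
    from airflow.providers.google.common.deprecated import deprecated

    @deprecated(
        planned_removal_date="September 15, 2025",
        category=AirflowProviderDeprecationWarning,
        reason="Deprecation of AutoMLText API",
    )
    def create_auto_ml_text_training_job() -> None:
        """Stand-in for the hook method; warns when called."""

    # Calling the decorated function emits the deprecation warning,
    # which is what the updated unit tests assert via pytest.warns.
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        create_auto_ml_text_training_job()
    assert any(issubclass(w.category, AirflowProviderDeprecationWarning) for w in caught)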
---
.../google/cloud/hooks/vertex_ai/auto_ml.py | 36 ++++-
.../google/cloud/operators/vertex_ai/auto_ml.py | 16 +-
.../operators/cloud/automl.rst | 13 +-
.../operators/cloud/vertex_ai.rst | 11 +-
tests/always/test_project_structure.py | 1 +
.../google/cloud/operators/test_vertex_ai.py | 58 +++----
.../example_automl_nl_text_classification.py | 176 --------------------
.../automl/example_automl_nl_text_extraction.py | 176 --------------------
.../automl/example_automl_nl_text_sentiment.py | 178 ---------------------
.../example_vertex_ai_auto_ml_text_training.py | 143 -----------------
10 files changed, 83 insertions(+), 725 deletions(-)
diff --git a/airflow/providers/google/cloud/hooks/vertex_ai/auto_ml.py b/airflow/providers/google/cloud/hooks/vertex_ai/auto_ml.py
index b1ad7d1a07..3a03502250 100644
--- a/airflow/providers/google/cloud/hooks/vertex_ai/auto_ml.py
+++ b/airflow/providers/google/cloud/hooks/vertex_ai/auto_ml.py
@@ -36,6 +36,7 @@ from google.cloud.aiplatform import (
from google.cloud.aiplatform_v1 import JobServiceClient, PipelineServiceClient
from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
+from airflow.providers.google.common.deprecated import deprecated
from airflow.providers.google.common.hooks.base_google import GoogleBaseHook
if TYPE_CHECKING:
@@ -185,6 +186,11 @@ class AutoMLHook(GoogleBaseHook):
model_encryption_spec_key_name=model_encryption_spec_key_name,
)
+ @deprecated(
+ planned_removal_date="June 15, 2025",
+ category=AirflowProviderDeprecationWarning,
+ reason="Deprecation of AutoMLText API",
+ )
def get_auto_ml_text_training_job(
self,
display_name: str,
@@ -197,7 +203,12 @@ class AutoMLHook(GoogleBaseHook):
training_encryption_spec_key_name: str | None = None,
model_encryption_spec_key_name: str | None = None,
) -> AutoMLTextTrainingJob:
- """Return AutoMLTextTrainingJob object."""
+ """
+ Return AutoMLTextTrainingJob object.
+
+ WARNING: Text creation API is deprecated since September 15, 2024
+    (https://cloud.google.com/vertex-ai/docs/tutorials/text-classification-automl/overview).
+ """
return AutoMLTextTrainingJob(
display_name=display_name,
prediction_type=prediction_type,
@@ -980,6 +991,11 @@ class AutoMLHook(GoogleBaseHook):
return model, training_id
@GoogleBaseHook.fallback_to_default_project_id
+ @deprecated(
+ planned_removal_date="September 15, 2025",
+ category=AirflowProviderDeprecationWarning,
+ reason="Deprecation of AutoMLText API",
+ )
def create_auto_ml_text_training_job(
self,
project_id: str,
@@ -1009,6 +1025,9 @@ class AutoMLHook(GoogleBaseHook):
"""
Create an AutoML Text Training Job.
+ WARNING: Text creation API is deprecated since September 15, 2024
+    (https://cloud.google.com/vertex-ai/docs/tutorials/text-classification-automl/overview).
+
:param project_id: Required. Project to run training in.
:param region: Required. Location to run training in.
:param display_name: Required. The user-defined name of this TrainingPipeline.
@@ -1101,13 +1120,14 @@ class AutoMLHook(GoogleBaseHook):
concurrent Future and any downstream object will be immediately
returned and synced when the
Future has completed.
"""
- self._job = self.get_auto_ml_text_training_job(
- project=project_id,
- location=region,
+ self._job = AutoMLTextTrainingJob(
display_name=display_name,
prediction_type=prediction_type,
multi_label=multi_label,
sentiment_max=sentiment_max,
+ project=project_id,
+ location=region,
+ credentials=self.get_credentials(),
labels=labels,
training_encryption_spec_key_name=training_encryption_spec_key_name,
model_encryption_spec_key_name=model_encryption_spec_key_name,
@@ -1117,13 +1137,13 @@ class AutoMLHook(GoogleBaseHook):
raise AirflowException("AutoMLTextTrainingJob was not created")
model = self._job.run(
- dataset=dataset,
- training_fraction_split=training_fraction_split,
- validation_fraction_split=validation_fraction_split,
+ dataset=dataset, # type: ignore[arg-type]
+            training_fraction_split=training_fraction_split,  # type: ignore[call-arg]
+            validation_fraction_split=validation_fraction_split,  # type: ignore[call-arg]
test_fraction_split=test_fraction_split,
training_filter_split=training_filter_split,
validation_filter_split=validation_filter_split,
- test_filter_split=test_filter_split,
+ test_filter_split=test_filter_split, # type: ignore[call-arg]
model_display_name=model_display_name,
model_labels=model_labels,
sync=sync,
diff --git a/airflow/providers/google/cloud/operators/vertex_ai/auto_ml.py b/airflow/providers/google/cloud/operators/vertex_ai/auto_ml.py
index 76319b892e..ec9a6784f7 100644
--- a/airflow/providers/google/cloud/operators/vertex_ai/auto_ml.py
+++ b/airflow/providers/google/cloud/operators/vertex_ai/auto_ml.py
@@ -455,8 +455,22 @@ class CreateAutoMLTabularTrainingJobOperator(AutoMLTrainingJobBaseOperator):
return result
+@deprecated(
+ planned_removal_date="September 15, 2024",
+ use_instead="SupervisedFineTuningTrainOperator",
+ instructions=(
+ "Please consider using Fine Tuning over the Gemini model. "
+        "More info: https://cloud.google.com/vertex-ai/docs/start/automl-gemini-comparison"
+ ),
+ category=AirflowProviderDeprecationWarning,
+)
class CreateAutoMLTextTrainingJobOperator(AutoMLTrainingJobBaseOperator):
- """Create Auto ML Text Training job."""
+ """
+ Create Auto ML Text Training job.
+
+ WARNING: Text creation API is deprecated since September 15, 2024
+    (https://cloud.google.com/vertex-ai/docs/tutorials/text-classification-automl/overview).
+ """
template_fields = [
"parent_model",
diff --git a/docs/apache-airflow-providers-google/operators/cloud/automl.rst b/docs/apache-airflow-providers-google/operators/cloud/automl.rst
index 5858b3a7c4..fdedb46ea8 100644
--- a/docs/apache-airflow-providers-google/operators/cloud/automl.rst
+++ b/docs/apache-airflow-providers-google/operators/cloud/automl.rst
@@ -109,15 +109,12 @@ available on the Vertex AI platform. Please use
:class:`~airflow.providers.google.cloud.operators.vertex_ai.auto_ml.CreateAutoMLImageTrainingJobOperator` or
:class:`~airflow.providers.google.cloud.operators.vertex_ai.auto_ml.CreateAutoMLVideoTrainingJobOperator`.
-You can find example on how to use VertexAI operators for AutoML Natural Language classification here:
+The Vertex AI AutoMLText API for model training was deprecated on September 15, 2024, and the remaining
+parts of the API are planned for removal on June 15, 2025.
+Please consider fine-tuning a Gemini model instead -
+https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini-tuning.
-.. exampleinclude:: /../../tests/system/providers/google/cloud/automl/example_automl_nl_text_classification.py
-    :language: python
-    :dedent: 4
-    :start-after: [START howto_cloud_create_text_classification_training_job_operator]
-    :end-before: [END howto_cloud_create_text_classification_training_job_operator]
-
-Additionally, you can find example on how to use VertexAI operators for AutoML Vision classification here:
+You can find an example of how to use VertexAI operators for AutoML Vision classification here:
.. exampleinclude:: /../../tests/system/providers/google/cloud/automl/example_automl_vision_classification.py
:language: python
diff --git a/docs/apache-airflow-providers-google/operators/cloud/vertex_ai.rst b/docs/apache-airflow-providers-google/operators/cloud/vertex_ai.rst
index db23d4dfea..9a03ed2924 100644
--- a/docs/apache-airflow-providers-google/operators/cloud/vertex_ai.rst
+++ b/docs/apache-airflow-providers-google/operators/cloud/vertex_ai.rst
@@ -260,14 +260,11 @@ put dataset id to ``dataset_id`` parameter in operator.
How to run AutoML Text Training Job
:class:`~airflow.providers.google.cloud.operators.vertex_ai.auto_ml.CreateAutoMLTextTrainingJobOperator`
-Before start running this Job you must prepare and create ``Text`` dataset. After that you should
-put dataset id to ``dataset_id`` parameter in operator.
+This operator is deprecated; please use
+:class:`~airflow.providers.google.cloud.operators.vertex_ai.generative_model.SupervisedFineTuningTrainOperator`
+with the Gemini model instead.
+More info: https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini-tuning#tuning-gemini
-.. exampleinclude:: /../../tests/system/providers/google/cloud/vertex_ai/example_vertex_ai_auto_ml_text_training.py
-    :language: python
-    :dedent: 4
-    :start-after: [START how_to_cloud_vertex_ai_create_auto_ml_text_training_job_operator]
-    :end-before: [END how_to_cloud_vertex_ai_create_auto_ml_text_training_job_operator]
How to run AutoML Video Training Job
:class:`~airflow.providers.google.cloud.operators.vertex_ai.auto_ml.CreateAutoMLVideoTrainingJobOperator`
diff --git a/tests/always/test_project_structure.py b/tests/always/test_project_structure.py
index 6cf3329e8e..b6ef9a89f9 100644
--- a/tests/always/test_project_structure.py
+++ b/tests/always/test_project_structure.py
@@ -389,6 +389,7 @@ class TestGoogleProviderProjectStructure(ExampleCoverageTest, AssetsCoverageTest
"airflow.providers.google.cloud.operators.bigquery.BigQueryPatchDatasetOperator",
"airflow.providers.google.cloud.operators.dataflow.DataflowCreatePythonJobOperator",
"airflow.providers.google.cloud.operators.bigquery.BigQueryExecuteQueryOperator",
+        "airflow.providers.google.cloud.operators.vertex_ai.auto_ml.CreateAutoMLTextTrainingJobOperator",
"airflow.providers.google.cloud.sensors.bigquery.BigQueryTableExistenceAsyncSensor",
"airflow.providers.google.cloud.sensors.bigquery.BigQueryTableExistencePartitionAsyncSensor",
"airflow.providers.google.cloud.sensors.cloud_composer.CloudComposerEnvironmentSensor",
diff --git a/tests/providers/google/cloud/operators/test_vertex_ai.py b/tests/providers/google/cloud/operators/test_vertex_ai.py
index ef0ca53608..25deb188aa 100644
--- a/tests/providers/google/cloud/operators/test_vertex_ai.py
+++ b/tests/providers/google/cloud/operators/test_vertex_ai.py
@@ -1833,20 +1833,21 @@ class TestVertexAICreateAutoMLTextTrainingJobOperator:
@mock.patch(VERTEX_AI_PATH.format("auto_ml.AutoMLHook"))
def test_execute(self, mock_hook, mock_dataset):
mock_hook.return_value.create_auto_ml_text_training_job.return_value = (None, "training_id")
- op = CreateAutoMLTextTrainingJobOperator(
- task_id=TASK_ID,
- gcp_conn_id=GCP_CONN_ID,
- impersonation_chain=IMPERSONATION_CHAIN,
- display_name=DISPLAY_NAME,
- dataset_id=TEST_DATASET_ID,
- prediction_type=None,
- multi_label=False,
- sentiment_max=10,
- sync=True,
- region=GCP_LOCATION,
- project_id=GCP_PROJECT,
- parent_model=TEST_PARENT_MODEL,
- )
+ with pytest.warns(AirflowProviderDeprecationWarning):
+ op = CreateAutoMLTextTrainingJobOperator(
+ task_id=TASK_ID,
+ gcp_conn_id=GCP_CONN_ID,
+ impersonation_chain=IMPERSONATION_CHAIN,
+ display_name=DISPLAY_NAME,
+ dataset_id=TEST_DATASET_ID,
+ prediction_type=None,
+ multi_label=False,
+ sentiment_max=10,
+ sync=True,
+ region=GCP_LOCATION,
+ project_id=GCP_PROJECT,
+ parent_model=TEST_PARENT_MODEL,
+ )
op.execute(context={"ti": mock.MagicMock()})
mock_hook.assert_called_once_with(gcp_conn_id=GCP_CONN_ID, impersonation_chain=IMPERSONATION_CHAIN)
mock_dataset.assert_called_once_with(dataset_name=TEST_DATASET_ID)
@@ -1880,20 +1881,21 @@ class TestVertexAICreateAutoMLTextTrainingJobOperator:
@mock.patch(VERTEX_AI_PATH.format("auto_ml.AutoMLHook"))
def test_execute__parent_model_version_index_is_removed(self, mock_hook, mock_dataset):
mock_hook.return_value.create_auto_ml_text_training_job.return_value = (None, "training_id")
- op = CreateAutoMLTextTrainingJobOperator(
- task_id=TASK_ID,
- gcp_conn_id=GCP_CONN_ID,
- impersonation_chain=IMPERSONATION_CHAIN,
- display_name=DISPLAY_NAME,
- dataset_id=TEST_DATASET_ID,
- prediction_type=None,
- multi_label=False,
- sentiment_max=10,
- sync=True,
- region=GCP_LOCATION,
- project_id=GCP_PROJECT,
- parent_model=VERSIONED_TEST_PARENT_MODEL,
- )
+ with pytest.warns(AirflowProviderDeprecationWarning):
+ op = CreateAutoMLTextTrainingJobOperator(
+ task_id=TASK_ID,
+ gcp_conn_id=GCP_CONN_ID,
+ impersonation_chain=IMPERSONATION_CHAIN,
+ display_name=DISPLAY_NAME,
+ dataset_id=TEST_DATASET_ID,
+ prediction_type=None,
+ multi_label=False,
+ sentiment_max=10,
+ sync=True,
+ region=GCP_LOCATION,
+ project_id=GCP_PROJECT,
+ parent_model=VERSIONED_TEST_PARENT_MODEL,
+ )
op.execute(context={"ti": mock.MagicMock()})
mock_hook.return_value.create_auto_ml_text_training_job.assert_called_once_with(
project_id=GCP_PROJECT,
diff --git a/tests/system/providers/google/cloud/automl/example_automl_nl_text_classification.py b/tests/system/providers/google/cloud/automl/example_automl_nl_text_classification.py
deleted file mode 100644
index 9ef04db818..0000000000
--- a/tests/system/providers/google/cloud/automl/example_automl_nl_text_classification.py
+++ /dev/null
@@ -1,176 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""
-Example Airflow DAG that uses Google AutoML services.
-"""
-
-from __future__ import annotations
-
-import os
-from datetime import datetime
-
-from google.cloud.aiplatform import schema
-from google.protobuf.struct_pb2 import Value
-
-from airflow.models.dag import DAG
-from airflow.providers.google.cloud.hooks.automl import CloudAutoMLHook
-from airflow.providers.google.cloud.operators.gcs import (
- GCSCreateBucketOperator,
- GCSDeleteBucketOperator,
- GCSSynchronizeBucketsOperator,
-)
-from airflow.providers.google.cloud.operators.vertex_ai.auto_ml import (
- CreateAutoMLTextTrainingJobOperator,
- DeleteAutoMLTrainingJobOperator,
-)
-from airflow.providers.google.cloud.operators.vertex_ai.dataset import (
- CreateDatasetOperator,
- DeleteDatasetOperator,
- ImportDataOperator,
-)
-from airflow.utils.trigger_rule import TriggerRule
-
-ENV_ID = os.environ.get("SYSTEM_TESTS_ENV_ID", "default")
-GCP_PROJECT_ID = os.environ.get("SYSTEM_TESTS_GCP_PROJECT", "default")
-DAG_ID = "example_automl_text_cls"
-
-GCP_AUTOML_LOCATION = "us-central1"
-DATA_SAMPLE_GCS_BUCKET_NAME = f"bucket_{DAG_ID}_{ENV_ID}".replace("_", "-")
-RESOURCE_DATA_BUCKET = "airflow-system-tests-resources"
-
-TEXT_CLSS_DISPLAY_NAME = f"{DAG_ID}-{ENV_ID}".replace("_", "-")
-AUTOML_DATASET_BUCKET = f"gs://{DATA_SAMPLE_GCS_BUCKET_NAME}/automl/classification.csv"
-
-MODEL_NAME = f"{DAG_ID}-{ENV_ID}".replace("_", "-")
-
-DATASET_NAME = f"ds_clss_{ENV_ID}".replace("-", "_")
-DATASET = {
- "display_name": DATASET_NAME,
- "metadata_schema_uri": schema.dataset.metadata.text,
- "metadata": Value(string_value="clss-dataset"),
-}
-
-DATA_CONFIG = [
- {
-        "import_schema_uri": schema.dataset.ioformat.text.single_label_classification,
- "gcs_source": {"uris": [AUTOML_DATASET_BUCKET]},
- },
-]
-extract_object_id = CloudAutoMLHook.extract_object_id
-
-# Example DAG for AutoML Natural Language Text Classification
-with DAG(
- DAG_ID,
- schedule="@once",
- start_date=datetime(2021, 1, 1),
- catchup=False,
- tags=["example", "automl", "text-classification"],
-) as dag:
- create_bucket = GCSCreateBucketOperator(
- task_id="create_bucket",
- bucket_name=DATA_SAMPLE_GCS_BUCKET_NAME,
- storage_class="REGIONAL",
- location=GCP_AUTOML_LOCATION,
- )
-
- move_dataset_file = GCSSynchronizeBucketsOperator(
- task_id="move_dataset_to_bucket",
- source_bucket=RESOURCE_DATA_BUCKET,
- source_object="vertex-ai/automl/datasets/text",
- destination_bucket=DATA_SAMPLE_GCS_BUCKET_NAME,
- destination_object="automl",
- recursive=True,
- )
-
- create_clss_dataset = CreateDatasetOperator(
- task_id="create_clss_dataset",
- dataset=DATASET,
- region=GCP_AUTOML_LOCATION,
- project_id=GCP_PROJECT_ID,
- )
- clss_dataset_id = create_clss_dataset.output["dataset_id"]
-
- import_clss_dataset = ImportDataOperator(
- task_id="import_clss_data",
- dataset_id=clss_dataset_id,
- region=GCP_AUTOML_LOCATION,
- project_id=GCP_PROJECT_ID,
- import_configs=DATA_CONFIG,
- )
-
- # [START howto_cloud_create_text_classification_training_job_operator]
- create_clss_training_job = CreateAutoMLTextTrainingJobOperator(
- task_id="create_clss_training_job",
- display_name=TEXT_CLSS_DISPLAY_NAME,
- prediction_type="classification",
- multi_label=False,
- dataset_id=clss_dataset_id,
- model_display_name=MODEL_NAME,
- training_fraction_split=0.7,
- validation_fraction_split=0.2,
- test_fraction_split=0.1,
- sync=True,
- region=GCP_AUTOML_LOCATION,
- project_id=GCP_PROJECT_ID,
- )
- # [END howto_cloud_create_text_classification_training_job_operator]
-
- delete_clss_training_job = DeleteAutoMLTrainingJobOperator(
- task_id="delete_clss_training_job",
- training_pipeline_id=create_clss_training_job.output["training_id"],
- region=GCP_AUTOML_LOCATION,
- project_id=GCP_PROJECT_ID,
- trigger_rule=TriggerRule.ALL_DONE,
- )
-
- delete_clss_dataset = DeleteDatasetOperator(
- task_id="delete_clss_dataset",
- dataset_id=clss_dataset_id,
- region=GCP_AUTOML_LOCATION,
- project_id=GCP_PROJECT_ID,
- trigger_rule=TriggerRule.ALL_DONE,
- )
-
- delete_bucket = GCSDeleteBucketOperator(
- task_id="delete_bucket",
- bucket_name=DATA_SAMPLE_GCS_BUCKET_NAME,
- trigger_rule=TriggerRule.ALL_DONE,
- )
-
- (
- # TEST SETUP
- [create_bucket >> move_dataset_file, create_clss_dataset]
- # TEST BODY
- >> import_clss_dataset
- >> create_clss_training_job
- # TEST TEARDOWN
- >> delete_clss_training_job
- >> delete_clss_dataset
- >> delete_bucket
- )
-
- from tests.system.utils.watcher import watcher
-
- # This test needs watcher in order to properly mark success/failure
- # when "tearDown" task with trigger rule is part of the DAG
- list(dag.tasks) >> watcher()
-
-from tests.system.utils import get_test_run # noqa: E402
-
-# Needed to run the example DAG with pytest (see: tests/system/README.md#run_via_pytest)
-test_run = get_test_run(dag)
diff --git a/tests/system/providers/google/cloud/automl/example_automl_nl_text_extraction.py b/tests/system/providers/google/cloud/automl/example_automl_nl_text_extraction.py
deleted file mode 100644
index 8f8564f62c..0000000000
--- a/tests/system/providers/google/cloud/automl/example_automl_nl_text_extraction.py
+++ /dev/null
@@ -1,176 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""
-Example Airflow DAG that uses Google AutoML services.
-"""
-
-from __future__ import annotations
-
-import os
-from datetime import datetime
-
-from google.cloud.aiplatform import schema
-from google.protobuf.struct_pb2 import Value
-
-from airflow.models.dag import DAG
-from airflow.providers.google.cloud.hooks.automl import CloudAutoMLHook
-from airflow.providers.google.cloud.operators.gcs import (
- GCSCreateBucketOperator,
- GCSDeleteBucketOperator,
- GCSSynchronizeBucketsOperator,
-)
-from airflow.providers.google.cloud.operators.vertex_ai.auto_ml import (
- CreateAutoMLTextTrainingJobOperator,
- DeleteAutoMLTrainingJobOperator,
-)
-from airflow.providers.google.cloud.operators.vertex_ai.dataset import (
- CreateDatasetOperator,
- DeleteDatasetOperator,
- ImportDataOperator,
-)
-from airflow.utils.trigger_rule import TriggerRule
-
-ENV_ID = os.environ.get("SYSTEM_TESTS_ENV_ID", "default")
-GCP_PROJECT_ID = os.environ.get("SYSTEM_TESTS_GCP_PROJECT", "default")
-DAG_ID = "example_automl_text_extr"
-
-GCP_AUTOML_LOCATION = "us-central1"
-RESOURCE_DATA_BUCKET = "airflow-system-tests-resources"
-
-DATA_SAMPLE_GCS_BUCKET_NAME = f"bucket_{DAG_ID}_{ENV_ID}".replace("_", "-")
-TEXT_EXTR_DISPLAY_NAME = f"{DAG_ID}-{ENV_ID}".replace("_", "-")
-AUTOML_DATASET_BUCKET = f"gs://{DATA_SAMPLE_GCS_BUCKET_NAME}/automl/extraction.jsonl"
-
-MODEL_NAME = f"{DAG_ID}-{ENV_ID}".replace("_", "-")
-DATASET_NAME = f"ds_clss_{ENV_ID}".replace("-", "_")
-DATASET = {
- "display_name": DATASET_NAME,
- "metadata_schema_uri": schema.dataset.metadata.text,
- "metadata": Value(string_value="extr-dataset"),
-}
-DATA_CONFIG = [
- {
- "import_schema_uri": schema.dataset.ioformat.text.extraction,
- "gcs_source": {"uris": [AUTOML_DATASET_BUCKET]},
- },
-]
-
-extract_object_id = CloudAutoMLHook.extract_object_id
-
-# Example DAG for AutoML Natural Language Entities Extraction
-with DAG(
- DAG_ID,
- schedule="@once", # Override to match your needs
- start_date=datetime(2021, 1, 1),
- catchup=False,
- user_defined_macros={"extract_object_id": extract_object_id},
- tags=["example", "automl", "text-extraction"],
-) as dag:
- create_bucket = GCSCreateBucketOperator(
- task_id="create_bucket",
- bucket_name=DATA_SAMPLE_GCS_BUCKET_NAME,
- storage_class="REGIONAL",
- location=GCP_AUTOML_LOCATION,
- )
-
- move_dataset_file = GCSSynchronizeBucketsOperator(
- task_id="move_dataset_to_bucket",
- source_bucket=RESOURCE_DATA_BUCKET,
- source_object="vertex-ai/automl/datasets/text",
- destination_bucket=DATA_SAMPLE_GCS_BUCKET_NAME,
- destination_object="automl",
- recursive=True,
- )
-
- create_extr_dataset = CreateDatasetOperator(
- task_id="create_extr_dataset",
- dataset=DATASET,
- region=GCP_AUTOML_LOCATION,
- project_id=GCP_PROJECT_ID,
- )
- extr_dataset_id = create_extr_dataset.output["dataset_id"]
-
- import_extr_dataset = ImportDataOperator(
- task_id="import_extr_data",
- dataset_id=extr_dataset_id,
- region=GCP_AUTOML_LOCATION,
- project_id=GCP_PROJECT_ID,
- import_configs=DATA_CONFIG,
- )
-
- # [START howto_cloud_create_text_extraction_training_job_operator]
- create_extr_training_job = CreateAutoMLTextTrainingJobOperator(
- task_id="create_extr_training_job",
- display_name=TEXT_EXTR_DISPLAY_NAME,
- prediction_type="extraction",
- multi_label=False,
- dataset_id=extr_dataset_id,
- model_display_name=MODEL_NAME,
- training_fraction_split=0.8,
- validation_fraction_split=0.1,
- test_fraction_split=0.1,
- sync=True,
- region=GCP_AUTOML_LOCATION,
- project_id=GCP_PROJECT_ID,
- )
- # [END howto_cloud_create_text_extraction_training_job_operator]
-
- delete_extr_training_job = DeleteAutoMLTrainingJobOperator(
- task_id="delete_extr_training_job",
- training_pipeline_id=create_extr_training_job.output["training_id"],
- region=GCP_AUTOML_LOCATION,
- project_id=GCP_PROJECT_ID,
- trigger_rule=TriggerRule.ALL_DONE,
- )
-
- delete_extr_dataset = DeleteDatasetOperator(
- task_id="delete_extr_dataset",
- dataset_id=extr_dataset_id,
- region=GCP_AUTOML_LOCATION,
- project_id=GCP_PROJECT_ID,
- trigger_rule=TriggerRule.ALL_DONE,
- )
-
- delete_bucket = GCSDeleteBucketOperator(
- task_id="delete_bucket",
- bucket_name=DATA_SAMPLE_GCS_BUCKET_NAME,
- trigger_rule=TriggerRule.ALL_DONE,
- )
-
- (
- # TEST SETUP
- [create_bucket >> move_dataset_file, create_extr_dataset]
- # TEST BODY
- >> import_extr_dataset
- >> create_extr_training_job
- # TEST TEARDOWN
- >> delete_extr_training_job
- >> delete_extr_dataset
- >> delete_bucket
- )
-
- from tests.system.utils.watcher import watcher
-
- # This test needs watcher in order to properly mark success/failure
- # when "tearDown" task with trigger rule is part of the DAG
- list(dag.tasks) >> watcher()
-
-from tests.system.utils import get_test_run # noqa: E402
-
-# Needed to run the example DAG with pytest (see: tests/system/README.md#run_via_pytest)
-test_run = get_test_run(dag)
diff --git a/tests/system/providers/google/cloud/automl/example_automl_nl_text_sentiment.py b/tests/system/providers/google/cloud/automl/example_automl_nl_text_sentiment.py
deleted file mode 100644
index 94f349c6c3..0000000000
--- a/tests/system/providers/google/cloud/automl/example_automl_nl_text_sentiment.py
+++ /dev/null
@@ -1,178 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""
-Example Airflow DAG that uses Google AutoML services.
-"""
-
-from __future__ import annotations
-
-import os
-from datetime import datetime
-
-from google.cloud.aiplatform import schema
-from google.protobuf.struct_pb2 import Value
-
-from airflow.models.dag import DAG
-from airflow.providers.google.cloud.hooks.automl import CloudAutoMLHook
-from airflow.providers.google.cloud.operators.gcs import (
- GCSCreateBucketOperator,
- GCSDeleteBucketOperator,
- GCSSynchronizeBucketsOperator,
-)
-from airflow.providers.google.cloud.operators.vertex_ai.auto_ml import (
- CreateAutoMLTextTrainingJobOperator,
- DeleteAutoMLTrainingJobOperator,
-)
-from airflow.providers.google.cloud.operators.vertex_ai.dataset import (
- CreateDatasetOperator,
- DeleteDatasetOperator,
- ImportDataOperator,
-)
-from airflow.utils.trigger_rule import TriggerRule
-
-ENV_ID = os.environ.get("SYSTEM_TESTS_ENV_ID", "default")
-GCP_PROJECT_ID = os.environ.get("SYSTEM_TESTS_GCP_PROJECT", "default")
-DAG_ID = "example_automl_text_sent"
-GCP_AUTOML_LOCATION = "us-central1"
-DATA_SAMPLE_GCS_BUCKET_NAME = f"bucket_{DAG_ID}_{ENV_ID}".replace("_", "-")
-RESOURCE_DATA_BUCKET = "airflow-system-tests-resources"
-
-TEXT_SENT_DISPLAY_NAME = f"{DAG_ID}-{ENV_ID}".replace("_", "-")
-AUTOML_DATASET_BUCKET = f"gs://{DATA_SAMPLE_GCS_BUCKET_NAME}/automl/sentiment.csv"
-
-MODEL_NAME = f"{DAG_ID}-{ENV_ID}".replace("_", "-")
-
-DATASET_NAME = f"ds_sent_{ENV_ID}".replace("-", "_")
-DATASET = {
- "display_name": DATASET_NAME,
- "metadata_schema_uri": schema.dataset.metadata.text,
- "metadata": Value(string_value="sent-dataset"),
-}
-
-DATA_CONFIG = [
- {
- "import_schema_uri": schema.dataset.ioformat.text.sentiment,
- "gcs_source": {"uris": [AUTOML_DATASET_BUCKET]},
- },
-]
-
-extract_object_id = CloudAutoMLHook.extract_object_id
-
-# Example DAG for AutoML Natural Language Text Sentiment
-with DAG(
- DAG_ID,
- schedule="@once",
- start_date=datetime(2021, 1, 1),
- catchup=False,
- user_defined_macros={"extract_object_id": extract_object_id},
- tags=["example", "automl", "text-sentiment"],
-) as dag:
- create_bucket = GCSCreateBucketOperator(
- task_id="create_bucket",
- bucket_name=DATA_SAMPLE_GCS_BUCKET_NAME,
- storage_class="REGIONAL",
- location=GCP_AUTOML_LOCATION,
- )
-
- move_dataset_file = GCSSynchronizeBucketsOperator(
- task_id="move_dataset_to_bucket",
- source_bucket=RESOURCE_DATA_BUCKET,
- source_object="vertex-ai/automl/datasets/text",
- destination_bucket=DATA_SAMPLE_GCS_BUCKET_NAME,
- destination_object="automl",
- recursive=True,
- )
-
- create_sent_dataset = CreateDatasetOperator(
- task_id="create_sent_dataset",
- dataset=DATASET,
- region=GCP_AUTOML_LOCATION,
- project_id=GCP_PROJECT_ID,
- )
- sent_dataset_id = create_sent_dataset.output["dataset_id"]
-
- import_sent_dataset = ImportDataOperator(
- task_id="import_sent_data",
- dataset_id=sent_dataset_id,
- region=GCP_AUTOML_LOCATION,
- project_id=GCP_PROJECT_ID,
- import_configs=DATA_CONFIG,
- )
-
- # [START howto_cloud_create_text_sentiment_training_job_operator]
- create_sent_training_job = CreateAutoMLTextTrainingJobOperator(
- task_id="create_sent_training_job",
- display_name=TEXT_SENT_DISPLAY_NAME,
- prediction_type="sentiment",
- multi_label=False,
- dataset_id=sent_dataset_id,
- model_display_name=MODEL_NAME,
- training_fraction_split=0.7,
- validation_fraction_split=0.2,
- test_fraction_split=0.1,
- sentiment_max=5,
- sync=True,
- region=GCP_AUTOML_LOCATION,
- project_id=GCP_PROJECT_ID,
- )
- # [END howto_cloud_create_text_sentiment_training_job_operator]
-
- delete_sent_training_job = DeleteAutoMLTrainingJobOperator(
- task_id="delete_sent_training_job",
- training_pipeline_id=create_sent_training_job.output["training_id"],
- region=GCP_AUTOML_LOCATION,
- project_id=GCP_PROJECT_ID,
- trigger_rule=TriggerRule.ALL_DONE,
- )
-
- delete_sent_dataset = DeleteDatasetOperator(
- task_id="delete_sent_dataset",
- dataset_id=sent_dataset_id,
- region=GCP_AUTOML_LOCATION,
- project_id=GCP_PROJECT_ID,
- trigger_rule=TriggerRule.ALL_DONE,
- )
-
- delete_bucket = GCSDeleteBucketOperator(
- task_id="delete_bucket",
- bucket_name=DATA_SAMPLE_GCS_BUCKET_NAME,
- trigger_rule=TriggerRule.ALL_DONE,
- )
-
- (
- # TEST SETUP
- [create_bucket >> move_dataset_file, create_sent_dataset]
- # TEST BODY
- >> import_sent_dataset
- >> create_sent_training_job
- # TEST TEARDOWN
- >> delete_sent_training_job
- >> delete_sent_dataset
- >> delete_bucket
- )
-
- from tests.system.utils.watcher import watcher
-
- # This test needs watcher in order to properly mark success/failure
- # when "tearDown" task with trigger rule is part of the DAG
- list(dag.tasks) >> watcher()
-
-from tests.system.utils import get_test_run # noqa: E402
-
-# Needed to run the example DAG with pytest (see: tests/system/README.md#run_via_pytest)
-test_run = get_test_run(dag)
diff --git a/tests/system/providers/google/cloud/vertex_ai/example_vertex_ai_auto_ml_text_training.py b/tests/system/providers/google/cloud/vertex_ai/example_vertex_ai_auto_ml_text_training.py
deleted file mode 100644
index b91a8cd969..0000000000
--- a/tests/system/providers/google/cloud/vertex_ai/example_vertex_ai_auto_ml_text_training.py
+++ /dev/null
@@ -1,143 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-
-"""
-Example Airflow DAG for Google Vertex AI service testing Auto ML operations.
-"""
-
-from __future__ import annotations
-
-import os
-from datetime import datetime
-
-from google.cloud.aiplatform import schema
-from google.protobuf.struct_pb2 import Value
-
-from airflow.models.dag import DAG
-from airflow.providers.google.cloud.operators.vertex_ai.auto_ml import (
- CreateAutoMLTextTrainingJobOperator,
- DeleteAutoMLTrainingJobOperator,
-)
-from airflow.providers.google.cloud.operators.vertex_ai.dataset import (
- CreateDatasetOperator,
- DeleteDatasetOperator,
- ImportDataOperator,
-)
-from airflow.utils.trigger_rule import TriggerRule
-
-ENV_ID = os.environ.get("SYSTEM_TESTS_ENV_ID", "default")
-PROJECT_ID = os.environ.get("SYSTEM_TESTS_GCP_PROJECT", "default")
-DAG_ID = "vertex_ai_auto_ml_operations"
-REGION = "us-central1"
-TEXT_DISPLAY_NAME = f"auto-ml-text-{ENV_ID}"
-MODEL_DISPLAY_NAME = f"auto-ml-text-model-{ENV_ID}"
-
-RESOURCE_DATA_BUCKET = "airflow-system-tests-resources"
-
-TEXT_DATASET = {
- "display_name": f"text-dataset-{ENV_ID}",
- "metadata_schema_uri": schema.dataset.metadata.text,
- "metadata": Value(string_value="text-dataset"),
-}
-TEXT_DATA_CONFIG = [
- {
-        "import_schema_uri": schema.dataset.ioformat.text.single_label_classification,
-        "gcs_source": {"uris": [f"gs://{RESOURCE_DATA_BUCKET}/vertex-ai/datasets/text-dataset.csv"]},
- },
-]
-
-with DAG(
- f"{DAG_ID}_text_training_job",
- schedule="@once",
- start_date=datetime(2021, 1, 1),
- catchup=False,
- tags=["example", "vertex_ai", "auto_ml"],
-) as dag:
- create_text_dataset = CreateDatasetOperator(
- task_id="text_dataset",
- dataset=TEXT_DATASET,
- region=REGION,
- project_id=PROJECT_ID,
- )
- text_dataset_id = create_text_dataset.output["dataset_id"]
-
- import_text_dataset = ImportDataOperator(
- task_id="import_text_data",
- dataset_id=text_dataset_id,
- region=REGION,
- project_id=PROJECT_ID,
- import_configs=TEXT_DATA_CONFIG,
- )
-
- # [START how_to_cloud_vertex_ai_create_auto_ml_text_training_job_operator]
- create_auto_ml_text_training_job = CreateAutoMLTextTrainingJobOperator(
- task_id="auto_ml_text_task",
- display_name=TEXT_DISPLAY_NAME,
- prediction_type="classification",
- multi_label=False,
- dataset_id=text_dataset_id,
- model_display_name=MODEL_DISPLAY_NAME,
- training_fraction_split=0.7,
- validation_fraction_split=0.2,
- test_fraction_split=0.1,
- sync=True,
- region=REGION,
- project_id=PROJECT_ID,
- )
- # [END how_to_cloud_vertex_ai_create_auto_ml_text_training_job_operator]
-
- delete_auto_ml_text_training_job = DeleteAutoMLTrainingJobOperator(
- task_id="delete_auto_ml_text_training_job",
-        training_pipeline_id="{{ task_instance.xcom_pull(task_ids='auto_ml_text_task', key='training_id') }}",
- region=REGION,
- project_id=PROJECT_ID,
- trigger_rule=TriggerRule.ALL_DONE,
- )
-
- delete_text_dataset = DeleteDatasetOperator(
- task_id="delete_text_dataset",
- dataset_id=text_dataset_id,
- region=REGION,
- project_id=PROJECT_ID,
- trigger_rule=TriggerRule.ALL_DONE,
- )
-
- (
- # TEST SETUP
- create_text_dataset
- >> import_text_dataset
- # TEST BODY
- >> create_auto_ml_text_training_job
- # TEST TEARDOWN
- >> delete_auto_ml_text_training_job
- >> delete_text_dataset
- )
-
- # ### Everything below this line is not part of example ###
- # ### Just for system tests purpose ###
- from tests.system.utils.watcher import watcher
-
- # This test needs watcher in order to properly mark success/failure
- # when "tearDown" task with trigger rule is part of the DAG
- list(dag.tasks) >> watcher()
-
-from tests.system.utils import get_test_run # noqa: E402
-
-# Needed to run the example DAG with pytest (see: tests/system/README.md#run_via_pytest)
-test_run = get_test_run(dag)
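
For migration, here is a hedged sketch of the replacement path named in the
docs changes above: supervised fine-tuning of a Gemini model via
SupervisedFineTuningTrainOperator. The project id, region, base model id, and
dataset URI are illustrative placeholders, and the parameter set shown is an
assumption based on the provider's generative_model module around the time of
this commit; check the provider docs for the current signature.

    from __future__ import annotations

    from datetime import datetime

    from airflow.models.dag import DAG
    from airflow.providers.google.cloud.operators.vertex_ai.generative_model import (
        SupervisedFineTuningTrainOperator,
    )

    with DAG(
        "example_gemini_supervised_fine_tuning",
        schedule="@once",
        start_date=datetime(2024, 1, 1),
        catchup=False,
    ) as dag:
        # Fine-tune a Gemini base model on a JSONL training set in GCS,
        # replacing the deprecated CreateAutoMLTextTrainingJobOperator flow.
        fine_tune_text_model = SupervisedFineTuningTrainOperator(
            task_id="fine_tune_text_model",
            project_id="my-project",  # placeholder project id
            location="us-central1",  # placeholder region
            source_model="gemini-1.0-pro-002",  # placeholder base model id
            train_dataset="gs://my-bucket/datasets/text-train.jsonl",  # placeholder dataset
        )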