This is an automated email from the ASF dual-hosted git repository.
shahar pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push:
new d7343816b3 Mark VertexAI AutoMLText deprecation (#42251)
d7343816b3 is described below
commit d7343816b3406470dd7f963e1959171c32ef956f
Author: olegkachur-e <[email protected]>
AuthorDate: Thu Sep 19 21:27:22 2024 +0200
Mark VertexAI AutoMLText deprecation (#42251)
- Label deprecated items with the deprecated decorator.
- Refactor vertex_ai AutoMLHook.create_auto_ml_text_training_job so that it no
  longer calls get_auto_ml_text_training_job, which would trigger mypy
  errors once @deprecated is applied to the hook method.
- Update docs.
- Delete irrelevant system tests.
Co-authored-by: Oleg Kachur <[email protected]>
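
For context, the deprecation mechanism this commit applies is the provider's
`deprecated` helper, which wraps a callable and emits a warning when it is
invoked. Below is a minimal sketch of the pattern, reusing the decorator
arguments from the hunks further down; the stand-in function and the warning
check are illustrative, not the real hook method:

    import warnings

    from airflow.exceptions import AirflowProviderDeprecationWarning
    from airflow.providers.google.common.deprecated import deprecated

    @deprecated(
        planned_removal_date="September 15, 2025",
        category=AirflowProviderDeprecationWarning,
        reason="Deprecation of AutoMLText API",
    )
    def create_auto_ml_text_training_job() -> None:
        """Stand-in for the hook method; warns when called."""

    # Calling the decorated function emits the deprecation warning,
    # which is what the updated unit tests assert via pytest.warns.
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        create_auto_ml_text_training_job()
    assert any(issubclass(w.category, AirflowProviderDeprecationWarning) for w in caught)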
---
.../google/cloud/hooks/vertex_ai/auto_ml.py | 36 ++++-
.../google/cloud/operators/vertex_ai/auto_ml.py | 16 +-
.../operators/cloud/automl.rst | 13 +-
.../operators/cloud/vertex_ai.rst | 11 +-
tests/always/test_project_structure.py | 1 +
.../google/cloud/operators/test_vertex_ai.py | 58 +++----
.../example_automl_nl_text_classification.py | 176 --------------------
.../automl/example_automl_nl_text_extraction.py | 176 --------------------
.../automl/example_automl_nl_text_sentiment.py | 178 ---------------------
.../example_vertex_ai_auto_ml_text_training.py | 143 -----------------
10 files changed, 83 insertions(+), 725 deletions(-)
diff --git a/airflow/providers/google/cloud/hooks/vertex_ai/auto_ml.py b/airflow/providers/google/cloud/hooks/vertex_ai/auto_ml.py
index b1ad7d1a07..3a03502250 100644
--- a/airflow/providers/google/cloud/hooks/vertex_ai/auto_ml.py
+++ b/airflow/providers/google/cloud/hooks/vertex_ai/auto_ml.py
@@ -36,6 +36,7 @@ from google.cloud.aiplatform import (
from google.cloud.aiplatform_v1 import JobServiceClient, PipelineServiceClient
from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
+from airflow.providers.google.common.deprecated import deprecated
from airflow.providers.google.common.hooks.base_google import GoogleBaseHook
if TYPE_CHECKING:
@@ -185,6 +186,11 @@ class AutoMLHook(GoogleBaseHook):
model_encryption_spec_key_name=model_encryption_spec_key_name,
)
+ @deprecated(
+ planned_removal_date="June 15, 2025",
+ category=AirflowProviderDeprecationWarning,
+ reason="Deprecation of AutoMLText API",
+ )
def get_auto_ml_text_training_job(
self,
display_name: str,
@@ -197,7 +203,12 @@ class AutoMLHook(GoogleBaseHook):
training_encryption_spec_key_name: str | None = None,
model_encryption_spec_key_name: str | None = None,
) -> AutoMLTextTrainingJob:
- """Return AutoMLTextTrainingJob object."""
+ """
+ Return AutoMLTextTrainingJob object.
+
+ WARNING: Text creation API is deprecated since September 15, 2024
+    (https://cloud.google.com/vertex-ai/docs/tutorials/text-classification-automl/overview).
+ """
return AutoMLTextTrainingJob(
display_name=display_name,
prediction_type=prediction_type,
@@ -980,6 +991,11 @@ class AutoMLHook(GoogleBaseHook):
return model, training_id
@GoogleBaseHook.fallback_to_default_project_id
+ @deprecated(
+ planned_removal_date="September 15, 2025",
+ category=AirflowProviderDeprecationWarning,
+ reason="Deprecation of AutoMLText API",
+ )
def create_auto_ml_text_training_job(
self,
project_id: str,
@@ -1009,6 +1025,9 @@ class AutoMLHook(GoogleBaseHook):
"""
Create an AutoML Text Training Job.
+ WARNING: Text creation API is deprecated since September 15, 2024
+    (https://cloud.google.com/vertex-ai/docs/tutorials/text-classification-automl/overview).
+
:param project_id: Required. Project to run training in.
:param region: Required. Location to run training in.
:param display_name: Required. The user-defined name of this TrainingPipeline.
@@ -1101,13 +1120,14 @@ class AutoMLHook(GoogleBaseHook):
concurrent Future and any downstream object will be immediately
returned and synced when the
Future has completed.
"""
- self._job = self.get_auto_ml_text_training_job(
- project=project_id,
- location=region,
+ self._job = AutoMLTextTrainingJob(
display_name=display_name,
prediction_type=prediction_type,
multi_label=multi_label,
sentiment_max=sentiment_max,
+ project=project_id,
+ location=region,
+ credentials=self.get_credentials(),
labels=labels,
training_encryption_spec_key_name=training_encryption_spec_key_name,
model_encryption_spec_key_name=model_encryption_spec_key_name,
@@ -1117,13 +1137,13 @@ class AutoMLHook(GoogleBaseHook):
raise AirflowException("AutoMLTextTrainingJob was not created")
model = self._job.run(
- dataset=dataset,
- training_fraction_split=training_fraction_split,
- validation_fraction_split=validation_fraction_split,
+ dataset=dataset, # type: ignore[arg-type]
+            training_fraction_split=training_fraction_split,  # type: ignore[call-arg]
+            validation_fraction_split=validation_fraction_split,  # type: ignore[call-arg]
test_fraction_split=test_fraction_split,
training_filter_split=training_filter_split,
validation_filter_split=validation_filter_split,
- test_filter_split=test_filter_split,
+ test_filter_split=test_filter_split, # type: ignore[call-arg]
model_display_name=model_display_name,
model_labels=model_labels,
sync=sync,
diff --git a/airflow/providers/google/cloud/operators/vertex_ai/auto_ml.py b/airflow/providers/google/cloud/operators/vertex_ai/auto_ml.py
index 76319b892e..ec9a6784f7 100644
--- a/airflow/providers/google/cloud/operators/vertex_ai/auto_ml.py
+++ b/airflow/providers/google/cloud/operators/vertex_ai/auto_ml.py
@@ -455,8 +455,22 @@ class CreateAutoMLTabularTrainingJobOperator(AutoMLTrainingJobBaseOperator):
return result
+@deprecated(
+ planned_removal_date="September 15, 2024",
+ use_instead="SupervisedFineTuningTrainOperator",
+ instructions=(
+ "Please consider using Fine Tuning over the Gemini model. "
+        "More info: https://cloud.google.com/vertex-ai/docs/start/automl-gemini-comparison"
+ ),
+ category=AirflowProviderDeprecationWarning,
+)
class CreateAutoMLTextTrainingJobOperator(AutoMLTrainingJobBaseOperator):
- """Create Auto ML Text Training job."""
+ """
+ Create Auto ML Text Training job.
+
+ WARNING: Text creation API is deprecated since September 15, 2024
+    (https://cloud.google.com/vertex-ai/docs/tutorials/text-classification-automl/overview).
+ """
template_fields = [
"parent_model",
diff --git a/docs/apache-airflow-providers-google/operators/cloud/automl.rst b/docs/apache-airflow-providers-google/operators/cloud/automl.rst
index 5858b3a7c4..fdedb46ea8 100644
--- a/docs/apache-airflow-providers-google/operators/cloud/automl.rst
+++ b/docs/apache-airflow-providers-google/operators/cloud/automl.rst
@@ -109,15 +109,12 @@ available on the Vertex AI platform. Please use
:class:`~airflow.providers.google.cloud.operators.vertex_ai.auto_ml.CreateAutoMLImageTrainingJobOperator` or
:class:`~airflow.providers.google.cloud.operators.vertex_ai.auto_ml.CreateAutoMLVideoTrainingJobOperator`.
-You can find example on how to use VertexAI operators for AutoML Natural Language classification here:
+The Vertex AI AutoMLText API for model training was deprecated on September 15, 2024, and the remaining
+parts of the API are planned for removal on June 15, 2025.
+Please consider fine-tuning a Gemini model instead -
+https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini-tuning.
-.. exampleinclude:: /../../tests/system/providers/google/cloud/automl/example_automl_nl_text_classification.py
-    :language: python
-    :dedent: 4
-    :start-after: [START howto_cloud_create_text_classification_training_job_operator]
-    :end-before: [END howto_cloud_create_text_classification_training_job_operator]
-
-Additionally, you can find example on how to use VertexAI operators for AutoML Vision classification here:
+You can find an example of how to use VertexAI operators for AutoML Vision classification here:
.. exampleinclude:: /../../tests/system/providers/google/cloud/automl/example_automl_vision_classification.py
:language: python
diff --git a/docs/apache-airflow-providers-google/operators/cloud/vertex_ai.rst b/docs/apache-airflow-providers-google/operators/cloud/vertex_ai.rst
index db23d4dfea..9a03ed2924 100644
--- a/docs/apache-airflow-providers-google/operators/cloud/vertex_ai.rst
+++ b/docs/apache-airflow-providers-google/operators/cloud/vertex_ai.rst
@@ -260,14 +260,11 @@ put dataset id to ``dataset_id`` parameter in operator.
How to run AutoML Text Training Job
:class:`~airflow.providers.google.cloud.operators.vertex_ai.auto_ml.CreateAutoMLTextTrainingJobOperator`
-Before start running this Job you must prepare and create ``Text`` dataset. After that you should
-put dataset id to ``dataset_id`` parameter in operator.
+This operator is deprecated; please use
+:class:`~airflow.providers.google.cloud.operators.vertex_ai.generative_model.SupervisedFineTuningTrainOperator`
+with the Gemini model instead.
+More info: https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini-tuning#tuning-gemini
-.. exampleinclude:: /../../tests/system/providers/google/cloud/vertex_ai/example_vertex_ai_auto_ml_text_training.py
-    :language: python
-    :dedent: 4
-    :start-after: [START how_to_cloud_vertex_ai_create_auto_ml_text_training_job_operator]
-    :end-before: [END how_to_cloud_vertex_ai_create_auto_ml_text_training_job_operator]
How to run AutoML Video Training Job
:class:`~airflow.providers.google.cloud.operators.vertex_ai.auto_ml.CreateAutoMLVideoTrainingJobOperator`
diff --git a/tests/always/test_project_structure.py b/tests/always/test_project_structure.py
index 6cf3329e8e..b6ef9a89f9 100644
--- a/tests/always/test_project_structure.py
+++ b/tests/always/test_project_structure.py
@@ -389,6 +389,7 @@ class TestGoogleProviderProjectStructure(ExampleCoverageTest, AssetsCoverageTest
"airflow.providers.google.cloud.operators.bigquery.BigQueryPatchDatasetOperator",
"airflow.providers.google.cloud.operators.dataflow.DataflowCreatePythonJobOperator",
"airflow.providers.google.cloud.operators.bigquery.BigQueryExecuteQueryOperator",
+        "airflow.providers.google.cloud.operators.vertex_ai.auto_ml.CreateAutoMLTextTrainingJobOperator",
"airflow.providers.google.cloud.sensors.bigquery.BigQueryTableExistenceAsyncSensor",
"airflow.providers.google.cloud.sensors.bigquery.BigQueryTableExistencePartitionAsyncSensor",
"airflow.providers.google.cloud.sensors.cloud_composer.CloudComposerEnvironmentSensor",
diff --git a/tests/providers/google/cloud/operators/test_vertex_ai.py b/tests/providers/google/cloud/operators/test_vertex_ai.py
index ef0ca53608..25deb188aa 100644
--- a/tests/providers/google/cloud/operators/test_vertex_ai.py
+++ b/tests/providers/google/cloud/operators/test_vertex_ai.py
@@ -1833,20 +1833,21 @@ class TestVertexAICreateAutoMLTextTrainingJobOperator:
@mock.patch(VERTEX_AI_PATH.format("auto_ml.AutoMLHook"))
def test_execute(self, mock_hook, mock_dataset):
mock_hook.return_value.create_auto_ml_text_training_job.return_value = (None, "training_id")
- op = CreateAutoMLTextTrainingJobOperator(
- task_id=TASK_ID,
- gcp_conn_id=GCP_CONN_ID,
- impersonation_chain=IMPERSONATION_CHAIN,
- display_name=DISPLAY_NAME,
- dataset_id=TEST_DATASET_ID,
- prediction_type=None,
- multi_label=False,
- sentiment_max=10,
- sync=True,
- region=GCP_LOCATION,
- project_id=GCP_PROJECT,
- parent_model=TEST_PARENT_MODEL,
- )
+ with pytest.warns(AirflowProviderDeprecationWarning):
+ op = CreateAutoMLTextTrainingJobOperator(
+ task_id=TASK_ID,
+ gcp_conn_id=GCP_CONN_ID,
+ impersonation_chain=IMPERSONATION_CHAIN,
+ display_name=DISPLAY_NAME,
+ dataset_id=TEST_DATASET_ID,
+ prediction_type=None,
+ multi_label=False,
+ sentiment_max=10,
+ sync=True,
+ region=GCP_LOCATION,
+ project_id=GCP_PROJECT,
+ parent_model=TEST_PARENT_MODEL,
+ )
op.execute(context={"ti": mock.MagicMock()})
mock_hook.assert_called_once_with(gcp_conn_id=GCP_CONN_ID, impersonation_chain=IMPERSONATION_CHAIN)
mock_dataset.assert_called_once_with(dataset_name=TEST_DATASET_ID)
@@ -1880,20 +1881,21 @@ class TestVertexAICreateAutoMLTextTrainingJobOperator:
@mock.patch(VERTEX_AI_PATH.format("auto_ml.AutoMLHook"))
def test_execute__parent_model_version_index_is_removed(self, mock_hook, mock_dataset):
mock_hook.return_value.create_auto_ml_text_training_job.return_value = (None, "training_id")
- op = CreateAutoMLTextTrainingJobOperator(
- task_id=TASK_ID,
- gcp_conn_id=GCP_CONN_ID,
- impersonation_chain=IMPERSONATION_CHAIN,
- display_name=DISPLAY_NAME,
- dataset_id=TEST_DATASET_ID,
- prediction_type=None,
- multi_label=False,
- sentiment_max=10,
- sync=True,
- region=GCP_LOCATION,
- project_id=GCP_PROJECT,
- parent_model=VERSIONED_TEST_PARENT_MODEL,
- )
+ with pytest.warns(AirflowProviderDeprecationWarning):
+ op = CreateAutoMLTextTrainingJobOperator(
+ task_id=TASK_ID,
+ gcp_conn_id=GCP_CONN_ID,
+ impersonation_chain=IMPERSONATION_CHAIN,
+ display_name=DISPLAY_NAME,
+ dataset_id=TEST_DATASET_ID,
+ prediction_type=None,
+ multi_label=False,
+ sentiment_max=10,
+ sync=True,
+ region=GCP_LOCATION,
+ project_id=GCP_PROJECT,
+ parent_model=VERSIONED_TEST_PARENT_MODEL,
+ )
op.execute(context={"ti": mock.MagicMock()})
mock_hook.return_value.create_auto_ml_text_training_job.assert_called_once_with(
project_id=GCP_PROJECT,
diff --git a/tests/system/providers/google/cloud/automl/example_automl_nl_text_classification.py b/tests/system/providers/google/cloud/automl/example_automl_nl_text_classification.py
deleted file mode 100644
index 9ef04db818..0000000000
--- a/tests/system/providers/google/cloud/automl/example_automl_nl_text_classification.py
+++ /dev/null
@@ -1,176 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""
-Example Airflow DAG that uses Google AutoML services.
-"""
-
-from __future__ import annotations
-
-import os
-from datetime import datetime
-
-from google.cloud.aiplatform import schema
-from google.protobuf.struct_pb2 import Value
-
-from airflow.models.dag import DAG
-from airflow.providers.google.cloud.hooks.automl import CloudAutoMLHook
-from airflow.providers.google.cloud.operators.gcs import (
- GCSCreateBucketOperator,
- GCSDeleteBucketOperator,
- GCSSynchronizeBucketsOperator,
-)
-from airflow.providers.google.cloud.operators.vertex_ai.auto_ml import (
- CreateAutoMLTextTrainingJobOperator,
- DeleteAutoMLTrainingJobOperator,
-)
-from airflow.providers.google.cloud.operators.vertex_ai.dataset import (
- CreateDatasetOperator,
- DeleteDatasetOperator,
- ImportDataOperator,
-)
-from airflow.utils.trigger_rule import TriggerRule
-
-ENV_ID = os.environ.get("SYSTEM_TESTS_ENV_ID", "default")
-GCP_PROJECT_ID = os.environ.get("SYSTEM_TESTS_GCP_PROJECT", "default")
-DAG_ID = "example_automl_text_cls"
-
-GCP_AUTOML_LOCATION = "us-central1"
-DATA_SAMPLE_GCS_BUCKET_NAME = f"bucket_{DAG_ID}_{ENV_ID}".replace("_", "-")
-RESOURCE_DATA_BUCKET = "airflow-system-tests-resources"
-
-TEXT_CLSS_DISPLAY_NAME = f"{DAG_ID}-{ENV_ID}".replace("_", "-")
-AUTOML_DATASET_BUCKET = f"gs://{DATA_SAMPLE_GCS_BUCKET_NAME}/automl/classification.csv"
-
-MODEL_NAME = f"{DAG_ID}-{ENV_ID}".replace("_", "-")
-
-DATASET_NAME = f"ds_clss_{ENV_ID}".replace("-", "_")
-DATASET = {
- "display_name": DATASET_NAME,
- "metadata_schema_uri": schema.dataset.metadata.text,
- "metadata": Value(string_value="clss-dataset"),
-}
-
-DATA_CONFIG = [
- {
-        "import_schema_uri": schema.dataset.ioformat.text.single_label_classification,
- "gcs_source": {"uris": [AUTOML_DATASET_BUCKET]},
- },
-]
-extract_object_id = CloudAutoMLHook.extract_object_id
-
-# Example DAG for AutoML Natural Language Text Classification
-with DAG(
- DAG_ID,
- schedule="@once",
- start_date=datetime(2021, 1, 1),
- catchup=False,
- tags=["example", "automl", "text-classification"],
-) as dag:
- create_bucket = GCSCreateBucketOperator(
- task_id="create_bucket",
- bucket_name=DATA_SAMPLE_GCS_BUCKET_NAME,
- storage_class="REGIONAL",
- location=GCP_AUTOML_LOCATION,
- )
-
- move_dataset_file = GCSSynchronizeBucketsOperator(
- task_id="move_dataset_to_bucket",
- source_bucket=RESOURCE_DATA_BUCKET,
- source_object="vertex-ai/automl/datasets/text",
- destination_bucket=DATA_SAMPLE_GCS_BUCKET_NAME,
- destination_object="automl",
- recursive=True,
- )
-
- create_clss_dataset = CreateDatasetOperator(
- task_id="create_clss_dataset",
- dataset=DATASET,
- region=GCP_AUTOML_LOCATION,
- project_id=GCP_PROJECT_ID,
- )
- clss_dataset_id = create_clss_dataset.output["dataset_id"]
-
- import_clss_dataset = ImportDataOperator(
- task_id="import_clss_data",
- dataset_id=clss_dataset_id,
- region=GCP_AUTOML_LOCATION,
- project_id=GCP_PROJECT_ID,
- import_configs=DATA_CONFIG,
- )
-
- # [START howto_cloud_create_text_classification_training_job_operator]
- create_clss_training_job = CreateAutoMLTextTrainingJobOperator(
- task_id="create_clss_training_job",
- display_name=TEXT_CLSS_DISPLAY_NAME,
- prediction_type="classification",
- multi_label=False,
- dataset_id=clss_dataset_id,
- model_display_name=MODEL_NAME,
- training_fraction_split=0.7,
- validation_fraction_split=0.2,
- test_fraction_split=0.1,
- sync=True,
- region=GCP_AUTOML_LOCATION,
- project_id=GCP_PROJECT_ID,
- )
- # [END howto_cloud_create_text_classification_training_job_operator]
-
- delete_clss_training_job = DeleteAutoMLTrainingJobOperator(
- task_id="delete_clss_training_job",
- training_pipeline_id=create_clss_training_job.output["training_id"],
- region=GCP_AUTOML_LOCATION,
- project_id=GCP_PROJECT_ID,
- trigger_rule=TriggerRule.ALL_DONE,
- )
-
- delete_clss_dataset = DeleteDatasetOperator(
- task_id="delete_clss_dataset",
- dataset_id=clss_dataset_id,
- region=GCP_AUTOML_LOCATION,
- project_id=GCP_PROJECT_ID,
- trigger_rule=TriggerRule.ALL_DONE,
- )
-
- delete_bucket = GCSDeleteBucketOperator(
- task_id="delete_bucket",
- bucket_name=DATA_SAMPLE_GCS_BUCKET_NAME,
- trigger_rule=TriggerRule.ALL_DONE,
- )
-
- (
- # TEST SETUP
- [create_bucket >> move_dataset_file, create_clss_dataset]
- # TEST BODY
- >> import_clss_dataset
- >> create_clss_training_job
- # TEST TEARDOWN
- >> delete_clss_training_job
- >> delete_clss_dataset
- >> delete_bucket
- )
-
- from tests.system.utils.watcher import watcher
-
- # This test needs watcher in order to properly mark success/failure
- # when "tearDown" task with trigger rule is part of the DAG
- list(dag.tasks) >> watcher()
-
-from tests.system.utils import get_test_run # noqa: E402
-
-# Needed to run the example DAG with pytest (see: tests/system/README.md#run_via_pytest)
-test_run = get_test_run(dag)
diff --git a/tests/system/providers/google/cloud/automl/example_automl_nl_text_extraction.py b/tests/system/providers/google/cloud/automl/example_automl_nl_text_extraction.py
deleted file mode 100644
index 8f8564f62c..0000000000
--- a/tests/system/providers/google/cloud/automl/example_automl_nl_text_extraction.py
+++ /dev/null
@@ -1,176 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""
-Example Airflow DAG that uses Google AutoML services.
-"""
-
-from __future__ import annotations
-
-import os
-from datetime import datetime
-
-from google.cloud.aiplatform import schema
-from google.protobuf.struct_pb2 import Value
-
-from airflow.models.dag import DAG
-from airflow.providers.google.cloud.hooks.automl import CloudAutoMLHook
-from airflow.providers.google.cloud.operators.gcs import (
- GCSCreateBucketOperator,
- GCSDeleteBucketOperator,
- GCSSynchronizeBucketsOperator,
-)
-from airflow.providers.google.cloud.operators.vertex_ai.auto_ml import (
- CreateAutoMLTextTrainingJobOperator,
- DeleteAutoMLTrainingJobOperator,
-)
-from airflow.providers.google.cloud.operators.vertex_ai.dataset import (
- CreateDatasetOperator,
- DeleteDatasetOperator,
- ImportDataOperator,
-)
-from airflow.utils.trigger_rule import TriggerRule
-
-ENV_ID = os.environ.get("SYSTEM_TESTS_ENV_ID", "default")
-GCP_PROJECT_ID = os.environ.get("SYSTEM_TESTS_GCP_PROJECT", "default")
-DAG_ID = "example_automl_text_extr"
-
-GCP_AUTOML_LOCATION = "us-central1"
-RESOURCE_DATA_BUCKET = "airflow-system-tests-resources"
-
-DATA_SAMPLE_GCS_BUCKET_NAME = f"bucket_{DAG_ID}_{ENV_ID}".replace("_", "-")
-TEXT_EXTR_DISPLAY_NAME = f"{DAG_ID}-{ENV_ID}".replace("_", "-")
-AUTOML_DATASET_BUCKET = f"gs://{DATA_SAMPLE_GCS_BUCKET_NAME}/automl/extraction.jsonl"
-
-MODEL_NAME = f"{DAG_ID}-{ENV_ID}".replace("_", "-")
-DATASET_NAME = f"ds_clss_{ENV_ID}".replace("-", "_")
-DATASET = {
- "display_name": DATASET_NAME,
- "metadata_schema_uri": schema.dataset.metadata.text,
- "metadata": Value(string_value="extr-dataset"),
-}
-DATA_CONFIG = [
- {
- "import_schema_uri": schema.dataset.ioformat.text.extraction,
- "gcs_source": {"uris": [AUTOML_DATASET_BUCKET]},
- },
-]
-
-extract_object_id = CloudAutoMLHook.extract_object_id
-
-# Example DAG for AutoML Natural Language Entities Extraction
-with DAG(
- DAG_ID,
- schedule="@once", # Override to match your needs
- start_date=datetime(2021, 1, 1),
- catchup=False,
- user_defined_macros={"extract_object_id": extract_object_id},
- tags=["example", "automl", "text-extraction"],
-) as dag:
- create_bucket = GCSCreateBucketOperator(
- task_id="create_bucket",
- bucket_name=DATA_SAMPLE_GCS_BUCKET_NAME,
- storage_class="REGIONAL",
- location=GCP_AUTOML_LOCATION,
- )
-
- move_dataset_file = GCSSynchronizeBucketsOperator(
- task_id="move_dataset_to_bucket",
- source_bucket=RESOURCE_DATA_BUCKET,
- source_object="vertex-ai/automl/datasets/text",
- destination_bucket=DATA_SAMPLE_GCS_BUCKET_NAME,
- destination_object="automl",
- recursive=True,
- )
-
- create_extr_dataset = CreateDatasetOperator(
- task_id="create_extr_dataset",
- dataset=DATASET,
- region=GCP_AUTOML_LOCATION,
- project_id=GCP_PROJECT_ID,
- )
- extr_dataset_id = create_extr_dataset.output["dataset_id"]
-
- import_extr_dataset = ImportDataOperator(
- task_id="import_extr_data",
- dataset_id=extr_dataset_id,
- region=GCP_AUTOML_LOCATION,
- project_id=GCP_PROJECT_ID,
- import_configs=DATA_CONFIG,
- )
-
- # [START howto_cloud_create_text_extraction_training_job_operator]
- create_extr_training_job = CreateAutoMLTextTrainingJobOperator(
- task_id="create_extr_training_job",
- display_name=TEXT_EXTR_DISPLAY_NAME,
- prediction_type="extraction",
- multi_label=False,
- dataset_id=extr_dataset_id,
- model_display_name=MODEL_NAME,
- training_fraction_split=0.8,
- validation_fraction_split=0.1,
- test_fraction_split=0.1,
- sync=True,
- region=GCP_AUTOML_LOCATION,
- project_id=GCP_PROJECT_ID,
- )
- # [END howto_cloud_create_text_extraction_training_job_operator]
-
- delete_extr_training_job = DeleteAutoMLTrainingJobOperator(
- task_id="delete_extr_training_job",
- training_pipeline_id=create_extr_training_job.output["training_id"],
- region=GCP_AUTOML_LOCATION,
- project_id=GCP_PROJECT_ID,
- trigger_rule=TriggerRule.ALL_DONE,
- )
-
- delete_extr_dataset = DeleteDatasetOperator(
- task_id="delete_extr_dataset",
- dataset_id=extr_dataset_id,
- region=GCP_AUTOML_LOCATION,
- project_id=GCP_PROJECT_ID,
- trigger_rule=TriggerRule.ALL_DONE,
- )
-
- delete_bucket = GCSDeleteBucketOperator(
- task_id="delete_bucket",
- bucket_name=DATA_SAMPLE_GCS_BUCKET_NAME,
- trigger_rule=TriggerRule.ALL_DONE,
- )
-
- (
- # TEST SETUP
- [create_bucket >> move_dataset_file, create_extr_dataset]
- # TEST BODY
- >> import_extr_dataset
- >> create_extr_training_job
- # TEST TEARDOWN
- >> delete_extr_training_job
- >> delete_extr_dataset
- >> delete_bucket
- )
-
- from tests.system.utils.watcher import watcher
-
- # This test needs watcher in order to properly mark success/failure
- # when "tearDown" task with trigger rule is part of the DAG
- list(dag.tasks) >> watcher()
-
-from tests.system.utils import get_test_run # noqa: E402
-
-# Needed to run the example DAG with pytest (see: tests/system/README.md#run_via_pytest)
-test_run = get_test_run(dag)
diff --git a/tests/system/providers/google/cloud/automl/example_automl_nl_text_sentiment.py b/tests/system/providers/google/cloud/automl/example_automl_nl_text_sentiment.py
deleted file mode 100644
index 94f349c6c3..0000000000
--- a/tests/system/providers/google/cloud/automl/example_automl_nl_text_sentiment.py
+++ /dev/null
@@ -1,178 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""
-Example Airflow DAG that uses Google AutoML services.
-"""
-
-from __future__ import annotations
-
-import os
-from datetime import datetime
-
-from google.cloud.aiplatform import schema
-from google.protobuf.struct_pb2 import Value
-
-from airflow.models.dag import DAG
-from airflow.providers.google.cloud.hooks.automl import CloudAutoMLHook
-from airflow.providers.google.cloud.operators.gcs import (
- GCSCreateBucketOperator,
- GCSDeleteBucketOperator,
- GCSSynchronizeBucketsOperator,
-)
-from airflow.providers.google.cloud.operators.vertex_ai.auto_ml import (
- CreateAutoMLTextTrainingJobOperator,
- DeleteAutoMLTrainingJobOperator,
-)
-from airflow.providers.google.cloud.operators.vertex_ai.dataset import (
- CreateDatasetOperator,
- DeleteDatasetOperator,
- ImportDataOperator,
-)
-from airflow.utils.trigger_rule import TriggerRule
-
-ENV_ID = os.environ.get("SYSTEM_TESTS_ENV_ID", "default")
-GCP_PROJECT_ID = os.environ.get("SYSTEM_TESTS_GCP_PROJECT", "default")
-DAG_ID = "example_automl_text_sent"
-GCP_AUTOML_LOCATION = "us-central1"
-DATA_SAMPLE_GCS_BUCKET_NAME = f"bucket_{DAG_ID}_{ENV_ID}".replace("_", "-")
-RESOURCE_DATA_BUCKET = "airflow-system-tests-resources"
-
-TEXT_SENT_DISPLAY_NAME = f"{DAG_ID}-{ENV_ID}".replace("_", "-")
-AUTOML_DATASET_BUCKET = f"gs://{DATA_SAMPLE_GCS_BUCKET_NAME}/automl/sentiment.csv"
-
-MODEL_NAME = f"{DAG_ID}-{ENV_ID}".replace("_", "-")
-
-DATASET_NAME = f"ds_sent_{ENV_ID}".replace("-", "_")
-DATASET = {
- "display_name": DATASET_NAME,
- "metadata_schema_uri": schema.dataset.metadata.text,
- "metadata": Value(string_value="sent-dataset"),
-}
-
-DATA_CONFIG = [
- {
- "import_schema_uri": schema.dataset.ioformat.text.sentiment,
- "gcs_source": {"uris": [AUTOML_DATASET_BUCKET]},
- },
-]
-
-extract_object_id = CloudAutoMLHook.extract_object_id
-
-# Example DAG for AutoML Natural Language Text Sentiment
-with DAG(
- DAG_ID,
- schedule="@once",
- start_date=datetime(2021, 1, 1),
- catchup=False,
- user_defined_macros={"extract_object_id": extract_object_id},
- tags=["example", "automl", "text-sentiment"],
-) as dag:
- create_bucket = GCSCreateBucketOperator(
- task_id="create_bucket",
- bucket_name=DATA_SAMPLE_GCS_BUCKET_NAME,
- storage_class="REGIONAL",
- location=GCP_AUTOML_LOCATION,
- )
-
- move_dataset_file = GCSSynchronizeBucketsOperator(
- task_id="move_dataset_to_bucket",
- source_bucket=RESOURCE_DATA_BUCKET,
- source_object="vertex-ai/automl/datasets/text",
- destination_bucket=DATA_SAMPLE_GCS_BUCKET_NAME,
- destination_object="automl",
- recursive=True,
- )
-
- create_sent_dataset = CreateDatasetOperator(
- task_id="create_sent_dataset",
- dataset=DATASET,
- region=GCP_AUTOML_LOCATION,
- project_id=GCP_PROJECT_ID,
- )
- sent_dataset_id = create_sent_dataset.output["dataset_id"]
-
- import_sent_dataset = ImportDataOperator(
- task_id="import_sent_data",
- dataset_id=sent_dataset_id,
- region=GCP_AUTOML_LOCATION,
- project_id=GCP_PROJECT_ID,
- import_configs=DATA_CONFIG,
- )
-
- # [START howto_cloud_create_text_sentiment_training_job_operator]
- create_sent_training_job = CreateAutoMLTextTrainingJobOperator(
- task_id="create_sent_training_job",
- display_name=TEXT_SENT_DISPLAY_NAME,
- prediction_type="sentiment",
- multi_label=False,
- dataset_id=sent_dataset_id,
- model_display_name=MODEL_NAME,
- training_fraction_split=0.7,
- validation_fraction_split=0.2,
- test_fraction_split=0.1,
- sentiment_max=5,
- sync=True,
- region=GCP_AUTOML_LOCATION,
- project_id=GCP_PROJECT_ID,
- )
- # [END howto_cloud_create_text_sentiment_training_job_operator]
-
- delete_sent_training_job = DeleteAutoMLTrainingJobOperator(
- task_id="delete_sent_training_job",
- training_pipeline_id=create_sent_training_job.output["training_id"],
- region=GCP_AUTOML_LOCATION,
- project_id=GCP_PROJECT_ID,
- trigger_rule=TriggerRule.ALL_DONE,
- )
-
- delete_sent_dataset = DeleteDatasetOperator(
- task_id="delete_sent_dataset",
- dataset_id=sent_dataset_id,
- region=GCP_AUTOML_LOCATION,
- project_id=GCP_PROJECT_ID,
- trigger_rule=TriggerRule.ALL_DONE,
- )
-
- delete_bucket = GCSDeleteBucketOperator(
- task_id="delete_bucket",
- bucket_name=DATA_SAMPLE_GCS_BUCKET_NAME,
- trigger_rule=TriggerRule.ALL_DONE,
- )
-
- (
- # TEST SETUP
- [create_bucket >> move_dataset_file, create_sent_dataset]
- # TEST BODY
- >> import_sent_dataset
- >> create_sent_training_job
- # TEST TEARDOWN
- >> delete_sent_training_job
- >> delete_sent_dataset
- >> delete_bucket
- )
-
- from tests.system.utils.watcher import watcher
-
- # This test needs watcher in order to properly mark success/failure
- # when "tearDown" task with trigger rule is part of the DAG
- list(dag.tasks) >> watcher()
-
-from tests.system.utils import get_test_run # noqa: E402
-
-# Needed to run the example DAG with pytest (see: tests/system/README.md#run_via_pytest)
-test_run = get_test_run(dag)
diff --git a/tests/system/providers/google/cloud/vertex_ai/example_vertex_ai_auto_ml_text_training.py b/tests/system/providers/google/cloud/vertex_ai/example_vertex_ai_auto_ml_text_training.py
deleted file mode 100644
index b91a8cd969..0000000000
--- a/tests/system/providers/google/cloud/vertex_ai/example_vertex_ai_auto_ml_text_training.py
+++ /dev/null
@@ -1,143 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-
-"""
-Example Airflow DAG for Google Vertex AI service testing Auto ML operations.
-"""
-
-from __future__ import annotations
-
-import os
-from datetime import datetime
-
-from google.cloud.aiplatform import schema
-from google.protobuf.struct_pb2 import Value
-
-from airflow.models.dag import DAG
-from airflow.providers.google.cloud.operators.vertex_ai.auto_ml import (
- CreateAutoMLTextTrainingJobOperator,
- DeleteAutoMLTrainingJobOperator,
-)
-from airflow.providers.google.cloud.operators.vertex_ai.dataset import (
- CreateDatasetOperator,
- DeleteDatasetOperator,
- ImportDataOperator,
-)
-from airflow.utils.trigger_rule import TriggerRule
-
-ENV_ID = os.environ.get("SYSTEM_TESTS_ENV_ID", "default")
-PROJECT_ID = os.environ.get("SYSTEM_TESTS_GCP_PROJECT", "default")
-DAG_ID = "vertex_ai_auto_ml_operations"
-REGION = "us-central1"
-TEXT_DISPLAY_NAME = f"auto-ml-text-{ENV_ID}"
-MODEL_DISPLAY_NAME = f"auto-ml-text-model-{ENV_ID}"
-
-RESOURCE_DATA_BUCKET = "airflow-system-tests-resources"
-
-TEXT_DATASET = {
- "display_name": f"text-dataset-{ENV_ID}",
- "metadata_schema_uri": schema.dataset.metadata.text,
- "metadata": Value(string_value="text-dataset"),
-}
-TEXT_DATA_CONFIG = [
- {
-        "import_schema_uri": schema.dataset.ioformat.text.single_label_classification,
-        "gcs_source": {"uris": [f"gs://{RESOURCE_DATA_BUCKET}/vertex-ai/datasets/text-dataset.csv"]},
- },
-]
-
-with DAG(
- f"{DAG_ID}_text_training_job",
- schedule="@once",
- start_date=datetime(2021, 1, 1),
- catchup=False,
- tags=["example", "vertex_ai", "auto_ml"],
-) as dag:
- create_text_dataset = CreateDatasetOperator(
- task_id="text_dataset",
- dataset=TEXT_DATASET,
- region=REGION,
- project_id=PROJECT_ID,
- )
- text_dataset_id = create_text_dataset.output["dataset_id"]
-
- import_text_dataset = ImportDataOperator(
- task_id="import_text_data",
- dataset_id=text_dataset_id,
- region=REGION,
- project_id=PROJECT_ID,
- import_configs=TEXT_DATA_CONFIG,
- )
-
- # [START how_to_cloud_vertex_ai_create_auto_ml_text_training_job_operator]
- create_auto_ml_text_training_job = CreateAutoMLTextTrainingJobOperator(
- task_id="auto_ml_text_task",
- display_name=TEXT_DISPLAY_NAME,
- prediction_type="classification",
- multi_label=False,
- dataset_id=text_dataset_id,
- model_display_name=MODEL_DISPLAY_NAME,
- training_fraction_split=0.7,
- validation_fraction_split=0.2,
- test_fraction_split=0.1,
- sync=True,
- region=REGION,
- project_id=PROJECT_ID,
- )
- # [END how_to_cloud_vertex_ai_create_auto_ml_text_training_job_operator]
-
- delete_auto_ml_text_training_job = DeleteAutoMLTrainingJobOperator(
- task_id="delete_auto_ml_text_training_job",
-        training_pipeline_id="{{ task_instance.xcom_pull(task_ids='auto_ml_text_task', key='training_id') }}",
- region=REGION,
- project_id=PROJECT_ID,
- trigger_rule=TriggerRule.ALL_DONE,
- )
-
- delete_text_dataset = DeleteDatasetOperator(
- task_id="delete_text_dataset",
- dataset_id=text_dataset_id,
- region=REGION,
- project_id=PROJECT_ID,
- trigger_rule=TriggerRule.ALL_DONE,
- )
-
- (
- # TEST SETUP
- create_text_dataset
- >> import_text_dataset
- # TEST BODY
- >> create_auto_ml_text_training_job
- # TEST TEARDOWN
- >> delete_auto_ml_text_training_job
- >> delete_text_dataset
- )
-
- # ### Everything below this line is not part of example ###
- # ### Just for system tests purpose ###
- from tests.system.utils.watcher import watcher
-
- # This test needs watcher in order to properly mark success/failure
- # when "tearDown" task with trigger rule is part of the DAG
- list(dag.tasks) >> watcher()
-
-from tests.system.utils import get_test_run # noqa: E402
-
-# Needed to run the example DAG with pytest (see: tests/system/README.md#run_via_pytest)
-test_run = get_test_run(dag)
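
For migration, here is a hedged sketch of the replacement path named in the
docs changes above: supervised fine-tuning of a Gemini model via
SupervisedFineTuningTrainOperator. The project id, region, base model id, and
dataset URI are illustrative placeholders, and the parameter set shown is an
assumption based on the provider's generative_model module around the time of
this commit; check the provider docs for the current signature.

    from __future__ import annotations

    from datetime import datetime

    from airflow.models.dag import DAG
    from airflow.providers.google.cloud.operators.vertex_ai.generative_model import (
        SupervisedFineTuningTrainOperator,
    )

    with DAG(
        "example_gemini_supervised_fine_tuning",
        schedule="@once",
        start_date=datetime(2024, 1, 1),
        catchup=False,
    ) as dag:
        # Fine-tune a Gemini base model on a JSONL training set in GCS,
        # replacing the deprecated CreateAutoMLTextTrainingJobOperator flow.
        fine_tune_text_model = SupervisedFineTuningTrainOperator(
            task_id="fine_tune_text_model",
            project_id="my-project",  # placeholder project id
            location="us-central1",  # placeholder region
            source_model="gemini-1.0-pro-002",  # placeholder base model id
            train_dataset="gs://my-bucket/datasets/text-train.jsonl",  # placeholder dataset
        )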