Lee-W commented on code in PR #36473:
URL: https://github.com/apache/airflow/pull/36473#discussion_r1437972926
##########
tests/system/providers/google/cloud/automl/example_automl_video_intelligence_classification.py:
##########
@@ -73,73 +72,88 @@
schedule="@once",
start_date=datetime(2021, 1, 1),
catchup=False,
- user_defined_macros={"extract_object_id": extract_object_id},
- tags=["example", "automl", "video-clss"],
+ tags=["example", "automl", "video", "classification"],
) as dag:
create_bucket = GCSCreateBucketOperator(
task_id="create_bucket",
- bucket_name=DATA_SAMPLE_GCS_BUCKET_NAME,
+ bucket_name=VIDEO_GCS_BUCKET_NAME,
storage_class="REGIONAL",
- location=GCP_AUTOML_LOCATION,
+ location=REGION,
)
move_dataset_file = GCSSynchronizeBucketsOperator(
task_id="move_dataset_to_bucket",
source_bucket=RESOURCE_DATA_BUCKET,
source_object="automl/datasets/video",
- destination_bucket=DATA_SAMPLE_GCS_BUCKET_NAME,
+ destination_bucket=VIDEO_GCS_BUCKET_NAME,
destination_object="automl",
recursive=True,
)
- create_dataset = AutoMLCreateDatasetOperator(
- task_id="create_dataset", dataset=DATASET, location=GCP_AUTOML_LOCATION
+ create_video_dataset = CreateDatasetOperator(
+ task_id="video_dataset",
+ dataset=VIDEO_DATASET,
+ region=REGION,
+ project_id=PROJECT_ID,
)
-
- dataset_id = cast(str, XComArg(create_dataset, key="dataset_id"))
- MODEL["dataset_id"] = dataset_id
-
- import_dataset = AutoMLImportDataOperator(
- task_id="import_dataset",
- dataset_id=dataset_id,
- location=GCP_AUTOML_LOCATION,
- input_config=IMPORT_INPUT_CONFIG,
+ video_dataset_id = create_video_dataset.output["dataset_id"]
+
+ import_video_dataset = ImportDataOperator(
+ task_id="import_video_data",
+ dataset_id=video_dataset_id,
+ region=REGION,
+ project_id=PROJECT_ID,
+ import_configs=VIDEO_DATA_CONFIG,
)
- MODEL["dataset_id"] = dataset_id
-
-    create_model = AutoMLTrainModelOperator(task_id="create_model", model=MODEL, location=GCP_AUTOML_LOCATION)
- model_id = cast(str, XComArg(create_model, key="model_id"))
-
- delete_model = AutoMLDeleteModelOperator(
- task_id="delete_model",
- model_id=model_id,
- location=GCP_AUTOML_LOCATION,
- project_id=GCP_PROJECT_ID,
+ # [START how_to_cloud_create_video_classification_training_job_operator]
+ create_auto_ml_video_training_job = CreateAutoMLVideoTrainingJobOperator(
+ task_id="auto_ml_video_task",
+ display_name=VIDEO_DISPLAY_NAME,
+ prediction_type="classification",
+ model_type="CLOUD",
+ dataset_id=video_dataset_id,
+ model_display_name=MODEL_DISPLAY_NAME,
+ region=REGION,
+ project_id=PROJECT_ID,
+ )
+ # [END how_to_cloud_create_video_classification_training_job_operator]
+
+ delete_auto_ml_video_training_job = DeleteAutoMLTrainingJobOperator(
Review Comment:
Should we make it part of the doc just like
`how_to_cloud_create_video_classification_training_job_operator`?
##########
tests/system/providers/google/cloud/automl/example_automl_video_intelligence_tracking.py:
##########
@@ -22,49 +22,49 @@
import os
from datetime import datetime
-from typing import cast
+
+from google.cloud.aiplatform import schema
+from google.protobuf.struct_pb2 import Value
from airflow.models.dag import DAG
-from airflow.models.xcom_arg import XComArg
-from airflow.providers.google.cloud.hooks.automl import CloudAutoMLHook
-from airflow.providers.google.cloud.operators.automl import (
- AutoMLCreateDatasetOperator,
- AutoMLDeleteDatasetOperator,
- AutoMLDeleteModelOperator,
- AutoMLImportDataOperator,
- AutoMLTrainModelOperator,
-)
from airflow.providers.google.cloud.operators.gcs import (
GCSCreateBucketOperator,
GCSDeleteBucketOperator,
GCSSynchronizeBucketsOperator,
)
+from airflow.providers.google.cloud.operators.vertex_ai.auto_ml import (
+ CreateAutoMLVideoTrainingJobOperator,
+ DeleteAutoMLTrainingJobOperator,
+)
+from airflow.providers.google.cloud.operators.vertex_ai.dataset import (
+ CreateDatasetOperator,
+ DeleteDatasetOperator,
+ ImportDataOperator,
+)
from airflow.utils.trigger_rule import TriggerRule
ENV_ID = os.environ.get("SYSTEM_TESTS_ENV_ID", "default")
+PROJECT_ID = os.environ.get("SYSTEM_TESTS_GCP_PROJECT", "default")
DAG_ID = "example_automl_video_track"
-GCP_PROJECT_ID = os.environ.get("SYSTEM_TESTS_GCP_PROJECT", "default")
-GCP_AUTOML_LOCATION = "us-central1"
-DATA_SAMPLE_GCS_BUCKET_NAME = f"bucket_{DAG_ID}_{ENV_ID}".replace("_", "-")
-RESOURCE_DATA_BUCKET = "airflow-system-tests-resources"
+REGION = "us-central1"
+VIDEO_DISPLAY_NAME = f"auto-ml-video-tracking-{ENV_ID}"
+MODEL_DISPLAY_NAME = f"auto-ml-video-tracking-model-{ENV_ID}"
-MODEL_NAME = "video_track_test_model"
-MODEL = {
- "display_name": MODEL_NAME,
- "video_object_tracking_model_metadata": {},
-}
+RESOURCE_DATA_BUCKET = "airflow-system-tests-resources"
+VIDEO_GCS_BUCKET_NAME = f"bucket_video_tracking_{ENV_ID}".replace("_", "-")
-DATASET_NAME = f"ds_video_track_{ENV_ID}".replace("-", "_")
-DATASET = {
- "display_name": DATASET_NAME,
- "video_object_tracking_dataset_metadata": {},
+VIDEO_DATASET = {
+ "display_name": f"video-dataset-{ENV_ID}",
+ "metadata_schema_uri": schema.dataset.metadata.video,
+ "metadata": Value(string_value="video-dataset"),
}
-
-AUTOML_DATASET_BUCKET = f"gs://{DATA_SAMPLE_GCS_BUCKET_NAME}/automl/video_tracking.csv"
-IMPORT_INPUT_CONFIG = {"gcs_source": {"input_uris": [AUTOML_DATASET_BUCKET]}}
-
-
-extract_object_id = CloudAutoMLHook.extract_object_id
+VIDEO_DATA_CONFIG = [
+ {
+ "import_schema_uri": schema.dataset.ioformat.video.object_tracking,
+        # "gcs_source": {"uris": ["gs://cloud-samples-data/ai-platform-unified/video/traffic/traffic_videos_labels.csv"]},
 Review Comment:
    May I know what this comment is for? Should we remove it?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]