bhirsz commented on code in PR #26915:
URL: https://github.com/apache/airflow/pull/26915#discussion_r989701818
##########
tests/system/providers/google/cloud/ml_engine/example_mlengine.py:
##########
@@ -37,70 +40,74 @@
MLEngineStartBatchPredictionJobOperator,
MLEngineStartTrainingJobOperator,
)
+from airflow.providers.google.cloud.transfers.local_to_gcs import
LocalFilesystemToGCSOperator
from airflow.providers.google.cloud.utils import mlengine_operator_utils
+from airflow.utils.trigger_rule import TriggerRule
-PROJECT_ID = os.environ.get("GCP_PROJECT_ID", "example-project")
+DAG_ID = "example_gcp_mlengine"
+BASE_DIR = pathlib.Path(__file__).parent.resolve()
+PREDICT_FILE_NAME = 'predict.json'
+PATH_TO_PREDICT_FILE = BASE_DIR / PREDICT_FILE_NAME
-MODEL_NAME = os.environ.get("GCP_MLENGINE_MODEL_NAME", "model_name")
-
-SAVED_MODEL_PATH = os.environ.get("GCP_MLENGINE_SAVED_MODEL_PATH",
"gs://INVALID BUCKET NAME/saved-model/")
-JOB_DIR = os.environ.get("GCP_MLENGINE_JOB_DIR", "gs://INVALID BUCKET
NAME/keras-job-dir")
-PREDICTION_INPUT = os.environ.get(
- "GCP_MLENGINE_PREDICTION_INPUT", "gs://INVALID BUCKET
NAME/prediction_input.json"
-)
+PROJECT_ID = os.environ.get("GCP_PROJECT_ID")
+ENV_ID = os.environ.get("SYSTEM_TESTS_ENV_ID")
+MODEL_NAME = os.environ.get("GCP_MLENGINE_MODEL_NAME",
f"example_mlengine_model_{ENV_ID}")
+BUCKET_NAME = os.environ.get("BUCKET_NAME",
f"example_mlengine_bucket_{ENV_ID}")
+BUCKET_PATH = os.environ.get("BUCKET_PATH", f"gs://{BUCKET_NAME}")
+JOB_DIR = os.environ.get("GCP_MLENGINE_JOB_DIR", f"{BUCKET_PATH}/job-dir")
+SAVED_MODEL_PATH = os.environ.get("GCP_MLENGINE_SAVED_MODEL_PATH",
f"{JOB_DIR}/")
+PREDICTION_INPUT = os.environ.get("GCP_MLENGINE_PREDICTION_INPUT",
f"{BUCKET_PATH}/{PREDICT_FILE_NAME}")
PREDICTION_OUTPUT = os.environ.get(
- "GCP_MLENGINE_PREDICTION_OUTPUT", "gs://INVALID BUCKET
NAME/prediction_output"
+ "GCP_MLENGINE_PREDICTION_OUTPUT", "gs://INVALID BUCKET
NAME/prediction_output/"
+)
+TRAINER_URI = os.environ.get(
+ "GCP_MLENGINE_TRAINER_URI",
+ "gs://system-tests-resources/example_gcp_mlengine/trainer-0.1.tar.gz",
+)
+TRAINER_PY_MODULE = os.environ.get(
+ "GCP_MLENGINE_TRAINER_TRAINER_PY_MODULE",
+ "trainer.task",
)
-TRAINER_URI = os.environ.get("GCP_MLENGINE_TRAINER_URI", "gs://INVALID BUCKET
NAME/trainer.tar.gz")
-TRAINER_PY_MODULE = os.environ.get("GCP_MLENGINE_TRAINER_TRAINER_PY_MODULE",
"trainer.task")
+SUMMARY_TMP = os.environ.get("GCP_MLENGINE_DATAFLOW_TMP",
f"{BUCKET_PATH}/tmp/")
+SUMMARY_STAGING = os.environ.get("GCP_MLENGINE_DATAFLOW_STAGING",
f"{BUCKET_PATH}/staging/")
-SUMMARY_TMP = os.environ.get("GCP_MLENGINE_DATAFLOW_TMP", "gs://INVALID BUCKET
NAME/tmp/")
-SUMMARY_STAGING = os.environ.get("GCP_MLENGINE_DATAFLOW_STAGING",
"gs://INVALID BUCKET NAME/staging/")
+
+def generate_model_predict_input_data() -> list[int]:
+ return [i for i in range(0, 201, 10)]
with models.DAG(
- "example_gcp_mlengine",
+ dag_id=DAG_ID,
+ schedule="@once",
start_date=datetime(2021, 1, 1),
catchup=False,
- tags=['example'],
+ tags=['example', 'ml_engine'],
params={"model_name": MODEL_NAME},
) as dag:
- hyperparams: dict[str, Any] = {
- 'goal': 'MAXIMIZE',
- 'hyperparameterMetricTag': 'metric1',
- 'maxTrials': 30,
- 'maxParallelTrials': 1,
- 'enableTrialEarlyStopping': True,
- 'params': [],
- }
-
- hyperparams['params'].append(
- {
- 'parameterName': 'hidden1',
- 'type': 'INTEGER',
- 'minValue': 40,
- 'maxValue': 400,
- 'scaleType': 'UNIT_LINEAR_SCALE',
- }
+ create_bucket = GCSCreateBucketOperator(
+ task_id="create-bucket",
+ bucket_name=BUCKET_NAME,
)
- hyperparams['params'].append(
- {'parameterName': 'numRnnCells', 'type': 'DISCRETE', 'discreteValues':
[1, 2, 3, 4]}
+ def write_predict_file(path_to_file: str):
+ predict_data = generate_model_predict_input_data()
+ with open(path_to_file, 'w') as file:
+ for p in predict_data:
+ file.write(f'{{"input_layer": [{p}]}}\n')
+
+ write_data = PythonOperator(
+ task_id="write-predict-data-file",
+ python_callable=write_predict_file,
+ op_args=(PATH_TO_PREDICT_FILE,),
)
Review Comment:
The code is fine, but since the introduction of the ``@task`` decorator,
using it is recommended over ``PythonOperator``:
https://airflow.apache.org/docs/apache-airflow/stable/howto/operator/python.html
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]