This is an automated email from the ASF dual-hosted git repository.
potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push:
new ce5cd52635 Change regular expression to exclude double quote and
newline (#39991)
ce5cd52635 is described below
commit ce5cd52635280e412eacef3a9705e7d12954cdfd
Author: Yuan Chuan Kee <[email protected]>
AuthorDate: Sat Jun 1 18:57:32 2024 +0800
Change regular expression to exclude double quote and newline (#39991)
This commit addresses a change in Cloud Composer's default storage mechanism
for logs as documented in the release notes on [March 26,
2024](https://cloud.google.com/composer/docs/release-notes#March_26_2024) and
[May 11,
2024](https://cloud.google.com/composer/docs/release-notes#May_11_2024). This
causes an issue for the Beam operator, which uses a regular expression to
identify the Dataflow Job ID from the logs.
Co-authored-by: Yuan Chuan Kee <[email protected]>
---
airflow/providers/google/cloud/hooks/dataflow.py | 2 +-
tests/providers/google/cloud/hooks/test_dataflow.py | 17 +++++++++++++++++
2 files changed, 18 insertions(+), 1 deletion(-)
diff --git a/airflow/providers/google/cloud/hooks/dataflow.py
b/airflow/providers/google/cloud/hooks/dataflow.py
index 59eee63501..ddfb019773 100644
--- a/airflow/providers/google/cloud/hooks/dataflow.py
+++ b/airflow/providers/google/cloud/hooks/dataflow.py
@@ -71,7 +71,7 @@ DEFAULT_DATAFLOW_LOCATION = "us-central1"
JOB_ID_PATTERN = re.compile(
- r"Submitted job: (?P<job_id_java>.*)|Created job with id:
\[(?P<job_id_python>.*)\]"
+ r"Submitted job: (?P<job_id_java>[^\"\n]*)|Created job with id:
\[(?P<job_id_python>[^\"\n]*)\]"
)
T = TypeVar("T", bound=Callable)
diff --git a/tests/providers/google/cloud/hooks/test_dataflow.py
b/tests/providers/google/cloud/hooks/test_dataflow.py
index 1c8f768ea3..e925dea1c8 100644
--- a/tests/providers/google/cloud/hooks/test_dataflow.py
+++ b/tests/providers/google/cloud/hooks/test_dataflow.py
@@ -1877,6 +1877,19 @@ INFO: To cancel the job using the 'gcloud' tool, run:
> gcloud dataflow jobs --project=XXX cancel --region=europe-west3 {TEST_JOB_ID}
"""
+CLOUD_COMPOSER_CLOUD_LOGGING_APACHE_BEAM_V_2_56_0_JAVA_SDK_LOG = f"""\
+WARNING - {{"message":"org.apache.beam.runners.dataflow.DataflowRunner -
Dataflow SDK version: 2.56.0",\
+"severity":"INFO"}}
+WARNING - {{"message":"org.apache.beam.runners.dataflow.DataflowRunner - To
access the Dataflow monitoring\\
+console, please navigate to
https://console.cloud.google.com/dataflow/jobs/europe-west3/{TEST_JOB_ID}?project
+\u003dXXXX","severity":"INFO"}}
+WARNING - {{"message":"org.apache.beam.runners.dataflow.DataflowRunner -
Submitted job: {TEST_JOB_ID}",\
+"severity":"INFO"}}
+WARNING - {{"message":"org.apache.beam.runners.dataflow.DataflowRunner - To
cancel the job using the \
+\u0027gcloud\u0027 tool, run:\n\u003e gcloud dataflow jobs --project\u003dXXX
cancel --region\u003deurope-\
+west3 {TEST_JOB_ID}","severity":"INFO"}}
+"""
+
APACHE_BEAM_V_2_14_0_PYTHON_SDK_LOG = f""""\
INFO:root:Completed GCS upload to
gs://test-dataflow-example/staging/start-python-job-local-5bcf3d71.\
1592286375.000962/apache_beam-2.14.0-cp37-cp37m-manylinux1_x86_64.whl in 0
seconds.
@@ -1926,6 +1939,10 @@ class TestDataflow:
[
pytest.param(APACHE_BEAM_V_2_14_0_JAVA_SDK_LOG,
id="apache-beam-2.14.0-JDK"),
pytest.param(APACHE_BEAM_V_2_22_0_JAVA_SDK_LOG,
id="apache-beam-2.22.0-JDK"),
+ pytest.param(
+ CLOUD_COMPOSER_CLOUD_LOGGING_APACHE_BEAM_V_2_56_0_JAVA_SDK_LOG,
+ id="cloud-composer-cloud-logging-apache-beam-2.56.0-JDK",
+ ),
pytest.param(APACHE_BEAM_V_2_14_0_PYTHON_SDK_LOG,
id="apache-beam-2.14.0-Python"),
pytest.param(APACHE_BEAM_V_2_22_0_PYTHON_SDK_LOG,
id="apache-beam-2.22.0-Python"),
],