This is an automated email from the ASF dual-hosted git repository.

potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/main by this push:
     new ce5cd52635 Change regular expression to exclude double quote and 
newline (#39991)
ce5cd52635 is described below

commit ce5cd52635280e412eacef3a9705e7d12954cdfd
Author: Yuan Chuan Kee <[email protected]>
AuthorDate: Sat Jun 1 18:57:32 2024 +0800

    Change regular expression to exclude double quote and newline (#39991)
    
    This change tackles a change in Cloud Composer's default storage mechanism 
for logs as documented in the release notes on [March 26, 
2024](https://cloud.google.com/composer/docs/release-notes#March_26_2024) and 
[May 11, 
2024](https://cloud.google.com/composer/docs/release-notes#May_11_2024). This 
causes issues for the Beam operator, which uses a regular expression to identify 
the Dataflow Job ID from the logs.
    
    Co-authored-by: Yuan Chuan Kee <[email protected]>
---
 airflow/providers/google/cloud/hooks/dataflow.py    |  2 +-
 tests/providers/google/cloud/hooks/test_dataflow.py | 17 +++++++++++++++++
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/airflow/providers/google/cloud/hooks/dataflow.py 
b/airflow/providers/google/cloud/hooks/dataflow.py
index 59eee63501..ddfb019773 100644
--- a/airflow/providers/google/cloud/hooks/dataflow.py
+++ b/airflow/providers/google/cloud/hooks/dataflow.py
@@ -71,7 +71,7 @@ DEFAULT_DATAFLOW_LOCATION = "us-central1"
 
 
 JOB_ID_PATTERN = re.compile(
-    r"Submitted job: (?P<job_id_java>.*)|Created job with id: 
\[(?P<job_id_python>.*)\]"
+    r"Submitted job: (?P<job_id_java>[^\"\n]*)|Created job with id: 
\[(?P<job_id_python>[^\"\n]*)\]"
 )
 
 T = TypeVar("T", bound=Callable)
diff --git a/tests/providers/google/cloud/hooks/test_dataflow.py 
b/tests/providers/google/cloud/hooks/test_dataflow.py
index 1c8f768ea3..e925dea1c8 100644
--- a/tests/providers/google/cloud/hooks/test_dataflow.py
+++ b/tests/providers/google/cloud/hooks/test_dataflow.py
@@ -1877,6 +1877,19 @@ INFO: To cancel the job using the 'gcloud' tool, run:
 > gcloud dataflow jobs --project=XXX cancel --region=europe-west3 {TEST_JOB_ID}
 """
 
+CLOUD_COMPOSER_CLOUD_LOGGING_APACHE_BEAM_V_2_56_0_JAVA_SDK_LOG = f"""\
+WARNING - {{"message":"org.apache.beam.runners.dataflow.DataflowRunner - 
Dataflow SDK version: 2.56.0",\
+"severity":"INFO"}}
+WARNING - {{"message":"org.apache.beam.runners.dataflow.DataflowRunner - To 
access the Dataflow monitoring\\
+console, please navigate to 
https://console.cloud.google.com/dataflow/jobs/europe-west3/{TEST_JOB_ID}?project
+\u003dXXXX","severity":"INFO"}}
+WARNING - {{"message":"org.apache.beam.runners.dataflow.DataflowRunner - 
Submitted job: {TEST_JOB_ID}",\
+"severity":"INFO"}}
+WARNING - {{"message":"org.apache.beam.runners.dataflow.DataflowRunner - To 
cancel the job using the \
+\u0027gcloud\u0027 tool, run:\n\u003e gcloud dataflow jobs --project\u003dXXX 
cancel --region\u003deurope-\
+west3 {TEST_JOB_ID}","severity":"INFO"}}
+"""
+
 APACHE_BEAM_V_2_14_0_PYTHON_SDK_LOG = f""""\
 INFO:root:Completed GCS upload to 
gs://test-dataflow-example/staging/start-python-job-local-5bcf3d71.\
 1592286375.000962/apache_beam-2.14.0-cp37-cp37m-manylinux1_x86_64.whl in 0 
seconds.
@@ -1926,6 +1939,10 @@ class TestDataflow:
         [
             pytest.param(APACHE_BEAM_V_2_14_0_JAVA_SDK_LOG, 
id="apache-beam-2.14.0-JDK"),
             pytest.param(APACHE_BEAM_V_2_22_0_JAVA_SDK_LOG, 
id="apache-beam-2.22.0-JDK"),
+            pytest.param(
+                CLOUD_COMPOSER_CLOUD_LOGGING_APACHE_BEAM_V_2_56_0_JAVA_SDK_LOG,
+                id="cloud-composer-cloud-logging-apache-beam-2.56.0-JDK",
+            ),
             pytest.param(APACHE_BEAM_V_2_14_0_PYTHON_SDK_LOG, 
id="apache-beam-2.14.0-Python"),
             pytest.param(APACHE_BEAM_V_2_22_0_PYTHON_SDK_LOG, 
id="apache-beam-2.22.0-Python"),
         ],

Reply via email to