mobuchowski commented on code in PR #59094:
URL: https://github.com/apache/airflow/pull/59094#discussion_r2592641648


##########
providers/amazon/src/airflow/providers/amazon/aws/utils/openlineage.py:
##########
@@ -136,3 +141,148 @@ def get_identity_column_lineage_facet(
         }
     )
     return column_lineage_facet
+
+
+def _parse_glue_customer_env_vars(env_vars_string: str | None) -> dict[str, 
str]:
+    """
+    Parse the --customer-driver-env-vars format into a dict.
+
+    Format: "KEY1=VAL1,KEY2=\"val2,val2 val2\""
+    - Simple values: KEY=VALUE
+    - Values with commas/spaces: KEY="value with, spaces"
+
+    Args:
+        env_vars_string: The environment variables string from Glue script 
args.
+
+    Returns:
+        Dict of key-value pairs.
+    """
+    if not env_vars_string:
+        return {}
+
+    result: dict[str, str] = {}
+    current = ""
+    in_quotes = False
+
+    for char in env_vars_string:
+        if char == '"' and (not current or current[-1] != "\\"):
+            in_quotes = not in_quotes
+            current += char
+        elif char == "," and not in_quotes:
+            if "=" in current:
+                key, value = current.split("=", 1)
+                # Strip surrounding quotes if present
+                value = value.strip()
+                if value.startswith('"') and value.endswith('"'):
+                    value = value[1:-1]
+                result[key.strip()] = value
+            current = ""
+        else:
+            current += char
+
+    # Handle last element
+    if current and "=" in current:
+        key, value = current.split("=", 1)
+        value = value.strip()
+        if value.startswith('"') and value.endswith('"'):
+            value = value[1:-1]
+        result[key.strip()] = value
+
+    return result
+
+
+def _format_glue_customer_env_vars(env_vars: dict[str, str]) -> str:
+    """
+    Format a dict back into the --customer-driver-env-vars string format.
+
+    - Values containing commas, spaces, or quotes need quoting
+    - Quotes within values need escaping
+
+    Args:
+        env_vars: Dict of environment variables.
+
+    Returns:
+        String in format "KEY1=VAL1,KEY2=\"val2\""
+    """
+    parts = []
+    for key, value in env_vars.items():
+        # Quote if contains special chars
+        if "," in value or " " in value or '"' in value:
+            escaped_value = value.replace('"', '\\"')
+            parts.append(f'{key}="{escaped_value}"')
+        else:
+            parts.append(f"{key}={value}")
+    return ",".join(parts)
+
+
+def _is_parent_job_info_present_in_glue_env_vars(script_args: dict[str, Any]) 
-> bool:
+    """
+    Check if any OpenLineage parent job env vars are already set.
+
+    Args:
+        script_args: The Glue job's script_args dict.
+
+    Returns:
+        True if any OL parent job env vars are present.
+    """
+    # Check --customer-driver-env-vars
+    driver_env_vars_str = script_args.get("--customer-driver-env-vars", "")
+    driver_env_vars = _parse_glue_customer_env_vars(driver_env_vars_str)
+
+    # Also check --customer-executor-env-vars
+    executor_env_vars_str = script_args.get("--customer-executor-env-vars", "")
+    executor_env_vars = _parse_glue_customer_env_vars(executor_env_vars_str)
+
+    all_env_vars = {**driver_env_vars, **executor_env_vars}
+
+    # Check if ANY OpenLineage parent env var is present
+    return any(
+        key.startswith("OPENLINEAGE_PARENT") or 
key.startswith("OPENLINEAGE_ROOT_PARENT")
+        for key in all_env_vars
+    )
+
+
+def inject_parent_job_information_into_glue_script_args(
+    script_args: dict[str, Any], context: Context
+) -> dict[str, Any]:
+    """
+    Inject OpenLineage parent job info into Glue script_args.
+
+    The parent job information is injected via the --customer-driver-env-vars 
argument,
+    which sets environment variables in the Spark driver process.
+
+    - If OpenLineage provider is not available, skip injection

Review Comment:
   
When the OpenLineage provider is not available,
`airflow.providers.common.compat.openlineage.utils.spark.get_parent_job_information`
 will return None, which causes this function to return the
original dict.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to