ferruzzi commented on code in PR #37948: URL: https://github.com/apache/airflow/pull/37948#discussion_r1526570614
########## airflow/traces/utils.py: ########## @@ -0,0 +1,98 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +import logging +from typing import TYPE_CHECKING + +from airflow.utils.hashlib_wrapper import md5 + +if TYPE_CHECKING: + from airflow.models import DagRun, TaskInstance + from airflow.models.taskinstancekey import TaskInstanceKey + +TRACE_ID = 0 +SPAN_ID = 16 + +log = logging.getLogger(__name__) + + +def _gen_id(seeds: list[str], as_int: bool = False, type: int = TRACE_ID) -> str | int: + seed_str = "_".join(seeds).encode("utf-8") + hash_hex = md5(seed_str).hexdigest()[type:] + return int(hash_hex, 16) if as_int else hash_hex + + +def gen_trace_id(dag_run: DagRun, as_int: bool = False) -> str | int: + """Generate trace id from DagRun.""" + return _gen_id([dag_run.dag_id, dag_run.run_id, str(dag_run.start_date.timestamp())], as_int) + + +def gen_span_id_from_ti_key(ti_key: TaskInstanceKey, as_int: bool = False) -> str | int: + """Generate span id from TI key.""" + return _gen_id([ti_key.dag_id, ti_key.run_id, ti_key.task_id, str(ti_key.try_number)], as_int, SPAN_ID) + + +def gen_dag_span_id(dag_run: DagRun, as_int: bool = False) -> str | int: + """Generate dag's root span id using dag_run.""" + return _gen_id([dag_run.dag_id, dag_run.run_id, str(dag_run.start_date.timestamp())], as_int, SPAN_ID) + + +def gen_span_id(ti: TaskInstance, as_int: bool = False) -> str | int: + """Generate span id from the task instance.""" + dag_run = ti.dag_run + """When this is called, the try_number of ti is already set to next(+1), hence the subtraction""" + return _gen_id([dag_run.dag_id, dag_run.run_id, ti.task_id, str(ti.try_number - 1)], as_int, SPAN_ID) + + +def parse_traceparent(traceparent_str: str | None = None) -> dict: + """Parse traceparent string: 00-0af7651916cd43dd8448eb211c80319c-b7ad6b7169203331-01.""" + if traceparent_str is None: + return {} + tokens = traceparent_str.split("-") + if len(tokens) != 4: + raise ValueError("The traceparent string does not have the correct format.") + return {"version": tokens[0], "trace_id": tokens[1], "parent_id": tokens[2], "flags": tokens[3]} + + +def parse_tracestate(tracestate_str: str | None = None) -> dict: + """Parse tracestate string: rojo=00f067aa0ba902b7,congo=t61rcWkgMzE.""" + if tracestate_str is None: + return {} + tokens = tracestate_str.split(",") + result = {} + for pair in tokens: + key, value = pair.split("=") + result[key.strip()] = value.strip() + return result + + +def is_valid_trace_id(trace_id: str) -> bool: + """Check whether trace id is valid.""" + if trace_id is not None and len(trace_id) == 34 and int(trace_id, 16) != 0: + return True + else: + return False Review Comment: Huh, interesting. It seems to work fine ``` In [54]: x = '0x04200' In [55]: result = x and len(x) < 10 and int(x, 16) != 0 In [56]: result Out[56]: True In [57]: type(result) Out[57]: bool ``` Maybe try wrapping the return statement in (), otherwise yeah, yours will be fine. ``` return (trace_id and len(trace_id) == 34 and int(trace_id, 16) != 0) ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
