This is an automated email from the ASF dual-hosted git repository. skrawcz pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/hamilton.git
commit 553f0e9f988fff0041c3ffcdb10cffdccbc05d32 Author: Pablo Eduardo Diaz <[email protected]> AuthorDate: Fri Jan 16 23:37:56 2026 -0400 Enhance visualization with display_name tag support This update introduces the `display_name` tag for nodes in graph visualizations, allowing for human-readable labels while maintaining valid Python identifiers as function names. The changes include: - Documentation updates to explain the usage of `display_name` in visualizations. - Modifications to the graph creation logic to utilize `display_name` when available. - New tests to ensure `display_name` is correctly applied in visualizations and that HTML characters are properly escaped. - Example functions demonstrating the use of `display_name` in a new resource file. This feature improves the readability of visualizations for stakeholders while keeping the codebase Pythonic. --- docs/concepts/visualization.rst | 21 ++++++++ docs/reference/decorators/tag.rst | 25 +++++++++ examples/hello_world/my_dag.png | Bin 63764 -> 69967 bytes examples/hello_world/my_functions.py | 4 ++ hamilton/graph.py | 33 +++++++++--- tests/resources/display_name_functions.py | 52 ++++++++++++++++++ tests/test_graph.py | 86 ++++++++++++++++++++++++++++++ 7 files changed, 213 insertions(+), 8 deletions(-) diff --git a/docs/concepts/visualization.rst b/docs/concepts/visualization.rst index 407e7c6f..e7bb6968 100644 --- a/docs/concepts/visualization.rst +++ b/docs/concepts/visualization.rst @@ -166,6 +166,27 @@ Configure your visualization All of the above visualization functions share parameters to customize the visualization (e.g., hide legend, hide inputs). Learn more by reviewing the API reference for `Driver.display_all_functions() <https://hamilton.apache.org/reference/drivers/Driver/#hamilton.driver.Driver.display_all_functions>`_; parameters should apply to all other visualizations. 
+Custom node labels with display_name +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Use the ``@tag`` decorator with ``display_name`` to show human-readable labels in visualizations while keeping valid Python identifiers as function names. This is useful for creating presentation-ready diagrams or adding business-friendly names: + +.. code-block:: python + + from hamilton.function_modifiers import tag + + @tag(display_name="Parse Raw JSON") + def parse_raw_json(raw_data: str) -> dict: + return json.loads(raw_data) + + @tag(display_name="Transform to DataFrame") + def transform_to_df(parse_raw_json: dict) -> pd.DataFrame: + return pd.DataFrame(parse_raw_json) + +When visualized, nodes will display "Parse Raw JSON" and "Transform to DataFrame" instead of their function names. This keeps your code Pythonic while making visualizations more readable for stakeholders. + +Note that ``display_name`` only affects visualization labels - the actual node names used in code and execution remain the function names. + .. _custom-visualization-style: Apply custom style diff --git a/docs/reference/decorators/tag.rst b/docs/reference/decorators/tag.rst index 3d3805ea..6bd55b0f 100644 --- a/docs/reference/decorators/tag.rst +++ b/docs/reference/decorators/tag.rst @@ -35,6 +35,31 @@ available outputs for specific tag matches. E.g. output = dr.execute(desired_outputs) +**Using display_name for visualization** + +You can use the special ``display_name`` tag to provide a human-readable name for nodes in graphviz visualizations. +This allows you to show user-friendly names in DAG diagrams while keeping valid Python identifiers as function names. + +.. 
code-block:: python + + import pandas as pd + from hamilton.function_modifiers import tag + + @tag(display_name="Customer Lifetime Value") + def customer_ltv(purchases: pd.DataFrame, tenure: pd.Series) -> pd.Series: + """Calculate customer lifetime value.""" + return purchases.sum() * tenure + +When you visualize the DAG using ``dr.display_all_functions()``, the node will display "Customer Lifetime Value" +instead of "customer_ltv". This is useful for: + +- Creating presentation-ready diagrams for stakeholders +- Adding business-friendly names for technical functions +- Making visualizations more readable for non-technical audiences + +Note that ``display_name`` only affects visualization - the actual node name used in code remains the function name. + + ---- **Reference Documentation** diff --git a/examples/hello_world/my_dag.png b/examples/hello_world/my_dag.png index 4f9bb292..a3a4a809 100644 Binary files a/examples/hello_world/my_dag.png and b/examples/hello_world/my_dag.png differ diff --git a/examples/hello_world/my_functions.py b/examples/hello_world/my_functions.py index c5761884..a0b88135 100644 --- a/examples/hello_world/my_functions.py +++ b/examples/hello_world/my_functions.py @@ -17,6 +17,8 @@ import pandas as pd +from hamilton.function_modifiers import tag + """ Notes: 1. This file is used for all the [ray|dask|spark]/hello_world examples. @@ -25,11 +27,13 @@ Notes: """ +@tag(display_name="Rolling 3-Week Average Spend") def avg_3wk_spend(spend: pd.Series) -> pd.Series: """Rolling 3 week average spend.""" return spend.rolling(3).mean() +@tag(display_name="Cost Per Signup") def spend_per_signup(spend: pd.Series, signups: pd.Series) -> pd.Series: """The cost per signup in relation to spend.""" return spend / signups diff --git a/hamilton/graph.py b/hamilton/graph.py index f8cc32e4..8a253561 100644 --- a/hamilton/graph.py +++ b/hamilton/graph.py @@ -283,34 +283,51 @@ def create_graphviz_graph( name and type but values can be overridden. 
Overriding is currently used for materializers since `type_` is stored in n.tags. + If a node has a 'display_name' tag, it will be used as the label + instead of the node name. This allows human-readable names in + visualizations while keeping Python-valid identifiers as node names. + See: https://github.com/apache/hamilton/issues/1413 + ref: https://graphviz.org/doc/info/shapes.html#html """ - name = n.name if name is None else name + # Determine display name: explicit name param > display_name tag > node.name + if name is not None: + display_name = name + elif n.tags.get("display_name"): + display_name = n.tags["display_name"] + else: + display_name = n.name + if type_string is None: type_string = get_type_as_string(n.type) if get_type_as_string(n.type) else "" # We need to ensure that name and type string are HTML-escaped - # strings to avoid syntax errors. This is particular important - # because config *values* are passed through this function + # strings to avoid syntax errors. This is particularly important + # because config *values* and display_name tags are passed through this function # see issue: https://github.com/apache/hamilton/issues/1200 # see graphviz ref: https://graphviz.org/doc/info/shapes.html#html if len(type_string) > MAX_STRING_LENGTH: type_string = type_string[:MAX_STRING_LENGTH] + "[...]" + escaped_display_name = html.escape(display_name, quote=True) escaped_type_string = html.escape(type_string, quote=True) - return f"<<b>{name}</b><br /><br /><i>{escaped_type_string}</i>>" + return f"<<b>{escaped_display_name}</b><br /><br /><i>{escaped_type_string}</i>>" def _get_input_label(input_nodes: FrozenSet[node.Node]) -> str: - """Get a graphviz HTML-like node label formatted aspyer a table. + """Get a graphviz HTML-like node label formatted as a table. Each row is a different input node with one column containing - the name and the other the type. + the name (or display_name if present) and the other the type. 
ref: https://graphviz.org/doc/info/shapes.html#html """ rows = [] for dep in input_nodes: - name = dep.name + # Use display_name tag if present, otherwise use node name + display_name = dep.tags.get("display_name", dep.name) type_string = get_type_as_string(dep.type) if get_type_as_string(dep.type) else "" - rows.append(f"<tr><td>{name}</td><td>{type_string}</td></tr>") + # HTML escape for security + escaped_display_name = html.escape(display_name, quote=True) + escaped_type_string = html.escape(type_string, quote=True) + rows.append(f"<tr><td>{escaped_display_name}</td><td>{escaped_type_string}</td></tr>") return f"<<table border=\"0\">{''.join(rows)}</table>>" def _get_node_type(n: node.Node) -> str: diff --git a/tests/resources/display_name_functions.py b/tests/resources/display_name_functions.py new file mode 100644 index 00000000..4c69bb14 --- /dev/null +++ b/tests/resources/display_name_functions.py @@ -0,0 +1,52 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Test module for display_name tag support in graphviz visualization. 
+ +See: https://github.com/apache/hamilton/issues/1413 +""" + +from hamilton.function_modifiers import tag + + +def input_a() -> int: + """A simple input node without display_name.""" + return 1 + + +@tag(display_name="My Custom Display Name") +def node_with_display_name(input_a: int) -> int: + """A node with a custom display name for visualization.""" + return input_a + 1 + + +@tag(display_name="Special <Characters> & \"Quotes\"") +def node_with_special_chars(input_a: int) -> int: + """A node with special HTML characters that need escaping.""" + return input_a * 2 + + +@tag(owner="data-science") +def node_without_display_name(input_a: int) -> int: + """A node with other tags but no display_name.""" + return input_a + 10 + + +@tag(display_name="Final Output Node", owner="analytics") +def output_node(node_with_display_name: int, node_without_display_name: int) -> int: + """A node with display_name and other tags.""" + return node_with_display_name + node_without_display_name diff --git a/tests/test_graph.py b/tests/test_graph.py index e412442a..6d1b4483 100644 --- a/tests/test_graph.py +++ b/tests/test_graph.py @@ -1309,3 +1309,89 @@ def test_update_dependencies(): for node_name, node_ in new_nodes.items(): assert node_.dependencies == nodes[node_name].dependencies assert node_.depended_on_by == nodes[node_name].depended_on_by + + +# Tests for display_name tag support in graphviz visualization +# See: https://github.com/apache/hamilton/issues/1413 + + +def test_create_graphviz_graph_with_display_name(): + """Tests that display_name tag is used for node labels in visualization.""" + import tests.resources.display_name_functions + + config = {} + fg = graph.FunctionGraph.from_modules(tests.resources.display_name_functions, config=config) + nodes, user_nodes = fg.get_upstream_nodes(["output_node"]) + all_nodes = nodes.union(user_nodes) + + digraph = graph.create_graphviz_graph( + all_nodes, + "Display Name Test\n", + graphviz_kwargs={}, + node_modifiers={}, + 
strictly_display_only_nodes_passed_in=False, + config=config, + ) + dot_string = str(digraph) + + # Node with display_name should show the display name, not the function name + assert "My Custom Display Name" in dot_string + assert "Final Output Node" in dot_string + + # Node without display_name should show the function name + assert "node_without_display_name" in dot_string + + +def test_create_graphviz_graph_display_name_html_escaping(): + """Tests that display_name values with special characters are properly HTML escaped.""" + import tests.resources.display_name_functions + + config = {} + fg = graph.FunctionGraph.from_modules(tests.resources.display_name_functions, config=config) + nodes, user_nodes = fg.get_upstream_nodes(["node_with_special_chars"]) + all_nodes = nodes.union(user_nodes) + + digraph = graph.create_graphviz_graph( + all_nodes, + "HTML Escape Test\n", + graphviz_kwargs={}, + node_modifiers={}, + strictly_display_only_nodes_passed_in=False, + config=config, + ) + dot_string = str(digraph) + + # Special characters should be HTML escaped + # < becomes &lt;, > becomes &gt;, & becomes &amp;, " becomes &quot; + assert "&lt;" in dot_string # < + assert "&gt;" in dot_string # > + assert "&amp;" in dot_string # & + assert "&quot;" in dot_string # " + + # The raw special characters should NOT appear unescaped in the label + # (they would break graphviz HTML parsing) + assert 'label=<<b>Special <Characters>' not in dot_string + + +def test_create_graphviz_graph_without_display_name_backward_compatible(): + """Tests that nodes without display_name tag still work as before.""" + import tests.resources.dummy_functions + + config = {} + fg = graph.FunctionGraph.from_modules(tests.resources.dummy_functions, config=config) + nodes, user_nodes = fg.get_upstream_nodes(["A", "B"]) + all_nodes = nodes.union(user_nodes) + + digraph = graph.create_graphviz_graph( + all_nodes, + "Backward Compatibility Test\n", + graphviz_kwargs={}, + node_modifiers={}, +
strictly_display_only_nodes_passed_in=False, + config=config, + ) + dot_string = str(digraph) + + # Without display_name tag, node names should be used (existing behavior) + assert "<b>A</b>" in dot_string + assert "<b>B</b>" in dot_string
