timsaucer commented on code in PR #1170:
URL: https://github.com/apache/datafusion-python/pull/1170#discussion_r2166481205


##########
python/datafusion/dataframe_formatter.py:
##########
@@ -0,0 +1,739 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""HTML formatting utilities for DataFusion DataFrames."""
+
+from __future__ import annotations
+
+from typing import (
+    Any,
+    Callable,
+    Optional,
+    Protocol,
+    runtime_checkable,
+)
+
+from datafusion._internal import DataFrame as DataFrameInternal
+
+
+def _validate_positive_int(value: Any, param_name: str) -> None:
+    """Validate that a parameter is a positive integer.
+
+    Args:
+        value: The value to validate
+        param_name: Name of the parameter (used in error message)
+
+    Raises:
+        ValueError: If the value is not a positive integer
+    """
+    if not isinstance(value, int) or value <= 0:
+        msg = f"{param_name} must be a positive integer"
+        raise ValueError(msg)
+
+
+def _validate_bool(value: Any, param_name: str) -> None:
+    """Validate that a parameter is a boolean.
+
+    Args:
+        value: The value to validate
+        param_name: Name of the parameter (used in error message)
+
+    Raises:
+        TypeError: If the value is not a boolean
+    """
+    if not isinstance(value, bool):
+        msg = f"{param_name} must be a boolean"
+        raise TypeError(msg)
+
+
+@runtime_checkable
+class CellFormatter(Protocol):
+    """Protocol for cell value formatters."""
+
+    def __call__(self, value: Any) -> str:
+        """Format a cell value to string representation."""
+        ...
+
+
+@runtime_checkable
+class StyleProvider(Protocol):
+    """Protocol for HTML style providers."""
+
+    def get_cell_style(self) -> str:
+        """Get the CSS style for table cells."""
+        ...
+
+    def get_header_style(self) -> str:
+        """Get the CSS style for header cells."""
+        ...
+
+
+class DefaultStyleProvider:
+    """Default implementation of StyleProvider."""
+
+    def get_cell_style(self) -> str:
+        """Get the CSS style for table cells.
+
+        Returns:
+            CSS style string
+        """
+        return (
+            "border: 1px solid black; padding: 8px; text-align: left; "
+            "white-space: nowrap;"
+        )
+
+    def get_header_style(self) -> str:
+        """Get the CSS style for header cells.
+
+        Returns:
+            CSS style string
+        """
+        return (
+            "border: 1px solid black; padding: 8px; text-align: left; "
+            "background-color: #f2f2f2; white-space: nowrap; min-width: fit-content; "
+            "max-width: fit-content;"
+        )
+
+
+class DataFrameHtmlFormatter:
+    """Configurable HTML formatter for DataFusion DataFrames.
+
+    This class handles the HTML rendering of DataFrames for display in
+    Jupyter notebooks and other rich display contexts.
+
+    This class supports extension through composition. Key extension points:
+    - Provide a custom StyleProvider for styling cells and headers
+    - Register custom formatters for specific types
+    - Provide custom cell builders for specialized cell rendering
+
+    Args:
+        max_cell_length: Maximum characters to display in a cell before truncation
+        max_width: Maximum width of the HTML table in pixels
+        max_height: Maximum height of the HTML table in pixels
+        max_memory_bytes: Maximum memory in bytes for rendered data (default: 2MB)
+        min_rows_display: Minimum number of rows to display
+        repr_rows: Default number of rows to display in repr output
+        enable_cell_expansion: Whether to add expand/collapse buttons for long cell
+          values
+        custom_css: Additional CSS to include in the HTML output
+        show_truncation_message: Whether to display a message when data is truncated
+        style_provider: Custom provider for cell and header styles
+        use_shared_styles: Whether to load styles and scripts only once per notebook
+          session
+    """
+
+    # Class variable to track if styles have been loaded in the notebook
+    _styles_loaded = False
+
+    def __init__(
+        self,
+        max_cell_length: int = 25,
+        max_width: int = 1000,
+        max_height: int = 300,
+        max_memory_bytes: int = 2 * 1024 * 1024,  # 2 MB
+        min_rows_display: int = 20,
+        repr_rows: int = 10,
+        enable_cell_expansion: bool = True,
+        custom_css: Optional[str] = None,
+        show_truncation_message: bool = True,
+        style_provider: Optional[StyleProvider] = None,
+        use_shared_styles: bool = True,
+    ) -> None:
+        """Initialize the HTML formatter.
+
+        Parameters
+        ----------
+        max_cell_length : int, default 25
+            Maximum length of cell content before truncation.
+        max_width : int, default 1000
+            Maximum width of the displayed table in pixels.
+        max_height : int, default 300
+            Maximum height of the displayed table in pixels.
+        max_memory_bytes : int, default 2097152 (2MB)
+            Maximum memory in bytes for rendered data.
+        min_rows_display : int, default 20
+            Minimum number of rows to display.
+        repr_rows : int, default 10
+            Default number of rows to display in repr output.
+        enable_cell_expansion : bool, default True
+            Whether to allow cells to expand when clicked.
+        custom_css : str, optional
+            Custom CSS to apply to the HTML table.
+        show_truncation_message : bool, default True
+            Whether to show a message indicating that content has been truncated.
+        style_provider : StyleProvider, optional
+            Provider of CSS styles for the HTML table. If None, DefaultStyleProvider
+            is used.
+        use_shared_styles : bool, default True
+            Whether to use shared styles across multiple tables.
+
+        Raises:
+        ------
+        ValueError
+            If max_cell_length, max_width, max_height, max_memory_bytes,
+            min_rows_display, or repr_rows is not a positive integer.
+        TypeError
+            If enable_cell_expansion, show_truncation_message, or use_shared_styles is
+            not a boolean,
+            or if custom_css is provided but is not a string,
+            or if style_provider is provided but does not implement the StyleProvider
+            protocol.
+        """
+        # Validate numeric parameters
+        _validate_positive_int(max_cell_length, "max_cell_length")
+        _validate_positive_int(max_width, "max_width")
+        _validate_positive_int(max_height, "max_height")
+        _validate_positive_int(max_memory_bytes, "max_memory_bytes")
+        _validate_positive_int(min_rows_display, "min_rows_display")
+        _validate_positive_int(repr_rows, "repr_rows")
+
+        # Validate boolean parameters
+        _validate_bool(enable_cell_expansion, "enable_cell_expansion")
+        _validate_bool(show_truncation_message, "show_truncation_message")
+        _validate_bool(use_shared_styles, "use_shared_styles")
+
+        # Validate custom_css
+        if custom_css is not None and not isinstance(custom_css, str):
+            msg = "custom_css must be None or a string"
+            raise TypeError(msg)
+
+        # Validate style_provider
+        if style_provider is not None and not isinstance(style_provider, StyleProvider):
+            msg = "style_provider must implement the StyleProvider protocol"
+            raise TypeError(msg)
+
+        self.max_cell_length = max_cell_length
+        self.max_width = max_width
+        self.max_height = max_height
+        self.max_memory_bytes = max_memory_bytes
+        self.min_rows_display = min_rows_display
+        self.repr_rows = repr_rows
+        self.enable_cell_expansion = enable_cell_expansion
+        self.custom_css = custom_css
+        self.show_truncation_message = show_truncation_message
+        self.style_provider = style_provider or DefaultStyleProvider()
+        self.use_shared_styles = use_shared_styles
+        # Registry for custom type formatters
+        self._type_formatters: dict[type, CellFormatter] = {}
+        # Custom cell builders
+        self._custom_cell_builder: Optional[Callable[[Any, int, int, str], str]] = None
+        self._custom_header_builder: Optional[Callable[[Any], str]] = None
+
+    def register_formatter(self, type_class: type, formatter: CellFormatter) -> None:
+        """Register a custom formatter for a specific data type.
+
+        Args:
+            type_class: The type to register a formatter for
+            formatter: Function that takes a value of the given type and returns
+                a formatted string
+        """
+        self._type_formatters[type_class] = formatter
+
+    def set_custom_cell_builder(
+        self, builder: Callable[[Any, int, int, str], str]
+    ) -> None:
+        """Set a custom cell builder function.
+
+        Args:
+            builder: Function that takes (value, row, col, table_id) and returns HTML
+        """
+        self._custom_cell_builder = builder
+
+    def set_custom_header_builder(self, builder: Callable[[Any], str]) -> None:
+        """Set a custom header builder function.
+
+        Args:
+            builder: Function that takes a field and returns HTML
+        """
+        self._custom_header_builder = builder
+
+    @classmethod
+    def is_styles_loaded(cls) -> bool:
+        """Check if HTML styles have been loaded in the current session.

Review Comment:
   Excellent! Maybe we open a separate PR for this? 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Reply via email to