Copilot commented on code in PR #2633:
URL: https://github.com/apache/sedona/pull/2633#discussion_r2781787884
##########
python/tests/raster_viz_utils/test_sedonautils.py:
##########
@@ -35,3 +35,28 @@ def test_display_image(self):
assert (
html_call is None
) # just test that this function was called and returned no output
+
+ def test_display_image_raw_raster(self):
+ """Test that display_image auto-detects raster columns and applies
RS_AsImage."""
+ raster_bin_df = self.spark.read.format("binaryFile").load(
+ world_map_raster_input_location
+ )
+ raster_bin_df.createOrReplaceTempView("raster_binary_table")
Review Comment:
Both tests register the same temp view name (`raster_binary_table`). This
can cause cross-test interference when running tests concurrently (or if other
tests in the suite use the same view name). Prefer avoiding temp views here
(use `selectExpr` directly on the DataFrame) or use a unique view name per test.
##########
python/sedona/spark/raster_utils/SedonaUtils.py:
##########
@@ -21,7 +21,40 @@
class SedonaUtils:
@classmethod
def display_image(cls, df):
+ """Display raster images in a Jupyter notebook.
+
+ Accepts DataFrames with either:
+ - A raster column (GridCoverage2D) — auto-applies RS_AsImage
+ - An HTML image column from RS_AsImage() — renders directly
+
+ Falls back to the SedonaMapUtils HTML table path for other DataFrames.
+ """
from IPython.display import HTML, display
+ schema = df.schema
+
+ # Detect raster UDT columns and auto-apply RS_AsImage.
+ # Without this, passing a raw raster DataFrame to the fallback path
+ # causes __convert_to_gdf_or_pdf__ to Arrow-serialize the full raster
+ # grid, which hangs for large rasters (e.g., 1400x800).
+ raster_cols = [
+ f.name
+ for f in schema.fields
+ if hasattr(f.dataType, "typeName") and f.dataType.typeName() ==
"rastertype"
+ ]
+ if raster_cols:
+ # Replace each raster column with its RS_AsImage() HTML
representation,
+ # preserving all other columns in the DataFrame.
+ select_exprs = []
+ for f in schema.fields:
+ col_name_escaped = f.name.replace("`", "``")
+ if f.name in raster_cols:
+ select_exprs.append(
+ f"RS_AsImage(`{col_name_escaped}`) as
`{col_name_escaped}`"
+ )
+ else:
+ select_exprs.append(f"`{col_name_escaped}`")
+ df = df.selectExpr(*select_exprs)
Review Comment:
`display_image` now unconditionally assumes a Spark DataFrame by accessing
`df.schema` / `df.selectExpr`. If `display_image` is (or was) intended to
accept non-Spark inputs that `SedonaMapUtils.__convert_to_gdf_or_pdf__` can
handle, this introduces a breaking change (AttributeError before reaching the
fallback). Consider guarding the raster auto-detection behind a Spark-DataFrame
check (e.g., `hasattr(df, "schema") and hasattr(df, "selectExpr")`) and
otherwise directly fall through to `__convert_to_gdf_or_pdf__`.
```suggestion
# Only attempt Spark-specific raster auto-detection if `df` looks
like
# a Spark DataFrame. For other input types, fall through directly to
# SedonaMapUtils.__convert_to_gdf_or_pdf__.
if hasattr(df, "schema") and hasattr(df, "selectExpr"):
schema = df.schema
# Detect raster UDT columns and auto-apply RS_AsImage.
# Without this, passing a raw raster DataFrame to the fallback
path
# causes __convert_to_gdf_or_pdf__ to Arrow-serialize the full
raster
# grid, which hangs for large rasters (e.g., 1400x800).
raster_cols = [
f.name
for f in schema.fields
if hasattr(f.dataType, "typeName") and f.dataType.typeName()
== "rastertype"
]
if raster_cols:
# Replace each raster column with its RS_AsImage() HTML
representation,
# preserving all other columns in the DataFrame.
select_exprs = []
for f in schema.fields:
col_name_escaped = f.name.replace("`", "``")
if f.name in raster_cols:
select_exprs.append(
f"RS_AsImage(`{col_name_escaped}`) as
`{col_name_escaped}`"
)
else:
select_exprs.append(f"`{col_name_escaped}`")
df = df.selectExpr(*select_exprs)
```
##########
python/tests/raster_viz_utils/test_sedonautils.py:
##########
@@ -35,3 +35,28 @@ def test_display_image(self):
assert (
html_call is None
) # just test that this function was called and returned no output
+
+ def test_display_image_raw_raster(self):
+ """Test that display_image auto-detects raster columns and applies
RS_AsImage."""
+ raster_bin_df = self.spark.read.format("binaryFile").load(
+ world_map_raster_input_location
+ )
+ raster_bin_df.createOrReplaceTempView("raster_binary_table")
+ # Pass a raw raster DataFrame — no RS_AsImage applied
+ raster_df = self.spark.sql(
+ "SELECT RS_FromGeotiff(content) as raster from raster_binary_table"
+ )
+ html_call = SedonaUtils.display_image(raster_df)
+ assert html_call is None
+
+ def test_display_image_preserves_non_raster_columns(self):
+ """Test that non-raster columns are preserved alongside raster
columns."""
+ raster_bin_df = self.spark.read.format("binaryFile").load(
+ world_map_raster_input_location
+ )
+ raster_bin_df.createOrReplaceTempView("raster_binary_table")
+ raster_df = self.spark.sql(
+ "SELECT path, RS_FromGeotiff(content) as raster from
raster_binary_table"
+ )
+ html_call = SedonaUtils.display_image(raster_df)
+ assert html_call is None
Review Comment:
The new tests exercise the call path but don’t assert the new behavior: (1)
that raster columns are actually converted via `RS_AsImage`, and (2) that
non-raster columns are preserved. Since `display_image` returns `None`,
consider asserting against an observable side effect (e.g., mock/spy
`SedonaMapUtils.__convert_to_gdf_or_pdf__` to capture the DataFrame passed in
and validate its schema/columns/types, or mock `IPython.display.display` and
assert the rendered HTML contains an `<img` tag and includes the non-raster
column value).
##########
python/tests/raster_viz_utils/test_sedonautils.py:
##########
@@ -35,3 +35,28 @@ def test_display_image(self):
assert (
html_call is None
) # just test that this function was called and returned no output
+
+ def test_display_image_raw_raster(self):
+ """Test that display_image auto-detects raster columns and applies
RS_AsImage."""
+ raster_bin_df = self.spark.read.format("binaryFile").load(
+ world_map_raster_input_location
+ )
+ raster_bin_df.createOrReplaceTempView("raster_binary_table")
+ # Pass a raw raster DataFrame — no RS_AsImage applied
+ raster_df = self.spark.sql(
+ "SELECT RS_FromGeotiff(content) as raster from raster_binary_table"
+ )
+ html_call = SedonaUtils.display_image(raster_df)
+ assert html_call is None
+
+ def test_display_image_preserves_non_raster_columns(self):
+ """Test that non-raster columns are preserved alongside raster
columns."""
+ raster_bin_df = self.spark.read.format("binaryFile").load(
+ world_map_raster_input_location
+ )
+ raster_bin_df.createOrReplaceTempView("raster_binary_table")
Review Comment:
Both tests register the same temp view name (`raster_binary_table`). This
can cause cross-test interference when running tests concurrently (or if other
tests in the suite use the same view name). Prefer avoiding temp views here
(use `selectExpr` directly on the DataFrame) or use a unique view name per test.
##########
python/tests/raster_viz_utils/test_sedonautils.py:
##########
@@ -35,3 +35,28 @@ def test_display_image(self):
assert (
html_call is None
) # just test that this function was called and returned no output
+
+ def test_display_image_raw_raster(self):
+ """Test that display_image auto-detects raster columns and applies
RS_AsImage."""
+ raster_bin_df = self.spark.read.format("binaryFile").load(
+ world_map_raster_input_location
+ )
+ raster_bin_df.createOrReplaceTempView("raster_binary_table")
+ # Pass a raw raster DataFrame — no RS_AsImage applied
+ raster_df = self.spark.sql(
+ "SELECT RS_FromGeotiff(content) as raster from raster_binary_table"
+ )
+ html_call = SedonaUtils.display_image(raster_df)
+ assert html_call is None
Review Comment:
The new tests exercise the call path but don’t assert the new behavior: (1)
that raster columns are actually converted via `RS_AsImage`, and (2) that
non-raster columns are preserved. Since `display_image` returns `None`,
consider asserting against an observable side effect (e.g., mock/spy
`SedonaMapUtils.__convert_to_gdf_or_pdf__` to capture the DataFrame passed in
and validate its schema/columns/types, or mock `IPython.display.display` and
assert the rendered HTML contains an `<img` tag and includes the non-raster
column value).
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]