Copilot commented on code in PR #2128:
URL: https://github.com/apache/sedona/pull/2128#discussion_r2217751622


##########
python/sedona/spark/stac/collection_client.py:
##########
@@ -119,16 +122,35 @@ def _apply_spatial_temporal_filters(
         - DataFrame: The filtered Spark DataFrame.
 
         The function constructs SQL conditions for spatial and temporal 
filters and applies them to the DataFrame.
-        If bbox is provided, it constructs spatial conditions using 
st_intersects and ST_GeomFromText.
+        If geometry is provided, it takes precedence over bbox for spatial 
filtering.
+        If bbox is provided (and no geometry), it constructs spatial 
conditions using st_intersects and ST_GeomFromText.
         If datetime is provided, it constructs temporal conditions using the 
datetime column.
         The conditions are combined using OR logic.
         """
-        if bbox:
+        # Geometry takes precedence over bbox
+        if geometry:
+            geometry_conditions = []
+            for geom in geometry:
+                if isinstance(geom, str):
+                    # Assume it's WKT
+                    geom_wkt = geom
+                elif hasattr(geom, "wkt"):
+                    # Shapely geometry object
+                    geom_wkt = geom.wkt
+                else:
+                    # Try to convert to string (fallback)
+                    geom_wkt = str(geom)
+                geometry_conditions.append(
+                    f"st_intersects(ST_GeomFromText('{geom_wkt}'), geometry)"
+                )
+            geometry_sql_condition = " OR ".join(geometry_conditions)
+            df = df.filter(geometry_sql_condition)
+        elif bbox:
             bbox_conditions = []
-            for bbox in bbox:
+            for bbox_item in bbox:
                 polygon_wkt = (
-                    f"POLYGON(({bbox[0]} {bbox[1]}, {bbox[2]} {bbox[1]}, "
-                    f"{bbox[2]} {bbox[3]}, {bbox[0]} {bbox[3]}, {bbox[0]} 
{bbox[1]}))"
+                    f"POLYGON(({bbox_item[0]} {bbox_item[1]}, {bbox_item[2]} 
{bbox_item[1]}, "
+                    f"{bbox_item[2]} {bbox_item[3]}, {bbox_item[0]} 
{bbox_item[3]}, {bbox_item[0]} {bbox_item[1]}))"
                 )

Review Comment:
   Direct string interpolation of polygon WKT into SQL query creates potential 
for SQL injection. While bbox values are likely numeric, using parameterized 
queries would be safer and more consistent with security best practices.



##########
python/tests/stac/test_collection_client.py:
##########
@@ -188,6 +188,131 @@ def test_save_to_geoparquet(self) -> None:
             df_loaded = 
collection.spark.read.format("geoparquet").load(output_path)
             assert df_loaded.count() == 20, "Loaded GeoParquet file is empty"
 
+    def test_get_items_with_wkt_geometry(self) -> None:
+        """Test that WKT geometry strings are properly handled for spatial 
filtering."""
+        client = Client.open(STAC_URLS["PLANETARY-COMPUTER"])
+        collection = client.get_collection("aster-l1t")
+
+        # Test with WKT polygon geometry
+        wkt_polygon = "POLYGON((90 -73, 105 -73, 105 -69, 90 -69, 90 -73))"
+        items_with_wkt = list(collection.get_items(geometry=wkt_polygon))
+
+        # Both should return similar number of items (may not be exactly same 
due to geometry differences)
+        assert items_with_wkt is not None
+        assert len(items_with_wkt) > 0
+
+    def test_get_dataframe_with_shapely_geometry(self) -> None:
+        """Test that Shapely geometry objects are properly handled for spatial 
filtering."""
+        from shapely.geometry import Polygon
+
+        client = Client.open(STAC_URLS["PLANETARY-COMPUTER"])
+        collection = client.get_collection("aster-l1t")
+
+        # Test with Shapely polygon geometry
+        shapely_polygon = Polygon(
+            [(90, -73), (105, -73), (105, -69), (90, -69), (90, -73)]
+        )
+        df_with_shapely = collection.get_dataframe(geometry=shapely_polygon)
+
+        # Both should return similar number of items
+        assert df_with_shapely is not None
+        assert df_with_shapely.count() > 0
+
+    def test_get_items_with_geometry_list(self) -> None:
+        """Test that lists of geometry objects are properly handled."""
+        from shapely.geometry import Polygon
+
+        client = Client.open(STAC_URLS["PLANETARY-COMPUTER"])
+        collection = client.get_collection("aster-l1t")
+
+        # Test with list of geometries (both WKT and Shapely)
+        wkt_polygon = "POLYGON((90 -73, 105 -73, 105 -69, 90 -69, 90 -73))"
+        shapely_polygon = Polygon(
+            [(-100, -72), (-90, -72), (-90, -62), (-100, -62), (-100, -72)]
+        )
+        geometry_list = [wkt_polygon, shapely_polygon]
+
+        items_with_geom_list = 
list(collection.get_items(geometry=geometry_list))
+
+        # Should return items from both geometries
+        assert items_with_geom_list is not None
+        assert len(items_with_geom_list) > 0
+
+    def test_geometry_takes_precedence_over_bbox(self) -> None:
+        """Test that geometry parameter takes precedence over bbox when both 
are provided."""
+        from shapely.geometry import Polygon
+
+        client = Client.open(STAC_URLS["PLANETARY-COMPUTER"])
+        collection = client.get_collection("aster-l1t")
+
+        # Define different spatial extents
+        bbox = [-180.0, -90.0, 180.0, 90.0]  # World bbox
+        small_polygon = Polygon(
+            [(90, -73), (105, -73), (105, -69), (90, -69), (90, -73)]
+        )  # Small area
+
+        # When both are provided, geometry should take precedence
+        items_with_both = list(collection.get_items(bbox=bbox, 
geometry=small_polygon))
+        items_with_geom_only = 
list(collection.get_items(geometry=small_polygon))
+
+        # Results should be identical since geometry takes precedence
+        assert items_with_both is not None
+        assert items_with_geom_only is not None
+        assert len(items_with_both) == len(items_with_geom_only)
+        assert len(items_with_both) > 0
+
+    def test_get_dataframe_with_geometry_and_datetime(self) -> None:
+        """Test that geometry and datetime filters work together."""
+        from shapely.geometry import Polygon
+
+        client = Client.open(STAC_URLS["PLANETARY-COMPUTER"])
+        collection = client.get_collection("aster-l1t")
+
+        # Define spatial and temporal filters
+        polygon = Polygon([(90, -73), (105, -73), (105, -69), (90, -69), (90, 
-73)])
+        datetime_range = ["2006-12-01T00:00:00Z", "2006-12-27T03:00:00Z"]
+
+        df_with_both = collection.get_dataframe(
+            geometry=polygon, datetime=datetime_range
+        )
+        df_with_geom_only = collection.get_dataframe(geometry=polygon)
+
+        # Combined filter should return fewer or equal items than 
geometry-only filter
+        assert df_with_both is not None
+        assert df_with_geom_only is not None
+        assert df_with_both.count() <= df_with_geom_only.count()
+
+    def test_save_to_geoparquet_with_geometry(self) -> None:
+        """Test saving to GeoParquet with geometry parameter."""
+        from shapely.geometry import Polygon
+        import tempfile
+        import os
+
+        client = Client.open(STAC_URLS["PLANETARY-COMPUTER"])
+        collection = client.get_collection("aster-l1t")
+
+        # Create a temporary directory for the output path and clean it up 
after the test
+        with tempfile.TemporaryDirectory() as tmpdirname:
+            output_path = f"{tmpdirname}/test_geometry_geoparquet_output"
+
+            # Define spatial and temporal extents
+            polygon = Polygon(
+                [(-180, -90), (180, -90), (180, 90), (-180, 90), (-180, -90)]
+            )
+            datetime_range = [["2006-01-01T00:00:00Z", "2007-01-01T00:00:00Z"]]

Review Comment:
   The datetime_range is wrapped in an extra list layer (double nested) which 
is inconsistent with other test methods that use single-level nesting like 
["start", "end"]. This could cause confusion or unexpected behavior.
   ```suggestion
               datetime_range = ["2006-01-01T00:00:00Z", "2007-01-01T00:00:00Z"]
   ```



##########
python/sedona/spark/stac/collection_client.py:
##########
@@ -119,16 +122,35 @@ def _apply_spatial_temporal_filters(
         - DataFrame: The filtered Spark DataFrame.
 
         The function constructs SQL conditions for spatial and temporal 
filters and applies them to the DataFrame.
-        If bbox is provided, it constructs spatial conditions using 
st_intersects and ST_GeomFromText.
+        If geometry is provided, it takes precedence over bbox for spatial 
filtering.
+        If bbox is provided (and no geometry), it constructs spatial 
conditions using st_intersects and ST_GeomFromText.
         If datetime is provided, it constructs temporal conditions using the 
datetime column.
         The conditions are combined using OR logic.
         """
-        if bbox:
+        # Geometry takes precedence over bbox
+        if geometry:
+            geometry_conditions = []
+            for geom in geometry:
+                if isinstance(geom, str):
+                    # Assume it's WKT
+                    geom_wkt = geom
+                elif hasattr(geom, "wkt"):
+                    # Shapely geometry object
+                    geom_wkt = geom.wkt
+                else:
+                    # Try to convert to string (fallback)
+                    geom_wkt = str(geom)
+                geometry_conditions.append(
+                    f"st_intersects(ST_GeomFromText('{geom_wkt}'), geometry)"
+                )
+            geometry_sql_condition = " OR ".join(geometry_conditions)
+            df = df.filter(geometry_sql_condition)

Review Comment:
   Direct string interpolation of user-provided WKT data into SQL query creates 
potential for SQL injection vulnerability. Consider using parameterized queries 
or proper SQL escaping to sanitize the WKT input.
   ```suggestion
                       "st_intersects(ST_GeomFromText(?), geometry)"
                   )
               geometry_sql_condition = " OR ".join(geometry_conditions)
               df = df.filter(geometry_sql_condition, [geom.wkt if 
hasattr(geom, "wkt") else str(geom) for geom in geometry])
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to