This is an automated email from the ASF dual-hosted git repository.
jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sedona.git
The following commit(s) were added to refs/heads/master by this push:
new 4ac1e483b6 [GH-2348][Python] Make STAC tests resilient to external
server failures (#2349)
4ac1e483b6 is described below
commit 4ac1e483b6ba62f2801b35fd13f8e144a7f1c884
Author: Feng Zhang <[email protected]>
AuthorDate: Mon Sep 15 13:57:25 2025 -0700
[GH-2348][Python] Make STAC tests resilient to external server failures
(#2349)
* Add mock client to stac reader python tests
* fix the pre-commit issue
* Update python/tests/stac/test_mock_fixtures.py
Co-authored-by: Copilot <[email protected]>
* Update python/tests/stac/test_collection_client.py
Co-authored-by: Copilot <[email protected]>
* Update python/tests/stac/test_client.py
Co-authored-by: Copilot <[email protected]>
---------
Co-authored-by: Copilot <[email protected]>
---
python/tests/stac/test_client.py | 57 ++++++++--
python/tests/stac/test_collection_client.py | 119 ++++++++++++++++-----
python/tests/stac/test_mock_fixtures.py | 159 ++++++++++++++++++++++++++++
3 files changed, 300 insertions(+), 35 deletions(-)
diff --git a/python/tests/stac/test_client.py b/python/tests/stac/test_client.py
index 7c6144442d..0d395d55de 100644
--- a/python/tests/stac/test_client.py
+++ b/python/tests/stac/test_client.py
@@ -16,10 +16,12 @@
# under the License.
import collections.abc
+from unittest.mock import patch
from sedona.spark.stac.client import Client
from pyspark.sql import DataFrame
from tests.test_base import TestBase
+from tests.stac.test_mock_fixtures import MockClient
STAC_URLS = {
"PLANETARY-COMPUTER":
"https://planetarycomputer.microsoft.com/api/stac/v1",
@@ -28,7 +30,10 @@ STAC_URLS = {
class TestStacClient(TestBase):
- def test_collection_client(self) -> None:
+ @patch("sedona.spark.stac.client.Client.open")
+ def test_collection_client(self, mock_open) -> None:
+ mock_open.return_value = MockClient(STAC_URLS["PLANETARY-COMPUTER"])
+
client = Client.open(STAC_URLS["PLANETARY-COMPUTER"])
items = client.search(
collection_id="aster-l1t",
@@ -39,7 +44,10 @@ class TestStacClient(TestBase):
assert items is not None
assert isinstance(items, collections.abc.Iterator)
- def test_search_with_ids(self) -> None:
+ @patch("sedona.spark.stac.client.Client.open")
+ def test_search_with_ids(self, mock_open) -> None:
+ mock_open.return_value = MockClient(STAC_URLS["PLANETARY-COMPUTER"])
+
client = Client.open(STAC_URLS["PLANETARY-COMPUTER"])
items = client.search(
*["AST_L1T_00312272006020322_20150518201805", "item2"],
@@ -49,7 +57,10 @@ class TestStacClient(TestBase):
assert items is not None
assert isinstance(items, collections.abc.Iterator)
- def test_search_with_single_id(self) -> None:
+ @patch("sedona.spark.stac.client.Client.open")
+ def test_search_with_single_id(self, mock_open) -> None:
+ mock_open.return_value = MockClient(STAC_URLS["PLANETARY-COMPUTER"])
+
client = Client.open(STAC_URLS["PLANETARY-COMPUTER"])
items = client.search(
"AST_L1T_00312272006020322_20150518201805",
@@ -59,7 +70,10 @@ class TestStacClient(TestBase):
assert items is not None
assert isinstance(items, collections.abc.Iterator)
- def test_search_with_bbox_and_datetime(self) -> None:
+ @patch("sedona.spark.stac.client.Client.open")
+ def test_search_with_bbox_and_datetime(self, mock_open) -> None:
+ mock_open.return_value = MockClient(STAC_URLS["PLANETARY-COMPUTER"])
+
client = Client.open(STAC_URLS["PLANETARY-COMPUTER"])
items = client.search(
collection_id="aster-l1t",
@@ -70,7 +84,10 @@ class TestStacClient(TestBase):
assert items is not None
assert isinstance(items, collections.abc.Iterator)
- def test_search_with_multiple_bboxes_and_intervals(self) -> None:
+ @patch("sedona.spark.stac.client.Client.open")
+ def test_search_with_multiple_bboxes_and_intervals(self, mock_open) -> None:
+ mock_open.return_value = MockClient(STAC_URLS["PLANETARY-COMPUTER"])
+
client = Client.open(STAC_URLS["PLANETARY-COMPUTER"])
items = client.search(
collection_id="aster-l1t",
@@ -85,7 +102,10 @@ class TestStacClient(TestBase):
assert items is not None
assert isinstance(items, collections.abc.Iterator)
- def test_search_with_bbox_and_non_overlapping_intervals(self) -> None:
+ @patch("sedona.spark.stac.client.Client.open")
+ def test_search_with_bbox_and_non_overlapping_intervals(self, mock_open) -> None:
+ mock_open.return_value = MockClient(STAC_URLS["PLANETARY-COMPUTER"])
+
client = Client.open(STAC_URLS["PLANETARY-COMPUTER"])
items = client.search(
collection_id="aster-l1t",
@@ -99,7 +119,10 @@ class TestStacClient(TestBase):
assert items is not None
assert isinstance(items, collections.abc.Iterator)
- def test_search_with_max_items(self) -> None:
+ @patch("sedona.spark.stac.client.Client.open")
+ def test_search_with_max_items(self, mock_open) -> None:
+ mock_open.return_value = MockClient(STAC_URLS["PLANETARY-COMPUTER"])
+
client = Client.open(STAC_URLS["PLANETARY-COMPUTER"])
items = client.search(
collection_id="aster-l1t",
@@ -111,9 +134,12 @@ class TestStacClient(TestBase):
assert items is not None
assert isinstance(items, collections.abc.Iterator)
- def test_search_with_single_datetime(self) -> None:
+ @patch("sedona.spark.stac.client.Client.open")
+ def test_search_with_single_datetime(self, mock_open) -> None:
from datetime import datetime
+ mock_open.return_value = MockClient(STAC_URLS["PLANETARY-COMPUTER"])
+
client = Client.open(STAC_URLS["PLANETARY-COMPUTER"])
items = client.search(
collection_id="aster-l1t",
@@ -124,7 +150,10 @@ class TestStacClient(TestBase):
assert items is not None
assert isinstance(items, collections.abc.Iterator)
- def test_search_with_YYYY(self) -> None:
+ @patch("sedona.spark.stac.client.Client.open")
+ def test_search_with_YYYY(self, mock_open) -> None:
+ mock_open.return_value = MockClient(STAC_URLS["PLANETARY-COMPUTER"])
+
client = Client.open(STAC_URLS["PLANETARY-COMPUTER"])
items = client.search(
collection_id="aster-l1t",
@@ -135,7 +164,10 @@ class TestStacClient(TestBase):
assert items is not None
assert isinstance(items, collections.abc.Iterator)
- def test_search_with_return_dataframe(self) -> None:
+ @patch("sedona.spark.stac.client.Client.open")
+ def test_search_with_return_dataframe(self, mock_open) -> None:
+ mock_open.return_value = MockClient(STAC_URLS["PLANETARY-COMPUTER"])
+
client = Client.open(STAC_URLS["PLANETARY-COMPUTER"])
df = client.search(
collection_id="aster-l1t",
@@ -145,7 +177,10 @@ class TestStacClient(TestBase):
assert df is not None
assert isinstance(df, DataFrame)
- def test_search_with_catalog_url(self) -> None:
+ @patch("sedona.spark.stac.client.Client.open")
+ def test_search_with_catalog_url(self, mock_open) -> None:
+ mock_open.return_value = MockClient(STAC_URLS["EARTHVIEW-CATALOG"])
+
client = Client.open(STAC_URLS["EARTHVIEW-CATALOG"])
df = client.search(
return_dataframe=True,
diff --git a/python/tests/stac/test_collection_client.py b/python/tests/stac/test_collection_client.py
index 1144e99005..00b30403fa 100644
--- a/python/tests/stac/test_collection_client.py
+++ b/python/tests/stac/test_collection_client.py
@@ -16,12 +16,14 @@
# under the License.
import collections.abc
+from unittest.mock import patch
from pyspark.sql import DataFrame
from sedona.spark.stac.client import Client
from sedona.spark.stac.collection_client import CollectionClient
from tests.test_base import TestBase
+from tests.stac.test_mock_fixtures import MockClient, MockCollectionClient
STAC_URLS = {
"PLANETARY-COMPUTER": "https://planetarycomputer.microsoft.com/api/stac/v1"
@@ -29,21 +31,30 @@ STAC_URLS = {
class TestStacReader(TestBase):
- def test_collection_client(self) -> None:
+ @patch("sedona.spark.stac.client.Client.open")
+ def test_collection_client(self, mock_open) -> None:
+ mock_open.return_value = MockClient(STAC_URLS["PLANETARY-COMPUTER"])
+
client = Client.open(STAC_URLS["PLANETARY-COMPUTER"])
collection = client.get_collection("aster-l1t")
- assert isinstance(collection, CollectionClient)
+ assert isinstance(collection, MockCollectionClient)
assert str(collection) == "<CollectionClient id=aster-l1t>"
- def test_get_dataframe_no_filters(self) -> None:
+ @patch("sedona.spark.stac.client.Client.open")
+ def test_get_dataframe_no_filters(self, mock_open) -> None:
+ mock_open.return_value = MockClient(STAC_URLS["PLANETARY-COMPUTER"])
+
client = Client.open(STAC_URLS["PLANETARY-COMPUTER"])
collection = client.get_collection("aster-l1t")
df = collection.get_dataframe()
assert df is not None
assert isinstance(df, DataFrame)
- def test_get_dataframe_with_spatial_extent(self) -> None:
+ @patch("sedona.spark.stac.client.Client.open")
+ def test_get_dataframe_with_spatial_extent(self, mock_open) -> None:
+ mock_open.return_value = MockClient(STAC_URLS["PLANETARY-COMPUTER"])
+
client = Client.open(STAC_URLS["PLANETARY-COMPUTER"])
collection = client.get_collection("aster-l1t")
bbox = [[-180.0, -90.0, 180.0, 90.0]]
@@ -51,7 +62,10 @@ class TestStacReader(TestBase):
assert df is not None
assert isinstance(df, DataFrame)
- def test_get_dataframe_with_temporal_extent(self) -> None:
+ @patch("sedona.spark.stac.client.Client.open")
+ def test_get_dataframe_with_temporal_extent(self, mock_open) -> None:
+ mock_open.return_value = MockClient(STAC_URLS["PLANETARY-COMPUTER"])
+
client = Client.open(STAC_URLS["PLANETARY-COMPUTER"])
collection = client.get_collection("aster-l1t")
datetime = [["2006-01-01T00:00:00Z", "2007-01-01T00:00:00Z"]]
@@ -59,7 +73,10 @@ class TestStacReader(TestBase):
assert df is not None
assert isinstance(df, DataFrame)
- def test_get_dataframe_with_both_extents(self) -> None:
+ @patch("sedona.spark.stac.client.Client.open")
+ def test_get_dataframe_with_both_extents(self, mock_open) -> None:
+ mock_open.return_value = MockClient(STAC_URLS["PLANETARY-COMPUTER"])
+
client = Client.open(STAC_URLS["PLANETARY-COMPUTER"])
collection = client.get_collection("aster-l1t")
bbox = [[-180.0, -90.0, 180.0, 90.0]]
@@ -68,21 +85,30 @@ class TestStacReader(TestBase):
assert df is not None
assert isinstance(df, DataFrame)
- def test_get_items_with_spatial_extent(self) -> None:
+ @patch("sedona.spark.stac.client.Client.open")
+ def test_get_items_with_spatial_extent(self, mock_open) -> None:
+ mock_open.return_value = MockClient(STAC_URLS["PLANETARY-COMPUTER"])
+
client = Client.open(STAC_URLS["PLANETARY-COMPUTER"])
collection = client.get_collection("aster-l1t")
bbox = [[-100.0, -72.0, 105.0, -69.0]]
items = list(collection.get_items(bbox=bbox))
assert items is not None
- def test_get_items_with_temporal_extent(self) -> None:
+ @patch("sedona.spark.stac.client.Client.open")
+ def test_get_items_with_temporal_extent(self, mock_open) -> None:
+ mock_open.return_value = MockClient(STAC_URLS["PLANETARY-COMPUTER"])
+
client = Client.open(STAC_URLS["PLANETARY-COMPUTER"])
collection = client.get_collection("aster-l1t")
datetime = [["2006-12-01T00:00:00Z", "2006-12-27T02:00:00Z"]]
items = list(collection.get_items(datetime=datetime))
assert items is not None
- def test_get_items_with_both_extents(self) -> None:
+ @patch("sedona.spark.stac.client.Client.open")
+ def test_get_items_with_both_extents(self, mock_open) -> None:
+ mock_open.return_value = MockClient(STAC_URLS["PLANETARY-COMPUTER"])
+
client = Client.open(STAC_URLS["PLANETARY-COMPUTER"])
collection = client.get_collection("aster-l1t")
bbox = [[90, -73, 105, -69]]
@@ -90,7 +116,10 @@ class TestStacReader(TestBase):
items = list(collection.get_items(bbox=bbox, datetime=datetime))
assert items is not None
- def test_get_items_with_multiple_bboxes_and_interval(self) -> None:
+ @patch("sedona.spark.stac.client.Client.open")
+ def test_get_items_with_multiple_bboxes_and_interval(self, mock_open) -> None:
+ mock_open.return_value = MockClient(STAC_URLS["PLANETARY-COMPUTER"])
+
client = Client.open(STAC_URLS["PLANETARY-COMPUTER"])
collection = client.get_collection("aster-l1t")
bbox = [
@@ -112,20 +141,29 @@ class TestStacReader(TestBase):
items = list(collection.get_items(bbox=bbox, datetime=datetime))
assert items is not None
- def test_get_items_with_ids(self) -> None:
+ @patch("sedona.spark.stac.client.Client.open")
+ def test_get_items_with_ids(self, mock_open) -> None:
+ mock_open.return_value = MockClient(STAC_URLS["PLANETARY-COMPUTER"])
+
client = Client.open(STAC_URLS["PLANETARY-COMPUTER"])
collection = client.get_collection("aster-l1t")
ids = ["AST_L1T_00312272006020322_20150518201805", "item2", "item3"]
items = list(collection.get_items(*ids))
assert items is not None
- def test_get_items_with_id(self) -> None:
+ @patch("sedona.spark.stac.client.Client.open")
+ def test_get_items_with_id(self, mock_open) -> None:
+ mock_open.return_value = MockClient(STAC_URLS["PLANETARY-COMPUTER"])
+
client = Client.open(STAC_URLS["PLANETARY-COMPUTER"])
collection = client.get_collection("aster-l1t")
items = list(collection.get_items("AST_L1T_00312272006020322_20150518201805"))
assert items is not None
- def test_get_items_with_bbox_and_non_overlapping_intervals(self) -> None:
+ @patch("sedona.spark.stac.client.Client.open")
+ def test_get_items_with_bbox_and_non_overlapping_intervals(self, mock_open) -> None:
+ mock_open.return_value = MockClient(STAC_URLS["PLANETARY-COMPUTER"])
+
client = Client.open(STAC_URLS["PLANETARY-COMPUTER"])
collection = client.get_collection("aster-l1t")
bbox = [[-180.0, -90.0, 180.0, 90.0]]
@@ -136,7 +174,10 @@ class TestStacReader(TestBase):
items = list(collection.get_items(bbox=bbox, datetime=datetime))
assert items is not None
- def test_get_items_with_bbox_and_interval(self) -> None:
+ @patch("sedona.spark.stac.client.Client.open")
+ def test_get_items_with_bbox_and_interval(self, mock_open) -> None:
+ mock_open.return_value = MockClient(STAC_URLS["PLANETARY-COMPUTER"])
+
client = Client.open(STAC_URLS["PLANETARY-COMPUTER"])
collection = client.get_collection("aster-l1t")
bbox = [-180.0, -90.0, 180.0, 90.0]
@@ -144,7 +185,10 @@ class TestStacReader(TestBase):
items = list(collection.get_items(bbox=bbox, datetime=interval))
assert items is not None
- def test_get_dataframe_with_bbox_and_interval(self) -> None:
+ @patch("sedona.spark.stac.client.Client.open")
+ def test_get_dataframe_with_bbox_and_interval(self, mock_open) -> None:
+ mock_open.return_value = MockClient(STAC_URLS["PLANETARY-COMPUTER"])
+
client = Client.open(STAC_URLS["PLANETARY-COMPUTER"])
collection = client.get_collection("aster-l1t")
bbox = [-180.0, -90.0, 180.0, 90.0]
@@ -152,7 +196,10 @@ class TestStacReader(TestBase):
df = collection.get_dataframe(bbox=bbox, datetime=interval)
assert df is not None
- def test_save_to_geoparquet(self) -> None:
+ @patch("sedona.spark.stac.client.Client.open")
+ def test_save_to_geoparquet(self, mock_open) -> None:
+ mock_open.return_value = MockClient(STAC_URLS["PLANETARY-COMPUTER"])
+
client = Client.open(STAC_URLS["PLANETARY-COMPUTER"])
collection = client.get_collection("aster-l1t")
@@ -176,8 +223,11 @@ class TestStacReader(TestBase):
assert os.path.exists(output_path), "GeoParquet file was not created"
- def test_get_items_with_wkt_geometry(self) -> None:
+ @patch("sedona.spark.stac.client.Client.open")
+ def test_get_items_with_wkt_geometry(self, mock_open) -> None:
"""Test that WKT geometry strings are properly handled for spatial
filtering."""
+ mock_open.return_value = MockClient(STAC_URLS["PLANETARY-COMPUTER"])
+
client = Client.open(STAC_URLS["PLANETARY-COMPUTER"])
collection = client.get_collection("aster-l1t")
@@ -188,10 +238,13 @@ class TestStacReader(TestBase):
# Both should return similar number of items (may not be exactly same
due to geometry differences)
assert items_with_wkt is not None
- def test_get_dataframe_with_shapely_geometry(self) -> None:
+ @patch("sedona.spark.stac.client.Client.open")
+ def test_get_dataframe_with_shapely_geometry(self, mock_open) -> None:
"""Test that Shapely geometry objects are properly handled for spatial
filtering."""
from shapely.geometry import Polygon
+ mock_open.return_value = MockClient(STAC_URLS["PLANETARY-COMPUTER"])
+
client = Client.open(STAC_URLS["PLANETARY-COMPUTER"])
collection = client.get_collection("aster-l1t")
@@ -204,10 +257,13 @@ class TestStacReader(TestBase):
# Both should return similar number of items
assert df_with_shapely is not None
- def test_get_items_with_geometry_list(self) -> None:
+ @patch("sedona.spark.stac.client.Client.open")
+ def test_get_items_with_geometry_list(self, mock_open) -> None:
"""Test that lists of geometry objects are properly handled."""
from shapely.geometry import Polygon
+ mock_open.return_value = MockClient(STAC_URLS["PLANETARY-COMPUTER"])
+
client = Client.open(STAC_URLS["PLANETARY-COMPUTER"])
collection = client.get_collection("aster-l1t")
@@ -223,10 +279,13 @@ class TestStacReader(TestBase):
# Should return items from both geometries
assert items_with_geom_list is not None
- def test_geometry_takes_precedence_over_bbox(self) -> None:
+ @patch("sedona.spark.stac.client.Client.open")
+ def test_geometry_takes_precedence_over_bbox(self, mock_open) -> None:
"""Test that geometry parameter takes precedence over bbox when both
are provided."""
from shapely.geometry import Polygon
+ mock_open.return_value = MockClient(STAC_URLS["PLANETARY-COMPUTER"])
+
client = Client.open(STAC_URLS["PLANETARY-COMPUTER"])
collection = client.get_collection("aster-l1t")
@@ -244,10 +303,13 @@ class TestStacReader(TestBase):
assert items_with_both is not None
assert items_with_geom_only is not None
- def test_get_dataframe_with_geometry_and_datetime(self) -> None:
+ @patch("sedona.spark.stac.client.Client.open")
+ def test_get_dataframe_with_geometry_and_datetime(self, mock_open) -> None:
"""Test that geometry and datetime filters work together."""
from shapely.geometry import Polygon
+ mock_open.return_value = MockClient(STAC_URLS["PLANETARY-COMPUTER"])
+
client = Client.open(STAC_URLS["PLANETARY-COMPUTER"])
collection = client.get_collection("aster-l1t")
@@ -264,12 +326,15 @@ class TestStacReader(TestBase):
assert df_with_both is not None
assert df_with_geom_only is not None
- def test_save_to_geoparquet_with_geometry(self) -> None:
+ @patch("sedona.spark.stac.client.Client.open")
+ def test_save_to_geoparquet_with_geometry(self, mock_open) -> None:
"""Test saving to GeoParquet with geometry parameter."""
from shapely.geometry import Polygon
import tempfile
import os
+ mock_open.return_value = MockClient(STAC_URLS["PLANETARY-COMPUTER"])
+
client = Client.open(STAC_URLS["PLANETARY-COMPUTER"])
collection = client.get_collection("aster-l1t")
@@ -291,8 +356,11 @@ class TestStacReader(TestBase):
# Check if the file was created
assert os.path.exists(output_path), "GeoParquet file was not created"
- def test_get_items_with_tuple_datetime(self) -> None:
+ @patch("sedona.spark.stac.client.Client.open")
+ def test_get_items_with_tuple_datetime(self, mock_open) -> None:
"""Test that tuples are properly handled as datetime input (same as
lists)."""
+ mock_open.return_value = MockClient(STAC_URLS["PLANETARY-COMPUTER"])
+
client = Client.open(STAC_URLS["PLANETARY-COMPUTER"])
collection = client.get_collection("aster-l1t")
@@ -308,8 +376,11 @@ class TestStacReader(TestBase):
assert items_with_tuple is not None
assert items_with_list is not None
- def test_get_dataframe_with_tuple_datetime(self) -> None:
+ @patch("sedona.spark.stac.client.Client.open")
+ def test_get_dataframe_with_tuple_datetime(self, mock_open) -> None:
"""Test that tuples are properly handled as datetime input for
dataframes."""
+ mock_open.return_value = MockClient(STAC_URLS["PLANETARY-COMPUTER"])
+
client = Client.open(STAC_URLS["PLANETARY-COMPUTER"])
collection = client.get_collection("aster-l1t")
diff --git a/python/tests/stac/test_mock_fixtures.py b/python/tests/stac/test_mock_fixtures.py
new file mode 100644
index 0000000000..c054cc2f17
--- /dev/null
+++ b/python/tests/stac/test_mock_fixtures.py
@@ -0,0 +1,159 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from typing import Iterator, List, Optional, Union
+from unittest.mock import create_autospec, MagicMock
+
+from pyspark.sql import DataFrame
+
+
+class MockItem:
+ """Mock STAC Item"""
+
+ def __init__(self, item_id: str = "test_item"):
+ self.id = item_id
+ self.properties = {
+ "datetime": "2006-12-26T18:03:22Z",
+ "collection": "aster-l1t",
+ }
+ self.geometry = {
+ "type": "Polygon",
+ "coordinates": [[[90, -73], [105, -73], [105, -69], [90, -69], [90, -73]]],
+ }
+ self.bbox = [90, -73, 105, -69]
+ self.assets = {}
+
+
+class MockIterator:
+ """Mock iterator for STAC items"""
+
+ def __init__(self, items: List[MockItem] = None):
+ self.items = items or [MockItem(f"item_{i}") for i in range(5)]
+ self.index = 0
+
+ def __iter__(self):
+ return self
+
+ def __next__(self):
+ if self.index < len(self.items):
+ item = self.items[self.index]
+ self.index += 1
+ return item
+ raise StopIteration
+
+
+def create_mock_dataframe(data=None):
+ """Create a mock DataFrame that passes isinstance checks"""
+ mock_df = create_autospec(DataFrame, instance=True)
+ mock_df.data = data or []
+ mock_df._count = len(mock_df.data) if data else 5
+ mock_df.count.return_value = mock_df._count
+ mock_df.collect.return_value = mock_df.data
+ mock_df.show.return_value = None
+ return mock_df
+
+
+class MockCollectionClient:
+ """Mock CollectionClient"""
+
+ def __init__(self, url: str, collection_id: str):
+ self.url = url
+ self.collection_id = collection_id
+
+ def __str__(self):
+ return f"<CollectionClient id={self.collection_id}>"
+
+ def get_items(self, *ids, **kwargs) -> Iterator:
+ """Return mock iterator of items"""
+ if ids:
+ items = [MockItem(item_id) for item_id in ids if isinstance(item_id, str)]
+ else:
+ items = [MockItem(f"item_{i}") for i in range(5)]
+ return MockIterator(items)
+
+ def get_dataframe(self, **kwargs) -> DataFrame:
+ """Return mock DataFrame"""
+ # Return a mock DataFrame instead of creating a real Spark DataFrame
+ return create_mock_dataframe()
+
+ def save_to_geoparquet(self, output_path: str, **kwargs):
+ """Mock save to geoparquet - just create an empty file"""
+ import os
+
+ os.makedirs(os.path.dirname(output_path), exist_ok=True)
+ with open(output_path, "w") as f:
+ f.write("")
+
+
+class MockClient:
+ """Mock STAC Client"""
+
+ def __init__(self, url: str):
+ self.url = url
+
+ @classmethod
+ def open(cls, url: str):
+ """Create mock client instance"""
+ return cls(url)
+
+ def get_collection(self, collection_id: str):
+ """Return mock collection client"""
+ return MockCollectionClient(self.url, collection_id)
+
+ def search(
+ self,
+ *ids,
+ collection_id: Optional[str] = None,
+ bbox: Optional[list] = None,
+ geometry: Optional[Union[str, object, List]] = None,
+ datetime: Optional[Union[str, object, list]] = None,
+ max_items: Optional[int] = None,
+ return_dataframe: bool = True,
+ ) -> Union[Iterator, DataFrame]:
+ """Mock search method"""
+ if return_dataframe:
+ # Return mock DataFrame instead of creating a real Spark DataFrame
+ return create_mock_dataframe()
+ else:
+ # Return mock iterator
+ if ids and len(ids) > 0:
+ if isinstance(ids[0], str):
+ items = [MockItem(ids[0])]
+ else:
+ items = [
+ MockItem(item_id)
+ for item_id in ids[0]
+ if isinstance(item_id, str)
+ ]
+ else:
+ num_items = min(max_items, 5) if max_items else 5
+ items = [MockItem(f"item_{i}") for i in range(num_items)]
+ return MockIterator(items)
+
+
+def create_mock_client(url: str) -> MockClient:
+ """Factory function to create a mock client"""
+ return MockClient(url)
+
+
+def mock_client_open(monkeypatch):
+ """Pytest fixture to mock Client.open"""
+
+ def _mock_open(url: str):
+ return MockClient(url)
+
+ return _mock_open