This is an automated email from the ASF dual-hosted git repository.
jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sedona.git
The following commit(s) were added to refs/heads/master by this push:
new b66e768155 [SEDONA-680] Remove rasterio from mandatory dependencies (#1692)
b66e768155 is described below
commit b66e768155866a38ba2e3404f1151cac14fad5ea
Author: Jia Yu <[email protected]>
AuthorDate: Sat Nov 23 00:34:11 2024 -0800
[SEDONA-680] Remove rasterio from mandatory dependencies (#1692)
---
.github/workflows/python.yml | 19 +++++++++++++++----
docs/tutorial/raster.md | 3 +++
python/sedona/sql/types.py | 27 +++++++++++++++++++++++----
python/setup.py | 10 ++++++++--
4 files changed, 49 insertions(+), 10 deletions(-)
diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml
index 12956ffd93..aaca28df05 100644
--- a/.github/workflows/python.yml
+++ b/.github/workflows/python.yml
@@ -143,14 +143,25 @@ jobs:
- env:
PYTHON_VERSION: ${{ matrix.python }}
run: find spark-shaded/target -name sedona-*.jar -exec cp {} ${VENV_PATH}/lib/python${PYTHON_VERSION}/site-packages/pyspark/jars/ \;
- - env:
+ - name: Run tests
+ env:
PYTHON_VERSION: ${{ matrix.python }}
run: |
export SPARK_HOME=${VENV_PATH}/lib/python${PYTHON_VERSION}/site-packages/pyspark
cd python
source ${VENV_PATH}/bin/activate
- pytest tests
- - env:
+ pytest -v tests
+ - name: Run basic tests without rasterio
+ env:
+ PYTHON_VERSION: ${{ matrix.python }}
+ run: |
+ export SPARK_HOME=${VENV_PATH}/lib/python${PYTHON_VERSION}/site-packages/pyspark
+ cd python
+ source ${VENV_PATH}/bin/activate
+ pip uninstall -y rasterio
+ pytest -v tests/core/test_rdd.py tests/sql/test_dataframe_api.py
+ - name: Run Spark Connect tests
+ env:
PYTHON_VERSION: ${{ matrix.python }}
run: |
if [ ! -f "${VENV_PATH}/lib/python${PYTHON_VERSION}/site-packages/pyspark/sbin/start-connect-server.sh" ]
@@ -165,4 +176,4 @@ jobs:
cd python
source ${VENV_PATH}/bin/activate
pip install "pyspark[connect]==${SPARK_VERSION}"
- pytest tests/sql/test_dataframe_api.py
+ pytest -v tests/sql/test_dataframe_api.py
diff --git a/docs/tutorial/raster.md b/docs/tutorial/raster.md
index 5384f82541..7d7df586eb 100644
--- a/docs/tutorial/raster.md
+++ b/docs/tutorial/raster.md
@@ -615,6 +615,9 @@ raster.as_numpy_masked() # numpy array with nodata values masked as nan
If you want to work with the raster data using `rasterio`, you can retrieve a `rasterio.DatasetReader` object using the `as_rasterio` method.
+!!!note
+ You need to have the `rasterio` package installed (version >= 1.2.10) to use this method. You can install it using `pip install rasterio`.
+
```python
ds = raster.as_rasterio() # rasterio.DatasetReader object
# Work with the raster using rasterio
diff --git a/python/sedona/sql/types.py b/python/sedona/sql/types.py
index 1d7a693f1e..c966d451ca 100644
--- a/python/sedona/sql/types.py
+++ b/python/sedona/sql/types.py
@@ -17,8 +17,21 @@
from pyspark.sql.types import BinaryType, UserDefinedType
-from ..raster import raster_serde
-from ..raster.sedona_raster import SedonaRaster
+# Only support RasterType when rasterio is installed
+try:
+ import rasterio
+except ImportError:
+ rasterio = None
+
+if rasterio is not None:
+ from ..raster import raster_serde
+ from ..raster.sedona_raster import SedonaRaster
+else:
+ # We'll skip RasterType UDT registration and raise error when deserializing
+ # RasterUDT objects if rasterio is not installed
+ raster_serde = None
+ SedonaRaster = None
+
from ..utils import geometry_serde
@@ -57,7 +70,12 @@ class RasterType(UserDefinedType):
raise NotImplementedError("RasterType.serialize is not implemented yet")
def deserialize(self, datum):
- return raster_serde.deserialize(datum)
+ if raster_serde is not None:
+ return raster_serde.deserialize(datum)
+ else:
+ raise NotImplementedError(
+ "rasterio is not installed. Please install it to support RasterType deserialization"
+ )
@classmethod
def module(cls):
@@ -71,4 +89,5 @@ class RasterType(UserDefinedType):
return "org.apache.spark.sql.sedona_sql.UDT.RasterUDT"
-SedonaRaster.__UDT__ = RasterType()
+if SedonaRaster is not None:
+ SedonaRaster.__UDT__ = RasterType()
diff --git a/python/setup.py b/python/setup.py
index e4dccbd8f1..d0770fcc02 100644
--- a/python/setup.py
+++ b/python/setup.py
@@ -58,12 +58,18 @@ setup(
long_description=long_description,
long_description_content_type="text/markdown",
python_requires=">=3.6",
- install_requires=["attrs", "shapely>=1.7.0", "rasterio>=1.2.10"],
+ install_requires=["attrs", "shapely>=1.7.0"],
extras_require={
"spark": ["pyspark>=2.3.0"],
"pydeck-map": ["geopandas", "pydeck==0.8.0"],
"kepler-map": ["geopandas", "keplergl==0.3.2"],
- "all": ["pyspark>=2.3.0", "geopandas", "pydeck==0.8.0", "keplergl==0.3.2"],
+ "all": [
+ "pyspark>=2.3.0",
+ "geopandas",
+ "pydeck==0.8.0",
+ "keplergl==0.3.2",
+ "rasterio>=1.2.10",
+ ],
},
project_urls={
"Documentation": "https://sedona.apache.org",