Copilot commented on code in PR #2105:
URL: https://github.com/apache/sedona/pull/2105#discussion_r2212397661
##########
python/sedona/geopandas/geoseries.py:
##########
@@ -131,6 +132,7 @@ def try_geom_to_ewkb(x) -> bytes:
use_same_anchor = False
return shapely.wkb.dumps(x, **kwargs)
+ # return shapely.to_wkb(x, include_srid=True)
Review Comment:
This commented code should be removed as it appears to be leftover from
development and is not needed for the current implementation.
```suggestion
```
##########
python/tests/geopandas/test_match_geopandas_series.py:
##########
@@ -284,7 +284,19 @@ def test_from_shapely(self):
pass
def test_from_arrow(self):
- pass
+ if parse_version(gpd.__version__) < parse_version("1.0.0"):
+ return
+
+ for _, geom in self.geoms:
+ # ga doesn't support GeometryCollection: "Unrecognized GeoArrow
extension name: 'geoarrow.geometrycollection'"
Review Comment:
[nitpick] The comment about GeometryCollection support should be more
specific about which library version has this limitation, as this may change
over time.
```suggestion
# ga doesn't support GeometryCollection in geopandas < 1.0.0:
"Unrecognized GeoArrow extension name: 'geoarrow.geometrycollection'"
```
##########
python/tests/geopandas/test_match_geopandas_dataframe.py:
##########
@@ -171,3 +171,116 @@ def test_rename_geometry(self):
sgpd_df = sgpd_df.rename_geometry("name1")
gpd_df = gpd_df.rename_geometry("name2")
assert sgpd_df.geometry.name != gpd_df.geometry.name
+
+ def test_to_json(self):
+ tests = [
+ {
+ "a": [1, 2, 3],
+ "b": ["4", "5", "6"],
+ "geometry": [
+ Point(1, 2),
+ Point(2, 1),
+ Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]),
+ ],
+ },
+ {
+ "a": [1, 2, 3],
+ "b": ["4", "5", "6"],
+ "geometry": [None, GeometryCollection(Point()), Point(2, 1)],
+ },
+ {
+ "a": [1, 2, 3],
+ "b": ["4", "5", "6"],
+ "geometry": [Polygon(), Point(1, 2), None],
+ },
+ ]
+
+ for data in tests:
+ # TODO: Try to optimize this with self.ps_allow_diff_frames() away
+ with self.ps_allow_diff_frames():
+ sgpd_result = GeoDataFrame(data).to_json()
+ gpd_result = gpd.GeoDataFrame(data).to_json()
+ assert sgpd_result == gpd_result
+
+ # test different json args
+ data = {
+ "a": [1, 2, 3],
+ "b": [4, 5, 6],
+ "geometry": [Point(1, 2), Point(2, 1), LineString([(0, 0), (1,
1)])],
+ }
+ tests = [
+ {"na": "drop"},
+ {"na": "keep"},
+ {"show_bbox": True},
+ {"drop_id": True},
+ {"to_wgs84": True},
+ {"na": "drop", "show_bbox": True, "drop_id": True, "to_wgs84":
True},
+ ]
+ for kwargs in tests:
+ # TODO: Try to optimize this with self.ps_allow_diff_frames() away
+ with self.ps_allow_diff_frames():
+ sgpd_result = GeoDataFrame(data,
crs="EPSG:3857").to_json(**kwargs)
+ gpd_result = gpd.GeoDataFrame(data,
crs="EPSG:3857").to_json(**kwargs)
+ assert sgpd_result == gpd_result
+
+ def test_from_arrow(self):
+ if parse_version(gpd.__version__) < parse_version("1.0.0"):
+ return
+
+ gdf = gpd.GeoDataFrame(
+ {
+ "ints": [1, 2, 3, 4],
+ "strings": ["a", "b", "c", "d"],
+ "bools": [True, False, True, False],
+ "geometry": [
+ None,
+ LineString([(0, 0), (1, 1)]),
+ Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]),
+ Point(1, 1),
+ ],
+ }
+ )
+
+ # TODO: optimize this away
+ with self.ps_allow_diff_frames():
+ sgpd_result = GeoDataFrame.from_arrow(gdf.to_arrow())
+ gpd_result = gpd.GeoDataFrame.from_arrow(gdf.to_arrow())
+ self.check_sgpd_df_equals_gpd_df(sgpd_result, gpd_result)
+
+ def test_to_arrow(self):
+ if parse_version(gpd.__version__) < parse_version("1.0.0"):
+ return
+
+ import pyarrow as pa
+ import pandas as pd
+
+ data = {
+ "a": [1, 2, 3],
+ "b": [4, 5, 6],
+ "geometry": [Point(1, 2), Point(2, 1), LineString([(0, 0), (1,
1)])],
+ }
+
+ # TODO: Try to optimize this with self.ps_allow_diff_frames() away
+ with self.ps_allow_diff_frames():
+ sgpd_result = pa.table(GeoDataFrame(data).to_arrow(index=False))
+ gpd_result = pa.table(gpd.GeoDataFrame(data).to_arrow(index=False))
+
+ assert sgpd_result.equals(gpd_result)
+
+ # TODO: Try to optimize this with self.ps_allow_diff_frames() away
+ with self.ps_allow_diff_frames():
+ sgpd_result = pa.table(
+ GeoDataFrame(
+ data, index=pd.RangeIndex(start=0, stop=3, step=1)
+ ).to_arrow(index=True)
+ )
+ gpd_result = pa.table(
+ gpd.GeoDataFrame(
+ data, index=pd.RangeIndex(start=0, stop=3, step=1)
+ ).to_arrow(index=True)
+ )
+
+ assert sgpd_result.equals(gpd_result)
+
+ # Note: Results for not specifying index=True or index=False for
to_arrow is expected to be different
+ # from geopandas. See the to_arrow docstring for more details.
Review Comment:
[nitpick] This comment should be more specific about why the behavior
differs and what the expected differences are for better maintainability.
```suggestion
# Note: When index=True or index=False is not specified for the
to_arrow method, the behavior differs
# between GeoDataFrame and geopandas.GeoDataFrame. Specifically,
GeoDataFrame includes the index by
# default in the resulting Arrow table, while geopandas.GeoDataFrame
excludes it. This difference is
# intentional and aligns with the design choices of each library.
See the to_arrow docstring for more
# details on this behavior.
```
##########
python/tests/geopandas/test_geoseries.py:
##########
@@ -328,7 +342,37 @@ def test_estimate_utm_crs(self):
sgpd.GeoSeries([Polygon([(0, 90), (1, 90), (2,
90)])]).estimate_utm_crs()
def test_to_json(self):
- pass
+ s = GeoSeries([Point(1, 1), Point(2, 2), Point(3, 3)])
+
+ # TODO: optimize this away
+ with self.ps_allow_diff_frames():
+ result = s.to_json()
+ expected = '{"type": "FeatureCollection", "features": [{"id": "0",
"type": "Feature", "pr\
+operties": {}, "geometry": {"type": "Point", "coordinates": [1.0, 1.0]},
"bbox": [1.0,\
+ 1.0, 1.0, 1.0]}, {"id": "1", "type": "Feature", "properties": {}, "geometry":
{"type"\
+: "Point", "coordinates": [2.0, 2.0]}, "bbox": [2.0, 2.0, 2.0, 2.0]}, {"id":
"2", "typ\
+e": "Feature", "properties": {}, "geometry": {"type": "Point", "coordinates":
[3.0, 3.\
+0]}, "bbox": [3.0, 3.0, 3.0, 3.0]}], "bbox": [1.0, 1.0, 3.0, 3.0]}'
+
+ assert result == expected
+
+ with self.ps_allow_diff_frames():
+ result = s.to_json(show_bbox=True)
+ expected = '{"type": "FeatureCollection", "features": [{"id": "0",
"type": "Feature", "properties": {}, "geometry": {"type": "Point",
"coordinates": [1.0, 1.0]}, "bbox": [1.0, 1.0, 1.0, 1.0]}, {"id": "1", "type":
"Feature", "properties": {}, "geometry": {"type": "Point", "coordinates": [2.0,
2.0]}, "bbox": [2.0, 2.0, 2.0, 2.0]}, {"id": "2", "type": "Feature",
"properties": {}, "geometry": {"type": "Point", "coordinates": [3.0, 3.0]},
"bbox": [3.0, 3.0, 3.0, 3.0]}], "bbox": [1.0, 1.0, 3.0, 3.0]}'
+ assert result == expected
+
+ with self.ps_allow_diff_frames():
+ result = s.to_json(drop_id=True)
+ expected = '{"type": "FeatureCollection", "features": [{"type":
"Feature", "properties": {}, "geometry": {"type": "Point", "coordinates": [1.0,
1.0]}, "bbox": [1.0, 1.0, 1.0, 1.0]}, {"type": "Feature", "properties": {},
"geometry": {"type": "Point", "coordinates": [2.0, 2.0]}, "bbox": [2.0, 2.0,
2.0, 2.0]}, {"type": "Feature", "properties": {}, "geometry": {"type": "Point",
"coordinates": [3.0, 3.0]}, "bbox": [3.0, 3.0, 3.0, 3.0]}], "bbox": [1.0, 1.0,
3.0, 3.0]}'
+ # print(s.to_geopandas().to_json(drop_id=True))
+ # raise
+ # expected = '{"type": "FeatureCollection", "features": [{"type":
"Feature", "geometry": {"type": "Point", "coordinates": [1.0, 1.0]},
"properties": {}}, {"type": "Feature", "geometry": {"type": "Point",
"coordinates": [2.0, 2.0]}, "properties": {}}, {"type": "Feature", "geometry":
{"type": "Point", "coordinates": [3.0, 3.0]}, "properties": {}}]}'
Review Comment:
Remove commented print statements and the commented raise statement as they
appear to be debug code left over from development.
```suggestion
```
##########
python/tests/geopandas/test_geoseries.py:
##########
@@ -328,7 +342,37 @@ def test_estimate_utm_crs(self):
sgpd.GeoSeries([Polygon([(0, 90), (1, 90), (2,
90)])]).estimate_utm_crs()
def test_to_json(self):
- pass
+ s = GeoSeries([Point(1, 1), Point(2, 2), Point(3, 3)])
+
+ # TODO: optimize this away
+ with self.ps_allow_diff_frames():
+ result = s.to_json()
+ expected = '{"type": "FeatureCollection", "features": [{"id": "0",
"type": "Feature", "pr\
+operties": {}, "geometry": {"type": "Point", "coordinates": [1.0, 1.0]},
"bbox": [1.0,\
+ 1.0, 1.0, 1.0]}, {"id": "1", "type": "Feature", "properties": {}, "geometry":
{"type"\
+: "Point", "coordinates": [2.0, 2.0]}, "bbox": [2.0, 2.0, 2.0, 2.0]}, {"id":
"2", "typ\
+e": "Feature", "properties": {}, "geometry": {"type": "Point", "coordinates":
[3.0, 3.\
+0]}, "bbox": [3.0, 3.0, 3.0, 3.0]}], "bbox": [1.0, 1.0, 3.0, 3.0]}'
+
+ assert result == expected
+
+ with self.ps_allow_diff_frames():
+ result = s.to_json(show_bbox=True)
+ expected = '{"type": "FeatureCollection", "features": [{"id": "0",
"type": "Feature", "properties": {}, "geometry": {"type": "Point",
"coordinates": [1.0, 1.0]}, "bbox": [1.0, 1.0, 1.0, 1.0]}, {"id": "1", "type":
"Feature", "properties": {}, "geometry": {"type": "Point", "coordinates": [2.0,
2.0]}, "bbox": [2.0, 2.0, 2.0, 2.0]}, {"id": "2", "type": "Feature",
"properties": {}, "geometry": {"type": "Point", "coordinates": [3.0, 3.0]},
"bbox": [3.0, 3.0, 3.0, 3.0]}], "bbox": [1.0, 1.0, 3.0, 3.0]}'
+ assert result == expected
+
+ with self.ps_allow_diff_frames():
+ result = s.to_json(drop_id=True)
+ expected = '{"type": "FeatureCollection", "features": [{"type":
"Feature", "properties": {}, "geometry": {"type": "Point", "coordinates": [1.0,
1.0]}, "bbox": [1.0, 1.0, 1.0, 1.0]}, {"type": "Feature", "properties": {},
"geometry": {"type": "Point", "coordinates": [2.0, 2.0]}, "bbox": [2.0, 2.0,
2.0, 2.0]}, {"type": "Feature", "properties": {}, "geometry": {"type": "Point",
"coordinates": [3.0, 3.0]}, "bbox": [3.0, 3.0, 3.0, 3.0]}], "bbox": [1.0, 1.0,
3.0, 3.0]}'
+ # print(s.to_geopandas().to_json(drop_id=True))
+ # raise
Review Comment:
Remove commented print statements and the commented raise statement as they
appear to be debug code left over from development.
```suggestion
```
##########
python/sedona/geopandas/geodataframe.py:
##########
@@ -172,6 +176,9 @@ def safe_apply_to_each_column(
if isinstance(data, GeoDataFrame):
if data._safe_get_crs() is None:
data.crs = crs
+
+ super().__init__(data, index=index, columns=columns, dtype=dtype,
copy=copy)
Review Comment:
This `super().__init__` call appears to be duplicated - it's called here and
again later in the constructor. The logic should be consolidated to avoid
potential initialization issues.
##########
python/tests/geopandas/test_geoseries.py:
##########
@@ -328,7 +342,37 @@ def test_estimate_utm_crs(self):
sgpd.GeoSeries([Polygon([(0, 90), (1, 90), (2,
90)])]).estimate_utm_crs()
def test_to_json(self):
- pass
+ s = GeoSeries([Point(1, 1), Point(2, 2), Point(3, 3)])
+
+ # TODO: optimize this away
+ with self.ps_allow_diff_frames():
+ result = s.to_json()
+ expected = '{"type": "FeatureCollection", "features": [{"id": "0",
"type": "Feature", "pr\
+operties": {}, "geometry": {"type": "Point", "coordinates": [1.0, 1.0]},
"bbox": [1.0,\
+ 1.0, 1.0, 1.0]}, {"id": "1", "type": "Feature", "properties": {}, "geometry":
{"type"\
+: "Point", "coordinates": [2.0, 2.0]}, "bbox": [2.0, 2.0, 2.0, 2.0]}, {"id":
"2", "typ\
+e": "Feature", "properties": {}, "geometry": {"type": "Point", "coordinates":
[3.0, 3.\
+0]}, "bbox": [3.0, 3.0, 3.0, 3.0]}], "bbox": [1.0, 1.0, 3.0, 3.0]}'
+
+ assert result == expected
+
+ with self.ps_allow_diff_frames():
+ result = s.to_json(show_bbox=True)
+ expected = '{"type": "FeatureCollection", "features": [{"id": "0",
"type": "Feature", "properties": {}, "geometry": {"type": "Point",
"coordinates": [1.0, 1.0]}, "bbox": [1.0, 1.0, 1.0, 1.0]}, {"id": "1", "type":
"Feature", "properties": {}, "geometry": {"type": "Point", "coordinates": [2.0,
2.0]}, "bbox": [2.0, 2.0, 2.0, 2.0]}, {"id": "2", "type": "Feature",
"properties": {}, "geometry": {"type": "Point", "coordinates": [3.0, 3.0]},
"bbox": [3.0, 3.0, 3.0, 3.0]}], "bbox": [1.0, 1.0, 3.0, 3.0]}'
+ assert result == expected
+
+ with self.ps_allow_diff_frames():
+ result = s.to_json(drop_id=True)
+ expected = '{"type": "FeatureCollection", "features": [{"type":
"Feature", "properties": {}, "geometry": {"type": "Point", "coordinates": [1.0,
1.0]}, "bbox": [1.0, 1.0, 1.0, 1.0]}, {"type": "Feature", "properties": {},
"geometry": {"type": "Point", "coordinates": [2.0, 2.0]}, "bbox": [2.0, 2.0,
2.0, 2.0]}, {"type": "Feature", "properties": {}, "geometry": {"type": "Point",
"coordinates": [3.0, 3.0]}, "bbox": [3.0, 3.0, 3.0, 3.0]}], "bbox": [1.0, 1.0,
3.0, 3.0]}'
+ # print(s.to_geopandas().to_json(drop_id=True))
+ # raise
Review Comment:
Remove commented print statements and the commented raise statement as they
appear to be debug code left over from development.
```suggestion
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]