petern48 commented on code in PR #2021:
URL: https://github.com/apache/sedona/pull/2021#discussion_r2175988530
##########
python/sedona/geopandas/geoseries.py:
##########
@@ -180,43 +181,66 @@ def _process_geometry_column(
# Find the first column with BinaryType or GeometryType
first_col = self.get_first_geometry_column() # TODO: fixme
- if first_col:
- data_type = self._internal.spark_frame.schema[first_col].dataType
-
- # Handle both positional and keyword arguments
- all_args = list(args)
- for k, v in kwargs.items():
- all_args.append(v)
-
- # Join all arguments as comma-separated values
- params = ""
- if all_args:
- params_list = [
- str(arg) if isinstance(arg, (int, float)) else repr(arg)
- for arg in all_args
- ]
- params = f", {', '.join(params_list)}"
-
- if isinstance(data_type, BinaryType):
- sql_expr = (
- f"{operation}(ST_GeomFromWKB(`{first_col}`){params}) as
{rename}"
- )
- else:
- sql_expr = f"{operation}(`{first_col}`{params}) as {rename}"
-
- sdf = self._internal.spark_frame.selectExpr(sql_expr)
- internal = InternalFrame(
- spark_frame=sdf,
- index_spark_columns=None,
- column_labels=[self._column_label],
- data_spark_columns=[scol_for(sdf, rename)],
- data_fields=[self._internal.data_fields[0]],
- column_label_names=self._internal.column_label_names,
- )
- return
_to_geo_series(first_series(PandasOnSparkDataFrame(internal)))
- else:
+ # Handle both positional and keyword arguments
+ all_args = list(args)
+ for k, v in kwargs.items():
+ all_args.append(v)
+
+ # Join all arguments as comma-separated values
+ params = ""
+ if all_args:
+ params_list = [
+ str(arg) if isinstance(arg, (int, float)) else repr(arg)
+ for arg in all_args
+ ]
+ params = f", {', '.join(params_list)}"
+
+ sql_expr = f"{operation}(`{first_col}`{params})"
+
+ return self._query_geometry_column(sql_expr, first_col, rename)
+
+ def _query_geometry_column(
+ self, query: str, col: Union[str, None], rename: str
+ ) -> "GeoSeries":
+ """
+ Helper method to query a single geometry column with a specified
operation.
+
+ Parameters
+ ----------
+ query : str
+ The query to apply to the geometry column.
+ col : str
+ The name of the column to query.
+ rename : str
+ The name of the resulting column.
+
+ Returns
+ -------
+ GeoSeries
+ A GeoSeries with the operation applied to the geometry column.
+ """
+ if not col:
raise ValueError("No valid geometry column found.")
+ data_type = self._internal.spark_frame.schema[col].dataType
+
+ if isinstance(data_type, BinaryType):
+ # the backticks here are important so we don't match strings that
happen to be the same as the column name
+ query = query.replace(f"`{col}`", f"ST_GeomFromWKB(`{col}`)")
Review Comment:
I'd prefer to skip this too. I think SQL's syntax is strict enough to make
this safe. Regex substitution seems like overkill and is less readable.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]