This is an automated email from the ASF dual-hosted git repository.
paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/sedona-db.git
The following commit(s) were added to refs/heads/main by this push:
new c5fc05f8 chore: Remove use of DataFusionError::Internal for sedonadb internal errors (#564)
c5fc05f8 is described below
commit c5fc05f88df3a07ab7cc3a4bf3eb7f74c924de57
Author: Dewey Dunnington <[email protected]>
AuthorDate: Fri Jan 30 16:46:39 2026 -0600
chore: Remove use of DataFusionError::Internal for sedonadb internal errors (#564)
---
Cargo.lock | 2 ++
c/sedona-proj/src/st_transform.rs | 10 ++++------
rust/sedona-expr/src/statistics.rs | 3 ++-
rust/sedona-functions/src/st_analyze_agg.rs | 14 +++++---------
rust/sedona-functions/src/st_envelope.rs | 7 ++-----
rust/sedona-functions/src/st_envelope_agg.rs | 4 +---
rust/sedona-functions/src/st_geomfromwkt.rs | 10 ++++++----
rust/sedona-functions/src/st_pointzm.rs | 9 +++------
rust/sedona-functions/src/st_setsrid.rs | 6 +++---
rust/sedona-functions/src/st_xyzm.rs | 4 ++--
rust/sedona-functions/src/st_xyzm_minmax.rs | 17 ++++++++---------
rust/sedona-geo/src/centroid.rs | 6 ++----
rust/sedona-geo/src/st_intersection_agg.rs | 18 +++++-------------
rust/sedona-geo/src/st_union_agg.rs | 13 +++----------
rust/sedona-raster-functions/src/executor.rs | 12 ++++--------
rust/sedona-schema/src/datatypes.rs | 6 +++---
.../src/evaluated_batch/spill.rs | 16 ++++++----------
rust/sedona-spatial-join/src/exec.rs | 8 +++-----
rust/sedona-spatial-join/src/operand_evaluator.rs | 21 ++++++---------------
rust/sedona-spatial-join/src/refine/tg.rs | 16 ++++------------
rust/sedona/src/show.rs | 7 ++++---
sedona-cli/Cargo.toml | 2 ++
sedona-cli/src/print_format.rs | 5 +++--
23 files changed, 83 insertions(+), 133 deletions(-)
diff --git a/Cargo.lock b/Cargo.lock
index bc6799a9..c265c659 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -5003,6 +5003,7 @@ dependencies = [
"async-trait",
"clap",
"datafusion",
+ "datafusion-common",
"env_logger 0.11.8",
"futures",
"libmimalloc-sys",
@@ -5010,6 +5011,7 @@ dependencies = [
"regex",
"rustyline",
"sedona",
+ "sedona-common",
"sedona-tg",
"tokio",
]
diff --git a/c/sedona-proj/src/st_transform.rs
b/c/sedona-proj/src/st_transform.rs
index fcdf8e78..63c209f9 100644
--- a/c/sedona-proj/src/st_transform.rs
+++ b/c/sedona-proj/src/st_transform.rs
@@ -22,7 +22,7 @@ use datafusion_common::cast::{as_string_view_array,
as_struct_array};
use datafusion_common::config::ConfigOptions;
use datafusion_common::{exec_err, DataFusionError, Result, ScalarValue};
use datafusion_expr::ColumnarValue;
-use sedona_common::sedona_internal_err;
+use sedona_common::{sedona_internal_datafusion_err, sedona_internal_err};
use sedona_expr::item_crs::make_item_crs;
use sedona_expr::scalar_udf::{ScalarKernelRef, SedonaScalarKernel};
use sedona_functions::executor::WkbExecutor;
@@ -401,8 +401,8 @@ pub(crate) fn with_global_proj_engine(
// Otherwise, attempt to get the builder
let maybe_builder = PROJ_ENGINE_BUILDER.read().map_err(|_| {
// Highly unlikely (can only occur when a panic occurred during
set)
- DataFusionError::Internal(
- "Failed to acquire read lock for global PROJ
configuration".to_string(),
+ sedona_internal_datafusion_err!(
+ "Failed to acquire read lock for global PROJ configuration"
)
})?;
@@ -416,9 +416,7 @@ pub(crate) fn with_global_proj_engine(
engine_cell
.set(CachingCrsEngine::new(proj_engine))
- .map_err(|_| {
- DataFusionError::Internal("Failed to set cached PROJ
transform".to_string())
- })?;
+ .map_err(|_| sedona_internal_datafusion_err!("Failed to set cached
PROJ transform"))?;
func(engine_cell.get().unwrap())?;
Ok(())
})
diff --git a/rust/sedona-expr/src/statistics.rs
b/rust/sedona-expr/src/statistics.rs
index 08c0dd9c..4ae729e7 100644
--- a/rust/sedona-expr/src/statistics.rs
+++ b/rust/sedona-expr/src/statistics.rs
@@ -17,6 +17,7 @@
use std::str::FromStr;
use datafusion_common::{stats::Precision, ColumnStatistics, DataFusionError,
Result, ScalarValue};
+use sedona_common::sedona_internal_datafusion_err;
use sedona_geometry::interval::{Interval, IntervalTrait};
use sedona_geometry::{
bounding_box::BoundingBox,
@@ -391,7 +392,7 @@ impl GeoStatistics {
pub fn to_scalar_value(&self) -> Result<ScalarValue> {
// Serialize to JSON
let serialized = serde_json::to_vec(self).map_err(|e| {
- DataFusionError::Internal(format!("Failed to serialize
GeoStatistics: {e}"))
+ sedona_internal_datafusion_err!("Failed to serialize
GeoStatistics: {e}")
})?;
Ok(ScalarValue::Binary(Some(serialized)))
diff --git a/rust/sedona-functions/src/st_analyze_agg.rs
b/rust/sedona-functions/src/st_analyze_agg.rs
index 82647a6d..5377255a 100644
--- a/rust/sedona-functions/src/st_analyze_agg.rs
+++ b/rust/sedona-functions/src/st_analyze_agg.rs
@@ -30,6 +30,7 @@ use datafusion_common::{
};
use datafusion_expr::{scalar_doc_sections::DOC_SECTION_OTHER, Documentation,
Volatility};
use datafusion_expr::{Accumulator, ColumnarValue};
+use sedona_common::{sedona_internal_datafusion_err, sedona_internal_err};
use sedona_expr::aggregate_udf::SedonaAccumulatorRef;
use sedona_expr::aggregate_udf::SedonaAggregateUDF;
use sedona_expr::item_crs::ItemCrsSedonaAccumulator;
@@ -383,9 +384,7 @@ impl AnalyzeAccumulator {
impl Accumulator for AnalyzeAccumulator {
fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()> {
if values.is_empty() {
- return Err(DataFusionError::Internal(
- "No input arrays provided to accumulator".to_string(),
- ));
+ return sedona_internal_err!("No input arrays provided to
accumulator");
}
let arg_types = [self.input_type.clone()];
let arg_values = [ColumnarValue::Array(values[0].clone())];
@@ -441,9 +440,7 @@ impl Accumulator for AnalyzeAccumulator {
fn merge_batch(&mut self, states: &[ArrayRef]) -> Result<()> {
// Check input length (expecting 1 state field)
if states.is_empty() {
- return Err(DataFusionError::Internal(
- "No input arrays provided to accumulator in
merge_batch".to_string(),
- ));
+ return sedona_internal_err!("No input arrays provided to
accumulator in merge_batch");
}
let array = &states[0];
@@ -455,9 +452,8 @@ impl Accumulator for AnalyzeAccumulator {
}
let serialized = binary_array.value(i);
- let other_stats: GeoStatistics =
serde_json::from_slice(serialized).map_err(|e| {
- DataFusionError::Internal(format!("Failed to deserialize
stats: {e}"))
- })?;
+ let other_stats: GeoStatistics = serde_json::from_slice(serialized)
+ .map_err(|e| sedona_internal_datafusion_err!("Failed to
deserialize stats: {e}"))?;
// Use the merge method to combine statistics
self.stats.merge(&other_stats);
diff --git a/rust/sedona-functions/src/st_envelope.rs
b/rust/sedona-functions/src/st_envelope.rs
index c7dd22d8..cdf0ccb0 100644
--- a/rust/sedona-functions/src/st_envelope.rs
+++ b/rust/sedona-functions/src/st_envelope.rs
@@ -23,6 +23,7 @@ use datafusion_expr::{
scalar_doc_sections::DOC_SECTION_OTHER, ColumnarValue, Documentation,
Volatility,
};
use geo_traits::GeometryTrait;
+use sedona_common::sedona_internal_err;
use sedona_expr::{
item_crs::ItemCrsKernel,
scalar_udf::{SedonaScalarKernel, SedonaScalarUDF},
@@ -125,11 +126,7 @@ fn invoke_scalar(wkb: &Wkb, writer: &mut impl
std::io::Write) -> Result<()> {
geo_traits::GeometryType::GeometryCollection(_) => {
write_wkb_geometrycollection_header(writer, wkb.dim(), 0)
}
- _ => {
- return Err(DataFusionError::Internal(
- "Unsupported geometry type".to_string(),
- ))
- }
+ _ => return sedona_internal_err!("Unsupported geometry type"),
};
if let Err(e) = result {
diff --git a/rust/sedona-functions/src/st_envelope_agg.rs
b/rust/sedona-functions/src/st_envelope_agg.rs
index 37ca9945..7966052c 100644
--- a/rust/sedona-functions/src/st_envelope_agg.rs
+++ b/rust/sedona-functions/src/st_envelope_agg.rs
@@ -132,9 +132,7 @@ impl BoundsAccumulator2D {
// Check the input length for update methods.
fn check_update_input_len(input: &[ArrayRef], expected: usize, context:
&str) -> Result<()> {
if input.is_empty() {
- return Err(DataFusionError::Internal(format!(
- "No input arrays provided to accumulator in {context}"
- )));
+ return sedona_internal_err!("No input arrays provided to
accumulator in {context}");
}
if input.len() != expected {
return sedona_internal_err!(
diff --git a/rust/sedona-functions/src/st_geomfromwkt.rs
b/rust/sedona-functions/src/st_geomfromwkt.rs
index c06c94be..01eef0fe 100644
--- a/rust/sedona-functions/src/st_geomfromwkt.rs
+++ b/rust/sedona-functions/src/st_geomfromwkt.rs
@@ -19,11 +19,13 @@ use std::{str::FromStr, sync::Arc, vec};
use arrow_array::builder::{BinaryBuilder, StringViewBuilder};
use arrow_schema::DataType;
use datafusion_common::cast::as_string_view_array;
-use datafusion_common::error::{DataFusionError, Result};
+use datafusion_common::error::Result;
+use datafusion_common::exec_datafusion_err;
use datafusion_common::scalar::ScalarValue;
use datafusion_expr::{
scalar_doc_sections::DOC_SECTION_OTHER, ColumnarValue, Documentation,
Volatility,
};
+use sedona_common::sedona_internal_datafusion_err;
use sedona_expr::item_crs::make_item_crs;
use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF};
use sedona_geometry::wkb_factory::WKB_MIN_PROBABLE_BYTES;
@@ -136,8 +138,8 @@ impl SedonaScalarKernel for STGeoFromWKT {
}
fn invoke_scalar(wkt_bytes: &str, builder: &mut BinaryBuilder) -> Result<()> {
- let geometry: Wkt<f64> = Wkt::from_str(wkt_bytes)
- .map_err(|err| DataFusionError::Internal(format!("WKT parse error:
{err}")))?;
+ let geometry: Wkt<f64> =
+ Wkt::from_str(wkt_bytes).map_err(|err| exec_datafusion_err!("WKT parse
error: {err}"))?;
write_geometry(
builder,
@@ -146,7 +148,7 @@ fn invoke_scalar(wkt_bytes: &str, builder: &mut
BinaryBuilder) -> Result<()> {
endianness: Endianness::LittleEndian,
},
)
- .map_err(|err| DataFusionError::Internal(format!("WKB write error:
{err}")))
+ .map_err(|err| sedona_internal_datafusion_err!("WKB write error: {err}"))
}
/// ST_GeomFromEWKT() UDF implementation
diff --git a/rust/sedona-functions/src/st_pointzm.rs
b/rust/sedona-functions/src/st_pointzm.rs
index a2eaf997..59f9f43d 100644
--- a/rust/sedona-functions/src/st_pointzm.rs
+++ b/rust/sedona-functions/src/st_pointzm.rs
@@ -30,6 +30,7 @@ use datafusion_expr::{
scalar_doc_sections::DOC_SECTION_OTHER, ColumnarValue, Documentation,
Volatility,
};
use geo_traits::Dimensions;
+use sedona_common::{sedona_internal_datafusion_err, sedona_internal_err};
use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF};
use sedona_geometry::{
error::SedonaGeometryError,
@@ -166,9 +167,7 @@ impl SedonaScalarKernel for STGeoFromPointZm {
.iter()
.map(|v| match v {
ColumnarValue::Scalar(ScalarValue::Float64(val)) =>
Ok(*val),
- _ => Err(datafusion_common::DataFusionError::Internal(
- "Expected Float64 scalar".to_string(),
- )),
+ _ => sedona_internal_err!("Expected Float64 scalar"),
})
.collect();
let scalar_coords = scalar_coords?;
@@ -216,9 +215,7 @@ impl SedonaScalarKernel for STGeoFromPointZm {
let values = arrays.iter().map(|v| v.value(i)).collect::<Vec<_>>();
if !any_null {
write_wkb_pointzm(&mut builder, &values, self.dim).map_err(|_|
{
- datafusion_common::DataFusionError::Internal(
- "Failed to write WKB point header".to_string(),
- )
+ sedona_internal_datafusion_err!("Failed to write WKB point
header")
})?;
builder.append_value([]);
} else {
diff --git a/rust/sedona-functions/src/st_setsrid.rs
b/rust/sedona-functions/src/st_setsrid.rs
index ddef11d4..0d0e09fb 100644
--- a/rust/sedona-functions/src/st_setsrid.rs
+++ b/rust/sedona-functions/src/st_setsrid.rs
@@ -756,9 +756,9 @@ mod test {
to: ScalarValue,
) -> Result<(SedonaType, ColumnarValue)> {
let SedonaType::Arrow(datatype) = &arg_type[1] else {
- return Err(DataFusionError::Internal(
- "Expected SedonaType::Arrow, but found a different
variant".to_string(),
- ));
+ return sedona_internal_err!(
+ "Expected SedonaType::Arrow, but found a different variant"
+ );
};
let arg_fields = vec![
Arc::new(arg_type[0].to_storage_field("", true)?),
diff --git a/rust/sedona-functions/src/st_xyzm.rs
b/rust/sedona-functions/src/st_xyzm.rs
index 423c6c4e..9fa7697b 100644
--- a/rust/sedona-functions/src/st_xyzm.rs
+++ b/rust/sedona-functions/src/st_xyzm.rs
@@ -27,7 +27,7 @@ use geo_traits::{
CoordTrait, Dimensions, GeometryCollectionTrait, GeometryTrait,
LineStringTrait,
MultiLineStringTrait, MultiPointTrait, MultiPolygonTrait, PointTrait,
PolygonTrait,
};
-use sedona_common::sedona_internal_err;
+use sedona_common::{sedona_internal_datafusion_err, sedona_internal_err};
use sedona_expr::{
item_crs::ItemCrsKernel,
scalar_udf::{SedonaScalarKernel, SedonaScalarUDF},
@@ -173,7 +173,7 @@ fn invoke_scalar(item: &Wkb, dim_index: usize) ->
Result<Option<f64>> {
1 => {
let coord_dim = multipoint.dim();
let point = MultiPointTrait::point(multipoint, 0)
- .ok_or(DataFusionError::Internal("Missing
point".to_string()))?;
+ .ok_or(sedona_internal_datafusion_err!("Missing
point"))?;
let coord = PointTrait::coord(&point);
return get_coord(coord_dim, coord, dim_index);
}
diff --git a/rust/sedona-functions/src/st_xyzm_minmax.rs
b/rust/sedona-functions/src/st_xyzm_minmax.rs
index 9720d72d..8617d6ad 100644
--- a/rust/sedona-functions/src/st_xyzm_minmax.rs
+++ b/rust/sedona-functions/src/st_xyzm_minmax.rs
@@ -19,12 +19,12 @@ use std::sync::Arc;
use crate::executor::WkbExecutor;
use arrow_array::builder::Float64Builder;
use arrow_schema::DataType;
-use datafusion_common::{error::Result, DataFusionError};
+use datafusion_common::error::Result;
use datafusion_expr::{
scalar_doc_sections::DOC_SECTION_OTHER, ColumnarValue, Documentation,
Volatility,
};
use geo_traits::GeometryTrait;
-use sedona_common::sedona_internal_err;
+use sedona_common::{sedona_internal_datafusion_err, sedona_internal_err};
use sedona_expr::{
item_crs::ItemCrsKernel,
scalar_udf::{SedonaScalarKernel, SedonaScalarUDF},
@@ -196,24 +196,23 @@ fn invoke_scalar(
let interval: Interval = match dim {
"x" => {
let xy_bounds = geo_traits_bounds_xy(item)
- .map_err(|e| DataFusionError::Internal(format!("Error updating
bounds: {e}")))?;
- Interval::try_from(*xy_bounds.x()).map_err(|e| {
- DataFusionError::Internal(format!("Error converting to
interval: {e}"))
- })?
+ .map_err(|e| sedona_internal_datafusion_err!("Error updating
bounds: {e}"))?;
+ Interval::try_from(*xy_bounds.x())
+ .map_err(|e| sedona_internal_datafusion_err!("Error converting
to interval: {e}"))?
}
"y" => {
let xy_bounds = geo_traits_bounds_xy(item)
- .map_err(|e| DataFusionError::Internal(format!("Error updating
bounds: {e}")))?;
+ .map_err(|e| sedona_internal_datafusion_err!("Error updating
bounds: {e}"))?;
*xy_bounds.y()
}
"z" => {
let z_bounds = geo_traits_bounds_z(item)
- .map_err(|e| DataFusionError::Internal(format!("Error updating
bounds: {e}")))?;
+ .map_err(|e| sedona_internal_datafusion_err!("Error updating
bounds: {e}"))?;
z_bounds
}
"m" => {
let m_bounds = geo_traits_bounds_m(item)
- .map_err(|e| DataFusionError::Internal(format!("Error updating
bounds: {e}")))?;
+ .map_err(|e| sedona_internal_datafusion_err!("Error updating
bounds: {e}"))?;
m_bounds
}
_ => sedona_internal_err!("unexpected dim index")?,
diff --git a/rust/sedona-geo/src/centroid.rs b/rust/sedona-geo/src/centroid.rs
index ee74df13..0e927be9 100644
--- a/rust/sedona-geo/src/centroid.rs
+++ b/rust/sedona-geo/src/centroid.rs
@@ -16,7 +16,7 @@
// under the License.
//! Centroid extraction functionality for WKB geometries
-use datafusion_common::{error::DataFusionError, Result};
+use datafusion_common::{exec_err, Result};
use geo_traits::CoordTrait;
use geo_traits::GeometryTrait;
use geo_traits::PointTrait;
@@ -56,9 +56,7 @@ pub fn extract_centroid_2d(geo: impl GeometryTrait<T = f64>)
-> Result<(f64, f64
// Return POINT EMPTY as (NaN, NaN)
Ok((f64::NAN, f64::NAN))
} else {
- Err(DataFusionError::Internal(
- "Centroid computation failed.".to_string(),
- ))
+ exec_err!("Centroid computation failed.")
}
}
}
diff --git a/rust/sedona-geo/src/st_intersection_agg.rs
b/rust/sedona-geo/src/st_intersection_agg.rs
index 49d4b448..1ef1958a 100644
--- a/rust/sedona-geo/src/st_intersection_agg.rs
+++ b/rust/sedona-geo/src/st_intersection_agg.rs
@@ -18,13 +18,11 @@ use std::{sync::Arc, vec};
use arrow_array::ArrayRef;
use arrow_schema::FieldRef;
-use datafusion_common::{
- error::{DataFusionError, Result},
- ScalarValue,
-};
+use datafusion_common::{error::Result, exec_err, ScalarValue};
use datafusion_expr::{Accumulator, ColumnarValue};
use geo::{BooleanOps, Intersects};
use geo_traits::to_geo::ToGeoGeometry;
+use sedona_common::sedona_internal_err;
use sedona_expr::{
aggregate_udf::{SedonaAccumulator, SedonaAccumulatorRef},
item_crs::ItemCrsSedonaAccumulator,
@@ -104,9 +102,7 @@ impl IntersectionAccumulator {
geo::Geometry::Polygon(poly) =>
geo::MultiPolygon(vec![poly]),
geo::Geometry::MultiPolygon(multi) => multi.clone(),
_ => {
- return Err(DataFusionError::Internal(
- "Unsupported geometry type for intersection
operation".to_string(),
- ));
+ return exec_err!("Unsupported geometry type for
intersection operation");
}
};
@@ -170,9 +166,7 @@ impl IntersectionAccumulator {
impl Accumulator for IntersectionAccumulator {
fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()> {
if values.is_empty() {
- return Err(DataFusionError::Internal(
- "No input arrays provided to accumulator in
update_batch".to_string(),
- ));
+ return sedona_internal_err!("No input arrays provided to
accumulator in update_batch");
}
let arg_types = [self.input_type.clone()];
let args = [ColumnarValue::Array(values[0].clone())];
@@ -213,9 +207,7 @@ impl Accumulator for IntersectionAccumulator {
fn merge_batch(&mut self, states: &[ArrayRef]) -> Result<()> {
// Check input length (expecting 1 state field)
if states.is_empty() {
- return Err(DataFusionError::Internal(
- "No input arrays provided to accumulator in
merge_batch".to_string(),
- ));
+ return sedona_internal_err!("No input arrays provided to
accumulator in merge_batch");
}
let array = &states[0];
let args = [ColumnarValue::Array(array.clone())];
diff --git a/rust/sedona-geo/src/st_union_agg.rs
b/rust/sedona-geo/src/st_union_agg.rs
index cb2a8be3..e9d7e003 100644
--- a/rust/sedona-geo/src/st_union_agg.rs
+++ b/rust/sedona-geo/src/st_union_agg.rs
@@ -18,10 +18,7 @@ use std::{sync::Arc, vec};
use arrow_array::ArrayRef;
use arrow_schema::FieldRef;
-use datafusion_common::{
- error::{DataFusionError, Result},
- ScalarValue,
-};
+use datafusion_common::{error::Result, exec_err, ScalarValue};
use datafusion_expr::{Accumulator, ColumnarValue};
use geo::BooleanOps;
use geo_traits::to_geo::ToGeoGeometry;
@@ -100,9 +97,7 @@ impl UnionAccumulator {
geo::Geometry::Polygon(poly) =>
geo::MultiPolygon(vec![poly]),
geo::Geometry::MultiPolygon(multi) => multi.clone(),
_ => {
- return Err(DataFusionError::Internal(
- "Unsupported geometry type for union
operation".to_string(),
- ));
+ return exec_err!("Unsupported geometry type for union
operation");
}
};
@@ -206,9 +201,7 @@ impl Accumulator for UnionAccumulator {
fn merge_batch(&mut self, states: &[ArrayRef]) -> Result<()> {
// Check input length (expecting 1 state field)
if states.is_empty() {
- return Err(DataFusionError::Internal(
- "No input arrays provided to accumulator in
merge_batch".to_string(),
- ));
+ return sedona_internal_err!("No input arrays provided to
accumulator in merge_batch");
}
let array = &states[0];
let args = [ColumnarValue::Array(array.clone())];
diff --git a/rust/sedona-raster-functions/src/executor.rs
b/rust/sedona-raster-functions/src/executor.rs
index 917a1118..e7bbbe9d 100644
--- a/rust/sedona-raster-functions/src/executor.rs
+++ b/rust/sedona-raster-functions/src/executor.rs
@@ -17,9 +17,9 @@
use arrow_array::{Array, ArrayRef, StructArray};
use datafusion_common::error::Result;
-use datafusion_common::{DataFusionError, ScalarValue};
+use datafusion_common::ScalarValue;
use datafusion_expr::ColumnarValue;
-use sedona_common::sedona_internal_err;
+use sedona_common::{sedona_internal_datafusion_err, sedona_internal_err};
use sedona_raster::array::{RasterRefImpl, RasterStructArray};
use sedona_schema::datatypes::SedonaType;
use sedona_schema::datatypes::RASTER;
@@ -73,9 +73,7 @@ impl<'a, 'b> RasterExecutor<'a, 'b> {
.as_any()
.downcast_ref::<StructArray>()
.ok_or_else(|| {
- DataFusionError::Internal(
- "Expected StructArray for raster
data".to_string(),
- )
+ sedona_internal_datafusion_err!("Expected
StructArray for raster data")
})?;
let raster_array = RasterStructArray::new(raster_struct);
@@ -103,9 +101,7 @@ impl<'a, 'b> RasterExecutor<'a, 'b> {
}
}
ScalarValue::Null => func(0, None),
- _ => Err(DataFusionError::Internal(
- "Expected Struct scalar for raster".to_string(),
- )),
+ _ => sedona_internal_err!("Expected Struct scalar for raster"),
},
}
}
diff --git a/rust/sedona-schema/src/datatypes.rs
b/rust/sedona-schema/src/datatypes.rs
index 4674d34e..9707302c 100644
--- a/rust/sedona-schema/src/datatypes.rs
+++ b/rust/sedona-schema/src/datatypes.rs
@@ -15,8 +15,8 @@
// specific language governing permissions and limitations
// under the License.
use arrow_schema::{DataType, Field};
-use datafusion_common::error::{DataFusionError, Result};
-use sedona_common::sedona_internal_err;
+use datafusion_common::error::Result;
+use sedona_common::{sedona_internal_datafusion_err, sedona_internal_err};
use serde_json::Value;
use std::fmt::{Debug, Display};
use std::sync::LazyLock;
@@ -341,7 +341,7 @@ fn deserialize_edges_and_crs(value: &Option<String>) ->
Result<(Edges, Crs)> {
}
let json_value: Value = serde_json::from_str(val).map_err(|err| {
- DataFusionError::Internal(format!("Error deserializing
GeoArrow metadata: {err}"))
+ sedona_internal_datafusion_err!("Error deserializing GeoArrow
metadata: {err}")
})?;
if !json_value.is_object() {
return sedona_internal_err!(
diff --git a/rust/sedona-spatial-join/src/evaluated_batch/spill.rs
b/rust/sedona-spatial-join/src/evaluated_batch/spill.rs
index 9d5dd8a8..eddd0574 100644
--- a/rust/sedona-spatial-join/src/evaluated_batch/spill.rs
+++ b/rust/sedona-spatial-join/src/evaluated_batch/spill.rs
@@ -21,11 +21,11 @@ use arrow::array::Float64Array;
use arrow_array::{Array, RecordBatch, StructArray};
use arrow_schema::{DataType, Field, Fields, Schema, SchemaRef};
use datafusion::config::SpillCompression;
-use datafusion_common::{DataFusionError, Result, ScalarValue};
+use datafusion_common::{Result, ScalarValue};
use datafusion_execution::{disk_manager::RefCountedTempFile,
runtime_env::RuntimeEnv};
use datafusion_expr::ColumnarValue;
use datafusion_physical_plan::metrics::SpillMetrics;
-use sedona_common::sedona_internal_err;
+use sedona_common::{sedona_internal_datafusion_err, sedona_internal_err};
use sedona_schema::datatypes::SedonaType;
use crate::{
@@ -191,16 +191,12 @@ pub(crate) fn spilled_batch_to_evaluated_batch(
.as_any()
.downcast_ref::<StructArray>()
.ok_or_else(|| {
- DataFusionError::Internal("Expected data column to be a
StructArray".to_string())
+ sedona_internal_datafusion_err!("Expected data column to be a
StructArray")
})?;
let data_schema = Arc::new(Schema::new(match data_array.data_type() {
DataType::Struct(fields) => fields.clone(),
- _ => {
- return Err(DataFusionError::Internal(
- "Expected data column to have Struct data type".to_string(),
- ))
- }
+ _ => return sedona_internal_err!("Expected data column to have Struct
data type"),
}));
let data_columns = (0..data_array.num_columns())
@@ -223,7 +219,7 @@ pub(crate) fn spilled_batch_to_evaluated_batch(
.as_any()
.downcast_ref::<Float64Array>()
.ok_or_else(|| {
- DataFusionError::Internal("Expected dist column to be
Float64Array".to_string())
+ sedona_internal_datafusion_err!("Expected dist column to be
Float64Array")
})?;
let distance = if !dist_array.is_empty() {
@@ -282,7 +278,7 @@ mod tests {
use crate::utils::arrow_utils::get_record_batch_memory_size;
use arrow_array::{ArrayRef, BinaryArray, Int32Array, StringArray};
use arrow_schema::{DataType, Field, Schema};
- use datafusion_common::Result;
+ use datafusion_common::{DataFusionError, Result};
use datafusion_execution::runtime_env::RuntimeEnv;
use datafusion_physical_plan::metrics::ExecutionPlanMetricsSet;
use sedona_schema::datatypes::WKB_GEOMETRY;
diff --git a/rust/sedona-spatial-join/src/exec.rs
b/rust/sedona-spatial-join/src/exec.rs
index 495518ea..2ed90d73 100644
--- a/rust/sedona-spatial-join/src/exec.rs
+++ b/rust/sedona-spatial-join/src/exec.rs
@@ -17,7 +17,7 @@
use std::{fmt::Formatter, sync::Arc};
use arrow_schema::SchemaRef;
-use datafusion_common::{project_schema, DataFusionError, JoinSide, Result};
+use datafusion_common::{project_schema, JoinSide, Result};
use datafusion_execution::{SendableRecordBatchStream, TaskContext};
use datafusion_expr::{JoinType, Operator};
use datafusion_physical_expr::{
@@ -33,7 +33,7 @@ use datafusion_physical_plan::{
PlanProperties,
};
use parking_lot::Mutex;
-use sedona_common::SpatialJoinOptions;
+use sedona_common::{sedona_internal_err, SpatialJoinOptions};
use crate::{
prepare::{SpatialJoinComponents, SpatialJoinComponentsBuilder},
@@ -93,9 +93,7 @@ fn determine_knn_build_probe_plans<'a>(
match knn_pred.probe_side {
JoinSide::Left => Ok((right_plan, left_plan)),
JoinSide::Right => Ok((left_plan, right_plan)),
- JoinSide::None => Err(DataFusionError::Internal(
- "KNN join requires explicit probe_side designation".to_string(),
- )),
+ JoinSide::None => sedona_internal_err!("KNN join requires explicit
probe_side designation"),
}
}
diff --git a/rust/sedona-spatial-join/src/operand_evaluator.rs
b/rust/sedona-spatial-join/src/operand_evaluator.rs
index a8b83264..792d1957 100644
--- a/rust/sedona-spatial-join/src/operand_evaluator.rs
+++ b/rust/sedona-spatial-join/src/operand_evaluator.rs
@@ -19,9 +19,7 @@ use std::{mem::transmute, sync::Arc};
use arrow_array::{Array, ArrayRef, Float64Array, RecordBatch};
use arrow_schema::DataType;
-use datafusion_common::{
- utils::proxy::VecAllocExt, DataFusionError, JoinSide, Result, ScalarValue,
-};
+use datafusion_common::{utils::proxy::VecAllocExt, JoinSide, Result,
ScalarValue};
use datafusion_expr::ColumnarValue;
use datafusion_physical_expr::PhysicalExpr;
use float_next_after::NextAfter;
@@ -33,6 +31,7 @@ use sedona_schema::datatypes::SedonaType;
use wkb::reader::Wkb;
use sedona_common::option::SpatialJoinOptions;
+use sedona_common::sedona_internal_err;
use crate::{
spatial_predicate::{DistancePredicate, KNNPredicate, RelationPredicate,
SpatialPredicate},
@@ -266,15 +265,11 @@ impl DistanceOperandEvaluator {
}
}
} else {
- return Err(DataFusionError::Internal(
- "Distance columnar value is not a
Float64Array".to_string(),
- ));
+ return sedona_internal_err!("Distance columnar value is
not a Float64Array");
}
}
_ => {
- return Err(DataFusionError::Internal(
- "Distance columnar value is not a Float64".to_string(),
- ));
+ return sedona_internal_err!("Distance columnar value is not a
Float64");
}
}
@@ -297,14 +292,10 @@ pub(crate) fn distance_value_at(
Ok(Some(array.value(i)))
}
} else {
- Err(DataFusionError::Internal(
- "Distance columnar value is not a
Float64Array".to_string(),
- ))
+ sedona_internal_err!("Distance columnar value is not a
Float64Array")
}
}
- _ => Err(DataFusionError::Internal(
- "Distance columnar value is not a Float64".to_string(),
- )),
+ _ => sedona_internal_err!("Distance columnar value is not a Float64"),
}
}
diff --git a/rust/sedona-spatial-join/src/refine/tg.rs
b/rust/sedona-spatial-join/src/refine/tg.rs
index 01e6a411..82a781f9 100644
--- a/rust/sedona-spatial-join/src/refine/tg.rs
+++ b/rust/sedona-spatial-join/src/refine/tg.rs
@@ -22,7 +22,7 @@ use std::{
},
};
-use datafusion_common::{DataFusionError, Result};
+use datafusion_common::Result;
use sedona_common::{sedona_internal_err, ExecutionMode, SpatialJoinOptions,
TgIndexType};
use sedona_expr::statistics::GeoStatistics;
use sedona_tg::tg::{self, BinaryPredicate};
@@ -322,14 +322,10 @@ impl<Op: BinaryPredicate + Send + Sync>
TgPredicateEvaluator for TgPredicateEval
fn create_evaluator(predicate: &SpatialPredicate) -> Result<Box<dyn
TgPredicateEvaluator>> {
let evaluator: Box<dyn TgPredicateEvaluator> = match predicate {
SpatialPredicate::Distance(_) => {
- return Err(DataFusionError::Internal(
- "Distance predicate is not supported for TG".to_string(),
- ))
+ return sedona_internal_err!("Distance predicate is not supported
for TG")
}
SpatialPredicate::KNearestNeighbors(_) => {
- return Err(DataFusionError::Internal(
- "KNN predicate is not supported for TG".to_string(),
- ))
+ return sedona_internal_err!("KNN predicate is not supported for
TG")
}
SpatialPredicate::Relation(predicate) => match predicate.relation_type
{
SpatialRelationType::Intersects => {
@@ -347,11 +343,7 @@ fn create_evaluator(predicate: &SpatialPredicate) ->
Result<Box<dyn TgPredicateE
Box::new(TgPredicateEvaluatorImpl::<tg::Touches>::new())
}
SpatialRelationType::Equals =>
Box::new(TgPredicateEvaluatorImpl::<tg::Equals>::new()),
- _ => {
- return Err(DataFusionError::Internal(
- "Unsupported spatial relation type for TG".to_string(),
- ))
- }
+ _ => return sedona_internal_err!("Unsupported spatial relation
type for TG"),
},
};
Ok(evaluator)
diff --git a/rust/sedona/src/show.rs b/rust/sedona/src/show.rs
index 0d3b946b..08ac5c96 100644
--- a/rust/sedona/src/show.rs
+++ b/rust/sedona/src/show.rs
@@ -21,8 +21,9 @@ use comfy_table::{Cell, CellAlignment, ColumnConstraint,
ContentArrangement, Row
use datafusion::arrow::util::display::{ArrayFormatter, FormatOptions};
use datafusion::error::Result;
use datafusion_common::format::DEFAULT_FORMAT_OPTIONS;
-use datafusion_common::{config::ConfigOptions, DataFusionError, ScalarValue};
+use datafusion_common::{config::ConfigOptions, ScalarValue};
use datafusion_expr::{ColumnarValue, ReturnFieldArgs, ScalarFunctionArgs,
ScalarUDF};
+use sedona_common::sedona_internal_datafusion_err;
use sedona_expr::scalar_udf::SedonaScalarUDF;
use sedona_schema::{datatypes::SedonaType, matchers::ArgMatcher};
use std::iter::zip;
@@ -45,8 +46,8 @@ pub fn show_batches<'a, W: std::io::Write>(
let format_fn = ctx
.functions
.scalar_udf("sd_format")
- .ok_or(DataFusionError::Internal(
- "sd_format UDF does not exist".to_string(),
+ .ok_or(sedona_internal_datafusion_err!(
+ "sd_format UDF does not exist"
))?
.clone();
diff --git a/sedona-cli/Cargo.toml b/sedona-cli/Cargo.toml
index 3c3d1dbb..c856d481 100644
--- a/sedona-cli/Cargo.toml
+++ b/sedona-cli/Cargo.toml
@@ -54,6 +54,7 @@ datafusion = { workspace = true, features = [
"unicode_expressions",
"compression",
] }
+datafusion-common = { workspace = true }
env_logger = { workspace = true }
futures = { workspace = true }
mimalloc = { workspace = true, optional = true }
@@ -61,5 +62,6 @@ libmimalloc-sys = { workspace = true, optional = true }
regex = { workspace = true }
rustyline = "15.0"
sedona = { workspace = true, features = ["aws", "gcp", "http", "proj"] }
+sedona-common = { workspace = true }
sedona-tg = { workspace = true }
tokio = { workspace = true, features = ["macros", "rt", "rt-multi-thread",
"sync", "parking_lot", "signal"] }
diff --git a/sedona-cli/src/print_format.rs b/sedona-cli/src/print_format.rs
index 580ee433..a5946d33 100644
--- a/sedona-cli/src/print_format.rs
+++ b/sedona-cli/src/print_format.rs
@@ -25,9 +25,10 @@ use arrow::csv::writer::WriterBuilder;
use arrow::datatypes::SchemaRef;
use arrow::json::{ArrayWriter, LineDelimitedWriter};
use arrow::record_batch::RecordBatch;
-use datafusion::error::{DataFusionError, Result};
+use datafusion::error::Result;
use sedona::context::SedonaContext;
use sedona::show::{show_batches, DisplayMode, DisplayTableOptions};
+use sedona_common::sedona_internal_datafusion_err;
/// Allow records to be printed in different formats
#[derive(Debug, PartialEq, Eq, clap::ValueEnum, Clone, Copy)]
@@ -144,7 +145,7 @@ fn format_batches_with_maxrows<W: std::io::Write>(
options,
)?;
let mut formatted_str = String::from_utf8(formatted)
- .map_err(|e| DataFusionError::Internal(format!("invalid utf-8
in table: {e}")))?;
+ .map_err(|e| sedona_internal_datafusion_err!("invalid utf-8 in
table: {e}"))?;
if over_limit {
formatted_str = keep_only_maxrows(&formatted_str, maxrows);