This is an automated email from the ASF dual-hosted git repository.
comphead pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new dd4263f843 Add `schema_err!` error macros with optional backtrace
(#8620)
dd4263f843 is described below
commit dd4263f843e093c807d63edf73a571b1ba2669b5
Author: comphead <[email protected]>
AuthorDate: Sat Jan 6 12:23:40 2024 -0800
Add `schema_err!` error macros with optional backtrace (#8620)
* Add `schema_err!` error macros with optional backtrace
---
.github/workflows/rust.yml | 6 +-
datafusion/common/src/column.rs | 17 +++---
datafusion/common/src/dfschema.rs | 35 +++++------
datafusion/common/src/error.rs | 93 +++++++++++++++++++----------
datafusion/core/src/dataframe/mod.rs | 2 +-
datafusion/expr/src/logical_plan/builder.rs | 34 ++++++-----
datafusion/sql/src/planner.rs | 2 +-
datafusion/sql/src/statement.rs | 15 +++--
datafusion/sql/tests/sql_integration.rs | 6 +-
9 files changed, 119 insertions(+), 91 deletions(-)
diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index e2faa75e7b..ae6c1ee561 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -106,7 +106,7 @@ jobs:
RUSTFLAGS: "-C debuginfo=0 -C opt-level=0 -C incremental=false -C
codegen-units=256"
RUST_BACKTRACE: "1"
# avoid rust stack overflows on tpc-ds tests
- RUST_MINSTACK: "3000000"
+ RUST_MIN_STACK: "3000000"
- name: Verify Working Directory Clean
run: git diff --exit-code
@@ -316,7 +316,7 @@ jobs:
RUSTFLAGS: "-C debuginfo=line-tables-only"
RUST_BACKTRACE: "1"
# avoid rust stack overflows on tpc-ds tests
- RUST_MINSTACK: "3000000"
+ RUST_MIN_STACK: "3000000"
macos:
name: cargo test (mac)
runs-on: macos-latest
@@ -356,7 +356,7 @@ jobs:
RUSTFLAGS: "-C debuginfo=0 -C opt-level=0 -C incremental=false -C
codegen-units=256"
RUST_BACKTRACE: "1"
# avoid rust stack overflows on tpc-ds tests
- RUST_MINSTACK: "3000000"
+ RUST_MIN_STACK: "3000000"
test-datafusion-pyarrow:
name: cargo test pyarrow (amd64)
diff --git a/datafusion/common/src/column.rs b/datafusion/common/src/column.rs
index 2e729c128e..f0edc71759 100644
--- a/datafusion/common/src/column.rs
+++ b/datafusion/common/src/column.rs
@@ -17,6 +17,7 @@
//! Column
+use crate::error::_schema_err;
use crate::utils::{parse_identifiers_normalized, quote_identifier};
use crate::{DFSchema, DataFusionError, OwnedTableReference, Result,
SchemaError};
use std::collections::HashSet;
@@ -211,13 +212,13 @@ impl Column {
}
}
- Err(DataFusionError::SchemaError(SchemaError::FieldNotFound {
+ _schema_err!(SchemaError::FieldNotFound {
field: Box::new(Column::new(self.relation.clone(), self.name)),
valid_fields: schemas
.iter()
.flat_map(|s| s.fields().iter().map(|f| f.qualified_column()))
.collect(),
- }))
+ })
}
/// Qualify column if not done yet.
@@ -299,23 +300,21 @@ impl Column {
}
// If not due to USING columns then due to ambiguous
column name
- return Err(DataFusionError::SchemaError(
- SchemaError::AmbiguousReference {
- field: Column::new_unqualified(self.name),
- },
- ));
+ return _schema_err!(SchemaError::AmbiguousReference {
+ field: Column::new_unqualified(self.name),
+ });
}
}
}
- Err(DataFusionError::SchemaError(SchemaError::FieldNotFound {
+ _schema_err!(SchemaError::FieldNotFound {
field: Box::new(self),
valid_fields: schemas
.iter()
.flat_map(|s| s.iter())
.flat_map(|s| s.fields().iter().map(|f| f.qualified_column()))
.collect(),
- }))
+ })
}
}
diff --git a/datafusion/common/src/dfschema.rs
b/datafusion/common/src/dfschema.rs
index 8772c4c3e1..85b97aac03 100644
--- a/datafusion/common/src/dfschema.rs
+++ b/datafusion/common/src/dfschema.rs
@@ -26,6 +26,7 @@ use std::sync::Arc;
use crate::error::{
unqualified_field_not_found, DataFusionError, Result, SchemaError,
_plan_err,
+ _schema_err,
};
use crate::{
field_not_found, Column, FunctionalDependencies, OwnedTableReference,
TableReference,
@@ -141,11 +142,9 @@ impl DFSchema {
if let Some(qualifier) = field.qualifier() {
qualified_names.insert((qualifier, field.name()));
} else if !unqualified_names.insert(field.name()) {
- return Err(DataFusionError::SchemaError(
- SchemaError::DuplicateUnqualifiedField {
- name: field.name().to_string(),
- },
- ));
+ return _schema_err!(SchemaError::DuplicateUnqualifiedField {
+ name: field.name().to_string(),
+ });
}
}
@@ -159,14 +158,12 @@ impl DFSchema {
qualified_names.sort();
for (qualifier, name) in &qualified_names {
if unqualified_names.contains(name) {
- return Err(DataFusionError::SchemaError(
- SchemaError::AmbiguousReference {
- field: Column {
- relation: Some((*qualifier).clone()),
- name: name.to_string(),
- },
- },
- ));
+ return _schema_err!(SchemaError::AmbiguousReference {
+ field: Column {
+ relation: Some((*qualifier).clone()),
+ name: name.to_string(),
+ }
+ });
}
}
Ok(Self {
@@ -392,14 +389,12 @@ impl DFSchema {
if fields_without_qualifier.len() == 1 {
Ok(fields_without_qualifier[0])
} else {
- Err(DataFusionError::SchemaError(
- SchemaError::AmbiguousReference {
- field: Column {
- relation: None,
- name: name.to_string(),
- },
+ _schema_err!(SchemaError::AmbiguousReference {
+ field: Column {
+ relation: None,
+ name: name.to_string(),
},
- ))
+ })
}
}
}
diff --git a/datafusion/common/src/error.rs b/datafusion/common/src/error.rs
index e58faaa150..978938809c 100644
--- a/datafusion/common/src/error.rs
+++ b/datafusion/common/src/error.rs
@@ -82,7 +82,9 @@ pub enum DataFusionError {
Configuration(String),
/// This error happens with schema-related errors, such as schema
inference not possible
/// and non-unique column names.
- SchemaError(SchemaError),
+ /// 2nd argument is for optional backtrace
+ /// Boxing the optional backtrace to prevent
<https://rust-lang.github.io/rust-clippy/master/index.html#/result_large_err>
+ SchemaError(SchemaError, Box<Option<String>>),
/// Error returned during execution of the query.
/// Examples include files not found, errors in parsing certain types.
Execution(String),
@@ -125,34 +127,6 @@ pub enum SchemaError {
},
}
-/// Create a "field not found" DataFusion::SchemaError
-pub fn field_not_found<R: Into<OwnedTableReference>>(
- qualifier: Option<R>,
- name: &str,
- schema: &DFSchema,
-) -> DataFusionError {
- DataFusionError::SchemaError(SchemaError::FieldNotFound {
- field: Box::new(Column::new(qualifier, name)),
- valid_fields: schema
- .fields()
- .iter()
- .map(|f| f.qualified_column())
- .collect(),
- })
-}
-
-/// Convenience wrapper over [`field_not_found`] for when there is no qualifier
-pub fn unqualified_field_not_found(name: &str, schema: &DFSchema) ->
DataFusionError {
- DataFusionError::SchemaError(SchemaError::FieldNotFound {
- field: Box::new(Column::new_unqualified(name)),
- valid_fields: schema
- .fields()
- .iter()
- .map(|f| f.qualified_column())
- .collect(),
- })
-}
-
impl Display for SchemaError {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
match self {
@@ -298,7 +272,7 @@ impl Display for DataFusionError {
write!(f, "IO error: {desc}")
}
DataFusionError::SQL(ref desc, ref backtrace) => {
- let backtrace = backtrace.clone().unwrap_or("".to_owned());
+ let backtrace: String =
backtrace.clone().unwrap_or("".to_owned());
write!(f, "SQL error: {desc:?}{backtrace}")
}
DataFusionError::Configuration(ref desc) => {
@@ -314,8 +288,10 @@ impl Display for DataFusionError {
DataFusionError::Plan(ref desc) => {
write!(f, "Error during planning: {desc}")
}
- DataFusionError::SchemaError(ref desc) => {
- write!(f, "Schema error: {desc}")
+ DataFusionError::SchemaError(ref desc, ref backtrace) => {
+ let backtrace: &str =
+ &backtrace.as_ref().clone().unwrap_or("".to_owned());
+ write!(f, "Schema error: {desc}{backtrace}")
}
DataFusionError::Execution(ref desc) => {
write!(f, "Execution error: {desc}")
@@ -356,7 +332,7 @@ impl Error for DataFusionError {
DataFusionError::Internal(_) => None,
DataFusionError::Configuration(_) => None,
DataFusionError::Plan(_) => None,
- DataFusionError::SchemaError(e) => Some(e),
+ DataFusionError::SchemaError(e, _) => Some(e),
DataFusionError::Execution(_) => None,
DataFusionError::ResourcesExhausted(_) => None,
DataFusionError::External(e) => Some(e.as_ref()),
@@ -556,12 +532,63 @@ macro_rules! arrow_err {
};
}
+// Exposes a macro to create `DataFusionError::SchemaError` with optional
backtrace
+#[macro_export]
+macro_rules! schema_datafusion_err {
+ ($ERR:expr) => {
+ DataFusionError::SchemaError(
+ $ERR,
+ Box::new(Some(DataFusionError::get_back_trace())),
+ )
+ };
+}
+
+// Exposes a macro to create `Err(DataFusionError::SchemaError)` with optional
backtrace
+#[macro_export]
+macro_rules! schema_err {
+ ($ERR:expr) => {
+ Err(DataFusionError::SchemaError(
+ $ERR,
+ Box::new(Some(DataFusionError::get_back_trace())),
+ ))
+ };
+}
+
// To avoid compiler error when using macro in the same crate:
// macros from the current crate cannot be referred to by absolute paths
pub use internal_datafusion_err as _internal_datafusion_err;
pub use internal_err as _internal_err;
pub use not_impl_err as _not_impl_err;
pub use plan_err as _plan_err;
+pub use schema_err as _schema_err;
+
+/// Create a "field not found" DataFusion::SchemaError
+pub fn field_not_found<R: Into<OwnedTableReference>>(
+ qualifier: Option<R>,
+ name: &str,
+ schema: &DFSchema,
+) -> DataFusionError {
+ schema_datafusion_err!(SchemaError::FieldNotFound {
+ field: Box::new(Column::new(qualifier, name)),
+ valid_fields: schema
+ .fields()
+ .iter()
+ .map(|f| f.qualified_column())
+ .collect(),
+ })
+}
+
+/// Convenience wrapper over [`field_not_found`] for when there is no qualifier
+pub fn unqualified_field_not_found(name: &str, schema: &DFSchema) ->
DataFusionError {
+ schema_datafusion_err!(SchemaError::FieldNotFound {
+ field: Box::new(Column::new_unqualified(name)),
+ valid_fields: schema
+ .fields()
+ .iter()
+ .map(|f| f.qualified_column())
+ .collect(),
+ })
+}
#[cfg(test)]
mod test {
diff --git a/datafusion/core/src/dataframe/mod.rs
b/datafusion/core/src/dataframe/mod.rs
index 33e198d6d5..f15f1e9ba6 100644
--- a/datafusion/core/src/dataframe/mod.rs
+++ b/datafusion/core/src/dataframe/mod.rs
@@ -1186,7 +1186,7 @@ impl DataFrame {
let field_to_rename = match
self.plan.schema().field_from_column(&old_column) {
Ok(field) => field,
// no-op if field not found
- Err(DataFusionError::SchemaError(SchemaError::FieldNotFound { ..
})) => {
+ Err(DataFusionError::SchemaError(SchemaError::FieldNotFound { ..
}, _)) => {
return Ok(self)
}
Err(err) => return Err(err),
diff --git a/datafusion/expr/src/logical_plan/builder.rs
b/datafusion/expr/src/logical_plan/builder.rs
index a684f3e974..847fbbbf61 100644
--- a/datafusion/expr/src/logical_plan/builder.rs
+++ b/datafusion/expr/src/logical_plan/builder.rs
@@ -1845,13 +1845,16 @@ mod tests {
.project(vec![col("id"), col("first_name").alias("id")]);
match plan {
- Err(DataFusionError::SchemaError(SchemaError::AmbiguousReference {
- field:
- Column {
- relation: Some(OwnedTableReference::Bare { table }),
- name,
- },
- })) => {
+ Err(DataFusionError::SchemaError(
+ SchemaError::AmbiguousReference {
+ field:
+ Column {
+ relation: Some(OwnedTableReference::Bare { table
}),
+ name,
+ },
+ },
+ _,
+ )) => {
assert_eq!("employee_csv", table);
assert_eq!("id", &name);
Ok(())
@@ -1872,13 +1875,16 @@ mod tests {
.aggregate(vec![col("state")],
vec![sum(col("salary")).alias("state")]);
match plan {
- Err(DataFusionError::SchemaError(SchemaError::AmbiguousReference {
- field:
- Column {
- relation: Some(OwnedTableReference::Bare { table }),
- name,
- },
- })) => {
+ Err(DataFusionError::SchemaError(
+ SchemaError::AmbiguousReference {
+ field:
+ Column {
+ relation: Some(OwnedTableReference::Bare { table
}),
+ name,
+ },
+ },
+ _,
+ )) => {
assert_eq!("employee_csv", table);
assert_eq!("state", &name);
Ok(())
diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs
index c5c30e3a22..a04df5589b 100644
--- a/datafusion/sql/src/planner.rs
+++ b/datafusion/sql/src/planner.rs
@@ -250,7 +250,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
// Default expressions are restricted, column references are not
allowed
let empty_schema = DFSchema::empty();
let error_desc = |e: DataFusionError| match e {
- DataFusionError::SchemaError(SchemaError::FieldNotFound { .. }) =>
{
+ DataFusionError::SchemaError(SchemaError::FieldNotFound { .. }, _)
=> {
plan_datafusion_err!(
"Column reference is not allowed in the DEFAULT expression
: {}",
e
diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs
index b96553ffbf..b9fb4c65dc 100644
--- a/datafusion/sql/src/statement.rs
+++ b/datafusion/sql/src/statement.rs
@@ -31,9 +31,10 @@ use arrow_schema::DataType;
use datafusion_common::file_options::StatementOptions;
use datafusion_common::parsers::CompressionTypeVariant;
use datafusion_common::{
- not_impl_err, plan_datafusion_err, plan_err, unqualified_field_not_found,
Column,
- Constraints, DFField, DFSchema, DFSchemaRef, DataFusionError,
OwnedTableReference,
- Result, ScalarValue, SchemaReference, TableReference, ToDFSchema,
+ not_impl_err, plan_datafusion_err, plan_err, schema_err,
unqualified_field_not_found,
+ Column, Constraints, DFField, DFSchema, DFSchemaRef, DataFusionError,
+ OwnedTableReference, Result, ScalarValue, SchemaError, SchemaReference,
+ TableReference, ToDFSchema,
};
use datafusion_expr::dml::{CopyOptions, CopyTo};
use
datafusion_expr::expr_rewriter::normalize_col_with_schemas_and_ambiguity_check;
@@ -1138,11 +1139,9 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
.index_of_column_by_name(None, &c)?
.ok_or_else(|| unqualified_field_not_found(&c,
&table_schema))?;
if value_indices[column_index].is_some() {
- return Err(DataFusionError::SchemaError(
-
datafusion_common::SchemaError::DuplicateUnqualifiedField {
- name: c,
- },
- ));
+ return
schema_err!(SchemaError::DuplicateUnqualifiedField {
+ name: c,
+ });
} else {
value_indices[column_index] = Some(i);
}
diff --git a/datafusion/sql/tests/sql_integration.rs
b/datafusion/sql/tests/sql_integration.rs
index 48ba501453..4de08a7124 100644
--- a/datafusion/sql/tests/sql_integration.rs
+++ b/datafusion/sql/tests/sql_integration.rs
@@ -756,9 +756,11 @@ fn join_with_ambiguous_column() {
#[test]
fn where_selection_with_ambiguous_column() {
let sql = "SELECT * FROM person a, person b WHERE id = id + 1";
- let err = logical_plan(sql).expect_err("query should have failed");
+ let err = logical_plan(sql)
+ .expect_err("query should have failed")
+ .strip_backtrace();
assert_eq!(
- "SchemaError(AmbiguousReference { field: Column { relation: None,
name: \"id\" } })",
+ "\"Schema error: Ambiguous reference to unqualified field id\"",
format!("{err:?}")
);
}