This is an automated email from the ASF dual-hosted git repository.

comphead pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new dd4263f843 Add `schema_err!` error macros with optional backtrace 
(#8620)
dd4263f843 is described below

commit dd4263f843e093c807d63edf73a571b1ba2669b5
Author: comphead <[email protected]>
AuthorDate: Sat Jan 6 12:23:40 2024 -0800

    Add `schema_err!` error macros with optional backtrace (#8620)
    
    * Add `schema_err!` error macros with optional backtrace
---
 .github/workflows/rust.yml                  |  6 +-
 datafusion/common/src/column.rs             | 17 +++---
 datafusion/common/src/dfschema.rs           | 35 +++++------
 datafusion/common/src/error.rs              | 93 +++++++++++++++++++----------
 datafusion/core/src/dataframe/mod.rs        |  2 +-
 datafusion/expr/src/logical_plan/builder.rs | 34 ++++++-----
 datafusion/sql/src/planner.rs               |  2 +-
 datafusion/sql/src/statement.rs             | 15 +++--
 datafusion/sql/tests/sql_integration.rs     |  6 +-
 9 files changed, 119 insertions(+), 91 deletions(-)

diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index e2faa75e7b..ae6c1ee561 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -106,7 +106,7 @@ jobs:
           RUSTFLAGS: "-C debuginfo=0 -C opt-level=0 -C incremental=false -C 
codegen-units=256"
           RUST_BACKTRACE: "1"
           # avoid rust stack overflows on tpc-ds tests
-          RUST_MINSTACK: "3000000"
+          RUST_MIN_STACK: "3000000"
       - name: Verify Working Directory Clean
         run: git diff --exit-code
 
@@ -316,7 +316,7 @@ jobs:
           RUSTFLAGS: "-C debuginfo=line-tables-only"
           RUST_BACKTRACE: "1"
           # avoid rust stack overflows on tpc-ds tests
-          RUST_MINSTACK: "3000000"
+          RUST_MIN_STACK: "3000000"
   macos:
     name: cargo test (mac)
     runs-on: macos-latest
@@ -356,7 +356,7 @@ jobs:
           RUSTFLAGS: "-C debuginfo=0 -C opt-level=0 -C incremental=false -C 
codegen-units=256"
           RUST_BACKTRACE: "1"
           # avoid rust stack overflows on tpc-ds tests
-          RUST_MINSTACK: "3000000"
+          RUST_MIN_STACK: "3000000"
 
   test-datafusion-pyarrow:
     name: cargo test pyarrow (amd64)
diff --git a/datafusion/common/src/column.rs b/datafusion/common/src/column.rs
index 2e729c128e..f0edc71759 100644
--- a/datafusion/common/src/column.rs
+++ b/datafusion/common/src/column.rs
@@ -17,6 +17,7 @@
 
 //! Column
 
+use crate::error::_schema_err;
 use crate::utils::{parse_identifiers_normalized, quote_identifier};
 use crate::{DFSchema, DataFusionError, OwnedTableReference, Result, 
SchemaError};
 use std::collections::HashSet;
@@ -211,13 +212,13 @@ impl Column {
             }
         }
 
-        Err(DataFusionError::SchemaError(SchemaError::FieldNotFound {
+        _schema_err!(SchemaError::FieldNotFound {
             field: Box::new(Column::new(self.relation.clone(), self.name)),
             valid_fields: schemas
                 .iter()
                 .flat_map(|s| s.fields().iter().map(|f| f.qualified_column()))
                 .collect(),
-        }))
+        })
     }
 
     /// Qualify column if not done yet.
@@ -299,23 +300,21 @@ impl Column {
                     }
 
                     // If not due to USING columns then due to ambiguous 
column name
-                    return Err(DataFusionError::SchemaError(
-                        SchemaError::AmbiguousReference {
-                            field: Column::new_unqualified(self.name),
-                        },
-                    ));
+                    return _schema_err!(SchemaError::AmbiguousReference {
+                        field: Column::new_unqualified(self.name),
+                    });
                 }
             }
         }
 
-        Err(DataFusionError::SchemaError(SchemaError::FieldNotFound {
+        _schema_err!(SchemaError::FieldNotFound {
             field: Box::new(self),
             valid_fields: schemas
                 .iter()
                 .flat_map(|s| s.iter())
                 .flat_map(|s| s.fields().iter().map(|f| f.qualified_column()))
                 .collect(),
-        }))
+        })
     }
 }
 
diff --git a/datafusion/common/src/dfschema.rs 
b/datafusion/common/src/dfschema.rs
index 8772c4c3e1..85b97aac03 100644
--- a/datafusion/common/src/dfschema.rs
+++ b/datafusion/common/src/dfschema.rs
@@ -26,6 +26,7 @@ use std::sync::Arc;
 
 use crate::error::{
     unqualified_field_not_found, DataFusionError, Result, SchemaError, 
_plan_err,
+    _schema_err,
 };
 use crate::{
     field_not_found, Column, FunctionalDependencies, OwnedTableReference, 
TableReference,
@@ -141,11 +142,9 @@ impl DFSchema {
             if let Some(qualifier) = field.qualifier() {
                 qualified_names.insert((qualifier, field.name()));
             } else if !unqualified_names.insert(field.name()) {
-                return Err(DataFusionError::SchemaError(
-                    SchemaError::DuplicateUnqualifiedField {
-                        name: field.name().to_string(),
-                    },
-                ));
+                return _schema_err!(SchemaError::DuplicateUnqualifiedField {
+                    name: field.name().to_string(),
+                });
             }
         }
 
@@ -159,14 +158,12 @@ impl DFSchema {
         qualified_names.sort();
         for (qualifier, name) in &qualified_names {
             if unqualified_names.contains(name) {
-                return Err(DataFusionError::SchemaError(
-                    SchemaError::AmbiguousReference {
-                        field: Column {
-                            relation: Some((*qualifier).clone()),
-                            name: name.to_string(),
-                        },
-                    },
-                ));
+                return _schema_err!(SchemaError::AmbiguousReference {
+                    field: Column {
+                        relation: Some((*qualifier).clone()),
+                        name: name.to_string(),
+                    }
+                });
             }
         }
         Ok(Self {
@@ -392,14 +389,12 @@ impl DFSchema {
                 if fields_without_qualifier.len() == 1 {
                     Ok(fields_without_qualifier[0])
                 } else {
-                    Err(DataFusionError::SchemaError(
-                        SchemaError::AmbiguousReference {
-                            field: Column {
-                                relation: None,
-                                name: name.to_string(),
-                            },
+                    _schema_err!(SchemaError::AmbiguousReference {
+                        field: Column {
+                            relation: None,
+                            name: name.to_string(),
                         },
-                    ))
+                    })
                 }
             }
         }
diff --git a/datafusion/common/src/error.rs b/datafusion/common/src/error.rs
index e58faaa150..978938809c 100644
--- a/datafusion/common/src/error.rs
+++ b/datafusion/common/src/error.rs
@@ -82,7 +82,9 @@ pub enum DataFusionError {
     Configuration(String),
     /// This error happens with schema-related errors, such as schema 
inference not possible
     /// and non-unique column names.
-    SchemaError(SchemaError),
+    /// 2nd argument is for optional backtrace    
+    /// Boxing the optional backtrace to prevent 
<https://rust-lang.github.io/rust-clippy/master/index.html#/result_large_err>
+    SchemaError(SchemaError, Box<Option<String>>),
     /// Error returned during execution of the query.
     /// Examples include files not found, errors in parsing certain types.
     Execution(String),
@@ -125,34 +127,6 @@ pub enum SchemaError {
     },
 }
 
-/// Create a "field not found" DataFusion::SchemaError
-pub fn field_not_found<R: Into<OwnedTableReference>>(
-    qualifier: Option<R>,
-    name: &str,
-    schema: &DFSchema,
-) -> DataFusionError {
-    DataFusionError::SchemaError(SchemaError::FieldNotFound {
-        field: Box::new(Column::new(qualifier, name)),
-        valid_fields: schema
-            .fields()
-            .iter()
-            .map(|f| f.qualified_column())
-            .collect(),
-    })
-}
-
-/// Convenience wrapper over [`field_not_found`] for when there is no qualifier
-pub fn unqualified_field_not_found(name: &str, schema: &DFSchema) -> 
DataFusionError {
-    DataFusionError::SchemaError(SchemaError::FieldNotFound {
-        field: Box::new(Column::new_unqualified(name)),
-        valid_fields: schema
-            .fields()
-            .iter()
-            .map(|f| f.qualified_column())
-            .collect(),
-    })
-}
-
 impl Display for SchemaError {
     fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
         match self {
@@ -298,7 +272,7 @@ impl Display for DataFusionError {
                 write!(f, "IO error: {desc}")
             }
             DataFusionError::SQL(ref desc, ref backtrace) => {
-                let backtrace = backtrace.clone().unwrap_or("".to_owned());
+                let backtrace: String = 
backtrace.clone().unwrap_or("".to_owned());
                 write!(f, "SQL error: {desc:?}{backtrace}")
             }
             DataFusionError::Configuration(ref desc) => {
@@ -314,8 +288,10 @@ impl Display for DataFusionError {
             DataFusionError::Plan(ref desc) => {
                 write!(f, "Error during planning: {desc}")
             }
-            DataFusionError::SchemaError(ref desc) => {
-                write!(f, "Schema error: {desc}")
+            DataFusionError::SchemaError(ref desc, ref backtrace) => {
+                let backtrace: &str =
+                    &backtrace.as_ref().clone().unwrap_or("".to_owned());
+                write!(f, "Schema error: {desc}{backtrace}")
             }
             DataFusionError::Execution(ref desc) => {
                 write!(f, "Execution error: {desc}")
@@ -356,7 +332,7 @@ impl Error for DataFusionError {
             DataFusionError::Internal(_) => None,
             DataFusionError::Configuration(_) => None,
             DataFusionError::Plan(_) => None,
-            DataFusionError::SchemaError(e) => Some(e),
+            DataFusionError::SchemaError(e, _) => Some(e),
             DataFusionError::Execution(_) => None,
             DataFusionError::ResourcesExhausted(_) => None,
             DataFusionError::External(e) => Some(e.as_ref()),
@@ -556,12 +532,63 @@ macro_rules! arrow_err {
     };
 }
 
+// Exposes a macro to create `DataFusionError::SchemaError` with optional 
backtrace
+#[macro_export]
+macro_rules! schema_datafusion_err {
+    ($ERR:expr) => {
+        DataFusionError::SchemaError(
+            $ERR,
+            Box::new(Some(DataFusionError::get_back_trace())),
+        )
+    };
+}
+
+// Exposes a macro to create `Err(DataFusionError::SchemaError)` with optional 
backtrace
+#[macro_export]
+macro_rules! schema_err {
+    ($ERR:expr) => {
+        Err(DataFusionError::SchemaError(
+            $ERR,
+            Box::new(Some(DataFusionError::get_back_trace())),
+        ))
+    };
+}
+
 // To avoid compiler error when using macro in the same crate:
 // macros from the current crate cannot be referred to by absolute paths
 pub use internal_datafusion_err as _internal_datafusion_err;
 pub use internal_err as _internal_err;
 pub use not_impl_err as _not_impl_err;
 pub use plan_err as _plan_err;
+pub use schema_err as _schema_err;
+
+/// Create a "field not found" DataFusion::SchemaError
+pub fn field_not_found<R: Into<OwnedTableReference>>(
+    qualifier: Option<R>,
+    name: &str,
+    schema: &DFSchema,
+) -> DataFusionError {
+    schema_datafusion_err!(SchemaError::FieldNotFound {
+        field: Box::new(Column::new(qualifier, name)),
+        valid_fields: schema
+            .fields()
+            .iter()
+            .map(|f| f.qualified_column())
+            .collect(),
+    })
+}
+
+/// Convenience wrapper over [`field_not_found`] for when there is no qualifier
+pub fn unqualified_field_not_found(name: &str, schema: &DFSchema) -> 
DataFusionError {
+    schema_datafusion_err!(SchemaError::FieldNotFound {
+        field: Box::new(Column::new_unqualified(name)),
+        valid_fields: schema
+            .fields()
+            .iter()
+            .map(|f| f.qualified_column())
+            .collect(),
+    })
+}
 
 #[cfg(test)]
 mod test {
diff --git a/datafusion/core/src/dataframe/mod.rs 
b/datafusion/core/src/dataframe/mod.rs
index 33e198d6d5..f15f1e9ba6 100644
--- a/datafusion/core/src/dataframe/mod.rs
+++ b/datafusion/core/src/dataframe/mod.rs
@@ -1186,7 +1186,7 @@ impl DataFrame {
         let field_to_rename = match 
self.plan.schema().field_from_column(&old_column) {
             Ok(field) => field,
             // no-op if field not found
-            Err(DataFusionError::SchemaError(SchemaError::FieldNotFound { .. 
})) => {
+            Err(DataFusionError::SchemaError(SchemaError::FieldNotFound { .. 
}, _)) => {
                 return Ok(self)
             }
             Err(err) => return Err(err),
diff --git a/datafusion/expr/src/logical_plan/builder.rs 
b/datafusion/expr/src/logical_plan/builder.rs
index a684f3e974..847fbbbf61 100644
--- a/datafusion/expr/src/logical_plan/builder.rs
+++ b/datafusion/expr/src/logical_plan/builder.rs
@@ -1845,13 +1845,16 @@ mod tests {
         .project(vec![col("id"), col("first_name").alias("id")]);
 
         match plan {
-            Err(DataFusionError::SchemaError(SchemaError::AmbiguousReference {
-                field:
-                    Column {
-                        relation: Some(OwnedTableReference::Bare { table }),
-                        name,
-                    },
-            })) => {
+            Err(DataFusionError::SchemaError(
+                SchemaError::AmbiguousReference {
+                    field:
+                        Column {
+                            relation: Some(OwnedTableReference::Bare { table 
}),
+                            name,
+                        },
+                },
+                _,
+            )) => {
                 assert_eq!("employee_csv", table);
                 assert_eq!("id", &name);
                 Ok(())
@@ -1872,13 +1875,16 @@ mod tests {
         .aggregate(vec![col("state")], 
vec![sum(col("salary")).alias("state")]);
 
         match plan {
-            Err(DataFusionError::SchemaError(SchemaError::AmbiguousReference {
-                field:
-                    Column {
-                        relation: Some(OwnedTableReference::Bare { table }),
-                        name,
-                    },
-            })) => {
+            Err(DataFusionError::SchemaError(
+                SchemaError::AmbiguousReference {
+                    field:
+                        Column {
+                            relation: Some(OwnedTableReference::Bare { table 
}),
+                            name,
+                        },
+                },
+                _,
+            )) => {
                 assert_eq!("employee_csv", table);
                 assert_eq!("state", &name);
                 Ok(())
diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs
index c5c30e3a22..a04df5589b 100644
--- a/datafusion/sql/src/planner.rs
+++ b/datafusion/sql/src/planner.rs
@@ -250,7 +250,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
         // Default expressions are restricted, column references are not 
allowed
         let empty_schema = DFSchema::empty();
         let error_desc = |e: DataFusionError| match e {
-            DataFusionError::SchemaError(SchemaError::FieldNotFound { .. }) => 
{
+            DataFusionError::SchemaError(SchemaError::FieldNotFound { .. }, _) 
=> {
                 plan_datafusion_err!(
                     "Column reference is not allowed in the DEFAULT expression 
: {}",
                     e
diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs
index b96553ffbf..b9fb4c65dc 100644
--- a/datafusion/sql/src/statement.rs
+++ b/datafusion/sql/src/statement.rs
@@ -31,9 +31,10 @@ use arrow_schema::DataType;
 use datafusion_common::file_options::StatementOptions;
 use datafusion_common::parsers::CompressionTypeVariant;
 use datafusion_common::{
-    not_impl_err, plan_datafusion_err, plan_err, unqualified_field_not_found, 
Column,
-    Constraints, DFField, DFSchema, DFSchemaRef, DataFusionError, 
OwnedTableReference,
-    Result, ScalarValue, SchemaReference, TableReference, ToDFSchema,
+    not_impl_err, plan_datafusion_err, plan_err, schema_err, 
unqualified_field_not_found,
+    Column, Constraints, DFField, DFSchema, DFSchemaRef, DataFusionError,
+    OwnedTableReference, Result, ScalarValue, SchemaError, SchemaReference,
+    TableReference, ToDFSchema,
 };
 use datafusion_expr::dml::{CopyOptions, CopyTo};
 use 
datafusion_expr::expr_rewriter::normalize_col_with_schemas_and_ambiguity_check;
@@ -1138,11 +1139,9 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
                         .index_of_column_by_name(None, &c)?
                         .ok_or_else(|| unqualified_field_not_found(&c, 
&table_schema))?;
                     if value_indices[column_index].is_some() {
-                        return Err(DataFusionError::SchemaError(
-                            
datafusion_common::SchemaError::DuplicateUnqualifiedField {
-                                name: c,
-                            },
-                        ));
+                        return 
schema_err!(SchemaError::DuplicateUnqualifiedField {
+                            name: c,
+                        });
                     } else {
                         value_indices[column_index] = Some(i);
                     }
diff --git a/datafusion/sql/tests/sql_integration.rs 
b/datafusion/sql/tests/sql_integration.rs
index 48ba501453..4de08a7124 100644
--- a/datafusion/sql/tests/sql_integration.rs
+++ b/datafusion/sql/tests/sql_integration.rs
@@ -756,9 +756,11 @@ fn join_with_ambiguous_column() {
 #[test]
 fn where_selection_with_ambiguous_column() {
     let sql = "SELECT * FROM person a, person b WHERE id = id + 1";
-    let err = logical_plan(sql).expect_err("query should have failed");
+    let err = logical_plan(sql)
+        .expect_err("query should have failed")
+        .strip_backtrace();
     assert_eq!(
-        "SchemaError(AmbiguousReference { field: Column { relation: None, 
name: \"id\" } })",
+        "\"Schema error: Ambiguous reference to unqualified field id\"",
         format!("{err:?}")
     );
 }

Reply via email to