This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new e253b8e505 Allow `skip_failed_rules` to skip buggy logical plan rules 
that have a schema mismatch (#7277)
e253b8e505 is described below

commit e253b8e505d22dc4b427109917470db39aa3815b
Author: Miklos Szots <[email protected]>
AuthorDate: Mon Aug 14 14:07:10 2023 +0200

    Allow `skip_failed_rules` to skip buggy logical plan rules that have a 
schema mismatch (#7277)
    
    * allow skipping buggy logical plan rules
    
    * test skipping failed rule
    
    * improve error chaining
    
    * fix: clippy
    
    ---------
    
    Co-authored-by: Andrew Lamb <[email protected]>
---
 datafusion/optimizer/src/optimizer.rs | 29 ++++++++++++++++++++++-------
 1 file changed, 22 insertions(+), 7 deletions(-)

diff --git a/datafusion/optimizer/src/optimizer.rs 
b/datafusion/optimizer/src/optimizer.rs
index caac4c34bd..3ce4ecf1c8 100644
--- a/datafusion/optimizer/src/optimizer.rs
+++ b/datafusion/optimizer/src/optimizer.rs
@@ -288,11 +288,16 @@ impl Optimizer {
             log_plan(&format!("Optimizer input (pass {i})"), &new_plan);
 
             for rule in &self.rules {
-                let result = self.optimize_recursively(rule, &new_plan, 
config);
-
+                let result =
+                    self.optimize_recursively(rule, &new_plan, config)
+                        .and_then(|plan| {
+                            if let Some(plan) = &plan {
+                                assert_schema_is_the_same(rule.name(), 
&new_plan, plan)?;
+                            }
+                            Ok(plan)
+                        });
                 match result {
                     Ok(Some(plan)) => {
-                        assert_schema_is_the_same(rule.name(), &new_plan, 
&plan)?;
                         new_plan = plan;
                         observer(&new_plan, rule.as_ref());
                         log_plan(rule.name(), &new_plan);
@@ -428,8 +433,7 @@ fn assert_schema_is_the_same(
 
     if !equivalent {
         let e = DataFusionError::Internal(format!(
-            "Optimizer rule '{}' failed, due to generate a different schema, 
original schema: {:?}, new schema: {:?}",
-            rule_name,
+            "Failed due to generate a different schema, original schema: {:?}, 
new schema: {:?}",
             prev_plan.schema(),
             new_plan.schema()
         ));
@@ -493,8 +497,8 @@ mod tests {
         });
         let err = opt.optimize(&plan, &config, &observe).unwrap_err();
         assert_eq!(
-            "get table_scan rule\ncaused by\n\
-             Internal error: Optimizer rule 'get table_scan rule' failed, due 
to generate a different schema, \
+            "Optimizer rule 'get table_scan rule' failed\ncaused by\nget 
table_scan rule\ncaused by\n\
+             Internal error: Failed due to generate a different schema, \
              original schema: DFSchema { fields: [], metadata: {}, 
functional_dependencies: FunctionalDependencies { deps: [] } }, \
              new schema: DFSchema { fields: [\
              DFField { qualifier: Some(Bare { table: \"test\" }), field: Field 
{ name: \"a\", data_type: UInt32, nullable: false, dict_id: 0, dict_is_ordered: 
false, metadata: {} } }, \
@@ -507,6 +511,17 @@ mod tests {
         );
     }
 
+    #[test]
+    fn skip_generate_different_schema() {
+        let opt = Optimizer::with_rules(vec![Arc::new(GetTableScanRule {})]);
+        let config = OptimizerContext::new().with_skip_failing_rules(true);
+        let plan = LogicalPlan::EmptyRelation(EmptyRelation {
+            produce_one_row: false,
+            schema: Arc::new(DFSchema::empty()),
+        });
+        opt.optimize(&plan, &config, &observe).unwrap();
+    }
+
     #[test]
     fn generate_same_schema_different_metadata() -> Result<()> {
         // if the plan creates more metadata than previously (because

Reply via email to