This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new e253b8e505 Allow `skip_failed_rules` to skip buggy logical plan rules
that have a schema mismatch (#7277)
e253b8e505 is described below
commit e253b8e505d22dc4b427109917470db39aa3815b
Author: Miklos Szots <[email protected]>
AuthorDate: Mon Aug 14 14:07:10 2023 +0200
Allow `skip_failed_rules` to skip buggy logical plan rules that have a
schema mismatch (#7277)
* allow skipping buggy logical plan rules
* test skipping failed rule
* improve error chaining
* fix: clippy
---------
Co-authored-by: Andrew Lamb <[email protected]>
---
datafusion/optimizer/src/optimizer.rs | 29 ++++++++++++++++++++++-------
1 file changed, 22 insertions(+), 7 deletions(-)
diff --git a/datafusion/optimizer/src/optimizer.rs b/datafusion/optimizer/src/optimizer.rs
index caac4c34bd..3ce4ecf1c8 100644
--- a/datafusion/optimizer/src/optimizer.rs
+++ b/datafusion/optimizer/src/optimizer.rs
@@ -288,11 +288,16 @@ impl Optimizer {
log_plan(&format!("Optimizer input (pass {i})"), &new_plan);
for rule in &self.rules {
- let result = self.optimize_recursively(rule, &new_plan, config);
-
+ let result =
+ self.optimize_recursively(rule, &new_plan, config)
+ .and_then(|plan| {
+ if let Some(plan) = &plan {
+ assert_schema_is_the_same(rule.name(), &new_plan, plan)?;
+ }
+ Ok(plan)
+ });
match result {
Ok(Some(plan)) => {
- assert_schema_is_the_same(rule.name(), &new_plan, &plan)?;
new_plan = plan;
observer(&new_plan, rule.as_ref());
log_plan(rule.name(), &new_plan);
@@ -428,8 +433,7 @@ fn assert_schema_is_the_same(
if !equivalent {
let e = DataFusionError::Internal(format!(
- "Optimizer rule '{}' failed, due to generate a different schema,
original schema: {:?}, new schema: {:?}",
- rule_name,
+ "Failed due to generate a different schema, original schema: {:?},
new schema: {:?}",
prev_plan.schema(),
new_plan.schema()
));
@@ -493,8 +497,8 @@ mod tests {
});
let err = opt.optimize(&plan, &config, &observe).unwrap_err();
assert_eq!(
- "get table_scan rule\ncaused by\n\
- Internal error: Optimizer rule 'get table_scan rule' failed, due
to generate a different schema, \
+ "Optimizer rule 'get table_scan rule' failed\ncaused by\nget
table_scan rule\ncaused by\n\
+ Internal error: Failed due to generate a different schema, \
original schema: DFSchema { fields: [], metadata: {},
functional_dependencies: FunctionalDependencies { deps: [] } }, \
new schema: DFSchema { fields: [\
DFField { qualifier: Some(Bare { table: \"test\" }), field: Field
{ name: \"a\", data_type: UInt32, nullable: false, dict_id: 0, dict_is_ordered:
false, metadata: {} } }, \
@@ -507,6 +511,17 @@ mod tests {
);
}
+ #[test]
+ fn skip_generate_different_schema() {
+ let opt = Optimizer::with_rules(vec![Arc::new(GetTableScanRule {})]);
+ let config = OptimizerContext::new().with_skip_failing_rules(true);
+ let plan = LogicalPlan::EmptyRelation(EmptyRelation {
+ produce_one_row: false,
+ schema: Arc::new(DFSchema::empty()),
+ });
+ opt.optimize(&plan, &config, &observe).unwrap();
+ }
+
#[test]
fn generate_same_schema_different_metadata() -> Result<()> {
// if the plan creates more metadata than previously (because