alamb commented on issue #2712:
URL:
https://github.com/apache/arrow-datafusion/issues/2712#issuecomment-1150297976
Here is a self-contained reproducer:
```rust
use std::sync::Arc;
use datafusion::prelude::*;
use datafusion::arrow::array::Int32Array;
use datafusion::datasource::MemTable;
use datafusion::execution::context::TaskContext;
use datafusion::logical_plan::{LogicalPlanBuilder, provider_as_source};
use datafusion::physical_plan::collect;
use datafusion::error::Result;
use datafusion::arrow::{self, record_batch::RecordBatch};
/// Reproducer: scans a one-row in-memory table, applies an OR-chain filter
/// built from repeated `IS NULL` / equality predicates on column `a`,
/// optimizes the logical plan twice, then plans, executes, and prints the
/// resulting batches.
#[tokio::main]
async fn main() -> Result<()> {
    let ctx = SessionContext::new();

    // One-column ("a") batch holding a single non-null value.
    let values: Int32Array = vec![Some(1)].into_iter().collect();
    let batch = RecordBatch::try_from_iter(vec![("a", Arc::new(values) as _)]).unwrap();
    let schema = batch.schema();
    let table = MemTable::try_new(schema, vec![vec![batch]]).unwrap();

    // Building blocks for the predicate; each call yields a fresh expression.
    let a_is_null = || col("a").is_null();
    let a_equals = |value: i32| col("a").eq(lit(value));

    // a IS NULL
    //   OR a = 2
    //   OR (a IS NULL AND a = 5)
    //   OR (a IS NULL OR a = 2)
    let predicate = a_is_null()
        .or(a_equals(2))
        .or(a_is_null().and(a_equals(5)))
        .or(a_is_null().or(a_equals(2)));

    // Scan every column (no projection) and apply the filter.
    let source = provider_as_source(Arc::new(table));
    let logical_plan = LogicalPlanBuilder::scan("cpu_load_short", source, None)
        .unwrap()
        .filter(predicate)
        .unwrap()
        .build()
        .unwrap();

    // manually optimize the plan
    let state = ctx.state.read().clone();
    let first_pass = state.optimize(&logical_plan).unwrap();
    // THIS IS THE KEY: optimize it a second time
    let second_pass = state.optimize(&first_pass).unwrap();

    let physical_plan = state
        .query_planner
        .create_physical_plan(&second_pass, &state)
        .await
        .unwrap();
    let task_ctx = Arc::new(TaskContext::from(&state));
    let results: Vec<RecordBatch> = collect(physical_plan, task_ctx).await.unwrap();

    // format the results
    let formatted = arrow::util::pretty::pretty_format_batches(&results).unwrap();
    println!("Results:\n\n{}", formatted);
    Ok(())
}
```
Cargo.toml:
```toml
[package]
name = "rust_arrow_playground"
version = "0.1.0"
edition = "2018"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
ahash = "0.7"
tokio = "1.8.2"
tokio-stream = "0.1"
async-trait = "0.1"
futures-util = { version = "0.3.1" }
datafusion = { path = "/Users/alamb/Software/arrow-datafusion/datafusion/core", default-features = false }
once_cell = "1.8.0"
rand = "0.8"
```
When run, it errors like this:
```
cd /Users/alamb/Software/rust_datafusion_playground && RUST_BACKTRACE=1
CARGO_TARGET_DIR=/Users/alamb/Software/df-target cargo run
Compiling rust_arrow_playground v0.1.0
(/Users/alamb/Software/rust_datafusion_playground)
Finished dev [unoptimized + debuginfo] target(s) in 3.77s
Running `/Users/alamb/Software/df-target/debug/rust_arrow_playground`
thread 'main' panicked at 'called `Result::unwrap()` on an `Err` value:
SchemaError(DuplicateUnqualifiedField { name: "IsNull-Column-cpu_load_short.a"
})', src/main.rs:46:54
stack backtrace:
0: rust_begin_unwind
at
/rustc/fe5b13d681f25ee6474be29d748c65adcd91f69e/library/std/src/panicking.rs:584:5
1: core::panicking::panic_fmt
at
/rustc/fe5b13d681f25ee6474be29d748c65adcd91f69e/library/core/src/panicking.rs:143:14
2: core::result::unwrap_failed
at
/rustc/fe5b13d681f25ee6474be29d748c65adcd91f69e/library/core/src/result.rs:1785:5
3: core::result::Result<T,E>::unwrap
at
/rustc/fe5b13d681f25ee6474be29d748c65adcd91f69e/library/core/src/result.rs:1078:23
4: rust_arrow_playground::main::{{closure}}
at ./src/main.rs:46:24
5: <core::future::from_generator::GenFuture<T> as
core::future::future::Future>::poll
at
/rustc/fe5b13d681f25ee6474be29d748c65adcd91f69e/library/core/src/future/mod.rs:91:19
6: tokio::park::thread::CachedParkThread::block_on::{{closure}}
at
/Users/alamb/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.19.2/src/park/thread.rs:263:54
7: tokio::coop::with_budget::{{closure}}
at
/Users/alamb/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.19.2/src/coop.rs:102:9
8: std::thread::local::LocalKey<T>::try_with
at
/rustc/fe5b13d681f25ee6474be29d748c65adcd91f69e/library/std/src/thread/local.rs:442:16
9: std::thread::local::LocalKey<T>::with
at
/rustc/fe5b13d681f25ee6474be29d748c65adcd91f69e/library/std/src/thread/local.rs:418:9
10: tokio::coop::with_budget
at
/Users/alamb/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.19.2/src/coop.rs:95:5
11: tokio::coop::budget
at
/Users/alamb/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.19.2/src/coop.rs:72:5
12: tokio::park::thread::CachedParkThread::block_on
at
/Users/alamb/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.19.2/src/park/thread.rs:263:31
13: tokio::runtime::enter::Enter::block_on
at
/Users/alamb/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.19.2/src/runtime/enter.rs:151:13
14: tokio::runtime::thread_pool::ThreadPool::block_on
at
/Users/alamb/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.19.2/src/runtime/thread_pool/mod.rs:90:9
15: tokio::runtime::Runtime::block_on
at
/Users/alamb/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.19.2/src/runtime/mod.rs:482:43
16: rust_arrow_playground::main
at ./src/main.rs:55:5
17: core::ops::function::FnOnce::call_once
at
/rustc/fe5b13d681f25ee6474be29d748c65adcd91f69e/library/core/src/ops/function.rs:227:5
note: Some details are omitted, run with `RUST_BACKTRACE=full` for a verbose
backtrace.
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]