alamb commented on issue #13510: URL: https://github.com/apache/datafusion/issues/13510#issuecomment-2558505287
Summary: - This issue can be worked around by setting the `datafusion.execution.parquet.schema_force_view_types` config to false. The error comes from arrow-rs (source: https://github.com/apache/arrow-rs/blob/2c84f243b882eff69806cd7294d38bf422fdb24a/arrow-ord/src/cmp.rs#L241). Here is the stack of the error: ``` arrow_ord::cmp::compare_op cmp.rs:246 arrow_ord::cmp::eq cmp.rs:79 datafusion_physical_plan::joins::hash_join::eq_dyn_null hash_join.rs:1220 datafusion_physical_plan::joins::hash_join::equal_rows_arr hash_join.rs:1242 datafusion_physical_plan::joins::hash_join::lookup_join_hashmap hash_join.rs:1190 datafusion_physical_plan::joins::hash_join::HashJoinStream::process_probe_batch hash_join.rs:1374 datafusion_physical_plan::joins::hash_join::HashJoinStream::poll_next_impl hash_join.rs:1290 <datafusion_physical_plan::joins::hash_join::HashJoinStream as futures_core::stream::Stream>::poll_next hash_join.rs:1532 <core::pin::Pin<P> as futures_core::stream::Stream>::poll_next stream.rs:130 futures_util::stream::stream::StreamExt::poll_next_unpin mod.rs:1638 datafusion_physical_plan::coalesce_batches::CoalesceBatchesStream::poll_next_inner coalesce_batches.rs:293 <datafusion_physical_plan::coalesce_batches::CoalesceBatchesStream as futures_core::stream::Stream>::poll_next coalesce_batches.rs:229 <core::pin::Pin<P> as futures_core::stream::Stream>::poll_next stream.rs:130 futures_util::stream::stream::StreamExt::poll_next_unpin mod.rs:1638 <futures_util::stream::stream::next::Next<St> as core::future::future::Future>::poll next.rs:32 datafusion_physical_plan::stream::RecordBatchReceiverStreamBuilder::run_input::{{closure}} stream.rs:288 tokio::runtime::task::core::Core<T,S>::poll::{{closure}} core.rs:331 [Inlined] tokio::loom::std::unsafe_cell::UnsafeCell<T>::with_mut unsafe_cell.rs:16 tokio::runtime::task::core::Core<T,S>::poll core.rs:320 tokio::runtime::task::harness::poll_future::{{closure}} harness.rs:499 <core::panic::unwind_safe::AssertUnwindSafe<F> as 
core::ops::function::FnOnce<()>>::call_once unwind_safe.rs:272 std::panicking::try::do_call panicking.rs:557 __rust_try 0x0000000105056f44 [Inlined] std::panicking::try panicking.rs:520 std::panic::catch_unwind panic.rs:358 tokio::runtime::task::harness::poll_future harness.rs:487 tokio::runtime::task::harness::Harness<T,S>::poll_inner harness.rs:209 tokio::runtime::task::harness::Harness<T,S>::poll harness.rs:154 tokio::runtime::task::raw::poll raw.rs:271 tokio::runtime::task::raw::RawTask::poll raw.rs:201 tokio::runtime::task::LocalNotified<S>::run mod.rs:435 tokio::runtime::scheduler::multi_thread::worker::Context::run_task::{{closure}} worker.rs:596 [Inlined] tokio::runtime::coop::with_budget coop.rs:107 [Inlined] tokio::runtime::coop::budget coop.rs:73 tokio::runtime::scheduler::multi_thread::worker::Context::run_task worker.rs:595 tokio::runtime::scheduler::multi_thread::worker::Context::run worker.rs:558 tokio::runtime::scheduler::multi_thread::worker::run::{{closure}}::{{closure}} worker.rs:511 tokio::runtime::context::scoped::Scoped<T>::set scoped.rs:40 tokio::runtime::context::set_scheduler::{{closure}} context.rs:180 std::thread::local::LocalKey<T>::try_with local.rs:283 std::thread::local::LocalKey<T>::with local.rs:260 tokio::runtime::context::set_scheduler context.rs:180 tokio::runtime::scheduler::multi_thread::worker::run::{{closure}} worker.rs:506 tokio::runtime::context::runtime::enter_runtime runtime.rs:65 tokio::runtime::scheduler::multi_thread::worker::run worker.rs:498 tokio::runtime::scheduler::multi_thread::worker::Launch::launch::{{closure}} worker.rs:464 <tokio::runtime::blocking::task::BlockingTask<T> as core::future::future::Future>::poll task.rs:42 tokio::runtime::task::core::Core<T,S>::poll::{{closure}} core.rs:331 [Inlined] tokio::loom::std::unsafe_cell::UnsafeCell<T>::with_mut unsafe_cell.rs:16 tokio::runtime::task::core::Core<T,S>::poll core.rs:320 tokio::runtime::task::harness::poll_future::{{closure}} harness.rs:499 
<core::panic::unwind_safe::AssertUnwindSafe<F> as core::ops::function::FnOnce<()>>::call_once unwind_safe.rs:272 std::panicking::try::do_call panicking.rs:557 __rust_try 0x000000010683e59c [Inlined] std::panicking::try panicking.rs:520 std::panic::catch_unwind panic.rs:358 tokio::runtime::task::harness::poll_future harness.rs:487 tokio::runtime::task::harness::Harness<T,S>::poll_inner harness.rs:209 tokio::runtime::task::harness::Harness<T,S>::poll harness.rs:154 tokio::runtime::task::raw::poll raw.rs:271 tokio::runtime::task::raw::RawTask::poll raw.rs:201 tokio::runtime::task::UnownedTask<S>::run mod.rs:472 tokio::runtime::blocking::pool::Task::run pool.rs:161 tokio::runtime::blocking::pool::Inner::run pool.rs:511 tokio::runtime::blocking::pool::Spawner::spawn_thread::{{closure}} pool.rs:469 std::sys::backtrace::__rust_begin_short_backtrace backtrace.rs:154 std::thread::Builder::spawn_unchecked_::{{closure}}::{{closure}} mod.rs:538 <core::panic::unwind_safe::AssertUnwindSafe<F> as core::ops::function::FnOnce<()>>::call_once unwind_safe.rs:272 std::panicking::try::do_call panicking.rs:557 __rust_try 0x000000010683008c [Inlined] std::panicking::try panicking.rs:520 [Inlined] std::panic::catch_unwind panic.rs:358 std::thread::Builder::spawn_unchecked_::{{closure}} mod.rs:537 core::ops::function::FnOnce::call_once{{vtable.shim}} function.rs:250 [Inlined] <alloc::boxed::Box<F,A> as core::ops::function::FnOnce<Args>>::call_once boxed.rs:2454 [Inlined] <alloc::boxed::Box<F,A> as core::ops::function::FnOnce<Args>>::call_once boxed.rs:2454 std::sys::pal::unix::thread::Thread::new::thread_start thread.rs:105 _pthread_start 0x00000001940832e4 ``` I suspect what is needed to fix this issue is to insert a coercion somewhere in DataFusion so the join key is correctly coerced. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org