alamb commented on issue #13510:
URL: https://github.com/apache/datafusion/issues/13510#issuecomment-2558505287

   Summary:
   - This issue can be worked around by setting the
`datafusion.execution.parquet.schema_force_view_types` config to `false`.
   
   
   The error comes from arrow-rs. Source:
https://github.com/apache/arrow-rs/blob/2c84f243b882eff69806cd7294d38bf422fdb24a/arrow-ord/src/cmp.rs#L241
   
   Here is the stack of the error:
   ```
   arrow_ord::cmp::compare_op cmp.rs:246
   arrow_ord::cmp::eq cmp.rs:79
   datafusion_physical_plan::joins::hash_join::eq_dyn_null hash_join.rs:1220
   datafusion_physical_plan::joins::hash_join::equal_rows_arr hash_join.rs:1242
   datafusion_physical_plan::joins::hash_join::lookup_join_hashmap 
hash_join.rs:1190
   
datafusion_physical_plan::joins::hash_join::HashJoinStream::process_probe_batch 
hash_join.rs:1374
   datafusion_physical_plan::joins::hash_join::HashJoinStream::poll_next_impl 
hash_join.rs:1290
   <datafusion_physical_plan::joins::hash_join::HashJoinStream as 
futures_core::stream::Stream>::poll_next hash_join.rs:1532
   <core::pin::Pin<P> as futures_core::stream::Stream>::poll_next stream.rs:130
   futures_util::stream::stream::StreamExt::poll_next_unpin mod.rs:1638
   
datafusion_physical_plan::coalesce_batches::CoalesceBatchesStream::poll_next_inner
 coalesce_batches.rs:293
   <datafusion_physical_plan::coalesce_batches::CoalesceBatchesStream as 
futures_core::stream::Stream>::poll_next coalesce_batches.rs:229
   <core::pin::Pin<P> as futures_core::stream::Stream>::poll_next stream.rs:130
   futures_util::stream::stream::StreamExt::poll_next_unpin mod.rs:1638
   <futures_util::stream::stream::next::Next<St> as 
core::future::future::Future>::poll next.rs:32
   
datafusion_physical_plan::stream::RecordBatchReceiverStreamBuilder::run_input::{{closure}}
 stream.rs:288
   tokio::runtime::task::core::Core<T,S>::poll::{{closure}} core.rs:331
   [Inlined] tokio::loom::std::unsafe_cell::UnsafeCell<T>::with_mut 
unsafe_cell.rs:16
   tokio::runtime::task::core::Core<T,S>::poll core.rs:320
   tokio::runtime::task::harness::poll_future::{{closure}} harness.rs:499
   <core::panic::unwind_safe::AssertUnwindSafe<F> as 
core::ops::function::FnOnce<()>>::call_once unwind_safe.rs:272
   std::panicking::try::do_call panicking.rs:557
   __rust_try 0x0000000105056f44
   [Inlined] std::panicking::try panicking.rs:520
   std::panic::catch_unwind panic.rs:358
   tokio::runtime::task::harness::poll_future harness.rs:487
   tokio::runtime::task::harness::Harness<T,S>::poll_inner harness.rs:209
   tokio::runtime::task::harness::Harness<T,S>::poll harness.rs:154
   tokio::runtime::task::raw::poll raw.rs:271
   tokio::runtime::task::raw::RawTask::poll raw.rs:201
   tokio::runtime::task::LocalNotified<S>::run mod.rs:435
   
tokio::runtime::scheduler::multi_thread::worker::Context::run_task::{{closure}} 
worker.rs:596
   [Inlined] tokio::runtime::coop::with_budget coop.rs:107
   [Inlined] tokio::runtime::coop::budget coop.rs:73
   tokio::runtime::scheduler::multi_thread::worker::Context::run_task 
worker.rs:595
   tokio::runtime::scheduler::multi_thread::worker::Context::run worker.rs:558
   
tokio::runtime::scheduler::multi_thread::worker::run::{{closure}}::{{closure}} 
worker.rs:511
   tokio::runtime::context::scoped::Scoped<T>::set scoped.rs:40
   tokio::runtime::context::set_scheduler::{{closure}} context.rs:180
   std::thread::local::LocalKey<T>::try_with local.rs:283
   std::thread::local::LocalKey<T>::with local.rs:260
   tokio::runtime::context::set_scheduler context.rs:180
   tokio::runtime::scheduler::multi_thread::worker::run::{{closure}} 
worker.rs:506
   tokio::runtime::context::runtime::enter_runtime runtime.rs:65
   tokio::runtime::scheduler::multi_thread::worker::run worker.rs:498
   tokio::runtime::scheduler::multi_thread::worker::Launch::launch::{{closure}} 
worker.rs:464
   <tokio::runtime::blocking::task::BlockingTask<T> as 
core::future::future::Future>::poll task.rs:42
   tokio::runtime::task::core::Core<T,S>::poll::{{closure}} core.rs:331
   [Inlined] tokio::loom::std::unsafe_cell::UnsafeCell<T>::with_mut 
unsafe_cell.rs:16
   tokio::runtime::task::core::Core<T,S>::poll core.rs:320
   tokio::runtime::task::harness::poll_future::{{closure}} harness.rs:499
   <core::panic::unwind_safe::AssertUnwindSafe<F> as 
core::ops::function::FnOnce<()>>::call_once unwind_safe.rs:272
   std::panicking::try::do_call panicking.rs:557
   __rust_try 0x000000010683e59c
   [Inlined] std::panicking::try panicking.rs:520
   std::panic::catch_unwind panic.rs:358
   tokio::runtime::task::harness::poll_future harness.rs:487
   tokio::runtime::task::harness::Harness<T,S>::poll_inner harness.rs:209
   tokio::runtime::task::harness::Harness<T,S>::poll harness.rs:154
   tokio::runtime::task::raw::poll raw.rs:271
   tokio::runtime::task::raw::RawTask::poll raw.rs:201
   tokio::runtime::task::UnownedTask<S>::run mod.rs:472
   tokio::runtime::blocking::pool::Task::run pool.rs:161
   tokio::runtime::blocking::pool::Inner::run pool.rs:511
   tokio::runtime::blocking::pool::Spawner::spawn_thread::{{closure}} 
pool.rs:469
   std::sys::backtrace::__rust_begin_short_backtrace backtrace.rs:154
   std::thread::Builder::spawn_unchecked_::{{closure}}::{{closure}} mod.rs:538
   <core::panic::unwind_safe::AssertUnwindSafe<F> as 
core::ops::function::FnOnce<()>>::call_once unwind_safe.rs:272
   std::panicking::try::do_call panicking.rs:557
   __rust_try 0x000000010683008c
   [Inlined] std::panicking::try panicking.rs:520
   [Inlined] std::panic::catch_unwind panic.rs:358
   std::thread::Builder::spawn_unchecked_::{{closure}} mod.rs:537
   core::ops::function::FnOnce::call_once{{vtable.shim}} function.rs:250
   [Inlined] <alloc::boxed::Box<F,A> as 
core::ops::function::FnOnce<Args>>::call_once boxed.rs:2454
   [Inlined] <alloc::boxed::Box<F,A> as 
core::ops::function::FnOnce<Args>>::call_once boxed.rs:2454
   std::sys::pal::unix::thread::Thread::new::thread_start thread.rs:105
   _pthread_start 0x00000001940832e4
   ```
   
   I suspect what is needed to fix this issue is to insert a coercion 
somewhere in DataFusion so the join key is correctly coerced.
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Reply via email to