Amir Aliev created ARROW-17474:
----------------------------------
Summary: Pandas read_parquet fails at the pyarrow level
Key: ARROW-17474
URL: https://issues.apache.org/jira/browse/ARROW-17474
Project: Apache Arrow
Issue Type: Bug
Components: Parquet
Environment: Fedora 33
Reporter: Amir Aliev
When reading any of a set of Parquet files in pandas with the pyarrow Parquet
engine, I get a "terminate called without an active exception | Aborted
(core dumped)" error roughly 50% of the time. I ran this under gdb and got the
following backtrace:
#0 0x00007ffff7ad69d5 in raise () from /lib64/libc.so.6
#1 0x00007ffff7abf8a4 in abort () from /lib64/libc.so.6
#2 0x00007fffe6c68926 in __gnu_cxx::__verbose_terminate_handler() [clone
.cold] () from /lib64/libstdc++.so.6
#3 0x00007fffe6c741ac in __cxxabiv1::__terminate(void (*)()) () from
/lib64/libstdc++.so.6
#4 0x00007fffe6c74217 in std::terminate() () from /lib64/libstdc++.so.6
#5 0x00007fffe6c73bcc in __gxx_personality_v0 () from /lib64/libstdc++.so.6
#6 0x00007ffff497dd94 in _Unwind_ForcedUnwind_Phase2 () from
/lib64/libgcc_s.so.1
#7 0x00007ffff497e482 in _Unwind_ForcedUnwind () from /lib64/libgcc_s.so.1
#8 0x00007ffff7a89c26 in __pthread_unwind () from /lib64/libpthread.so.0
#9 0x00007ffff7a816a2 in pthread_exit () from /lib64/libpthread.so.0
#10 0x00007ffff7b1f34a in pthread_exit () from /lib64/libc.so.6
#11 0x00007ffff7e3657b in PyThread_exit_thread () from
/lib64/libpython3.9.so.1.0
#12 0x00007ffff7cc5a8d in take_gil.cold () from /lib64/libpython3.9.so.1.0
#13 0x00007ffff7d8f656 in PyEval_RestoreThread () from
/lib64/libpython3.9.so.1.0
#14 0x00007ffff7e68480 in PyGILState_Ensure () from /lib64/libpython3.9.so.1.0
#15 0x00007fffe6f000e3 in std::_Sp_counted_ptr<arrow::py::PyBuffer*,
(__gnu_cxx::_Lock_policy)2>::_M_dispose() () from
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow_python.so.900
#16 0x00007fffe7519c4a in
std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_M_release() () from
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow.so.900
#17 0x00007fffe751badd in arrow::Future<std::shared_ptr<arrow::Buffer>
>::SetResult(arrow::Result<std::shared_ptr<arrow::Buffer>
>)::{lambda(void*)#1}::_FUN(void*) () from
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow.so.900
#18 0x00007fffe8085929 in arrow::ConcreteFutureImpl::~ConcreteFutureImpl() ()
from /home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow.so.900
#19 0x00007fffe811da3a in arrow::io::internal::ReadRangeCache::Impl::~Impl() ()
from /home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow.so.900
#20 0x00007fffe6643eda in
std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_M_release() () from
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libparquet.so.900
#21 0x00007fffe66ec05e in parquet::ParquetFileReader::~ParquetFileReader() ()
from /home/inc/.local/lib/python3.9/site-packages/pyarrow/libparquet.so.900
#22 0x00007fffe676b98a in parquet::arrow::(anonymous
namespace)::FileReaderImpl::~FileReaderImpl() () from
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libparquet.so.900
#23 0x00007fffe6643eda in
std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_M_release() () from
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libparquet.so.900
#24 0x00007fffe67c950a in
std::_Function_handler<arrow::Future<std::function<arrow::Future<std::shared_ptr<arrow::RecordBatch>
> ()> > (), parquet::arrow::RowGroupGenerator>::_M_manager(std::_Any_data&,
std::_Any_data const&, std::_Manager_operation) ()
from /home/inc/.local/lib/python3.9/site-packages/pyarrow/libparquet.so.900
#25 0x00007fffe67bbe8e in
std::_Sp_counted_ptr_inplace<arrow::MergedGenerator<std::shared_ptr<arrow::RecordBatch>
>::State,
std::allocator<arrow::MergedGenerator<std::shared_ptr<arrow::RecordBatch>
>::State>, (__gnu_cxx::_Lock_policy)2>::_M_dispose() ()
from /home/inc/.local/lib/python3.9/site-packages/pyarrow/libparquet.so.900
#26 0x00007fffe6643eda in
std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_M_release() () from
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libparquet.so.900
#27 0x00007fffe67ba122 in
std::_Function_handler<arrow::Future<std::shared_ptr<arrow::RecordBatch> > (),
arrow::MergedGenerator<std::shared_ptr<arrow::RecordBatch> >
>::_M_manager(std::_Any_data&, std::_Any_data const&, std::_Manager_operation)
()
from /home/inc/.local/lib/python3.9/site-packages/pyarrow/libparquet.so.900
#28 0x00007fffdf126209 in
std::_Sp_counted_ptr_inplace<arrow::dataset::SlicingGenerator::State,
std::allocator<arrow::dataset::SlicingGenerator::State>,
(__gnu_cxx::_Lock_policy)2>::_M_dispose() () from
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow_dataset.so.900
#29 0x00007fffdf26493a in
std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_M_release() () from
/home/inc/.local/lib/python3.9/site-packages/pyarrow/_dataset.cpython-39-x86_64-linux-gnu.so
#30 0x00007fffdf127392 in
std::_Function_handler<arrow::Future<std::shared_ptr<arrow::RecordBatch> > (),
arrow::dataset::SlicingGenerator>::_M_manager(std::_Any_data&, std::_Any_data
const&, std::_Manager_operation) () from
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow_dataset.so.900
#31 0x00007fffdf11b56a in
std::_Sp_counted_ptr_inplace<arrow::SerialReadaheadGenerator<std::shared_ptr<arrow::RecordBatch>
>::State,
std::allocator<arrow::SerialReadaheadGenerator<std::shared_ptr<arrow::RecordBatch>
>::State>, (__gnu_cxx::_Lock_policy)2>::_M_dispose() ()
from
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow_dataset.so.900
#32 0x00007fffdf26493a in
std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_M_release() () from
/home/inc/.local/lib/python3.9/site-packages/pyarrow/_dataset.cpython-39-x86_64-linux-gnu.so
#33 0x00007fffdf126732 in
std::_Function_handler<arrow::Future<std::shared_ptr<arrow::RecordBatch> > (),
arrow::SerialReadaheadGenerator<std::shared_ptr<arrow::RecordBatch> >
>::_M_manager(std::_Any_data&, std::_Any_data const&, std::_Manager_operation)
()
from
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow_dataset.so.900
#34 0x00007fffdf128ae6 in
arrow::Future<std::function<arrow::Future<std::shared_ptr<arrow::RecordBatch> >
()>
>::SetResult(arrow::Result<std::function<arrow::Future<std::shared_ptr<arrow::RecordBatch>
> ()> >)::{lambda(void*)#1}::_FUN(void*) ()
from
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow_dataset.so.900
#35 0x00007fffe8085929 in arrow::ConcreteFutureImpl::~ConcreteFutureImpl() ()
from /home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow.so.900
#36 0x00007fffdf26493a in
std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_M_release() () from
/home/inc/.local/lib/python3.9/site-packages/pyarrow/_dataset.cpython-39-x86_64-linux-gnu.so
#37 0x00007fffdf26493a in
std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_M_release() () from
/home/inc/.local/lib/python3.9/site-packages/pyarrow/_dataset.cpython-39-x86_64-linux-gnu.so
#38 0x00007fffdf1267e2 in
std::_Function_handler<arrow::Future<std::shared_ptr<arrow::RecordBatch> > (),
arrow::FutureFirstGenerator<std::shared_ptr<arrow::RecordBatch> >
>::_M_manager(std::_Any_data&, std::_Any_data const&, std::_Manager_operation)
()
from
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow_dataset.so.900
#39 0x00007fffdf1261d9 in
std::_Sp_counted_ptr_inplace<arrow::DefaultIfEmptyGenerator<std::shared_ptr<arrow::RecordBatch>
>::State,
std::allocator<arrow::DefaultIfEmptyGenerator<std::shared_ptr<arrow::RecordBatch>
>::State>, (__gnu_cxx::_Lock_policy)2>::_M_dispose() ()
from
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow_dataset.so.900
#40 0x00007fffdf26493a in
std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_M_release() () from
/home/inc/.local/lib/python3.9/site-packages/pyarrow/_dataset.cpython-39-x86_64-linux-gnu.so
#41 0x00007fffdf127232 in
std::_Function_handler<arrow::Future<std::shared_ptr<arrow::RecordBatch> > (),
arrow::DefaultIfEmptyGenerator<std::shared_ptr<arrow::RecordBatch> >
>::_M_manager(std::_Any_data&, std::_Any_data const&, std::_Manager_operation)
()
from
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow_dataset.so.900
#42 0x00007fffdf1261a9 in
std::_Sp_counted_ptr_inplace<arrow::EnumeratingGenerator<std::shared_ptr<arrow::RecordBatch>
>::State,
std::allocator<arrow::EnumeratingGenerator<std::shared_ptr<arrow::RecordBatch>
>::State>, (__gnu_cxx::_Lock_policy)2>::_M_dispose() ()
from
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow_dataset.so.900
#43 0x00007fffdf26493a in
std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_M_release() () from
/home/inc/.local/lib/python3.9/site-packages/pyarrow/_dataset.cpython-39-x86_64-linux-gnu.so
#44 0x00007fffdf127182 in
std::_Function_handler<arrow::Future<arrow::Enumerated<std::shared_ptr<arrow::RecordBatch>
> > (), arrow::EnumeratingGenerator<std::shared_ptr<arrow::RecordBatch> >
>::_M_manager(std::_Any_data&, std::_Any_data const&, std::_Manager_operation)
()
from
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow_dataset.so.900
#45 0x00007fffdf128a26 in
arrow::Future<std::function<arrow::Future<arrow::Enumerated<std::shared_ptr<arrow::RecordBatch>
> > ()>
>::SetResult(arrow::Result<std::function<arrow::Future<arrow::Enumerated<std::shared_ptr<arrow::RecordBatch>
> > ()> >)::{lambda(void*)#1}::_FUN(void*) ()
from
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow_dataset.so.900
#46 0x00007fffe8085929 in arrow::ConcreteFutureImpl::~ConcreteFutureImpl() ()
from /home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow.so.900
#47 0x00007fffdf26493a in
std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_M_release() () from
/home/inc/.local/lib/python3.9/site-packages/pyarrow/_dataset.cpython-39-x86_64-linux-gnu.so
#48 0x00007fffdf26493a in
std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_M_release() () from
/home/inc/.local/lib/python3.9/site-packages/pyarrow/_dataset.cpython-39-x86_64-linux-gnu.so
#49 0x00007fffdf1270d2 in
std::_Function_handler<arrow::Future<arrow::Enumerated<std::shared_ptr<arrow::RecordBatch>
> > (),
arrow::FutureFirstGenerator<arrow::Enumerated<std::shared_ptr<arrow::RecordBatch>
> > >::_M_manager(std::_Any_data&, std::_Any_data const&,
std::_Manager_operation) ()
from
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow_dataset.so.900
#50 0x00007fffdf14a6dd in
std::_Sp_counted_ptr_inplace<arrow::MappingGenerator<arrow::Enumerated<std::shared_ptr<arrow::RecordBatch>
>, arrow::dataset::EnumeratedRecordBatch>::State,
std::allocator<arrow::MappingGenerator<arrow::Enumerated<std::shared_ptr<arrow::RecordBatch>
>, arrow::dataset::EnumeratedRecordBatch>::State>,
(__gnu_cxx::_Lock_policy)2>::_M_dispose() () from
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow_dataset.so.900
#51 0x00007fffdf26493a in
std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_M_release() () from
/home/inc/.local/lib/python3.9/site-packages/pyarrow/_dataset.cpython-39-x86_64-linux-gnu.so
#52 0x00007fffdf127022 in
std::_Function_handler<arrow::Future<arrow::dataset::EnumeratedRecordBatch> (),
arrow::MappingGenerator<arrow::Enumerated<std::shared_ptr<arrow::RecordBatch>
>, arrow::dataset::EnumeratedRecordBatch> >::_M_manager(std::_Any_data&,
std::_Any_data const&, std::_Manager_operation) ()
from
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow_dataset.so.900
#53 0x00007fffdf12cd1e in
std::_Sp_counted_ptr_inplace<arrow::MergedGenerator<arrow::dataset::EnumeratedRecordBatch>::State,
std::allocator<arrow::MergedGenerator<arrow::dataset::EnumeratedRecordBatch>::State>,
(__gnu_cxx::_Lock_policy)2>::_M_dispose() ()
from
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow_dataset.so.900
#54 0x00007fffdf26493a in
std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_M_release() () from
/home/inc/.local/lib/python3.9/site-packages/pyarrow/_dataset.cpython-39-x86_64-linux-gnu.so
#55 0x00007fffdf1269f2 in
std::_Function_handler<arrow::Future<arrow::dataset::EnumeratedRecordBatch> (),
arrow::MergedGenerator<arrow::dataset::EnumeratedRecordBatch>
>::_M_manager(std::_Any_data&, std::_Any_data const&, std::_Manager_operation)
()
from
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow_dataset.so.900
#56 0x00007fffdf14a5e2 in
std::_Sp_counted_ptr_inplace<arrow::ReadaheadGenerator<arrow::dataset::EnumeratedRecordBatch>::State,
std::allocator<arrow::ReadaheadGenerator<arrow::dataset::EnumeratedRecordBatch>::State>,
(__gnu_cxx::_Lock_policy)2>::_M_dispose() ()
from
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow_dataset.so.900
#57 0x00007fffdf26493a in
std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_M_release() () from
/home/inc/.local/lib/python3.9/site-packages/pyarrow/_dataset.cpython-39-x86_64-linux-gnu.so
#58 0x00007fffdf126942 in
std::_Function_handler<arrow::Future<arrow::dataset::EnumeratedRecordBatch> (),
arrow::ReadaheadGenerator<arrow::dataset::EnumeratedRecordBatch>
>::_M_manager(std::_Any_data&, std::_Any_data const&, std::_Manager_operation)
()
from
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow_dataset.so.900
#59 0x00007fffdf12ba28 in
std::_Sp_counted_ptr_inplace<arrow::MappingGenerator<arrow::dataset::EnumeratedRecordBatch,
nonstd::optional_lite::optional<arrow::compute::ExecBatch> >::State,
std::allocator<arrow::MappingGenerator<arrow::dataset::EnumeratedRecordBatch,
nonstd::optional_lite::optional<arrow::compute::ExecBatch> >::State>,
(__gnu_cxx::_Lock_policy)2>::_M_dispose() () from
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow_dataset.so.900
#60 0x00007fffdf26493a in
std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_M_release() () from
/home/inc/.local/lib/python3.9/site-packages/pyarrow/_dataset.cpython-39-x86_64-linux-gnu.so
#61 0x00007fffdf126892 in
std::_Function_handler<arrow::Future<nonstd::optional_lite::optional<arrow::compute::ExecBatch>
> (), arrow::MappingGenerator<arrow::dataset::EnumeratedRecordBatch,
nonstd::optional_lite::optional<arrow::compute::ExecBatch> >
>::_M_manager(std::_Any_data&, std::_Any_data const&, std::_Manager_operation)
() from
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow_dataset.so.900
#62 0x00007fffe7e805db in arrow::compute::(anonymous
namespace)::SourceNode::~SourceNode() () from
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow.so.900
#63 0x00007fffe7f355e3 in arrow::compute::(anonymous
namespace)::ExecPlanImpl::~ExecPlanImpl() () from
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow.so.900
#64 0x00007fffe7f35a52 in std::_Sp_counted_ptr<arrow::compute::(anonymous
namespace)::ExecPlanImpl*, (__gnu_cxx::_Lock_policy)2>::_M_dispose() () from
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow.so.900
#65 0x00007fffdf0f0a32 in arrow::internal::FnOnce<void (arrow::FutureImpl
const&)>::FnImpl<arrow::Future<arrow::internal::Empty>::WrapStatusyOnComplete::Callback<arrow::dataset::(anonymous
namespace)::AsyncScanner::ScanBatchesUnorderedAsync(arrow::internal::Executor*,
bool, bool)::{lambda(...)#1}::operator()(...)
const::{lambda()#1}::operator()() const::{lambda(arrow::Status const&)#1}>
>::~FnImpl() () from
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow_dataset.so.900
#66 0x00007fffe808a416 in
arrow::ConcreteFutureImpl::DoMarkFinishedOrFailed(arrow::FutureState) () from
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow.so.900
#67 0x00007fffe75b33ff in void
arrow::Future<arrow::internal::Empty>::MarkFinished<arrow::internal::Empty,
void>(arrow::Status) () from
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow.so.900
#68 0x00007fffe7f16f25 in arrow::internal::FnOnce<void (arrow::FutureImpl
const&)>::FnImpl<arrow::Future<arrow::internal::Empty>::WrapStatusyOnComplete::Callback<arrow::compute::(anonymous
namespace)::ExecPlanImpl::EndTaskGroup()::{lambda(arrow::Status const&)#1}>
>::invoke(arrow::FutureImpl const&) ()
from /home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow.so.900
#69 0x00007fffe808a408 in
arrow::ConcreteFutureImpl::DoMarkFinishedOrFailed(arrow::FutureState) () from
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow.so.900
#70 0x00007fffe75b33ff in void
arrow::Future<arrow::internal::Empty>::MarkFinished<arrow::internal::Empty,
void>(arrow::Status) () from
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow.so.900
#71 0x00007fffe80cf46b in arrow::internal::FnOnce<void (arrow::FutureImpl
const&)>::FnImpl<arrow::Future<arrow::internal::Empty>::WrapStatusyOnComplete::Callback<arrow::util::AsyncTaskGroup::AddTaskUnlocked(arrow::Future<arrow::internal::Empty>
const&, arrow::util::Mutex::Guard)::{lambda(arrow::Status const&)#1}>
>::invoke(arrow::FutureImpl const&) () from
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow.so.900
#72 0x00007fffe808a408 in
arrow::ConcreteFutureImpl::DoMarkFinishedOrFailed(arrow::FutureState) () from
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow.so.900
#73 0x00007fffe75b33ff in void
arrow::Future<arrow::internal::Empty>::MarkFinished<arrow::internal::Empty,
void>(arrow::Status) () from
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow.so.900
#74 0x00007fffe7f3a5cd in arrow::internal::FnOnce<void
()>::FnImpl<std::_Bind<arrow::detail::ContinueFuture
(arrow::Future<arrow::internal::Empty>, std::function<arrow::Status ()>)>
>::invoke() () from
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow.so.900
#75 0x00007fffe8066e7b in
std::thread::_State_impl<std::thread::_Invoker<std::tuple<arrow::internal::ThreadPool::LaunchWorkersUnlocked(int)::{lambda()#1}>
> >::_M_run() () from
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow.so.900
#76 0x00007fffe8f7d5f0 in execute_native_thread_routine () from
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow.so.900
#77 0x00007ffff7a803f9 in start_thread () from /lib64/libpthread.so.0
#78 0x00007ffff7b9ab53 in clone () from /lib64/libc.so.6
--
This message was sent by Atlassian Jira
(v8.20.10#820010)