[ https://issues.apache.org/jira/browse/ARROW-7884?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
ASF GitHub Bot updated ARROW-7884:
----------------------------------
Labels: pull-request-available (was: )
> [C++][Python] Crash in pq.read_table()
> --------------------------------------
>
> Key: ARROW-7884
> URL: https://issues.apache.org/jira/browse/ARROW-7884
> Project: Apache Arrow
> Issue Type: Bug
> Components: C++, Python
> Reporter: Antoine Pitrou
> Assignee: Antoine Pitrou
> Priority: Major
> Labels: pull-request-available
>
> The following crashes:
> {code:python}
> >>> import pyarrow.parquet as pq
> >>>
> >>>
> >>> tab = pq.read_table("../cpp/submodules/parquet-testing/data/nation.dict-malformed.parquet")
> {code}
> Here is the backtrace:
> {code}
> #0 __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:51
> #1 0x00007ffff7805801 in __GI_abort () at abort.c:79
> #2 0x00007fffb8a18e42 in arrow::util::CerrLog::~CerrLog
> (this=0x7fff84001690, __in_chrg=<optimized out>) at
> ../src/arrow/util/logging.cc:50
> #3 0x00007fffb8a18e5e in arrow::util::CerrLog::~CerrLog
> (this=0x7fff84001690, __in_chrg=<optimized out>) at
> ../src/arrow/util/logging.cc:52
> #4 0x00007fffb8a18c9f in arrow::util::ArrowLog::~ArrowLog
> (this=0x7fffaeffbf60, __in_chrg=<optimized out>) at
> ../src/arrow/util/logging.cc:228
> #5 0x00007fffb89e1607 in
> arrow::io::internal::SharedExclusiveChecker::LockExclusive
> (this=0x555555db1338) at ../src/arrow/io/interfaces.cc:287
> #6 0x00007fffb89b0b10 in
> arrow::io::internal::ExclusiveLockGuard<arrow::io::internal::SharedExclusiveChecker>::ExclusiveLockGuard
> (this=0x7fffaeffbff8,
> lock=0x555555db1338) at ../src/arrow/io/concurrency.h:47
> #7 0x00007fffb89ad20f in
> arrow::io::internal::SharedExclusiveChecker::exclusive_guard
> (this=0x555555db1338) at ../src/arrow/io/concurrency.h:74
> #8 0x00007fffb89cb74b in
> arrow::io::internal::RandomAccessFileConcurrencyWrapper<arrow::io::ReadableFile>::GetSize
> (this=0x555555db1320)
> at ../src/arrow/io/concurrency.h:200
> #9 0x00007fffb4ca61f3 in parquet::SerializedRowGroup::GetColumnPageReader
> (this=0x7fff840013e0, i=2) at ../src/parquet/file_reader.cc:117
> #10 0x00007fffb4ca2b0e in parquet::RowGroupReader::GetColumnPageReader
> (this=0x7fff840014b0, i=2) at ../src/parquet/file_reader.cc:75
> #11 0x00007fffb4b03296 in parquet::arrow::FileColumnIterator::NextChunk
> (this=0x7fff84000c10) at ../src/parquet/arrow/reader_internal.h:81
> #12 0x00007fffb4b06ccc in parquet::arrow::LeafReader::NextRowGroup
> (this=0x7fff84000ef0) at ../src/parquet/arrow/reader.cc:452
> #13 0x00007fffb4b0677e in parquet::arrow::LeafReader::LeafReader
> (this=0x7fff84000ef0, ctx=std::shared_ptr<parquet::arrow::ReaderContext>
> (empty) = {...},
> field=std::shared_ptr<arrow::Field> (empty) = {...},
> input=std::unique_ptr<parquet::arrow::FileColumnIterator> = {...}) at
> ../src/parquet/arrow/reader.cc:407
> #14 0x00007fffb4afbdac in parquet::arrow::GetReader (field=...,
> ctx=std::shared_ptr<parquet::arrow::ReaderContext> (use count 2, weak count
> 0) = {...},
> out=0x7fffaeffc580) at ../src/parquet/arrow/reader.cc:709
> #15 0x00007fffb4b0425a in parquet::arrow::FileReaderImpl::GetFieldReader
> (this=0x555555dbf480, i=2,
> included_leaves=std::shared_ptr<std::unordered_set<int, std::hash<int>,
> std::equal_to<int>, std::allocator<int> >> (use count 5, weak count 0) =
> {...},
> row_groups=std::vector of length 1, capacity 1 = {...},
> out=0x7fffaeffc580) at ../src/parquet/arrow/reader.cc:173
> #16 0x00007fffb4b04451 in parquet::arrow::FileReaderImpl::ReadSchemaField
> (this=0x555555dbf480, i=2,
> included_leaves=std::shared_ptr<std::unordered_set<int, std::hash<int>,
> std::equal_to<int>, std::allocator<int> >> (use count 5, weak count 0) =
> {...},
> row_groups=std::vector of length 1, capacity 1 = {...},
> out_field=0x555555dce870, out=0x555555dce790) at
> ../src/parquet/arrow/reader.cc:186
> #17 0x00007fffb4afcf7f in
> parquet::arrow::FileReaderImpl::<lambda(int)>::operator()(int) const
> (__closure=0x555555dd4e08, i=2) at ../src/parquet/arrow/reader.cc:810
> #18 0x00007fffb4b01151 in std::__invoke_impl<arrow::Status,
> parquet::arrow::FileReaderImpl::ReadRowGroups(const std::vector<int>&, const
> std::vector<int>&, std::shared_ptr<arrow::Table>*)::<lambda(int)>&,
> int&>(std::__invoke_other, parquet::arrow::FileReaderImpl::<lambda(int)> &,
> int &) (__f=..., __args#0=@0x555555dd4e38: 2)
> at /usr/include/c++/7/bits/invoke.h:60
> #19 0x00007fffb4b010dc in
> std::__invoke<parquet::arrow::FileReaderImpl::ReadRowGroups(const
> std::vector<int>&, const std::vector<int>&,
> std::shared_ptr<arrow::Table>*)::<lambda(int)>&,
> int&>(parquet::arrow::FileReaderImpl::<lambda(int)> &, int &) (__fn=...,
> __args#0=@0x555555dd4e38: 2) at /usr/include/c++/7/bits/invoke.h:96
> #20 0x00007fffb4b00fe1 in
> std::_Bind<parquet::arrow::FileReaderImpl::ReadRowGroups(const
> std::vector<int>&, const std::vector<int>&,
> std::shared_ptr<arrow::Table>*)::<lambda(int)>(int)>::__call<arrow::Status,
> 0>(std::tuple<> &&, std::_Index_tuple<0>) (this=0x555555dd4e08, __args=...)
> at /usr/include/c++/7/functional:469
> #21 0x00007fffb4b00b0d in
> std::_Bind<parquet::arrow::FileReaderImpl::ReadRowGroups(const
> std::vector<int>&, const std::vector<int>&,
> std::shared_ptr<arrow::Table>*)::<lambda(int)>(int)>::operator()<>(void)
> (this=0x555555dd4e08) at /usr/include/c++/7/functional:551
> #22 0x00007fffb4b00742 in std::__invoke_impl<arrow::Status,
> std::_Bind<parquet::arrow::FileReaderImpl::ReadRowGroups(const
> std::vector<int>&, const std::vector<int>&,
> std::shared_ptr<arrow::Table>*)::<lambda(int)>(int)>&>(std::__invoke_other,
> std::_Bind<parquet::arrow::FileReaderImpl::ReadRowGroups(const
> std::vector<int>&, const std::vector<int>&,
> std::shared_ptr<arrow::Table>*)::<lambda(int)>(int)> &) (__f=...) at
> /usr/include/c++/7/bits/invoke.h:60
> #23 0x00007fffb4b004c5 in
> std::__invoke<std::_Bind<parquet::arrow::FileReaderImpl::ReadRowGroups(const
> std::vector<int>&, const std::vector<int>&,
> std::shared_ptr<arrow::Table>*)::<lambda(int)>(int)>&>(std::_Bind<parquet::arrow::FileReaderImpl::ReadRowGroups(const
> std::vector<int>&, const std::vector<int>&,
> std::shared_ptr<arrow::Table>*)::<lambda(int)>(int)> &) (__fn=...) at
> /usr/include/c++/7/bits/invoke.h:96
> #24 0x00007fffb4b001bb in
> std::__future_base::_Task_state<std::_Bind<parquet::arrow::FileReaderImpl::ReadRowGroups(const
> std::vector<int>&, const std::vector<int>&,
> std::shared_ptr<arrow::Table>*)::<lambda(int)>(int)>, std::allocator<int>,
> arrow::Status()>::<lambda()>::operator()(void) const
> (__closure=0x7fffaeffcc28)
> at /usr/include/c++/7/future:1421
> #25 0x00007fffb4b00bf9 in
> std::__future_base::_Task_setter<std::unique_ptr<std::__future_base::_Result<arrow::Status>,
> std::__future_base::_Result_base::_Deleter>,
> std::__future_base::_Task_state<_Fn, _Alloc, _Res(_Args ...)>::_M_run(_Args&&
> ...) [with _Fn =
> std::_Bind<parquet::arrow::FileReaderImpl::ReadRowGroups(const
> std::vector<int>&, const std::vector<int>&,
> std::shared_ptr<arrow::Table>*)::<lambda(int)>(int)>; _Alloc =
> std::allocator<int>; _Res = arrow::Status; _Args = {}]::<lambda()>,
> arrow::Status>::operator()(void) const (this=0x7fffaeffcc30) at
> /usr/include/c++/7/future:1339
> #26 0x00007fffb4b0080a in
> std::_Function_handler<std::unique_ptr<std::__future_base::_Result_base,
> std::__future_base::_Result_base::_Deleter>(),
> std::__future_base::_Task_setter<std::unique_ptr<std::__future_base::_Result<arrow::Status>,
> std::__future_base::_Result_base::_Deleter>,
> std::__future_base::_Task_state<_Fn, _Alloc, _Res(_Args ...)>::_M_run(_Args&&
> ...) [with _Fn =
> std::_Bind<parquet::arrow::FileReaderImpl::ReadRowGroups(const
> std::vector<int>&, const std::vector<int>&,
> std::shared_ptr<arrow::Table>*)::<lambda(int)>(int)>; _Alloc =
> std::allocator<int>; _Res = arrow::Status; _Args = {}]::<lambda()>,
> arrow::Status> >::_M_invoke(const std::_Any_data &) (
> __functor=...) at /usr/include/c++/7/bits/std_function.h:302
> #27 0x00007fffb4b0861e in
> std::function<std::unique_ptr<std::__future_base::_Result_base,
> std::__future_base::_Result_base::_Deleter> ()>::operator()() const (
> this=0x7fffaeffcc30) at /usr/include/c++/7/bits/std_function.h:706
> #28 0x00007fffb4b028e6 in
> std::__future_base::_State_baseV2::_M_do_set(std::function<std::unique_ptr<std::__future_base::_Result_base,
> std::__future_base::_Result_base::_Deleter> ()>*, bool*)
> (this=0x555555dd4de0, __f=0x7fffaeffcc30, __did_set=0x7fffaeffcb9f) at
> /usr/include/c++/7/future:561
> #29 0x00007fffb4b126c5 in std::__invoke_impl<void, void
> (std::__future_base::_State_baseV2::*)(std::function<std::unique_ptr<std::__future_base::_Result_base,
> std::__future_base::_Result_base::_Deleter> ()>*, bool*),
> std::__future_base::_State_baseV2*,
> std::function<std::unique_ptr<std::__future_base::_Result_base,
> std::__future_base::_Result_base::_Deleter> ()>*,
> bool*>(std::__invoke_memfun_deref, void
> (std::__future_base::_State_baseV2::*&&)(std::function<std::unique_ptr<std::__future_base::_Result_base,
> std::__future_base::_Result_base::_Deleter> ()>*, bool*),
> std::__future_base::_State_baseV2*&&,
> std::function<std::unique_ptr<std::__future_base::_Result_base,
> std::__future_base::_Result_base::_Deleter> ()>*&&, bool*&&) (__f=
> @0x7fffaeffcbe0: (void
> (std::__future_base::_State_baseV2::*)(std::__future_base::_State_baseV2 *
> const, std::function<std::unique_ptr<std::__future_base::_Result_base,
> std::__future_base::_Result_base::_Deleter>()> *, bool *)) 0x7fffb4b028b0
> <std::__future_base::_State_baseV2::_M_do_set(std::function<std::unique_ptr<std::__future_base::_Result_base,
> std::__future_base::_Result_base::_Deleter> ()>*, bool*)>,
> __t=@0x7fffaeffcbb0: 0x555555dd4de0, __args#0=@0x7fffaeffcbb8:
> 0x7fffaeffcc30,
> __args#1=@0x7fffaeffcbc0: 0x7fffaeffcb9f) at
> /usr/include/c++/7/bits/invoke.h:73
> #30 0x00007fffb4b0c9f2 in std::__invoke<void
> (std::__future_base::_State_baseV2::*)(std::function<std::unique_ptr<std::__future_base::_Result_base,
> std::__future_base::_Result_base::_Deleter> ()>*, bool*),
> std::__future_base::_State_baseV2*,
> std::function<std::unique_ptr<std::__future_base::_Result_base,
> std::__future_base::_Result_base::_Deleter> ()>*, bool*>(void
> (std::__future_base::_State_baseV2::*&&)(std::function<std::unique_ptr<std::__future_base::_Result_base,
> std::__future_base::_Result_base::_Deleter> ()>*, bool*),
> std::__future_base::_State_baseV2*&&,
> std::function<std::unique_ptr<std::__future_base::_Result_base,
> std::__future_base::_Result_base::_Deleter> ()>*&&, bool*&&) (__fn=
> @0x7fffaeffcbe0: (void
> (std::__future_base::_State_baseV2::*)(std::__future_base::_State_baseV2 *
> const, std::function<std::unique_ptr<std::__future_base::_Result_base,
> std::__future_base::_Result_base::_Deleter>()> *, bool *)) 0x7fffb4b028b0
> <std::__future_base::_State_baseV2::_M_do_set(std::function<std::unique_ptr<std::__future_base::_Result_base,
> std::__future_base::_Result_base::_Deleter> ()>*, bool*)>,
> __args#0=@0x7fffaeffcbb0: 0x555555dd4de0, __args#1=@0x7fffaeffcbb8:
> 0x7fffaeffcc30,
> __args#2=@0x7fffaeffcbc0: 0x7fffaeffcb9f) at
> /usr/include/c++/7/bits/invoke.h:95
> #31 0x00007fffb4b0819e in void std::call_once<void
> (std::__future_base::_State_baseV2::*)(std::function<std::unique_ptr<std::__future_base::_Result_base,
> std::__future_base::_Result_base::_Deleter> ()>*, bool*),
> std::__future_base::_State_baseV2*,
> std::function<std::unique_ptr<std::__future_base::_Result_base,
> std::__future_base::_Result_base::_Deleter> ()>*, bool*>(std::once_flag&,
> void
> (std::__future_base::_State_baseV2::*&&)(std::function<std::unique_ptr<std::__future_base::_Result_base,
> std::__future_base::_Result_base::_Deleter> ()>*, bool*),
> std::__future_base::_State_baseV2*&&,
> std::function<std::unique_ptr<std::__future_base::_Result_base,
> std::__future_base::_Result_base::_Deleter> ()>*&&,
> bool*&&)::{lambda()#1}::operator()() const (__closure=0x7fffaeffcb20) at
> /usr/include/c++/7/mutex:672
> #32 0x00007fffb4b08209 in void std::call_once<void
> (std::__future_base::_State_baseV2::*)(std::function<std::unique_ptr<std::__future_base::_Result_base,
> std::__future_base::_Result_base::_Deleter> ()>*, bool*),
> std::__future_base::_State_baseV2*,
> std::function<std::unique_ptr<std::__future_base::_Result_base,
> std::__future_base::_Result_base::_Deleter> ()>*, bool*>(std::once_flag&,
> void
> (std::__future_base::_State_baseV2::*&&)(std::function<std::unique_ptr<std::__future_base::_Result_base,
> std::__future_base::_Result_base::_Deleter> ()>*, bool*),
> std::__future_base::_State_baseV2*&&,
> std::function<std::unique_ptr<std::__future_base::_Result_base,
> std::__future_base::_Result_base::_Deleter> ()>*&&,
> bool*&&)::{lambda()#2}::operator()() const (__closure=0x0) at
> /usr/include/c++/7/mutex:677
> #33 0x00007fffb4b0821a in void std::call_once<void
> (std::__future_base::_State_baseV2::*)(std::function<std::unique_ptr<std::__future_base::_Result_base,
> std::__future_base::_Result_base::_Deleter> ()>*, bool*),
> std::__future_base::_State_baseV2*,
> std::function<std::unique_ptr<std::__future_base::_Result_base,
> std::__future_base::_Result_base::_Deleter> ()>*, bool*>(std::once_flag&,
> void
> (std::__future_base::_State_baseV2::*&&)(std::function<std::unique_ptr<std::__future_base::_Result_base,
> std::__future_base::_Result_base::_Deleter> ()>*, bool*),
> std::__future_base::_State_baseV2*&&,
> std::function<std::unique_ptr<std::__future_base::_Result_base,
> std::__future_base::_Result_base::_Deleter> ()>*&&,
> bool*&&)::{lambda()#2}::_FUN() () at /usr/include/c++/7/mutex:677
> #34 0x00007ffff7bc5827 in __pthread_once_slow (once_control=0x555555dd4df8,
> init_routine=0x7fffb6d52782 <std::__once_proxy()>) at pthread_once.c:116
> #35 0x00007fffb4afa2fa in __gthread_once (__once=0x555555dd4df8,
> __func=0x7fffb6d52782 <std::__once_proxy()>)
> at /usr/include/x86_64-linux-gnu/c++/7/bits/gthr-default.h:699
> #36 0x00007fffb4b082d3 in std::call_once<void
> (std::__future_base::_State_baseV2::*)(std::function<std::unique_ptr<std::__future_base::_Result_base,
> std::__future_base::_Result_base::_Deleter> ()>*, bool*),
> std::__future_base::_State_baseV2*,
> std::function<std::unique_ptr<std::__future_base::_Result_base,
> std::__future_base::_Result_base::_Deleter> ()>*, bool*>(std::once_flag&,
> void
> (std::__future_base::_State_baseV2::*&&)(std::function<std::unique_ptr<std::__future_base::_Result_base,
> std::__future_base::_Result_base::_Deleter> ()>*, bool*),
> std::__future_base::_State_baseV2*&&,
> std::function<std::unique_ptr<std::__future_base::_Result_base,
> std::__future_base::_Result_base::_Deleter> ()>*&&, bool*&&) (__once=..., __f=
> @0x7fffaeffcbe0: (void
> (std::__future_base::_State_baseV2::*)(std::__future_base::_State_baseV2 *
> const, std::function<std::unique_ptr<std::__future_base::_Result_base,
> std::__future_base::_Result_base::_Deleter>()> *, bool *)) 0x7fffb4b028b0
> <std::__future_base::_State_baseV2::_M_do_set(std::function<std::unique_ptr<std::__future_base::_Result_base,
> std::__future_base::_Result_base::_Deleter> ()>*, bool*)>,
> __args#0=@0x7fffaeffcbb0: 0x555555dd4de0, __args#1=@0x7fffaeffcbb8:
> 0x7fffaeffcc30,
> __args#2=@0x7fffaeffcbc0: 0x7fffaeffcb9f) at /usr/include/c++/7/mutex:684
> #37 0x00007fffb4b02488 in
> std::__future_base::_State_baseV2::_M_set_result(std::function<std::unique_ptr<std::__future_base::_Result_base,
> std::__future_base::_Result_base::_Deleter> ()>, bool) (this=0x555555dd4de0,
> __res=..., __ignore_failure=false) at /usr/include/c++/7/future:401
> #38 0x00007fffb4b00241 in
> std::__future_base::_Task_state<std::_Bind<parquet::arrow::FileReaderImpl::ReadRowGroups(const
> std::vector<int>&, const std::vector<int>&,
> std::shared_ptr<arrow::Table>*)::<lambda(int)>(int)>, std::allocator<int>,
> arrow::Status()>::_M_run(void) (this=0x555555dd4de0) at
> /usr/include/c++/7/future:1423
> #39 0x00007fffb708834d in std::packaged_task<arrow::Status ()>::operator()()
> (this=0x555555dd4e90) at /usr/include/c++/7/future:1556
> #40 0x00007fffb7080078 in
> arrow::internal::detail::packaged_task_wrapper<arrow::Status>::operator()
> (this=0x555555dd4eb0) at ../src/arrow/util/thread_pool.h:70
> #41 0x00007fffb4b15d78 in std::_Function_handler<void (),
> arrow::internal::detail::packaged_task_wrapper<arrow::Status>
> >::_M_invoke(std::_Any_data const&) (
> __functor=...) at /usr/include/c++/7/bits/std_function.h:316
> #42 0x00007fffb8a2c73e in std::function<void ()>::operator()() const
> (this=0x7fffaeffcd50) at /usr/include/c++/7/bits/std_function.h:706
> #43 0x00007fffb8a2a644 in arrow::internal::WorkerLoop
> (state=std::shared_ptr<arrow::internal::ThreadPool::State> (use count 49,
> weak count 0) = {...}, it={
> _M_id = {
> _M_thread = 140736129390336
> }
> }) at ../src/arrow/util/thread_pool.cc:88
> #44 0x00007fffb8a2b032 in
> arrow::internal::ThreadPool::<lambda()>::operator()(void) const
> (__closure=0x555555dd2578) at ../src/arrow/util/thread_pool.cc:225
> #45 0x00007fffb8a2bc93 in std::__invoke_impl<void,
> arrow::internal::ThreadPool::LaunchWorkersUnlocked(int)::<lambda()>
> >(std::__invoke_other, arrow::internal::ThreadPool::<lambda()> &&) (__f=...)
> at /usr/include/c++/7/bits/invoke.h:60
> #46 0x00007fffb8a2b9d4 in
> std::__invoke<arrow::internal::ThreadPool::LaunchWorkersUnlocked(int)::<lambda()>
> >(arrow::internal::ThreadPool::<lambda()> &&) (__fn=...)
> at /usr/include/c++/7/bits/invoke.h:95
> #47 0x00007fffb8a2bec6 in
> std::thread::_Invoker<std::tuple<arrow::internal::ThreadPool::LaunchWorkersUnlocked(int)::<lambda()>
> > >::_M_invoke<0>(std::_Index_tuple<0>)
> (this=0x555555dd2578) at /usr/include/c++/7/thread:234
> #48 0x00007fffb8a2be82 in
> std::thread::_Invoker<std::tuple<arrow::internal::ThreadPool::LaunchWorkersUnlocked(int)::<lambda()>
> > >::operator()(void) (
> this=0x555555dd2578) at /usr/include/c++/7/thread:243
> #49 0x00007fffb8a2be52 in
> std::thread::_State_impl<std::thread::_Invoker<std::tuple<arrow::internal::ThreadPool::LaunchWorkersUnlocked(int)::<lambda()>
> > > >::_M_run(void) (this=0x555555dd2570) at /usr/include/c++/7/thread:186
> #50 0x00007fffb6d53163 in std::execute_native_thread_routine
> (__p=0x555555dd2570)
> at
> /home/conda/feedstock_root/build_artifacts/ctng-compilers_1578638331887/work/.build/x86_64-conda_cos6-linux-gnu/src/gcc/libstdc++-v3/src/c++11/thread.cc:80
> #51 0x00007ffff7bbd6db in start_thread (arg=0x7fffaeffd700) at
> pthread_create.c:463
> #52 0x00007ffff78e688f in clone () at
> ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
> {code}
--
This message was sent by Atlassian Jira
(v8.3.4#803005)