[ 
https://issues.apache.org/jira/browse/ARROW-7873?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17039981#comment-17039981
 ] 

Matt Calder commented on ARROW-7873:
------------------------------------

I rebuilt pyarrow with debug symbols, and the backtrace now has line numbers. 
I'm only pasting the first 28 frames of the stack below. The innermost frame 
in Arrow code is:

{code:java}
In arrow/cpp/src/parquet/metadata.cc:792
ApplicationVersion::ApplicationVersion(const std::string& created_by) {
  regex app_regex{ApplicationVersion::APPLICATION_FORMAT};
{code}

Here is the stacktrace:

{code:java}
#0  __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:51
#1  0x00007ffff7a24801 in __GI_abort () at abort.c:79
#2  0x00007ffff63c1957 in ?? () from /usr/lib/x86_64-linux-gnu/libstdc++.so.6
#3  0x00007ffff63c7ab6 in ?? () from /usr/lib/x86_64-linux-gnu/libstdc++.so.6
#4  0x00007ffff63c7af1 in std::terminate() () from 
/usr/lib/x86_64-linux-gnu/libstdc++.so.6
#5  0x00007ffff63c7d24 in __cxa_throw () from 
/usr/lib/x86_64-linux-gnu/libstdc++.so.6
#6  0x00007ffff63c6a52 in __cxa_bad_cast () from 
/usr/lib/x86_64-linux-gnu/libstdc++.so.6
#7  0x00007ffff64131ec in std::__cxx11::collate<char> const& 
std::use_facet<std::__cxx11::collate<char> >(std::locale const&) () from 
/usr/lib/x86_64-linux-gnu/libstdc++.so.6
#8  0x00007fffb7bedd4e in std::__cxx11::regex_traits<char>::transform<char*> 
(this=0x12c7570, __first=0x10e36c0 "", __last=0x10e36c1 "\203\024\001") at 
/usr/include/c++/7/bits/regex.h:233
#9  0x00007fffb7beb6b7 in 
std::__cxx11::regex_traits<char>::transform_primary<char const*> 
(this=0x12c7570, __first=0x7fffffffacb8 "", __last=0x7fffffffacb9 "") at 
/usr/include/c++/7/bits/regex.h:266
#10 0x00007fffb7be6c14 in 
std::__detail::_BracketMatcher<std::__cxx11::regex_traits<char>, false, 
false>::_M_apply(char, std::integral_constant<bool, false>) 
const::{lambda()#1}::operator()() const (
    __closure=0x7fffffffacb0) at /usr/include/c++/7/bits/regex_compiler.tcc:626
#11 0x00007fffb7be6da7 in 
std::__detail::_BracketMatcher<std::__cxx11::regex_traits<char>, false, 
false>::_M_apply (this=0x7fffffffae10, __ch=0 '\000')
    at /usr/include/c++/7/bits/regex_compiler.tcc:634
#12 0x00007fffb7be21be in 
std::__detail::_BracketMatcher<std::__cxx11::regex_traits<char>, false, 
false>::_M_make_cache (this=0x7fffffffae10) at 
/usr/include/c++/7/bits/regex_compiler.h:556
#13 0x00007fffb7bddeb5 in 
std::__detail::_BracketMatcher<std::__cxx11::regex_traits<char>, false, 
false>::_M_ready (this=0x7fffffffae10) at 
/usr/include/c++/7/bits/regex_compiler.h:525
#14 0x00007fffb7bda724 in 
std::__detail::_Compiler<std::__cxx11::regex_traits<char> 
>::_M_insert_character_class_matcher<false, false> (this=0x7fffffffb250)
    at /usr/include/c++/7/bits/regex_compiler.tcc:414
#15 0x00007fffb7bd6687 in 
std::__detail::_Compiler<std::__cxx11::regex_traits<char> >::_M_atom 
(this=0x7fffffffb250) at /usr/include/c++/7/bits/regex_compiler.tcc:327
#16 0x00007fffb7bd3775 in 
std::__detail::_Compiler<std::__cxx11::regex_traits<char> >::_M_term 
(this=0x7fffffffb250) at /usr/include/c++/7/bits/regex_compiler.tcc:139
#17 0x00007fffb7bd0c36 in 
std::__detail::_Compiler<std::__cxx11::regex_traits<char> >::_M_alternative 
(this=0x7fffffffb250) at /usr/include/c++/7/bits/regex_compiler.tcc:121
#18 0x00007fffb7bd0c59 in 
std::__detail::_Compiler<std::__cxx11::regex_traits<char> >::_M_alternative 
(this=0x7fffffffb250) at /usr/include/c++/7/bits/regex_compiler.tcc:124
#19 0x00007fffb7bce50e in 
std::__detail::_Compiler<std::__cxx11::regex_traits<char> >::_M_disjunction 
(this=0x7fffffffb250) at /usr/include/c++/7/bits/regex_compiler.tcc:97
#20 0x00007fffb7bcc0f9 in 
std::__detail::_Compiler<std::__cxx11::regex_traits<char> >::_Compiler 
(this=0x7fffffffb250, 
    __b=0x7fffb7c92c70 
"(.*?)\\s*(?:(version\\s*(?:([^(]*?)\\s*(?:\\(\\s*build\\s*([^)]*?)\\s*\\))?)?)?)",
 __e=0x7fffb7c92cb8 "", __loc=..., __flags=(unknown: 16))
    at /usr/include/c++/7/bits/regex_compiler.tcc:82
#21 0x00007fffb7bc98bc in std::__detail::__compile_nfa<char const*, 
std::__cxx11::regex_traits<char> > (
    __first=0x7fffb7c92c70 
"(.*?)\\s*(?:(version\\s*(?:([^(]*?)\\s*(?:\\(\\s*build\\s*([^)]*?)\\s*\\))?)?)?)",
 __last=0x7fffb7c92cb8 "", __loc=..., __flags=(unknown: 16))
    at /usr/include/c++/7/bits/regex_compiler.h:203
#22 0x00007fffb7bc62e4 in std::__cxx11::basic_regex<char, 
std::__cxx11::regex_traits<char> >::basic_regex<char const*> 
(this=0x7fffffffb510, 
    __first=0x7fffb7c92c70 
"(.*?)\\s*(?:(version\\s*(?:([^(]*?)\\s*(?:\\(\\s*build\\s*([^)]*?)\\s*\\))?)?)?)",
 __last=0x7fffb7c92cb8 "", __loc=..., __f=(unknown: 16))
    at /usr/include/c++/7/bits/regex.h:767
#23 0x00007fffb7bc1abb in std::__cxx11::basic_regex<char, 
std::__cxx11::regex_traits<char> >::basic_regex<char const*> 
(this=0x7fffffffb510, 
    __first=0x7fffb7c92c70 
"(.*?)\\s*(?:(version\\s*(?:([^(]*?)\\s*(?:\\(\\s*build\\s*([^)]*?)\\s*\\))?)?)?)",
 __last=0x7fffb7c92cb8 "", __f=(unknown: 16)) at 
/usr/include/c++/7/bits/regex.h:512
#24 0x00007fffb7bbcd66 in std::__cxx11::basic_regex<char, 
std::__cxx11::regex_traits<char> >::basic_regex (this=0x7fffffffb510, 
    __p=0x7fffb7c92c70 
"(.*?)\\s*(?:(version\\s*(?:([^(]*?)\\s*(?:\\(\\s*build\\s*([^)]*?)\\s*\\))?)?)?)",
 __f=(unknown: 16)) at /usr/include/c++/7/bits/regex.h:445
#25 0x00007fffb7bb200f in parquet::ApplicationVersion::ApplicationVersion 
(this=0x7fffffffb750, created_by="parquet-cpp version 1.5.1-SNAPSHOT") at 
/repos/arrow/cpp/src/parquet/metadata.cc:792
#26 0x00007fffb7bb6be0 in 
parquet::FileMetaData::FileMetaDataImpl::FileMetaDataImpl (this=0xec6df0, 
metadata=0x7fffbd63d120, metadata_len=0x7fffffffbb04, 
    decryptor=std::shared_ptr<parquet::Decryptor> (empty) = {...}) at 
/repos/arrow/cpp/src/parquet/metadata.cc:462
#27 0x00007fffb7bb1449 in parquet::FileMetaData::FileMetaData (this=0xed8c40, 
metadata=0x7fffbd63d120, metadata_len=0x7fffffffbb04, 
decryptor=std::shared_ptr<parquet::Decryptor> (empty) = {...})
    at /repos/arrow/cpp/src/parquet/metadata.cc:651
{code}


> [Python] Segfault in pandas version 1.0.1, read_parquet after creating a 
> clickhouse odbc connection
> ---------------------------------------------------------------------------------------------------
>
>                 Key: ARROW-7873
>                 URL: https://issues.apache.org/jira/browse/ARROW-7873
>             Project: Apache Arrow
>          Issue Type: Bug
>          Components: Python
>         Environment: Ubuntu 18.04
>            Reporter: Matt Calder
>            Priority: Minor
>             Fix For: 1.0.0, 0.16.1
>
>         Attachments: foo.pkl, foo.pq
>
>
> [I posted this issue to the pandas 
> github|https://github.com/pandas-dev/pandas/issues/31981].
> We get a segfault when making a call to pd.read_parquet after having made a 
> connection to ClickHouse via ODBC, like so:
> {code:python}
> import pyodbc
> import pandas as pd
> con_str = 
> f"Driver=libclickhouseodbc.so;url=http://clickhouse/query;timeout=600";
> with pyodbc.connect(con_str, autocommit=True) as con:
>     pass
> df = pd.DataFrame({'A': [1,1,1], 'B': ['a', 'b', 'c']})
> df.to_parquet('/tmp/foo.pq')
> # This line core dumps:
> pd.read_parquet('/tmp/foo.pq')
> {code}
> This happens with pandas version 1.0.1 but not with pandas 0.25.3. Here's a 
> stacktrace:
> {code:java}
> #0  __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:51
> #1  0x00007ffff7a24801 in __GI_abort () at abort.c:79
> #2  0x00007ffff63c1957 in ?? () from /usr/lib/x86_64-linux-gnu/libstdc++.so.6
> #3  0x00007ffff63c7ab6 in ?? () from /usr/lib/x86_64-linux-gnu/libstdc++.so.6
> #4  0x00007ffff63c7af1 in std::terminate() () from 
> /usr/lib/x86_64-linux-gnu/libstdc++.so.6
> #5  0x00007ffff63c7d24 in __cxa_throw () from 
> /usr/lib/x86_64-linux-gnu/libstdc++.so.6
> #6  0x00007ffff63c6a52 in __cxa_bad_cast () from 
> /usr/lib/x86_64-linux-gnu/libstdc++.so.6
> #7  0x00007ffff64131ec in std::__cxx11::collate<char> const& 
> std::use_facet<std::__cxx11::collate<char> >(std::locale const&) () from 
> /usr/lib/x86_64-linux-gnu/libstdc++.so.6
> #8  0x00007fffbe4b8279 in std::__cxx11::basic_string<char, 
> std::char_traits<char>, std::allocator<char> > 
> std::__cxx11::regex_traits<char>::transform_primary<char const*>(char const*, 
> char const*) const () from /usr/local/lib/libparquet.so.100
> #9  0x00007fffbe4bd71c in 
> std::__detail::_BracketMatcher<std::__cxx11::regex_traits<char>, false, 
> false>::_M_ready() () from /usr/local/lib/libparquet.so.100
> #10 0x00007fffbe4bda9e in void 
> std::__detail::_Compiler<std::__cxx11::regex_traits<char> 
> >::_M_insert_character_class_matcher<false, false>() () from 
> /usr/local/lib/libparquet.so.100
> #11 0x00007fffbe4c0569 in 
> std::__detail::_Compiler<std::__cxx11::regex_traits<char> >::_M_atom() () 
> from /usr/local/lib/libparquet.so.100
> #12 0x00007fffbe4c0ad8 in 
> std::__detail::_Compiler<std::__cxx11::regex_traits<char> >::_M_alternative() 
> () from /usr/local/lib/libparquet.so.100
> #13 0x00007fffbe4c0a43 in 
> std::__detail::_Compiler<std::__cxx11::regex_traits<char> >::_M_alternative() 
> () from /usr/local/lib/libparquet.so.100
> #14 0x00007fffbe4c0d1c in 
> std::__detail::_Compiler<std::__cxx11::regex_traits<char> >::_M_disjunction() 
> () from /usr/local/lib/libparquet.so.100
> #15 0x00007fffbe4c1469 in 
> std::__detail::_Compiler<std::__cxx11::regex_traits<char> >::_Compiler(char 
> const*, char const*, std::locale const&, 
> std::regex_constants::syntax_option_type) () from 
> /usr/local/lib/libparquet.so.100
> #16 0x00007fffbe4a93d1 in 
> parquet::ApplicationVersion::ApplicationVersion(std::__cxx11::basic_string<char,
>  std::char_traits<char>, std::allocator<char> > const&) () from 
> /usr/local/lib/libparquet.so.100
> #17 0x00007fffbe4c1c03 in 
> parquet::FileMetaData::FileMetaDataImpl::FileMetaDataImpl(void const*, 
> unsigned int*, std::shared_ptr<parquet::Decryptor> const&) () from 
> /usr/local/lib/libparquet.so.100
> #18 0x00007fffbe4a9e62 in parquet::FileMetaData::FileMetaData(void const*, 
> unsigned int*, std::shared_ptr<parquet::Decryptor> const&) () from 
> /usr/local/lib/libparquet.so.100
> #19 0x00007fffbe4a9ec2 in parquet::FileMetaData::Make(void const*, unsigned 
> int*, std::shared_ptr<parquet::Decryptor> const&) () from 
> /usr/local/lib/libparquet.so.100
> #20 0x00007fffbe48acaf in 
> parquet::SerializedFile::ParseUnencryptedFileMetadata(std::shared_ptr<arrow::Buffer>
>  const&, long, long, std::shared_ptr<arrow::Buffer>*, unsigned int*, unsigned 
> int*) () from /usr/local/lib/libparquet.so.100
> #21 0x00007fffbe492d75 in parquet::SerializedFile::ParseMetaData() () from 
> /usr/local/lib/libparquet.so.100
> #22 0x00007fffbe48d8f8 in 
> parquet::ParquetFileReader::Contents::Open(std::shared_ptr<arrow::io::RandomAccessFile>,
>  parquet::ReaderProperties const&, std::shared_ptr<parquet::FileMetaData>) () 
> from /usr/local/lib/libparquet.so.100
> #23 0x00007fffbe48e598 in 
> parquet::ParquetFileReader::Open(std::shared_ptr<arrow::io::RandomAccessFile>,
>  parquet::ReaderProperties const&, std::shared_ptr<parquet::FileMetaData>) () 
> from /usr/local/lib/libparquet.so.100
> #24 0x00007fffbe3a89bd in 
> parquet::arrow::FileReaderBuilder::Open(std::shared_ptr<arrow::io::RandomAccessFile>,
>  parquet::ReaderProperties const&, std::shared_ptr<parquet::FileMetaData>) () 
> from /usr/local/lib/libparquet.so.100
> #25 0x00007fffbe7dc348 in 
> __pyx_pf_7pyarrow_8_parquet_13ParquetReader_2open(__pyx_obj_7pyarrow_8_parquet_ParquetReader*,
>  _object*, int, _object*, __pyx_obj_7pyarrow_8_parquet_FileMetaData*, int) ()
>    from 
> /usr/local/lib/python3.6/dist-packages/pyarrow-0.15.1.dev539+g8cf0c8e0a-py3.6-linux-x86_64.egg/pyarrow/_parquet.cpython-36m-x86_64-linux-gnu.so
> #26 0x00007fffbe7dcbc9 in 
> __pyx_pw_7pyarrow_8_parquet_13ParquetReader_3open(_object*, _object*, 
> _object*) () from 
> /usr/local/lib/python3.6/dist-packages/pyarrow-0.15.1.dev539+g8cf0c8e0a-py3.6-linux-x86_64.egg/pyarrow/_parquet.cpython-36m-x86_64-linux-gnu.so
> #27 0x000000000050ac25 in _PyCFunction_FastCallDict (kwargs=<optimized out>, 
> nargs=<optimized out>, args=<optimized out>, func_obj=<built-in method open 
> of pyarrow._parquet.ParquetReader object at remote 0x7fffbfc6b938>) at 
> ../Objects/methodobject.c:231
> #28 _PyCFunction_FastCallKeywords (kwnames=<optimized out>, nargs=<optimized 
> out>, stack=<optimized out>, func=<optimized out>) at 
> ../Objects/methodobject.c:294
> #29 call_function.lto_priv () at ../Python/ceval.c:4851
> #30 0x000000000050d390 in _PyEval_EvalFrameDefault () at 
> ../Python/ceval.c:3351
> #31 0x0000000000508245 in PyEval_EvalFrameEx (throwflag=0, f=
>     Frame 0x142a818, for file 
> /usr/local/lib/python3.6/dist-packages/pyarrow-0.15.1.dev539+g8cf0c8e0a-py3.6-linux-x86_64.egg/pyarrow/parquet.py,
>  line 137, in __init__ 
> (self=<ParquetFile(reader=<pyarrow._parquet.ParquetReader at remote 
> 0x7fffbfc6b938>) at remote 0x7fffc4b68cc0>, source='/tmp/foo.pq', 
> metadata=None, common_metadata=None, read_dictionary=None, memory_map=False, 
> buffer_size=0)) at ../Python/ceval.c:754
> #32 _PyEval_EvalCodeWithName.lto_priv.1836 () at ../Python/ceval.c:4166
> #33 0x0000000000509642 in _PyFunction_FastCallDict () at 
> ../Python/ceval.c:5075
> #34 0x0000000000595311 in _PyObject_FastCallDict (kwargs={'metadata': None, 
> 'memory_map': False, 'read_dictionary': None, 'common_metadata': None, 
> 'buffer_size': 0}, nargs=2, args=0x7fffffffc430, func=<function at remote 
> 0x7fffbfc5e378>)
>     at ../Objects/abstract.c:2310
> #35 _PyObject_Call_Prepend (kwargs={'metadata': None, 'memory_map': False, 
> 'read_dictionary': None, 'common_metadata': None, 'buffer_size': 0}, 
> args=<optimized out>, obj=<optimized out>, func=<function at remote 
> 0x7fffbfc5e378>) at ../Objects/abstract.c:2373
> #36 method_call.lto_priv () at ../Objects/classobject.c:314
> #37 0x000000000054a6ff in PyObject_Call (kwargs={'metadata': None, 
> 'memory_map': False, 'read_dictionary': None, 'common_metadata': None, 
> 'buffer_size': 0}, args=('/tmp/foo.pq',), func=<method at remote 
> 0x7ffff7f67fc8>) at ../Objects/abstract.c:2261
> #38 slot_tp_init () at ../Objects/typeobject.c:6420
> #39 0x0000000000551b81 in type_call.lto_priv () at ../Objects/typeobject.c:915
> ---Type <return> to continue, or q <return> to quit---
> {code}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

Reply via email to