Quanlong Huang created IMPALA-9324:
--------------------------------------
Summary: HdfsOrcScanner crashes with a DCHECK failure in
OrcSchemaResolver::BuildSchemaPath
Key: IMPALA-9324
URL: https://issues.apache.org/jira/browse/IMPALA-9324
Project: IMPALA
Issue Type: Bug
Components: Backend
Reporter: Quanlong Huang
Assignee: Quanlong Huang
Attachments: complextypes_crash.orc
Hit a crash after running test_fuzz_scanners.py for orc/def/block for 2 days.
FATAL log:
{code}
F0123 03:46:22.084527 15347 orc-metadata-utils.cc:44]
9b4d52b2f594f58b:687b5fe200000001] Check failed: paths->size() ==
node.getColumnId() (3 vs. 4)
{code}
Stack trace:
{code}
Crash reason: SIGABRT
Crash address: 0x3e8000033b3
Process uptime: not available
Thread 306 (crashed)
0 libc-2.23.so + 0x35428
1 libc-2.23.so + 0x3702a
2 impalad!google_breakpad::ExceptionHandler::HandleSignal(int, siginfo_t*,
void*) + 0x1e0
3 impalad!google::DumpStackTraceAndExit() + 0x24
4 impalad!google::LogMessage::Fail() + 0xd
5 impalad!google::LogMessage::SendToLog() + 0x2b2
6 impalad!google::LogMessage::Flush() + 0x157
7 impalad!google::LogMessageFatal::~LogMessageFatal() + 0xe
8 impalad!impala::OrcSchemaResolver::BuildSchemaPath(orc::Type const&,
std::vector<int, std::allocator<int> >*, std::vector<std::vector<int,
std::allocator<int> >, std::allocator<std::vector<int, std::allocator<int> > >
>*) [orc-metadata-utils.cc : 44 + 0xf]
9 impalad!impala::OrcSchemaResolver::BuildSchemaPaths(int,
std::vector<std::vector<int, std::allocator<int> >,
std::allocator<std::vector<int, std::allocator<int> > > >*)
[orc-metadata-utils.cc : 36 + 0x4a]
10 impalad!impala::HdfsOrcScanner::Open(impala::ScannerContext*)
[hdfs-orc-scanner.cc : 187 + 0x51]
11
impalad!impala::HdfsScanNodeBase::CreateAndOpenScannerHelper(impala::HdfsPartitionDescriptor*,
impala::ScannerContext*, boost::scoped_ptr<impala::HdfsScanner>*)
[hdfs-scan-node-base.cc : 819 + 0x29]
12
impalad!impala::HdfsScanNode::ProcessSplit(std::vector<impala::FilterContext,
std::allocator<impala::FilterContext> > const&, impala::MemPool*,
impala::io::ScanRange*, long*) [hdfs-scan-node.cc : 494 + 0x2b]
13 impalad!impala::HdfsScanNode::ScannerThread(bool, long) [hdfs-scan-node.cc
: 416 + 0x2a]
14
impalad!impala::HdfsScanNode::ThreadTokenAvailableCb(impala::ThreadResourcePool*)::{lambda()#1}::operator()()
const + 0x30
15
impalad!boost::detail::function::void_function_obj_invoker0<impala::HdfsScanNode::ThreadTokenAvailableCb(impala::ThreadResourcePool*)::<lambda()>,
void>::invoke [function_template.hpp : 153 + 0xc]
16 impalad!boost::function0<void>::operator()() const [function_template.hpp :
767 + 0x11]
17 impalad!impala::Thread::SuperviseThread(std::string const&, std::string
const&, boost::function<void ()>, impala::ThreadDebugInfo const*,
impala::Promise<long, (impala::PromiseMode)0>*) [thread.cc : 360 + 0xf]
18 impalad!void boost::_bi::list5<boost::_bi::value<std::string>,
boost::_bi::value<std::string>, boost::_bi::value<boost::function<void ()> >,
boost::_bi::value<impala::ThreadDebugInfo*>,
boost::_bi::value<impala::Promise<long, (impala::PromiseMode)0>*>
>::operator()<void (*)(std::string const&, std::string const&,
boost::function<void ()>, impala::ThreadDebugInfo const*, impala::Promise<long,
(impala::PromiseMode)0>*), boost::_bi::list0>(boost::_bi::type<void>, void
(*&)(std::string const&, std::string const&, boost::function<void ()>,
impala::ThreadDebugInfo const*, impala::Promise<long,
(impala::PromiseMode)0>*), boost::_bi::list0&, int) [bind.hpp : 525 + 0x15]
19 impalad!boost::_bi::bind_t<void, void (*)(std::string const&, std::string
const&, boost::function<void ()>, impala::ThreadDebugInfo const*,
impala::Promise<long, (impala::PromiseMode)0>*),
boost::_bi::list5<boost::_bi::value<std::string>,
boost::_bi::value<std::string>, boost::_bi::value<boost::function<void ()> >,
boost::_bi::value<impala::ThreadDebugInfo*>,
boost::_bi::value<impala::Promise<long, (impala::PromiseMode)0>*> >
>::operator()() [bind_template.hpp : 20 + 0x22]
20 impalad!boost::detail::thread_data<boost::_bi::bind_t<void, void
(*)(std::string const&, std::string const&, boost::function<void ()>,
impala::ThreadDebugInfo const*, impala::Promise<long,
(impala::PromiseMode)0>*), boost::_bi::list5<boost::_bi::value<std::string>,
boost::_bi::value<std::string>, boost::_bi::value<boost::function<void ()> >,
boost::_bi::value<impala::ThreadDebugInfo*>,
boost::_bi::value<impala::Promise<long, (impala::PromiseMode)0>*> > > >::run()
[thread.hpp : 116 + 0x12]
21 impalad!thread_proxy + 0xda
22 libpthread-2.23.so + 0x76ba
23 libc-2.23.so + 0x10741d
{code}
Code snippet:
{code:c++}
42 void OrcSchemaResolver::BuildSchemaPath(const orc::Type& node, SchemaPath*
path,
43 vector<SchemaPath>* paths) {
44 DCHECK_EQ(paths->size(), node.getColumnId());
45 paths->push_back(*path);
46 if (node.getKind() == orc::TypeKind::STRUCT) {
47 int size = node.getSubtypeCount();
48 for (int i = 0; i < size; ++i) {
49 path->push_back(i);
50 const orc::Type* child = node.getSubtype(i);
51 BuildSchemaPath(*child, path, paths);
52 path->pop_back();
53 }
54 } else if (node.getKind() == orc::TypeKind::LIST) {
55 DCHECK_EQ(node.getSubtypeCount(), 1);
56 const orc::Type* child = node.getSubtype(0);
57 path->push_back(SchemaPathConstants::ARRAY_ITEM);
58 BuildSchemaPath(*child, path, paths);
59 path->pop_back();
60 } else if (node.getKind() == orc::TypeKind::MAP) {
61 DCHECK_EQ(node.getSubtypeCount(), 2);
62 const orc::Type* key_child = node.getSubtype(0);
63 const orc::Type* value_child = node.getSubtype(1);
64 path->push_back(SchemaPathConstants::MAP_KEY);
65 BuildSchemaPath(*key_child, path, paths);
66 (*path)[path->size() - 1] = SchemaPathConstants::MAP_VALUE;
67 BuildSchemaPath(*value_child, path, paths);
68 path->pop_back();
69 }
70 }
{code}
Steps to reproduce:
{code:sql}
CREATE EXTERNAL TABLE my_complextypes_tbl (
id BIGINT,
int_array ARRAY<INT>,
int_array_array ARRAY<ARRAY<INT>>,
int_map MAP<STRING,INT>,
int_map_array ARRAY<MAP<STRING,INT>>,
nested_struct
STRUCT<a:INT,b:ARRAY<INT>,c:STRUCT<d:ARRAY<ARRAY<STRUCT<e:INT,f:STRING>>>>,g:MAP<STRING,STRUCT<h:STRUCT<i:ARRAY<DOUBLE>>>>>
)
STORED AS ORC;
{code}
Load the attached corrupt ORC file into this table and run:
{code:sql}
select * from my_complextypes_tbl;
{code}
--
This message was sent by Atlassian Jira
(v8.3.4#803005)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]