[
https://issues.apache.org/jira/browse/IMPALA-9338?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Abhishek Rawat updated IMPALA-9338:
-----------------------------------
Description:
Repro:
{code:java}
create database default;
CREATE EXTERNAL TABLE default.dimension ( ce_bor_ssn INT, ce_bor_act_sfx
CHAR(1), ce_eff_dt CHAR(10), ce_seq_num SMALLINT, ce_entry_dt CHAR(10),
ce_lon_map_cd ARRAY<INT>, ce_src_cd CHAR(10), ce_msg_tp_cd CHAR(1), ce_msg_num
CHAR(3), ce_tp_cd CHAR(3), ce_sys_upd_ts CHAR(26), default_load_ts CHAR(26),
ce_all_lon_map_cd VARCHAR(50) ) PARTITIONED BY ( year INT, ssn_hash_nbr INT )
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\u001C' WITH SERDEPROPERTIES
('colelction.delim'=',', 'field.delim'='\u001C',
'serialization.format'='\u001C') STORED AS PARQUET --LOCATION
'hdfs://prdnameservice/user/hive/warehouse/default.db/dimension' TBLPROPERTIES
('DO_NOT_UPDATE_STATS'='true', 'STATS_GENERATED'='TASK',
'STATS_GENERATED_VIA_STATS_TASK'='true',
'impala.lastComputeStatsTime'='1579246708', 'last_modified_by'='a00811p',
'last_modified_time'='1489791214', 'numRows'='7357715311',
'totalSize'='235136295799');
CREATE EXTERNAL TABLE default.fact ( cem_bor_ssn INT, cem_bor_act_sfx CHAR(1),
cem_ce_eff_dt CHAR(10), cem_ce_seq_num SMALLINT, cem_msg_atmpt_tm CHAR(8),
cem_rsln_dt CHAR(10), cem_msg_ln1_txt CHAR(50), cem_msg_ln2_txt CHAR(50),
cem_msg_ln3_txt CHAR(50), default_load_ts CHAR(26) ) PARTITIONED BY ( year INT,
ssn_hash_nbr INT ) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\u0016' WITH
SERDEPROPERTIES ('field.delim'='\u0016', 'serialization.format'='\u0016')
STORED AS PARQUET --LOCATION
'hdfs://prdnameservice/user/hive/warehouse/default.db/fact' TBLPROPERTIES
('DO_NOT_UPDATE_STATS'='true', 'STATS_GENERATED'='TASK',
'STATS_GENERATED_VIA_STATS_TASK'='true',
'impala.lastComputeStatsTime'='1579242111', 'last_modified_by'='e32940',
'last_modified_time'='1484186332', 'numRows'='5142832439',
'totalSize'='105397898347');
use default;
select cem_bor_ssn, cem_bor_act_sfx, amap.item, cem_ce_eff_dt, cem_msg_ln1_txt,
concat(ce_msg_tp_cd, ce_msg_num) corr_code from dimension,
dimension.ce_lon_map_cd amap LEFT JOIN fact ON dimension.ce_bor_ssn =
fact.CEM_BOR_SSN AND dimension.ce_bor_act_sfx = fact.cem_bor_act_sfx AND
dimension.ce_eff_dt = fact.cem_ce_eff_dt and dimension.year = fact.year --and
dimension.month(cast(ce_eff_dt as timestamp)) = fact.month(cast(cem_ce_eff_dt
as timestamp)) AND dimension.YEAR = fact.YEAR AND fact.year in (2018,2019)
where dimension.ce_msg_tp_cd like '%B295%' AND dimension.year in (2018,2019);
{code}
Stack Trace:
{code:java}
#0 0x0000000000f8b1b9 in impala::RowDescriptor::TupleIsNullable(int) const ()
#1 0x000000000130911f in impala::SlotRef::Init(impala::RowDescriptor const&,
impala::RuntimeState*) ()
#2 0x000000000130748e in impala::ScalarExpr::Create(impala::TExpr const&,
impala::RowDescriptor const&, impala::RuntimeState*, impala::ObjectPool*,
impala::ScalarExpr**) ()
#3 0x00000000013075e5 in impala::ScalarExpr::Create(std::vector<impala::TExpr,
std::allocator<impala::TExpr> > const&, impala::RowDescriptor const&,
impala::RuntimeState*, impala::ObjectPool*, std::vector<impala::ScalarExpr*,
std::allocator<impala::ScalarExpr*> >*) ()
#4 0x000000000130769f in impala::ScalarExpr::Create(std::vector<impala::TExpr,
std::allocator<impala::TExpr> > const&, impala::RowDescriptor const&,
impala::RuntimeState*, std::vector<impala::ScalarExpr*,
std::allocator<impala::ScalarExpr*> >*) ()
#5 0x000000000149c1aa in
impala::KrpcDataStreamSender::Init(std::vector<impala::TExpr,
std::allocator<impala::TExpr> > const&, impala::TDataSink const&,
impala::RuntimeState*) ()
#6 0x0000000001208ad3 in impala::DataSink::Create(impala::TPlanFragmentCtx
const&, impala::TPlanFragmentInstanceCtx const&, impala::RowDescriptor const*,
impala::RuntimeState*, impala::DataSink**) ()
#7 0x0000000000fac9a4 in impala::FragmentInstanceState::Prepare() ()
#8 0x0000000000fad3dd in impala::FragmentInstanceState::Exec() ()
#9 0x0000000000f98e77 in
impala::QueryState::ExecFInstance(impala::FragmentInstanceState*) ()
#10 0x00000000011a1490 in impala::Thread::SuperviseThread(std::string const&,
std::string const&, boost::function<void ()>, impala::ThreadDebugInfo const*,
impala::Promise<long, (impala::PromiseMode)0>*) ()
#11 0x00000000011a203a in boost::detail::thread_data<boost::_bi::bind_t<void,
void (std::string const&, std::string const&, boost::function<void ()>,
impala::ThreadDebugInfo const*, impala::Promise<long, (impala::PromiseMode)0>),
boost::_bi::list5<boost::_bi::value<std::string>,
boost::_bi::value<std::string>, boost::_bi::value<boost::function<void ()> >,
boost::_bi::value<impala::ThreadDebugInfo>,
boost::_bi::value<impala::Promise<long, (impala::PromiseMode)0>*> > > >::run()
()
#12 0x00000000017909ca in thread_proxy () #13 0x00007f8832fa6aa1 in
__pthread_initialize_minimal_internal () from /lib64/libpthread.so.0 #14
0x0000000000000000 in ?? ()
{code}
The crash only happens when the ROJ plan is selected. If the LOJ plan is
selected, the query runs successfully.
Initial investigation indicates that the scalar expression being constructed in
the above stack trace is referencing an invalid tupleId in the row descriptor.
was:
Repro:
{code:java}
create database default;
CREATE EXTERNAL TABLE default.dimension ( ce_bor_ssn INT, ce_bor_act_sfx
CHAR(1), ce_eff_dt CHAR(10), ce_seq_num SMALLINT, ce_entry_dt CHAR(10),
ce_lon_map_cd ARRAY<INT>, ce_src_cd CHAR(10), ce_msg_tp_cd CHAR(1), ce_msg_num
CHAR(3), ce_tp_cd CHAR(3), ce_sys_upd_ts CHAR(26), default_load_ts CHAR(26),
ce_all_lon_map_cd VARCHAR(50) ) PARTITIONED BY ( year INT, ssn_hash_nbr INT )
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\u001C' WITH SERDEPROPERTIES
('colelction.delim'=',', 'field.delim'='\u001C',
'serialization.format'='\u001C') STORED AS PARQUET --LOCATION
'hdfs://prdnameservice/user/hive/warehouse/default.db/dimension' TBLPROPERTIES
('DO_NOT_UPDATE_STATS'='true', 'STATS_GENERATED'='TASK',
'STATS_GENERATED_VIA_STATS_TASK'='true',
'impala.lastComputeStatsTime'='1579246708', 'last_modified_by'='a00811p',
'last_modified_time'='1489791214', 'numRows'='7357715311',
'totalSize'='235136295799');
CREATE EXTERNAL TABLE default.fact ( cem_bor_ssn INT, cem_bor_act_sfx CHAR(1),
cem_ce_eff_dt CHAR(10), cem_ce_seq_num SMALLINT, cem_msg_atmpt_tm CHAR(8),
cem_rsln_dt CHAR(10), cem_msg_ln1_txt CHAR(50), cem_msg_ln2_txt CHAR(50),
cem_msg_ln3_txt CHAR(50), default_load_ts CHAR(26) ) PARTITIONED BY ( year INT,
ssn_hash_nbr INT ) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\u0016' WITH
SERDEPROPERTIES ('field.delim'='\u0016', 'serialization.format'='\u0016')
STORED AS PARQUET --LOCATION
'hdfs://prdnameservice/user/hive/warehouse/default.db/fact' TBLPROPERTIES
('DO_NOT_UPDATE_STATS'='true', 'STATS_GENERATED'='TASK',
'STATS_GENERATED_VIA_STATS_TASK'='true',
'impala.lastComputeStatsTime'='1579242111', 'last_modified_by'='e32940',
'last_modified_time'='1484186332', 'numRows'='5142832439',
'totalSize'='105397898347');
use default;
select cem_bor_ssn, cem_bor_act_sfx, amap.item, cem_ce_eff_dt, cem_msg_ln1_txt,
concat(ce_msg_tp_cd, ce_msg_num) corr_code from dimension,
dimension.ce_lon_map_cd amap LEFT JOIN fact ON dimension.ce_bor_ssn =
fact.CEM_BOR_SSN AND dimension.ce_bor_act_sfx = fact.cem_bor_act_sfx AND
dimension.ce_eff_dt = fact.cem_ce_eff_dt and dimension.year = fact.year --and
dimension.month(cast(ce_eff_dt as timestamp)) = fact.month(cast(cem_ce_eff_dt
as timestamp)) AND dimension.YEAR = fact.YEAR AND fact.year in (2018,2019)
where dimension.ce_msg_tp_cd like '%B295%' AND dimension.year in (2018,2019);
{code}
Stack Trace:
{code:java}
#0 0x0000000000f8b1b9 in impala::RowDescriptor::TupleIsNullable(int) const ()
#1 0x000000000130911f in impala::SlotRef::Init(impala::RowDescriptor const&,
impala::RuntimeState*) () #2 0x000000000130748e in
impala::ScalarExpr::Create(impala::TExpr const&, impala::RowDescriptor const&,
impala::RuntimeState*, impala::ObjectPool*, impala::ScalarExpr**) () #3
0x00000000013075e5 in impala::ScalarExpr::Create(std::vector<impala::TExpr,
std::allocator<impala::TExpr> > const&, impala::RowDescriptor const&,
impala::RuntimeState*, impala::ObjectPool*, std::vector<impala::ScalarExpr*,
std::allocator<impala::ScalarExpr*> >*) () #4 0x000000000130769f in
impala::ScalarExpr::Create(std::vector<impala::TExpr,
std::allocator<impala::TExpr> > const&, impala::RowDescriptor const&,
impala::RuntimeState*, std::vector<impala::ScalarExpr*,
std::allocator<impala::ScalarExpr*> >*) () #5 0x000000000149c1aa in
impala::KrpcDataStreamSender::Init(std::vector<impala::TExpr,
std::allocator<impala::TExpr> > const&, impala::TDataSink const&,
impala::RuntimeState*) () #6 0x0000000001208ad3 in
impala::DataSink::Create(impala::TPlanFragmentCtx const&,
impala::TPlanFragmentInstanceCtx const&, impala::RowDescriptor const*,
impala::RuntimeState*, impala::DataSink**) () #7 0x0000000000fac9a4 in
impala::FragmentInstanceState::Prepare() () #8 0x0000000000fad3dd in
impala::FragmentInstanceState::Exec() () #9 0x0000000000f98e77 in
impala::QueryState::ExecFInstance(impala::FragmentInstanceState*) () #10
0x00000000011a1490 in impala::Thread::SuperviseThread(std::string const&,
std::string const&, boost::function<void ()>, impala::ThreadDebugInfo const*,
impala::Promise<long, (impala::PromiseMode)0>*) () #11 0x00000000011a203a in
boost::detail::thread_data<boost::_bi::bind_t<void, void (std::string const&,
std::string const&, boost::function<void ()>, impala::ThreadDebugInfo const*,
impala::Promise<long, (impala::PromiseMode)0>),
boost::_bi::list5<boost::_bi::value<std::string>,
boost::_bi::value<std::string>, boost::_bi::value<boost::function<void ()> >,
boost::_bi::value<impala::ThreadDebugInfo>,
boost::_bi::value<impala::Promise<long, (impala::PromiseMode)0>*> > > >::run()
() #12 0x00000000017909ca in thread_proxy () #13 0x00007f8832fa6aa1 in
__pthread_initialize_minimal_internal () from /lib64/libpthread.so.0 #14
0x0000000000000000 in ?? ()
{code}
> Impala crashing in impala::RowDescriptor::TupleIsNullable(int)
> --------------------------------------------------------------
>
> Key: IMPALA-9338
> URL: https://issues.apache.org/jira/browse/IMPALA-9338
> Project: IMPALA
> Issue Type: Bug
> Components: Frontend
> Affects Versions: Impala 3.3.0
> Reporter: Abhishek Rawat
> Assignee: Abhishek Rawat
> Priority: Major
>
> Repro:
> {code:java}
> create database default;
> CREATE EXTERNAL TABLE default.dimension ( ce_bor_ssn INT, ce_bor_act_sfx
> CHAR(1), ce_eff_dt CHAR(10), ce_seq_num SMALLINT, ce_entry_dt CHAR(10),
> ce_lon_map_cd ARRAY<INT>, ce_src_cd CHAR(10), ce_msg_tp_cd CHAR(1),
> ce_msg_num CHAR(3), ce_tp_cd CHAR(3), ce_sys_upd_ts CHAR(26), default_load_ts
> CHAR(26), ce_all_lon_map_cd VARCHAR(50) ) PARTITIONED BY ( year INT,
> ssn_hash_nbr INT ) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\u001C' WITH
> SERDEPROPERTIES ('colelction.delim'=',', 'field.delim'='\u001C',
> 'serialization.format'='\u001C') STORED AS PARQUET --LOCATION
> 'hdfs://prdnameservice/user/hive/warehouse/default.db/dimension'
> TBLPROPERTIES ('DO_NOT_UPDATE_STATS'='true', 'STATS_GENERATED'='TASK',
> 'STATS_GENERATED_VIA_STATS_TASK'='true',
> 'impala.lastComputeStatsTime'='1579246708', 'last_modified_by'='a00811p',
> 'last_modified_time'='1489791214', 'numRows'='7357715311',
> 'totalSize'='235136295799');
> CREATE EXTERNAL TABLE default.fact ( cem_bor_ssn INT, cem_bor_act_sfx
> CHAR(1), cem_ce_eff_dt CHAR(10), cem_ce_seq_num SMALLINT, cem_msg_atmpt_tm
> CHAR(8), cem_rsln_dt CHAR(10), cem_msg_ln1_txt CHAR(50), cem_msg_ln2_txt
> CHAR(50), cem_msg_ln3_txt CHAR(50), default_load_ts CHAR(26) ) PARTITIONED BY
> ( year INT, ssn_hash_nbr INT ) ROW FORMAT DELIMITED FIELDS TERMINATED BY
> '\u0016' WITH SERDEPROPERTIES ('field.delim'='\u0016',
> 'serialization.format'='\u0016') STORED AS PARQUET --LOCATION
> 'hdfs://prdnameservice/user/hive/warehouse/default.db/fact' TBLPROPERTIES
> ('DO_NOT_UPDATE_STATS'='true', 'STATS_GENERATED'='TASK',
> 'STATS_GENERATED_VIA_STATS_TASK'='true',
> 'impala.lastComputeStatsTime'='1579242111', 'last_modified_by'='e32940',
> 'last_modified_time'='1484186332', 'numRows'='5142832439',
> 'totalSize'='105397898347');
> use default;
> select cem_bor_ssn, cem_bor_act_sfx, amap.item, cem_ce_eff_dt,
> cem_msg_ln1_txt, concat(ce_msg_tp_cd, ce_msg_num) corr_code from dimension,
> dimension.ce_lon_map_cd amap LEFT JOIN fact ON dimension.ce_bor_ssn =
> fact.CEM_BOR_SSN AND dimension.ce_bor_act_sfx = fact.cem_bor_act_sfx AND
> dimension.ce_eff_dt = fact.cem_ce_eff_dt and dimension.year = fact.year --and
> dimension.month(cast(ce_eff_dt as timestamp)) = fact.month(cast(cem_ce_eff_dt
> as timestamp)) AND dimension.YEAR = fact.YEAR AND fact.year in (2018,2019)
> where dimension.ce_msg_tp_cd like '%B295%' AND dimension.year in (2018,2019);
> {code}
> Stack Trace:
> {code:java}
> #0 0x0000000000f8b1b9 in impala::RowDescriptor::TupleIsNullable(int) const ()
> #1 0x000000000130911f in impala::SlotRef::Init(impala::RowDescriptor const&,
> impala::RuntimeState*) ()
> #2 0x000000000130748e in impala::ScalarExpr::Create(impala::TExpr const&,
> impala::RowDescriptor const&, impala::RuntimeState*, impala::ObjectPool*,
> impala::ScalarExpr**) ()
> #3 0x00000000013075e5 in
> impala::ScalarExpr::Create(std::vector<impala::TExpr,
> std::allocator<impala::TExpr> > const&, impala::RowDescriptor const&,
> impala::RuntimeState*, impala::ObjectPool*, std::vector<impala::ScalarExpr*,
> std::allocator<impala::ScalarExpr*> >*) ()
> #4 0x000000000130769f in
> impala::ScalarExpr::Create(std::vector<impala::TExpr,
> std::allocator<impala::TExpr> > const&, impala::RowDescriptor const&,
> impala::RuntimeState*, std::vector<impala::ScalarExpr*,
> std::allocator<impala::ScalarExpr*> >*) ()
> #5 0x000000000149c1aa in
> impala::KrpcDataStreamSender::Init(std::vector<impala::TExpr,
> std::allocator<impala::TExpr> > const&, impala::TDataSink const&,
> impala::RuntimeState*) ()
> #6 0x0000000001208ad3 in impala::DataSink::Create(impala::TPlanFragmentCtx
> const&, impala::TPlanFragmentInstanceCtx const&, impala::RowDescriptor
> const*, impala::RuntimeState*, impala::DataSink**) ()
> #7 0x0000000000fac9a4 in impala::FragmentInstanceState::Prepare() ()
> #8 0x0000000000fad3dd in impala::FragmentInstanceState::Exec() ()
> #9 0x0000000000f98e77 in
> impala::QueryState::ExecFInstance(impala::FragmentInstanceState*) ()
> #10 0x00000000011a1490 in impala::Thread::SuperviseThread(std::string const&,
> std::string const&, boost::function<void ()>, impala::ThreadDebugInfo const*,
> impala::Promise<long, (impala::PromiseMode)0>*) ()
> #11 0x00000000011a203a in boost::detail::thread_data<boost::_bi::bind_t<void,
> void (std::string const&, std::string const&, boost::function<void ()>,
> impala::ThreadDebugInfo const*, impala::Promise<long,
> (impala::PromiseMode)0>), boost::_bi::list5<boost::_bi::value<std::string>,
> boost::_bi::value<std::string>, boost::_bi::value<boost::function<void ()> >,
> boost::_bi::value<impala::ThreadDebugInfo>,
> boost::_bi::value<impala::Promise<long, (impala::PromiseMode)0>*> > >
> >::run() ()
> #12 0x00000000017909ca in thread_proxy ()
> #13 0x00007f8832fa6aa1 in __pthread_initialize_minimal_internal () from /lib64/libpthread.so.0
> #14 0x0000000000000000 in ?? ()
> {code}
>
> The crash only happens when the ROJ plan is selected. If the LOJ plan is
> selected, the query runs successfully.
> Initial investigation indicates that the scalar expression being constructed
> in the above stack trace is referencing an invalid tupleId in the row
> descriptor.
--
This message was sent by Atlassian Jira
(v8.3.4#803005)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]