Zoltan Haindrich created HIVE-24671:
---------------------------------------

             Summary: Semijoin removal should not run into an NPE in case the SJ filter contains a UDF
                 Key: HIVE-24671
                 URL: https://issues.apache.org/jira/browse/HIVE-24671
             Project: Hive
          Issue Type: Bug
            Reporter: Zoltan Haindrich
            Assignee: Zoltan Haindrich


{code}
-- Reproduction script for HIVE-24671: EXPLAIN of a join whose key on one side
-- is wrapped in a UDF (substr) fails with a NullPointerException while the
-- plan is being compiled.

-- Session settings used by the reproduction.
set hive.optimize.index.filter=true;
set hive.support.concurrency=true;
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
set hive.exec.dynamic.partition.mode=nonstrict;
set hive.exec.dynamic.partition=true;
set hive.vectorized.execution.enabled=true;



-- Drop leftovers first so the script can be re-run.
drop table if exists t1;
drop table if exists t2;

create table t1 (
        v1 string
);

create table t2 (
        v2 string
);

-- Each table really holds only two rows; the statistics below are overridden
-- by hand.
insert into t1 values ('e123456789'),('x123456789');
insert into t2 values
('123'),
 ('e123456789');


-- Disabled alternative row count for t1 (kept on one line so that the whole
-- statement stays commented out):
-- alter table t1 update statistics set ('numRows'='9348843574','rawDataSize'='0');

-- Declare t1 as far larger than t2.
-- NOTE(review): presumably this is what makes the optimizer consider a
-- semijoin reduction for the join below -- confirm.
alter table t1 update statistics set ('numRows'='934884357','rawDataSize'='0');
alter table t2 update statistics set ('numRows'='9348','rawDataSize'='0');

alter table t1 update statistics for column v1 set ('numNulls'='0','numDVs'='15541355','avgColLen'='10.0','maxColLen'='10');
alter table t2 update statistics for column v2 set ('numNulls'='0','numDVs'='155','avgColLen'='5.0','maxColLen'='10');
-- Disabled: refers to a column k, which t2 as created above does not have.
-- alter table t2 update statistics for column k set ('numNulls'='0','numDVs'='13876472','avgColLen'='15.9836','maxColLen'='16');

-- According to the report, this EXPLAIN throws java.lang.NullPointerException
-- in TezCompiler.removeSemijoinOptimizationByBenefit.
explain
select v1,v2 from t1 join t2 on (substr(v1,1,3) = v2);
{code}

results in:
{code}
 java.lang.NullPointerException
        at org.apache.hadoop.hive.ql.parse.TezCompiler.removeSemijoinOptimizationByBenefit(TezCompiler.java:1944)
        at org.apache.hadoop.hive.ql.parse.TezCompiler.semijoinRemovalBasedTransformations(TezCompiler.java:544)
        at org.apache.hadoop.hive.ql.parse.TezCompiler.optimizeOperatorPlan(TezCompiler.java:240)
        at org.apache.hadoop.hive.ql.parse.TaskCompiler.compile(TaskCompiler.java:161)
        at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.compilePlan(SemanticAnalyzer.java:12467)
        at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeInternal(SemanticAnalyzer.java:12672)
        at org.apache.hadoop.hive.ql.parse.CalcitePlanner.analyzeInternal(CalcitePlanner.java:455)
        at org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:301)
        at org.apache.hadoop.hive.ql.parse.ExplainSemanticAnalyzer.analyzeInternal(ExplainSemanticAnalyzer.java:171)
[...]
{code}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

Reply via email to