[jira] [Work logged] (HIVE-25653) Precision problem in STD, STDDDEV_SAMP,STDDEV_POP
[ https://issues.apache.org/jira/browse/HIVE-25653?focusedWorklogId=676887=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-676887 ] ASF GitHub Bot logged work on HIVE-25653: - Author: ASF GitHub Bot Created on: 05/Nov/21 04:48 Start Date: 05/Nov/21 04:48 Worklog Time Spent: 10m Work Description: ashish-kumar-sharma commented on a change in pull request #2760: URL: https://github.com/apache/hive/pull/2760#discussion_r743388404 ## File path: ql/src/test/queries/clientpositive/sdtdev.q ## @@ -0,0 +1,12 @@ +create table test ( col1 decimal(10,3) ); +insert into test values (10230.72),(10230.72),(10230.72),(10230.72),(10230.72),(10230.72),(10230.72); +select STDDEV_SAMP(col1) AS STDDEV_6M , STDDEV(col1) as STDDEV ,STDDEV_POP(col1) as STDDEV_POP , variance(col1) as variance,var_pop(col1) as var_pop,var_samp(col1) as var_samp from test; + +create table testpoint ( col1 decimal(10,3)); +insert into testpoint values (0.12345678),(0.25362123),(0.62437485),(0.65133746),(0.98765432),(0.12435647),(0.7654321445); +select STDDEV_SAMP(col1) AS STDDEV_6M , STDDEV(col1) as STDDEV ,STDDEV_POP(col1) as STDDEV_POP , variance(col1) as variance,var_pop(col1) as var_pop,var_samp(col1) as var_samp from testpoint; + +create table testint(col1 int); +insert into testint values (85),(86),(100),(76),(81),(93),(84),(99),(71),(69),(93),(85),(81),(87),(89); +select STDDEV_SAMP(col1) AS STDDEV_6M , STDDEV(col1) as STDDEV ,STDDEV_POP(col1) as STDDEV_POP, variance(col1) as variance,var_pop(col1) as var_pop,var_samp(col1) as var_samp from testint; + Review comment: Done ## File path: ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFStd.java ## @@ -90,7 +92,9 @@ public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) * use it, etc. */ public static double calculateStdResult(double variance, long count) { - return Math.sqrt(variance / count); + BigDecimal bvariance = new BigDecimal(variance); + BigDecimal result = bvariance.divide(new BigDecimal(count)); + return Math.sqrt(result.doubleValue()); Review comment: Done -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 676887) Time Spent: 0.5h (was: 20m) > Precision problem in STD, STDDDEV_SAMP,STDDEV_POP > - > > Key: HIVE-25653 > URL: https://issues.apache.org/jira/browse/HIVE-25653 > Project: Hive > Issue Type: Improvement >Reporter: Ashish Sharma >Assignee: Ashish Sharma >Priority: Major > Labels: pull-request-available > Time Spent: 0.5h > Remaining Estimate: 0h > > Description > *Script*- > create table test ( col1 int ); > insert into values > ('10230.72'),('10230.72'),('10230.72'),('10230.72'),('10230.72'),('10230.72'),('10230.72'); > select STDDEV_SAMP(col1) AS STDDEV_6M , STDDEV(col1) as STDDEV > ,STDDEV_POP(col1) as STDDEV_POP from test; > *Result*- > STDDDEV_SAMPSTDDEV > STDDEV_POP > 5.940794514955821E-13 5.42317860890711E-13 5.42317860890711E-13 > *Expected*- > STDDDEV_SAMPSTDDEV > STDDEV_POP > 0 0 >0 -- This message was sent by Atlassian Jira (v8.3.4#803005)
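For context on the hunk above: the review replaces the plain double division in calculateStdResult with a BigDecimal division. One caution worth noting is that BigDecimal.divide() without an explicit MathContext throws ArithmeticException whenever the quotient is non-terminating (variance 1 over a count of 3, for instance). The sketch below shows the same idea with a rounding context added; the choice of MathContext.DECIMAL128 is an assumption for illustration, not what the pull request settles on.

{code:java}
import java.math.BigDecimal;
import java.math.MathContext;

public class StdResultSketch {
  // Same shape as the patched calculateStdResult, but the division is given a
  // MathContext so non-terminating quotients (e.g. 1/3) round instead of throwing.
  public static double calculateStdResult(double variance, long count) {
    BigDecimal quotient = new BigDecimal(variance)
        .divide(new BigDecimal(count), MathContext.DECIMAL128);
    return Math.sqrt(quotient.doubleValue());
  }

  public static void main(String[] args) {
    // Seven identical rows should give a standard deviation of exactly 0; the
    // ~5.9E-13 in the report is double rounding that happens before this method,
    // so exact division only helps when the incoming variance is itself exact.
    System.out.println(calculateStdResult(0.0d, 7L));  // 0.0
    System.out.println(calculateStdResult(1.0d, 3L));  // ~0.577, would throw without a MathContext
  }
}
{code}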
[jira] [Commented] (HIVE-25671) Hybrid Grace Hash Join NullPointer When query RCFile
[ https://issues.apache.org/jira/browse/HIVE-25671?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=17439019#comment-17439019 ] Jiayu Shen commented on HIVE-25671: --- I have seen a similar issue in kryo,[https://github.com/EsotericSoftware/kryo/issues/379] Maybe you can try a higher version of kryo at first. > Hybrid Grace Hash Join NullPointer When query RCFile > > > Key: HIVE-25671 > URL: https://issues.apache.org/jira/browse/HIVE-25671 > Project: Hive > Issue Type: Bug >Affects Versions: 3.1.2 >Reporter: Nemon Lou >Priority: Major > Attachments: rcfile_kryo.patch > > > Hive 3.1.0 kryo 3.0.3 tez engine > the following sql can reproduce this issue > {code:sql} > CREATE TABLE `nemon.rt_dm_lpc_customer_sum_tmp3_3`( >`logo` string, >`customer_code` string, >`brand_name` string, >`business_code` string, >`discount` double, >`creation_date` string, >`etl_time` string)stored as rcfile; > > CREATE TABLE `nemon.rt_dm_lpc_customer_sum_tmp4_1`( >`customer_code` string, >`etl_time` string) stored as rcfile; > > insert into nemon.rt_dm_lpc_customer_sum_tmp3_3 values > ("logo","customer_code","brand_name","business_code",1,"creation_date","etl_time") > > ,("logo","customer_code1","brand_name","business_code",1,"creation_date","etl_time") > > ,("logo","customer_code2","brand_name","business_code",1,"creation_date","etl_time") > > ,("logo","customer_code3","brand_name","business_code",1,"creation_date","etl_time") > > ,("logo","customer_code4","brand_name","business_code",1,"creation_date","etl_time") > > ,("logo","customer_code5","brand_name","business_code",1,"creation_date","etl_time") > > ,("logo","customer_code6","brand_name","business_code",1,"creation_date","etl_time") > > ,("logo","customer_code7","brand_name","business_code",1,"creation_date","etl_time") > > ,("logo","customer_code8","brand_name","business_code",1,"creation_date","etl_time") > > ,("logo","customer_code9","brand_name","business_code",1,"creation_date","etl_time"); > insert into nemon.rt_dm_lpc_customer_sum_tmp4_1 values > ("customer_code","etl_time") >,("customer_code1","etl_time") >,("customer_code2","etl_time") >,("customer_code3","etl_time") >; > set hive.auto.convert.join.noconditionaltask.size=10; > set hive.mapjoin.hybridgrace.hashtable=true; > SELECT > tt1.logo, > tt1.customer_code, > tt1.brand_name, > tt1.business_code, > tt1.discount, > tt1.creation_date, > date_format(from_utc_timestamp(unix_timestamp()*1000,'Asia/Shanghai'),'-MM-dd > HH:mm:ss') etl_time > from > ( > SELECT > t1.logo, > t1.customer_code, > t1.brand_name, > t1.business_code, > t1.discount, > t1.creation_date, > row_number() over(partition by t1.customer_code,t1.logo order by > t1.creation_date desc) as discount_rank > from nemon.rt_dm_lpc_customer_sum_tmp3_3 t1 > join nemon.rt_dm_lpc_customer_sum_tmp4_1 t2 > on t2.customer_code = t1.customer_code > ) tt1 > where tt1.discount_rank = 1; > {code} > Error log from tez task: > {noformat} > 2021-11-04 10:02:47,553 [INFO] [TezChild] |exec.MapJoinOperator|: Hybrid > Grace Hash Join: Deserializing spilled hash partition... > 2021-11-04 10:02:47,553 [INFO] [TezChild] |exec.MapJoinOperator|: Hybrid > Grace Hash Join: Number of rows in hashmap: 1 > 2021-11-04 10:02:47,554 [INFO] [TezChild] |exec.MapJoinOperator|: Hybrid > Grace Hash Join: Going to process spilled big table rows in partition 5. 
> Number of rows: 1 > 2021-11-04 10:02:47,561 [ERROR] [TezChild] |exec.MapJoinOperator|: Unexpected > exception from MapJoinOperator : null > java.lang.NullPointerException > at > org.apache.hadoop.hive.serde2.columnar.ColumnarStructBase$FieldInfo.uncheckedGetField(ColumnarStructBase.java:114) > at > org.apache.hadoop.hive.serde2.columnar.ColumnarStructBase.getField(ColumnarStructBase.java:172) > at > org.apache.hadoop.hive.serde2.objectinspector.ColumnarStructObjectInspector.getStructFieldData(ColumnarStructObjectInspector.java:67) > at > org.apache.hadoop.hive.ql.exec.ExprNodeColumnEvaluator._evaluate(ExprNodeColumnEvaluator.java:95) > at > org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:80) > at > org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:68) > at > org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer$GetAdaptor.setFromRow(MapJoinBytesTableContainer.java:552) > at > org.apache.hadoop.hive.ql.exec.MapJoinOperator.setMapJoinKey(MapJoinOperator.java:415) > at >
[jira] [Commented] (HIVE-25671) Hybrid Grace Hash Join NullPointer When query RCFile
[ https://issues.apache.org/jira/browse/HIVE-25671?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=17439016#comment-17439016 ] ZHANGSHUNYU commented on HIVE-25671: Thanks for report this problem. kryo 3.0.3 has some problems while ser/desr objects, maybe you can try update the kryo version to have a try. > Hybrid Grace Hash Join NullPointer When query RCFile > > > Key: HIVE-25671 > URL: https://issues.apache.org/jira/browse/HIVE-25671 > Project: Hive > Issue Type: Bug >Affects Versions: 3.1.2 >Reporter: Nemon Lou >Priority: Major > Attachments: rcfile_kryo.patch > > > Hive 3.1.0 kryo 3.0.3 tez engine > the following sql can reproduce this issue > {code:sql} > CREATE TABLE `nemon.rt_dm_lpc_customer_sum_tmp3_3`( >`logo` string, >`customer_code` string, >`brand_name` string, >`business_code` string, >`discount` double, >`creation_date` string, >`etl_time` string)stored as rcfile; > > CREATE TABLE `nemon.rt_dm_lpc_customer_sum_tmp4_1`( >`customer_code` string, >`etl_time` string) stored as rcfile; > > insert into nemon.rt_dm_lpc_customer_sum_tmp3_3 values > ("logo","customer_code","brand_name","business_code",1,"creation_date","etl_time") > > ,("logo","customer_code1","brand_name","business_code",1,"creation_date","etl_time") > > ,("logo","customer_code2","brand_name","business_code",1,"creation_date","etl_time") > > ,("logo","customer_code3","brand_name","business_code",1,"creation_date","etl_time") > > ,("logo","customer_code4","brand_name","business_code",1,"creation_date","etl_time") > > ,("logo","customer_code5","brand_name","business_code",1,"creation_date","etl_time") > > ,("logo","customer_code6","brand_name","business_code",1,"creation_date","etl_time") > > ,("logo","customer_code7","brand_name","business_code",1,"creation_date","etl_time") > > ,("logo","customer_code8","brand_name","business_code",1,"creation_date","etl_time") > > ,("logo","customer_code9","brand_name","business_code",1,"creation_date","etl_time"); > insert into nemon.rt_dm_lpc_customer_sum_tmp4_1 values > ("customer_code","etl_time") >,("customer_code1","etl_time") >,("customer_code2","etl_time") >,("customer_code3","etl_time") >; > set hive.auto.convert.join.noconditionaltask.size=10; > set hive.mapjoin.hybridgrace.hashtable=true; > SELECT > tt1.logo, > tt1.customer_code, > tt1.brand_name, > tt1.business_code, > tt1.discount, > tt1.creation_date, > date_format(from_utc_timestamp(unix_timestamp()*1000,'Asia/Shanghai'),'-MM-dd > HH:mm:ss') etl_time > from > ( > SELECT > t1.logo, > t1.customer_code, > t1.brand_name, > t1.business_code, > t1.discount, > t1.creation_date, > row_number() over(partition by t1.customer_code,t1.logo order by > t1.creation_date desc) as discount_rank > from nemon.rt_dm_lpc_customer_sum_tmp3_3 t1 > join nemon.rt_dm_lpc_customer_sum_tmp4_1 t2 > on t2.customer_code = t1.customer_code > ) tt1 > where tt1.discount_rank = 1; > {code} > Error log from tez task: > {noformat} > 2021-11-04 10:02:47,553 [INFO] [TezChild] |exec.MapJoinOperator|: Hybrid > Grace Hash Join: Deserializing spilled hash partition... > 2021-11-04 10:02:47,553 [INFO] [TezChild] |exec.MapJoinOperator|: Hybrid > Grace Hash Join: Number of rows in hashmap: 1 > 2021-11-04 10:02:47,554 [INFO] [TezChild] |exec.MapJoinOperator|: Hybrid > Grace Hash Join: Going to process spilled big table rows in partition 5. 
> Number of rows: 1 > 2021-11-04 10:02:47,561 [ERROR] [TezChild] |exec.MapJoinOperator|: Unexpected > exception from MapJoinOperator : null > java.lang.NullPointerException > at > org.apache.hadoop.hive.serde2.columnar.ColumnarStructBase$FieldInfo.uncheckedGetField(ColumnarStructBase.java:114) > at > org.apache.hadoop.hive.serde2.columnar.ColumnarStructBase.getField(ColumnarStructBase.java:172) > at > org.apache.hadoop.hive.serde2.objectinspector.ColumnarStructObjectInspector.getStructFieldData(ColumnarStructObjectInspector.java:67) > at > org.apache.hadoop.hive.ql.exec.ExprNodeColumnEvaluator._evaluate(ExprNodeColumnEvaluator.java:95) > at > org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:80) > at > org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:68) > at > org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer$GetAdaptor.setFromRow(MapJoinBytesTableContainer.java:552) > at > org.apache.hadoop.hive.ql.exec.MapJoinOperator.setMapJoinKey(MapJoinOperator.java:415) > at >
[jira] [Commented] (HIVE-25671) Hybrid Grace Hash Join NullPointer When query RCFile
[ https://issues.apache.org/jira/browse/HIVE-25671?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=17439012#comment-17439012 ] Nemon Lou commented on HIVE-25671: -- This bug seems related to kryo: Null pointer occurs when JVM trying to invoke getLength method . Invoker is in ColumnarStructBase$FieldInfo.uncheckedGetField(), while the actual method implementation is ColumnarStruct.getLength(), which overwride ColumnarStructBase.getLength(). The ColumnarStruct object is created by kryo deserializer. Adding a reference to ColumnarStructBase can fix this issue.Uploading a patch to demonstrate this fix. [^rcfile_kryo.patch] > Hybrid Grace Hash Join NullPointer When query RCFile > > > Key: HIVE-25671 > URL: https://issues.apache.org/jira/browse/HIVE-25671 > Project: Hive > Issue Type: Bug >Affects Versions: 3.1.2 >Reporter: Nemon Lou >Priority: Major > Attachments: rcfile_kryo.patch > > > Hive 3.1.0 kryo 3.0.3 tez engine > the following sql can reproduce this issue > {code:sql} > CREATE TABLE `nemon.rt_dm_lpc_customer_sum_tmp3_3`( >`logo` string, >`customer_code` string, >`brand_name` string, >`business_code` string, >`discount` double, >`creation_date` string, >`etl_time` string)stored as rcfile; > > CREATE TABLE `nemon.rt_dm_lpc_customer_sum_tmp4_1`( >`customer_code` string, >`etl_time` string) stored as rcfile; > > insert into nemon.rt_dm_lpc_customer_sum_tmp3_3 values > ("logo","customer_code","brand_name","business_code",1,"creation_date","etl_time") > > ,("logo","customer_code1","brand_name","business_code",1,"creation_date","etl_time") > > ,("logo","customer_code2","brand_name","business_code",1,"creation_date","etl_time") > > ,("logo","customer_code3","brand_name","business_code",1,"creation_date","etl_time") > > ,("logo","customer_code4","brand_name","business_code",1,"creation_date","etl_time") > > ,("logo","customer_code5","brand_name","business_code",1,"creation_date","etl_time") > > ,("logo","customer_code6","brand_name","business_code",1,"creation_date","etl_time") > > ,("logo","customer_code7","brand_name","business_code",1,"creation_date","etl_time") > > ,("logo","customer_code8","brand_name","business_code",1,"creation_date","etl_time") > > ,("logo","customer_code9","brand_name","business_code",1,"creation_date","etl_time"); > insert into nemon.rt_dm_lpc_customer_sum_tmp4_1 values > ("customer_code","etl_time") >,("customer_code1","etl_time") >,("customer_code2","etl_time") >,("customer_code3","etl_time") >; > set hive.auto.convert.join.noconditionaltask.size=10; > set hive.mapjoin.hybridgrace.hashtable=true; > SELECT > tt1.logo, > tt1.customer_code, > tt1.brand_name, > tt1.business_code, > tt1.discount, > tt1.creation_date, > date_format(from_utc_timestamp(unix_timestamp()*1000,'Asia/Shanghai'),'-MM-dd > HH:mm:ss') etl_time > from > ( > SELECT > t1.logo, > t1.customer_code, > t1.brand_name, > t1.business_code, > t1.discount, > t1.creation_date, > row_number() over(partition by t1.customer_code,t1.logo order by > t1.creation_date desc) as discount_rank > from nemon.rt_dm_lpc_customer_sum_tmp3_3 t1 > join nemon.rt_dm_lpc_customer_sum_tmp4_1 t2 > on t2.customer_code = t1.customer_code > ) tt1 > where tt1.discount_rank = 1; > {code} > Error log from tez task: > {noformat} > 2021-11-04 10:02:47,553 [INFO] [TezChild] |exec.MapJoinOperator|: Hybrid > Grace Hash Join: Deserializing spilled hash partition... 
> 2021-11-04 10:02:47,553 [INFO] [TezChild] |exec.MapJoinOperator|: Hybrid > Grace Hash Join: Number of rows in hashmap: 1 > 2021-11-04 10:02:47,554 [INFO] [TezChild] |exec.MapJoinOperator|: Hybrid > Grace Hash Join: Going to process spilled big table rows in partition 5. > Number of rows: 1 > 2021-11-04 10:02:47,561 [ERROR] [TezChild] |exec.MapJoinOperator|: Unexpected > exception from MapJoinOperator : null > java.lang.NullPointerException > at > org.apache.hadoop.hive.serde2.columnar.ColumnarStructBase$FieldInfo.uncheckedGetField(ColumnarStructBase.java:114) > at > org.apache.hadoop.hive.serde2.columnar.ColumnarStructBase.getField(ColumnarStructBase.java:172) > at > org.apache.hadoop.hive.serde2.objectinspector.ColumnarStructObjectInspector.getStructFieldData(ColumnarStructObjectInspector.java:67) > at > org.apache.hadoop.hive.ql.exec.ExprNodeColumnEvaluator._evaluate(ExprNodeColumnEvaluator.java:95) > at > org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:80) > at >
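To make the failure mode in the comment above easier to picture, here is a self-contained sketch of the general class of problem: an object rebuilt by Kryo comes back with a field it relied on left unset, and the first method call on the restored copy fails with a NullPointerException. The classes below are illustrative stand-ins, not the actual ColumnarStruct/ColumnarStructBase code or the fix in rcfile_kryo.patch.

{code:java}
import com.esotericsoftware.kryo.Kryo;
import com.esotericsoftware.kryo.io.Input;
import com.esotericsoftware.kryo.io.Output;

public class KryoLostStateSketch {

  static class CachedLength {
    // Populated by init(); Kryo's default FieldSerializer skips transient fields,
    // and nothing re-runs init() on the deserialized copy.
    transient int[] lengths;

    CachedLength init() {
      lengths = new int[] {1, 2, 3};
      return this;
    }

    int getLength(int i) {
      return lengths[i]; // NPE when 'lengths' was never restored
    }
  }

  public static void main(String[] args) {
    Kryo kryo = new Kryo();
    kryo.register(CachedLength.class);

    Output out = new Output(1024);
    kryo.writeObject(out, new CachedLength().init());

    CachedLength restored = kryo.readObject(new Input(out.toBytes()), CachedLength.class);
    System.out.println(new CachedLength().init().getLength(0)); // 1
    try {
      restored.getLength(0);
    } catch (NullPointerException e) {
      System.out.println("restored copy lost its state: " + e);
    }
  }
}
{code}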
[jira] [Updated] (HIVE-25671) Hybrid Grace Hash Join NullPointer When query RCFile
[ https://issues.apache.org/jira/browse/HIVE-25671?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Nemon Lou updated HIVE-25671: - Attachment: rcfile_kryo.patch > Hybrid Grace Hash Join NullPointer When query RCFile > > > Key: HIVE-25671 > URL: https://issues.apache.org/jira/browse/HIVE-25671 > Project: Hive > Issue Type: Bug >Affects Versions: 3.1.2 >Reporter: Nemon Lou >Priority: Major > Attachments: rcfile_kryo.patch > > > Hive 3.1.0 kryo 3.0.3 tez engine > the following sql can reproduce this issue > {code:sql} > CREATE TABLE `nemon.rt_dm_lpc_customer_sum_tmp3_3`( >`logo` string, >`customer_code` string, >`brand_name` string, >`business_code` string, >`discount` double, >`creation_date` string, >`etl_time` string)stored as rcfile; > > CREATE TABLE `nemon.rt_dm_lpc_customer_sum_tmp4_1`( >`customer_code` string, >`etl_time` string) stored as rcfile; > > insert into nemon.rt_dm_lpc_customer_sum_tmp3_3 values > ("logo","customer_code","brand_name","business_code",1,"creation_date","etl_time") > > ,("logo","customer_code1","brand_name","business_code",1,"creation_date","etl_time") > > ,("logo","customer_code2","brand_name","business_code",1,"creation_date","etl_time") > > ,("logo","customer_code3","brand_name","business_code",1,"creation_date","etl_time") > > ,("logo","customer_code4","brand_name","business_code",1,"creation_date","etl_time") > > ,("logo","customer_code5","brand_name","business_code",1,"creation_date","etl_time") > > ,("logo","customer_code6","brand_name","business_code",1,"creation_date","etl_time") > > ,("logo","customer_code7","brand_name","business_code",1,"creation_date","etl_time") > > ,("logo","customer_code8","brand_name","business_code",1,"creation_date","etl_time") > > ,("logo","customer_code9","brand_name","business_code",1,"creation_date","etl_time"); > insert into nemon.rt_dm_lpc_customer_sum_tmp4_1 values > ("customer_code","etl_time") >,("customer_code1","etl_time") >,("customer_code2","etl_time") >,("customer_code3","etl_time") >; > set hive.auto.convert.join.noconditionaltask.size=10; > set hive.mapjoin.hybridgrace.hashtable=true; > SELECT > tt1.logo, > tt1.customer_code, > tt1.brand_name, > tt1.business_code, > tt1.discount, > tt1.creation_date, > date_format(from_utc_timestamp(unix_timestamp()*1000,'Asia/Shanghai'),'-MM-dd > HH:mm:ss') etl_time > from > ( > SELECT > t1.logo, > t1.customer_code, > t1.brand_name, > t1.business_code, > t1.discount, > t1.creation_date, > row_number() over(partition by t1.customer_code,t1.logo order by > t1.creation_date desc) as discount_rank > from nemon.rt_dm_lpc_customer_sum_tmp3_3 t1 > join nemon.rt_dm_lpc_customer_sum_tmp4_1 t2 > on t2.customer_code = t1.customer_code > ) tt1 > where tt1.discount_rank = 1; > {code} > Error log from tez task: > {noformat} > 2021-11-04 10:02:47,553 [INFO] [TezChild] |exec.MapJoinOperator|: Hybrid > Grace Hash Join: Deserializing spilled hash partition... > 2021-11-04 10:02:47,553 [INFO] [TezChild] |exec.MapJoinOperator|: Hybrid > Grace Hash Join: Number of rows in hashmap: 1 > 2021-11-04 10:02:47,554 [INFO] [TezChild] |exec.MapJoinOperator|: Hybrid > Grace Hash Join: Going to process spilled big table rows in partition 5. 
> Number of rows: 1 > 2021-11-04 10:02:47,561 [ERROR] [TezChild] |exec.MapJoinOperator|: Unexpected > exception from MapJoinOperator : null > java.lang.NullPointerException > at > org.apache.hadoop.hive.serde2.columnar.ColumnarStructBase$FieldInfo.uncheckedGetField(ColumnarStructBase.java:114) > at > org.apache.hadoop.hive.serde2.columnar.ColumnarStructBase.getField(ColumnarStructBase.java:172) > at > org.apache.hadoop.hive.serde2.objectinspector.ColumnarStructObjectInspector.getStructFieldData(ColumnarStructObjectInspector.java:67) > at > org.apache.hadoop.hive.ql.exec.ExprNodeColumnEvaluator._evaluate(ExprNodeColumnEvaluator.java:95) > at > org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:80) > at > org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:68) > at > org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer$GetAdaptor.setFromRow(MapJoinBytesTableContainer.java:552) > at > org.apache.hadoop.hive.ql.exec.MapJoinOperator.setMapJoinKey(MapJoinOperator.java:415) > at > org.apache.hadoop.hive.ql.exec.MapJoinOperator.process(MapJoinOperator.java:466) > at >
[jira] [Updated] (HIVE-25671) Hybrid Grace Hash Join NullPointer When query RCFile
[ https://issues.apache.org/jira/browse/HIVE-25671?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Nemon Lou updated HIVE-25671: - Description: Hive 3.1.0 kryo 3.0.3 tez engine the following sql can reproduce this issue {code:sql} CREATE TABLE `nemon.rt_dm_lpc_customer_sum_tmp3_3`( `logo` string, `customer_code` string, `brand_name` string, `business_code` string, `discount` double, `creation_date` string, `etl_time` string)stored as rcfile; CREATE TABLE `nemon.rt_dm_lpc_customer_sum_tmp4_1`( `customer_code` string, `etl_time` string) stored as rcfile; insert into nemon.rt_dm_lpc_customer_sum_tmp3_3 values ("logo","customer_code","brand_name","business_code",1,"creation_date","etl_time") ,("logo","customer_code1","brand_name","business_code",1,"creation_date","etl_time") ,("logo","customer_code2","brand_name","business_code",1,"creation_date","etl_time") ,("logo","customer_code3","brand_name","business_code",1,"creation_date","etl_time") ,("logo","customer_code4","brand_name","business_code",1,"creation_date","etl_time") ,("logo","customer_code5","brand_name","business_code",1,"creation_date","etl_time") ,("logo","customer_code6","brand_name","business_code",1,"creation_date","etl_time") ,("logo","customer_code7","brand_name","business_code",1,"creation_date","etl_time") ,("logo","customer_code8","brand_name","business_code",1,"creation_date","etl_time") ,("logo","customer_code9","brand_name","business_code",1,"creation_date","etl_time"); insert into nemon.rt_dm_lpc_customer_sum_tmp4_1 values ("customer_code","etl_time") ,("customer_code1","etl_time") ,("customer_code2","etl_time") ,("customer_code3","etl_time") ; set hive.auto.convert.join.noconditionaltask.size=10; set hive.mapjoin.hybridgrace.hashtable=true; SELECT tt1.logo, tt1.customer_code, tt1.brand_name, tt1.business_code, tt1.discount, tt1.creation_date, date_format(from_utc_timestamp(unix_timestamp()*1000,'Asia/Shanghai'),'-MM-dd HH:mm:ss') etl_time from ( SELECT t1.logo, t1.customer_code, t1.brand_name, t1.business_code, t1.discount, t1.creation_date, row_number() over(partition by t1.customer_code,t1.logo order by t1.creation_date desc) as discount_rank from nemon.rt_dm_lpc_customer_sum_tmp3_3 t1 join nemon.rt_dm_lpc_customer_sum_tmp4_1 t2 on t2.customer_code = t1.customer_code ) tt1 where tt1.discount_rank = 1; {code} Error log from tez task: {noformat} 2021-11-04 10:02:47,553 [INFO] [TezChild] |exec.MapJoinOperator|: Hybrid Grace Hash Join: Deserializing spilled hash partition... 2021-11-04 10:02:47,553 [INFO] [TezChild] |exec.MapJoinOperator|: Hybrid Grace Hash Join: Number of rows in hashmap: 1 2021-11-04 10:02:47,554 [INFO] [TezChild] |exec.MapJoinOperator|: Hybrid Grace Hash Join: Going to process spilled big table rows in partition 5. 
Number of rows: 1 2021-11-04 10:02:47,561 [ERROR] [TezChild] |exec.MapJoinOperator|: Unexpected exception from MapJoinOperator : null java.lang.NullPointerException at org.apache.hadoop.hive.serde2.columnar.ColumnarStructBase$FieldInfo.uncheckedGetField(ColumnarStructBase.java:114) at org.apache.hadoop.hive.serde2.columnar.ColumnarStructBase.getField(ColumnarStructBase.java:172) at org.apache.hadoop.hive.serde2.objectinspector.ColumnarStructObjectInspector.getStructFieldData(ColumnarStructObjectInspector.java:67) at org.apache.hadoop.hive.ql.exec.ExprNodeColumnEvaluator._evaluate(ExprNodeColumnEvaluator.java:95) at org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:80) at org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:68) at org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer$GetAdaptor.setFromRow(MapJoinBytesTableContainer.java:552) at org.apache.hadoop.hive.ql.exec.MapJoinOperator.setMapJoinKey(MapJoinOperator.java:415) at org.apache.hadoop.hive.ql.exec.MapJoinOperator.process(MapJoinOperator.java:466) at org.apache.hadoop.hive.ql.exec.MapJoinOperator.reProcessBigTable(MapJoinOperator.java:755) at org.apache.hadoop.hive.ql.exec.MapJoinOperator.continueProcess(MapJoinOperator.java:671) at org.apache.hadoop.hive.ql.exec.MapJoinOperator.closeOp(MapJoinOperator.java:604) at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:733) at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:757) at org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.close(MapRecordProcessor.java:477) at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:284) at
[jira] [Updated] (HIVE-25671) Hybrid Grace Hash Join NullPointer When query RCFile
[ https://issues.apache.org/jira/browse/HIVE-25671?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Nemon Lou updated HIVE-25671: - Description: {noformat} 2021-11-04 10:02:47,553 [INFO] [TezChild] |exec.MapJoinOperator|: Hybrid Grace Hash Join: Deserializing spilled hash partition... 2021-11-04 10:02:47,553 [INFO] [TezChild] |exec.MapJoinOperator|: Hybrid Grace Hash Join: Number of rows in hashmap: 1 2021-11-04 10:02:47,554 [INFO] [TezChild] |exec.MapJoinOperator|: Hybrid Grace Hash Join: Going to process spilled big table rows in partition 5. Number of rows: 1 2021-11-04 10:02:47,561 [ERROR] [TezChild] |exec.MapJoinOperator|: Unexpected exception from MapJoinOperator : null java.lang.NullPointerException at org.apache.hadoop.hive.serde2.columnar.ColumnarStructBase$FieldInfo.uncheckedGetField(ColumnarStructBase.java:114) at org.apache.hadoop.hive.serde2.columnar.ColumnarStructBase.getField(ColumnarStructBase.java:172) at org.apache.hadoop.hive.serde2.objectinspector.ColumnarStructObjectInspector.getStructFieldData(ColumnarStructObjectInspector.java:67) at org.apache.hadoop.hive.ql.exec.ExprNodeColumnEvaluator._evaluate(ExprNodeColumnEvaluator.java:95) at org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:80) at org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:68) at org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer$GetAdaptor.setFromRow(MapJoinBytesTableContainer.java:552) at org.apache.hadoop.hive.ql.exec.MapJoinOperator.setMapJoinKey(MapJoinOperator.java:415) at org.apache.hadoop.hive.ql.exec.MapJoinOperator.process(MapJoinOperator.java:466) at org.apache.hadoop.hive.ql.exec.MapJoinOperator.reProcessBigTable(MapJoinOperator.java:755) at org.apache.hadoop.hive.ql.exec.MapJoinOperator.continueProcess(MapJoinOperator.java:671) at org.apache.hadoop.hive.ql.exec.MapJoinOperator.closeOp(MapJoinOperator.java:604) at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:733) at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:757) at org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.close(MapRecordProcessor.java:477) at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:284) at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:250) at org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:374) at org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:73) at org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:61) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:422) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1729) at org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:61) at org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:37) at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36) at com.google.common.util.concurrent.TrustedListenableFutureTask$TrustedFutureInterruptibleTask.runInterruptibly(TrustedListenableFutureTask.java:108) at com.google.common.util.concurrent.InterruptibleTask.run(InterruptibleTask.java:41) at com.google.common.util.concurrent.TrustedListenableFutureTask.run(TrustedListenableFutureTask.java:77) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) {noformat} was: {format} 2021-11-04 10:02:47,553 [INFO] [TezChild] |exec.MapJoinOperator|: Hybrid Grace Hash Join: Deserializing spilled hash partition... 2021-11-04 10:02:47,553 [INFO] [TezChild] |exec.MapJoinOperator|: Hybrid Grace Hash Join: Number of rows in hashmap: 1 2021-11-04 10:02:47,554 [INFO] [TezChild] |exec.MapJoinOperator|: Hybrid Grace Hash Join: Going to process spilled big table rows in partition 5. Number of rows: 1 2021-11-04 10:02:47,561 [ERROR] [TezChild] |exec.MapJoinOperator|: Unexpected exception from MapJoinOperator : null java.lang.NullPointerException at org.apache.hadoop.hive.serde2.columnar.ColumnarStructBase$FieldInfo.uncheckedGetField(ColumnarStructBase.java:114) at org.apache.hadoop.hive.serde2.columnar.ColumnarStructBase.getField(ColumnarStructBase.java:172) at org.apache.hadoop.hive.serde2.objectinspector.ColumnarStructObjectInspector.getStructFieldData(ColumnarStructObjectInspector.java:67) at
[jira] [Commented] (HIVE-25671) Hybrid Grace Hash Join NullPointer When query RCFile
[ https://issues.apache.org/jira/browse/HIVE-25671?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=17438985#comment-17438985 ] David Cai commented on HIVE-25671: -- if you can describe the reproduce steps, it will help to locate this issue. > Hybrid Grace Hash Join NullPointer When query RCFile > > > Key: HIVE-25671 > URL: https://issues.apache.org/jira/browse/HIVE-25671 > Project: Hive > Issue Type: Bug >Affects Versions: 3.1.2 >Reporter: Nemon Lou >Priority: Major > > {format} > 2021-11-04 10:02:47,553 [INFO] [TezChild] |exec.MapJoinOperator|: Hybrid > Grace Hash Join: Deserializing spilled hash partition... > 2021-11-04 10:02:47,553 [INFO] [TezChild] |exec.MapJoinOperator|: Hybrid > Grace Hash Join: Number of rows in hashmap: 1 > 2021-11-04 10:02:47,554 [INFO] [TezChild] |exec.MapJoinOperator|: Hybrid > Grace Hash Join: Going to process spilled big table rows in partition 5. > Number of rows: 1 > 2021-11-04 10:02:47,561 [ERROR] [TezChild] |exec.MapJoinOperator|: Unexpected > exception from MapJoinOperator : null > java.lang.NullPointerException > at > org.apache.hadoop.hive.serde2.columnar.ColumnarStructBase$FieldInfo.uncheckedGetField(ColumnarStructBase.java:114) > at > org.apache.hadoop.hive.serde2.columnar.ColumnarStructBase.getField(ColumnarStructBase.java:172) > at > org.apache.hadoop.hive.serde2.objectinspector.ColumnarStructObjectInspector.getStructFieldData(ColumnarStructObjectInspector.java:67) > at > org.apache.hadoop.hive.ql.exec.ExprNodeColumnEvaluator._evaluate(ExprNodeColumnEvaluator.java:95) > at > org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:80) > at > org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:68) > at > org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer$GetAdaptor.setFromRow(MapJoinBytesTableContainer.java:552) > at > org.apache.hadoop.hive.ql.exec.MapJoinOperator.setMapJoinKey(MapJoinOperator.java:415) > at > org.apache.hadoop.hive.ql.exec.MapJoinOperator.process(MapJoinOperator.java:466) > at > org.apache.hadoop.hive.ql.exec.MapJoinOperator.reProcessBigTable(MapJoinOperator.java:755) > at > org.apache.hadoop.hive.ql.exec.MapJoinOperator.continueProcess(MapJoinOperator.java:671) > at > org.apache.hadoop.hive.ql.exec.MapJoinOperator.closeOp(MapJoinOperator.java:604) > at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:733) > at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:757) > at > org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.close(MapRecordProcessor.java:477) > at > org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:284) > at > org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:250) > at > org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:374) > at > org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:73) > at > org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:61) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:422) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1729) > at > org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:61) > at > org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:37) > at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36) > at > 
com.google.common.util.concurrent.TrustedListenableFutureTask$TrustedFutureInterruptibleTask.runInterruptibly(TrustedListenableFutureTask.java:108) > at > com.google.common.util.concurrent.InterruptibleTask.run(InterruptibleTask.java:41) > at > com.google.common.util.concurrent.TrustedListenableFutureTask.run(TrustedListenableFutureTask.java:77) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > at java.lang.Thread.run(Thread.java:748) > {format} -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Assigned] (HIVE-25674) Add getAllTableConstraints to HS2 cache
[ https://issues.apache.org/jira/browse/HIVE-25674?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Yu-Wen Lai reassigned HIVE-25674: - > Add getAllTableConstraints to HS2 cache > --- > > Key: HIVE-25674 > URL: https://issues.apache.org/jira/browse/HIVE-25674 > Project: Hive > Issue Type: Improvement > Components: HiveServer2 >Reporter: Yu-Wen Lai >Assignee: Yu-Wen Lai >Priority: Major > > In HIVE-22782, a new API, getAllTableConstraints, was introduced to retrieve all > of the constraints in one call. We could add a local cache to HS2 to avoid > duplicate calls to HMS. -- This message was sent by Atlassian Jira (v8.3.4#803005)
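A rough sketch of the read-through cache the description proposes is below. Everything in it — the key format, the value type, the loader — is a placeholder chosen for illustration; the real change would plug into HS2's existing metastore cache and the getAllTableConstraints call from HIVE-22782, and it would need invalidation on DDL or metastore events to avoid serving stale constraints.

{code:java}
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.function.Function;

// Hypothetical read-through cache keyed by "db.table"; C stands in for whatever
// object getAllTableConstraints returns.
public class TableConstraintsCache<C> {

  private final ConcurrentMap<String, C> cache = new ConcurrentHashMap<>();
  private final Function<String, C> loadFromMetastore; // e.g. a getAllTableConstraints call

  public TableConstraintsCache(Function<String, C> loadFromMetastore) {
    this.loadFromMetastore = loadFromMetastore;
  }

  // Returns cached constraints, calling HMS only on a miss.
  public C get(String dbDotTable) {
    return cache.computeIfAbsent(dbDotTable, loadFromMetastore);
  }

  // Must be invoked when DDL changes a table's constraints, or stale data is served.
  public void invalidate(String dbDotTable) {
    cache.remove(dbDotTable);
  }
}
{code}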
[jira] [Updated] (HIVE-25673) Column pruning fix for MR tasks
[ https://issues.apache.org/jira/browse/HIVE-25673?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] ASF GitHub Bot updated HIVE-25673: -- Labels: pull-request-available (was: ) > Column pruning fix for MR tasks > --- > > Key: HIVE-25673 > URL: https://issues.apache.org/jira/browse/HIVE-25673 > Project: Hive > Issue Type: Bug >Reporter: Peter Vary >Assignee: Peter Vary >Priority: Major > Labels: pull-request-available > Time Spent: 10m > Remaining Estimate: 0h > > When running join tests for Iceberg tables then we got the following > exception: > {code} > Caused by: java.lang.RuntimeException: Map operator initialization failed > at > org.apache.hadoop.hive.ql.exec.mr.ExecMapper.configure(ExecMapper.java:131) > ... 23 more > Caused by: java.lang.RuntimeException: cannot find field val from > [org.apache.iceberg.mr.hive.serde.objectinspector.IcebergRecordObjectInspector$IcebergRecordStructField@45f29d] > at > org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.getStandardStructFieldRef(ObjectInspectorUtils.java:550) > at > org.apache.iceberg.mr.hive.serde.objectinspector.IcebergRecordObjectInspector.getStructFieldRef(IcebergRecordObjectInspector.java:70) > at > org.apache.hadoop.hive.ql.exec.ExprNodeColumnEvaluator.initialize(ExprNodeColumnEvaluator.java:56) > at > org.apache.hadoop.hive.ql.exec.Operator.initEvaluators(Operator.java:1073) > at > org.apache.hadoop.hive.ql.exec.Operator.initEvaluatorsAndReturnStruct(Operator.java:1099) > at > org.apache.hadoop.hive.ql.exec.SelectOperator.initializeOp(SelectOperator.java:74) > at org.apache.hadoop.hive.ql.exec.Operator.initialize(Operator.java:360) > at org.apache.hadoop.hive.ql.exec.Operator.initialize(Operator.java:549) > at > org.apache.hadoop.hive.ql.exec.Operator.initializeChildren(Operator.java:503) > at org.apache.hadoop.hive.ql.exec.Operator.initialize(Operator.java:369) > at org.apache.hadoop.hive.ql.exec.Operator.initialize(Operator.java:549) > at > org.apache.hadoop.hive.ql.exec.Operator.initializeChildren(Operator.java:503) > at org.apache.hadoop.hive.ql.exec.Operator.initialize(Operator.java:369) > at > org.apache.hadoop.hive.ql.exec.MapOperator.initializeMapOperator(MapOperator.java:505) > at > org.apache.hadoop.hive.ql.exec.mr.ExecMapper.configure(ExecMapper.java:110) > ... 23 more > {code} -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (HIVE-25673) Column pruning fix for MR tasks
[ https://issues.apache.org/jira/browse/HIVE-25673?focusedWorklogId=676719=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-676719 ] ASF GitHub Bot logged work on HIVE-25673: - Author: ASF GitHub Bot Created on: 04/Nov/21 20:43 Start Date: 04/Nov/21 20:43 Worklog Time Spent: 10m Work Description: pvary opened a new pull request #2765: URL: https://github.com/apache/hive/pull/2765 ### What changes were proposed in this pull request? When updating column pruning information `READ_NESTED_COLUMN_PATH_CONF_STR`, update `READ_COLUMN_NAMES_CONF_STR` and `READ_COLUMN_IDS_CONF_STR` as well. ### Why are the changes needed? Iceberg MR queries are failing if multiple tables are queried and several columns are pruned ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Added unit test -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 676719) Remaining Estimate: 0h Time Spent: 10m > Column pruning fix for MR tasks > --- > > Key: HIVE-25673 > URL: https://issues.apache.org/jira/browse/HIVE-25673 > Project: Hive > Issue Type: Bug >Reporter: Peter Vary >Assignee: Peter Vary >Priority: Major > Time Spent: 10m > Remaining Estimate: 0h > > When running join tests for Iceberg tables then we got the following > exception: > {code} > Caused by: java.lang.RuntimeException: Map operator initialization failed > at > org.apache.hadoop.hive.ql.exec.mr.ExecMapper.configure(ExecMapper.java:131) > ... 23 more > Caused by: java.lang.RuntimeException: cannot find field val from > [org.apache.iceberg.mr.hive.serde.objectinspector.IcebergRecordObjectInspector$IcebergRecordStructField@45f29d] > at > org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.getStandardStructFieldRef(ObjectInspectorUtils.java:550) > at > org.apache.iceberg.mr.hive.serde.objectinspector.IcebergRecordObjectInspector.getStructFieldRef(IcebergRecordObjectInspector.java:70) > at > org.apache.hadoop.hive.ql.exec.ExprNodeColumnEvaluator.initialize(ExprNodeColumnEvaluator.java:56) > at > org.apache.hadoop.hive.ql.exec.Operator.initEvaluators(Operator.java:1073) > at > org.apache.hadoop.hive.ql.exec.Operator.initEvaluatorsAndReturnStruct(Operator.java:1099) > at > org.apache.hadoop.hive.ql.exec.SelectOperator.initializeOp(SelectOperator.java:74) > at org.apache.hadoop.hive.ql.exec.Operator.initialize(Operator.java:360) > at org.apache.hadoop.hive.ql.exec.Operator.initialize(Operator.java:549) > at > org.apache.hadoop.hive.ql.exec.Operator.initializeChildren(Operator.java:503) > at org.apache.hadoop.hive.ql.exec.Operator.initialize(Operator.java:369) > at org.apache.hadoop.hive.ql.exec.Operator.initialize(Operator.java:549) > at > org.apache.hadoop.hive.ql.exec.Operator.initializeChildren(Operator.java:503) > at org.apache.hadoop.hive.ql.exec.Operator.initialize(Operator.java:369) > at > org.apache.hadoop.hive.ql.exec.MapOperator.initializeMapOperator(MapOperator.java:505) > at > org.apache.hadoop.hive.ql.exec.mr.ExecMapper.configure(ExecMapper.java:110) > ... 23 more > {code} -- This message was sent by Atlassian Jira (v8.3.4#803005)
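To restate the intent of the change in code form: whenever the nested-column pruning paths are written to the job configuration, the flat column ids and names have to be written alongside them, otherwise the serde and the object inspectors disagree about which fields exist (the "cannot find field val" failure quoted above is that kind of mismatch). The sketch below uses plain Hadoop Configuration; the literal key strings are assumptions standing in for the ColumnProjectionUtils constants named in the PR description.

{code:java}
import java.util.List;
import java.util.stream.Collectors;
import org.apache.hadoop.conf.Configuration;

public class ColumnPruningConfSketch {

  // Assumed values; in Hive these come from the READ_COLUMN_IDS_CONF_STR,
  // READ_COLUMN_NAMES_CONF_STR and READ_NESTED_COLUMN_PATH_CONF_STR constants.
  static final String READ_COLUMN_IDS = "hive.io.file.readcolumn.ids";
  static final String READ_COLUMN_NAMES = "hive.io.file.readcolumn.names";
  static final String READ_NESTED_COLUMN_PATHS = "hive.io.file.readNestedColumn.paths";

  // The point of the fix: set all three properties together, never just the nested paths.
  static void setReadColumns(Configuration conf, List<Integer> ids, List<String> names,
      List<String> nestedPaths) {
    conf.set(READ_NESTED_COLUMN_PATHS, String.join(",", nestedPaths));
    conf.set(READ_COLUMN_IDS,
        ids.stream().map(String::valueOf).collect(Collectors.joining(",")));
    conf.set(READ_COLUMN_NAMES, String.join(",", names));
  }
}
{code}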
[jira] [Assigned] (HIVE-25673) Column pruning fix for MR tasks
[ https://issues.apache.org/jira/browse/HIVE-25673?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Peter Vary reassigned HIVE-25673: - > Column pruning fix for MR tasks > --- > > Key: HIVE-25673 > URL: https://issues.apache.org/jira/browse/HIVE-25673 > Project: Hive > Issue Type: Bug >Reporter: Peter Vary >Assignee: Peter Vary >Priority: Major > > When running join tests for Iceberg tables then we got the following > exception: > {code} > Caused by: java.lang.RuntimeException: Map operator initialization failed > at > org.apache.hadoop.hive.ql.exec.mr.ExecMapper.configure(ExecMapper.java:131) > ... 23 more > Caused by: java.lang.RuntimeException: cannot find field val from > [org.apache.iceberg.mr.hive.serde.objectinspector.IcebergRecordObjectInspector$IcebergRecordStructField@45f29d] > at > org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.getStandardStructFieldRef(ObjectInspectorUtils.java:550) > at > org.apache.iceberg.mr.hive.serde.objectinspector.IcebergRecordObjectInspector.getStructFieldRef(IcebergRecordObjectInspector.java:70) > at > org.apache.hadoop.hive.ql.exec.ExprNodeColumnEvaluator.initialize(ExprNodeColumnEvaluator.java:56) > at > org.apache.hadoop.hive.ql.exec.Operator.initEvaluators(Operator.java:1073) > at > org.apache.hadoop.hive.ql.exec.Operator.initEvaluatorsAndReturnStruct(Operator.java:1099) > at > org.apache.hadoop.hive.ql.exec.SelectOperator.initializeOp(SelectOperator.java:74) > at org.apache.hadoop.hive.ql.exec.Operator.initialize(Operator.java:360) > at org.apache.hadoop.hive.ql.exec.Operator.initialize(Operator.java:549) > at > org.apache.hadoop.hive.ql.exec.Operator.initializeChildren(Operator.java:503) > at org.apache.hadoop.hive.ql.exec.Operator.initialize(Operator.java:369) > at org.apache.hadoop.hive.ql.exec.Operator.initialize(Operator.java:549) > at > org.apache.hadoop.hive.ql.exec.Operator.initializeChildren(Operator.java:503) > at org.apache.hadoop.hive.ql.exec.Operator.initialize(Operator.java:369) > at > org.apache.hadoop.hive.ql.exec.MapOperator.initializeMapOperator(MapOperator.java:505) > at > org.apache.hadoop.hive.ql.exec.mr.ExecMapper.configure(ExecMapper.java:110) > ... 23 more > {code} -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Updated] (HIVE-25672) Hive isn't purging older compaction entries from show compaction command
[ https://issues.apache.org/jira/browse/HIVE-25672?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Jalender Reddy updated HIVE-25672: -- Description: Added below properties in hive-site, but it's not enforced to auto purging. When we run show compaction command it takes forever and returns billions of rows. Result of show compactions command : {code:java} 752,450 rows selected (198.066 seconds) {code} {code:java} hive.compactor.history.retention.succeeded": "10", "hive.compactor.history.retention.failed": "10", "hive.compactor.history.retention.attempted": "10", "hive.compactor.history.reaper.interval": "10m" {code} was: Added below properties in hive-site, but it's not enforced to auto purging. When we run show compaction command it takes forever and returns billions of rows. Result of show compaction command : {code:java} 752,450 rows selected (198.066 seconds) {code} {code:java} hive.compactor.history.retention.succeeded": "10", "hive.compactor.history.retention.failed": "10", "hive.compactor.history.retention.attempted": "10", "hive.compactor.history.reaper.interval": "10m" {code} > Hive isn't purging older compaction entries from show compaction command > > > Key: HIVE-25672 > URL: https://issues.apache.org/jira/browse/HIVE-25672 > Project: Hive > Issue Type: Bug > Components: Hive, Metastore, Transactions >Affects Versions: 3.1.1 >Reporter: Rohan Nimmagadda >Priority: Minor > > Added below properties in hive-site, but it's not enforced to auto purging. > When we run show compaction command it takes forever and returns billions of > rows. > Result of show compactions command : > {code:java} > 752,450 rows selected (198.066 seconds) > {code} > {code:java} > hive.compactor.history.retention.succeeded": "10", > "hive.compactor.history.retention.failed": "10", > "hive.compactor.history.retention.attempted": "10", > "hive.compactor.history.reaper.interval": "10m" {code} -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Updated] (HIVE-25672) Hive isn't purging older compaction entries from show compaction command
[ https://issues.apache.org/jira/browse/HIVE-25672?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Rohan Nimmagadda updated HIVE-25672: Description: Added below properties in hive-site, but it's not enforced to auto purging. When we run show compaction command it takes forever and returns billions of rows. Result of show compaction command : {code:java} 752,450 rows selected (198.066 seconds) {code} {code:java} hive.compactor.history.retention.succeeded": "10", "hive.compactor.history.retention.failed": "10", "hive.compactor.history.retention.attempted": "10", "hive.compactor.history.reaper.interval": "10m" {code} was: Added below properties in hive-site , but its not enforcing to auto purging. When we run show compaction command it takes forever and returns billions of rows. Result of show compaction command : {code:java} 752,450 rows selected (198.066 seconds) {code} {code:java} hive.compactor.history.retention.succeeded": "10", "hive.compactor.history.retention.failed": "10", "hive.compactor.history.retention.attempted": "10", "hive.compactor.history.reaper.interval": "10m" {code} > Hive isn't purging older compaction entries from show compaction command > > > Key: HIVE-25672 > URL: https://issues.apache.org/jira/browse/HIVE-25672 > Project: Hive > Issue Type: Bug > Components: Hive, Metastore, Transactions >Affects Versions: 3.1.1 >Reporter: Rohan Nimmagadda >Priority: Minor > > Added below properties in hive-site, but it's not enforced to auto purging. > When we run show compaction command it takes forever and returns billions of > rows. > Result of show compaction command : > {code:java} > 752,450 rows selected (198.066 seconds) > {code} > {code:java} > hive.compactor.history.retention.succeeded": "10", > "hive.compactor.history.retention.failed": "10", > "hive.compactor.history.retention.attempted": "10", > "hive.compactor.history.reaper.interval": "10m" {code} -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Updated] (HIVE-25672) Hive isn't purging older compaction entries from show compaction command
[ https://issues.apache.org/jira/browse/HIVE-25672?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Rohan Nimmagadda updated HIVE-25672: Description: Added below properties in hive-site , but its not enforcing to auto purging. When we run show compaction command it takes forever and returns billions of rows. Result of show compaction command : {code:java} 752,450 rows selected (198.066 seconds) {code} {code:java} hive.compactor.history.retention.succeeded": "10", "hive.compactor.history.retention.failed": "10", "hive.compactor.history.retention.attempted": "10", "hive.compactor.history.reaper.interval": "10m" {code} was: Added below properties in hive-site , but its not enforcing to auto purging. When we run show compaction command it takes forever and returns billions of rows. {code:java} hive.compactor.history.retention.succeeded": "10", "hive.compactor.history.retention.failed": "10", "hive.compactor.history.retention.attempted": "10", "hive.compactor.history.reaper.interval": "10m" {code} > Hive isn't purging older compaction entries from show compaction command > > > Key: HIVE-25672 > URL: https://issues.apache.org/jira/browse/HIVE-25672 > Project: Hive > Issue Type: Bug > Components: Hive, Metastore, Transactions >Affects Versions: 3.1.1 >Reporter: Rohan Nimmagadda >Priority: Minor > > Added below properties in hive-site , but its not enforcing to auto purging. > When we run show compaction command it takes forever and returns billions of > rows. > Result of show compaction command : > {code:java} > 752,450 rows selected (198.066 seconds) {code} > {code:java} > hive.compactor.history.retention.succeeded": "10", > "hive.compactor.history.retention.failed": "10", > "hive.compactor.history.retention.attempted": "10", > "hive.compactor.history.reaper.interval": "10m" {code} -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Updated] (HIVE-25671) Hybrid Grace Hash Join NullPointer When query RCFile
[ https://issues.apache.org/jira/browse/HIVE-25671?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] FrankieLee updated HIVE-25671: -- Description: {format} 2021-11-04 10:02:47,553 [INFO] [TezChild] |exec.MapJoinOperator|: Hybrid Grace Hash Join: Deserializing spilled hash partition... 2021-11-04 10:02:47,553 [INFO] [TezChild] |exec.MapJoinOperator|: Hybrid Grace Hash Join: Number of rows in hashmap: 1 2021-11-04 10:02:47,554 [INFO] [TezChild] |exec.MapJoinOperator|: Hybrid Grace Hash Join: Going to process spilled big table rows in partition 5. Number of rows: 1 2021-11-04 10:02:47,561 [ERROR] [TezChild] |exec.MapJoinOperator|: Unexpected exception from MapJoinOperator : null java.lang.NullPointerException at org.apache.hadoop.hive.serde2.columnar.ColumnarStructBase$FieldInfo.uncheckedGetField(ColumnarStructBase.java:114) at org.apache.hadoop.hive.serde2.columnar.ColumnarStructBase.getField(ColumnarStructBase.java:172) at org.apache.hadoop.hive.serde2.objectinspector.ColumnarStructObjectInspector.getStructFieldData(ColumnarStructObjectInspector.java:67) at org.apache.hadoop.hive.ql.exec.ExprNodeColumnEvaluator._evaluate(ExprNodeColumnEvaluator.java:95) at org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:80) at org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:68) at org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer$GetAdaptor.setFromRow(MapJoinBytesTableContainer.java:552) at org.apache.hadoop.hive.ql.exec.MapJoinOperator.setMapJoinKey(MapJoinOperator.java:415) at org.apache.hadoop.hive.ql.exec.MapJoinOperator.process(MapJoinOperator.java:466) at org.apache.hadoop.hive.ql.exec.MapJoinOperator.reProcessBigTable(MapJoinOperator.java:755) at org.apache.hadoop.hive.ql.exec.MapJoinOperator.continueProcess(MapJoinOperator.java:671) at org.apache.hadoop.hive.ql.exec.MapJoinOperator.closeOp(MapJoinOperator.java:604) at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:733) at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:757) at org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.close(MapRecordProcessor.java:477) at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:284) at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:250) at org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:374) at org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:73) at org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:61) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:422) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1729) at org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:61) at org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:37) at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36) at com.google.common.util.concurrent.TrustedListenableFutureTask$TrustedFutureInterruptibleTask.runInterruptibly(TrustedListenableFutureTask.java:108) at com.google.common.util.concurrent.InterruptibleTask.run(InterruptibleTask.java:41) at com.google.common.util.concurrent.TrustedListenableFutureTask.run(TrustedListenableFutureTask.java:77) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) {format} was: {format} 2021-11-04 10:02:47,553 [INFO] [TezChild] |exec.MapJoinOperator|: Hybrid Grace Hash Join: Deserializing spilled hash partition... 2021-11-04 10:02:47,553 [INFO] [TezChild] |exec.MapJoinOperator|: Hybrid Grace Hash Join: Number of rows in hashmap: 1 2021-11-04 10:02:47,554 [INFO] [TezChild] |exec.MapJoinOperator|: Hybrid Grace Hash Join: Going to process spilled big table rows in partition 5. Number of rows: 1 2021-11-04 10:02:47,561 [ERROR] [TezChild] |exec.MapJoinOperator|: Unexpected exception from MapJoinOperator : null java.lang.NullPointerException at org.apache.hadoop.hive.serde2.columnar.ColumnarStructBase$FieldInfo.uncheckedGetField(ColumnarStructBase.java:114) at org.apache.hadoop.hive.serde2.columnar.ColumnarStructBase.getField(ColumnarStructBase.java:172) at org.apache.hadoop.hive.serde2.objectinspector.ColumnarStructObjectInspector.getStructFieldData(ColumnarStructObjectInspector.java:67) at
[jira] [Resolved] (HIVE-25060) Hive Compactor doesn't launch cleaner
[ https://issues.apache.org/jira/browse/HIVE-25060?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Karen Coppage resolved HIVE-25060. -- Assignee: Fran Gonzalez Resolution: Abandoned Due to a lack of information we cannot proceed. As it's been more than 6 months, I will mark this issue abandoned. > Hive Compactor doesn't launch cleaner > - > > Key: HIVE-25060 > URL: https://issues.apache.org/jira/browse/HIVE-25060 > Project: Hive > Issue Type: Bug > Components: Hive >Affects Versions: 3.1.1 > Environment: Hive 3.1.0 > Hadoop 3.1.1 >Reporter: Fran Gonzalez >Assignee: Fran Gonzalez >Priority: Major > > Hello, > there are problems with the Hive Compactor. We can see the > message "Max block location exceeded for split" in hivemetastore.log, and it's appearing more and > more often. > After that, the "compactor.Cleaner" is not launched. > We observed that after a Hive Metastore restart, the "compactor.Cleaner" has > never been launched again, but the logs don't show any message about it. > Could this be a degradation of the Hive Compactor as delta files grow in > the partitions? > Regards. -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (HIVE-25596) Compress Hive Replication Metrics while storing
[ https://issues.apache.org/jira/browse/HIVE-25596?focusedWorklogId=676240=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-676240 ] ASF GitHub Bot logged work on HIVE-25596: - Author: ASF GitHub Bot Created on: 04/Nov/21 07:27 Start Date: 04/Nov/21 07:27 Worklog Time Spent: 10m Work Description: hmangla98 commented on a change in pull request #2724: URL: https://github.com/apache/hive/pull/2724#discussion_r742588986 ## File path: ql/src/java/org/apache/hadoop/hive/ql/parse/repl/metric/event/Stage.java ## @@ -129,12 +127,7 @@ public String getReplStats() { } public void setReplStats(String replStats) { -// Check the stat string doesn't surpass the RM_PROGRESS column length. -if (replStats.length() >= RM_PROGRESS_LENGTH - 2000) { Review comment: 2k chars are consumed by variables other than replStats. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 676240) Time Spent: 5.5h (was: 5h 20m) > Compress Hive Replication Metrics while storing > --- > > Key: HIVE-25596 > URL: https://issues.apache.org/jira/browse/HIVE-25596 > Project: Hive > Issue Type: Improvement >Reporter: Haymant Mangla >Assignee: Haymant Mangla >Priority: Major > Labels: pull-request-available > Attachments: CompressedRM_Progress(k=10), CompressedRM_Progress(k=5), > PlainTextRM_Progress(k=10), PlainTextRM_Progress(k=5) > > Time Spent: 5.5h > Remaining Estimate: 0h > > Compress the json fields of sys.replication_metrics table to optimise RDBMS > space usage. -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (HIVE-25596) Compress Hive Replication Metrics while storing
[ https://issues.apache.org/jira/browse/HIVE-25596?focusedWorklogId=676239=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-676239 ] ASF GitHub Bot logged work on HIVE-25596: - Author: ASF GitHub Bot Created on: 04/Nov/21 07:26 Start Date: 04/Nov/21 07:26 Worklog Time Spent: 10m Work Description: pkumarsinha commented on a change in pull request #2724: URL: https://github.com/apache/hive/pull/2724#discussion_r742588517 ## File path: ql/src/java/org/apache/hadoop/hive/ql/parse/repl/metric/event/Stage.java ## @@ -129,12 +127,7 @@ public String getReplStats() { } public void setReplStats(String replStats) { -// Check the stat string doesn't surpass the RM_PROGRESS column length. -if (replStats.length() >= RM_PROGRESS_LENGTH - 2000) { Review comment: Why was it 2K less than RM_PROGRESS_LENGTH? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 676239) Time Spent: 5h 20m (was: 5h 10m) > Compress Hive Replication Metrics while storing > --- > > Key: HIVE-25596 > URL: https://issues.apache.org/jira/browse/HIVE-25596 > Project: Hive > Issue Type: Improvement >Reporter: Haymant Mangla >Assignee: Haymant Mangla >Priority: Major > Labels: pull-request-available > Attachments: CompressedRM_Progress(k=10), CompressedRM_Progress(k=5), > PlainTextRM_Progress(k=10), PlainTextRM_Progress(k=5) > > Time Spent: 5h 20m > Remaining Estimate: 0h > > Compress the json fields of sys.replication_metrics table to optimise RDBMS space usage. -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (HIVE-25596) Compress Hive Replication Metrics while storing
[ https://issues.apache.org/jira/browse/HIVE-25596?focusedWorklogId=676237=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-676237 ] ASF GitHub Bot logged work on HIVE-25596: - Author: ASF GitHub Bot Created on: 04/Nov/21 07:25 Start Date: 04/Nov/21 07:25 Worklog Time Spent: 10m Work Description: pkumarsinha commented on a change in pull request #2724: URL: https://github.com/apache/hive/pull/2724#discussion_r742587970 ## File path: ql/src/java/org/apache/hadoop/hive/ql/parse/repl/metric/ReplicationMetricCollector.java ## @@ -116,14 +136,16 @@ public void reportStageEnd(String stageName, Status status, long lastReplId, stage = new Stage(stageName, status, -1L); } stage.setStatus(status); - stage.setEndTime(System.currentTimeMillis()); + stage.setEndTime(getCurrentTimeInMillis()); stage.setReplSnapshotsCount(replSnapshotCount); if (replStatsTracker != null && !(replStatsTracker instanceof NoOpReplStatsTracker)) { String replStatString = replStatsTracker.toString(); LOG.info("Replication Statistics are: {}", replStatString); stage.setReplStats(replStatString); } progress.addStage(stage); + // Check the progress string doesn't surpass the RM_PROGRESS column width. + checkRMProgressLimit(progress, stage); Review comment: nit: it's not just check. it is updating the state as well. we should name it accordingly -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 676237) Time Spent: 5h 10m (was: 5h) > Compress Hive Replication Metrics while storing > --- > > Key: HIVE-25596 > URL: https://issues.apache.org/jira/browse/HIVE-25596 > Project: Hive > Issue Type: Improvement >Reporter: Haymant Mangla >Assignee: Haymant Mangla >Priority: Major > Labels: pull-request-available > Attachments: CompressedRM_Progress(k=10), CompressedRM_Progress(k=5), > PlainTextRM_Progress(k=10), PlainTextRM_Progress(k=5) > > Time Spent: 5h 10m > Remaining Estimate: 0h > > Compress the json fields of sys.replication_metrics table to optimise RDBMS > space usage. -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (HIVE-25596) Compress Hive Replication Metrics while storing
[ https://issues.apache.org/jira/browse/HIVE-25596?focusedWorklogId=676235=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-676235 ] ASF GitHub Bot logged work on HIVE-25596: - Author: ASF GitHub Bot Created on: 04/Nov/21 07:23 Start Date: 04/Nov/21 07:23 Worklog Time Spent: 10m Work Description: pkumarsinha commented on a change in pull request #2724: URL: https://github.com/apache/hive/pull/2724#discussion_r742587136 ## File path: ql/src/java/org/apache/hadoop/hive/ql/parse/repl/metric/ReplicationMetricCollector.java ## @@ -105,6 +110,21 @@ public void reportFailoverStart(String stageName, Map metricMap, } } + private void checkRMProgressLimit(Progress progress, Stage stage) throws SemanticException { +MessageSerializer serializer = MessageFactory.getDefaultInstanceForReplMetrics(conf).getSerializer(); +ObjectMapper mapper = new ObjectMapper(); +String serializedProgress = null; +try { + serializedProgress = serializer.serialize(mapper.writeValueAsString(progress)); +} catch (Exception e) { + throw new SemanticException(e); +} +if (serializedProgress.length() > ReplStatsTracker.RM_PROGRESS_LENGTH) { + stage.setReplStats("RM_PROGRESS LIMIT EXCEEDED TO " + serializedProgress.length()); Review comment: Add keyword as a prefix like "ERROR" or "WARN" -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 676235) Time Spent: 5h (was: 4h 50m) > Compress Hive Replication Metrics while storing > --- > > Key: HIVE-25596 > URL: https://issues.apache.org/jira/browse/HIVE-25596 > Project: Hive > Issue Type: Improvement >Reporter: Haymant Mangla >Assignee: Haymant Mangla >Priority: Major > Labels: pull-request-available > Attachments: CompressedRM_Progress(k=10), CompressedRM_Progress(k=5), > PlainTextRM_Progress(k=10), PlainTextRM_Progress(k=5) > > Time Spent: 5h > Remaining Estimate: 0h > > Compress the json fields of sys.replication_metrics table to optimise RDBMS > space usage. -- This message was sent by Atlassian Jira (v8.3.4#803005)
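Pulling the two review comments above together, one possible shape of the revised helper is sketched below: renamed to reflect that it updates the stage, and with a keyword prefix on the stat string. The sketch assumes it still lives inside ReplicationMetricCollector as in the diff (so conf, Progress, Stage, MessageSerializer, MessageFactory, ObjectMapper, and ReplStatsTracker come from that context); the method name and exact message are illustrative, not necessarily what was merged.

{code:java}
  // Sketch only: rename suggested in review, since the method also mutates the stage.
  private void updateReplStatsIfProgressLimitExceeded(Progress progress, Stage stage)
      throws SemanticException {
    MessageSerializer serializer = MessageFactory.getDefaultInstanceForReplMetrics(conf).getSerializer();
    ObjectMapper mapper = new ObjectMapper();
    String serializedProgress;
    try {
      // Use the same serialization path as the metric sink, so the length check
      // matches what would actually land in the RM_PROGRESS column.
      serializedProgress = serializer.serialize(mapper.writeValueAsString(progress));
    } catch (Exception e) {
      throw new SemanticException(e);
    }
    if (serializedProgress.length() > ReplStatsTracker.RM_PROGRESS_LENGTH) {
      // Keyword prefix suggested in review, so dashboards can spot the condition easily.
      stage.setReplStats("ERROR: RM_PROGRESS LIMIT EXCEEDED TO " + serializedProgress.length());
    }
  }
{code}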
[jira] [Work logged] (HIVE-25652) Add constraints in result of “SHOW CREATE TABLE ”
[ https://issues.apache.org/jira/browse/HIVE-25652?focusedWorklogId=676231=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-676231 ] ASF GitHub Bot logged work on HIVE-25652: - Author: ASF GitHub Bot Created on: 04/Nov/21 06:41 Start Date: 04/Nov/21 06:41 Worklog Time Spent: 10m Work Description: soumyakanti3578 commented on a change in pull request #2752: URL: https://github.com/apache/hive/pull/2752#discussion_r742571038 ## File path: ql/src/java/org/apache/hadoop/hive/ql/exec/DDLPlanUtils.java ## @@ -800,19 +809,166 @@ private String getExternal(Table table) { return table.getTableType() == TableType.EXTERNAL_TABLE ? "EXTERNAL " : ""; } - private String getColumns(Table table) { -List columnDescs = new ArrayList(); + private String getColumns(Table table) throws HiveException{ +List columnDescs = new ArrayList<>(); +List columns = table.getCols().stream().map(FieldSchema::getName).collect(Collectors.toList()); +Set notNullColumns = Collections.emptySet(); +if (NotNullConstraint.isNotEmpty(table.getNotNullConstraint())) { + notNullColumns = new HashSet<>(table.getNotNullConstraint().getNotNullConstraints().values()); +} + +Map columnDefaultValueMap = Collections.emptyMap(); +if (DefaultConstraint.isNotEmpty(table.getDefaultConstraint())) { + columnDefaultValueMap = table.getDefaultConstraint().getColNameToDefaultValueMap(); +} + +List sqlCheckConstraints; +try { + sqlCheckConstraints = Hive.get().getCheckConstraintList(table.getDbName(), table.getTableName()); +} catch (NoSuchObjectException e) { + throw new HiveException(e); +} +Map columnCheckConstraintsMap = sqlCheckConstraints.stream() + .filter(SQLCheckConstraint::isSetColumn_name) + .collect(Collectors.toMap(SQLCheckConstraint::getColumn_name, Function.identity())); +List tableCheckConstraints = sqlCheckConstraints.stream() + .filter(cc -> !cc.isSetColumn_name()) + .collect(Collectors.toList()); + for (FieldSchema column : table.getCols()) { String columnType = formatType(TypeInfoUtils.getTypeInfoFromTypeString(column.getType())); - String columnDesc = " `" + column.getName() + "` " + columnType; + String columnName = column.getName(); + StringBuilder columnDesc = new StringBuilder(); + columnDesc.append(" `").append(columnName).append("` ").append(columnType); + if (notNullColumns.contains(columnName)) { +columnDesc.append(" NOT NULL"); + } + if (columnDefaultValueMap.containsKey(columnName)) { +columnDesc.append(" DEFAULT ").append(columnDefaultValueMap.get(columnName)); + } + if (columnCheckConstraintsMap.containsKey(columnName)) { + columnDesc.append(getColumnCheckConstraintDesc(columnCheckConstraintsMap.get(columnName), columns)); + } if (column.getComment() != null) { -columnDesc += " COMMENT '" + HiveStringUtils.escapeHiveCommand(column.getComment()) + "'"; +columnDesc.append(" COMMENT '").append(HiveStringUtils.escapeHiveCommand(column.getComment())).append("'"); } - columnDescs.add(columnDesc); + columnDescs.add(columnDesc.toString()); } +String pkDesc = getPrimaryKeyDesc(table); +if (pkDesc != null) { + columnDescs.add(pkDesc); +} +columnDescs.addAll(getForeignKeyDesc(table)); +columnDescs.addAll(getTableCheckConstraintDesc(tableCheckConstraints, columns)); return StringUtils.join(columnDescs, ", \n"); } + private List getTableCheckConstraintDesc(List tableCheckConstraints, + List columns) { +List ccDescs = new ArrayList<>(); +for (SQLCheckConstraint constraint: tableCheckConstraints) { + String enable = constraint.isEnable_cstr()? 
" enable": " disable"; + String validate = constraint.isValidate_cstr()? " validate": " novalidate"; + String rely = constraint.isRely_cstr()? " rely": " norely"; + String expression = getCheckExpressionWithBackticks(columns, constraint); + ccDescs.add(" constraint " + constraint.getDc_name() + " CHECK(" + expression + +")" + enable + validate + rely); +} +return ccDescs; + } + + private String getCheckExpressionWithBackticks(List columns, SQLCheckConstraint constraint) { +TreeMap indexToCols = new TreeMap<>(); +String expression = constraint.getCheck_expression(); +for (String col: columns) { + int idx = expression.indexOf(col); + if (idx == -1) { +continue; + } + indexToCols.put(idx, col); + while (idx + col.length() < expression.length()) { +idx = expression.indexOf(col, idx + col.length()); +if (idx == -1) { + break; +} +indexToCols.put(idx, col); + } +} Review comment: