[jira] [Work logged] (HIVE-25653) Precision problem in STD, STDDEV_SAMP, STDDEV_POP

2021-11-04 Thread ASF GitHub Bot (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-25653?focusedWorklogId=676887&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-676887
 ]

ASF GitHub Bot logged work on HIVE-25653:
-

Author: ASF GitHub Bot
Created on: 05/Nov/21 04:48
Start Date: 05/Nov/21 04:48
Worklog Time Spent: 10m 
  Work Description: ashish-kumar-sharma commented on a change in pull 
request #2760:
URL: https://github.com/apache/hive/pull/2760#discussion_r743388404



##
File path: ql/src/test/queries/clientpositive/sdtdev.q
##
@@ -0,0 +1,12 @@
+create table test ( col1 decimal(10,3) );
+insert into test values 
(10230.72),(10230.72),(10230.72),(10230.72),(10230.72),(10230.72),(10230.72);
+select STDDEV_SAMP(col1) AS STDDEV_6M , STDDEV(col1) as STDDEV 
,STDDEV_POP(col1) as STDDEV_POP , variance(col1) as variance,var_pop(col1) as 
var_pop,var_samp(col1) as var_samp from test;
+
+create table testpoint ( col1 decimal(10,3));
+insert into testpoint values 
(0.12345678),(0.25362123),(0.62437485),(0.65133746),(0.98765432),(0.12435647),(0.7654321445);
+select STDDEV_SAMP(col1) AS STDDEV_6M , STDDEV(col1) as STDDEV 
,STDDEV_POP(col1) as STDDEV_POP , variance(col1) as variance,var_pop(col1) as 
var_pop,var_samp(col1) as var_samp from testpoint;
+
+create table testint(col1 int);
+insert into testint values 
(85),(86),(100),(76),(81),(93),(84),(99),(71),(69),(93),(85),(81),(87),(89);
+select STDDEV_SAMP(col1) AS STDDEV_6M , STDDEV(col1) as STDDEV 
,STDDEV_POP(col1) as STDDEV_POP, variance(col1) as variance,var_pop(col1) as 
var_pop,var_samp(col1) as var_samp from testint;
+

Review comment:
   Done

##
File path: ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFStd.java
##
@@ -90,7 +92,9 @@ public GenericUDAFEvaluator getEvaluator(TypeInfo[] 
parameters)
  * use it, etc.
  */
 public static double calculateStdResult(double variance, long count) {
-  return Math.sqrt(variance / count);
+  BigDecimal bvariance = new BigDecimal(variance);
+  BigDecimal result = bvariance.divide(new BigDecimal(count));
+  return Math.sqrt(result.doubleValue());

Review comment:
   Done
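
   For reference, here is a standalone sketch of the precision issue (a hedged 
demo, not the Hive code itself; the update formula below mirrors the streaming 
variance accumulation used by Hive's variance UDAFs, which is an assumption on 
my part). With seven identical inputs the true variance is 0, yet double 
rounding in the accumulator leaves a tiny residual, which Math.sqrt(variance / 
count) then reports on the order of 1e-13 instead of 0:

{code:java}
public class StdPrecisionDemo {
  public static void main(String[] args) {
    double value = 10230.72;
    long count = 0;
    double sum = 0.0;
    double variance = 0.0; // running sum of squared deviations

    for (int i = 0; i < 7; i++) {
      count++;
      sum += value;
      if (count > 1) {
        // Exact arithmetic keeps t == 0 for identical inputs, but
        // count * value and the incrementally accumulated sum can
        // round differently, leaving a residual of about one ulp.
        double t = count * value - sum;
        variance += (t * t) / ((double) count * (count - 1));
      }
    }
    // Prints a small non-zero number (on the order of 5E-13) instead of 0.
    System.out.println("stddev_pop = " + Math.sqrt(variance / count));
  }
}
{code}

   One caution on the new code above: BigDecimal.divide without a scale or 
MathContext throws ArithmeticException for non-terminating quotients (e.g. a 
count of 3), so passing a rounding mode may be worth considering.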




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Issue Time Tracking
---

Worklog Id: (was: 676887)
Time Spent: 0.5h  (was: 20m)

> Precision problem in STD, STDDEV_SAMP, STDDEV_POP
> -
>
> Key: HIVE-25653
> URL: https://issues.apache.org/jira/browse/HIVE-25653
> Project: Hive
>  Issue Type: Improvement
>Reporter: Ashish Sharma
>Assignee: Ashish Sharma
>Priority: Major
>  Labels: pull-request-available
>  Time Spent: 0.5h
>  Remaining Estimate: 0h
>
> Description
> *Script* -
> create table test ( col1 decimal(10,3) );
> insert into test values 
> (10230.72),(10230.72),(10230.72),(10230.72),(10230.72),(10230.72),(10230.72);
> select STDDEV_SAMP(col1) AS STDDEV_6M , STDDEV(col1) as STDDEV 
> ,STDDEV_POP(col1) as STDDEV_POP from test;
> *Result* -
> STDDEV_SAMP            STDDEV                 STDDEV_POP
> 5.940794514955821E-13  5.42317860890711E-13   5.42317860890711E-13
> *Expected* -
> STDDEV_SAMP  STDDEV  STDDEV_POP
> 0            0       0



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Commented] (HIVE-25671) Hybrid Grace Hash Join NullPointer when querying RCFile

2021-11-04 Thread Jiayu Shen (Jira)


[ 
https://issues.apache.org/jira/browse/HIVE-25671?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17439019#comment-17439019
 ] 

Jiayu Shen commented on HIVE-25671:
---

I have seen a similar issue in Kryo: 
[https://github.com/EsotericSoftware/kryo/issues/379]

Maybe you can try a higher version of Kryo first.

> Hybrid Grace Hash Join NullPointer when querying RCFile
> 
>
> Key: HIVE-25671
> URL: https://issues.apache.org/jira/browse/HIVE-25671
> Project: Hive
>  Issue Type: Bug
>Affects Versions: 3.1.2
>Reporter: Nemon Lou
>Priority: Major
> Attachments: rcfile_kryo.patch
>
>
> Hive 3.1.0, Kryo 3.0.3, Tez engine
> The following SQL can reproduce this issue:
> {code:sql}
> CREATE TABLE `nemon.rt_dm_lpc_customer_sum_tmp3_3`( 
>`logo` string,   
>`customer_code` string,  
>`brand_name` string, 
>`business_code` string,  
>`discount` double,   
>`creation_date` string,  
>`etl_time` string)stored as rcfile; 
>  
> CREATE TABLE `nemon.rt_dm_lpc_customer_sum_tmp4_1`( 
>`customer_code` string,  
>`etl_time` string) stored as rcfile; 
>
> insert into nemon.rt_dm_lpc_customer_sum_tmp3_3 values 
> ("logo","customer_code","brand_name","business_code",1,"creation_date","etl_time")
>
> ,("logo","customer_code1","brand_name","business_code",1,"creation_date","etl_time")
>
> ,("logo","customer_code2","brand_name","business_code",1,"creation_date","etl_time")
>
> ,("logo","customer_code3","brand_name","business_code",1,"creation_date","etl_time")
>
> ,("logo","customer_code4","brand_name","business_code",1,"creation_date","etl_time")
>
> ,("logo","customer_code5","brand_name","business_code",1,"creation_date","etl_time")
>
> ,("logo","customer_code6","brand_name","business_code",1,"creation_date","etl_time")
>
> ,("logo","customer_code7","brand_name","business_code",1,"creation_date","etl_time")
>
> ,("logo","customer_code8","brand_name","business_code",1,"creation_date","etl_time")
>
> ,("logo","customer_code9","brand_name","business_code",1,"creation_date","etl_time");
> insert into  nemon.rt_dm_lpc_customer_sum_tmp4_1  values 
> ("customer_code","etl_time")
>,("customer_code1","etl_time")
>,("customer_code2","etl_time")
>,("customer_code3","etl_time")
>;
> set hive.auto.convert.join.noconditionaltask.size=10;
> set hive.mapjoin.hybridgrace.hashtable=true;
> SELECT
> tt1.logo,
> tt1.customer_code,
> tt1.brand_name,
> tt1.business_code,
> tt1.discount,
> tt1.creation_date,
> date_format(from_utc_timestamp(unix_timestamp()*1000,'Asia/Shanghai'),'-MM-dd
>  HH:mm:ss') etl_time
> from
> (
> SELECT
> t1.logo,
> t1.customer_code,
> t1.brand_name,
> t1.business_code,
> t1.discount,
> t1.creation_date,
> row_number() over(partition by t1.customer_code,t1.logo order by 
> t1.creation_date desc) as discount_rank
> from nemon.rt_dm_lpc_customer_sum_tmp3_3 t1
> join nemon.rt_dm_lpc_customer_sum_tmp4_1 t2
> on t2.customer_code = t1.customer_code
> ) tt1
> where tt1.discount_rank = 1;
> {code}
> Error log from tez task:
> {noformat}
> 2021-11-04 10:02:47,553 [INFO] [TezChild] |exec.MapJoinOperator|: Hybrid 
> Grace Hash Join: Deserializing spilled hash partition...
> 2021-11-04 10:02:47,553 [INFO] [TezChild] |exec.MapJoinOperator|: Hybrid 
> Grace Hash Join: Number of rows in hashmap: 1
> 2021-11-04 10:02:47,554 [INFO] [TezChild] |exec.MapJoinOperator|: Hybrid 
> Grace Hash Join: Going to process spilled big table rows in partition 5. 
> Number of rows: 1
> 2021-11-04 10:02:47,561 [ERROR] [TezChild] |exec.MapJoinOperator|: Unexpected 
> exception from MapJoinOperator : null
> java.lang.NullPointerException
>   at 
> org.apache.hadoop.hive.serde2.columnar.ColumnarStructBase$FieldInfo.uncheckedGetField(ColumnarStructBase.java:114)
>   at 
> org.apache.hadoop.hive.serde2.columnar.ColumnarStructBase.getField(ColumnarStructBase.java:172)
>   at 
> org.apache.hadoop.hive.serde2.objectinspector.ColumnarStructObjectInspector.getStructFieldData(ColumnarStructObjectInspector.java:67)
>   at 
> org.apache.hadoop.hive.ql.exec.ExprNodeColumnEvaluator._evaluate(ExprNodeColumnEvaluator.java:95)
>   at 
> org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:80)
>   at 
> org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:68)
>   at 
> org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer$GetAdaptor.setFromRow(MapJoinBytesTableContainer.java:552)
>   at 
> org.apache.hadoop.hive.ql.exec.MapJoinOperator.setMapJoinKey(MapJoinOperator.java:415)
>   at 
> 

[jira] [Commented] (HIVE-25671) Hybrid Grace Hash Join NullPointer when querying RCFile

2021-11-04 Thread ZHANGSHUNYU (Jira)


[ 
https://issues.apache.org/jira/browse/HIVE-25671?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17439016#comment-17439016
 ] 

ZHANGSHUNYU commented on HIVE-25671:


Thanks for reporting this problem. Kryo 3.0.3 has some problems serializing and 
deserializing objects; maybe you can try updating the Kryo version.

> Hybrid Grace Hash Join NullPointer when querying RCFile
> 
>
> Key: HIVE-25671
> URL: https://issues.apache.org/jira/browse/HIVE-25671
> Project: Hive
>  Issue Type: Bug
>Affects Versions: 3.1.2
>Reporter: Nemon Lou
>Priority: Major
> Attachments: rcfile_kryo.patch
>
>
> Hive 3.1.0, Kryo 3.0.3, Tez engine
> The following SQL can reproduce this issue:
> {code:sql}
> CREATE TABLE `nemon.rt_dm_lpc_customer_sum_tmp3_3`( 
>`logo` string,   
>`customer_code` string,  
>`brand_name` string, 
>`business_code` string,  
>`discount` double,   
>`creation_date` string,  
>`etl_time` string)stored as rcfile; 
>  
> CREATE TABLE `nemon.rt_dm_lpc_customer_sum_tmp4_1`( 
>`customer_code` string,  
>`etl_time` string) stored as rcfile; 
>
> insert into nemon.rt_dm_lpc_customer_sum_tmp3_3 values 
> ("logo","customer_code","brand_name","business_code",1,"creation_date","etl_time")
>
> ,("logo","customer_code1","brand_name","business_code",1,"creation_date","etl_time")
>
> ,("logo","customer_code2","brand_name","business_code",1,"creation_date","etl_time")
>
> ,("logo","customer_code3","brand_name","business_code",1,"creation_date","etl_time")
>
> ,("logo","customer_code4","brand_name","business_code",1,"creation_date","etl_time")
>
> ,("logo","customer_code5","brand_name","business_code",1,"creation_date","etl_time")
>
> ,("logo","customer_code6","brand_name","business_code",1,"creation_date","etl_time")
>
> ,("logo","customer_code7","brand_name","business_code",1,"creation_date","etl_time")
>
> ,("logo","customer_code8","brand_name","business_code",1,"creation_date","etl_time")
>
> ,("logo","customer_code9","brand_name","business_code",1,"creation_date","etl_time");
> insert into  nemon.rt_dm_lpc_customer_sum_tmp4_1  values 
> ("customer_code","etl_time")
>,("customer_code1","etl_time")
>,("customer_code2","etl_time")
>,("customer_code3","etl_time")
>;
> set hive.auto.convert.join.noconditionaltask.size=10;
> set hive.mapjoin.hybridgrace.hashtable=true;
> SELECT
> tt1.logo,
> tt1.customer_code,
> tt1.brand_name,
> tt1.business_code,
> tt1.discount,
> tt1.creation_date,
> date_format(from_utc_timestamp(unix_timestamp()*1000,'Asia/Shanghai'),'-MM-dd
>  HH:mm:ss') etl_time
> from
> (
> SELECT
> t1.logo,
> t1.customer_code,
> t1.brand_name,
> t1.business_code,
> t1.discount,
> t1.creation_date,
> row_number() over(partition by t1.customer_code,t1.logo order by 
> t1.creation_date desc) as discount_rank
> from nemon.rt_dm_lpc_customer_sum_tmp3_3 t1
> join nemon.rt_dm_lpc_customer_sum_tmp4_1 t2
> on t2.customer_code = t1.customer_code
> ) tt1
> where tt1.discount_rank = 1;
> {code}
> Error log from tez task:
> {noformat}
> 2021-11-04 10:02:47,553 [INFO] [TezChild] |exec.MapJoinOperator|: Hybrid 
> Grace Hash Join: Deserializing spilled hash partition...
> 2021-11-04 10:02:47,553 [INFO] [TezChild] |exec.MapJoinOperator|: Hybrid 
> Grace Hash Join: Number of rows in hashmap: 1
> 2021-11-04 10:02:47,554 [INFO] [TezChild] |exec.MapJoinOperator|: Hybrid 
> Grace Hash Join: Going to process spilled big table rows in partition 5. 
> Number of rows: 1
> 2021-11-04 10:02:47,561 [ERROR] [TezChild] |exec.MapJoinOperator|: Unexpected 
> exception from MapJoinOperator : null
> java.lang.NullPointerException
>   at 
> org.apache.hadoop.hive.serde2.columnar.ColumnarStructBase$FieldInfo.uncheckedGetField(ColumnarStructBase.java:114)
>   at 
> org.apache.hadoop.hive.serde2.columnar.ColumnarStructBase.getField(ColumnarStructBase.java:172)
>   at 
> org.apache.hadoop.hive.serde2.objectinspector.ColumnarStructObjectInspector.getStructFieldData(ColumnarStructObjectInspector.java:67)
>   at 
> org.apache.hadoop.hive.ql.exec.ExprNodeColumnEvaluator._evaluate(ExprNodeColumnEvaluator.java:95)
>   at 
> org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:80)
>   at 
> org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:68)
>   at 
> org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer$GetAdaptor.setFromRow(MapJoinBytesTableContainer.java:552)
>   at 
> org.apache.hadoop.hive.ql.exec.MapJoinOperator.setMapJoinKey(MapJoinOperator.java:415)
>   at 
> 

[jira] [Commented] (HIVE-25671) Hybrid Grace Hash Join NullPointer when querying RCFile

2021-11-04 Thread Nemon Lou (Jira)


[ 
https://issues.apache.org/jira/browse/HIVE-25671?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17439012#comment-17439012
 ] 

Nemon Lou commented on HIVE-25671:
--

This bug seems related to Kryo:
A null pointer occurs when the JVM tries to invoke the getLength method.
The invoker is in ColumnarStructBase$FieldInfo.uncheckedGetField(), while the 
actual method implementation is ColumnarStruct.getLength(), which overrides 
ColumnarStructBase.getLength().
The ColumnarStruct object is created by the Kryo deserializer.

Adding a reference to ColumnarStructBase can fix this issue. Uploading a patch 
to demonstrate this fix.

 [^rcfile_kryo.patch] 
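
To illustrate the failure mode, here is a hedged, simplified sketch (made-up 
names, not the actual Hive or patch code): a non-static inner class reaches 
its outer instance through an implicit reference, and if a deserializer fails 
to restore that reference, any call that dispatches through it throws a 
NullPointerException. Holding an explicit reference turns the dependency into 
an ordinary field the serializer can see:

{code:java}
class Outer {                      // stands in for ColumnarStructBase
  int getLength() { return 42; }   // overridden by a subclass in the real code

  class FieldInfo {                // non-static inner class, as in Hive
    // Explicit back-reference (the idea behind the attached patch): a plain
    // field survives serializers that drop the synthetic this$0 reference.
    private final Outer owner = Outer.this;

    int uncheckedGetField() {
      return owner.getLength();    // dispatches via the explicit reference
    }
  }
}
{code}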

> Hybrid Grace Hash Join NullPointer when querying RCFile
> 
>
> Key: HIVE-25671
> URL: https://issues.apache.org/jira/browse/HIVE-25671
> Project: Hive
>  Issue Type: Bug
>Affects Versions: 3.1.2
>Reporter: Nemon Lou
>Priority: Major
> Attachments: rcfile_kryo.patch
>
>
> Hive 3.1.0, Kryo 3.0.3, Tez engine
> The following SQL can reproduce this issue:
> {code:sql}
> CREATE TABLE `nemon.rt_dm_lpc_customer_sum_tmp3_3`( 
>`logo` string,   
>`customer_code` string,  
>`brand_name` string, 
>`business_code` string,  
>`discount` double,   
>`creation_date` string,  
>`etl_time` string)stored as rcfile; 
>  
> CREATE TABLE `nemon.rt_dm_lpc_customer_sum_tmp4_1`( 
>`customer_code` string,  
>`etl_time` string) stored as rcfile; 
>
> insert into nemon.rt_dm_lpc_customer_sum_tmp3_3 values 
> ("logo","customer_code","brand_name","business_code",1,"creation_date","etl_time")
>
> ,("logo","customer_code1","brand_name","business_code",1,"creation_date","etl_time")
>
> ,("logo","customer_code2","brand_name","business_code",1,"creation_date","etl_time")
>
> ,("logo","customer_code3","brand_name","business_code",1,"creation_date","etl_time")
>
> ,("logo","customer_code4","brand_name","business_code",1,"creation_date","etl_time")
>
> ,("logo","customer_code5","brand_name","business_code",1,"creation_date","etl_time")
>
> ,("logo","customer_code6","brand_name","business_code",1,"creation_date","etl_time")
>
> ,("logo","customer_code7","brand_name","business_code",1,"creation_date","etl_time")
>
> ,("logo","customer_code8","brand_name","business_code",1,"creation_date","etl_time")
>
> ,("logo","customer_code9","brand_name","business_code",1,"creation_date","etl_time");
> insert into  nemon.rt_dm_lpc_customer_sum_tmp4_1  values 
> ("customer_code","etl_time")
>,("customer_code1","etl_time")
>,("customer_code2","etl_time")
>,("customer_code3","etl_time")
>;
> set hive.auto.convert.join.noconditionaltask.size=10;
> set hive.mapjoin.hybridgrace.hashtable=true;
> SELECT
> tt1.logo,
> tt1.customer_code,
> tt1.brand_name,
> tt1.business_code,
> tt1.discount,
> tt1.creation_date,
> date_format(from_utc_timestamp(unix_timestamp()*1000,'Asia/Shanghai'),'-MM-dd
>  HH:mm:ss') etl_time
> from
> (
> SELECT
> t1.logo,
> t1.customer_code,
> t1.brand_name,
> t1.business_code,
> t1.discount,
> t1.creation_date,
> row_number() over(partition by t1.customer_code,t1.logo order by 
> t1.creation_date desc) as discount_rank
> from nemon.rt_dm_lpc_customer_sum_tmp3_3 t1
> join nemon.rt_dm_lpc_customer_sum_tmp4_1 t2
> on t2.customer_code = t1.customer_code
> ) tt1
> where tt1.discount_rank = 1;
> {code}
> Error log from tez task:
> {noformat}
> 2021-11-04 10:02:47,553 [INFO] [TezChild] |exec.MapJoinOperator|: Hybrid 
> Grace Hash Join: Deserializing spilled hash partition...
> 2021-11-04 10:02:47,553 [INFO] [TezChild] |exec.MapJoinOperator|: Hybrid 
> Grace Hash Join: Number of rows in hashmap: 1
> 2021-11-04 10:02:47,554 [INFO] [TezChild] |exec.MapJoinOperator|: Hybrid 
> Grace Hash Join: Going to process spilled big table rows in partition 5. 
> Number of rows: 1
> 2021-11-04 10:02:47,561 [ERROR] [TezChild] |exec.MapJoinOperator|: Unexpected 
> exception from MapJoinOperator : null
> java.lang.NullPointerException
>   at 
> org.apache.hadoop.hive.serde2.columnar.ColumnarStructBase$FieldInfo.uncheckedGetField(ColumnarStructBase.java:114)
>   at 
> org.apache.hadoop.hive.serde2.columnar.ColumnarStructBase.getField(ColumnarStructBase.java:172)
>   at 
> org.apache.hadoop.hive.serde2.objectinspector.ColumnarStructObjectInspector.getStructFieldData(ColumnarStructObjectInspector.java:67)
>   at 
> org.apache.hadoop.hive.ql.exec.ExprNodeColumnEvaluator._evaluate(ExprNodeColumnEvaluator.java:95)
>   at 
> org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:80)
>   at 
> 

[jira] [Updated] (HIVE-25671) Hybrid Grace Hash Join NullPointer when querying RCFile

2021-11-04 Thread Nemon Lou (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-25671?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Nemon Lou updated HIVE-25671:
-
Attachment: rcfile_kryo.patch

> Hybrid Grace Hash Join NullPointer when querying RCFile
> 
>
> Key: HIVE-25671
> URL: https://issues.apache.org/jira/browse/HIVE-25671
> Project: Hive
>  Issue Type: Bug
>Affects Versions: 3.1.2
>Reporter: Nemon Lou
>Priority: Major
> Attachments: rcfile_kryo.patch
>
>
> Hive 3.1.0, Kryo 3.0.3, Tez engine
> The following SQL can reproduce this issue:
> {code:sql}
> CREATE TABLE `nemon.rt_dm_lpc_customer_sum_tmp3_3`( 
>`logo` string,   
>`customer_code` string,  
>`brand_name` string, 
>`business_code` string,  
>`discount` double,   
>`creation_date` string,  
>`etl_time` string)stored as rcfile; 
>  
> CREATE TABLE `nemon.rt_dm_lpc_customer_sum_tmp4_1`( 
>`customer_code` string,  
>`etl_time` string) stored as rcfile; 
>
> insert into nemon.rt_dm_lpc_customer_sum_tmp3_3 values 
> ("logo","customer_code","brand_name","business_code",1,"creation_date","etl_time")
>
> ,("logo","customer_code1","brand_name","business_code",1,"creation_date","etl_time")
>
> ,("logo","customer_code2","brand_name","business_code",1,"creation_date","etl_time")
>
> ,("logo","customer_code3","brand_name","business_code",1,"creation_date","etl_time")
>
> ,("logo","customer_code4","brand_name","business_code",1,"creation_date","etl_time")
>
> ,("logo","customer_code5","brand_name","business_code",1,"creation_date","etl_time")
>
> ,("logo","customer_code6","brand_name","business_code",1,"creation_date","etl_time")
>
> ,("logo","customer_code7","brand_name","business_code",1,"creation_date","etl_time")
>
> ,("logo","customer_code8","brand_name","business_code",1,"creation_date","etl_time")
>
> ,("logo","customer_code9","brand_name","business_code",1,"creation_date","etl_time");
> insert into  nemon.rt_dm_lpc_customer_sum_tmp4_1  values 
> ("customer_code","etl_time")
>,("customer_code1","etl_time")
>,("customer_code2","etl_time")
>,("customer_code3","etl_time")
>;
> set hive.auto.convert.join.noconditionaltask.size=10;
> set hive.mapjoin.hybridgrace.hashtable=true;
> SELECT
> tt1.logo,
> tt1.customer_code,
> tt1.brand_name,
> tt1.business_code,
> tt1.discount,
> tt1.creation_date,
> date_format(from_utc_timestamp(unix_timestamp()*1000,'Asia/Shanghai'),'-MM-dd
>  HH:mm:ss') etl_time
> from
> (
> SELECT
> t1.logo,
> t1.customer_code,
> t1.brand_name,
> t1.business_code,
> t1.discount,
> t1.creation_date,
> row_number() over(partition by t1.customer_code,t1.logo order by 
> t1.creation_date desc) as discount_rank
> from nemon.rt_dm_lpc_customer_sum_tmp3_3 t1
> join nemon.rt_dm_lpc_customer_sum_tmp4_1 t2
> on t2.customer_code = t1.customer_code
> ) tt1
> where tt1.discount_rank = 1;
> {code}
> Error log from tez task:
> {noformat}
> 2021-11-04 10:02:47,553 [INFO] [TezChild] |exec.MapJoinOperator|: Hybrid 
> Grace Hash Join: Deserializing spilled hash partition...
> 2021-11-04 10:02:47,553 [INFO] [TezChild] |exec.MapJoinOperator|: Hybrid 
> Grace Hash Join: Number of rows in hashmap: 1
> 2021-11-04 10:02:47,554 [INFO] [TezChild] |exec.MapJoinOperator|: Hybrid 
> Grace Hash Join: Going to process spilled big table rows in partition 5. 
> Number of rows: 1
> 2021-11-04 10:02:47,561 [ERROR] [TezChild] |exec.MapJoinOperator|: Unexpected 
> exception from MapJoinOperator : null
> java.lang.NullPointerException
>   at 
> org.apache.hadoop.hive.serde2.columnar.ColumnarStructBase$FieldInfo.uncheckedGetField(ColumnarStructBase.java:114)
>   at 
> org.apache.hadoop.hive.serde2.columnar.ColumnarStructBase.getField(ColumnarStructBase.java:172)
>   at 
> org.apache.hadoop.hive.serde2.objectinspector.ColumnarStructObjectInspector.getStructFieldData(ColumnarStructObjectInspector.java:67)
>   at 
> org.apache.hadoop.hive.ql.exec.ExprNodeColumnEvaluator._evaluate(ExprNodeColumnEvaluator.java:95)
>   at 
> org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:80)
>   at 
> org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:68)
>   at 
> org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer$GetAdaptor.setFromRow(MapJoinBytesTableContainer.java:552)
>   at 
> org.apache.hadoop.hive.ql.exec.MapJoinOperator.setMapJoinKey(MapJoinOperator.java:415)
>   at 
> org.apache.hadoop.hive.ql.exec.MapJoinOperator.process(MapJoinOperator.java:466)
>   at 
> 

[jira] [Updated] (HIVE-25671) Hybrid Grace Hash Join NullPointer when querying RCFile

2021-11-04 Thread Nemon Lou (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-25671?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Nemon Lou updated HIVE-25671:
-
Description: 
Hive 3.1.0, Kryo 3.0.3, Tez engine
The following SQL can reproduce this issue:
{code:sql}
CREATE TABLE `nemon.rt_dm_lpc_customer_sum_tmp3_3`( 
   `logo` string,   
   `customer_code` string,  
   `brand_name` string, 
   `business_code` string,  
   `discount` double,   
   `creation_date` string,  
   `etl_time` string)stored as rcfile; 
 
CREATE TABLE `nemon.rt_dm_lpc_customer_sum_tmp4_1`( 
   `customer_code` string,  
   `etl_time` string) stored as rcfile; 
   
insert into nemon.rt_dm_lpc_customer_sum_tmp3_3 values 
("logo","customer_code","brand_name","business_code",1,"creation_date","etl_time")
   
,("logo","customer_code1","brand_name","business_code",1,"creation_date","etl_time")
   
,("logo","customer_code2","brand_name","business_code",1,"creation_date","etl_time")
   
,("logo","customer_code3","brand_name","business_code",1,"creation_date","etl_time")
   
,("logo","customer_code4","brand_name","business_code",1,"creation_date","etl_time")
   
,("logo","customer_code5","brand_name","business_code",1,"creation_date","etl_time")
   
,("logo","customer_code6","brand_name","business_code",1,"creation_date","etl_time")
   
,("logo","customer_code7","brand_name","business_code",1,"creation_date","etl_time")
   
,("logo","customer_code8","brand_name","business_code",1,"creation_date","etl_time")
   
,("logo","customer_code9","brand_name","business_code",1,"creation_date","etl_time");
insert into  nemon.rt_dm_lpc_customer_sum_tmp4_1  values 
("customer_code","etl_time")
   ,("customer_code1","etl_time")
   ,("customer_code2","etl_time")
   ,("customer_code3","etl_time")
   ;
set hive.auto.convert.join.noconditionaltask.size=10;
set hive.mapjoin.hybridgrace.hashtable=true;
SELECT
tt1.logo,
tt1.customer_code,
tt1.brand_name,
tt1.business_code,
tt1.discount,
tt1.creation_date,
date_format(from_utc_timestamp(unix_timestamp()*1000,'Asia/Shanghai'),'-MM-dd
 HH:mm:ss') etl_time
from
(
SELECT
t1.logo,
t1.customer_code,
t1.brand_name,
t1.business_code,
t1.discount,
t1.creation_date,
row_number() over(partition by t1.customer_code,t1.logo order by 
t1.creation_date desc) as discount_rank
from nemon.rt_dm_lpc_customer_sum_tmp3_3 t1
join nemon.rt_dm_lpc_customer_sum_tmp4_1 t2
on t2.customer_code = t1.customer_code
) tt1
where tt1.discount_rank = 1;
{code}

Error log from tez task:
{noformat}
2021-11-04 10:02:47,553 [INFO] [TezChild] |exec.MapJoinOperator|: Hybrid Grace 
Hash Join: Deserializing spilled hash partition...
2021-11-04 10:02:47,553 [INFO] [TezChild] |exec.MapJoinOperator|: Hybrid Grace 
Hash Join: Number of rows in hashmap: 1
2021-11-04 10:02:47,554 [INFO] [TezChild] |exec.MapJoinOperator|: Hybrid Grace 
Hash Join: Going to process spilled big table rows in partition 5. Number of 
rows: 1
2021-11-04 10:02:47,561 [ERROR] [TezChild] |exec.MapJoinOperator|: Unexpected 
exception from MapJoinOperator : null
java.lang.NullPointerException
at 
org.apache.hadoop.hive.serde2.columnar.ColumnarStructBase$FieldInfo.uncheckedGetField(ColumnarStructBase.java:114)
at 
org.apache.hadoop.hive.serde2.columnar.ColumnarStructBase.getField(ColumnarStructBase.java:172)
at 
org.apache.hadoop.hive.serde2.objectinspector.ColumnarStructObjectInspector.getStructFieldData(ColumnarStructObjectInspector.java:67)
at 
org.apache.hadoop.hive.ql.exec.ExprNodeColumnEvaluator._evaluate(ExprNodeColumnEvaluator.java:95)
at 
org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:80)
at 
org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:68)
at 
org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer$GetAdaptor.setFromRow(MapJoinBytesTableContainer.java:552)
at 
org.apache.hadoop.hive.ql.exec.MapJoinOperator.setMapJoinKey(MapJoinOperator.java:415)
at 
org.apache.hadoop.hive.ql.exec.MapJoinOperator.process(MapJoinOperator.java:466)
at 
org.apache.hadoop.hive.ql.exec.MapJoinOperator.reProcessBigTable(MapJoinOperator.java:755)
at 
org.apache.hadoop.hive.ql.exec.MapJoinOperator.continueProcess(MapJoinOperator.java:671)
at 
org.apache.hadoop.hive.ql.exec.MapJoinOperator.closeOp(MapJoinOperator.java:604)
at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:733)
at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:757)
at 
org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.close(MapRecordProcessor.java:477)
at 
org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:284)
at 

[jira] [Updated] (HIVE-25671) Hybrid Grace Hash Join NullPointer when querying RCFile

2021-11-04 Thread Nemon Lou (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-25671?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Nemon Lou updated HIVE-25671:
-
Description: 
{noformat}
2021-11-04 10:02:47,553 [INFO] [TezChild] |exec.MapJoinOperator|: Hybrid Grace 
Hash Join: Deserializing spilled hash partition...
2021-11-04 10:02:47,553 [INFO] [TezChild] |exec.MapJoinOperator|: Hybrid Grace 
Hash Join: Number of rows in hashmap: 1
2021-11-04 10:02:47,554 [INFO] [TezChild] |exec.MapJoinOperator|: Hybrid Grace 
Hash Join: Going to process spilled big table rows in partition 5. Number of 
rows: 1
2021-11-04 10:02:47,561 [ERROR] [TezChild] |exec.MapJoinOperator|: Unexpected 
exception from MapJoinOperator : null
java.lang.NullPointerException
at 
org.apache.hadoop.hive.serde2.columnar.ColumnarStructBase$FieldInfo.uncheckedGetField(ColumnarStructBase.java:114)
at 
org.apache.hadoop.hive.serde2.columnar.ColumnarStructBase.getField(ColumnarStructBase.java:172)
at 
org.apache.hadoop.hive.serde2.objectinspector.ColumnarStructObjectInspector.getStructFieldData(ColumnarStructObjectInspector.java:67)
at 
org.apache.hadoop.hive.ql.exec.ExprNodeColumnEvaluator._evaluate(ExprNodeColumnEvaluator.java:95)
at 
org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:80)
at 
org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:68)
at 
org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer$GetAdaptor.setFromRow(MapJoinBytesTableContainer.java:552)
at 
org.apache.hadoop.hive.ql.exec.MapJoinOperator.setMapJoinKey(MapJoinOperator.java:415)
at 
org.apache.hadoop.hive.ql.exec.MapJoinOperator.process(MapJoinOperator.java:466)
at 
org.apache.hadoop.hive.ql.exec.MapJoinOperator.reProcessBigTable(MapJoinOperator.java:755)
at 
org.apache.hadoop.hive.ql.exec.MapJoinOperator.continueProcess(MapJoinOperator.java:671)
at 
org.apache.hadoop.hive.ql.exec.MapJoinOperator.closeOp(MapJoinOperator.java:604)
at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:733)
at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:757)
at 
org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.close(MapRecordProcessor.java:477)
at 
org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:284)
at 
org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:250)
at 
org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:374)
at 
org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:73)
at 
org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:61)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at 
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1729)
at 
org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:61)
at 
org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:37)
at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36)
at 
com.google.common.util.concurrent.TrustedListenableFutureTask$TrustedFutureInterruptibleTask.runInterruptibly(TrustedListenableFutureTask.java:108)
at 
com.google.common.util.concurrent.InterruptibleTask.run(InterruptibleTask.java:41)
at 
com.google.common.util.concurrent.TrustedListenableFutureTask.run(TrustedListenableFutureTask.java:77)
at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
{noformat}

  was:
{format}
2021-11-04 10:02:47,553 [INFO] [TezChild] |exec.MapJoinOperator|: Hybrid Grace 
Hash Join: Deserializing spilled hash partition...
2021-11-04 10:02:47,553 [INFO] [TezChild] |exec.MapJoinOperator|: Hybrid Grace 
Hash Join: Number of rows in hashmap: 1
2021-11-04 10:02:47,554 [INFO] [TezChild] |exec.MapJoinOperator|: Hybrid Grace 
Hash Join: Going to process spilled big table rows in partition 5. Number of 
rows: 1
2021-11-04 10:02:47,561 [ERROR] [TezChild] |exec.MapJoinOperator|: Unexpected 
exception from MapJoinOperator : null
java.lang.NullPointerException
at 
org.apache.hadoop.hive.serde2.columnar.ColumnarStructBase$FieldInfo.uncheckedGetField(ColumnarStructBase.java:114)
at 
org.apache.hadoop.hive.serde2.columnar.ColumnarStructBase.getField(ColumnarStructBase.java:172)
at 
org.apache.hadoop.hive.serde2.objectinspector.ColumnarStructObjectInspector.getStructFieldData(ColumnarStructObjectInspector.java:67)
at 

[jira] [Commented] (HIVE-25671) Hybrid Grace Hash Join NullPointer when querying RCFile

2021-11-04 Thread David Cai (Jira)


[ 
https://issues.apache.org/jira/browse/HIVE-25671?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17438985#comment-17438985
 ] 

David Cai commented on HIVE-25671:
--

If you can describe the steps to reproduce, it will help to locate this issue.

> Hybrid Grace Hash Join NullPointer when querying RCFile
> 
>
> Key: HIVE-25671
> URL: https://issues.apache.org/jira/browse/HIVE-25671
> Project: Hive
>  Issue Type: Bug
>Affects Versions: 3.1.2
>Reporter: Nemon Lou
>Priority: Major
>
> {format}
> 2021-11-04 10:02:47,553 [INFO] [TezChild] |exec.MapJoinOperator|: Hybrid 
> Grace Hash Join: Deserializing spilled hash partition...
> 2021-11-04 10:02:47,553 [INFO] [TezChild] |exec.MapJoinOperator|: Hybrid 
> Grace Hash Join: Number of rows in hashmap: 1
> 2021-11-04 10:02:47,554 [INFO] [TezChild] |exec.MapJoinOperator|: Hybrid 
> Grace Hash Join: Going to process spilled big table rows in partition 5. 
> Number of rows: 1
> 2021-11-04 10:02:47,561 [ERROR] [TezChild] |exec.MapJoinOperator|: Unexpected 
> exception from MapJoinOperator : null
> java.lang.NullPointerException
>   at 
> org.apache.hadoop.hive.serde2.columnar.ColumnarStructBase$FieldInfo.uncheckedGetField(ColumnarStructBase.java:114)
>   at 
> org.apache.hadoop.hive.serde2.columnar.ColumnarStructBase.getField(ColumnarStructBase.java:172)
>   at 
> org.apache.hadoop.hive.serde2.objectinspector.ColumnarStructObjectInspector.getStructFieldData(ColumnarStructObjectInspector.java:67)
>   at 
> org.apache.hadoop.hive.ql.exec.ExprNodeColumnEvaluator._evaluate(ExprNodeColumnEvaluator.java:95)
>   at 
> org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:80)
>   at 
> org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:68)
>   at 
> org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer$GetAdaptor.setFromRow(MapJoinBytesTableContainer.java:552)
>   at 
> org.apache.hadoop.hive.ql.exec.MapJoinOperator.setMapJoinKey(MapJoinOperator.java:415)
>   at 
> org.apache.hadoop.hive.ql.exec.MapJoinOperator.process(MapJoinOperator.java:466)
>   at 
> org.apache.hadoop.hive.ql.exec.MapJoinOperator.reProcessBigTable(MapJoinOperator.java:755)
>   at 
> org.apache.hadoop.hive.ql.exec.MapJoinOperator.continueProcess(MapJoinOperator.java:671)
>   at 
> org.apache.hadoop.hive.ql.exec.MapJoinOperator.closeOp(MapJoinOperator.java:604)
>   at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:733)
>   at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:757)
>   at 
> org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.close(MapRecordProcessor.java:477)
>   at 
> org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:284)
>   at 
> org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:250)
>   at 
> org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:374)
>   at 
> org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:73)
>   at 
> org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:61)
>   at java.security.AccessController.doPrivileged(Native Method)
>   at javax.security.auth.Subject.doAs(Subject.java:422)
>   at 
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1729)
>   at 
> org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:61)
>   at 
> org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:37)
>   at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36)
>   at 
> com.google.common.util.concurrent.TrustedListenableFutureTask$TrustedFutureInterruptibleTask.runInterruptibly(TrustedListenableFutureTask.java:108)
>   at 
> com.google.common.util.concurrent.InterruptibleTask.run(InterruptibleTask.java:41)
>   at 
> com.google.common.util.concurrent.TrustedListenableFutureTask.run(TrustedListenableFutureTask.java:77)
>   at 
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
>   at 
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
>   at java.lang.Thread.run(Thread.java:748)
> {format}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Assigned] (HIVE-25674) Add getAllTableConstraints to HS2 cache

2021-11-04 Thread Yu-Wen Lai (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-25674?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Yu-Wen Lai reassigned HIVE-25674:
-


> Add getAllTableConstraints to HS2 cache
> ---
>
> Key: HIVE-25674
> URL: https://issues.apache.org/jira/browse/HIVE-25674
> Project: Hive
>  Issue Type: Improvement
>  Components: HiveServer2
>Reporter: Yu-Wen Lai
>Assignee: Yu-Wen Lai
>Priority: Major
>
> In HIVE-22782, a new API, getAllTableConstraints, was introduced to retrieve 
> all of the constraints in one call. We could add a local cache to HS2 to avoid 
> duplicate calls to HMS.
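
A hedged sketch of the caching idea described above (hypothetical names; a real 
HS2 cache also needs invalidation on DDL and metastore events): memoize the 
getAllTableConstraints response per key so repeated query compilations do not 
round-trip to the HMS.

{code:java}
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.function.Function;

final class ConstraintsCache<K, V> {
  private final Map<K, V> cache = new ConcurrentHashMap<>();
  private final Function<K, V> loader;  // e.g. the actual HMS call

  ConstraintsCache(Function<K, V> loader) { this.loader = loader; }

  V get(K key) {                        // load once, then serve locally
    return cache.computeIfAbsent(key, loader);
  }

  void invalidate(K key) {              // on DDL that changes constraints
    cache.remove(key);
  }
}
{code}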



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Updated] (HIVE-25673) Column pruning fix for MR tasks

2021-11-04 Thread ASF GitHub Bot (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-25673?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

ASF GitHub Bot updated HIVE-25673:
--
Labels: pull-request-available  (was: )

> Column pruning fix for MR tasks
> ---
>
> Key: HIVE-25673
> URL: https://issues.apache.org/jira/browse/HIVE-25673
> Project: Hive
>  Issue Type: Bug
>Reporter: Peter Vary
>Assignee: Peter Vary
>Priority: Major
>  Labels: pull-request-available
>  Time Spent: 10m
>  Remaining Estimate: 0h
>
> When running join tests for Iceberg tables, we got the following exception:
> {code}
> Caused by: java.lang.RuntimeException: Map operator initialization failed
>   at 
> org.apache.hadoop.hive.ql.exec.mr.ExecMapper.configure(ExecMapper.java:131)
>   ... 23 more
> Caused by: java.lang.RuntimeException: cannot find field val from 
> [org.apache.iceberg.mr.hive.serde.objectinspector.IcebergRecordObjectInspector$IcebergRecordStructField@45f29d]
>   at 
> org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.getStandardStructFieldRef(ObjectInspectorUtils.java:550)
>   at 
> org.apache.iceberg.mr.hive.serde.objectinspector.IcebergRecordObjectInspector.getStructFieldRef(IcebergRecordObjectInspector.java:70)
>   at 
> org.apache.hadoop.hive.ql.exec.ExprNodeColumnEvaluator.initialize(ExprNodeColumnEvaluator.java:56)
>   at 
> org.apache.hadoop.hive.ql.exec.Operator.initEvaluators(Operator.java:1073)
>   at 
> org.apache.hadoop.hive.ql.exec.Operator.initEvaluatorsAndReturnStruct(Operator.java:1099)
>   at 
> org.apache.hadoop.hive.ql.exec.SelectOperator.initializeOp(SelectOperator.java:74)
>   at org.apache.hadoop.hive.ql.exec.Operator.initialize(Operator.java:360)
>   at org.apache.hadoop.hive.ql.exec.Operator.initialize(Operator.java:549)
>   at 
> org.apache.hadoop.hive.ql.exec.Operator.initializeChildren(Operator.java:503)
>   at org.apache.hadoop.hive.ql.exec.Operator.initialize(Operator.java:369)
>   at org.apache.hadoop.hive.ql.exec.Operator.initialize(Operator.java:549)
>   at 
> org.apache.hadoop.hive.ql.exec.Operator.initializeChildren(Operator.java:503)
>   at org.apache.hadoop.hive.ql.exec.Operator.initialize(Operator.java:369)
>   at 
> org.apache.hadoop.hive.ql.exec.MapOperator.initializeMapOperator(MapOperator.java:505)
>   at 
> org.apache.hadoop.hive.ql.exec.mr.ExecMapper.configure(ExecMapper.java:110)
>   ... 23 more
> {code}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Work logged] (HIVE-25673) Column pruning fix for MR tasks

2021-11-04 Thread ASF GitHub Bot (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-25673?focusedWorklogId=676719&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-676719
 ]

ASF GitHub Bot logged work on HIVE-25673:
-

Author: ASF GitHub Bot
Created on: 04/Nov/21 20:43
Start Date: 04/Nov/21 20:43
Worklog Time Spent: 10m 
  Work Description: pvary opened a new pull request #2765:
URL: https://github.com/apache/hive/pull/2765


   ### What changes were proposed in this pull request?
   When updating column pruning information `READ_NESTED_COLUMN_PATH_CONF_STR`, 
update `READ_COLUMN_NAMES_CONF_STR` and `READ_COLUMN_IDS_CONF_STR` as well. 
   
   ### Why are the changes needed?
   Iceberg MR queries are failing if multiple tables are queried and several 
columns are pruned
   
   ### Does this PR introduce _any_ user-facing change?
   No
   
   ### How was this patch tested?
   Added unit test
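
   A hedged sketch of the idea (not the actual patch): when the nested-column 
pruning info is rewritten for a new table scan, the flat column names and ids 
should be rewritten in the same step, otherwise a second table handled by the 
same MR task can inherit a stale projection. The property keys below are my 
reading of the ColumnProjectionUtils constants named above, so treat them as 
assumptions.

{code:java}
import org.apache.hadoop.conf.Configuration;

final class ProjectionSync {
  static void setProjection(Configuration conf,
                            String ids, String names, String nestedPaths) {
    // Keep all three pruning properties in sync per table scan.
    conf.set("hive.io.file.readcolumn.ids", ids);            // READ_COLUMN_IDS_CONF_STR
    conf.set("hive.io.file.readcolumn.names", names);        // READ_COLUMN_NAMES_CONF_STR
    conf.set("hive.io.file.readNestedColumn.paths", nestedPaths); // READ_NESTED_COLUMN_PATH_CONF_STR
  }
}
{code}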


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Issue Time Tracking
---

Worklog Id: (was: 676719)
Remaining Estimate: 0h
Time Spent: 10m

> Column pruning fix for MR tasks
> ---
>
> Key: HIVE-25673
> URL: https://issues.apache.org/jira/browse/HIVE-25673
> Project: Hive
>  Issue Type: Bug
>Reporter: Peter Vary
>Assignee: Peter Vary
>Priority: Major
>  Time Spent: 10m
>  Remaining Estimate: 0h
>
> When running join tests for Iceberg tables, we got the following exception:
> {code}
> Caused by: java.lang.RuntimeException: Map operator initialization failed
>   at 
> org.apache.hadoop.hive.ql.exec.mr.ExecMapper.configure(ExecMapper.java:131)
>   ... 23 more
> Caused by: java.lang.RuntimeException: cannot find field val from 
> [org.apache.iceberg.mr.hive.serde.objectinspector.IcebergRecordObjectInspector$IcebergRecordStructField@45f29d]
>   at 
> org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.getStandardStructFieldRef(ObjectInspectorUtils.java:550)
>   at 
> org.apache.iceberg.mr.hive.serde.objectinspector.IcebergRecordObjectInspector.getStructFieldRef(IcebergRecordObjectInspector.java:70)
>   at 
> org.apache.hadoop.hive.ql.exec.ExprNodeColumnEvaluator.initialize(ExprNodeColumnEvaluator.java:56)
>   at 
> org.apache.hadoop.hive.ql.exec.Operator.initEvaluators(Operator.java:1073)
>   at 
> org.apache.hadoop.hive.ql.exec.Operator.initEvaluatorsAndReturnStruct(Operator.java:1099)
>   at 
> org.apache.hadoop.hive.ql.exec.SelectOperator.initializeOp(SelectOperator.java:74)
>   at org.apache.hadoop.hive.ql.exec.Operator.initialize(Operator.java:360)
>   at org.apache.hadoop.hive.ql.exec.Operator.initialize(Operator.java:549)
>   at 
> org.apache.hadoop.hive.ql.exec.Operator.initializeChildren(Operator.java:503)
>   at org.apache.hadoop.hive.ql.exec.Operator.initialize(Operator.java:369)
>   at org.apache.hadoop.hive.ql.exec.Operator.initialize(Operator.java:549)
>   at 
> org.apache.hadoop.hive.ql.exec.Operator.initializeChildren(Operator.java:503)
>   at org.apache.hadoop.hive.ql.exec.Operator.initialize(Operator.java:369)
>   at 
> org.apache.hadoop.hive.ql.exec.MapOperator.initializeMapOperator(MapOperator.java:505)
>   at 
> org.apache.hadoop.hive.ql.exec.mr.ExecMapper.configure(ExecMapper.java:110)
>   ... 23 more
> {code}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Assigned] (HIVE-25673) Column pruning fix for MR tasks

2021-11-04 Thread Peter Vary (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-25673?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Peter Vary reassigned HIVE-25673:
-


> Column pruning fix for MR tasks
> ---
>
> Key: HIVE-25673
> URL: https://issues.apache.org/jira/browse/HIVE-25673
> Project: Hive
>  Issue Type: Bug
>Reporter: Peter Vary
>Assignee: Peter Vary
>Priority: Major
>
> When running join tests for Iceberg tables, we got the following exception:
> {code}
> Caused by: java.lang.RuntimeException: Map operator initialization failed
>   at 
> org.apache.hadoop.hive.ql.exec.mr.ExecMapper.configure(ExecMapper.java:131)
>   ... 23 more
> Caused by: java.lang.RuntimeException: cannot find field val from 
> [org.apache.iceberg.mr.hive.serde.objectinspector.IcebergRecordObjectInspector$IcebergRecordStructField@45f29d]
>   at 
> org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.getStandardStructFieldRef(ObjectInspectorUtils.java:550)
>   at 
> org.apache.iceberg.mr.hive.serde.objectinspector.IcebergRecordObjectInspector.getStructFieldRef(IcebergRecordObjectInspector.java:70)
>   at 
> org.apache.hadoop.hive.ql.exec.ExprNodeColumnEvaluator.initialize(ExprNodeColumnEvaluator.java:56)
>   at 
> org.apache.hadoop.hive.ql.exec.Operator.initEvaluators(Operator.java:1073)
>   at 
> org.apache.hadoop.hive.ql.exec.Operator.initEvaluatorsAndReturnStruct(Operator.java:1099)
>   at 
> org.apache.hadoop.hive.ql.exec.SelectOperator.initializeOp(SelectOperator.java:74)
>   at org.apache.hadoop.hive.ql.exec.Operator.initialize(Operator.java:360)
>   at org.apache.hadoop.hive.ql.exec.Operator.initialize(Operator.java:549)
>   at 
> org.apache.hadoop.hive.ql.exec.Operator.initializeChildren(Operator.java:503)
>   at org.apache.hadoop.hive.ql.exec.Operator.initialize(Operator.java:369)
>   at org.apache.hadoop.hive.ql.exec.Operator.initialize(Operator.java:549)
>   at 
> org.apache.hadoop.hive.ql.exec.Operator.initializeChildren(Operator.java:503)
>   at org.apache.hadoop.hive.ql.exec.Operator.initialize(Operator.java:369)
>   at 
> org.apache.hadoop.hive.ql.exec.MapOperator.initializeMapOperator(MapOperator.java:505)
>   at 
> org.apache.hadoop.hive.ql.exec.mr.ExecMapper.configure(ExecMapper.java:110)
>   ... 23 more
> {code}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Updated] (HIVE-25672) Hive isn't purging older compaction entries from the show compactions command

2021-11-04 Thread Jalender Reddy (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-25672?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Jalender Reddy updated HIVE-25672:
--
Description: 
Added the below properties in hive-site, but auto purging is not enforced.

When we run the show compactions command, it takes forever and returns hundreds 
of thousands of rows.

Result of the show compactions command:
{code:java}
752,450 rows selected (198.066 seconds) 

{code}
{code:java}
"hive.compactor.history.retention.succeeded": "10",
"hive.compactor.history.retention.failed": "10",  
"hive.compactor.history.retention.attempted": "10",  
"hive.compactor.history.reaper.interval": "10m" {code}

  was:
Added below properties in hive-site, but it's not enforced to auto purging.

When we run show compaction command it takes forever and returns billions of 
rows.

Result of show compaction command :
{code:java}
752,450 rows selected (198.066 seconds) 

{code}
{code:java}

"hive.compactor.history.retention.succeeded": "10",
"hive.compactor.history.retention.failed": "10",  
"hive.compactor.history.retention.attempted": "10",  
"hive.compactor.history.reaper.interval": "10m" {code}


> Hive isn't purging older compaction entries from the show compactions command
> 
>
> Key: HIVE-25672
> URL: https://issues.apache.org/jira/browse/HIVE-25672
> Project: Hive
>  Issue Type: Bug
>  Components: Hive, Metastore, Transactions
>Affects Versions: 3.1.1
>Reporter: Rohan Nimmagadda
>Priority: Minor
>
> Added the below properties in hive-site, but auto purging is not enforced.
> When we run the show compactions command, it takes forever and returns 
> hundreds of thousands of rows.
> Result of the show compactions command:
> {code:java}
> 752,450 rows selected (198.066 seconds) 
> {code}
> {code:java}
> "hive.compactor.history.retention.succeeded": "10",
> "hive.compactor.history.retention.failed": "10",  
> "hive.compactor.history.retention.attempted": "10",  
> "hive.compactor.history.reaper.interval": "10m" {code}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Updated] (HIVE-25672) Hive isn't purging older compaction entries from the show compactions command

2021-11-04 Thread Rohan Nimmagadda (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-25672?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Rohan Nimmagadda updated HIVE-25672:

Description: 
Added the below properties in hive-site, but auto purging is not enforced.

When we run the show compactions command, it takes forever and returns hundreds 
of thousands of rows.

Result of the show compactions command:
{code:java}
752,450 rows selected (198.066 seconds) 

{code}
{code:java}

"hive.compactor.history.retention.succeeded": "10",
"hive.compactor.history.retention.failed": "10",  
"hive.compactor.history.retention.attempted": "10",  
"hive.compactor.history.reaper.interval": "10m" {code}

  was:
Added below properties in hive-site , but its not enforcing to auto purging.

When we run show compaction command it takes forever and returns billions of 
rows.

Result of show compaction command :
{code:java}
752,450 rows selected (198.066 seconds) {code}
{code:java}
"hive.compactor.history.retention.succeeded": "10",
"hive.compactor.history.retention.failed": "10",  
"hive.compactor.history.retention.attempted": "10",  
"hive.compactor.history.reaper.interval": "10m" {code}


> Hive isn't purging older compaction entries from the show compactions command
> 
>
> Key: HIVE-25672
> URL: https://issues.apache.org/jira/browse/HIVE-25672
> Project: Hive
>  Issue Type: Bug
>  Components: Hive, Metastore, Transactions
>Affects Versions: 3.1.1
>Reporter: Rohan Nimmagadda
>Priority: Minor
>
> Added the below properties in hive-site, but auto purging is not enforced.
> When we run the show compactions command, it takes forever and returns 
> hundreds of thousands of rows.
> Result of the show compactions command:
> {code:java}
> 752,450 rows selected (198.066 seconds) 
> {code}
> {code:java}
> "hive.compactor.history.retention.succeeded": "10",
> "hive.compactor.history.retention.failed": "10",  
> "hive.compactor.history.retention.attempted": "10",  
> "hive.compactor.history.reaper.interval": "10m" {code}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Updated] (HIVE-25672) Hive isn't purging older compaction entries from the show compactions command

2021-11-04 Thread Rohan Nimmagadda (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-25672?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Rohan Nimmagadda updated HIVE-25672:

Description: 
Added the below properties in hive-site, but auto purging is not enforced.

When we run the show compactions command, it takes forever and returns hundreds 
of thousands of rows.

Result of the show compactions command:
{code:java}
752,450 rows selected (198.066 seconds) {code}
{code:java}
"hive.compactor.history.retention.succeeded": "10",
"hive.compactor.history.retention.failed": "10",  
"hive.compactor.history.retention.attempted": "10",  
"hive.compactor.history.reaper.interval": "10m" {code}

  was:
Added below properties in hive-site , but its not enforcing to auto purging.

When we run show compaction command it takes forever and returns billions of 
rows.
{code:java}
"hive.compactor.history.retention.succeeded": "10",
"hive.compactor.history.retention.failed": "10",  
"hive.compactor.history.retention.attempted": "10",  
"hive.compactor.history.reaper.interval": "10m" {code}


> Hive isn't purging older compaction entries from the show compactions command
> 
>
> Key: HIVE-25672
> URL: https://issues.apache.org/jira/browse/HIVE-25672
> Project: Hive
>  Issue Type: Bug
>  Components: Hive, Metastore, Transactions
>Affects Versions: 3.1.1
>Reporter: Rohan Nimmagadda
>Priority: Minor
>
> Added the below properties in hive-site, but auto purging is not enforced.
> When we run the show compactions command, it takes forever and returns 
> hundreds of thousands of rows.
> Result of the show compactions command:
> {code:java}
> 752,450 rows selected (198.066 seconds) {code}
> {code:java}
> "hive.compactor.history.retention.succeeded": "10",
> "hive.compactor.history.retention.failed": "10",  
> "hive.compactor.history.retention.attempted": "10",  
> "hive.compactor.history.reaper.interval": "10m" {code}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Updated] (HIVE-25671) Hybrid Grace Hash Join NullPointer when querying RCFile

2021-11-04 Thread FrankieLee (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-25671?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

FrankieLee updated HIVE-25671:
--
Description: 
{format}
2021-11-04 10:02:47,553 [INFO] [TezChild] |exec.MapJoinOperator|: Hybrid Grace 
Hash Join: Deserializing spilled hash partition...
2021-11-04 10:02:47,553 [INFO] [TezChild] |exec.MapJoinOperator|: Hybrid Grace 
Hash Join: Number of rows in hashmap: 1
2021-11-04 10:02:47,554 [INFO] [TezChild] |exec.MapJoinOperator|: Hybrid Grace 
Hash Join: Going to process spilled big table rows in partition 5. Number of 
rows: 1
2021-11-04 10:02:47,561 [ERROR] [TezChild] |exec.MapJoinOperator|: Unexpected 
exception from MapJoinOperator : null
java.lang.NullPointerException
at 
org.apache.hadoop.hive.serde2.columnar.ColumnarStructBase$FieldInfo.uncheckedGetField(ColumnarStructBase.java:114)
at 
org.apache.hadoop.hive.serde2.columnar.ColumnarStructBase.getField(ColumnarStructBase.java:172)
at 
org.apache.hadoop.hive.serde2.objectinspector.ColumnarStructObjectInspector.getStructFieldData(ColumnarStructObjectInspector.java:67)
at 
org.apache.hadoop.hive.ql.exec.ExprNodeColumnEvaluator._evaluate(ExprNodeColumnEvaluator.java:95)
at 
org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:80)
at 
org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:68)
at 
org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer$GetAdaptor.setFromRow(MapJoinBytesTableContainer.java:552)
at 
org.apache.hadoop.hive.ql.exec.MapJoinOperator.setMapJoinKey(MapJoinOperator.java:415)
at 
org.apache.hadoop.hive.ql.exec.MapJoinOperator.process(MapJoinOperator.java:466)
at 
org.apache.hadoop.hive.ql.exec.MapJoinOperator.reProcessBigTable(MapJoinOperator.java:755)
at 
org.apache.hadoop.hive.ql.exec.MapJoinOperator.continueProcess(MapJoinOperator.java:671)
at 
org.apache.hadoop.hive.ql.exec.MapJoinOperator.closeOp(MapJoinOperator.java:604)
at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:733)
at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:757)
at 
org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.close(MapRecordProcessor.java:477)
at 
org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:284)
at 
org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:250)
at 
org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:374)
at 
org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:73)
at 
org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:61)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at 
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1729)
at 
org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:61)
at 
org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:37)
at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36)
at 
com.google.common.util.concurrent.TrustedListenableFutureTask$TrustedFutureInterruptibleTask.runInterruptibly(TrustedListenableFutureTask.java:108)
at 
com.google.common.util.concurrent.InterruptibleTask.run(InterruptibleTask.java:41)
at 
com.google.common.util.concurrent.TrustedListenableFutureTask.run(TrustedListenableFutureTask.java:77)
at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
{format}

  was:

{format}
2021-11-04 10:02:47,553 [INFO] [TezChild] |exec.MapJoinOperator|: Hybrid Grace 
Hash Join: Deserializing spilled hash partition...
2021-11-04 10:02:47,553 [INFO] [TezChild] |exec.MapJoinOperator|: Hybrid Grace 
Hash Join: Number of rows in hashmap: 1
2021-11-04 10:02:47,554 [INFO] [TezChild] |exec.MapJoinOperator|: Hybrid Grace 
Hash Join: Going to process spilled big table rows in partition 5. Number of 
rows: 1
2021-11-04 10:02:47,561 [ERROR] [TezChild] |exec.MapJoinOperator|: Unexpected 
exception from MapJoinOperator : null
java.lang.NullPointerException
at 
org.apache.hadoop.hive.serde2.columnar.ColumnarStructBase$FieldInfo.uncheckedGetField(ColumnarStructBase.java:114)
at 
org.apache.hadoop.hive.serde2.columnar.ColumnarStructBase.getField(ColumnarStructBase.java:172)
at 
org.apache.hadoop.hive.serde2.objectinspector.ColumnarStructObjectInspector.getStructFieldData(ColumnarStructObjectInspector.java:67)
at 

[jira] [Resolved] (HIVE-25060) Hive Compactor doesn't launch cleaner

2021-11-04 Thread Karen Coppage (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-25060?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Karen Coppage resolved HIVE-25060.
--
  Assignee: Fran Gonzalez
Resolution: Abandoned

Due to lack of information, we cannot proceed. As it's been more than 6 months, 
I will mark this issue as abandoned.

> Hive Compactor doesn't launch cleaner
> -
>
> Key: HIVE-25060
> URL: https://issues.apache.org/jira/browse/HIVE-25060
> Project: Hive
>  Issue Type: Bug
>  Components: Hive
>Affects Versions: 3.1.1
> Environment: Hive 3.1.0
> Hadoop 3.1.1
>Reporter: Fran Gonzalez
>Assignee: Fran Gonzalez
>Priority: Major
>
> Hello,
> there are problems with the Hive Compactor. We can see the message "Max block 
> location exceeded for split" in hivemetastore.log, and it's appearing more and 
> more often.
> After that, the "compactor.Cleaner" is not launched.
> We observed that after a Hive Metastore restart, the "compactor.Cleaner" is 
> never launched again, but the logs don't display any message about it.
> Could this be a degradation of the Hive Compactor as delta files grow in the 
> partitions?
> Regards.



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Work logged] (HIVE-25596) Compress Hive Replication Metrics while storing

2021-11-04 Thread ASF GitHub Bot (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-25596?focusedWorklogId=676240&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-676240
 ]

ASF GitHub Bot logged work on HIVE-25596:
-

Author: ASF GitHub Bot
Created on: 04/Nov/21 07:27
Start Date: 04/Nov/21 07:27
Worklog Time Spent: 10m 
  Work Description: hmangla98 commented on a change in pull request #2724:
URL: https://github.com/apache/hive/pull/2724#discussion_r742588986



##
File path: ql/src/java/org/apache/hadoop/hive/ql/parse/repl/metric/event/Stage.java
##
@@ -129,12 +127,7 @@ public String getReplStats() {
   }
 
   public void setReplStats(String replStats) {
-    // Check the stat string doesn't surpass the RM_PROGRESS column length.
-    if (replStats.length() >= RM_PROGRESS_LENGTH - 2000) {

Review comment:
   2k chars are consumed by variables other than replStats.
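
To make that headroom concrete: a minimal, self-contained sketch of the kind of guard the removed line performed. The column width, the 2k headroom, and all names here are assumptions for illustration; the real constant lives in ReplStatsTracker.RM_PROGRESS_LENGTH.

public class ReplStatsGuardSketch {
  // Hypothetical values, for illustration only.
  private static final int RM_PROGRESS_LENGTH = 24000;
  private static final int HEADROOM_FOR_OTHER_FIELDS = 2000; // space assumed for the other serialized fields

  // Truncate the stats string so the whole serialized row still fits the column.
  static String clampReplStats(String replStats) {
    int limit = RM_PROGRESS_LENGTH - HEADROOM_FOR_OTHER_FIELDS;
    return replStats.length() >= limit ? replStats.substring(0, limit) : replStats;
  }

  public static void main(String[] args) {
    System.out.println(clampReplStats("x".repeat(30000)).length()); // prints 22000
  }
}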




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Issue Time Tracking
---

Worklog Id: (was: 676240)
Time Spent: 5.5h  (was: 5h 20m)

> Compress Hive Replication Metrics while storing
> ---
>
> Key: HIVE-25596
> URL: https://issues.apache.org/jira/browse/HIVE-25596
> Project: Hive
>  Issue Type: Improvement
>Reporter: Haymant Mangla
>Assignee: Haymant Mangla
>Priority: Major
>  Labels: pull-request-available
> Attachments: CompressedRM_Progress(k=10), CompressedRM_Progress(k=5), 
> PlainTextRM_Progress(k=10), PlainTextRM_Progress(k=5)
>
>  Time Spent: 5.5h
>  Remaining Estimate: 0h
>
> Compress the json fields of sys.replication_metrics table to optimise RDBMS 
> space usage.
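
As background, one straightforward way to shrink a JSON metrics string before it lands in a fixed-width RDBMS column is gzip plus Base64. A minimal standalone sketch, assuming nothing about the serializer Hive actually uses:

import java.io.ByteArrayOutputStream;
import java.nio.charset.StandardCharsets;
import java.util.Base64;
import java.util.zip.GZIPOutputStream;

public class MetricCompressionSketch {
  // Compress a JSON metrics string and Base64-encode it for storage in a text column.
  static String compress(String json) throws Exception {
    ByteArrayOutputStream bos = new ByteArrayOutputStream();
    try (GZIPOutputStream gz = new GZIPOutputStream(bos)) {
      gz.write(json.getBytes(StandardCharsets.UTF_8));
    }
    return Base64.getEncoder().encodeToString(bos.toByteArray());
  }

  public static void main(String[] args) throws Exception {
    String json = "{\"stages\":[{\"name\":\"REPL_DUMP\",\"status\":\"SUCCESS\"}]}";
    System.out.println(compress(json));
  }
}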



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Work logged] (HIVE-25596) Compress Hive Replication Metrics while storing

2021-11-04 Thread ASF GitHub Bot (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-25596?focusedWorklogId=676239&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-676239
 ]

ASF GitHub Bot logged work on HIVE-25596:
-

Author: ASF GitHub Bot
Created on: 04/Nov/21 07:26
Start Date: 04/Nov/21 07:26
Worklog Time Spent: 10m 
  Work Description: pkumarsinha commented on a change in pull request #2724:
URL: https://github.com/apache/hive/pull/2724#discussion_r742588517



##
File path: ql/src/java/org/apache/hadoop/hive/ql/parse/repl/metric/event/Stage.java
##
@@ -129,12 +127,7 @@ public String getReplStats() {
   }
 
   public void setReplStats(String replStats) {
-    // Check the stat string doesn't surpass the RM_PROGRESS column length.
-    if (replStats.length() >= RM_PROGRESS_LENGTH - 2000) {

Review comment:
   Why was it 2k less than RM_PROGRESS_LENGTH?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Issue Time Tracking
---

Worklog Id: (was: 676239)
Time Spent: 5h 20m  (was: 5h 10m)

> Compress Hive Replication Metrics while storing
> ---
>
> Key: HIVE-25596
> URL: https://issues.apache.org/jira/browse/HIVE-25596
> Project: Hive
>  Issue Type: Improvement
>Reporter: Haymant Mangla
>Assignee: Haymant Mangla
>Priority: Major
>  Labels: pull-request-available
> Attachments: CompressedRM_Progress(k=10), CompressedRM_Progress(k=5), 
> PlainTextRM_Progress(k=10), PlainTextRM_Progress(k=5)
>
>  Time Spent: 5h 20m
>  Remaining Estimate: 0h
>
> Compress the json fields of sys.replication_metrics table to optimise RDBMS 
> space usage.



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Work logged] (HIVE-25596) Compress Hive Replication Metrics while storing

2021-11-04 Thread ASF GitHub Bot (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-25596?focusedWorklogId=676237&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-676237
 ]

ASF GitHub Bot logged work on HIVE-25596:
-

Author: ASF GitHub Bot
Created on: 04/Nov/21 07:25
Start Date: 04/Nov/21 07:25
Worklog Time Spent: 10m 
  Work Description: pkumarsinha commented on a change in pull request #2724:
URL: https://github.com/apache/hive/pull/2724#discussion_r742587970



##
File path: ql/src/java/org/apache/hadoop/hive/ql/parse/repl/metric/ReplicationMetricCollector.java
##
@@ -116,14 +136,16 @@ public void reportStageEnd(String stageName, Status status, long lastReplId,
 stage = new Stage(stageName, status, -1L);
   }
   stage.setStatus(status);
-  stage.setEndTime(System.currentTimeMillis());
+  stage.setEndTime(getCurrentTimeInMillis());
   stage.setReplSnapshotsCount(replSnapshotCount);
   if (replStatsTracker != null && !(replStatsTracker instanceof NoOpReplStatsTracker)) {
 String replStatString = replStatsTracker.toString();
 LOG.info("Replication Statistics are: {}", replStatString);
 stage.setReplStats(replStatString);
   }
   progress.addStage(stage);
+  // Check the progress string doesn't surpass the RM_PROGRESS column width.
+  checkRMProgressLimit(progress, stage);

Review comment:
   nit: it's not just a check; it updates the state as well. We should name it accordingly.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Issue Time Tracking
---

Worklog Id: (was: 676237)
Time Spent: 5h 10m  (was: 5h)

> Compress Hive Replication Metrics while storing
> ---
>
> Key: HIVE-25596
> URL: https://issues.apache.org/jira/browse/HIVE-25596
> Project: Hive
>  Issue Type: Improvement
>Reporter: Haymant Mangla
>Assignee: Haymant Mangla
>Priority: Major
>  Labels: pull-request-available
> Attachments: CompressedRM_Progress(k=10), CompressedRM_Progress(k=5), 
> PlainTextRM_Progress(k=10), PlainTextRM_Progress(k=5)
>
>  Time Spent: 5h 10m
>  Remaining Estimate: 0h
>
> Compress the json fields of sys.replication_metrics table to optimise RDBMS 
> space usage.



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Work logged] (HIVE-25596) Compress Hive Replication Metrics while storing

2021-11-04 Thread ASF GitHub Bot (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-25596?focusedWorklogId=676235&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-676235
 ]

ASF GitHub Bot logged work on HIVE-25596:
-

Author: ASF GitHub Bot
Created on: 04/Nov/21 07:23
Start Date: 04/Nov/21 07:23
Worklog Time Spent: 10m 
  Work Description: pkumarsinha commented on a change in pull request #2724:
URL: https://github.com/apache/hive/pull/2724#discussion_r742587136



##
File path: ql/src/java/org/apache/hadoop/hive/ql/parse/repl/metric/ReplicationMetricCollector.java
##
@@ -105,6 +110,21 @@ public void reportFailoverStart(String stageName, Map<String, Long> metricMap,
 }
   }
 
+  private void checkRMProgressLimit(Progress progress, Stage stage) throws SemanticException {
+    MessageSerializer serializer = MessageFactory.getDefaultInstanceForReplMetrics(conf).getSerializer();
+    ObjectMapper mapper = new ObjectMapper();
+    String serializedProgress = null;
+    try {
+      serializedProgress = serializer.serialize(mapper.writeValueAsString(progress));
+    } catch (Exception e) {
+      throw new SemanticException(e);
+    }
+    if (serializedProgress.length() > ReplStatsTracker.RM_PROGRESS_LENGTH) {
+      stage.setReplStats("RM_PROGRESS LIMIT EXCEEDED TO " + serializedProgress.length());

Review comment:
   Add a keyword as a prefix, like "ERROR" or "WARN".
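
Applied to the hunk above, the suggestion would read roughly as follows (the exact prefix wording is an assumption, not the final patch):

+      stage.setReplStats("ERROR: RM_PROGRESS LIMIT EXCEEDED TO " + serializedProgress.length());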




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Issue Time Tracking
---

Worklog Id: (was: 676235)
Time Spent: 5h  (was: 4h 50m)

> Compress Hive Replication Metrics while storing
> ---
>
> Key: HIVE-25596
> URL: https://issues.apache.org/jira/browse/HIVE-25596
> Project: Hive
>  Issue Type: Improvement
>Reporter: Haymant Mangla
>Assignee: Haymant Mangla
>Priority: Major
>  Labels: pull-request-available
> Attachments: CompressedRM_Progress(k=10), CompressedRM_Progress(k=5), 
> PlainTextRM_Progress(k=10), PlainTextRM_Progress(k=5)
>
>  Time Spent: 5h
>  Remaining Estimate: 0h
>
> Compress the json fields of sys.replication_metrics table to optimise RDBMS 
> space usage.



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Work logged] (HIVE-25652) Add constraints in result of “SHOW CREATE TABLE ”

2021-11-04 Thread ASF GitHub Bot (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-25652?focusedWorklogId=676231&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-676231
 ]

ASF GitHub Bot logged work on HIVE-25652:
-

Author: ASF GitHub Bot
Created on: 04/Nov/21 06:41
Start Date: 04/Nov/21 06:41
Worklog Time Spent: 10m 
  Work Description: soumyakanti3578 commented on a change in pull request #2752:
URL: https://github.com/apache/hive/pull/2752#discussion_r742571038



##
File path: ql/src/java/org/apache/hadoop/hive/ql/exec/DDLPlanUtils.java
##
@@ -800,19 +809,166 @@ private String getExternal(Table table) {
 return table.getTableType() == TableType.EXTERNAL_TABLE ? "EXTERNAL " : "";
   }
 
-  private String getColumns(Table table) {
-    List<String> columnDescs = new ArrayList<String>();
+  private String getColumns(Table table) throws HiveException {
+    List<String> columnDescs = new ArrayList<>();
+    List<String> columns = table.getCols().stream().map(FieldSchema::getName).collect(Collectors.toList());
+    Set<String> notNullColumns = Collections.emptySet();
+    if (NotNullConstraint.isNotEmpty(table.getNotNullConstraint())) {
+      notNullColumns = new HashSet<>(table.getNotNullConstraint().getNotNullConstraints().values());
+    }
+
+    Map<String, String> columnDefaultValueMap = Collections.emptyMap();
+    if (DefaultConstraint.isNotEmpty(table.getDefaultConstraint())) {
+      columnDefaultValueMap = table.getDefaultConstraint().getColNameToDefaultValueMap();
+    }
+
+    List<SQLCheckConstraint> sqlCheckConstraints;
+    try {
+      sqlCheckConstraints = Hive.get().getCheckConstraintList(table.getDbName(), table.getTableName());
+    } catch (NoSuchObjectException e) {
+      throw new HiveException(e);
+    }
+    Map<String, SQLCheckConstraint> columnCheckConstraintsMap = sqlCheckConstraints.stream()
+      .filter(SQLCheckConstraint::isSetColumn_name)
+      .collect(Collectors.toMap(SQLCheckConstraint::getColumn_name, Function.identity()));
+    List<SQLCheckConstraint> tableCheckConstraints = sqlCheckConstraints.stream()
+      .filter(cc -> !cc.isSetColumn_name())
+      .collect(Collectors.toList());
+
     for (FieldSchema column : table.getCols()) {
       String columnType = formatType(TypeInfoUtils.getTypeInfoFromTypeString(column.getType()));
-      String columnDesc = "  `" + column.getName() + "` " + columnType;
+      String columnName = column.getName();
+      StringBuilder columnDesc = new StringBuilder();
+      columnDesc.append("  `").append(columnName).append("` ").append(columnType);
+      if (notNullColumns.contains(columnName)) {
+        columnDesc.append(" NOT NULL");
+      }
+      if (columnDefaultValueMap.containsKey(columnName)) {
+        columnDesc.append(" DEFAULT ").append(columnDefaultValueMap.get(columnName));
+      }
+      if (columnCheckConstraintsMap.containsKey(columnName)) {
+        columnDesc.append(getColumnCheckConstraintDesc(columnCheckConstraintsMap.get(columnName), columns));
+      }
       if (column.getComment() != null) {
-        columnDesc += " COMMENT '" + HiveStringUtils.escapeHiveCommand(column.getComment()) + "'";
+        columnDesc.append(" COMMENT '").append(HiveStringUtils.escapeHiveCommand(column.getComment())).append("'");
       }
-      columnDescs.add(columnDesc);
+      columnDescs.add(columnDesc.toString());
     }
+    String pkDesc = getPrimaryKeyDesc(table);
+    if (pkDesc != null) {
+      columnDescs.add(pkDesc);
+    }
+    columnDescs.addAll(getForeignKeyDesc(table));
+    columnDescs.addAll(getTableCheckConstraintDesc(tableCheckConstraints, columns));
     return StringUtils.join(columnDescs, ", \n");
   }
 
+  private List<String> getTableCheckConstraintDesc(List<SQLCheckConstraint> tableCheckConstraints,
+                                                   List<String> columns) {
+    List<String> ccDescs = new ArrayList<>();
+    for (SQLCheckConstraint constraint : tableCheckConstraints) {
+      String enable = constraint.isEnable_cstr() ? " enable" : " disable";
+      String validate = constraint.isValidate_cstr() ? " validate" : " novalidate";
+      String rely = constraint.isRely_cstr() ? " rely" : " norely";
+      String expression = getCheckExpressionWithBackticks(columns, constraint);
+      ccDescs.add("  constraint " + constraint.getDc_name() + " CHECK(" + expression +
+        ")" + enable + validate + rely);
+    }
+    return ccDescs;
+  }
+
+  private String getCheckExpressionWithBackticks(List<String> columns, SQLCheckConstraint constraint) {
+    TreeMap<Integer, String> indexToCols = new TreeMap<>();
+    String expression = constraint.getCheck_expression();
+    for (String col : columns) {
+      int idx = expression.indexOf(col);
+      if (idx == -1) {
+        continue;
+      }
+      indexToCols.put(idx, col);
+      while (idx + col.length() < expression.length()) {
+        idx = expression.indexOf(col, idx + col.length());
+        if (idx == -1) {
+          break;
+        }
+        indexToCols.put(idx, col);
+      }
+    }

Review comment:
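
For context on the hunk above, a hedged, standalone sketch of the quoting pass the new helper implements: record every occurrence of each column name in the check expression, then rebuild the expression with backticks around those occurrences. It keeps the diff's simple substring matching (which, as there, can also match a column name embedded in a longer identifier), so treat it as an illustration rather than Hive's final implementation; all names here are hypothetical.

import java.util.List;
import java.util.Map;
import java.util.TreeMap;

public class BacktickQuotingSketch {
  // Simplified from getCheckExpressionWithBackticks above: map each match position
  // to its column name, then rebuild the expression left to right with backticks.
  static String quoteColumns(String expression, List<String> columns) {
    TreeMap<Integer, String> indexToCols = new TreeMap<>();
    for (String col : columns) {
      int idx = expression.indexOf(col);
      while (idx != -1) {
        indexToCols.put(idx, col);
        idx = expression.indexOf(col, idx + col.length());
      }
    }
    StringBuilder sb = new StringBuilder();
    int pos = 0;
    for (Map.Entry<Integer, String> e : indexToCols.entrySet()) {
      sb.append(expression, pos, e.getKey()).append('`').append(e.getValue()).append('`');
      pos = e.getKey() + e.getValue().length();
    }
    return sb.append(expression.substring(pos)).toString();
  }

  public static void main(String[] args) {
    System.out.println(quoteColumns("price > 0 AND qty < 100", List.of("price", "qty")));
    // prints: `price` > 0 AND `qty` < 100
  }
}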