[jira] [Updated] (HIVE-26505) Case When Some result data is lost when there are common column conditions and partitioned column conditions

2022-09-01 Thread GuangMing Lu (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-26505?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

GuangMing Lu updated HIVE-26505:

Description: 
{code:java}
create table test0831 (id string) partitioned by (cp string);
insert into test0831 values ('a', '2022-08-23'),('c', '2022-08-23'),('d', 
'2022-08-23');
insert into test0831 values ('a', '2022-08-24'),('b', '2022-08-24');
select * from test0831;
+--------------+--------------+
| test0831.id  | test0831.cp  |
+--------------+--------------+
| a            | 2022-08-23   |
| b            | 2022-08-23   |
| a            | 2022-08-23   |
| c            | 2022-08-24   |
| d            | 2022-08-24   |
+--------------+--------------+

select * from test0831 where (case when id='a' and cp='2022-08-23' then 1 else 
0 end)=0;  
+--------------+--------------+
| test0831.id  | test0831.cp  |
+--------------+--------------+
| a            | 2022-08-24   |
| b            | 2022-08-24   |
+--------------+--------------+
{code}
 

  was:
{code:java}
create table test0831 (id string) partitioned by (cp string);
insert into test0831 values ('a', '2022-08-23'),('c', '2022-08-23'),('d', 
'2022-08-23');
insert into test0831 values ('a', '2022-08-24'),('b', '2022-08-24');
select * from test0831;
+-+--+
| test0831.id | test0831.cp  |
+-+--+
| a           | 2022-08-23   |
| b           | 2022-08-23   |
| a           | 2022-08-23   |
| c           | 2022-08-24   |
| d           | 2022-08-24   |
+-+--+

select * from test0831 where (case when id='a' and cp='2022-08-24' then 1 else 
0 end)=0; 
+--+--+
| test0830.id  | test0830.cp  |
+--+--+
| a            | 2022-08-23   |
| c            | 2022-08-23   |
| d            | 2022-08-23   |
+--+--+{code}
 


> Case When Some result data is lost when there are common column conditions 
> and partitioned column conditions 
> -
>
> Key: HIVE-26505
> URL: https://issues.apache.org/jira/browse/HIVE-26505
> Project: Hive
>  Issue Type: Bug
>  Components: Hive
>Affects Versions: 3.1.0, 4.0.0-alpha-1
>Reporter: GuangMing Lu
>Priority: Critical
>
> {code:java}
> create table test0831 (id string) partitioned by (cp string);
> insert into test0831 values ('a', '2022-08-23'),('c', '2022-08-23'),('d', 
> '2022-08-23');
> insert into test0831 values ('a', '2022-08-24'),('b', '2022-08-24');
> select * from test0831;
> +-+--+
> | test0831.id | test0831.cp  |
> +-+--+
> | a     | 2022-08-23   |
> | b        | 2022-08-23   |
> | a        | 2022-08-23   |
> | c        | 2022-08-24   |
> | d        | 2022-08-24   |
> +-+--+
> select * from test0831 where (case when id='a' and cp='2022-08-23' then 1 
> else 0 end)=0;  
> +--+--+
> | test0830.id  | test0830.cp  |
> +--+--+
> | a        | 2022-08-24   |
> | b        | 2022-08-24   |
> +--+--+
> {code}
>  



--
This message was sent by Atlassian Jira
(v8.20.10#820010)


[jira] [Updated] (HIVE-26505) Case When Some result data is lost when there are common column conditions and partitioned column conditions

2022-08-30 Thread GuangMing Lu (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-26505?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

GuangMing Lu updated HIVE-26505:

Description: 
{code:java}
create table test0831 (id string) partitioned by (cp string);
insert into test0831 values ('a', '2022-08-23'),('c', '2022-08-23'),('d', 
'2022-08-23');
insert into test0831 values ('a', '2022-08-24'),('b', '2022-08-24');
select * from test0831;
+-+--+
| test0831.id | test0831.cp  |
+-+--+
| a           | 2022-08-23   |
| b           | 2022-08-23   |
| a           | 2022-08-23   |
| c           | 2022-08-24   |
| d           | 2022-08-24   |
+-+--+

select * from test0831 where (case when id='a' and cp='2022-08-24' then 1 else 
0 end)=0; 
+--+--+
| test0830.id  | test0830.cp  |
+--+--+
| a            | 2022-08-23   |
| c            | 2022-08-23   |
| d            | 2022-08-23   |
+--+--+{code}
 

  was:
{code:java}
create table test0831 (id string) partitioned by (cp string);
insert into test0831 values ('a', '2022-08-23'),('c', '2022-08-24'),('d', 
'2022-08-244');
insert into test0831 values ('a', '2022-08-24'),('b', '2022-08-24');
select * from test0831;
+-+--+
| test0831.id | test0831.cp  |
+-+--+
| a           | 2022-08-23   |
| b           | 2022-08-23   |
| a           | 2022-08-23   |
| c           | 2022-08-24   |
| d           | 2022-08-24   |
+-+--+

select * from test0831 where (case when id='a' and cp='2022-08-24' then 1 else 
0 end)=0; 
+--+--+
| test0830.id  | test0830.cp  |
+--+--+
| a            | 2022-08-23   |
| c            | 2022-08-23   |
| d            | 2022-08-23   |
+--+--+{code}
 


> Case When Some result data is lost when there are common column conditions 
> and partitioned column conditions 
> -
>
> Key: HIVE-26505
> URL: https://issues.apache.org/jira/browse/HIVE-26505
> Project: Hive
>  Issue Type: Bug
>  Components: Hive
>Affects Versions: 3.1.0, 4.0.0-alpha-1
>Reporter: GuangMing Lu
>Priority: Critical
>
> {code:java}
> create table test0831 (id string) partitioned by (cp string);
> insert into test0831 values ('a', '2022-08-23'),('c', '2022-08-23'),('d', 
> '2022-08-23');
> insert into test0831 values ('a', '2022-08-24'),('b', '2022-08-24');
> select * from test0831;
> +-+--+
> | test0831.id | test0831.cp  |
> +-+--+
> | a           | 2022-08-23   |
> | b           | 2022-08-23   |
> | a           | 2022-08-23   |
> | c           | 2022-08-24   |
> | d           | 2022-08-24   |
> +-+--+
> select * from test0831 where (case when id='a' and cp='2022-08-24' then 1 
> else 0 end)=0; 
> +--+--+
> | test0830.id  | test0830.cp  |
> +--+--+
> | a            | 2022-08-23   |
> | c            | 2022-08-23   |
> | d            | 2022-08-23   |
> +--+--+{code}
>  



--
This message was sent by Atlassian Jira
(v8.20.10#820010)


[jira] [Updated] (HIVE-26505) Case When Some result data is lost when there are common column conditions and partitioned column conditions

2022-08-30 Thread GuangMing Lu (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-26505?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

GuangMing Lu updated HIVE-26505:

Description: 
{code:java}
create table test0831 (id string) partitioned by (cp string);
insert into test0831 values ('a', '2022-08-23'),('c', '2022-08-24'),('d', 
'2022-08-244');
insert into test0831 values ('a', '2022-08-24'),('b', '2022-08-24');
select * from test0831;
+-+--+
| test0831.id | test0831.cp  |
+-+--+
| a           | 2022-08-23   |
| b           | 2022-08-23   |
| a           | 2022-08-23   |
| c           | 2022-08-24   |
| d           | 2022-08-24   |
+-+--+

select * from test0831 where (case when id='a' and cp='2022-08-24' then 1 else 
0 end)=0; 
+--+--+
| test0830.id  | test0830.cp  |
+--+--+
| a            | 2022-08-23   |
| c            | 2022-08-23   |
| d            | 2022-08-23   |
+--+--+{code}
 

  was:
{code:java}
create table test0831 (id string, cp string) stored as orc;
insert into test0831 values ('a', '2022-08-23'),('c', '2022-08-24'),('d', 
'2022-08-244');
insert into test0831 values ('a', '2022-08-24'),('b', '2022-08-24');
select * from test0831;
+-+--+
| test0831.id | test0831.cp  |
+-+--+
| a           | 2022-08-23   |
| b           | 2022-08-23   |
| a           | 2022-08-23   |
| c           | 2022-08-24   |
| d           | 2022-08-24   |
+-+--+

select * from test0831 where (case when id='a' and cp='2022-08-24' then 1 else 
0 end)=0; 
+--+--+
| test0830.id  | test0830.cp  |
+--+--+
| a            | 2022-08-23   |
| c            | 2022-08-23   |
| d            | 2022-08-23   |
+--+--+{code}
 


> Case When Some result data is lost when there are common column conditions 
> and partitioned column conditions 
> -
>
> Key: HIVE-26505
> URL: https://issues.apache.org/jira/browse/HIVE-26505
> Project: Hive
>  Issue Type: Bug
>  Components: Hive
>Affects Versions: 3.1.0, 4.0.0-alpha-1
>Reporter: GuangMing Lu
>Priority: Critical
>
> {code:java}
> create table test0831 (id string) partitioned by (cp string);
> insert into test0831 values ('a', '2022-08-23'),('c', '2022-08-24'),('d', 
> '2022-08-244');
> insert into test0831 values ('a', '2022-08-24'),('b', '2022-08-24');
> select * from test0831;
> +-+--+
> | test0831.id | test0831.cp  |
> +-+--+
> | a           | 2022-08-23   |
> | b           | 2022-08-23   |
> | a           | 2022-08-23   |
> | c           | 2022-08-24   |
> | d           | 2022-08-24   |
> +-+--+
> select * from test0831 where (case when id='a' and cp='2022-08-24' then 1 
> else 0 end)=0; 
> +--+--+
> | test0830.id  | test0830.cp  |
> +--+--+
> | a            | 2022-08-23   |
> | c            | 2022-08-23   |
> | d            | 2022-08-23   |
> +--+--+{code}
>  



--
This message was sent by Atlassian Jira
(v8.20.10#820010)


[jira] [Updated] (HIVE-26505) Case When Some result data is lost when there are common column conditions and partitioned column conditions

2022-08-30 Thread GuangMing Lu (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-26505?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

GuangMing Lu updated HIVE-26505:

Description: 
{code:java}
create table test0831 (id string, cp string) stored as orc;
insert into test0831 values ('a', '2022-08-23'),('c', '2022-08-24'),('d', 
'2022-08-244');
insert into test0831 values ('a', '2022-08-24'),('b', '2022-08-24');
select * from test0831;
+-+--+
| test0831.id | test0831.cp  |
+-+--+
| a           | 2022-08-23   |
| b           | 2022-08-23   |
| a           | 2022-08-23   |
| c           | 2022-08-24   |
| d           | 2022-08-24   |
+-+--+

select * from test0831 where (case when id='a' and cp='2022-08-24' then 1 else 
0 end)=0; 
+--+--+
| test0830.id  | test0830.cp  |
+--+--+
| a            | 2022-08-23   |
| c            | 2022-08-23   |
| d            | 2022-08-23   |
+--+--+{code}
 

  was:
insert into test0831 values ('a', '2022-08-23'),('c', '2022-08-24'),('d', 
'2022-08-244');
insert into test0831 values ('a', '2022-08-24'),('b', '2022-08-24');

select * from test0831;
+-+--+
| test0831.id | test0831.cp  |
+-+--+
| a           | 2022-08-23   |
| b           | 2022-08-23   |
| a           | 2022-08-23   |
| c           | 2022-08-24   |
| d           | 2022-08-24   |
+-+--+

select * from test0831 where (case when id='a' and cp='2022-08-24' then 1 else 
0 end)=0;

+--+--+
| test0830.id  | test0830.cp  |
+--+--+
| a            | 2022-08-23   |
| c            | 2022-08-23   |
| d            | 2022-08-23   |
+--+--+


> Case When Some result data is lost when there are common column conditions 
> and partitioned column conditions 
> -
>
> Key: HIVE-26505
> URL: https://issues.apache.org/jira/browse/HIVE-26505
> Project: Hive
>  Issue Type: Bug
>  Components: Hive
>Affects Versions: 3.1.0, 4.0.0-alpha-1
>Reporter: GuangMing Lu
>Priority: Major
>
> {code:java}
> create table test0831 (id string, cp string) stored as orc;
> insert into test0831 values ('a', '2022-08-23'),('c', '2022-08-24'),('d', 
> '2022-08-244');
> insert into test0831 values ('a', '2022-08-24'),('b', '2022-08-24');
> select * from test0831;
> +-+--+
> | test0831.id | test0831.cp  |
> +-+--+
> | a           | 2022-08-23   |
> | b           | 2022-08-23   |
> | a           | 2022-08-23   |
> | c           | 2022-08-24   |
> | d           | 2022-08-24   |
> +-+--+
> select * from test0831 where (case when id='a' and cp='2022-08-24' then 1 
> else 0 end)=0; 
> +--+--+
> | test0830.id  | test0830.cp  |
> +--+--+
> | a            | 2022-08-23   |
> | c            | 2022-08-23   |
> | d            | 2022-08-23   |
> +--+--+{code}
>  



--
This message was sent by Atlassian Jira
(v8.20.10#820010)


[jira] [Updated] (HIVE-26505) Case When Some result data is lost when there are common column conditions and partitioned column conditions

2022-08-30 Thread GuangMing Lu (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-26505?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

GuangMing Lu updated HIVE-26505:

Priority: Critical  (was: Major)

> Case When Some result data is lost when there are common column conditions 
> and partitioned column conditions 
> -
>
> Key: HIVE-26505
> URL: https://issues.apache.org/jira/browse/HIVE-26505
> Project: Hive
>  Issue Type: Bug
>  Components: Hive
>Affects Versions: 3.1.0, 4.0.0-alpha-1
>Reporter: GuangMing Lu
>Priority: Critical
>
> {code:java}
> create table test0831 (id string, cp string) stored as orc;
> insert into test0831 values ('a', '2022-08-23'),('c', '2022-08-24'),('d', 
> '2022-08-244');
> insert into test0831 values ('a', '2022-08-24'),('b', '2022-08-24');
> select * from test0831;
> +-+--+
> | test0831.id | test0831.cp  |
> +-+--+
> | a           | 2022-08-23   |
> | b           | 2022-08-23   |
> | a           | 2022-08-23   |
> | c           | 2022-08-24   |
> | d           | 2022-08-24   |
> +-+--+
> select * from test0831 where (case when id='a' and cp='2022-08-24' then 1 
> else 0 end)=0; 
> +--+--+
> | test0830.id  | test0830.cp  |
> +--+--+
> | a            | 2022-08-23   |
> | c            | 2022-08-23   |
> | d            | 2022-08-23   |
> +--+--+{code}
>  



--
This message was sent by Atlassian Jira
(v8.20.10#820010)


[jira] [Commented] (HIVE-26342) About EOL schedule discussion

2022-06-21 Thread GuangMing Lu (Jira)


[ 
https://issues.apache.org/jira/browse/HIVE-26342?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=17556765#comment-17556765
 ] 

GuangMing Lu commented on HIVE-26342:
-

h4. Hi [Stamatis 
Zampetakis,|https://issues.apache.org/jira/secure/ViewProfile.jspa?name=zabetak]

OK, I will continue to participate in the discussion.

> About EOL schedule discussion
> -
>
> Key: HIVE-26342
> URL: https://issues.apache.org/jira/browse/HIVE-26342
> Project: Hive
>  Issue Type: Task
>Affects Versions: All Versions
>Reporter: GuangMing Lu
>Assignee: Aihua Xu
>Priority: Major
>
> h4. Hi [Aihua 
> Xu|https://issues.apache.org/jira/secure/ViewProfile.jspa?name=aihuaxu] , 
> [Stamatis 
> Zampetakis|https://issues.apache.org/jira/secure/ViewProfile.jspa?name=zabetak]
> h4. We don't have an EOL schedule for every version at present; can we discuss 
> making one?



--
This message was sent by Atlassian Jira
(v8.20.7#820007)


[jira] [Updated] (HIVE-26342) About EOL schedule discussion

2022-06-20 Thread GuangMing Lu (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-26342?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

GuangMing Lu updated HIVE-26342:

Description: 
h4. Hi [Aihua 
Xu|https://issues.apache.org/jira/secure/ViewProfile.jspa?name=aihuaxu] , 
[Stamatis 
Zampetakis|https://issues.apache.org/jira/secure/ViewProfile.jspa?name=zabetak]
h4. We don't have an EOL schedule for every version at present; can we discuss 
making one?

  was:
h4. Hi [Aihua 
Xu|https://issues.apache.org/jira/secure/ViewProfile.jspa?name=aihuaxu] and 
[Harish JP|https://issues.apache.org/jira/secure/ViewProfile.jspa?name=harishjp]
h4. We don't have a EOL schedule for every version at present, can we discuss 
making one?


> About EOL schedule discussion
> -
>
> Key: HIVE-26342
> URL: https://issues.apache.org/jira/browse/HIVE-26342
> Project: Hive
>  Issue Type: Task
>Affects Versions: All Versions
>Reporter: GuangMing Lu
>Assignee: Aihua Xu
>Priority: Major
>
> h4. Hi [Aihua 
> Xu|https://issues.apache.org/jira/secure/ViewProfile.jspa?name=aihuaxu] , 
> [Stamatis 
> Zampetakis|https://issues.apache.org/jira/secure/ViewProfile.jspa?name=zabetak]
> h4. We don't have an EOL schedule for every version at present; can we discuss 
> making one?



--
This message was sent by Atlassian Jira
(v8.20.7#820007)


[jira] [Updated] (HIVE-26342) About EOL schedule discussion

2022-06-20 Thread GuangMing Lu (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-26342?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

GuangMing Lu updated HIVE-26342:

Issue Type: Task  (was: Bug)

> About EOL schedule discussion
> -
>
> Key: HIVE-26342
> URL: https://issues.apache.org/jira/browse/HIVE-26342
> Project: Hive
>  Issue Type: Task
>Affects Versions: All Versions
>Reporter: GuangMing Lu
>Assignee: Aihua Xu
>Priority: Major
>
> h4. Hi [Aihua 
> Xu|https://issues.apache.org/jira/secure/ViewProfile.jspa?name=aihuaxu] and 
> [Harish 
> JP|https://issues.apache.org/jira/secure/ViewProfile.jspa?name=harishjp]
> h4. We don't have an EOL schedule for every version at present; can we discuss 
> making one?



--
This message was sent by Atlassian Jira
(v8.20.7#820007)


[jira] [Commented] (HIVE-26342) About EOL schedule discussion

2022-06-20 Thread GuangMing Lu (Jira)


[ 
https://issues.apache.org/jira/browse/HIVE-26342?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=17556325#comment-17556325
 ] 

GuangMing Lu commented on HIVE-26342:
-

* Related discussion posts

https://lists.apache.org/thread/sxcrcf4v9j630tl9domp0bn4m33bdq0s

> About EOL schedule discussion
> -
>
> Key: HIVE-26342
> URL: https://issues.apache.org/jira/browse/HIVE-26342
> Project: Hive
>  Issue Type: Bug
>Affects Versions: All Versions
>Reporter: GuangMing Lu
>Assignee: Aihua Xu
>Priority: Major
>
> h4. Hi [Aihua 
> Xu|https://issues.apache.org/jira/secure/ViewProfile.jspa?name=aihuaxu] and 
> [Harish 
> JP|https://issues.apache.org/jira/secure/ViewProfile.jspa?name=harishjp]
> h4. We don't have an EOL schedule for every version at present; can we discuss 
> making one?



--
This message was sent by Atlassian Jira
(v8.20.7#820007)


[jira] [Updated] (HIVE-26342) About EOL schedule discussion

2022-06-20 Thread GuangMing Lu (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-26342?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

GuangMing Lu updated HIVE-26342:

Affects Version/s: All Versions

> About EOL schedule discussion
> -
>
> Key: HIVE-26342
> URL: https://issues.apache.org/jira/browse/HIVE-26342
> Project: Hive
>  Issue Type: Bug
>Affects Versions: All Versions
>Reporter: GuangMing Lu
>Assignee: Aihua Xu
>Priority: Major
>
> h4. Hi [Aihua 
> Xu|https://issues.apache.org/jira/secure/ViewProfile.jspa?name=aihuaxu] and 
> [Harish 
> JP|https://issues.apache.org/jira/secure/ViewProfile.jspa?name=harishjp]
> h4. We don't have an EOL schedule for every version at present; can we discuss 
> making one?



--
This message was sent by Atlassian Jira
(v8.20.7#820007)


[jira] [Assigned] (HIVE-26342) About EOL schedule discussion

2022-06-20 Thread GuangMing Lu (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-26342?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

GuangMing Lu reassigned HIVE-26342:
---

Assignee: Aihua Xu

> About EOL schedule discussion
> -
>
> Key: HIVE-26342
> URL: https://issues.apache.org/jira/browse/HIVE-26342
> Project: Hive
>  Issue Type: Bug
>Reporter: GuangMing Lu
>Assignee: Aihua Xu
>Priority: Major
>
> h4. Hi [Aihua 
> Xu|https://issues.apache.org/jira/secure/ViewProfile.jspa?name=aihuaxu] and 
> [Harish 
> JP|https://issues.apache.org/jira/secure/ViewProfile.jspa?name=harishjp]
> h4. We don't have an EOL schedule for every version at present; can we discuss 
> making one?



--
This message was sent by Atlassian Jira
(v8.20.7#820007)


[jira] [Commented] (HIVE-20607) TxnHandler should use PreparedStatement to execute direct SQL queries.

2022-06-17 Thread GuangMing Lu (Jira)


[ 
https://issues.apache.org/jira/browse/HIVE-20607?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=1745#comment-1745
 ] 

GuangMing Lu commented on HIVE-20607:
-

Hi [~sankarh] [~kgyrtkirk], do you know Hive's EOL schedule?

> TxnHandler should use PreparedStatement to execute direct SQL queries.
> --
>
> Key: HIVE-20607
> URL: https://issues.apache.org/jira/browse/HIVE-20607
> Project: Hive
>  Issue Type: Bug
>  Components: Standalone Metastore, Transactions
>Affects Versions: 3.1.0, 4.0.0
>Reporter: Sankar Hariappan
>Assignee: Sankar Hariappan
>Priority: Major
>  Labels: ACID, pull-request-available
> Fix For: 3.2.0, 4.0.0, 4.0.0-alpha-1
>
> Attachments: HIVE-20607.01-branch-3.patch, HIVE-20607.01.patch
>
>
> TxnHandler uses direct SQL queries to operate on Txn related databases/tables 
> in Hive metastore RDBMS.
> Most of the methods are called directly from the Metastore API and 
> directly append input string arguments to the SQL string.
> We need to use a parameterised PreparedStatement object to set these arguments.



--
This message was sent by Atlassian Jira
(v8.20.7#820007)


[jira] [Assigned] (HIVE-25853) Security Vulnerability CVE-2021-44832 log4j2 need upgrade to 2.17.1

2022-03-26 Thread GuangMing Lu (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-25853?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

GuangMing Lu reassigned HIVE-25853:
---

Assignee: GuangMing Lu

> Security Vulnerability CVE-2021-44832 log4j2 need upgrade to 2.17.1
> ---
>
> Key: HIVE-25853
> URL: https://issues.apache.org/jira/browse/HIVE-25853
> Project: Hive
>  Issue Type: Bug
>Affects Versions: 4.0.0
>Reporter: GuangMing Lu
>Assignee: GuangMing Lu
>Priority: Major
> Fix For: 4.0.0
>
>
> Security Vulnerability CVE-2021-44832 log4j2 need upgrade to 2.17.1



--
This message was sent by Atlassian Jira
(v8.20.1#820001)


[jira] [Updated] (HIVE-26018) The result of UNIQUEJOIN on Hive on Tez is inconsistent with that of MR

2022-03-09 Thread GuangMing Lu (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-26018?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

GuangMing Lu updated HIVE-26018:

Attachment: (was: image-2022-03-09-21-08-17-835.png)

> The result of UNIQUEJOIN on Hive on Tez is inconsistent with that of MR
> ---
>
> Key: HIVE-26018
> URL: https://issues.apache.org/jira/browse/HIVE-26018
> Project: Hive
>  Issue Type: Bug
>  Components: Tez
>Affects Versions: 3.1.0, 4.0.0
>Reporter: GuangMing Lu
>Priority: Major
>
> The result of UNIQUEJOIN on Hive on Tez is inconsistent with that of MR, and 
> the result is not correct, for example:
> CREATE TABLE T1_n1x(key STRING, val STRING) STORED AS orc;
> CREATE TABLE T2_n1x(key STRING, val STRING) STORED AS orc;
> insert into T1_n1x values('aaa', '111'),('bbb', '222'),('ccc', '333');
> insert into T2_n1x values('aaa', '111'),('ddd', '444'),('ccc', '333');
> SELECT a.key, b.key FROM UNIQUEJOIN PRESERVE T1_n1x a (a.key), PRESERVE  
> T2_n1x b (b.key);
> Hive on Tez result: wrong
> |a.key  |b.key  |
> |aaa    |aaa    |
> |bbb    |NULL  |
> |ccc    |ccc    |
> |NULL  |ddd    |
> +--+
> Hive on MR result: right
> |a.key  |b.key  |
> |aaa    |aaa    |
> |bbb    |NULL  |
> |ccc    |ccc    |
> +-+
> SELECT a.key, b.key FROM UNIQUEJOIN T1_n1x a (a.key), T2_n1x b (b.key);
> Hive on Tez result: wrong
> +---+
> |a.key  |b.key  |
> |aaa    |aaa    |
> |bbb    |NULL  |
> |ccc    |ccc    |
> |NULL  |ddd    |
> +-+
> Hive on MR result: right
> |a.key  |b.key  |
> |aaa    |aaa    |
> |ccc    |ccc    |
>  
>  



--
This message was sent by Atlassian Jira
(v8.20.1#820001)


[jira] [Updated] (HIVE-26018) The result of UNIQUEJOIN on Hive on Tez is inconsistent with that of MR

2022-03-09 Thread GuangMing Lu (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-26018?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

GuangMing Lu updated HIVE-26018:

Description: 
The result of UNIQUEJOIN on Hive on Tez is inconsistent with that of MR, and 
the result is not correct, for example:

CREATE TABLE T1_n1x(key STRING, val STRING) STORED AS orc;
CREATE TABLE T2_n1x(key STRING, val STRING) STORED AS orc;

insert into T1_n1x values('aaa', '111'),('bbb', '222'),('ccc', '333');
insert into T2_n1x values('aaa', '111'),('ddd', '444'),('ccc', '333');

SELECT a.key, b.key FROM UNIQUEJOIN PRESERVE T1_n1x a (a.key), PRESERVE  T2_n1x 
b (b.key);

Hive on Tez result: wrong
|a.key  |b.key  |
|aaa    |aaa    |
|bbb    |NULL  |
|ccc    |ccc    |
|NULL  |ddd    |

+--+
Hive on MR result: right
|a.key  |b.key  |
|aaa    |aaa    |
|bbb    |NULL  |
|ccc    |ccc    |

+-+

SELECT a.key, b.key FROM UNIQUEJOIN T1_n1x a (a.key), T2_n1x b (b.key);

Hive on Tez result: wrong

+---+
|a.key  |b.key  |
|aaa    |aaa    |
|bbb    |NULL  |
|ccc    |ccc    |
|NULL  |ddd    |

+-+

Hive on MR result: right
|a.key  |b.key  |
|aaa    |aaa    |
|ccc    |ccc    |

 

 

  was:
The result of UNIQUEJOIN on Hive on Tez is inconsistent with that of MR, and 
the result Is not correct, for example:

CREATE TABLE T1_n1x(key STRING, val STRING) STORED AS orc;
CREATE TABLE T2_n1x(key STRING, val STRING) STORED AS orc;

insert into T1_n1x values('aaa', '111'),('bbb', '222'),('ccc', '333');
insert into T2_n1x values('aaa', '111'),('ddd', '444'),('ccc', '333');

SELECT a.key, b.key FROM UNIQUEJOIN PRESERVE T1_n1x a (a.key), PRESERVE  T2_n1x 
b (b.key);

Hive on Tez result: wrong

{+}---{-}{-}{+}---+
|a.key  |b.key  |
|aaa    |aaa    |
|bbb    |NULL  |
|ccc    |ccc    |
|NULL  |ddd    |

{+}---{-}{-}{+}---+
Hive on MR result: right

{+}---{-}{-}{+}---+
|a.key  |b.key  |

 
|aaa    |aaa    |
|bbb    |NULL  |
|ccc    |ccc    |

{+}---{-}{-}{+}---+

SELECT a.key, b.key FROM UNIQUEJOIN T1_n1x a (a.key), T2_n1x b (b.key);

Hive on Tez result: wrong

{+}---{-}{-}{+}---+
|a.key  |b.key  |

{+}---{-}{-}{+}---+
|aaa    |aaa    |
|bbb    |NULL  |
|ccc    |ccc    |
|NULL  |ddd    |

{+}---{-}{-}{+}---+

Hive on MR result: right

{+}---{-}{-}{+}---+
|a.key  |b.key  |

{+}---{-}{-}{+}---+
|aaa    |aaa    |
|ccc    |ccc    |

{+}---{-}{-}{+}---+

 


> The result of UNIQUEJOIN on Hive on Tez is inconsistent with that of MR
> ---
>
> Key: HIVE-26018
> URL: https://issues.apache.org/jira/browse/HIVE-26018
> Project: Hive
>  Issue Type: Bug
>  Components: Tez
>Affects Versions: 3.1.0, 4.0.0
>Reporter: GuangMing Lu
>Priority: Major
> Attachments: image-2022-03-09-21-08-17-835.png
>
>
> The result of UNIQUEJOIN on Hive on Tez is inconsistent with that of MR, and 
> the result is not correct, for example:
> CREATE TABLE T1_n1x(key STRING, val STRING) STORED AS orc;
> CREATE TABLE T2_n1x(key STRING, val STRING) STORED AS orc;
> insert into T1_n1x values('aaa', '111'),('bbb', '222'),('ccc', '333');
> insert into T2_n1x values('aaa', '111'),('ddd', '444'),('ccc', '333');
> SELECT a.key, b.key FROM UNIQUEJOIN PRESERVE T1_n1x a (a.key), PRESERVE  
> T2_n1x b (b.key);
> Hive on Tez result: wrong
> |a.key  |b.key  |
> |aaa    |aaa    |
> |bbb    |NULL  |
> |ccc    |ccc    |
> |NULL  |ddd    |
> +--+
> Hive on MR result: right
> |a.key  |b.key  |
> |aaa    |aaa    |
> |bbb    |NULL  |
> |ccc    |ccc    |
> +-+
> SELECT a.key, b.key FROM UNIQUEJOIN T1_n1x a (a.key), T2_n1x b (b.key);
> Hive on Tez result: wrong
> +---+
> |a.key  |b.key  |
> |aaa    |aaa    |
> |bbb    |NULL  |
> |ccc    |ccc    |
> |NULL  |ddd    |
> +-+
> Hive on MR result: right
> |a.key  |b.key  |
> |aaa    |aaa    |
> |ccc    |ccc    |
>  
>  



--
This message was sent by Atlassian Jira
(v8.20.1#820001)


[jira] [Updated] (HIVE-26018) The result of UNIQUEJOIN on Hive on Tez is inconsistent with that of MR

2022-03-09 Thread GuangMing Lu (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-26018?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

GuangMing Lu updated HIVE-26018:

Description: 
The result of UNIQUEJOIN on Hive on Tez is inconsistent with that of MR, and 
the result is not correct, for example:

CREATE TABLE T1_n1x(key STRING, val STRING) STORED AS orc;
CREATE TABLE T2_n1x(key STRING, val STRING) STORED AS orc;

insert into T1_n1x values('aaa', '111'),('bbb', '222'),('ccc', '333');
insert into T2_n1x values('aaa', '111'),('ddd', '444'),('ccc', '333');

SELECT a.key, b.key FROM UNIQUEJOIN PRESERVE T1_n1x a (a.key), PRESERVE  T2_n1x 
b (b.key);

Hive on Tez result: wrong

{+}---{-}{-}{+}---+
|a.key  |b.key  |
|aaa    |aaa    |
|bbb    |NULL  |
|ccc    |ccc    |
|NULL  |ddd    |

{+}---{-}{-}{+}---+
Hive on MR result: right

{+}---{-}{-}{+}---+
|a.key  |b.key  |

 
|aaa    |aaa    |
|bbb    |NULL  |
|ccc    |ccc    |

{+}---{-}{-}{+}---+

SELECT a.key, b.key FROM UNIQUEJOIN T1_n1x a (a.key), T2_n1x b (b.key);

Hive on Tez result: wrong

{+}---{-}{-}{+}---+
|a.key  |b.key  |

{+}---{-}{-}{+}---+
|aaa    |aaa    |
|bbb    |NULL  |
|ccc    |ccc    |
|NULL  |ddd    |

{+}---{-}{-}{+}---+

Hive on MR result: right

{+}---{-}{-}{+}---+
|a.key  |b.key  |

{+}---{-}{-}{+}---+
|aaa    |aaa    |
|ccc    |ccc    |

{+}---{-}{-}{+}---+

 

  was:
The result of UNIQUEJOIN on Hive on Tez is inconsistent with that of MR, and 
the result Is not correct, for example:

CREATE TABLE T1_n1x(key STRING, val STRING) STORED AS orc;
CREATE TABLE T2_n1x(key STRING, val STRING) STORED AS orc;

insert into T1_n1x values('aaa', '111'),('bbb', '222'),('ccc', '333');
insert into T2_n1x values('aaa', '111'),('ddd', '444'),('ccc', '333');

SELECT a.key, b.key FROM UNIQUEJOIN PRESERVE T1_n1x a (a.key), PRESERVE  T2_n1x 
b (b.key);

Hive on Tez result: wrong

+++
| a.key  | b.key  |
+++
| aaa    | aaa    |
| bbb    | NULL   |
| ccc    | ccc    |
| NULL   | ddd    |
+++
Hive on MR result: right

+++
| a.key  | b.key  |
+++
| aaa    | aaa    |
| bbb    | NULL   |
| ccc    | ccc    |
+++

SELECT a.key, b.key FROM UNIQUEJOIN T1_n1x a (a.key), T2_n1x b (b.key);

Hive on Tez result: wrong

+++
| a.key  | b.key  |
+++
| aaa    | aaa    |
| bbb    | NULL   |
| ccc    | ccc    |
| NULL   | ddd    |
+++

Hive on MR result: right

+++
| a.key  | b.key  |
+++
| aaa    | aaa    |
| ccc    | ccc    |
+++

 


> The result of UNIQUEJOIN on Hive on Tez is inconsistent with that of MR
> ---
>
> Key: HIVE-26018
> URL: https://issues.apache.org/jira/browse/HIVE-26018
> Project: Hive
>  Issue Type: Bug
>  Components: Tez
>Affects Versions: 3.1.0, 4.0.0
>Reporter: GuangMing Lu
>Priority: Major
> Attachments: image-2022-03-09-21-08-17-835.png
>
>
> The result of UNIQUEJOIN on Hive on Tez is inconsistent with that of MR, and 
> the result is not correct, for example:
> CREATE TABLE T1_n1x(key STRING, val STRING) STORED AS orc;
> CREATE TABLE T2_n1x(key STRING, val STRING) STORED AS orc;
> insert into T1_n1x values('aaa', '111'),('bbb', '222'),('ccc', '333');
> insert into T2_n1x values('aaa', '111'),('ddd', '444'),('ccc', '333');
> SELECT a.key, b.key FROM UNIQUEJOIN PRESERVE T1_n1x a (a.key), PRESERVE  
> T2_n1x b (b.key);
> Hive on Tez result: wrong
> {+}---{-}{-}{+}---+
> |a.key  |b.key  |
> |aaa    |aaa    |
> |bbb    |NULL  |
> |ccc    |ccc    |
> |NULL  |ddd    |
> {+}---{-}{-}{+}---+
> Hive on MR result: right
> {+}---{-}{-}{+}---+
> |a.key  |b.key  |
>  
> |aaa    |aaa    |
> |bbb    |NULL  |
> |ccc    |ccc    |
> {+}---{-}{-}{+}---+
> SELECT a.key, b.key FROM UNIQUEJOIN T1_n1x a (a.key), T2_n1x b (b.key);
> Hive on Tez result: wrong
> {+}---{-}{-}{+}---+
> |a.key  |b.key  |
> {+}---{-}{-}{+}---+
> |aaa    |aaa    |
> |bbb    |NULL  |
> |ccc    |ccc    |
> |NULL  |ddd    |
> {+}---{-}{-}{+}---+
> Hive on MR result: right
> {+}---{-}{-}{+}---+
> |a.key  |b.key  |
> {+}---{-}{-}{+}---+
> |aaa    |aaa    |
> |ccc    |ccc    |
> {+}---{-}{-}{+}---+
>  



--
This message was sent by Atlassian Jira
(v8.20.1#820001)


[jira] [Updated] (HIVE-25853) Security Vulnerability CVE-2021-44832 log4j2 need upgrade to 2.17.1

2022-01-10 Thread GuangMing Lu (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-25853?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

GuangMing Lu updated HIVE-25853:

Issue Type: Bug  (was: Improvement)

> Security Vulnerability CVE-2021-44832 log4j2 need upgrade to 2.17.1
> ---
>
> Key: HIVE-25853
> URL: https://issues.apache.org/jira/browse/HIVE-25853
> Project: Hive
>  Issue Type: Bug
>Affects Versions: 4.0.0
>Reporter: GuangMing Lu
>Priority: Major
>
> Security Vulnerability CVE-2021-44832 log4j2 need upgrade to 2.17.1



--
This message was sent by Atlassian Jira
(v8.20.1#820001)


[jira] [Updated] (HIVE-25853) Security Vulnerability CVE-2021-44832 log4j2 need upgrade to 2.17.1

2022-01-07 Thread GuangMing Lu (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-25853?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

GuangMing Lu updated HIVE-25853:

Summary: Security Vulnerability CVE-2021-44832 log4j2 need upgrade to 
2.17.1  (was: Security Vulnerability CVE-2021-44832 log4j2 upgrade to 2.17.1)

> Security Vulnerability CVE-2021-44832 log4j2 need upgrade to 2.17.1
> ---
>
> Key: HIVE-25853
> URL: https://issues.apache.org/jira/browse/HIVE-25853
> Project: Hive
>  Issue Type: Improvement
>Affects Versions: 4.0.0
>Reporter: GuangMing Lu
>Priority: Major
>
> Security Vulnerability CVE-2021-44832 log4j2 need upgrade to 2.17.1



--
This message was sent by Atlassian Jira
(v8.20.1#820001)


[jira] [Updated] (HIVE-25853) Security Vulnerability CVE-2021-44832 log4j2 upgrade to 2.17.1

2022-01-07 Thread GuangMing Lu (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-25853?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

GuangMing Lu updated HIVE-25853:

Description: Security Vulnerability CVE-2021-44832 log4j2 need upgrade to 
2.17.1  (was: Security Vulnerability CVE-2021-44832 log4j2 upgrade to 2.17.1)

> Security Vulnerability CVE-2021-44832 log4j2 upgrade to 2.17.1
> --
>
> Key: HIVE-25853
> URL: https://issues.apache.org/jira/browse/HIVE-25853
> Project: Hive
>  Issue Type: Improvement
>Affects Versions: 4.0.0
>Reporter: GuangMing Lu
>Priority: Major
>
> Security Vulnerability CVE-2021-44832 log4j2 need upgrade to 2.17.1



--
This message was sent by Atlassian Jira
(v8.20.1#820001)


[jira] [Updated] (HIVE-25525) TestRetryingThriftCLIServiceClient test case optimization

2021-09-14 Thread GuangMing Lu (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-25525?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

GuangMing Lu updated HIVE-25525:

Description: 
{code:java}
//devil's Numbers
cliServiceClient.openSession("anonymous", "anonymous");
client.openSession("anonymous", "anonymous");
{code}
The client.openSession call initializes the user name and password with 
hard-coded magic numbers ("devil's Numbers")

  was:Client.opensession invoke initializes the user name and password for the 
devil's Numbers


> TestRetryingThriftCLIServiceClient test case optimization
> -
>
> Key: HIVE-25525
> URL: https://issues.apache.org/jira/browse/HIVE-25525
> Project: Hive
>  Issue Type: Improvement
>  Components: Tests
>Affects Versions: 3.1.0, 3.1.2
>Reporter: GuangMing Lu
>Priority: Major
>
> {code:java}
> //devil's Numbers
> cliServiceClient.openSession("anonymous", "anonymous");
> client.openSession("anonymous", "anonymous");
> {code}
> The client.openSession call initializes the user name and password with 
> hard-coded magic numbers ("devil's Numbers")



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Commented] (HIVE-20682) Async query execution can potentially fail if shared sessionHive is closed by master thread.

2021-09-02 Thread GuangMing Lu (Jira)


[ 
https://issues.apache.org/jira/browse/HIVE-20682?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=17408892#comment-17408892
 ] 

GuangMing Lu commented on HIVE-20682:
-

Hive 3.1.0 does not match the current code, which is not shown in version 3.1.0

> Async query execution can potentially fail if shared sessionHive is closed by 
> master thread.
> 
>
> Key: HIVE-20682
> URL: https://issues.apache.org/jira/browse/HIVE-20682
> Project: Hive
>  Issue Type: Bug
>  Components: HiveServer2
>Affects Versions: 4.0.0
>Reporter: Sankar Hariappan
>Assignee: Sankar Hariappan
>Priority: Major
>  Labels: pull-request-available
> Fix For: 4.0.0
>
> Attachments: HIVE-20682.01.patch, HIVE-20682.02.patch, 
> HIVE-20682.03.patch, HIVE-20682.04.patch, HIVE-20682.05.patch, 
> HIVE-20682.06.patch
>
>
> *Problem description:*
> The master thread initializes the *sessionHive* object in *HiveSessionImpl* 
> class when we open a new session for a client connection and by default all 
> queries from this connection shares the same sessionHive object. 
> If the master thread executes a *synchronous* query, it closes the 
> sessionHive object (referred via thread local hiveDb) if  
> {{Hive.isCompatible}} returns false and sets new Hive object in thread local 
> HiveDb but doesn't change the sessionHive object in the session. Whereas, 
> *asynchronous* query execution via async threads never closes the sessionHive 
> object and it just creates a new one if needed and sets it as their thread 
> local hiveDb.
> So, the problem can happen when an *asynchronous* query being executed by 
> async threads refers to the sessionHive object and the master thread 
> receives a *synchronous* query that closes the same sessionHive object. 
> Also, each query execution overwrites the thread local hiveDb object to 
> sessionHive object which potentially leaks a metastore connection if the 
> previous synchronous query execution re-created the Hive object.
> *Possible Fix:*
> The *sessionHive* object could be shared by multiple threads and so it 
> shouldn't be allowed to be closed by any query execution threads when they 
> re-create the Hive object due to changes in Hive configurations. But the Hive 
> objects created by query execution threads should be closed when the thread 
> exits.
> So, it is proposed to have an *isAllowClose* flag (default: *true*) in Hive 
> object which should be set to *false* for *sessionHive* and would be 
> forcefully closed when the session is closed or released.
> Also, when we reset *sessionHive* object with new one due to changes in 
> *sessionConf*, the old one should be closed when no async thread is referring 
> to it. This can be done using "*finalize*" method of Hive object where we can 
> close HMS connection when Hive object is garbage collected.
> cc [~pvary]



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Updated] (HIVE-20682) Async query execution can potentially fail if shared sessionHive is closed by master thread.

2021-09-02 Thread GuangMing Lu (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-20682?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

GuangMing Lu updated HIVE-20682:

Affects Version/s: (was: 3.1.0)

> Async query execution can potentially fail if shared sessionHive is closed by 
> master thread.
> 
>
> Key: HIVE-20682
> URL: https://issues.apache.org/jira/browse/HIVE-20682
> Project: Hive
>  Issue Type: Bug
>  Components: HiveServer2
>Affects Versions: 4.0.0
>Reporter: Sankar Hariappan
>Assignee: Sankar Hariappan
>Priority: Major
>  Labels: pull-request-available
> Fix For: 4.0.0
>
> Attachments: HIVE-20682.01.patch, HIVE-20682.02.patch, 
> HIVE-20682.03.patch, HIVE-20682.04.patch, HIVE-20682.05.patch, 
> HIVE-20682.06.patch
>
>
> *Problem description:*
> The master thread initializes the *sessionHive* object in *HiveSessionImpl* 
> class when we open a new session for a client connection and by default all 
> queries from this connection shares the same sessionHive object. 
> If the master thread executes a *synchronous* query, it closes the 
> sessionHive object (referred via thread local hiveDb) if  
> {{Hive.isCompatible}} returns false and sets new Hive object in thread local 
> HiveDb but doesn't change the sessionHive object in the session. Whereas, 
> *asynchronous* query execution via async threads never closes the sessionHive 
> object and it just creates a new one if needed and sets it as their thread 
> local hiveDb.
> So, the problem can happen when an *asynchronous* query being executed by 
> async threads refers to the sessionHive object and the master thread 
> receives a *synchronous* query that closes the same sessionHive object. 
> Also, each query execution overwrites the thread local hiveDb object to 
> sessionHive object which potentially leaks a metastore connection if the 
> previous synchronous query execution re-created the Hive object.
> *Possible Fix:*
> The *sessionHive* object could be shared by multiple threads and so it 
> shouldn't be allowed to be closed by any query execution threads when they 
> re-create the Hive object due to changes in Hive configurations. But the Hive 
> objects created by query execution threads should be closed when the thread 
> exits.
> So, it is proposed to have an *isAllowClose* flag (default: *true*) in Hive 
> object which should be set to *false* for *sessionHive* and would be 
> forcefully closed when the session is closed or released.
> Also, when we reset *sessionHive* object with new one due to changes in 
> *sessionConf*, the old one should be closed when no async thread is referring 
> to it. This can be done using "*finalize*" method of Hive object where we can 
> close HMS connection when Hive object is garbage collected.
> cc [~pvary]



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Commented] (HIVE-20828) Upgrade to Spark 2.4.0

2021-09-01 Thread GuangMing Lu (Jira)


[ 
https://issues.apache.org/jira/browse/HIVE-20828?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=17408106#comment-17408106
 ] 

GuangMing Lu commented on HIVE-20828:
-

Hi [~stakiar], How is the hive on spark evolving?

> Upgrade to Spark 2.4.0
> --
>
> Key: HIVE-20828
> URL: https://issues.apache.org/jira/browse/HIVE-20828
> Project: Hive
>  Issue Type: Improvement
>  Components: Spark
>Reporter: Sahil Takiar
>Priority: Major
> Attachments: HIVE-20828.1.patch, HIVE-20828.2.patch
>
>
> The Spark community is in the process of releasing Spark 2.4.0. We should do 
> some testing with the RC candidates and then upgrade once the release is 
> finalized.



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Updated] (HIVE-25315) SQL executed hiveserver is killed by the HiveServer2-Handler-Pool thread, very occasionally

2021-07-08 Thread GuangMing Lu (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-25315?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

GuangMing Lu updated HIVE-25315:

Component/s: HiveServer2

> SQL executed hiveserver is killed by the HiveServer2-Handler-Pool thread, 
> very occasionally
> ---
>
> Key: HIVE-25315
> URL: https://issues.apache.org/jira/browse/HIVE-25315
> Project: Hive
>  Issue Type: Bug
>  Components: HiveServer2
>Affects Versions: 3.1.0
>Reporter: GuangMing Lu
>Priority: Major
>
> {code:java}
> 2021-07-05 15:23:17,376 | INFO  | HiveServer2-Handler-Pool: Thread-226765745 
> | Shutting down HiveServer2 | 
> org.apache.hive.service.server.HiveServer2.stop(HiveServer2.java:1090)
> 2021-07-05 15:23:17,376 | INFO  | HiveServer2-Handler-Pool: Thread-226765745 
> | Thrift server has stopped | 
> org.apache.hive.service.cli.thrift.ThriftBinaryCLIService.stopServer(ThriftBinaryCLIService.java:225)
> 2021-07-05 15:23:17,376 | INFO  | HiveServer2-Handler-Pool: Thread-226765745 
> | Service:ThriftBinaryCLIService is stopped. | 
> org.apache.hive.service.AbstractService.stop(AbstractService.java:130)
> 2021-07-05 15:23:17,376 | INFO  | HiveServer2-Handler-Pool: Thread-226765745 
> | Service:OperationManager is stopped. | 
> org.apache.hive.service.AbstractService.stop(AbstractService.java:130)
> 2021-07-05 15:23:17,376 | INFO  | HiveServer2-Handler-Pool: Thread-226765745 
> | Service:SessionManager is stopped. | 
> org.apache.hive.service.AbstractService.stop(AbstractService.java:130)
> {code}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Updated] (HIVE-25269) When the skew and parallel parameters are true simultaneously, the result is less data

2021-06-21 Thread GuangMing Lu (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-25269?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

GuangMing Lu updated HIVE-25269:

Attachment: P10IDS_RISKLIST.zip
p10ids_riskcon.zip
p10ids_realpayrc_ygz.zip
p10ids_prerec_split_ygz.zip
comb_classcode.zip

> When the skew and parallel parameters are true simultaneously, the result is 
> less data
> --
>
> Key: HIVE-25269
> URL: https://issues.apache.org/jira/browse/HIVE-25269
> Project: Hive
>  Issue Type: Bug
>  Components: Physical Optimizer, SQL
>Affects Versions: 3.1.0, 3.1.2
>Reporter: GuangMing Lu
>Priority: Major
> Attachments: P10IDS_RISKLIST.zip, comb_classcode.zip, 
> p10ids_prerec_split_ygz.zip, p10ids_realpayrc_ygz.zip, p10ids_riskcon.zip, 
> test.sql
>
>
> When the parameters hive.optimize.skewjoin, hive.groupby.skewindata and 
> hive.exec.parallel are all true, executing SQL such as 'INSERT... FROM 
> (SUBQUERY UNIONALL ...GROUP BY...) A JOIN/LEFT JOIN A.expression' produces 
> less data than expected. Details of the SQL and test data can be found in the attachments



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Updated] (HIVE-25269) When the skew and parallel parameters are true simultaneously, the result is less data

2021-06-21 Thread GuangMing Lu (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-25269?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

GuangMing Lu updated HIVE-25269:

Attachment: test.sql

> When the skew and parallel parameters are true simultaneously, the result is 
> less data
> --
>
> Key: HIVE-25269
> URL: https://issues.apache.org/jira/browse/HIVE-25269
> Project: Hive
>  Issue Type: Bug
>  Components: Physical Optimizer, SQL
>Affects Versions: 3.1.0, 3.1.2
>Reporter: GuangMing Lu
>Priority: Major
> Attachments: test.sql
>
>
> When the parameters hive.optimize.skewjoin, hive.groupby.skewindata and 
> hive.exec.parallel are all true, executing SQL such as 'INSERT... FROM 
> (SUBQUERY UNIONALL ...GROUP BY...) A JOIN/LEFT JOIN A.expression' produces 
> less data than expected. Details of the SQL and test data can be found in the attachments



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Updated] (HIVE-25269) When the skew and parallel parameters are true simultaneously, the result is less data

2021-06-21 Thread GuangMing Lu (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-25269?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

GuangMing Lu updated HIVE-25269:

Attachment: (was: comb_classcode.data)

> When the skew and parallel parameters are true simultaneously, the result is 
> less data
> --
>
> Key: HIVE-25269
> URL: https://issues.apache.org/jira/browse/HIVE-25269
> Project: Hive
>  Issue Type: Bug
>  Components: Physical Optimizer, SQL
>Affects Versions: 3.1.0, 3.1.2
>Reporter: GuangMing Lu
>Priority: Major
>
> When the parameters hive.optimize.skewjoin, hive.groupby.skewindata and 
> hive.exec.parallel are all true, executing SQL such as 'INSERT... FROM 
> (SUBQUERY UNIONALL ...GROUP BY...) A JOIN/LEFT JOIN A.expression' produces 
> less data than expected. Details of the SQL and test data can be found in the attachments



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Updated] (HIVE-25269) When the skew and parallel parameters are true simultaneously, the result is less data

2021-06-21 Thread GuangMing Lu (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-25269?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

GuangMing Lu updated HIVE-25269:

Attachment: (was: 样例分析-表入数据.sql)

> When the skew and parallel parameters are true simultaneously, the result is 
> less data
> --
>
> Key: HIVE-25269
> URL: https://issues.apache.org/jira/browse/HIVE-25269
> Project: Hive
>  Issue Type: Bug
>  Components: Physical Optimizer, SQL
>Affects Versions: 3.1.0, 3.1.2
>Reporter: GuangMing Lu
>Priority: Major
>
> When the parameters hive.optimize.skewjoin, hive.groupby.skewindata and 
> hive.exec.parallel are all true, executing SQL such as 'INSERT... FROM 
> (SUBQUERY UNIONALL ...GROUP BY...) A JOIN/LEFT JOIN A.expression' produces 
> less data than expected. Details of the SQL and test data can be found in the attachments



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Updated] (HIVE-25269) When the skew and parallel parameters are true simultaneously, the result is less data

2021-06-21 Thread GuangMing Lu (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-25269?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

GuangMing Lu updated HIVE-25269:

Attachment: 样例分析-表入数据.sql

> When the skew and parallel parameters are true simultaneously, the result is 
> less data
> --
>
> Key: HIVE-25269
> URL: https://issues.apache.org/jira/browse/HIVE-25269
> Project: Hive
>  Issue Type: Bug
>  Components: Physical Optimizer, SQL
>Affects Versions: 3.1.0, 3.1.2
>Reporter: GuangMing Lu
>Priority: Major
> Attachments: comb_classcode.data, 样例分析-表入数据.sql
>
>
> When the parameters hive.optimize.skewjoin, hive.groupby.skewindata and 
> hive.exec.parallel are all true, executing SQL such as 'INSERT... FROM 
> (SUBQUERY UNIONALL ...GROUP BY...) A JOIN/LEFT JOIN A.expression' produces 
> less data than expected. Details of the SQL and test data can be found in the attachments



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Updated] (HIVE-25269) When the skew and parallel parameters are true simultaneously, the result is less data

2021-06-21 Thread GuangMing Lu (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-25269?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

GuangMing Lu updated HIVE-25269:

Attachment: (was: table_b_data.orc)

> When the skew and parallel parameters are true simultaneously, the result is 
> less data
> --
>
> Key: HIVE-25269
> URL: https://issues.apache.org/jira/browse/HIVE-25269
> Project: Hive
>  Issue Type: Bug
>  Components: Physical Optimizer, SQL
>Affects Versions: 3.1.0, 3.1.2
>Reporter: GuangMing Lu
>Priority: Major
> Attachments: comb_classcode.data, 样例分析-表入数据.sql
>
>
> When the parameters hive.optimize.skewjoin, hive.groupby.skewindata and 
> hive.exec.parallel are all true, executing SQL such as 'INSERT... FROM 
> (SUBQUERY UNIONALL ...GROUP BY...) A JOIN/LEFT JOIN A.expression' produces 
> less data than expected. Details of the SQL and test data can be found in the attachments



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Updated] (HIVE-25269) When the skew and parallel parameters are true simultaneously, the result is less data

2021-06-21 Thread GuangMing Lu (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-25269?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

GuangMing Lu updated HIVE-25269:

Attachment: (was: test.sql)

> When the skew and parallel parameters are true simultaneously, the result is 
> less data
> --
>
> Key: HIVE-25269
> URL: https://issues.apache.org/jira/browse/HIVE-25269
> Project: Hive
>  Issue Type: Bug
>  Components: Physical Optimizer, SQL
>Affects Versions: 3.1.0, 3.1.2
>Reporter: GuangMing Lu
>Priority: Major
> Attachments: comb_classcode.data, 样例分析-表入数据.sql
>
>
> When the parameters hive.optimize.skewjoin, hive.groupby.skewindata and 
> hive.exec.parallel are all true, executing SQL such as 'INSERT... FROM 
> (SUBQUERY UNIONALL ...GROUP BY...) A JOIN/LEFT JOIN A.expression' produces 
> less data than expected. Details of the SQL and test data can be found in the attachments



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Updated] (HIVE-25269) When the skew and parallel parameters are true simultaneously, the result is less data

2021-06-21 Thread GuangMing Lu (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-25269?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

GuangMing Lu updated HIVE-25269:

Attachment: (was: table_d_data.orc)

> When the skew and parallel parameters are true simultaneously, the result is 
> less data
> --
>
> Key: HIVE-25269
> URL: https://issues.apache.org/jira/browse/HIVE-25269
> Project: Hive
>  Issue Type: Bug
>  Components: Physical Optimizer, SQL
>Affects Versions: 3.1.0, 3.1.2
>Reporter: GuangMing Lu
>Priority: Major
> Attachments: comb_classcode.data, 样例分析-表入数据.sql
>
>
> When the parameters hive.optimize.skewjoin, hive.groupby.skewindata and 
> hive.exec.parallel are all true, executing SQL such as 'INSERT... FROM 
> (SUBQUERY UNIONALL ...GROUP BY...) A JOIN/LEFT JOIN A.expression' produces 
> less data than expected. Details of the SQL and test data can be found in the attachments



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Updated] (HIVE-25269) When the skew and parallel parameters are true simultaneously, the result is less data

2021-06-21 Thread GuangMing Lu (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-25269?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

GuangMing Lu updated HIVE-25269:

Attachment: comb_classcode.data

> When the skew and parallel parameters are true simultaneously, the result is 
> less data
> --
>
> Key: HIVE-25269
> URL: https://issues.apache.org/jira/browse/HIVE-25269
> Project: Hive
>  Issue Type: Bug
>  Components: Physical Optimizer, SQL
>Affects Versions: 3.1.0, 3.1.2
>Reporter: GuangMing Lu
>Priority: Major
> Attachments: comb_classcode.data, 样例分析-表入数据.sql
>
>
> When the parameters hive.optimize.skewjoin, hive.groupby.skewindata and 
> hive.exec.parallel are all true, executing SQL such as 'INSERT... FROM 
> (SUBQUERY UNIONALL ...GROUP BY...) A JOIN/LEFT JOIN A.expression' produces 
> less data than expected. Details of the SQL and test data can be found in the attachments



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Updated] (HIVE-25269) When the skew and parallel parameters are true simultaneously, the result is less data

2021-06-21 Thread GuangMing Lu (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-25269?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

GuangMing Lu updated HIVE-25269:

Attachment: (was: table_c_data.orc)

> When the skew and parallel parameters are true simultaneously, the result is 
> less data
> --
>
> Key: HIVE-25269
> URL: https://issues.apache.org/jira/browse/HIVE-25269
> Project: Hive
>  Issue Type: Bug
>  Components: Physical Optimizer, SQL
>Affects Versions: 3.1.0, 3.1.2
>Reporter: GuangMing Lu
>Priority: Major
> Attachments: comb_classcode.data, 样例分析-表入数据.sql
>
>
> When the parameters hive.optimize.skewjoin, hive.groupby.skewindata and 
> hive.exec.parallel are all true, executing SQL such as 'INSERT... FROM 
> (SUBQUERY UNIONALL ...GROUP BY...) A JOIN/LEFT JOIN A.expression' produces 
> less data than expected. Details of the SQL and test data can be found in the attachments



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Updated] (HIVE-25269) When the skew and parallel parameters are true simultaneously, the result is less data

2021-06-21 Thread GuangMing Lu (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-25269?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

GuangMing Lu updated HIVE-25269:

Attachment: (was: table_a_data.orc)

> When the skew and parallel parameters are true simultaneously, the result is 
> less data
> --
>
> Key: HIVE-25269
> URL: https://issues.apache.org/jira/browse/HIVE-25269
> Project: Hive
>  Issue Type: Bug
>  Components: Physical Optimizer, SQL
>Affects Versions: 3.1.0, 3.1.2
>Reporter: GuangMing Lu
>Priority: Major
> Attachments: comb_classcode.data, 样例分析-表入数据.sql
>
>
> When the parameters hive.optimize.skewjoin, hive.groupby.skewindata and 
> hive.exec.parallel are all true, executing SQL such as 'INSERT... FROM 
> (SUBQUERY UNIONALL ...GROUP BY...) A JOIN/LEFT JOIN A.expression' produces 
> less data than expected. Details of the SQL and test data can be found in the attachments



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Commented] (HIVE-25239) Create the compression table but the properties Compressed is No

2021-06-10 Thread GuangMing Lu (Jira)


[ 
https://issues.apache.org/jira/browse/HIVE-25239?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=17361387#comment-17361387
 ] 

GuangMing Lu commented on HIVE-25239:
-

Hi 
[XixiHua|https://issues.apache.org/jira/secure/ViewProfile.jspa?name=honeyaya], 
It has not been solved yet; you can verify this on the master branch. 
'Compressed' should be set as an attribute value when the table is created

> Create the compression table but the properties Compressed is No
> 
>
> Key: HIVE-25239
> URL: https://issues.apache.org/jira/browse/HIVE-25239
> Project: Hive
>  Issue Type: Bug
>  Components: Hive
>Affects Versions: 3.1.0
>Reporter: GuangMing Lu
>Priority: Major
>  Labels: easyfix
> Fix For: 4.0.0
>
> Attachments: HIVE-25239.01.patch, image-2021-06-11-10-49-25-710.png
>
>
> After creating an ORC table with Snappy compression, 'desc formatted table' 
> shows 'Compressed' as No; it should be displayed as YES
> {quote}create database lgm;
> create table lgm.test_tbl(
>  f1 int,
>  f2 string
> ) stored as orc
> TBLPROPERTIES("orc.compress"="snappy");
> desc formatted lgm.test_tbl;
> !image-2021-06-11-10-49-25-710.png!
> {quote}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Assigned] (HIVE-25239) Create the compression table but the properties Compressed is No

2021-06-10 Thread GuangMing Lu (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-25239?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

GuangMing Lu reassigned HIVE-25239:
---

Assignee: (was: GuangMing Lu)

> Create the compression table but the properties Compressed is No
> 
>
> Key: HIVE-25239
> URL: https://issues.apache.org/jira/browse/HIVE-25239
> Project: Hive
>  Issue Type: Bug
>  Components: Hive
>Affects Versions: 3.1.0
>Reporter: GuangMing Lu
>Priority: Major
>  Labels: easyfix
> Fix For: 4.0.0
>
> Attachments: HIVE-25239.01.patch, image-2021-06-11-10-49-25-710.png
>
>
> After creating an ORC table with Snappy compression, 'desc formatted table' 
> shows 'Compressed' as No; it should be displayed as YES
> {quote}create database lgm;
> create table lgm.test_tbl(
>  f1 int,
>  f2 string
> ) stored as orc
> TBLPROPERTIES("orc.compress"="snappy");
> desc formatted lgm.test_tbl;
> !image-2021-06-11-10-49-25-710.png!
> {quote}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Updated] (HIVE-25239) Create the compression table but the properties Compressed is No

2021-06-10 Thread GuangMing Lu (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-25239?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

GuangMing Lu updated HIVE-25239:

   Attachment: HIVE-25239.01.patch
Fix Version/s: 4.0.0
 Assignee: GuangMing Lu
   Status: Patch Available  (was: Open)

> Create the compression table but the properties Compressed is No
> 
>
> Key: HIVE-25239
> URL: https://issues.apache.org/jira/browse/HIVE-25239
> Project: Hive
>  Issue Type: Bug
>  Components: Hive
>Affects Versions: 3.1.0
>Reporter: GuangMing Lu
>Assignee: GuangMing Lu
>Priority: Major
>  Labels: easyfix
> Fix For: 4.0.0
>
> Attachments: HIVE-25239.01.patch, image-2021-06-11-10-49-25-710.png
>
>
> After creating an ORC table with Snappy compression, 'desc formatted table' 
> shows 'Compressed' as No; it should be displayed as YES
> {quote}create database lgm;
> create table lgm.test_tbl(
>  f1 int,
>  f2 string
> ) stored as orc
> TBLPROPERTIES("orc.compress"="snappy");
> desc formatted lgm.test_tbl;
> !image-2021-06-11-10-49-25-710.png!
> {quote}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Updated] (HIVE-25239) Create the compression table but the properties Compressed is No

2021-06-10 Thread GuangMing Lu (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-25239?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

GuangMing Lu updated HIVE-25239:

Summary: Create the compression table but the properties Compressed is No  
(was: Create the compression table but the compressed properties are no)

> Create the compression table but the properties Compressed is No
> 
>
> Key: HIVE-25239
> URL: https://issues.apache.org/jira/browse/HIVE-25239
> Project: Hive
>  Issue Type: Bug
>  Components: Hive
>Affects Versions: 3.1.0
>Reporter: GuangMing Lu
>Priority: Major
>  Labels: easyfix
> Attachments: image-2021-06-11-10-49-25-710.png
>
>
> After creating an ORC table with Snappy compression, 'desc formatted table' 
> shows 'Compressed' as No; it should be displayed as YES
> {quote}create database lgm;
> create table lgm.test_tbl(
>  f1 int,
>  f2 string
> ) stored as orc
> TBLPROPERTIES("orc.compress"="snappy");
> desc formatted lgm.test_tbl;
> !image-2021-06-11-10-49-25-710.png!
> {quote}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Updated] (HIVE-24715) Increase bucketId range

2021-06-07 Thread GuangMing Lu (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-24715?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

GuangMing Lu updated HIVE-24715:

Issue Type: Improvement  (was: Bug)

> Increase bucketId range
> ---
>
> Key: HIVE-24715
> URL: https://issues.apache.org/jira/browse/HIVE-24715
> Project: Hive
>  Issue Type: Improvement
>  Components: HiveServer2
>Reporter: Attila Magyar
>Assignee: Attila Magyar
>Priority: Major
>  Labels: pull-request-available
> Fix For: 4.0.0
>
> Attachments: Bucket Id range increase.pdf
>
>  Time Spent: 1h 20m
>  Remaining Estimate: 0h
>




--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Updated] (HIVE-22098) Data loss occurs when multiple tables are join with different bucket_version

2020-09-21 Thread GuangMing Lu (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-22098?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

GuangMing Lu updated HIVE-22098:

Attachment: join_test.sql

> Data loss occurs when multiple tables are join with different bucket_version
> 
>
> Key: HIVE-22098
> URL: https://issues.apache.org/jira/browse/HIVE-22098
> Project: Hive
>  Issue Type: Bug
>  Components: Operators
>Affects Versions: 3.1.0, 3.1.2
>Reporter: GuangMing Lu
>Assignee: yongtaoliao
>Priority: Blocker
>  Labels: data-loss, wrongresults
> Attachments: HIVE-22098.1.patch, image-2019-08-12-18-45-15-771.png, 
> join_test.sql, table_a_data.orc, table_b_data.orc, table_c_data.orc
>
>
> When tables with different bucketVersion values are joined and the number of 
> reducers is greater than 2, the result is incorrect (*data loss*).
>  *Scenario 1*: Three tables are joined. The intermediate result of joining 
> table_a (the first table) with table_b (the second table) is recorded as 
> tmp_a_b. When it is joined with the third table, that table has 
> bucket_version=2 (the default for tables created since hive-3.0.0), while the 
> intermediate data tmp_a_b is initialized with bucketVersion=-1, so its 
> ReduceSinkOperator joins with bucketVersion=-1. In 
> the init method, the hash algorithm for the join column is selected 
> according to bucketVersion. If bucketVersion = 2 and the operation is not an 
> ACID operation, the new hash algorithm is used; otherwise, the old hash 
> algorithm is used. Because the hash algorithms are inconsistent, the data is 
> allocated to different partitions. In the reducer stage, rows with the 
> same key cannot be paired, resulting in data loss.
> *Scenario 2*: Create two test tables: create table 
> table_bucketversion_1(col_1 string, col_2 string) TBLPROPERTIES 
> ('bucketing_version'='1'); create table table_bucketversion_2(col_1 string, 
> col_2 string) TBLPROPERTIES ('bucketing_version'='2');
>  When table_bucketversion_1 is joined with table_bucketversion_2, part of the 
> result data is lost because the bucketVersion values differ.
>  



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Updated] (HIVE-22098) Data loss occurs when multiple tables are join with different bucket_version

2020-09-21 Thread GuangMing Lu (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-22098?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

GuangMing Lu updated HIVE-22098:

Attachment: (was: join_test.sql)

> Data loss occurs when multiple tables are join with different bucket_version
> 
>
> Key: HIVE-22098
> URL: https://issues.apache.org/jira/browse/HIVE-22098
> Project: Hive
>  Issue Type: Bug
>  Components: Operators
>Affects Versions: 3.1.0, 3.1.2
>Reporter: GuangMing Lu
>Assignee: yongtaoliao
>Priority: Blocker
>  Labels: data-loss, wrongresults
> Attachments: HIVE-22098.1.patch, image-2019-08-12-18-45-15-771.png, 
> join_test.sql, table_a_data.orc, table_b_data.orc, table_c_data.orc
>
>
> When tables with different bucketVersion values are joined and the number of 
> reducers is greater than 2, the result is incorrect (*data loss*).
>  *Scenario 1*: Three tables are joined. The intermediate result of joining 
> table_a (the first table) with table_b (the second table) is recorded as 
> tmp_a_b. When it is joined with the third table, that table has 
> bucket_version=2 (the default for tables created since hive-3.0.0), while the 
> intermediate data tmp_a_b is initialized with bucketVersion=-1, so its 
> ReduceSinkOperator joins with bucketVersion=-1. In 
> the init method, the hash algorithm for the join column is selected 
> according to bucketVersion. If bucketVersion = 2 and the operation is not an 
> ACID operation, the new hash algorithm is used; otherwise, the old hash 
> algorithm is used. Because the hash algorithms are inconsistent, the data is 
> allocated to different partitions. In the reducer stage, rows with the 
> same key cannot be paired, resulting in data loss.
> *Scenario 2*: Create two test tables: create table 
> table_bucketversion_1(col_1 string, col_2 string) TBLPROPERTIES 
> ('bucketing_version'='1'); create table table_bucketversion_2(col_1 string, 
> col_2 string) TBLPROPERTIES ('bucketing_version'='2');
>  When table_bucketversion_1 is joined with table_bucketversion_2, part of the 
> result data is lost because the bucketVersion values differ.
>  



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Commented] (HIVE-24060) When the CBO is false, NPE is thrown by an EXCEPT or INTERSECT execution

2020-09-21 Thread GuangMing Lu (Jira)


[ 
https://issues.apache.org/jira/browse/HIVE-24060?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=17199373#comment-17199373
 ] 

GuangMing Lu commented on HIVE-24060:
-

Hey [~dengzh], that is the case, but this works in hive-1.2.1, which leads to 
incompatibility problems for some users. Should we consider supporting it?

> When the CBO is false, NPE is thrown by an EXCEPT or INTERSECT execution
> 
>
> Key: HIVE-24060
> URL: https://issues.apache.org/jira/browse/HIVE-24060
> Project: Hive
>  Issue Type: Bug
>  Components: CBO, Hive
>Affects Versions: 3.1.0, 3.1.2
>Reporter: GuangMing Lu
>Priority: Major
>
> {code:java}
> set hive.cbo.enable=false;
> create table testtable(idx string, namex string) stored as orc;
> insert into testtable values('123', 'aaa'), ('234', 'bbb');
> explain select a.idx from (select idx,namex from testtable intersect select 
> idx,namex from testtable) a
> {code}
>  The execution throws a NullPointerException:
> {code:java}
> 2020-08-24 15:12:24,261 | WARN  | HiveServer2-Handler-Pool: Thread-345 | 
> Error executing statement:  | 
> org.apache.hive.service.cli.thrift.ThriftCLIService.executeNewStatement(ThriftCLIService.java:1155)
> org.apache.hive.service.cli.HiveSQLException: Error while compiling 
> statement: FAILED: NullPointerException null
> at 
> org.apache.hive.service.cli.operation.Operation.toSQLException(Operation.java:341)
>  ~[hive-service-3.1.0.jar:3.1.0]
> at 
> org.apache.hive.service.cli.operation.SQLOperation.prepare(SQLOperation.java:215)
>  ~[hive-service-3.1.0.jar:3.1.0]
> at 
> org.apache.hive.service.cli.operation.SQLOperation.runInternal(SQLOperation.java:316)
>  ~[hive-service-3.1.0.jar:3.1.0]
> at 
> org.apache.hive.service.cli.operation.Operation.run(Operation.java:253) 
> ~[hive-service-3.1.0.jar:3.1.0]
> at 
> org.apache.hive.service.cli.session.HiveSessionImpl.executeStatementInternal(HiveSessionImpl.java:684)
>  ~[hive-service-3.1.0.jar:3.1.0]
> at 
> org.apache.hive.service.cli.session.HiveSessionImpl.executeStatementAsync(HiveSessionImpl.java:670)
>  ~[hive-service-3.1.0.jar:3.1.0]
> at 
> org.apache.hive.service.cli.CLIService.executeStatementAsync(CLIService.java:342)
>  ~[hive-service-3.1.0.jar:3.1.0]
> at 
> org.apache.hive.service.cli.thrift.ThriftCLIService.executeNewStatement(ThriftCLIService.java:1144)
>  ~[hive-service-3.1.0.jar:3.1.0]
> at 
> org.apache.hive.service.cli.thrift.ThriftCLIService.ExecuteStatement(ThriftCLIService.java:1280)
>  ~[hive-service-3.1.0.jar:3.1.0]
> at 
> org.apache.hive.service.rpc.thrift.TCLIService$Processor$ExecuteStatement.getResult(TCLIService.java:1557)
>  ~[hive-service-rpc-3.1.0.jar:3.1.0]
> at 
> org.apache.hive.service.rpc.thrift.TCLIService$Processor$ExecuteStatement.getResult(TCLIService.java:1542)
>  ~[hive-service-rpc-3.1.0.jar:3.1.0]
> at org.apache.thrift.ProcessFunction.process(ProcessFunction.java:39) 
> ~[libthrift-0.9.3.jar:0.9.3]
> at org.apache.thrift.TBaseProcessor.process(TBaseProcessor.java:39) 
> ~[libthrift-0.9.3.jar:0.9.3]
> at 
> org.apache.hadoop.hive.metastore.security.HadoopThriftAuthBridge$Server$TUGIAssumingProcessor.process(HadoopThriftAuthBridge.java:648)
>  ~[hive-standalone-metastore-3.1.0.jar:3.1.0]
> at 
> org.apache.thrift.server.TThreadPoolServer$WorkerProcess.run(TThreadPoolServer.java:286)
>  ~[libthrift-0.9.3.jar:0.9.3]
> at 
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
>  ~[?:1.8.0_201]
> at 
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
>  ~[?:1.8.0_201]
> at java.lang.Thread.run(Thread.java:748) [?:1.8.0_201]
> Caused by: java.lang.NullPointerException
> at 
> org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genSelectPlan(SemanticAnalyzer.java:4367)
>  ~[hive-exec-3.1.0.jar:3.1.0]
> at 
> org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genSelectPlan(SemanticAnalyzer.java:4346)
>  ~[hive-exec-3.1.0.jar:3.1.0]
> at 
> org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genPostGroupByBodyPlan(SemanticAnalyzer.java:10576)
>  ~[hive-exec-3.1.0.jar:3.1.0]
> at 
> org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genBodyPlan(SemanticAnalyzer.java:10515)
>  ~[hive-exec-3.1.0.jar:3.1.0]
> at 
> org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genPlan(SemanticAnalyzer.java:11434)
>  ~[hive-exec-3.1.0.jar:3.1.0]
> at 
> org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genPlan(SemanticAnalyzer.java:11291)
>  ~[hive-exec-3.1.0.jar:3.1.0]
> at 
> org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genPlan(SemanticAnalyzer.java:11318)
>  ~[hive-exec-3.1.0.jar:3.1.0]
> at 
> 

[jira] [Updated] (HIVE-24186) The aggregate class operation fails when the CBO is false

2020-09-21 Thread GuangMing Lu (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-24186?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

GuangMing Lu updated HIVE-24186:

Affects Version/s: 3.1.2

> The aggregate class operation fails when the CBO is false
> -
>
> Key: HIVE-24186
> URL: https://issues.apache.org/jira/browse/HIVE-24186
> Project: Hive
>  Issue Type: Bug
>  Components: CBO, SQL
>Affects Versions: 3.1.0, 3.1.2
>Reporter: GuangMing Lu
>Priority: Major
>
> {code:java}
> create table table_1
> (
> idx string, 
> namex string
> ) stored as orc;
> create table table_2
> (
> sid string,
> sname string
> )stored as orc;
> set hive.cbo.enable=false;
> explain
> insert into table table_1(idx , namex)
> select t.sid idx, '123' namex 
> from table_2 t
> group by t.sid
> order by 1,2;
> {code}
> Executing the above SQL reports the following error:
> {code:java}
> org.apache.hive.service.cli.HiveSQLException: Error while compiling 
> statement: FAILED: SemanticException [Error 10004]: Line 4:7 Invalid table 
> alias or column reference 't': (possible column names are: _col0, _col1)
>     at 
> org.apache.hive.service.cli.operation.Operation.toSQLException(Operation.java:341)
>  ~[hive-service-3.1.0-hw-ei-302001-SNAPSHOT.jar:3.1.0-hw-ei-302001-SNAPSHOT]
>     at 
> org.apache.hive.service.cli.operation.SQLOperation.prepare(SQLOperation.java:215)
>  ~[hive-service-3.1.0-hw-ei-302001-SNAPSHOT.jar:3.1.0-hw-ei-302001-SNAPSHOT]
>     at 
> org.apache.hive.service.cli.operation.SQLOperation.runInternal(SQLOperation.java:316)
>  ~[hive-service-3.1.0-hw-ei-302001-SNAPSHOT.jar:3.1.0-hw-ei-302001-SNAPSHOT]
>     at 
> org.apache.hive.service.cli.operation.Operation.run(Operation.java:253) 
> ~[hive-service-3.1.0-hw-ei-302001-SNAPSHOT.jar:3.1.0-hw-ei-302001-SNAPSHOT]
>     at 
> org.apache.hive.service.cli.session.HiveSessionImpl.executeStatementInternal(HiveSessionImpl.java:684)
>  ~[hive-service-3.1.0-hw-ei-302001-SNAPSHOT.jar:3.1.0-hw-ei-302001-SNAPSHOT]
>     at 
> org.apache.hive.service.cli.session.HiveSessionImpl.executeStatementAsync(HiveSessionImpl.java:670)
>  ~[hive-service-3.1.0-hw-ei-302001-SNAPSHOT.jar:3.1.0-hw-ei-302001-SNAPSHOT]
>     at sun.reflect.GeneratedMethodAccessor151.invoke(Unknown Source) 
> ~[?:?]
>     at 
> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>  ~[?:1.8.0_242]
>     at java.lang.reflect.Method.invoke(Method.java:498) ~[?:1.8.0_242]
>     at 
> org.apache.hive.service.cli.session.HiveSessionProxy.invoke(HiveSessionProxy.java:78)
>  ~[hive-service-3.1.0-hw-ei-302001-SNAPSHOT.jar:3.1.0-hw-ei-302001-SNAPSHOT]
>     at 
> org.apache.hive.service.cli.session.HiveSessionProxy.access$000(HiveSessionProxy.java:36)
>  ~[hive-service-3.1.0-hw-ei-302001-SNAPSHOT.jar:3.1.0-hw-ei-302001-SNAPSHOT]
>     at 
> org.apache.hive.service.cli.session.HiveSessionProxy$1.run(HiveSessionProxy.java:63)
>  ~[hive-service-3.1.0-hw-ei-302001-SNAPSHOT.jar:3.1.0-hw-ei-302001-SNAPSHOT]
>     at java.security.AccessController.doPrivileged(Native Method) 
> ~[?:1.8.0_242]
>     at javax.security.auth.Subject.doAs(Subject.java:422) ~[?:1.8.0_242]
>     at 
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1737)
>  ~[hadoop-common-3.1.1-hw-ei-302001-SNAPSHOT.jar:?]
>     at 
> org.apache.hive.service.cli.session.HiveSessionProxy.invoke(HiveSessionProxy.java:59)
>  ~[hive-service-3.1.0-hw-ei-302001-SNAPSHOT.jar:3.1.0-hw-ei-302001-SNAPSHOT]
>     at com.sun.proxy.$Proxy66.executeStatementAsync(Unknown Source) ~[?:?]
>     at 
> org.apache.hive.service.cli.CLIService.executeStatementAsync(CLIService.java:342)
>  ~[hive-service-3.1.0-hw-ei-302001-SNAPSHOT.jar:3.1.0-hw-ei-302001-SNAPSHOT]
>     at 
> org.apache.hive.service.cli.thrift.ThriftCLIService.executeNewStatement(ThriftCLIService.java:1144)
>  ~[hive-service-3.1.0-hw-ei-302001-SNAPSHOT.jar:3.1.0-hw-ei-302001-SNAPSHOT]
>     at 
> org.apache.hive.service.cli.thrift.ThriftCLIService.ExecuteStatement(ThriftCLIService.java:1280)
>  ~[hive-service-3.1.0-hw-ei-302001-SNAPSHOT.jar:3.1.0-hw-ei-302001-SNAPSHOT]
>     at 
> org.apache.hive.service.rpc.thrift.TCLIService$Processor$ExecuteStatement.getResult(TCLIService.java:1557)
>  
> ~[hive-service-rpc-3.1.0-hw-ei-302001-SNAPSHOT.jar:3.1.0-hw-ei-302001-SNAPSHOT]
>     at 
> org.apache.hive.service.rpc.thrift.TCLIService$Processor$ExecuteStatement.getResult(TCLIService.java:1542)
>  
> ~[hive-service-rpc-3.1.0-hw-ei-302001-SNAPSHOT.jar:3.1.0-hw-ei-302001-SNAPSHOT]
>     at org.apache.thrift.ProcessFunction.process(ProcessFunction.java:39) 
> ~[hive-exec-3.1.0-hw-ei-302001-SNAPSHOT.jar:3.1.0-hw-ei-302001-SNAPSHOT]
>     at 

[jira] [Updated] (HIVE-24186) The aggregate class operation fails when the CBO is false

2020-09-21 Thread GuangMing Lu (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-24186?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

GuangMing Lu updated HIVE-24186:

Fix Version/s: (was: 3.1.2)
   (was: 3.1.0)

> The aggregate class operation fails when the CBO is false
> -
>
> Key: HIVE-24186
> URL: https://issues.apache.org/jira/browse/HIVE-24186
> Project: Hive
>  Issue Type: Bug
>  Components: CBO, SQL
>Affects Versions: 3.1.0
>Reporter: GuangMing Lu
>Priority: Major
>
> {code:java}
> create table table_1
> (
> idx string, 
> namex string
> ) stored as orc;
> create table table_2
> (
> sid string,
> sname string
> )stored as orc;
> set hive.cbo.enable=false;
> explain
> insert into table table_1(idx , namex)
> select t.sid idx, '123' namex 
> from table_2 t
> group by t.sid
> order by 1,2;
> {code}
> Executing the above SQL reports the following error:
> {code:java}
> org.apache.hive.service.cli.HiveSQLException: Error while compiling 
> statement: FAILED: SemanticException [Error 10004]: Line 4:7 Invalid table 
> alias or column reference 't': (possible column names are: _col0, _col1)
>     at 
> org.apache.hive.service.cli.operation.Operation.toSQLException(Operation.java:341)
>  ~[hive-service-3.1.0-hw-ei-302001-SNAPSHOT.jar:3.1.0-hw-ei-302001-SNAPSHOT]
>     at 
> org.apache.hive.service.cli.operation.SQLOperation.prepare(SQLOperation.java:215)
>  ~[hive-service-3.1.0-hw-ei-302001-SNAPSHOT.jar:3.1.0-hw-ei-302001-SNAPSHOT]
>     at 
> org.apache.hive.service.cli.operation.SQLOperation.runInternal(SQLOperation.java:316)
>  ~[hive-service-3.1.0-hw-ei-302001-SNAPSHOT.jar:3.1.0-hw-ei-302001-SNAPSHOT]
>     at 
> org.apache.hive.service.cli.operation.Operation.run(Operation.java:253) 
> ~[hive-service-3.1.0-hw-ei-302001-SNAPSHOT.jar:3.1.0-hw-ei-302001-SNAPSHOT]
>     at 
> org.apache.hive.service.cli.session.HiveSessionImpl.executeStatementInternal(HiveSessionImpl.java:684)
>  ~[hive-service-3.1.0-hw-ei-302001-SNAPSHOT.jar:3.1.0-hw-ei-302001-SNAPSHOT]
>     at 
> org.apache.hive.service.cli.session.HiveSessionImpl.executeStatementAsync(HiveSessionImpl.java:670)
>  ~[hive-service-3.1.0-hw-ei-302001-SNAPSHOT.jar:3.1.0-hw-ei-302001-SNAPSHOT]
>     at sun.reflect.GeneratedMethodAccessor151.invoke(Unknown Source) 
> ~[?:?]
>     at 
> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>  ~[?:1.8.0_242]
>     at java.lang.reflect.Method.invoke(Method.java:498) ~[?:1.8.0_242]
>     at 
> org.apache.hive.service.cli.session.HiveSessionProxy.invoke(HiveSessionProxy.java:78)
>  ~[hive-service-3.1.0-hw-ei-302001-SNAPSHOT.jar:3.1.0-hw-ei-302001-SNAPSHOT]
>     at 
> org.apache.hive.service.cli.session.HiveSessionProxy.access$000(HiveSessionProxy.java:36)
>  ~[hive-service-3.1.0-hw-ei-302001-SNAPSHOT.jar:3.1.0-hw-ei-302001-SNAPSHOT]
>     at 
> org.apache.hive.service.cli.session.HiveSessionProxy$1.run(HiveSessionProxy.java:63)
>  ~[hive-service-3.1.0-hw-ei-302001-SNAPSHOT.jar:3.1.0-hw-ei-302001-SNAPSHOT]
>     at java.security.AccessController.doPrivileged(Native Method) 
> ~[?:1.8.0_242]
>     at javax.security.auth.Subject.doAs(Subject.java:422) ~[?:1.8.0_242]
>     at 
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1737)
>  ~[hadoop-common-3.1.1-hw-ei-302001-SNAPSHOT.jar:?]
>     at 
> org.apache.hive.service.cli.session.HiveSessionProxy.invoke(HiveSessionProxy.java:59)
>  ~[hive-service-3.1.0-hw-ei-302001-SNAPSHOT.jar:3.1.0-hw-ei-302001-SNAPSHOT]
>     at com.sun.proxy.$Proxy66.executeStatementAsync(Unknown Source) ~[?:?]
>     at 
> org.apache.hive.service.cli.CLIService.executeStatementAsync(CLIService.java:342)
>  ~[hive-service-3.1.0-hw-ei-302001-SNAPSHOT.jar:3.1.0-hw-ei-302001-SNAPSHOT]
>     at 
> org.apache.hive.service.cli.thrift.ThriftCLIService.executeNewStatement(ThriftCLIService.java:1144)
>  ~[hive-service-3.1.0-hw-ei-302001-SNAPSHOT.jar:3.1.0-hw-ei-302001-SNAPSHOT]
>     at 
> org.apache.hive.service.cli.thrift.ThriftCLIService.ExecuteStatement(ThriftCLIService.java:1280)
>  ~[hive-service-3.1.0-hw-ei-302001-SNAPSHOT.jar:3.1.0-hw-ei-302001-SNAPSHOT]
>     at 
> org.apache.hive.service.rpc.thrift.TCLIService$Processor$ExecuteStatement.getResult(TCLIService.java:1557)
>  
> ~[hive-service-rpc-3.1.0-hw-ei-302001-SNAPSHOT.jar:3.1.0-hw-ei-302001-SNAPSHOT]
>     at 
> org.apache.hive.service.rpc.thrift.TCLIService$Processor$ExecuteStatement.getResult(TCLIService.java:1542)
>  
> ~[hive-service-rpc-3.1.0-hw-ei-302001-SNAPSHOT.jar:3.1.0-hw-ei-302001-SNAPSHOT]
>     at org.apache.thrift.ProcessFunction.process(ProcessFunction.java:39) 
> ~[hive-exec-3.1.0-hw-ei-302001-SNAPSHOT.jar:3.1.0-hw-ei-302001-SNAPSHOT]
>     at 

[jira] [Assigned] (HIVE-24122) When CBO is enable, CAST(STR as Bigint)IS NOT NULL result is wrong

2020-09-21 Thread GuangMing Lu (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-24122?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

GuangMing Lu reassigned HIVE-24122:
---

Assignee: GuangMing Lu

> When CBO is enable, CAST(STR as Bigint)IS NOT NULL result is wrong 
> ---
>
> Key: HIVE-24122
> URL: https://issues.apache.org/jira/browse/HIVE-24122
> Project: Hive
>  Issue Type: Bug
>  Components: CBO
>Affects Versions: 3.1.0, 3.1.2
>Reporter: GuangMing Lu
>Assignee: GuangMing Lu
>Priority: Major
> Fix For: 4.0.0
>
>
> {code:java}
> create  database testdb;
> CREATE TABLE IF NOT EXISTS testdb.z_tab 
> ( 
>     SEARCHWORD    STRING, 
>     COUNT_NUM BIGINT, 
>     WORDS STRING 
> ) 
> ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' 
> STORED AS TEXTFILE;
> insert into table testdb.z_tab 
> values('hivetest',111,'aaa'),('hivetest2',111,'bbb');
> set hive.cbo.enable=true;
> SELECT CAST(searchword as bigint) IS NOT NULL FROM testdb.z_tab;
> SELECT CAST(searchword as bigint) IS NULL FROM testdb.z_tab;
> {code}
> The SQL results for both queries are the same, as follows:
> {noformat}
> +---+
> |  _c0  |
> +---+
> | true  |
> | true  |
> +---+{noformat}
> The result of SELECT CAST(searchword as bigint) IS NOT NULL FROM 
> testdb.z_tab; is wrong: it should be false for both rows, because these 
> strings cannot be cast to bigint.
>  



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Resolved] (HIVE-24122) When CBO is enable, CAST(STR as Bigint)IS NOT NULL result is wrong

2020-09-21 Thread GuangMing Lu (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-24122?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

GuangMing Lu resolved HIVE-24122.
-
Fix Version/s: 4.0.0
   Resolution: Fixed

> When CBO is enable, CAST(STR as Bigint)IS NOT NULL result is wrong 
> ---
>
> Key: HIVE-24122
> URL: https://issues.apache.org/jira/browse/HIVE-24122
> Project: Hive
>  Issue Type: Bug
>  Components: CBO
>Affects Versions: 3.1.0, 3.1.2
>Reporter: GuangMing Lu
>Priority: Major
> Fix For: 4.0.0
>
>
> {code:java}
> create  database testdb;
> CREATE TABLE IF NOT EXISTS testdb.z_tab 
> ( 
>     SEARCHWORD    STRING, 
>     COUNT_NUM BIGINT, 
>     WORDS STRING 
> ) 
> ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' 
> STORED AS TEXTFILE;
> insert into table testdb.z_tab 
> values('hivetest',111,'aaa'),('hivetest2',111,'bbb');
> set hive.cbo.enable=true;
> SELECT CAST(searchword as bigint) IS NOT NULL FROM testdb.z_tab;
> SELECT CAST(searchword as bigint) IS NULL FROM testdb.z_tab;
> {code}
> The SQL results for both queries are the same, as follows:
> {noformat}
> +---+
> |  _c0  |
> +---+
> | true  |
> | true  |
> +---+{noformat}
> SELECT CAST(searchword as bigint) IS NOT NULL FROM testdb.z_tab;  execute 
> result is wrong
>  



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Comment Edited] (HIVE-24122) When CBO is enable, CAST(STR as Bigint)IS NOT NULL result is wrong

2020-09-21 Thread GuangMing Lu (Jira)


[ 
https://issues.apache.org/jira/browse/HIVE-24122?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=17199362#comment-17199362
 ] 

GuangMing Lu edited comment on HIVE-24122 at 9/21/20, 12:44 PM:


Hey {color:#0066cc} [~zabetak]  {color}

{color:#0066cc}{color:#172b4d} Thanks for the reminder. I tested on master and 
it works fine, because master uses calcite-1.21.{color} {color} After 
analysis, the problem was fixed in calcite 1.19 and above.


was (Author: luguangming):
Hey {color:#0066cc} [~zabetak]  {color:#172b4d} Thanks for reminding me that I 
was test in the master is ok, the reason why the master used calcite-1.21. 
{color}{color}

{color:#0066cc}{color:#172b4d}After analysis, the problem was fixed in calcite 
1.19 or above{color}{color}

> When CBO is enable, CAST(STR as Bigint)IS NOT NULL result is wrong 
> ---
>
> Key: HIVE-24122
> URL: https://issues.apache.org/jira/browse/HIVE-24122
> Project: Hive
>  Issue Type: Bug
>  Components: CBO
>Affects Versions: 3.1.0, 3.1.2
>Reporter: GuangMing Lu
>Priority: Major
>
> {code:java}
> create  database testdb;
> CREATE TABLE IF NOT EXISTS testdb.z_tab 
> ( 
>     SEARCHWORD    STRING, 
>     COUNT_NUM BIGINT, 
>     WORDS STRING 
> ) 
> ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' 
> STORED AS TEXTFILE;
> insert into table testdb.z_tab 
> values('hivetest',111,'aaa'),('hivetest2',111,'bbb');
> set hive.cbo.enable=true;
> SELECT CAST(searchword as bigint) IS NOT NULL FROM testdb.z_tab;
> SELECT CAST(searchword as bigint) IS NULL FROM testdb.z_tab;
> {code}
> The SQL results for both queries are the same, as follows:
> {noformat}
> +---+
> |  _c0  |
> +---+
> | true  |
> | true  |
> +---+{noformat}
> SELECT CAST(searchword as bigint) IS NOT NULL FROM testdb.z_tab;  execute 
> result is wrong
>  



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Commented] (HIVE-24122) When CBO is enable, CAST(STR as Bigint)IS NOT NULL result is wrong

2020-09-21 Thread GuangMing Lu (Jira)


[ 
https://issues.apache.org/jira/browse/HIVE-24122?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=17199362#comment-17199362
 ] 

GuangMing Lu commented on HIVE-24122:
-

Hey {color:#0066cc} [~zabetak]  {color:#172b4d} Thanks for the reminder. I 
tested on master and it works fine, because master uses calcite-1.21. 
{color}{color}

{color:#0066cc}{color:#172b4d}After analysis, the problem was fixed in calcite 
1.19 or above{color}{color}

> When CBO is enable, CAST(STR as Bigint)IS NOT NULL result is wrong 
> ---
>
> Key: HIVE-24122
> URL: https://issues.apache.org/jira/browse/HIVE-24122
> Project: Hive
>  Issue Type: Bug
>  Components: CBO
>Affects Versions: 3.1.0, 3.1.2
>Reporter: GuangMing Lu
>Priority: Major
>
> {code:java}
> create  database testdb;
> CREATE TABLE IF NOT EXISTS testdb.z_tab 
> ( 
>     SEARCHWORD    STRING, 
>     COUNT_NUM BIGINT, 
>     WORDS STRING 
> ) 
> ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' 
> STORED AS TEXTFILE;
> insert into table testdb.z_tab 
> values('hivetest',111,'aaa'),('hivetest2',111,'bbb');
> set hive.cbo.enable=true;
> SELECT CAST(searchword as bigint) IS NOT NULL FROM testdb.z_tab;
> SELECT CAST(searchword as bigint) IS NULL FROM testdb.z_tab;
> {code}
> The SQL results for both queries are the same, as follows:
> {noformat}
> +---+
> |  _c0  |
> +---+
> | true  |
> | true  |
> +---+{noformat}
> SELECT CAST(searchword as bigint) IS NOT NULL FROM testdb.z_tab;  execute 
> result is wrong
>  



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Updated] (HIVE-24124) NPE occurs when bucket_version different bucket tables are joined

2020-09-07 Thread GuangMing Lu (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-24124?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

GuangMing Lu updated HIVE-24124:

Description: 
{code:java}
create table z_tab_1(
    task_id  string,    
    data_date  string,  
    accno  string,  
    curr_type  string,  
    ifrs9_pd12_value  double,
    ifrs9_ccf_value  double,
    ifrs9_lgd_value  double
)partitioned by(pt_dt string)
STORED AS ORCFILE
TBLPROPERTIES ('bucketing_version'='1');

alter table z_tab_1 add partition(pt_dt = '2020-7-31');

insert into z_tab_1 partition(pt_dt = '2020-7-31') values
('123','2020-7-31','accno-','curr_type-x', 0.1, 0.2 ,0.3),
('1','2020-1-31','a','1-curr_type-a', 0.1, 0.2 ,0.3),
('2','2020-2-31','b','2-curr_type-b', 0.1, 0.2 ,0.3),
('3','2020-3-31','c','3-curr_type-c', 0.1, 0.2 ,0.3),
('4','2020-4-31','d','4-curr_type-d', 0.1, 0.2 ,0.3),
('5','2020-5-31','e','5-curr_type-e', 0.1, 0.2 ,0.3),
('6','2020-6-31','f','6-curr_type-f', 0.1, 0.2 ,0.3),
('7','2020-7-31','g','7-curr_type-g', 0.1, 0.2 ,0.3),
('8','2020-8-31','h','8-curr_type-h', 0.1, 0.2 ,0.3),
('9','2020-9-31','i','9-curr_type-i', 0.1, 0.2 ,0.3);
drop table if exists z_tab_2;
CREATE TABLE z_tab_2(  
    task_id  string,    
    data_date  string,  
    accno  string,  
    curr_type  string,  
    ifrs9_pd12_value  double,   
    ifrs9_ccf_value  double,    
    ifrs9_lgd_value  double
) 
CLUSTERED BY (TASK_ID, DATA_DATE, ACCNO, CURR_TYPE)  SORTED by (TASK_ID, ACCNO, 
CURR_TYPE) INTO 2000 BUCKETS 
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS ORCFILE;

set hive.enforce.bucketing=true;

INSERT OVERWRITE TABLE z_tab_2
SELECT  DCCR.TASK_ID
   ,DCCR.DATA_DATE
   ,DCCR.ACCNO
   ,DCCR.CURR_TYPE
   ,DCCR.IFRS9_PD12_VALUE
   ,DCCR.IFRS9_CCF_VALUE
   ,DCCR.IFRS9_LGD_VALUE 
FROM z_tab_1 DCCR
WHERE pt_dt = '2020-7-31';
{code}
{noformat}
Caused by: java.lang.NullPointerException  
at 
org.apache.hadoop.hive.ql.exec.FileSinkOperator.findWriterOffset(FileSinkOperator.java:1072)
  
at 
org.apache.hadoop.hive.ql.exec.FileSinkOperator.process(FileSinkOperator.java:988)
  
at org.apache.hadoop.hive.ql.exec.Operator.baseForward(Operator.java:995)  
at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:941)  
at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:928)  
at 
org.apache.hadoop.hive.ql.exec.SelectOperator.process(SelectOperator.java:95)  
at org.apache.hadoop.hive.ql.exec.mr.ExecReducer.reduce(ExecReducer.java:237)  
... 7 more{noformat}

  was:
{code:java}
create table z_tab_1(
    task_id  string,    
    data_date  string,  
    accno  string,  
    curr_type  string,  
    ifrs9_pd12_value  double,
    ifrs9_ccf_value  double,
    ifrs9_lgd_value  double
)partitioned by(pt_dt string)
STORED AS ORCFILE
TBLPROPERTIES ('bucketing_version'='1');alter table z_tab_1 add partition(pt_dt 
= '2020-7-31');
insert into z_tab_1 partition(pt_dt = '2020-7-31') 
values('123','2020-7-31','accno-','curr_type-x', 0.1, 0.2 ,0.3),
('1','2020-1-31','a','1-curr_type-a', 0.1, 0.2 ,0.3),
('2','2020-2-31','b','2-curr_type-b', 0.1, 0.2 ,0.3),
('3','2020-3-31','c','3-curr_type-c', 0.1, 0.2 ,0.3),
('4','2020-4-31','d','4-curr_type-d', 0.1, 0.2 ,0.3),
('5','2020-5-31','e','5-curr_type-e', 0.1, 0.2 ,0.3),
('6','2020-6-31','f','6-curr_type-f', 0.1, 0.2 ,0.3),
('7','2020-7-31','g','7-curr_type-g', 0.1, 0.2 ,0.3),
('8','2020-8-31','h','8-curr_type-h', 0.1, 0.2 ,0.3),
('9','2020-9-31','i','9-curr_type-i', 0.1, 0.2 ,0.3);
drop table if exists z_tab_2;
CREATE TABLE z_tab_2(  
    task_id  string,    
    data_date  string,  
    accno  string,  
    curr_type  string,  
    ifrs9_pd12_value  double,   
    ifrs9_ccf_value  double,    
    ifrs9_lgd_value  double
) 
CLUSTERED BY (TASK_ID, DATA_DATE, ACCNO, CURR_TYPE)  SORTED by (TASK_ID, ACCNO, 
CURR_TYPE) INTO 2000 BUCKETS 
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS ORCFILE;

set hive.enforce.bucketing=true;

INSERT OVERWRITE TABLE z_tab_2
SELECT  DCCR.TASK_ID
   ,DCCR.DATA_DATE
   ,DCCR.ACCNO
   ,DCCR.CURR_TYPE
   ,DCCR.IFRS9_PD12_VALUE
   ,DCCR.IFRS9_CCF_VALUE
   ,DCCR.IFRS9_LGD_VALUE 
FROM z_tab_1 DCCR
WHERE pt_dt = '2020-7-31';
{code}
{noformat}
Caused by: java.lang.NullPointerException  
at 
org.apache.hadoop.hive.ql.exec.FileSinkOperator.findWriterOffset(FileSinkOperator.java:1072)
  
at 
org.apache.hadoop.hive.ql.exec.FileSinkOperator.process(FileSinkOperator.java:988)
  
at org.apache.hadoop.hive.ql.exec.Operator.baseForward(Operator.java:995)  
at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:941)  
at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:928)  
at 
org.apache.hadoop.hive.ql.exec.SelectOperator.process(SelectOperator.java:95)  
at 

[jira] [Updated] (HIVE-24124) NPE occurs when bucket_version different bucket tables are joined

2020-09-07 Thread GuangMing Lu (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-24124?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

GuangMing Lu updated HIVE-24124:

Description: 
{code:java}
create table z_tab_1(
    task_id  string,    
    data_date  string,  
    accno  string,  
    curr_type  string,  
    ifrs9_pd12_value  double,
    ifrs9_ccf_value  double,
    ifrs9_lgd_value  double
)partitioned by(pt_dt string)
STORED AS ORCFILE
TBLPROPERTIES ('bucketing_version'='1');alter table z_tab_1 add partition(pt_dt 
= '2020-7-31');
insert into z_tab_1 partition(pt_dt = '2020-7-31') 
values('123','2020-7-31','accno-','curr_type-x', 0.1, 0.2 ,0.3),
('1','2020-1-31','a','1-curr_type-a', 0.1, 0.2 ,0.3),
('2','2020-2-31','b','2-curr_type-b', 0.1, 0.2 ,0.3),
('3','2020-3-31','c','3-curr_type-c', 0.1, 0.2 ,0.3),
('4','2020-4-31','d','4-curr_type-d', 0.1, 0.2 ,0.3),
('5','2020-5-31','e','5-curr_type-e', 0.1, 0.2 ,0.3),
('6','2020-6-31','f','6-curr_type-f', 0.1, 0.2 ,0.3),
('7','2020-7-31','g','7-curr_type-g', 0.1, 0.2 ,0.3),
('8','2020-8-31','h','8-curr_type-h', 0.1, 0.2 ,0.3),
('9','2020-9-31','i','9-curr_type-i', 0.1, 0.2 ,0.3);
drop table if exists z_tab_2;
CREATE TABLE z_tab_2(  
    task_id  string,    
    data_date  string,  
    accno  string,  
    curr_type  string,  
    ifrs9_pd12_value  double,   
    ifrs9_ccf_value  double,    
    ifrs9_lgd_value  double
) 
CLUSTERED BY (TASK_ID, DATA_DATE, ACCNO, CURR_TYPE)  SORTED by (TASK_ID, ACCNO, 
CURR_TYPE) INTO 2000 BUCKETS 
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS ORCFILE;

set hive.enforce.bucketing=true;

INSERT OVERWRITE TABLE z_tab_2
SELECT  DCCR.TASK_ID
   ,DCCR.DATA_DATE
   ,DCCR.ACCNO
   ,DCCR.CURR_TYPE
   ,DCCR.IFRS9_PD12_VALUE
   ,DCCR.IFRS9_CCF_VALUE
   ,DCCR.IFRS9_LGD_VALUE 
FROM z_tab_1 DCCR
WHERE pt_dt = '2020-7-31';
{code}
{noformat}
Caused by: java.lang.NullPointerException  
at 
org.apache.hadoop.hive.ql.exec.FileSinkOperator.findWriterOffset(FileSinkOperator.java:1072)
  
at 
org.apache.hadoop.hive.ql.exec.FileSinkOperator.process(FileSinkOperator.java:988)
  
at org.apache.hadoop.hive.ql.exec.Operator.baseForward(Operator.java:995)  
at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:941)  
at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:928)  
at 
org.apache.hadoop.hive.ql.exec.SelectOperator.process(SelectOperator.java:95)  
at org.apache.hadoop.hive.ql.exec.mr.ExecReducer.reduce(ExecReducer.java:237)  
... 7 more{noformat}

  was:
{code:java}
create table z_tab_1(
    task_id  string,    
    data_date  string,  
    accno  string,  
    curr_type  string,  
    ifrs9_pd12_value  double,
    ifrs9_ccf_value  double,
    ifrs9_lgd_value  double
)partitioned by(pt_dt string)
STORED AS ORCFILE
TBLPROPERTIES ('bucketing_version'='1');alter table z_tab_1 add partition(pt_dt 
= '2020-7-31');
insert into z_tab_1 partition(pt_dt = '2020-7-31') 
values('123','2020-7-31','accno-','curr_type-x', 0.1, 0.2 ,0.3),
('1','2020-1-31','a','1-curr_type-a', 0.1, 0.2 ,0.3),
('2','2020-2-31','b','2-curr_type-b', 0.1, 0.2 ,0.3),
('3','2020-3-31','c','3-curr_type-c', 0.1, 0.2 ,0.3),
('4','2020-4-31','d','4-curr_type-d', 0.1, 0.2 ,0.3),
('5','2020-5-31','e','5-curr_type-e', 0.1, 0.2 ,0.3),
('6','2020-6-31','f','6-curr_type-f', 0.1, 0.2 ,0.3),
('7','2020-7-31','g','7-curr_type-g', 0.1, 0.2 ,0.3),
('8','2020-8-31','h','8-curr_type-h', 0.1, 0.2 ,0.3),
('9','2020-9-31','i','9-curr_type-i', 0.1, 0.2 ,0.3);
drop table if exists z_tab_2;
CREATE TABLE z_tab_2(  
    task_id  string,    
    data_date  string,  
    accno  string,  
    curr_type  string,  
    ifrs9_pd12_value  double,   
    ifrs9_ccf_value  double,    
    ifrs9_lgd_value  double
) 
CLUSTERED BY (TASK_ID, DATA_DATE, ACCNO, CURR_TYPE)  SORTED by (TASK_ID, ACCNO, 
CURR_TYPE) INTO 2000 BUCKETS 
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS ORCFILE;

set hive.enforce.bucketing=true;

INSERT OVERWRITE TABLE z_tab_2
SELECT  DCCR.TASK_ID
   ,DCCR.DATA_DATE
   ,DCCR.ACCNO
   ,DCCR.CURR_TYPE
   ,DCCR.IFRS9_PD12_VALUE
   ,DCCR.IFRS9_CCF_VALUE
   ,DCCR.IFRS9_LGD_VALUE 
FROM z_tab_1 DCCR
WHERE pt_dt = '2020-7-31';
{code}
{noformat}
Caused by: java.lang.NullPointerException  at 
org.apache.hadoop.hive.ql.exec.FileSinkOperator.findWriterOffset(FileSinkOperator.java:1072)
  at 
org.apache.hadoop.hive.ql.exec.FileSinkOperator.process(FileSinkOperator.java:988)
  at org.apache.hadoop.hive.ql.exec.Operator.baseForward(Operator.java:995)  at 
org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:941)  at 
org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:928)  at 
org.apache.hadoop.hive.ql.exec.SelectOperator.process(SelectOperator.java:95)  
at 

[jira] [Updated] (HIVE-24122) When CBO is enable, CAST(STR as Bigint)IS NOT NULL result is wrong

2020-09-07 Thread GuangMing Lu (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-24122?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

GuangMing Lu updated HIVE-24122:

Description: 
{code:java}
create  database testdb;
CREATE TABLE IF NOT EXISTS testdb.z_tab 
( 
    SEARCHWORD    STRING, 
    COUNT_NUM BIGINT, 
    WORDS STRING 
) 
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' 
STORED AS TEXTFILE;
insert into table testdb.z_tab 
values('hivetest',111,'aaa'),('hivetest2',111,'bbb');

set hive.cbo.enable=true;

SELECT CAST(searchword as bigint) IS NOT NULL FROM testdb.z_tab;
SELECT CAST(searchword as bigint) IS NULL FROM testdb.z_tab;
{code}
The SQL results for both queries are the same, as follows:
{noformat}
+---+
|  _c0  |
+---+
| true  |
| true  |
+---+{noformat}
The result of SELECT CAST(searchword as bigint) IS NOT NULL FROM 
testdb.z_tab; is wrong: it should be false for both rows, because these 
strings cannot be cast to bigint.

 

  was:
{code:java}
create  database testdb;
CREATE TABLE IF NOT EXISTS testdb.z_tab 
( 
    SEARCHWORD    STRING, 
    COUNT_NUM BIGINT, 
    WORDS STRING 
) 
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' 
STORED AS TEXTFILE;
insert into table testdb.z_tab 
values('hivetest',111,'aaa'),('hivetest2',111,'bbb');

set hive.cbo.enable=true;

SELECT CAST(searchword as bigint) IS NOT NULL FROM testdb.z_tab;
SELECT CAST(searchword as bigint) IS NULL FROM testdb.z_tab;
{code}
The SQL results for both queries are the same, as follows:

+---+
|  _c0  |
+---+
| true  |
| true  |
+---+

SELECT CAST(searchword as bigint) IS NOT NULL FROM testdb.z_tab;  execute 
result is wrong


> When CBO is enable, CAST(STR as Bigint)IS NOT NULL result is wrong 
> ---
>
> Key: HIVE-24122
> URL: https://issues.apache.org/jira/browse/HIVE-24122
> Project: Hive
>  Issue Type: Bug
>  Components: CBO
>Affects Versions: 3.1.0, 3.1.2
>Reporter: GuangMing Lu
>Priority: Major
>
> {code:java}
> create  database testdb;
> CREATE TABLE IF NOT EXISTS testdb.z_tab 
> ( 
>     SEARCHWORD    STRING, 
>     COUNT_NUM BIGINT, 
>     WORDS STRING 
> ) 
> ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' 
> STORED AS TEXTFILE;
> insert into table testdb.z_tab 
> values('hivetest',111,'aaa'),('hivetest2',111,'bbb');
> set hive.cbo.enable=true;
> SELECT CAST(searchword as bigint) IS NOT NULL FROM testdb.z_tab;
> SELECT CAST(searchword as bigint) IS NULL FROM testdb.z_tab;
> {code}
> The SQL results for both queries are the same, as follows:
> {noformat}
> +---+
> |  _c0  |
> +---+
> | true  |
> | true  |
> +---+{noformat}
> SELECT CAST(searchword as bigint) IS NOT NULL FROM testdb.z_tab;  execute 
> result is wrong
>  



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Updated] (HIVE-24100) Syntax compile failure occurs when INSERT table column Order by is greater than 2 columns when CBO is false

2020-09-01 Thread GuangMing Lu (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-24100?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

GuangMing Lu updated HIVE-24100:

Description: 
Executing the following SQL will fail to compile
{code:java}
set hive.cbo.enable=false;

-- create tables --
create table table_1
(
item_id string, 
stru_area_id string
)partitioned by ( PT_DT string) stored as orc;

create table table_2
(
CREATE_ORG_ID string,
PROMOTION_ID  string,
PROMOTION_STATUS string
) partitioned by (pt_dt string) stored as orc;

create table table_3
(
STRU_ID string,
SUP_STRU string
) partitioned by(pt_dt string) stored as orc;

set hive.cbo.enable=false;
-- execute sql--
explain
insert into table table_1 partition(PT_DT = '2020-08-22')
(item_id , stru_area_id)
select '123' ITEM_ID , T.STRU_ID STRU_AREA_ID 
from ( 
  select 
  T0.STRU_ID STRU_ID ,T0.STRU_ID STRU_ID_BRANCH 
  from  table_3 T0 
) T
inner join ( 
  select 
  TT.CREATE_ORG_ID
  from  table_2 TT 
) TIV
on (T.STRU_ID_BRANCH = TIV.CREATE_ORG_ID) 
group by T.STRU_ID
order by 1,2;
{code}
{code:java}
org.apache.hive.service.cli.HiveSQLException: Error while compiling statement: 
FAILED: SemanticException [Error 10004]: Line 5:28 Invalid table alias or 
column reference 'T': (possible column names are: _col0, _col1)
 at 
org.apache.hive.service.cli.operation.Operation.toSQLException(Operation.java:341)
 ~[hive-service-3.1.0.jar:3.1.0]
 at 
org.apache.hive.service.cli.operation.SQLOperation.prepare(SQLOperation.java:215)
 ~[hive-service-3.1.0.jar:3.1.0]
 at 
org.apache.hive.service.cli.operation.SQLOperation.runInternal(SQLOperation.java:316)
 ~[hive-service-3.1.0.jar:3.1.0]
 at org.apache.hive.service.cli.operation.Operation.run(Operation.java:253) 
~[hive-service-3.1.0.jar:3.1.0]
 at 
org.apache.hive.service.cli.session.HiveSessionImpl.executeStatementInternal(HiveSessionImpl.java:684)
 ~[hive-service-3.1.0.jar:3.1.0]
 at 
org.apache.hive.service.cli.session.HiveSessionImpl.executeStatementAsync(HiveSessionImpl.java:670)
 ~[hive-service-3.1.0.jar:3.1.0]
 at 
org.apache.hive.service.cli.CLIService.executeStatementAsync(CLIService.java:342)
 ~[hive-service-3.1.0.jar:3.1.0]
 at 
org.apache.hive.service.cli.thrift.ThriftCLIService.executeNewStatement(ThriftCLIService.java:1144)
 ~[hive-service-3.1.0.jar:3.1.0]
 at 
org.apache.hive.service.cli.thrift.ThriftCLIService.ExecuteStatement(ThriftCLIService.java:1280)
 ~[hive-service-3.1.0.jar:3.1.0]
 at 
org.apache.hive.service.rpc.thrift.TCLIService$Processor$ExecuteStatement.getResult(TCLIService.java:1557)
 ~[hive-exec-3.1.0.jar:3.1.0]
 at 
org.apache.hive.service.rpc.thrift.TCLIService$Processor$ExecuteStatement.getResult(TCLIService.java:1542)
 ~[hive-exec-3.1.0.jar:3.1.0]
 at org.apache.thrift.ProcessFunction.process(ProcessFunction.java:39) 
~[hive-exec-3.1.0.jar:3.1.0]
 at org.apache.thrift.TBaseProcessor.process(TBaseProcessor.java:39) 
~[hive-exec-3.1.0.jar:3.1.0]
 at 
org.apache.hadoop.hive.metastore.security.HadoopThriftAuthBridge$Server$TUGIAssumingProcessor.process(HadoopThriftAuthBridge.java:648)
 ~[hive-exec-3.1.0.jar:3.1.0]
 at 
org.apache.thrift.server.TThreadPoolServer$WorkerProcess.run(TThreadPoolServer.java:286)
 ~[hive-exec-3.1.0.jar:3.1.0]
 at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) 
~[?:1.8.0_201]
 at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) 
~[?:1.8.0_201]
 at java.lang.Thread.run(Thread.java:748) [?:1.8.0_201]
Caused by: org.apache.hadoop.hive.ql.parse.SemanticException: Line 5:28 Invalid 
table alias or column reference 'T': (possible column names are: _col0, _col1)
 at 
org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genAllExprNodeDesc(SemanticAnalyzer.java:12689)
 ~[hive-exec-3.1.0.jar:3.1.0]
 at 
org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genExprNodeDesc(SemanticAnalyzer.java:12629)
 ~[hive-exec-3.1.0.jar:3.1.0]
 at 
org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genExprNodeDesc(SemanticAnalyzer.java:12597)
 ~[hive-exec-3.1.0.jar:3.1.0]
 at 
org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genExprNodeDesc(SemanticAnalyzer.java:12575)
 ~[hive-exec-3.1.0.jar:3.1.0]
 at 
org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genReduceSinkPlan(SemanticAnalyzer.java:8482)
 ~[hive-exec-3.1.0.jar:3.1.0]
 at 
org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genPostGroupByBodyPlan(SemanticAnalyzer.java:10616)
 ~[hive-exec-3.1.0.jar:3.1.0]
 at 
org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genBodyPlan(SemanticAnalyzer.java:10515)
 ~[hive-exec-3.1.0.jar:3.1.0]
 at 
org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genPlan(SemanticAnalyzer.java:11434)
 ~[hive-exec-3.1.0.jar:3.1.0]
 at 
org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genPlan(SemanticAnalyzer.java:11304)
 ~[hive-exec-3.1.0.jar:3.1.0]
 at 
org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genOPTree(SemanticAnalyzer.java:12090)
 ~[hive-exec-3.1.0.jar:3.1.0]
 at