[jira] [Updated] (CARBONDATA-3954) Global sorting with array, if read from ORC format, write to carbon, error; If you use no_sort, success;

2020-08-21 Thread xiaohui (Jira)


 [ 
https://issues.apache.org/jira/browse/CARBONDATA-3954?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

xiaohui updated CARBONDATA-3954:

Description: 
orc table sql test: 
create table array_orc(name string, col array<string>,fee int) STORED AS orc;
insert into array_orc values("xiao3",array('上呼吸道疾病 1','白内障1','胃溃疡1'),2);
insert into array_orc values("xiao3",array('上呼吸道疾病1 ','白内障1','胃溃疡1'),2);
insert into array_orc values("xiao3",array('上呼吸道疾病1','白内障 1','胃溃疡1'),2);
insert into array_orc values("xiao3",array('上呼吸道疾病1','白内障1','胃溃疡 1'),2);
insert into array_orc values("xiao3",array('上呼吸道疾病1','白内障1','胃溃疡1'),2);
insert into array_orc values("xiao5",array(null,'白内障1','胃溃疡1'),2);
insert into array_orc values("xiao5",null,2);
insert into array_orc values("xiao3",array('j'),2);
insert into array_orc values("xiao4",array('j','j'),2);
insert into array_orc values("xiao4",NULL,2);



0: jdbc:hive2://localhost:1> use dict;
+-+--+
| Result  |
+-+--+
+-+--+
No rows selected (0.391 seconds)
0: jdbc:hive2://localhost:1> select * from array_orc;
++---+--+--+
|  name  |  col  | fee  |
++---+--+--+
| xiao3  | ["",null,"j"] | 3|
| xiao2  | ["上呼吸道疾病1","白内障1","胃溃疡1"] | 2|
| xiao3  | ["",null,"j"] | 3|
| xiao1  | ["上呼吸道疾病","白内障","胃溃疡"]| 1|
| xiao9  | NULL  | 3|
| xiao9  | NULL  | 3|
| xiao3  | NULL  | 3|
| xiao6  | NULL  | 3|
| xiao2  | ["上呼吸道疾病 1","白内障 1","胃溃疡 1"]  | 2|
| xiao1  | ["上呼吸道疾病 ","白内障 ","胃溃疡 "] | 1|
| xiao3  | NULL  | 3|
| xiao3  | [null]| 3|
| xiao3  | [""]  | 3|
++---+--+--+
13 rows selected (0.416 seconds)
0: jdbc:hive2://localhost:1> create table array_carbon4(name string, col 
array<string>,fee int) STORED AS carbondata TBLPROPERTIES 
('SORT_COLUMNS'='name',
0: jdbc:hive2://localhost:1> 'TABLE_BLOCKSIZE'='128',
0: jdbc:hive2://localhost:1> 'TABLE_BLOCKLET_SIZE'='128',
0: jdbc:hive2://localhost:1> 'SORT_SCOPE'='no_SORT');
+-+--+
| Result  |
+-+--+
+-+--+
No rows selected (1.04 seconds)
0: jdbc:hive2://localhost:1> insert overwrite table array_carbon4 select 
name,col,fee from array_orc;
+-+--+
| Result  |
+-+--+
+-+--+
No rows selected (5.065 seconds)
0: jdbc:hive2://localhost:1> create table array_carbon5(name string, col 
array<string>,fee int) STORED AS carbondata TBLPROPERTIES 
('SORT_COLUMNS'='name',
0: jdbc:hive2://localhost:1> 'TABLE_BLOCKSIZE'='128',
0: jdbc:hive2://localhost:1> 'TABLE_BLOCKLET_SIZE'='128',
0: jdbc:hive2://localhost:1> 'SORT_SCOPE'='global_SORT');
+-+--+
| Result  |
+-+--+
+-+--+
No rows selected (0.098 seconds)
0: jdbc:hive2://localhost:1> insert overwrite table array_carbon5 select 
name,col,fee from array_orc;
Error: java.lang.Exception: DataLoad failure (state=,code=0)


  was:
orcdata
create table array_orc(name string, col array<string>,fee int) STORED AS orc;
insert into array_orc values("xiao3",array('上呼吸道疾病 1','白内障1','胃溃疡1'),2);
insert into array_orc values("xiao3",array('上呼吸道疾病1 ','白内障1','胃溃疡1'),2);
insert into array_orc values("xiao3",array('上呼吸道疾病1','白内障 1','胃溃疡1'),2);
insert into array_orc values("xiao3",array('上呼吸道疾病1','白内障1','胃溃疡 1'),2);
insert into array_orc values("xiao3",array('上呼吸道疾病1','白内障1','胃溃疡1'),2);
insert into array_orc values("xiao5",array(null,'白内障1','胃溃疡1'),2);
insert into array_orc values("xiao5",null,2);
insert into array_orc values("xiao3",array('j'),2);
insert into array_orc values("xiao4",array('j','j'),2);
insert into array_orc values("xiao4",NULL,2);



0: jdbc:hive2://localhost:1> use dict;
+-+--+
| Result  |
+-+--+
+-+--+
No rows selected (0.391 seconds)
0: jdbc:hive2://localhost:1> select * from array_orc;
++---+--+--+
|  name  |  col  | fee  |
++---+--+--+
| xiao3  | ["",null,"j"] | 3|
| xiao2  | ["上呼吸道疾病1","白内障1","胃溃疡1"] | 2|
| xiao3  | ["",null,"j"] | 3|
| xiao1  | ["上呼吸道疾病","白内障","胃溃疡"]| 1|
| xiao9  | NULL  | 3|
| xiao9  | NULL  | 3|
| xiao3  | NULL  | 3|
| xiao6  | NULL  | 3|
| xiao2  | ["上呼吸道疾病 1","白内障 1","胃溃疡 1"]  | 2|
| xiao1  | ["上呼吸道疾病 ","白内障 ","胃溃疡 "] | 1|
| xiao3  | NULL  | 3|
| xiao3  | [null]| 3|
| xiao3  | [""]  | 3|

[jira] [Updated] (CARBONDATA-3954) Global sorting with array, if read from ORC format, write to carbon, error; If you use no_sort, success;

2020-08-21 Thread xiaohui (Jira)


 [ 
https://issues.apache.org/jira/browse/CARBONDATA-3954?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

xiaohui updated CARBONDATA-3954:

Description: 
orcdata
create table array_orc(name string, col array<string>,fee int) STORED AS orc;
insert into array_orc values("xiao3",array('上呼吸道疾病 1','白内障1','胃溃疡1'),2);
insert into array_orc values("xiao3",array('上呼吸道疾病1 ','白内障1','胃溃疡1'),2);
insert into array_orc values("xiao3",array('上呼吸道疾病1','白内障 1','胃溃疡1'),2);
insert into array_orc values("xiao3",array('上呼吸道疾病1','白内障1','胃溃疡 1'),2);
insert into array_orc values("xiao3",array('上呼吸道疾病1','白内障1','胃溃疡1'),2);
insert into array_orc values("xiao5",array(null,'白内障1','胃溃疡1'),2);
insert into array_orc values("xiao5",null,2);
insert into array_orc values("xiao3",array('j'),2);
insert into array_orc values("xiao4",array('j','j'),2);
insert into array_orc values("xiao4",NULL,2);



0: jdbc:hive2://localhost:1> use dict;
+-+--+
| Result  |
+-+--+
+-+--+
No rows selected (0.391 seconds)
0: jdbc:hive2://localhost:1> select * from array_orc;
++---+--+--+
|  name  |  col  | fee  |
++---+--+--+
| xiao3  | ["",null,"j"] | 3|
| xiao2  | ["上呼吸道疾病1","白内障1","胃溃疡1"] | 2|
| xiao3  | ["",null,"j"] | 3|
| xiao1  | ["上呼吸道疾病","白内障","胃溃疡"]| 1|
| xiao9  | NULL  | 3|
| xiao9  | NULL  | 3|
| xiao3  | NULL  | 3|
| xiao6  | NULL  | 3|
| xiao2  | ["上呼吸道疾病 1","白内障 1","胃溃疡 1"]  | 2|
| xiao1  | ["上呼吸道疾病 ","白内障 ","胃溃疡 "] | 1|
| xiao3  | NULL  | 3|
| xiao3  | [null]| 3|
| xiao3  | [""]  | 3|
++---+--+--+
13 rows selected (0.416 seconds)
0: jdbc:hive2://localhost:1> create table array_carbon4(name string, col 
array<string>,fee int) STORED AS carbondata TBLPROPERTIES 
('SORT_COLUMNS'='name',
0: jdbc:hive2://localhost:1> 'TABLE_BLOCKSIZE'='128',
0: jdbc:hive2://localhost:1> 'TABLE_BLOCKLET_SIZE'='128',
0: jdbc:hive2://localhost:1> 'SORT_SCOPE'='no_SORT');
+-+--+
| Result  |
+-+--+
+-+--+
No rows selected (1.04 seconds)
0: jdbc:hive2://localhost:1> insert overwrite table array_carbon4 select 
name,col,fee from array_orc;
+-+--+
| Result  |
+-+--+
+-+--+
No rows selected (5.065 seconds)
0: jdbc:hive2://localhost:1> create table array_carbon5(name string, col 
array<string>,fee int) STORED AS carbondata TBLPROPERTIES 
('SORT_COLUMNS'='name',
0: jdbc:hive2://localhost:1> 'TABLE_BLOCKSIZE'='128',
0: jdbc:hive2://localhost:1> 'TABLE_BLOCKLET_SIZE'='128',
0: jdbc:hive2://localhost:1> 'SORT_SCOPE'='global_SORT');
+-+--+
| Result  |
+-+--+
+-+--+
No rows selected (0.098 seconds)
0: jdbc:hive2://localhost:1> insert overwrite table array_carbon5 select 
name,col,fee from array_orc;
Error: java.lang.Exception: DataLoad failure (state=,code=0)


  was:
0: jdbc:hive2://localhost:1> use dict;
+-+--+
| Result  |
+-+--+
+-+--+
No rows selected (0.391 seconds)
0: jdbc:hive2://localhost:1> select * from array_orc;
++---+--+--+
|  name  |  col  | fee  |
++---+--+--+
| xiao3  | ["",null,"j"] | 3|
| xiao2  | ["上呼吸道疾病1","白内障1","胃溃疡1"] | 2|
| xiao3  | ["",null,"j"] | 3|
| xiao1  | ["上呼吸道疾病","白内障","胃溃疡"]| 1|
| xiao9  | NULL  | 3|
| xiao9  | NULL  | 3|
| xiao3  | NULL  | 3|
| xiao6  | NULL  | 3|
| xiao2  | ["上呼吸道疾病 1","白内障 1","胃溃疡 1"]  | 2|
| xiao1  | ["上呼吸道疾病 ","白内障 ","胃溃疡 "] | 1|
| xiao3  | NULL  | 3|
| xiao3  | [null]| 3|
| xiao3  | [""]  | 3|
++---+--+--+
13 rows selected (0.416 seconds)
0: jdbc:hive2://localhost:1> create table array_carbon4(name string, col 
array<string>,fee int) STORED AS carbondata TBLPROPERTIES 
('SORT_COLUMNS'='name',
0: jdbc:hive2://localhost:1> 'TABLE_BLOCKSIZE'='128',
0: jdbc:hive2://localhost:1> 'TABLE_BLOCKLET_SIZE'='128',
0: jdbc:hive2://localhost:1> 'SORT_SCOPE'='no_SORT');
+-+--+
| Result  |
+-+--+
+-+--+
No rows selected (1.04 seconds)
0: jdbc:hive2://localhost:1> insert overwrite table array_carbon4 select 
name,col,fee from array_orc;
+-+--+
| Result  |
+-+--+
+-+--+
No rows selected (5.065 seconds)
0: jdbc:hive2://localhost:1> create table 

[jira] [Updated] (CARBONDATA-3954) Global sorting with array, if read from ORC format, write to carbon, error; If you use no_sort, success;

2020-08-18 Thread xiaohui (Jira)


 [ 
https://issues.apache.org/jira/browse/CARBONDATA-3954?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

xiaohui updated CARBONDATA-3954:

Attachment: wx20200818-174...@2x.png
wx20200818-174...@2x.png

> Global sorting with array, if read from ORC format, write to carbon, error; 
> If you use no_sort, success;
> 
>
> Key: CARBONDATA-3954
> URL: https://issues.apache.org/jira/browse/CARBONDATA-3954
> Project: CarbonData
>  Issue Type: Bug
>  Components: spark-integration
>Affects Versions: 2.0.0
>Reporter: xiaohui
>Priority: Major
> Attachments: wx20200818-174...@2x.png, wx20200818-174...@2x.png
>
>
> 0: jdbc:hive2://localhost:1> use dict;
> +-+--+
> | Result  |
> +-+--+
> +-+--+
> No rows selected (0.391 seconds)
> 0: jdbc:hive2://localhost:1> select * from array_orc;
> ++---+--+--+
> |  name  |  col  | fee  |
> ++---+--+--+
> | xiao3  | ["",null,"j"] | 3|
> | xiao2  | ["上呼吸道疾病1","白内障1","胃溃疡1"] | 2|
> | xiao3  | ["",null,"j"] | 3|
> | xiao1  | ["上呼吸道疾病","白内障","胃溃疡"]| 1|
> | xiao9  | NULL  | 3|
> | xiao9  | NULL  | 3|
> | xiao3  | NULL  | 3|
> | xiao6  | NULL  | 3|
> | xiao2  | ["上呼吸道疾病 1","白内障 1","胃溃疡 1"]  | 2|
> | xiao1  | ["上呼吸道疾病 ","白内障 ","胃溃疡 "] | 1|
> | xiao3  | NULL  | 3|
> | xiao3  | [null]| 3|
> | xiao3  | [""]  | 3|
> ++---+--+--+
> 13 rows selected (0.416 seconds)
> 0: jdbc:hive2://localhost:1> create table array_carbon4(name string, col 
> array<string>,fee int) STORED AS carbondata TBLPROPERTIES 
> ('SORT_COLUMNS'='name',
> 0: jdbc:hive2://localhost:1> 'TABLE_BLOCKSIZE'='128',
> 0: jdbc:hive2://localhost:1> 'TABLE_BLOCKLET_SIZE'='128',
> 0: jdbc:hive2://localhost:1> 'SORT_SCOPE'='no_SORT');
> +-+--+
> | Result  |
> +-+--+
> +-+--+
> No rows selected (1.04 seconds)
> 0: jdbc:hive2://localhost:1> insert overwrite table array_carbon4 select 
> name,col,fee from array_orc;
> +-+--+
> | Result  |
> +-+--+
> +-+--+
> No rows selected (5.065 seconds)
> 0: jdbc:hive2://localhost:1> create table array_carbon5(name string, col 
> array<string>,fee int) STORED AS carbondata TBLPROPERTIES 
> ('SORT_COLUMNS'='name',
> 0: jdbc:hive2://localhost:1> 'TABLE_BLOCKSIZE'='128',
> 0: jdbc:hive2://localhost:1> 'TABLE_BLOCKLET_SIZE'='128',
> 0: jdbc:hive2://localhost:1> 'SORT_SCOPE'='global_SORT');
> +-+--+
> | Result  |
> +-+--+
> +-+--+
> No rows selected (0.098 seconds)
> 0: jdbc:hive2://localhost:1> insert overwrite table array_carbon5 select 
> name,col,fee from array_orc;
> Error: java.lang.Exception: DataLoad failure (state=,code=0)



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Updated] (CARBONDATA-3954) Global sorting with array, if read from ORC format, write to carbon, error; If you use no_sort, success;

2020-08-18 Thread xiaohui (Jira)


 [ 
https://issues.apache.org/jira/browse/CARBONDATA-3954?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

xiaohui updated CARBONDATA-3954:

Description: 
0: jdbc:hive2://localhost:1> use dict;
+-+--+
| Result  |
+-+--+
+-+--+
No rows selected (0.391 seconds)
0: jdbc:hive2://localhost:1> select * from array_orc;
++---+--+--+
|  name  |  col  | fee  |
++---+--+--+
| xiao3  | ["",null,"j"] | 3|
| xiao2  | ["上呼吸道疾病1","白内障1","胃溃疡1"] | 2|
| xiao3  | ["",null,"j"] | 3|
| xiao1  | ["上呼吸道疾病","白内障","胃溃疡"]| 1|
| xiao9  | NULL  | 3|
| xiao9  | NULL  | 3|
| xiao3  | NULL  | 3|
| xiao6  | NULL  | 3|
| xiao2  | ["上呼吸道疾病 1","白内障 1","胃溃疡 1"]  | 2|
| xiao1  | ["上呼吸道疾病 ","白内障 ","胃溃疡 "] | 1|
| xiao3  | NULL  | 3|
| xiao3  | [null]| 3|
| xiao3  | [""]  | 3|
++---+--+--+
13 rows selected (0.416 seconds)
0: jdbc:hive2://localhost:1> create table array_carbon4(name string, col 
array<string>,fee int) STORED AS carbondata TBLPROPERTIES 
('SORT_COLUMNS'='name',
0: jdbc:hive2://localhost:1> 'TABLE_BLOCKSIZE'='128',
0: jdbc:hive2://localhost:1> 'TABLE_BLOCKLET_SIZE'='128',
0: jdbc:hive2://localhost:1> 'SORT_SCOPE'='no_SORT');
+-+--+
| Result  |
+-+--+
+-+--+
No rows selected (1.04 seconds)
0: jdbc:hive2://localhost:1> insert overwrite table array_carbon4 select 
name,col,fee from array_orc;
+-+--+
| Result  |
+-+--+
+-+--+
No rows selected (5.065 seconds)
0: jdbc:hive2://localhost:1> create table array_carbon5(name string, col 
array<string>,fee int) STORED AS carbondata TBLPROPERTIES 
('SORT_COLUMNS'='name',
0: jdbc:hive2://localhost:1> 'TABLE_BLOCKSIZE'='128',
0: jdbc:hive2://localhost:1> 'TABLE_BLOCKLET_SIZE'='128',
0: jdbc:hive2://localhost:1> 'SORT_SCOPE'='global_SORT');
+-+--+
| Result  |
+-+--+
+-+--+
No rows selected (0.098 seconds)
0: jdbc:hive2://localhost:1> insert overwrite table array_carbon5 select 
name,col,fee from array_orc;
Error: java.lang.Exception: DataLoad failure (state=,code=0)


  was:
0: jdbc:hive2://localhost:1> use dict;
+-+--+
| Result  |
+-+--+
+-+--+
No rows selected (0.391 seconds)
0: jdbc:hive2://localhost:1> insert overwrite table array_carbon3 select 
name,col,fee from array_orc;
Error: java.lang.Exception: DataLoad failure (state=,code=0)
0: jdbc:hive2://localhost:1> create table array_carbon4(name string, col 
array<string>,fee int) STORED AS carbondata TBLPROPERTIES 
('SORT_COLUMNS'='name',
0: jdbc:hive2://localhost:1> 'TABLE_BLOCKSIZE'='128',
0: jdbc:hive2://localhost:1> 'TABLE_BLOCKLET_SIZE'='128',
0: jdbc:hive2://localhost:1> 'SORT_SCOPE'='no_SORT');
+-+--+
| Result  |
+-+--+
+-+--+
No rows selected (1.04 seconds)
0: jdbc:hive2://localhost:1> insert overwrite table array_carbon4 select 
name,col,fee from array_orc;
+-+--+
| Result  |
+-+--+
+-+--+
No rows selected (5.065 seconds)
0: jdbc:hive2://localhost:1> create table array_carbon5(name string, col 
array<string>,fee int) STORED AS carbondata TBLPROPERTIES 
('SORT_COLUMNS'='name',
0: jdbc:hive2://localhost:1> 'TABLE_BLOCKSIZE'='128',
0: jdbc:hive2://localhost:1> 'TABLE_BLOCKLET_SIZE'='128',
0: jdbc:hive2://localhost:1> 'SORT_SCOPE'='global_SORT');
+-+--+
| Result  |
+-+--+
+-+--+
No rows selected (0.098 seconds)
0: jdbc:hive2://localhost:1> insert overwrite table array_carbon5 select 
name,col,fee from array_orc;
Error: java.lang.Exception: DataLoad failure (state=,code=0)
0: jdbc:hive2://localhost:1> select * from array_orc;
++---+--+--+
|  name  |  col  | fee  |
++---+--+--+
| xiao3  | ["",null,"j"] | 3|
| xiao2  | ["上呼吸道疾病1","白内障1","胃溃疡1"] | 2|
| xiao3  | ["",null,"j"] | 3|
| xiao1  | ["上呼吸道疾病","白内障","胃溃疡"]| 1|
| xiao9  | NULL  | 3|
| xiao9  | NULL  | 3|
| xiao3  | NULL  | 3|
| xiao6  | NULL  | 3|
| xiao2  | ["上呼吸道疾病 1","白内障 1","胃溃疡 1"]  | 2|
| xiao1  | ["上呼吸道疾病 ","白内障 ","胃溃疡 "] | 1|
| xiao3  | NULL  | 3|
| xiao3  | [null]| 3|
| xiao3  | [""]  | 3|
++---+--+--+
13 rows selected (0.416 seconds)


> Global