[ 
https://issues.apache.org/jira/browse/HIVE-18102?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Nillohit Nandi updated HIVE-18102:
----------------------------------
    Description: 
I am merging into a table daily which has a column type as an array of structs :
{color:blue}segment_info ARRAY < STRUCT <idlpSegmentName: STRING, 
idlpSegmentValue: STRING >>
{color}

*When table is created without transactional=true, behaviour is fine.
*
Example snippet:

{code:sql}

drop table struct_merge;

CREATE TABLE struct_merge (
lr_id STRING,
segment_info ARRAY < STRUCT <idlpSegmentName: STRING, idlpSegmentValue: STRING 
>>
)
CLUSTERED BY(lr_id)
INTO 1 BUCKETS
STORED AS ORC;

INSERT INTO TABLE struct_merge 
           Select 1 AS lr_id , 
           
ARRAY(NAMED_STRUCT('idlpSegmentName','viant','idlpSegmentValue','z'), 
NAMED_STRUCT('idlpSegmentName','instyle','idlpSegmentValue','3')) 
AS segment_info; 

select * from struct_merge;

{code}
hive> select * from default.struct_merge;
OK
{color:blue}1   
[{"idlpSegmentName":"viant","idlpSegmentValue":"z"},{"idlpSegmentName":"instyle","idlpSegmentValue":"3"}]
{color}
Time taken: 0.125 seconds, Fetched: 1 row(s)


*With transactional = true, behaviour is erratic, null values are populated as 
values of nested Structs.
*
Eg:
{code:sql}


drop table struct_merge;

CREATE TABLE struct_merge (
lr_id STRING,
segment_info ARRAY < STRUCT <idlpSegmentName: STRING, idlpSegmentValue: STRING 
>>
)
CLUSTERED BY(lr_id)
INTO 1 BUCKETS
STORED AS ORC
TBLPROPERTIES ('transactional'='true');

INSERT INTO TABLE struct_merge 
           Select 1 AS lr_id , 
           
ARRAY(NAMED_STRUCT('idlpSegmentName','viant','idlpSegmentValue','z'), 
NAMED_STRUCT('idlpSegmentName','instyle','idlpSegmentValue','3')) 
AS segment_info; 

select * from struct_merge;
//this one gives null values
{code}


hive> select * from default.struct_merge1;
OK
{color:red}1    
[{"idlpSegmentName":null,"idlpSegmentValue":null},{"idlpSegmentName":null,"idlpSegmentValue":null}]
{color}
Time taken: 0.608 seconds, Fetched: 1 row(s)


*Can this behaviour be explained? I need the transaction property since I am 
merging into a common table on daily data.*


  was:
I am merging into a table daily which has a column type as an array of structs :
{color:blue}segment_info ARRAY < STRUCT <idlpSegmentName: STRING, 
idlpSegmentValue: STRING >>
{color}

*When table is created without transactional=true, behaviour is fine.
*
Example snippet:

{code:sql}

drop table struct_merge;

CREATE TABLE struct_merge (
lr_id STRING,
segment_info ARRAY < STRUCT <idlpSegmentName: STRING, idlpSegmentValue: STRING 
>>
)
CLUSTERED BY(lr_id)
INTO 1 BUCKETS
STORED AS ORC;

INSERT INTO TABLE struct_merge 
           Select 1 AS lr_id , 
           
ARRAY(NAMED_STRUCT('idlpSegmentName','viant','idlpSegmentValue','z'), 
NAMED_STRUCT('idlpSegmentName','instyle','idlpSegmentValue','3')) 
AS segment_info; 

select * from struct_merge;

{code}
hive> select * from default.struct_merge;
OK
{color:blue}1   
[{"idlpSegmentName":"viant","idlpSegmentValue":"z"},{"idlpSegmentName":"instyle","idlpSegmentValue":"3"}]
{color}Time taken: 0.125 seconds, Fetched: 1 row(s)


*With transactional = true, behaviour is erratic, null values are populated as 
values of nested Structs.
*
Eg:
{code:sql}


drop table struct_merge;

CREATE TABLE struct_merge (
lr_id STRING,
segment_info ARRAY < STRUCT <idlpSegmentName: STRING, idlpSegmentValue: STRING 
>>
)
CLUSTERED BY(lr_id)
INTO 1 BUCKETS
STORED AS ORC
TBLPROPERTIES ('transactional'='true');

INSERT INTO TABLE struct_merge 
           Select 1 AS lr_id , 
           
ARRAY(NAMED_STRUCT('idlpSegmentName','viant','idlpSegmentValue','z'), 
NAMED_STRUCT('idlpSegmentName','instyle','idlpSegmentValue','3')) 
AS segment_info; 

select * from struct_merge;
//this one gives null values
{code}


hive> select * from default.struct_merge1;
OK
{color:red}1    
[{"idlpSegmentName":null,"idlpSegmentValue":null},{"idlpSegmentName":null,"idlpSegmentValue":null}]
{color}Time taken: 0.608 seconds, Fetched: 1 row(s)


*Can this behaviour be explained? I need the transaction property since I am 
merging into a common table on daily data.*



> Hive insertion for complex types not working when "transactional=true"
> ----------------------------------------------------------------------
>
>                 Key: HIVE-18102
>                 URL: https://issues.apache.org/jira/browse/HIVE-18102
>             Project: Hive
>          Issue Type: Bug
>    Affects Versions: 2.3.1
>         Environment: Running EMR cluster on AWS, with :
> Master: Running1m3.xlarge
> Core: Running4m3.xlarge
>            Reporter: Nillohit Nandi
>
> I am merging into a table daily which has a column type as an array of 
> structs :
> {color:blue}segment_info ARRAY < STRUCT <idlpSegmentName: STRING, 
> idlpSegmentValue: STRING >>
> {color}
> *When table is created without transactional=true, behaviour is fine.
> *
> Example snippet:
> {code:sql}
> drop table struct_merge;
> CREATE TABLE struct_merge (
> lr_id STRING,
> segment_info ARRAY < STRUCT <idlpSegmentName: STRING, idlpSegmentValue: 
> STRING >>
> )
> CLUSTERED BY(lr_id)
> INTO 1 BUCKETS
> STORED AS ORC;
> INSERT INTO TABLE struct_merge 
>            Select 1 AS lr_id , 
>            
> ARRAY(NAMED_STRUCT('idlpSegmentName','viant','idlpSegmentValue','z'), 
> NAMED_STRUCT('idlpSegmentName','instyle','idlpSegmentValue','3')) 
> AS segment_info; 
> select * from struct_merge;
> {code}
> hive> select * from default.struct_merge;
> OK
> {color:blue}1 
> [{"idlpSegmentName":"viant","idlpSegmentValue":"z"},{"idlpSegmentName":"instyle","idlpSegmentValue":"3"}]
> {color}
> Time taken: 0.125 seconds, Fetched: 1 row(s)
> *With transactional = true, behaviour is erratic, null values are populated 
> as values of nested Structs.
> *
> Eg:
> {code:sql}
> drop table struct_merge;
> CREATE TABLE struct_merge (
> lr_id STRING,
> segment_info ARRAY < STRUCT <idlpSegmentName: STRING, idlpSegmentValue: 
> STRING >>
> )
> CLUSTERED BY(lr_id)
> INTO 1 BUCKETS
> STORED AS ORC
> TBLPROPERTIES ('transactional'='true');
> INSERT INTO TABLE struct_merge 
>            Select 1 AS lr_id , 
>            
> ARRAY(NAMED_STRUCT('idlpSegmentName','viant','idlpSegmentValue','z'), 
> NAMED_STRUCT('idlpSegmentName','instyle','idlpSegmentValue','3')) 
> AS segment_info; 
> select * from struct_merge;
> //this one gives null values
> {code}
> hive> select * from default.struct_merge1;
> OK
> {color:red}1  
> [{"idlpSegmentName":null,"idlpSegmentValue":null},{"idlpSegmentName":null,"idlpSegmentValue":null}]
> {color}
> Time taken: 0.608 seconds, Fetched: 1 row(s)
> *Can this behaviour be explained? I need the transaction property since I am 
> merging into a common table on daily data.*



--
This message was sent by Atlassian JIRA
(v6.4.14#64029)

Reply via email to