[ https://issues.apache.org/jira/browse/HIVE-18102?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Nillohit Nandi updated HIVE-18102: ---------------------------------- Description: I am merging into a table daily which has a column type as an array of structs : {color:blue}segment_info ARRAY < STRUCT <idlpSegmentName: STRING, idlpSegmentValue: STRING >> {color} *When table is created without transactional=true, behaviour is fine. * Example snippet: {code:sql} drop table struct_merge; CREATE TABLE struct_merge ( lr_id STRING, segment_info ARRAY < STRUCT <idlpSegmentName: STRING, idlpSegmentValue: STRING >> ) CLUSTERED BY(lr_id) INTO 1 BUCKETS STORED AS ORC; INSERT INTO TABLE struct_merge Select 1 AS lr_id , ARRAY(NAMED_STRUCT('idlpSegmentName','viant','idlpSegmentValue','z'), NAMED_STRUCT('idlpSegmentName','instyle','idlpSegmentValue','3')) AS segment_info; select * from struct_merge; {code} hive> select * from default.struct_merge; OK {color:blue}1 [{"idlpSegmentName":"viant","idlpSegmentValue":"z"},{"idlpSegmentName":"instyle","idlpSegmentValue":"3"}] {color} Time taken: 0.125 seconds, Fetched: 1 row(s) *With transactional = true, behaviour is erratic, null values are populated as values of nested Structs. * Eg: {code:sql} drop table struct_merge; CREATE TABLE struct_merge ( lr_id STRING, segment_info ARRAY < STRUCT <idlpSegmentName: STRING, idlpSegmentValue: STRING >> ) CLUSTERED BY(lr_id) INTO 1 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional'='true'); INSERT INTO TABLE struct_merge Select 1 AS lr_id , ARRAY(NAMED_STRUCT('idlpSegmentName','viant','idlpSegmentValue','z'), NAMED_STRUCT('idlpSegmentName','instyle','idlpSegmentValue','3')) AS segment_info; select * from struct_merge; //this one gives null values {code} hive> select * from default.struct_merge1; OK {color:red}1 [{"idlpSegmentName":null,"idlpSegmentValue":null},{"idlpSegmentName":null,"idlpSegmentValue":null}] {color} Time taken: 0.608 seconds, Fetched: 1 row(s) *Can this behaviour be explained? I need the transaction property since I am merging into a common table on daily data.* was: I am merging into a table daily which has a column type as an array of structs : {color:blue}segment_info ARRAY < STRUCT <idlpSegmentName: STRING, idlpSegmentValue: STRING >> {color} *When table is created without transactional=true, behaviour is fine. * Example snippet: {code:sql} drop table struct_merge; CREATE TABLE struct_merge ( lr_id STRING, segment_info ARRAY < STRUCT <idlpSegmentName: STRING, idlpSegmentValue: STRING >> ) CLUSTERED BY(lr_id) INTO 1 BUCKETS STORED AS ORC; INSERT INTO TABLE struct_merge Select 1 AS lr_id , ARRAY(NAMED_STRUCT('idlpSegmentName','viant','idlpSegmentValue','z'), NAMED_STRUCT('idlpSegmentName','instyle','idlpSegmentValue','3')) AS segment_info; select * from struct_merge; {code} hive> select * from default.struct_merge; OK {color:blue}1 [{"idlpSegmentName":"viant","idlpSegmentValue":"z"},{"idlpSegmentName":"instyle","idlpSegmentValue":"3"}] {color}Time taken: 0.125 seconds, Fetched: 1 row(s) *With transactional = true, behaviour is erratic, null values are populated as values of nested Structs. * Eg: {code:sql} drop table struct_merge; CREATE TABLE struct_merge ( lr_id STRING, segment_info ARRAY < STRUCT <idlpSegmentName: STRING, idlpSegmentValue: STRING >> ) CLUSTERED BY(lr_id) INTO 1 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional'='true'); INSERT INTO TABLE struct_merge Select 1 AS lr_id , ARRAY(NAMED_STRUCT('idlpSegmentName','viant','idlpSegmentValue','z'), NAMED_STRUCT('idlpSegmentName','instyle','idlpSegmentValue','3')) AS segment_info; select * from struct_merge; //this one gives null values {code} hive> select * from default.struct_merge1; OK {color:red}1 [{"idlpSegmentName":null,"idlpSegmentValue":null},{"idlpSegmentName":null,"idlpSegmentValue":null}] {color}Time taken: 0.608 seconds, Fetched: 1 row(s) *Can this behaviour be explained? I need the transaction property since I am merging into a common table on daily data.* > Hive insertion for complex types not working when "transactional=true" > ---------------------------------------------------------------------- > > Key: HIVE-18102 > URL: https://issues.apache.org/jira/browse/HIVE-18102 > Project: Hive > Issue Type: Bug > Affects Versions: 2.3.1 > Environment: Running EMR cluster on AWS, with : > Master: Running1m3.xlarge > Core: Running4m3.xlarge > Reporter: Nillohit Nandi > > I am merging into a table daily which has a column type as an array of > structs : > {color:blue}segment_info ARRAY < STRUCT <idlpSegmentName: STRING, > idlpSegmentValue: STRING >> > {color} > *When table is created without transactional=true, behaviour is fine. > * > Example snippet: > {code:sql} > drop table struct_merge; > CREATE TABLE struct_merge ( > lr_id STRING, > segment_info ARRAY < STRUCT <idlpSegmentName: STRING, idlpSegmentValue: > STRING >> > ) > CLUSTERED BY(lr_id) > INTO 1 BUCKETS > STORED AS ORC; > INSERT INTO TABLE struct_merge > Select 1 AS lr_id , > > ARRAY(NAMED_STRUCT('idlpSegmentName','viant','idlpSegmentValue','z'), > NAMED_STRUCT('idlpSegmentName','instyle','idlpSegmentValue','3')) > AS segment_info; > select * from struct_merge; > {code} > hive> select * from default.struct_merge; > OK > {color:blue}1 > [{"idlpSegmentName":"viant","idlpSegmentValue":"z"},{"idlpSegmentName":"instyle","idlpSegmentValue":"3"}] > {color} > Time taken: 0.125 seconds, Fetched: 1 row(s) > *With transactional = true, behaviour is erratic, null values are populated > as values of nested Structs. > * > Eg: > {code:sql} > drop table struct_merge; > CREATE TABLE struct_merge ( > lr_id STRING, > segment_info ARRAY < STRUCT <idlpSegmentName: STRING, idlpSegmentValue: > STRING >> > ) > CLUSTERED BY(lr_id) > INTO 1 BUCKETS > STORED AS ORC > TBLPROPERTIES ('transactional'='true'); > INSERT INTO TABLE struct_merge > Select 1 AS lr_id , > > ARRAY(NAMED_STRUCT('idlpSegmentName','viant','idlpSegmentValue','z'), > NAMED_STRUCT('idlpSegmentName','instyle','idlpSegmentValue','3')) > AS segment_info; > select * from struct_merge; > //this one gives null values > {code} > hive> select * from default.struct_merge1; > OK > {color:red}1 > [{"idlpSegmentName":null,"idlpSegmentValue":null},{"idlpSegmentName":null,"idlpSegmentValue":null}] > {color} > Time taken: 0.608 seconds, Fetched: 1 row(s) > *Can this behaviour be explained? I need the transaction property since I am > merging into a common table on daily data.* -- This message was sent by Atlassian JIRA (v6.4.14#64029)