[ 
https://issues.apache.org/jira/browse/HIVE-29328?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Zsolt Miskolczi updated HIVE-29328:
-----------------------------------
    Description: 
The ORC file format allows metadata field names in any letter case. For example, 
we have manual tests in which query-based compaction creates ORC files with 
lowercase field names (the root cause of this is under investigation).

Because OrcInputFormat.isOriginal currently compares the field names 
case-sensitively, FixAcidKeyIndex can fail if the ORC file footer contains its 
metadata fields (like currentTransaction) in lowercase.

 

To repro the issue manually: 

 
{color:#569cd6}set{color}{color:#d4d4d4} hive.support.concurrency=true;{color}
{color:#569cd6}set{color}{color:#d4d4d4} 
hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;{color}
{color:#569cd6}set{color}{color:#d4d4d4} hive.compactor.crud.query.based=true;{color}

{color:#569cd6}DROP{color}{color:#d4d4d4} 
{color}{color:#569cd6}TABLE{color}{color:#d4d4d4} 
{color}{color:#569cd6}IF{color}{color:#d4d4d4} 
{color}{color:#569cd6}EXISTS{color}{color:#d4d4d4} testtable;{color}
{color:#569cd6}DROP{color}{color:#d4d4d4} 
{color}{color:#569cd6}TABLE{color}{color:#d4d4d4} 
{color}{color:#569cd6}IF{color}{color:#d4d4d4} 
{color}{color:#569cd6}EXISTS{color}{color:#d4d4d4} testtable_chg1;{color}
{color:#569cd6}DROP{color}{color:#d4d4d4} 
{color}{color:#569cd6}TABLE{color}{color:#d4d4d4} 
{color}{color:#569cd6}IF{color}{color:#d4d4d4} 
{color}{color:#569cd6}EXISTS{color}{color:#d4d4d4} testtable_chg2;{color}

{color:#569cd6}CREATE{color}{color:#d4d4d4} 
{color}{color:#569cd6}TABLE{color}{color:#d4d4d4} testtable (iD 
{color}{color:#569cd6}int{color}{color:#d4d4d4}, tXt string) STORED 
{color}{color:#569cd6}AS{color}{color:#d4d4d4} {color}{color:#569cd6}ORC{color}
{color:#d4d4d4} TBLPROPERTIES 
({color}{color:#ce9178}'NO_AUTO_COMPACTION'{color}{color:#d4d4d4}={color}{color:#ce9178}'true'{color}{color:#d4d4d4},
 
{color}{color:#ce9178}'transactional'{color}{color:#d4d4d4}={color}{color:#ce9178}'true'{color}{color:#d4d4d4});{color}
{color:#569cd6}CREATE{color}{color:#d4d4d4} 
{color}{color:#569cd6}TABLE{color}{color:#d4d4d4} testtable_chg1 (iD 
{color}{color:#569cd6}int{color}{color:#d4d4d4}, tXt string) STORED 
{color}{color:#569cd6}AS{color}{color:#d4d4d4} 
{color}{color:#569cd6}ORC{color}{color:#d4d4d4};{color}
{color:#569cd6}CREATE{color}{color:#d4d4d4} 
{color}{color:#569cd6}TABLE{color}{color:#d4d4d4} testtable_chg2 (iD 
{color}{color:#569cd6}int{color}{color:#d4d4d4}, tXt string) STORED 
{color}{color:#569cd6}AS{color}{color:#d4d4d4} 
{color}{color:#569cd6}ORC{color}{color:#d4d4d4};{color}

{color:#569cd6}INSERT INTO{color}{color:#d4d4d4} testtable 
{color}{color:#569cd6}VALUES{color}{color:#d4d4d4} 
({color}{color:#b5cea8}1{color}{color:#d4d4d4}, 
{color}{color:#ce9178}'base-A'{color}{color:#d4d4d4}), 
({color}{color:#b5cea8}2{color}{color:#d4d4d4}, 
{color}{color:#ce9178}'base-B'{color}{color:#d4d4d4});{color}
{color:#569cd6}INSERT INTO{color}{color:#d4d4d4} testtable_chg1 
{color}{color:#569cd6}VALUES{color}{color:#d4d4d4} 
({color}{color:#b5cea8}1{color}{color:#d4d4d4}, 
{color}{color:#ce9178}'chg1-A'{color}{color:#d4d4d4}), 
({color}{color:#b5cea8}3{color}{color:#d4d4d4}, 
{color}{color:#ce9178}'chg1-C'{color}{color:#d4d4d4}), 
({color}{color:#b5cea8}4{color}{color:#d4d4d4}, 
{color}{color:#ce9178}'chg1-D'{color}{color:#d4d4d4});{color}
{color:#569cd6}INSERT INTO{color}{color:#d4d4d4} testtable_chg2 
{color}{color:#569cd6}VALUES{color}{color:#d4d4d4} 
({color}{color:#b5cea8}1{color}{color:#d4d4d4}, 
{color}{color:#ce9178}'chg2-A'{color}{color:#d4d4d4}), 
({color}{color:#b5cea8}3{color}{color:#d4d4d4}, 
{color}{color:#ce9178}'chg2-C'{color}{color:#d4d4d4}), 
({color}{color:#b5cea8}5{color}{color:#d4d4d4}, 
{color}{color:#ce9178}'chg2-E'{color}{color:#d4d4d4});{color}

{color:#569cd6}MERGE{color}{color:#d4d4d4} 
{color}{color:#569cd6}INTO{color}{color:#d4d4d4} testtable 
{color}{color:#569cd6}AS{color}{color:#d4d4d4} T{color}
{color:#d4d4d4} {color}{color:#569cd6}USING{color}{color:#d4d4d4} 
testtable_chg1 {color}{color:#569cd6}AS{color}{color:#d4d4d4} S 
{color}{color:#569cd6}ON{color}{color:#d4d4d4} T.id = S.id{color}
{color:#d4d4d4} {color}{color:#569cd6}WHEN{color}{color:#d4d4d4} 
{color}{color:#569cd6}MATCHED{color}{color:#d4d4d4} 
{color}{color:#569cd6}AND{color}{color:#d4d4d4} (T.txt != S.txt 
{color}{color:#569cd6}AND{color}{color:#d4d4d4} S.txt {color}{color:#569cd6}IS 
NOT NULL{color}{color:#d4d4d4}) 
{color}{color:#569cd6}THEN{color}{color:#d4d4d4} 
{color}{color:#569cd6}UPDATE{color}{color:#d4d4d4} 
{color}{color:#569cd6}SET{color}{color:#d4d4d4} txt = S.txt{color}
{color:#d4d4d4} {color}{color:#569cd6}WHEN{color}{color:#d4d4d4} 
{color}{color:#569cd6}NOT{color}{color:#d4d4d4} 
{color}{color:#569cd6}MATCHED{color}{color:#d4d4d4} 
{color}{color:#569cd6}THEN{color}{color:#d4d4d4} 
{color}{color:#569cd6}INSERT{color}{color:#d4d4d4} 
{color}{color:#569cd6}VALUES{color}{color:#d4d4d4} (S.id, S.txt);{color}

{color:#569cd6}MERGE{color}{color:#d4d4d4} 
{color}{color:#569cd6}INTO{color}{color:#d4d4d4} testtable 
{color}{color:#569cd6}AS{color}{color:#d4d4d4} T{color}
{color:#d4d4d4} {color}{color:#569cd6}USING{color}{color:#d4d4d4} 
testtable_chg2 {color}{color:#569cd6}AS{color}{color:#d4d4d4} S 
{color}{color:#569cd6}ON{color}{color:#d4d4d4} T.id = S.id{color}
{color:#d4d4d4} {color}{color:#569cd6}WHEN{color}{color:#d4d4d4} 
{color}{color:#569cd6}MATCHED{color}{color:#d4d4d4} 
{color}{color:#569cd6}AND{color}{color:#d4d4d4} (T.txt != S.txt 
{color}{color:#569cd6}AND{color}{color:#d4d4d4} S.txt {color}{color:#569cd6}IS 
NOT NULL{color}{color:#d4d4d4}) 
{color}{color:#569cd6}THEN{color}{color:#d4d4d4} 
{color}{color:#569cd6}UPDATE{color}{color:#d4d4d4} 
{color}{color:#569cd6}SET{color}{color:#d4d4d4} txt = S.txt{color}
{color:#d4d4d4} {color}{color:#569cd6}WHEN{color}{color:#d4d4d4} 
{color}{color:#569cd6}NOT{color}{color:#d4d4d4} 
{color}{color:#569cd6}MATCHED{color}{color:#d4d4d4} 
{color}{color:#569cd6}THEN{color}{color:#d4d4d4} 
{color}{color:#569cd6}INSERT{color}{color:#d4d4d4} 
{color}{color:#569cd6}VALUES{color}{color:#d4d4d4} (S.id, S.txt);{color}

{color:#569cd6}ALTER{color}{color:#d4d4d4} 
{color}{color:#569cd6}TABLE{color}{color:#d4d4d4} testtable COMPACT 
{color}{color:#ce9178}'MINOR'{color}{color:#d4d4d4} 
{color}{color:#569cd6}AND{color}{color:#d4d4d4} WAIT;{color}

> Orc acid footer metadata should be case insensitive
> ---------------------------------------------------
>
>                 Key: HIVE-29328
>                 URL: https://issues.apache.org/jira/browse/HIVE-29328
>             Project: Hive
>          Issue Type: Bug
>            Reporter: Zsolt Miskolczi
>            Priority: Major
>              Labels: pull-request-available
>
> The ORC file format allows metadata field names in any letter case. For 
> example, we have manual tests in which query-based compaction creates ORC 
> files with lowercase field names (the root cause of this is under investigation).
> Because OrcInputFormat.isOriginal currently compares the field names 
> case-sensitively, FixAcidKeyIndex can fail if the ORC file footer contains its 
> metadata fields (like currentTransaction) in lowercase.
>  
> To repro the issue manually: 
>  
> {color:#569cd6}set{color}{color:#d4d4d4} hive.support.concurrency=true;{color}
> {color:#569cd6}set{color}{color:#d4d4d4} 
> hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;{color}
> {color:#569cd6}set{color}{color:#d4d4d4} 
> hive.compactor.crud.query.based=true;{color}
> {color:#569cd6}DROP{color}{color:#d4d4d4} 
> {color}{color:#569cd6}TABLE{color}{color:#d4d4d4} 
> {color}{color:#569cd6}IF{color}{color:#d4d4d4} 
> {color}{color:#569cd6}EXISTS{color}{color:#d4d4d4} testtable;{color}
> {color:#569cd6}DROP{color}{color:#d4d4d4} 
> {color}{color:#569cd6}TABLE{color}{color:#d4d4d4} 
> {color}{color:#569cd6}IF{color}{color:#d4d4d4} 
> {color}{color:#569cd6}EXISTS{color}{color:#d4d4d4} testtable_chg1;{color}
> {color:#569cd6}DROP{color}{color:#d4d4d4} 
> {color}{color:#569cd6}TABLE{color}{color:#d4d4d4} 
> {color}{color:#569cd6}IF{color}{color:#d4d4d4} 
> {color}{color:#569cd6}EXISTS{color}{color:#d4d4d4} testtable_chg2;{color}
> {color:#569cd6}CREATE{color}{color:#d4d4d4} 
> {color}{color:#569cd6}TABLE{color}{color:#d4d4d4} testtable (iD 
> {color}{color:#569cd6}int{color}{color:#d4d4d4}, tXt string) STORED 
> {color}{color:#569cd6}AS{color}{color:#d4d4d4} 
> {color}{color:#569cd6}ORC{color}
> {color:#d4d4d4} TBLPROPERTIES 
> ({color}{color:#ce9178}'NO_AUTO_COMPACTION'{color}{color:#d4d4d4}={color}{color:#ce9178}'true'{color}{color:#d4d4d4},
>  
> {color}{color:#ce9178}'transactional'{color}{color:#d4d4d4}={color}{color:#ce9178}'true'{color}{color:#d4d4d4});{color}
> {color:#569cd6}CREATE{color}{color:#d4d4d4} 
> {color}{color:#569cd6}TABLE{color}{color:#d4d4d4} testtable_chg1 (iD 
> {color}{color:#569cd6}int{color}{color:#d4d4d4}, tXt string) STORED 
> {color}{color:#569cd6}AS{color}{color:#d4d4d4} 
> {color}{color:#569cd6}ORC{color}{color:#d4d4d4};{color}
> {color:#569cd6}CREATE{color}{color:#d4d4d4} 
> {color}{color:#569cd6}TABLE{color}{color:#d4d4d4} testtable_chg2 (iD 
> {color}{color:#569cd6}int{color}{color:#d4d4d4}, tXt string) STORED 
> {color}{color:#569cd6}AS{color}{color:#d4d4d4} 
> {color}{color:#569cd6}ORC{color}{color:#d4d4d4};{color}
> {color:#569cd6}INSERT INTO{color}{color:#d4d4d4} testtable 
> {color}{color:#569cd6}VALUES{color}{color:#d4d4d4} 
> ({color}{color:#b5cea8}1{color}{color:#d4d4d4}, 
> {color}{color:#ce9178}'base-A'{color}{color:#d4d4d4}), 
> ({color}{color:#b5cea8}2{color}{color:#d4d4d4}, 
> {color}{color:#ce9178}'base-B'{color}{color:#d4d4d4});{color}
> {color:#569cd6}INSERT INTO{color}{color:#d4d4d4} testtable_chg1 
> {color}{color:#569cd6}VALUES{color}{color:#d4d4d4} 
> ({color}{color:#b5cea8}1{color}{color:#d4d4d4}, 
> {color}{color:#ce9178}'chg1-A'{color}{color:#d4d4d4}), 
> ({color}{color:#b5cea8}3{color}{color:#d4d4d4}, 
> {color}{color:#ce9178}'chg1-C'{color}{color:#d4d4d4}), 
> ({color}{color:#b5cea8}4{color}{color:#d4d4d4}, 
> {color}{color:#ce9178}'chg1-D'{color}{color:#d4d4d4});{color}
> {color:#569cd6}INSERT INTO{color}{color:#d4d4d4} testtable_chg2 
> {color}{color:#569cd6}VALUES{color}{color:#d4d4d4} 
> ({color}{color:#b5cea8}1{color}{color:#d4d4d4}, 
> {color}{color:#ce9178}'chg2-A'{color}{color:#d4d4d4}), 
> ({color}{color:#b5cea8}3{color}{color:#d4d4d4}, 
> {color}{color:#ce9178}'chg2-C'{color}{color:#d4d4d4}), 
> ({color}{color:#b5cea8}5{color}{color:#d4d4d4}, 
> {color}{color:#ce9178}'chg2-E'{color}{color:#d4d4d4});{color}
> {color:#569cd6}MERGE{color}{color:#d4d4d4} 
> {color}{color:#569cd6}INTO{color}{color:#d4d4d4} testtable 
> {color}{color:#569cd6}AS{color}{color:#d4d4d4} T{color}
> {color:#d4d4d4} {color}{color:#569cd6}USING{color}{color:#d4d4d4} 
> testtable_chg1 {color}{color:#569cd6}AS{color}{color:#d4d4d4} S 
> {color}{color:#569cd6}ON{color}{color:#d4d4d4} T.id = S.id{color}
> {color:#d4d4d4} {color}{color:#569cd6}WHEN{color}{color:#d4d4d4} 
> {color}{color:#569cd6}MATCHED{color}{color:#d4d4d4} 
> {color}{color:#569cd6}AND{color}{color:#d4d4d4} (T.txt != S.txt 
> {color}{color:#569cd6}AND{color}{color:#d4d4d4} S.txt 
> {color}{color:#569cd6}IS NOT NULL{color}{color:#d4d4d4}) 
> {color}{color:#569cd6}THEN{color}{color:#d4d4d4} 
> {color}{color:#569cd6}UPDATE{color}{color:#d4d4d4} 
> {color}{color:#569cd6}SET{color}{color:#d4d4d4} txt = S.txt{color}
> {color:#d4d4d4} {color}{color:#569cd6}WHEN{color}{color:#d4d4d4} 
> {color}{color:#569cd6}NOT{color}{color:#d4d4d4} 
> {color}{color:#569cd6}MATCHED{color}{color:#d4d4d4} 
> {color}{color:#569cd6}THEN{color}{color:#d4d4d4} 
> {color}{color:#569cd6}INSERT{color}{color:#d4d4d4} 
> {color}{color:#569cd6}VALUES{color}{color:#d4d4d4} (S.id, S.txt);{color}
> {color:#569cd6}MERGE{color}{color:#d4d4d4} 
> {color}{color:#569cd6}INTO{color}{color:#d4d4d4} testtable 
> {color}{color:#569cd6}AS{color}{color:#d4d4d4} T{color}
> {color:#d4d4d4} {color}{color:#569cd6}USING{color}{color:#d4d4d4} 
> testtable_chg2 {color}{color:#569cd6}AS{color}{color:#d4d4d4} S 
> {color}{color:#569cd6}ON{color}{color:#d4d4d4} T.id = S.id{color}
> {color:#d4d4d4} {color}{color:#569cd6}WHEN{color}{color:#d4d4d4} 
> {color}{color:#569cd6}MATCHED{color}{color:#d4d4d4} 
> {color}{color:#569cd6}AND{color}{color:#d4d4d4} (T.txt != S.txt 
> {color}{color:#569cd6}AND{color}{color:#d4d4d4} S.txt 
> {color}{color:#569cd6}IS NOT NULL{color}{color:#d4d4d4}) 
> {color}{color:#569cd6}THEN{color}{color:#d4d4d4} 
> {color}{color:#569cd6}UPDATE{color}{color:#d4d4d4} 
> {color}{color:#569cd6}SET{color}{color:#d4d4d4} txt = S.txt{color}
> {color:#d4d4d4} {color}{color:#569cd6}WHEN{color}{color:#d4d4d4} 
> {color}{color:#569cd6}NOT{color}{color:#d4d4d4} 
> {color}{color:#569cd6}MATCHED{color}{color:#d4d4d4} 
> {color}{color:#569cd6}THEN{color}{color:#d4d4d4} 
> {color}{color:#569cd6}INSERT{color}{color:#d4d4d4} 
> {color}{color:#569cd6}VALUES{color}{color:#d4d4d4} (S.id, S.txt);{color}
> {color:#569cd6}ALTER{color}{color:#d4d4d4} 
> {color}{color:#569cd6}TABLE{color}{color:#d4d4d4} testtable COMPACT 
> {color}{color:#ce9178}'MINOR'{color}{color:#d4d4d4} 
> {color}{color:#569cd6}AND{color}{color:#d4d4d4} WAIT;{color}



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to