[
https://issues.apache.org/jira/browse/HIVE-29328?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Zsolt Miskolczi updated HIVE-29328:
-----------------------------------
Description:
The ORC file format allows metadata field names in any casing. For example,
we have manual tests where query-based compaction creates ORC files with
lowercase field names (the root cause of this is under investigation).
As OrcInputFormat.isOriginal currently checks the field names with strict
casing, FixAcidKeyIndex can fail if the ORC file footer contains its metadata
field names (like currentTransaction) in lowercase.
To repro the issue manually:
{color:#569cd6}set{color}{color:#d4d4d4} hive.support.concurrency=true;{color}
{color:#569cd6}set{color}{color:#d4d4d4}
hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;{color}
{color:#569cd6}set{color}{color:#d4d4d4} hive.compactor.crud.query.based=true;{color}
{color:#569cd6}DROP{color}{color:#d4d4d4}
{color}{color:#569cd6}TABLE{color}{color:#d4d4d4}
{color}{color:#569cd6}IF{color}{color:#d4d4d4}
{color}{color:#569cd6}EXISTS{color}{color:#d4d4d4} testtable;{color}
{color:#569cd6}DROP{color}{color:#d4d4d4}
{color}{color:#569cd6}TABLE{color}{color:#d4d4d4}
{color}{color:#569cd6}IF{color}{color:#d4d4d4}
{color}{color:#569cd6}EXISTS{color}{color:#d4d4d4} testtable_chg1;{color}
{color:#569cd6}DROP{color}{color:#d4d4d4}
{color}{color:#569cd6}TABLE{color}{color:#d4d4d4}
{color}{color:#569cd6}IF{color}{color:#d4d4d4}
{color}{color:#569cd6}EXISTS{color}{color:#d4d4d4} testtable_chg2;{color}
{color:#569cd6}CREATE{color}{color:#d4d4d4}
{color}{color:#569cd6}TABLE{color}{color:#d4d4d4} testtable (iD
{color}{color:#569cd6}int{color}{color:#d4d4d4}, tXt string) STORED
{color}{color:#569cd6}AS{color}{color:#d4d4d4} {color}{color:#569cd6}ORC{color}
{color:#d4d4d4} TBLPROPERTIES
({color}{color:#ce9178}'NO_AUTO_COMPACTION'{color}{color:#d4d4d4}={color}{color:#ce9178}'true'{color}{color:#d4d4d4},
{color}{color:#ce9178}'transactional'{color}{color:#d4d4d4}={color}{color:#ce9178}'true'{color}{color:#d4d4d4});{color}
{color:#569cd6}CREATE{color}{color:#d4d4d4}
{color}{color:#569cd6}TABLE{color}{color:#d4d4d4} testtable_chg1 (iD
{color}{color:#569cd6}int{color}{color:#d4d4d4}, tXt string) STORED
{color}{color:#569cd6}AS{color}{color:#d4d4d4}
{color}{color:#569cd6}ORC{color}{color:#d4d4d4};{color}
{color:#569cd6}CREATE{color}{color:#d4d4d4}
{color}{color:#569cd6}TABLE{color}{color:#d4d4d4} testtable_chg2 (iD
{color}{color:#569cd6}int{color}{color:#d4d4d4}, tXt string) STORED
{color}{color:#569cd6}AS{color}{color:#d4d4d4}
{color}{color:#569cd6}ORC{color}{color:#d4d4d4};{color}
{color:#569cd6}INSERT INTO{color}{color:#d4d4d4} testtable
{color}{color:#569cd6}VALUES{color}{color:#d4d4d4}
({color}{color:#b5cea8}1{color}{color:#d4d4d4},
{color}{color:#ce9178}'base-A'{color}{color:#d4d4d4}),
({color}{color:#b5cea8}2{color}{color:#d4d4d4},
{color}{color:#ce9178}'base-B'{color}{color:#d4d4d4});{color}
{color:#569cd6}INSERT INTO{color}{color:#d4d4d4} testtable_chg1
{color}{color:#569cd6}VALUES{color}{color:#d4d4d4}
({color}{color:#b5cea8}1{color}{color:#d4d4d4},
{color}{color:#ce9178}'chg1-A'{color}{color:#d4d4d4}),
({color}{color:#b5cea8}3{color}{color:#d4d4d4},
{color}{color:#ce9178}'chg1-C'{color}{color:#d4d4d4}),
({color}{color:#b5cea8}4{color}{color:#d4d4d4},
{color}{color:#ce9178}'chg1-D'{color}{color:#d4d4d4});{color}
{color:#569cd6}INSERT INTO{color}{color:#d4d4d4} testtable_chg2
{color}{color:#569cd6}VALUES{color}{color:#d4d4d4}
({color}{color:#b5cea8}1{color}{color:#d4d4d4},
{color}{color:#ce9178}'chg2-A'{color}{color:#d4d4d4}),
({color}{color:#b5cea8}3{color}{color:#d4d4d4},
{color}{color:#ce9178}'chg2-C'{color}{color:#d4d4d4}),
({color}{color:#b5cea8}5{color}{color:#d4d4d4},
{color}{color:#ce9178}'chg2-E'{color}{color:#d4d4d4});{color}
{color:#569cd6}MERGE{color}{color:#d4d4d4}
{color}{color:#569cd6}INTO{color}{color:#d4d4d4} testtable
{color}{color:#569cd6}AS{color}{color:#d4d4d4} T{color}
{color:#d4d4d4} {color}{color:#569cd6}USING{color}{color:#d4d4d4}
testtable_chg1 {color}{color:#569cd6}AS{color}{color:#d4d4d4} S
{color}{color:#569cd6}ON{color}{color:#d4d4d4} T.id = S.id{color}
{color:#d4d4d4} {color}{color:#569cd6}WHEN{color}{color:#d4d4d4}
{color}{color:#569cd6}MATCHED{color}{color:#d4d4d4}
{color}{color:#569cd6}AND{color}{color:#d4d4d4} (T.txt != S.txt
{color}{color:#569cd6}AND{color}{color:#d4d4d4} S.txt {color}{color:#569cd6}IS
NOT NULL{color}{color:#d4d4d4})
{color}{color:#569cd6}THEN{color}{color:#d4d4d4}
{color}{color:#569cd6}UPDATE{color}{color:#d4d4d4}
{color}{color:#569cd6}SET{color}{color:#d4d4d4} txt = S.txt{color}
{color:#d4d4d4} {color}{color:#569cd6}WHEN{color}{color:#d4d4d4}
{color}{color:#569cd6}NOT{color}{color:#d4d4d4}
{color}{color:#569cd6}MATCHED{color}{color:#d4d4d4}
{color}{color:#569cd6}THEN{color}{color:#d4d4d4}
{color}{color:#569cd6}INSERT{color}{color:#d4d4d4}
{color}{color:#569cd6}VALUES{color}{color:#d4d4d4} (S.id, S.txt);{color}
{color:#569cd6}MERGE{color}{color:#d4d4d4}
{color}{color:#569cd6}INTO{color}{color:#d4d4d4} testtable
{color}{color:#569cd6}AS{color}{color:#d4d4d4} T{color}
{color:#d4d4d4} {color}{color:#569cd6}USING{color}{color:#d4d4d4}
testtable_chg2 {color}{color:#569cd6}AS{color}{color:#d4d4d4} S
{color}{color:#569cd6}ON{color}{color:#d4d4d4} T.id = S.id{color}
{color:#d4d4d4} {color}{color:#569cd6}WHEN{color}{color:#d4d4d4}
{color}{color:#569cd6}MATCHED{color}{color:#d4d4d4}
{color}{color:#569cd6}AND{color}{color:#d4d4d4} (T.txt != S.txt
{color}{color:#569cd6}AND{color}{color:#d4d4d4} S.txt {color}{color:#569cd6}IS
NOT NULL{color}{color:#d4d4d4})
{color}{color:#569cd6}THEN{color}{color:#d4d4d4}
{color}{color:#569cd6}UPDATE{color}{color:#d4d4d4}
{color}{color:#569cd6}SET{color}{color:#d4d4d4} txt = S.txt{color}
{color:#d4d4d4} {color}{color:#569cd6}WHEN{color}{color:#d4d4d4}
{color}{color:#569cd6}NOT{color}{color:#d4d4d4}
{color}{color:#569cd6}MATCHED{color}{color:#d4d4d4}
{color}{color:#569cd6}THEN{color}{color:#d4d4d4}
{color}{color:#569cd6}INSERT{color}{color:#d4d4d4}
{color}{color:#569cd6}VALUES{color}{color:#d4d4d4} (S.id, S.txt);{color}
{color:#569cd6}ALTER{color}{color:#d4d4d4}
{color}{color:#569cd6}TABLE{color}{color:#d4d4d4} testtable COMPACT
{color}{color:#ce9178}'MINOR'{color}{color:#d4d4d4}
{color}{color:#569cd6}AND{color}{color:#d4d4d4} WAIT;{color}
> Orc acid footer metadata should be case insensitive
> ---------------------------------------------------
>
> Key: HIVE-29328
> URL: https://issues.apache.org/jira/browse/HIVE-29328
> Project: Hive
> Issue Type: Bug
> Reporter: Zsolt Miskolczi
> Priority: Major
> Labels: pull-request-available
>
> The ORC file format allows metadata field names in any casing. For
> example, we have manual tests where query-based compaction creates ORC files
> with lowercase field names (the root cause of this is under investigation).
> As OrcInputFormat.isOriginal currently checks the field names with strict
> casing, FixAcidKeyIndex can fail if the ORC file footer contains its metadata
> field names (like currentTransaction) in lowercase.
>
> To repro the issue manually:
>
> {color:#569cd6}set{color}{color:#d4d4d4} hive.support.concurrency=true;{color}
> {color:#569cd6}set{color}{color:#d4d4d4}
> hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;{color}
> {color:#569cd6}set{color}{color:#d4d4d4}
> hive.compactor.crud.query.based=true;{color}
> {color:#569cd6}DROP{color}{color:#d4d4d4}
> {color}{color:#569cd6}TABLE{color}{color:#d4d4d4}
> {color}{color:#569cd6}IF{color}{color:#d4d4d4}
> {color}{color:#569cd6}EXISTS{color}{color:#d4d4d4} testtable;{color}
> {color:#569cd6}DROP{color}{color:#d4d4d4}
> {color}{color:#569cd6}TABLE{color}{color:#d4d4d4}
> {color}{color:#569cd6}IF{color}{color:#d4d4d4}
> {color}{color:#569cd6}EXISTS{color}{color:#d4d4d4} testtable_chg1;{color}
> {color:#569cd6}DROP{color}{color:#d4d4d4}
> {color}{color:#569cd6}TABLE{color}{color:#d4d4d4}
> {color}{color:#569cd6}IF{color}{color:#d4d4d4}
> {color}{color:#569cd6}EXISTS{color}{color:#d4d4d4} testtable_chg2;{color}
> {color:#569cd6}CREATE{color}{color:#d4d4d4}
> {color}{color:#569cd6}TABLE{color}{color:#d4d4d4} testtable (iD
> {color}{color:#569cd6}int{color}{color:#d4d4d4}, tXt string) STORED
> {color}{color:#569cd6}AS{color}{color:#d4d4d4}
> {color}{color:#569cd6}ORC{color}
> {color:#d4d4d4} TBLPROPERTIES
> ({color}{color:#ce9178}'NO_AUTO_COMPACTION'{color}{color:#d4d4d4}={color}{color:#ce9178}'true'{color}{color:#d4d4d4},
>
> {color}{color:#ce9178}'transactional'{color}{color:#d4d4d4}={color}{color:#ce9178}'true'{color}{color:#d4d4d4});{color}
> {color:#569cd6}CREATE{color}{color:#d4d4d4}
> {color}{color:#569cd6}TABLE{color}{color:#d4d4d4} testtable_chg1 (iD
> {color}{color:#569cd6}int{color}{color:#d4d4d4}, tXt string) STORED
> {color}{color:#569cd6}AS{color}{color:#d4d4d4}
> {color}{color:#569cd6}ORC{color}{color:#d4d4d4};{color}
> {color:#569cd6}CREATE{color}{color:#d4d4d4}
> {color}{color:#569cd6}TABLE{color}{color:#d4d4d4} testtable_chg2 (iD
> {color}{color:#569cd6}int{color}{color:#d4d4d4}, tXt string) STORED
> {color}{color:#569cd6}AS{color}{color:#d4d4d4}
> {color}{color:#569cd6}ORC{color}{color:#d4d4d4};{color}
> {color:#569cd6}INSERT INTO{color}{color:#d4d4d4} testtable
> {color}{color:#569cd6}VALUES{color}{color:#d4d4d4}
> ({color}{color:#b5cea8}1{color}{color:#d4d4d4},
> {color}{color:#ce9178}'base-A'{color}{color:#d4d4d4}),
> ({color}{color:#b5cea8}2{color}{color:#d4d4d4},
> {color}{color:#ce9178}'base-B'{color}{color:#d4d4d4});{color}
> {color:#569cd6}INSERT INTO{color}{color:#d4d4d4} testtable_chg1
> {color}{color:#569cd6}VALUES{color}{color:#d4d4d4}
> ({color}{color:#b5cea8}1{color}{color:#d4d4d4},
> {color}{color:#ce9178}'chg1-A'{color}{color:#d4d4d4}),
> ({color}{color:#b5cea8}3{color}{color:#d4d4d4},
> {color}{color:#ce9178}'chg1-C'{color}{color:#d4d4d4}),
> ({color}{color:#b5cea8}4{color}{color:#d4d4d4},
> {color}{color:#ce9178}'chg1-D'{color}{color:#d4d4d4});{color}
> {color:#569cd6}INSERT INTO{color}{color:#d4d4d4} testtable_chg2
> {color}{color:#569cd6}VALUES{color}{color:#d4d4d4}
> ({color}{color:#b5cea8}1{color}{color:#d4d4d4},
> {color}{color:#ce9178}'chg2-A'{color}{color:#d4d4d4}),
> ({color}{color:#b5cea8}3{color}{color:#d4d4d4},
> {color}{color:#ce9178}'chg2-C'{color}{color:#d4d4d4}),
> ({color}{color:#b5cea8}5{color}{color:#d4d4d4},
> {color}{color:#ce9178}'chg2-E'{color}{color:#d4d4d4});{color}
> {color:#569cd6}MERGE{color}{color:#d4d4d4}
> {color}{color:#569cd6}INTO{color}{color:#d4d4d4} testtable
> {color}{color:#569cd6}AS{color}{color:#d4d4d4} T{color}
> {color:#d4d4d4} {color}{color:#569cd6}USING{color}{color:#d4d4d4}
> testtable_chg1 {color}{color:#569cd6}AS{color}{color:#d4d4d4} S
> {color}{color:#569cd6}ON{color}{color:#d4d4d4} T.id = S.id{color}
> {color:#d4d4d4} {color}{color:#569cd6}WHEN{color}{color:#d4d4d4}
> {color}{color:#569cd6}MATCHED{color}{color:#d4d4d4}
> {color}{color:#569cd6}AND{color}{color:#d4d4d4} (T.txt != S.txt
> {color}{color:#569cd6}AND{color}{color:#d4d4d4} S.txt
> {color}{color:#569cd6}IS NOT NULL{color}{color:#d4d4d4})
> {color}{color:#569cd6}THEN{color}{color:#d4d4d4}
> {color}{color:#569cd6}UPDATE{color}{color:#d4d4d4}
> {color}{color:#569cd6}SET{color}{color:#d4d4d4} txt = S.txt{color}
> {color:#d4d4d4} {color}{color:#569cd6}WHEN{color}{color:#d4d4d4}
> {color}{color:#569cd6}NOT{color}{color:#d4d4d4}
> {color}{color:#569cd6}MATCHED{color}{color:#d4d4d4}
> {color}{color:#569cd6}THEN{color}{color:#d4d4d4}
> {color}{color:#569cd6}INSERT{color}{color:#d4d4d4}
> {color}{color:#569cd6}VALUES{color}{color:#d4d4d4} (S.id, S.txt);{color}
> {color:#569cd6}MERGE{color}{color:#d4d4d4}
> {color}{color:#569cd6}INTO{color}{color:#d4d4d4} testtable
> {color}{color:#569cd6}AS{color}{color:#d4d4d4} T{color}
> {color:#d4d4d4} {color}{color:#569cd6}USING{color}{color:#d4d4d4}
> testtable_chg2 {color}{color:#569cd6}AS{color}{color:#d4d4d4} S
> {color}{color:#569cd6}ON{color}{color:#d4d4d4} T.id = S.id{color}
> {color:#d4d4d4} {color}{color:#569cd6}WHEN{color}{color:#d4d4d4}
> {color}{color:#569cd6}MATCHED{color}{color:#d4d4d4}
> {color}{color:#569cd6}AND{color}{color:#d4d4d4} (T.txt != S.txt
> {color}{color:#569cd6}AND{color}{color:#d4d4d4} S.txt
> {color}{color:#569cd6}IS NOT NULL{color}{color:#d4d4d4})
> {color}{color:#569cd6}THEN{color}{color:#d4d4d4}
> {color}{color:#569cd6}UPDATE{color}{color:#d4d4d4}
> {color}{color:#569cd6}SET{color}{color:#d4d4d4} txt = S.txt{color}
> {color:#d4d4d4} {color}{color:#569cd6}WHEN{color}{color:#d4d4d4}
> {color}{color:#569cd6}NOT{color}{color:#d4d4d4}
> {color}{color:#569cd6}MATCHED{color}{color:#d4d4d4}
> {color}{color:#569cd6}THEN{color}{color:#d4d4d4}
> {color}{color:#569cd6}INSERT{color}{color:#d4d4d4}
> {color}{color:#569cd6}VALUES{color}{color:#d4d4d4} (S.id, S.txt);{color}
> {color:#569cd6}ALTER{color}{color:#d4d4d4}
> {color}{color:#569cd6}TABLE{color}{color:#d4d4d4} testtable COMPACT
> {color}{color:#ce9178}'MINOR'{color}{color:#d4d4d4}
> {color}{color:#569cd6}AND{color}{color:#d4d4d4} WAIT;{color}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)