This is an automated email from the ASF dual-hosted git repository.
krisztiankasa pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new e9d3cd7a3fc HIVE-29328: Orc acid footer metadata should be case insensitive (#6203)
e9d3cd7a3fc is described below
commit e9d3cd7a3fc1a13435a360dd9f613e51fd860e37
Author: InvisibleProgrammer <[email protected]>
AuthorDate: Tue Nov 25 19:05:10 2025 +0100
HIVE-29328: Orc acid footer metadata should be case insensitive (#6203)
---
.../hadoop/hive/ql/io/orc/OrcInputFormat.java | 24 +++++++++++---
.../hadoop/hive/ql/io/orc/OrcRecordUpdater.java | 2 +-
.../hadoop/hive/ql/io/orc/TestFixAcidKeyIndex.java | 37 +++++++++++++++++++---
3 files changed, 53 insertions(+), 10 deletions(-)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
index 2ab04cbfe6b..05642e27605 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@ -18,7 +18,8 @@
package org.apache.hadoop.hive.ql.io.orc;
-import org.apache.commons.collections.CollectionUtils;
+import org.apache.commons.collections4.CollectionUtils;
+import org.apache.commons.collections4.Equator;
import org.apache.hadoop.hdfs.protocol.HdfsLocatedFileStatus;
import org.apache.hadoop.hive.common.BlobStorageUtils;
import org.apache.hadoop.hive.common.NoDynamicValuesException;
@@ -29,6 +30,7 @@
import java.security.PrivilegedExceptionAction;
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
@@ -378,8 +380,7 @@ public static RecordReader createReaderFromFile(Reader file,
* @return <code>false</code> if an ACID file, <code>true</code> if a simple orc file
*/
public static boolean isOriginal(Reader file) {
- return !CollectionUtils.isEqualCollection(file.getSchema().getFieldNames(),
- OrcRecordUpdater.ALL_ACID_ROW_NAMES);
+ return !checkIfAcidRowNamesFilled(file.getSchema().getFieldNames());
}
/**
@@ -388,8 +389,21 @@ public static boolean isOriginal(Reader file) {
* @return <code>false</code> if an ACID file, <code>true</code> if a simple orc file
*/
public static boolean isOriginal(Footer footer) {
- return !CollectionUtils.isEqualCollection(footer.getTypesList().get(0).getFieldNamesList(),
- OrcRecordUpdater.ALL_ACID_ROW_NAMES);
+ return !checkIfAcidRowNamesFilled(footer.getTypesList().getFirst().getFieldNamesList());
+ }
+
+ private static boolean checkIfAcidRowNamesFilled(Collection<String> fieldNames) {
+ return CollectionUtils.isEqualCollection(OrcRecordUpdater.ALL_ACID_ROW_NAMES, fieldNames, new Equator<>() {
+ @Override
+ public boolean equate(String s, String t1) {
+ return s.equalsIgnoreCase(t1);
+ }
+
+ @Override
+ public int hash(String s) {
+ return 0;
+ }
+ });
}
public static boolean[] genIncludedColumns(TypeDescription readerSchema,
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java
index 3710ee71c7c..c99ae79824a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java
@@ -92,7 +92,7 @@ public class OrcRecordUpdater implements RecordUpdater {
static final String ROW_ID_FIELD_NAME = "rowId";
static final String CURRENT_WRITEID_FIELD_NAME = "currentTransaction";
static final String ROW_FIELD_NAME = "row";
- public static final Collection ALL_ACID_ROW_NAMES = Arrays.asList(
+ public static final Collection<String> ALL_ACID_ROW_NAMES = Arrays.asList(
OrcRecordUpdater.BUCKET_FIELD_NAME,
OrcRecordUpdater.CURRENT_WRITEID_FIELD_NAME,
OrcRecordUpdater.ORIGINAL_WRITEID_FIELD_NAME,
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFixAcidKeyIndex.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFixAcidKeyIndex.java
index 07e16931b6b..4d45f9e4fbd 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFixAcidKeyIndex.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFixAcidKeyIndex.java
@@ -45,6 +45,10 @@
public class TestFixAcidKeyIndex {
public final static Logger LOG = LoggerFactory.getLogger(TestFixAcidKeyIndex.class);
+ final static String typeStr = "struct<operation:int," +
+ "originalTransaction:bigint,bucket:int,rowId:bigint," +
+ "currentTransaction:bigint," +
+ "row:struct<a:int,b:struct<c:int>,d:string>>";
@Rule
public TestName testCaseName = new TestName();
@@ -72,12 +76,15 @@ static abstract class TestKeyIndexBuilder
}
void createTestAcidFile(Path path, int numRows, TestKeyIndexBuilder indexBuilder) throws Exception {
+ createTestAcidFile(path, numRows, indexBuilder, typeStr);
+ }
+
+ void createTestAcidFile(Path path,
+ int numRows,
+ TestKeyIndexBuilder indexBuilder,
+ String typeStr) throws Exception {
FileSystem fs = path.getFileSystem(conf);
fs.delete(path, true);
- String typeStr = "struct<operation:int," +
- "originalTransaction:bigint,bucket:int,rowId:bigint," +
- "currentTransaction:bigint," +
- "row:struct<a:int,b:struct<c:int>,d:string>>";
TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeStr);
Writer writer = OrcFile.createWriter(path,
OrcFile.writerOptions(conf)
@@ -219,6 +226,28 @@ public void testValidKeyIndex() throws Exception {
fixValidIndex(testFilePath);
}
+ @Test
+ public void testValidKeyIndexWithAcidMetadataLowerCase() throws Exception {
+ String lowerCaseTypeStr = typeStr.toLowerCase();
+ // Try with 0 row file.
+ createTestAcidFile(testFilePath, 0, new GoodKeyIndexBuilder(), lowerCaseTypeStr);
+ checkValidKeyIndex(testFilePath);
+ // Attempting to fix a valid - should not result in a new file.
+ fixValidIndex(testFilePath);
+
+ // Try single stripe
+ createTestAcidFile(testFilePath, 100, new GoodKeyIndexBuilder(), lowerCaseTypeStr);
+ checkValidKeyIndex(testFilePath);
+ // Attempting to fix a valid - should not result in a new file.
+ fixValidIndex(testFilePath);
+
+ // Multiple stripes
+ createTestAcidFile(testFilePath, 12000, new GoodKeyIndexBuilder(), lowerCaseTypeStr);
+ checkValidKeyIndex(testFilePath);
+ // Attempting to fix a valid - should not result in a new file.
+ fixValidIndex(testFilePath);
+ }
+
@Test
public void testInvalidKeyIndex() throws Exception {
// Try single stripe