This is an automated email from the ASF dual-hosted git repository.
xushiyan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push:
new 49a2110fc8 [HUDI-4496] Fix Orc support broken for Spark 3.x and more (#6227)
49a2110fc8 is described below
commit 49a2110fc81ea101e75a1bf1b5ca5ac2eb6e7181
Author: Alexey Kudinkin <[email protected]>
AuthorDate: Sat Nov 12 11:01:21 2022 -0800
[HUDI-4496] Fix Orc support broken for Spark 3.x and more (#6227)
---
hudi-client/hudi-client-common/pom.xml | 6 ++++
.../apache/hudi/io/storage/HoodieOrcWriter.java | 4 +--
hudi-common/pom.xml | 4 +--
.../hudi/common/table/cdc/HoodieCDCFileSplit.java | 4 +--
.../org/apache/hudi/common/util/AvroOrcUtils.java | 24 ++++++-------
.../hudi/common/util/CustomizedThreadFactory.java | 5 ++-
.../apache/hudi/common/util/OrcReaderIterator.java | 2 +-
.../java/org/apache/hudi/common/util/OrcUtils.java | 4 +--
.../hudi/common/util/TestOrcReaderIterator.java | 6 ++--
.../apache/hudi/functional/TestOrcBootstrap.java | 6 ++++
.../org/apache/hudi/TestHoodieSparkSqlWriter.scala | 7 ++--
.../functional/TestHoodieDeltaStreamer.java | 13 +++++--
.../functional/TestHoodieSnapshotExporter.java | 7 ++++
.../utilities/testutils/UtilitiesTestBase.java | 4 +--
packaging/hudi-flink-bundle/pom.xml | 3 +-
pom.xml | 41 +++++++++++++++++++---
16 files changed, 98 insertions(+), 42 deletions(-)
diff --git a/hudi-client/hudi-client-common/pom.xml
b/hudi-client/hudi-client-common/pom.xml
index 9757e95c3b..a8efee8c5b 100644
--- a/hudi-client/hudi-client-common/pom.xml
+++ b/hudi-client/hudi-client-common/pom.xml
@@ -65,6 +65,12 @@
<artifactId>parquet-avro</artifactId>
</dependency>
+ <!-- Orc -->
+ <dependency>
+ <groupId>org.apache.orc</groupId>
+ <artifactId>orc-core</artifactId>
+ </dependency>
+
<!-- Hilbert Curve -->
<dependency>
<groupId>com.github.davidmoten</groupId>
diff --git
a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieOrcWriter.java
b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieOrcWriter.java
index 4bcab2cec8..0923d151cb 100644
---
a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieOrcWriter.java
+++
b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieOrcWriter.java
@@ -35,8 +35,8 @@ import org.apache.hudi.common.util.AvroOrcUtils;
import org.apache.orc.OrcFile;
import org.apache.orc.TypeDescription;
import org.apache.orc.Writer;
-import org.apache.orc.storage.ql.exec.vector.ColumnVector;
-import org.apache.orc.storage.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import java.io.Closeable;
import java.io.IOException;
diff --git a/hudi-common/pom.xml b/hudi-common/pom.xml
index 15becc3c16..200b759ed2 100644
--- a/hudi-common/pom.xml
+++ b/hudi-common/pom.xml
@@ -141,12 +141,10 @@
<artifactId>parquet-avro</artifactId>
</dependency>
- <!-- ORC -->
+ <!-- Orc -->
<dependency>
<groupId>org.apache.orc</groupId>
<artifactId>orc-core</artifactId>
- <version>${orc.version}</version>
- <classifier>nohive</classifier>
</dependency>
<!-- Httpcomponents -->
diff --git
a/hudi-common/src/main/java/org/apache/hudi/common/table/cdc/HoodieCDCFileSplit.java
b/hudi-common/src/main/java/org/apache/hudi/common/table/cdc/HoodieCDCFileSplit.java
index 79aaf50f1c..d508f7ac4e 100644
---
a/hudi-common/src/main/java/org/apache/hudi/common/table/cdc/HoodieCDCFileSplit.java
+++
b/hudi-common/src/main/java/org/apache/hudi/common/table/cdc/HoodieCDCFileSplit.java
@@ -22,8 +22,6 @@ import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.model.FileSlice;
import org.apache.hudi.common.util.Option;
-import org.jetbrains.annotations.NotNull;
-
import java.io.Serializable;
import java.util.Collection;
import java.util.Collections;
@@ -125,7 +123,7 @@ public class HoodieCDCFileSplit implements Serializable,
Comparable<HoodieCDCFil
}
@Override
- public int compareTo(@NotNull HoodieCDCFileSplit o) {
+ public int compareTo(HoodieCDCFileSplit o) {
return this.instant.compareTo(o.instant);
}
}
diff --git
a/hudi-common/src/main/java/org/apache/hudi/common/util/AvroOrcUtils.java
b/hudi-common/src/main/java/org/apache/hudi/common/util/AvroOrcUtils.java
index c311842443..607b1fb3bd 100644
--- a/hudi-common/src/main/java/org/apache/hudi/common/util/AvroOrcUtils.java
+++ b/hudi-common/src/main/java/org/apache/hudi/common/util/AvroOrcUtils.java
@@ -38,18 +38,18 @@ import java.nio.charset.StandardCharsets;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData.StringType;
import org.apache.avro.util.Utf8;
-import org.apache.orc.storage.common.type.HiveDecimal;
-import org.apache.orc.storage.ql.exec.vector.BytesColumnVector;
-import org.apache.orc.storage.ql.exec.vector.ColumnVector;
-import org.apache.orc.storage.ql.exec.vector.DecimalColumnVector;
-import org.apache.orc.storage.ql.exec.vector.DoubleColumnVector;
-import org.apache.orc.storage.ql.exec.vector.ListColumnVector;
-import org.apache.orc.storage.ql.exec.vector.LongColumnVector;
-import org.apache.orc.storage.ql.exec.vector.MapColumnVector;
-import org.apache.orc.storage.ql.exec.vector.StructColumnVector;
-import org.apache.orc.storage.ql.exec.vector.TimestampColumnVector;
-import org.apache.orc.storage.ql.exec.vector.UnionColumnVector;
-import org.apache.orc.storage.serde2.io.DateWritable;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.UnionColumnVector;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
import org.apache.hudi.exception.HoodieIOException;
import org.apache.orc.TypeDescription;
diff --git
a/hudi-common/src/main/java/org/apache/hudi/common/util/CustomizedThreadFactory.java
b/hudi-common/src/main/java/org/apache/hudi/common/util/CustomizedThreadFactory.java
index a13f3a804f..2af7c078f7 100644
---
a/hudi-common/src/main/java/org/apache/hudi/common/util/CustomizedThreadFactory.java
+++
b/hudi-common/src/main/java/org/apache/hudi/common/util/CustomizedThreadFactory.java
@@ -18,7 +18,7 @@
package org.apache.hudi.common.util;
-import org.jetbrains.annotations.NotNull;
+import javax.annotation.Nonnull;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.atomic.AtomicLong;
@@ -59,7 +59,7 @@ public class CustomizedThreadFactory implements ThreadFactory
{
}
@Override
- public Thread newThread(@NotNull Runnable r) {
+ public Thread newThread(@Nonnull Runnable r) {
Thread runThread = preExecuteRunnable == null ? new Thread(r) : new Thread(new Runnable() {
@Override
@@ -68,7 +68,6 @@ public class CustomizedThreadFactory implements ThreadFactory
{
r.run();
}
});
-
runThread.setDaemon(daemon);
runThread.setName(threadName + threadNum.getAndIncrement());
return runThread;
diff --git
a/hudi-common/src/main/java/org/apache/hudi/common/util/OrcReaderIterator.java
b/hudi-common/src/main/java/org/apache/hudi/common/util/OrcReaderIterator.java
index d9ceeeee40..727bccb5e2 100644
---
a/hudi-common/src/main/java/org/apache/hudi/common/util/OrcReaderIterator.java
+++
b/hudi-common/src/main/java/org/apache/hudi/common/util/OrcReaderIterator.java
@@ -22,7 +22,7 @@ import java.util.List;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericData.Record;
-import org.apache.orc.storage.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hudi.exception.HoodieIOException;
import org.apache.orc.RecordReader;
diff --git
a/hudi-common/src/main/java/org/apache/hudi/common/util/OrcUtils.java
b/hudi-common/src/main/java/org/apache/hudi/common/util/OrcUtils.java
index 5afe354d0e..8c41f7f3e5 100644
--- a/hudi-common/src/main/java/org/apache/hudi/common/util/OrcUtils.java
+++ b/hudi-common/src/main/java/org/apache/hudi/common/util/OrcUtils.java
@@ -38,8 +38,8 @@ import org.apache.orc.Reader;
import org.apache.orc.Reader.Options;
import org.apache.orc.RecordReader;
import org.apache.orc.TypeDescription;
-import org.apache.orc.storage.ql.exec.vector.BytesColumnVector;
-import org.apache.orc.storage.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import java.io.IOException;
import java.nio.ByteBuffer;
diff --git
a/hudi-common/src/test/java/org/apache/hudi/common/util/TestOrcReaderIterator.java
b/hudi-common/src/test/java/org/apache/hudi/common/util/TestOrcReaderIterator.java
index b55995c0c2..0f36f1877d 100644
---
a/hudi-common/src/test/java/org/apache/hudi/common/util/TestOrcReaderIterator.java
+++
b/hudi-common/src/test/java/org/apache/hudi/common/util/TestOrcReaderIterator.java
@@ -28,9 +28,9 @@ import org.apache.orc.Reader;
import org.apache.orc.RecordReader;
import org.apache.orc.TypeDescription;
import org.apache.orc.Writer;
-import org.apache.orc.storage.ql.exec.vector.BytesColumnVector;
-import org.apache.orc.storage.ql.exec.vector.LongColumnVector;
-import org.apache.orc.storage.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
diff --git
a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestOrcBootstrap.java
b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestOrcBootstrap.java
index 330b6015bc..d0c74d109c 100644
---
a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestOrcBootstrap.java
+++
b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestOrcBootstrap.java
@@ -19,6 +19,7 @@
package org.apache.hudi.functional;
import org.apache.hudi.DataSourceWriteOptions;
+import org.apache.hudi.HoodieSparkUtils;
import org.apache.hudi.avro.model.HoodieFileStatus;
import org.apache.hudi.client.bootstrap.BootstrapMode;
import org.apache.hudi.client.bootstrap.FullRecordBootstrapDataProvider;
@@ -185,6 +186,11 @@ public class TestOrcBootstrap extends HoodieClientTestBase
{
}
private void testBootstrapCommon(boolean partitioned, boolean deltaCommit, EffectiveMode mode) throws Exception {
+ // NOTE: Hudi doesn't support Orc in Spark < 3.0
+ // Please check HUDI-4496 for more details
+ if (!HoodieSparkUtils.gteqSpark3_0()) {
+ return;
+ }
if (deltaCommit) {
metaClient = HoodieTestUtils.init(basePath, HoodieTableType.MERGE_ON_READ, bootstrapBasePath, HoodieFileFormat.ORC);
diff --git
a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieSparkSqlWriter.scala
b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieSparkSqlWriter.scala
index 732d8d7ec0..54800f579f 100644
---
a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieSparkSqlWriter.scala
+++
b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieSparkSqlWriter.scala
@@ -1179,11 +1179,12 @@ object TestHoodieSparkSqlWriter {
val parquetScenarios = scenarios.map { _ :+ "parquet" }
val orcScenarios = scenarios.map { _ :+ "orc" }
- // TODO(HUDI-4496) Fix Orc support in Spark 3.x
+ // NOTE: Hudi doesn't support Orc in Spark < 3.0
+ // Please check HUDI-4496 for more details
val targetScenarios = if (gteqSpark3_0) {
- parquetScenarios
- } else {
parquetScenarios ++ orcScenarios
+ } else {
+ parquetScenarios
}
java.util.Arrays.stream(targetScenarios.map(as => arguments(as.map(_.asInstanceOf[AnyRef]):_*)))
diff --git
a/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHoodieDeltaStreamer.java
b/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHoodieDeltaStreamer.java
index 5c1365ad4f..1e24a83c82 100644
---
a/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHoodieDeltaStreamer.java
+++
b/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHoodieDeltaStreamer.java
@@ -21,6 +21,7 @@ package org.apache.hudi.utilities.functional;
import org.apache.hudi.AvroConversionUtils;
import org.apache.hudi.DataSourceReadOptions;
import org.apache.hudi.DataSourceWriteOptions;
+import org.apache.hudi.HoodieSparkUtils$;
import org.apache.hudi.client.SparkRDDWriteClient;
import org.apache.hudi.client.transaction.lock.InProcessLockProvider;
import org.apache.hudi.common.config.DFSPropertiesConfiguration;
@@ -1927,12 +1928,20 @@ public class TestHoodieDeltaStreamer extends
HoodieDeltaStreamerTestBase {
@Test
public void testORCDFSSourceWithoutSchemaProviderAndNoTransformer() throws Exception {
- testORCDFSSource(false, null);
+ // NOTE: Hudi doesn't support Orc in Spark < 3.0
+ // Please check HUDI-4496 for more details
+ if (HoodieSparkUtils$.MODULE$.gteqSpark3_0()) {
+ testORCDFSSource(false, null);
+ }
}
@Test
public void testORCDFSSourceWithSchemaProviderAndWithTransformer() throws Exception {
- testORCDFSSource(true, Collections.singletonList(TripsWithDistanceTransformer.class.getName()));
+ // NOTE: Hudi doesn't support Orc in Spark < 3.0
+ // Please check HUDI-4496 for more details
+ if (HoodieSparkUtils$.MODULE$.gteqSpark3_0()) {
+ testORCDFSSource(true, Collections.singletonList(TripsWithDistanceTransformer.class.getName()));
+ }
}
private void prepareCsvDFSSource(
diff --git
a/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHoodieSnapshotExporter.java
b/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHoodieSnapshotExporter.java
index 541da0a554..8169da3416 100644
---
a/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHoodieSnapshotExporter.java
+++
b/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHoodieSnapshotExporter.java
@@ -18,6 +18,7 @@
package org.apache.hudi.utilities.functional;
+import org.apache.hudi.HoodieSparkUtils;
import org.apache.hudi.client.SparkRDDWriteClient;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.model.HoodieAvroPayload;
@@ -220,6 +221,12 @@ public class TestHoodieSnapshotExporter extends
SparkClientFunctionalTestHarness
@ParameterizedTest
@ValueSource(strings = {"json", "parquet", "orc"})
public void testExportAsNonHudi(String format) throws IOException {
+ // NOTE: Hudi doesn't support Orc in Spark < 3.0
+ // Please check HUDI-4496 for more details
+ if ("orc".equals(format) && !HoodieSparkUtils.gteqSpark3_0()) {
+ return;
+ }
+
HoodieSnapshotExporter.Config cfg = new Config();
cfg.sourceBasePath = sourcePath;
cfg.targetOutputPath = targetPath;
diff --git
a/hudi-utilities/src/test/java/org/apache/hudi/utilities/testutils/UtilitiesTestBase.java
b/hudi-utilities/src/test/java/org/apache/hudi/utilities/testutils/UtilitiesTestBase.java
index cc61a35886..493953e894 100644
---
a/hudi-utilities/src/test/java/org/apache/hudi/utilities/testutils/UtilitiesTestBase.java
+++
b/hudi-utilities/src/test/java/org/apache/hudi/utilities/testutils/UtilitiesTestBase.java
@@ -62,8 +62,8 @@ import org.apache.log4j.Logger;
import org.apache.orc.OrcFile;
import org.apache.orc.TypeDescription;
import org.apache.orc.Writer;
-import org.apache.orc.storage.ql.exec.vector.ColumnVector;
-import org.apache.orc.storage.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.parquet.avro.AvroParquetWriter;
import org.apache.parquet.hadoop.ParquetFileWriter.Mode;
import org.apache.parquet.hadoop.ParquetWriter;
diff --git a/packaging/hudi-flink-bundle/pom.xml
b/packaging/hudi-flink-bundle/pom.xml
index df590a78a7..fef63ed687 100644
--- a/packaging/hudi-flink-bundle/pom.xml
+++ b/packaging/hudi-flink-bundle/pom.xml
@@ -638,8 +638,7 @@
<dependency>
<groupId>org.apache.orc</groupId>
<artifactId>orc-core</artifactId>
- <version>${orc.version}</version>
- <classifier>nohive</classifier>
+ <version>${orc.flink.version}</version>
<scope>${flink.bundle.hive.scope}</scope>
</dependency>
diff --git a/pom.xml b/pom.xml
index 3b02c916f7..a0407fa79e 100644
--- a/pom.xml
+++ b/pom.xml
@@ -122,7 +122,8 @@
<trino.version>390</trino.version>
<hive.exec.classifier>core</hive.exec.classifier>
<metrics.version>4.1.1</metrics.version>
- <orc.version>1.6.0</orc.version>
+ <orc.spark.version>1.6.0</orc.spark.version>
+ <orc.flink.version>1.5.6</orc.flink.version>
<airlift.version>0.16</airlift.version>
<prometheus.version>0.8.0</prometheus.version>
<http.version>4.4.1</http.version>
@@ -830,6 +831,14 @@
</exclusions>
</dependency>
+ <!-- Orc -->
+ <dependency>
+ <groupId>org.apache.orc</groupId>
+ <artifactId>orc-core</artifactId>
+ <version>${orc.spark.version}</version>
+ <scope>compile</scope>
+ </dependency>
+
<!-- Spark -->
<dependency>
<groupId>org.apache.spark</groupId>
@@ -2072,9 +2081,13 @@
<hudi.spark.common.modules.2>hudi-spark3.2plus-common</hudi.spark.common.modules.2>
<scalatest.version>${scalatest.spark3.version}</scalatest.version>
<kafka.version>${kafka.spark3.version}</kafka.version>
+ <!-- NOTE: Some Hudi modules require standalone Parquet/Orc/etc file-format dependency (hudi-hive-sync,
+ hudi-hadoop-mr, for ex). Since these Hudi modules might be used from w/in the execution engine(s)
+ bringing these file-formats as dependencies as well, we need to make sure that versions are
+ synchronized to avoid classpath ambiguity -->
<parquet.version>1.12.2</parquet.version>
+ <orc.spark.version>1.7.4</orc.spark.version>
<avro.version>1.11.1</avro.version>
- <orc.version>1.7.4</orc.version>
<antlr.version>4.8</antlr.version>
<fasterxml.spark3.version>2.13.3</fasterxml.spark3.version>
<fasterxml.version>${fasterxml.spark3.version}</fasterxml.version>
@@ -2109,6 +2122,13 @@
<hudi.spark.common.modules.1>hudi-spark3-common</hudi.spark.common.modules.1>
<scalatest.version>${scalatest.spark3.version}</scalatest.version>
<kafka.version>${kafka.spark3.version}</kafka.version>
+ <!-- NOTE: Some Hudi modules require standalone Parquet/Orc/etc file-format dependency (hudi-hive-sync,
+ hudi-hadoop-mr, for ex). Since these Hudi modules might be used from w/in the execution engine(s)
+ bringing these file-formats as dependencies as well, we need to make sure that versions are
+ synchronized to avoid classpath ambiguity -->
+ <parquet.version>1.10.1</parquet.version>
+ <orc.spark.version>1.5.13</orc.spark.version>
+ <avro.version>1.8.2</avro.version>
<antlr.version>4.8-1</antlr.version>
<fasterxml.version>${fasterxml.spark3.version}</fasterxml.version>
<fasterxml.jackson.databind.version>${fasterxml.spark3.version}</fasterxml.jackson.databind.version>
@@ -2143,9 +2163,13 @@
<hudi.spark.common.modules.2>hudi-spark3.2plus-common</hudi.spark.common.modules.2>
<scalatest.version>${scalatest.spark3.version}</scalatest.version>
<kafka.version>${kafka.spark3.version}</kafka.version>
+ <!-- NOTE: Some Hudi modules require standalone Parquet/Orc/etc file-format dependency (hudi-hive-sync,
+ hudi-hadoop-mr, for ex). Since these Hudi modules might be used from w/in the execution engine(s)
+ bringing these file-formats as dependencies as well, we need to make sure that versions are
+ synchronized to avoid classpath ambiguity -->
<parquet.version>1.12.2</parquet.version>
+ <orc.spark.version>1.6.12</orc.spark.version>
<avro.version>1.10.2</avro.version>
- <orc.version>1.6.12</orc.version>
<antlr.version>4.8</antlr.version>
<fasterxml.version>${fasterxml.spark3.version}</fasterxml.version>
<fasterxml.jackson.databind.version>${fasterxml.spark3.version}</fasterxml.jackson.databind.version>
@@ -2181,9 +2205,13 @@
<hudi.spark.common.modules.2>hudi-spark3.2plus-common</hudi.spark.common.modules.2>
<scalatest.version>${scalatest.spark3.version}</scalatest.version>
<kafka.version>${kafka.spark3.version}</kafka.version>
+ <!-- NOTE: Some Hudi modules require standalone Parquet/Orc/etc file-format dependency (hudi-hive-sync,
+ hudi-hadoop-mr, for ex). Since these Hudi modules might be used from w/in the execution engine(s)
+ bringing these file-formats as dependencies as well, we need to make sure that versions are
+ synchronized to avoid classpath ambiguity -->
<parquet.version>1.12.2</parquet.version>
+ <orc.spark.version>1.7.4</orc.spark.version>
<avro.version>1.11.1</avro.version>
- <orc.version>1.7.4</orc.version>
<antlr.version>4.8</antlr.version>
<fasterxml.spark3.version>2.13.3</fasterxml.spark3.version>
<fasterxml.version>${fasterxml.spark3.version}</fasterxml.version>
@@ -2208,6 +2236,9 @@
<profile>
<id>flink1.15</id>
+ <properties>
+ <orc.flink.version>1.5.6</orc.flink.version>
+ </properties>
<activation>
<property>
<name>flink1.15</name>
@@ -2220,6 +2251,7 @@
<flink.version>${flink1.14.version}</flink.version>
<hudi.flink.module>hudi-flink1.14.x</hudi.flink.module>
<flink.bundle.version>1.14</flink.bundle.version>
+ <orc.flink.version>1.5.6</orc.flink.version>
<flink.table.runtime.artifactId>flink-table-runtime_${scala.binary.version}</flink.table.runtime.artifactId>
<flink.table.planner.artifactId>flink-table-planner_${scala.binary.version}</flink.table.planner.artifactId>
<flink.parquet.artifactId>flink-parquet_${scala.binary.version}</flink.parquet.artifactId>
@@ -2242,6 +2274,7 @@
<flink.version>${flink1.13.version}</flink.version>
<hudi.flink.module>hudi-flink1.13.x</hudi.flink.module>
<flink.bundle.version>1.13</flink.bundle.version>
+ <orc.flink.version>1.5.6</orc.flink.version>
<flink.runtime.artifactId>flink-runtime_${scala.binary.version}</flink.runtime.artifactId>
<flink.table.runtime.artifactId>flink-table-runtime-blink_${scala.binary.version}</flink.table.runtime.artifactId>
<flink.table.planner.artifactId>flink-table-planner-blink_${scala.binary.version}</flink.table.planner.artifactId>