This is an automated email from the ASF dual-hosted git repository.
lesun pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-gobblin.git
The following commit(s) were added to refs/heads/master by this push:
new d112ac6 [GOBBLIN-1265] Switch the dependency of gobblin-orc with the
shadow jar instead
d112ac6 is described below
commit d112ac6b93091ddea2a703299e762d8f1ae6c526
Author: Lei Sun <[email protected]>
AuthorDate: Thu Sep 17 18:05:56 2020 -0700
[GOBBLIN-1265] Switch the dependency of gobblin-orc with the shadow jar
instead
Switch the dependency of gobblin-orc with the
shadow jar instead
Fix build and test for compaction module
Closes #3106 from autumnust/fixShadowDepsInORC
---
gobblin-compaction/build.gradle | 11 +++--------
.../writer/GenericRecordToOrcValueWriter.java | 23 +++++++++++-----------
.../apache/gobblin/writer/GobblinOrcWriter.java | 20 +++++++++----------
.../org/apache/gobblin/writer/OrcValueWriter.java | 2 +-
.../writer/GenericRecordToOrcValueWriterTest.java | 2 +-
.../gobblin/writer/GobblinOrcWriterTest.java | 4 ++--
gradle/scripts/dependencyDefinitions.gradle | 7 +++++--
7 files changed, 34 insertions(+), 35 deletions(-)
diff --git a/gobblin-compaction/build.gradle b/gobblin-compaction/build.gradle
index 4e607ad..d24019f 100644
--- a/gobblin-compaction/build.gradle
+++ b/gobblin-compaction/build.gradle
@@ -26,17 +26,11 @@ dependencies {
compile project(":gobblin-data-management")
compile project(":gobblin-runtime")
compile project(":gobblin-modules:gobblin-kafka-common")
+ compile externalDependency.orcMapreduce
+ compile externalDependency.orcCore
- // Given orc-mapreduce depends on hive version of hive-storage-api(2.4.0)
and conflicted
- // with hive-exec-core in older version(1.0.1), we need to shadow
ord-mapreduce's transitive deps.
- // and include direct orc-mapreduce library just as a compileOnly dependency
- compileOnly externalDependency.orcMapreduce
compile externalDependency.orcTools
- testCompileOnly externalDependency.orcMapreduce
- compile project(path: ":gobblin-modules:gobblin-orc-dep",
configuration:"shadow")
-
compile externalDependency.calciteCore
- testCompile externalDependency.calciteAvatica
compile externalDependency.jhyde
compile externalDependency.avro
compile externalDependency.commonsLang
@@ -61,6 +55,7 @@ dependencies {
runtimeOnly externalDependency.datanucleusRdbms
testCompile externalDependency.testng
+ testCompile externalDependency.calciteAvatica
}
diff --git
a/gobblin-modules/gobblin-orc/src/main/java/org/apache/gobblin/writer/GenericRecordToOrcValueWriter.java
b/gobblin-modules/gobblin-orc/src/main/java/org/apache/gobblin/writer/GenericRecordToOrcValueWriter.java
index 67352c7..21a05a5 100644
---
a/gobblin-modules/gobblin-orc/src/main/java/org/apache/gobblin/writer/GenericRecordToOrcValueWriter.java
+++
b/gobblin-modules/gobblin-orc/src/main/java/org/apache/gobblin/writer/GenericRecordToOrcValueWriter.java
@@ -31,18 +31,19 @@ import org.apache.avro.generic.GenericEnumSymbol;
import org.apache.avro.generic.GenericFixed;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.util.Utf8;
-import org.apache.hadoop.hive.common.type.HiveDecimal;
-import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.UnionColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+
import org.apache.orc.TypeDescription;
+import org.apache.orc.storage.common.type.HiveDecimal;
+import org.apache.orc.storage.ql.exec.vector.BytesColumnVector;
+import org.apache.orc.storage.ql.exec.vector.ColumnVector;
+import org.apache.orc.storage.ql.exec.vector.DecimalColumnVector;
+import org.apache.orc.storage.ql.exec.vector.DoubleColumnVector;
+import org.apache.orc.storage.ql.exec.vector.ListColumnVector;
+import org.apache.orc.storage.ql.exec.vector.LongColumnVector;
+import org.apache.orc.storage.ql.exec.vector.MapColumnVector;
+import org.apache.orc.storage.ql.exec.vector.StructColumnVector;
+import org.apache.orc.storage.ql.exec.vector.UnionColumnVector;
+import org.apache.orc.storage.ql.exec.vector.VectorizedRowBatch;
import com.google.common.annotations.VisibleForTesting;
diff --git
a/gobblin-modules/gobblin-orc/src/main/java/org/apache/gobblin/writer/GobblinOrcWriter.java
b/gobblin-modules/gobblin-orc/src/main/java/org/apache/gobblin/writer/GobblinOrcWriter.java
index 22957b0..0e07c0e 100644
---
a/gobblin-modules/gobblin-orc/src/main/java/org/apache/gobblin/writer/GobblinOrcWriter.java
+++
b/gobblin-modules/gobblin-orc/src/main/java/org/apache/gobblin/writer/GobblinOrcWriter.java
@@ -24,22 +24,22 @@ import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.UnionColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.avro.AvroObjectInspectorGenerator;
import org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils;
import org.apache.orc.OrcFile;
import org.apache.orc.TypeDescription;
import org.apache.orc.Writer;
+import org.apache.orc.storage.ql.exec.vector.BytesColumnVector;
+import org.apache.orc.storage.ql.exec.vector.ColumnVector;
+import org.apache.orc.storage.ql.exec.vector.DecimalColumnVector;
+import org.apache.orc.storage.ql.exec.vector.DoubleColumnVector;
+import org.apache.orc.storage.ql.exec.vector.ListColumnVector;
+import org.apache.orc.storage.ql.exec.vector.LongColumnVector;
+import org.apache.orc.storage.ql.exec.vector.MapColumnVector;
+import org.apache.orc.storage.ql.exec.vector.StructColumnVector;
+import org.apache.orc.storage.ql.exec.vector.UnionColumnVector;
+import org.apache.orc.storage.ql.exec.vector.VectorizedRowBatch;
import com.google.common.annotations.VisibleForTesting;
diff --git
a/gobblin-modules/gobblin-orc/src/main/java/org/apache/gobblin/writer/OrcValueWriter.java
b/gobblin-modules/gobblin-orc/src/main/java/org/apache/gobblin/writer/OrcValueWriter.java
index e500da7..4815de1 100644
---
a/gobblin-modules/gobblin-orc/src/main/java/org/apache/gobblin/writer/OrcValueWriter.java
+++
b/gobblin-modules/gobblin-orc/src/main/java/org/apache/gobblin/writer/OrcValueWriter.java
@@ -19,7 +19,7 @@ package org.apache.gobblin.writer;
import java.io.IOException;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.orc.storage.ql.exec.vector.VectorizedRowBatch;
/**
diff --git
a/gobblin-modules/gobblin-orc/src/test/java/org/apache/gobblin/writer/GenericRecordToOrcValueWriterTest.java
b/gobblin-modules/gobblin-orc/src/test/java/org/apache/gobblin/writer/GenericRecordToOrcValueWriterTest.java
index 4660369..7389991 100644
---
a/gobblin-modules/gobblin-orc/src/test/java/org/apache/gobblin/writer/GenericRecordToOrcValueWriterTest.java
+++
b/gobblin-modules/gobblin-orc/src/test/java/org/apache/gobblin/writer/GenericRecordToOrcValueWriterTest.java
@@ -30,7 +30,6 @@ import org.apache.avro.generic.GenericRecord;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
@@ -40,6 +39,7 @@ import org.apache.orc.TypeDescription;
import org.apache.orc.Writer;
import org.apache.orc.mapred.OrcStruct;
import org.apache.orc.mapred.OrcUnion;
+import org.apache.orc.storage.ql.exec.vector.VectorizedRowBatch;
import org.testng.Assert;
import org.testng.annotations.Test;
diff --git
a/gobblin-modules/gobblin-orc/src/test/java/org/apache/gobblin/writer/GobblinOrcWriterTest.java
b/gobblin-modules/gobblin-orc/src/test/java/org/apache/gobblin/writer/GobblinOrcWriterTest.java
index 5a31530..f3bd2dc 100644
---
a/gobblin-modules/gobblin-orc/src/test/java/org/apache/gobblin/writer/GobblinOrcWriterTest.java
+++
b/gobblin-modules/gobblin-orc/src/test/java/org/apache/gobblin/writer/GobblinOrcWriterTest.java
@@ -30,9 +30,9 @@ import org.apache.avro.io.DecoderFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector;
import org.apache.hadoop.io.Writable;
+import org.apache.orc.storage.ql.exec.vector.BytesColumnVector;
+import org.apache.orc.storage.ql.exec.vector.ListColumnVector;
import org.mockito.Mockito;
import org.testng.Assert;
import org.testng.annotations.Test;
diff --git a/gradle/scripts/dependencyDefinitions.gradle
b/gradle/scripts/dependencyDefinitions.gradle
index 2ba9eb2..6a165f8 100644
--- a/gradle/scripts/dependencyDefinitions.gradle
+++ b/gradle/scripts/dependencyDefinitions.gradle
@@ -171,8 +171,11 @@ ext.externalDependency = [
"opencsv": "com.opencsv:opencsv:3.8",
"grok": "io.thekraken:grok:0.1.5",
"hadoopAdl" : "org.apache.hadoop:hadoop-azure-datalake:3.0.0-alpha2",
- "orcMapreduce":"org.apache.orc:orc-mapreduce:1.6.3",
- "orcCore": "org.apache.orc:orc-core:1.6.3",
+ /**
+ * Use the "nohive" artifacts to avoid conflicts with the Hive 1.x versions that exist on the classpath
+ */
+ "orcMapreduce":"org.apache.orc:orc-mapreduce:1.6.3:nohive",
+ "orcCore": "org.apache.orc:orc-core:1.6.3:nohive",
"orcTools":"org.apache.orc:orc-tools:1.6.3",
'parquet': 'org.apache.parquet:parquet-hadoop:1.10.1',
'parquetAvro': 'org.apache.parquet:parquet-avro:1.10.1',