This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new dae15d6 [SPARK-31580][BUILD] Upgrade Apache ORC to 1.5.10
dae15d6 is described below
commit dae15d68d3feddd08aab4b5b831267d4f11b730a
Author: Dongjoon Hyun <[email protected]>
AuthorDate: Mon Apr 27 18:56:30 2020 -0700
[SPARK-31580][BUILD] Upgrade Apache ORC to 1.5.10
### What changes were proposed in this pull request?
This PR aims to upgrade Apache ORC to 1.5.10.
### Why are the changes needed?
Apache ORC 1.5.10 is a maintenance release with the following patches.
- [ORC-621](https://issues.apache.org/jira/browse/ORC-621) Need reader fix
for ORC-569
- [ORC-616](https://issues.apache.org/jira/browse/ORC-616) In Patched Base
encoding, the value of headerThirdByte goes beyond the range of byte
- [ORC-613](https://issues.apache.org/jira/browse/ORC-613)
OrcMapredRecordReader mis-reuse struct object when actual children schema
differs
- [ORC-610](https://issues.apache.org/jira/browse/ORC-610) Updated
Copyright year in the NOTICE file
The following is the release note.
-
https://issues.apache.org/jira/secure/ReleaseNote.jspa?projectId=12318320&version=12346912
### Does this PR introduce any user-facing change?
No.
### How was this patch tested?
Pass Jenkins with the existing ORC tests and a newly added test case.
- The first commit is already tested in the `hive-2.3` profile with both the
native ORC implementation and the Hive 2.3 ORC implementation.
(https://github.com/apache/spark/pull/28373#issuecomment-620265114)
- The latest run is for disabling the test case in the `hive-1.2`
profile, which doesn't use Apache ORC.
- `hive-1.2`:
https://github.com/apache/spark/pull/28373#issuecomment-620325906
Closes #28373 from dongjoon-hyun/SPARK-ORC-1.5.10.
Authored-by: Dongjoon Hyun <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
(cherry picked from commit 79eaaaf6daeff6b048a81f4ef60fcc48395a2772)
Signed-off-by: Dongjoon Hyun <[email protected]>
---
dev/deps/spark-deps-hadoop-2.7-hive-1.2 | 6 +++---
dev/deps/spark-deps-hadoop-2.7-hive-2.3 | 6 +++---
dev/deps/spark-deps-hadoop-3.2-hive-2.3 | 6 +++---
pom.xml | 2 +-
.../test-data/TestStringDictionary.testRowIndex.orc | Bin 0 -> 74580 bytes
.../sql/execution/datasources/orc/OrcSourceSuite.scala | 6 ++++++
.../apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala | 7 +++++++
7 files changed, 23 insertions(+), 10 deletions(-)
diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-1.2
b/dev/deps/spark-deps-hadoop-2.7-hive-1.2
index 6e8368b..82b0115 100644
--- a/dev/deps/spark-deps-hadoop-2.7-hive-1.2
+++ b/dev/deps/spark-deps-hadoop-2.7-hive-1.2
@@ -160,9 +160,9 @@ objenesis/2.5.1//objenesis-2.5.1.jar
okhttp/3.12.6//okhttp-3.12.6.jar
okio/1.15.0//okio-1.15.0.jar
opencsv/2.3//opencsv-2.3.jar
-orc-core/1.5.9/nohive/orc-core-1.5.9-nohive.jar
-orc-mapreduce/1.5.9/nohive/orc-mapreduce-1.5.9-nohive.jar
-orc-shims/1.5.9//orc-shims-1.5.9.jar
+orc-core/1.5.10/nohive/orc-core-1.5.10-nohive.jar
+orc-mapreduce/1.5.10/nohive/orc-mapreduce-1.5.10-nohive.jar
+orc-shims/1.5.10//orc-shims-1.5.10.jar
oro/2.0.8//oro-2.0.8.jar
osgi-resource-locator/1.0.3//osgi-resource-locator-1.0.3.jar
paranamer/2.8//paranamer-2.8.jar
diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-2.3
b/dev/deps/spark-deps-hadoop-2.7-hive-2.3
index beb6c83..fb2b894 100644
--- a/dev/deps/spark-deps-hadoop-2.7-hive-2.3
+++ b/dev/deps/spark-deps-hadoop-2.7-hive-2.3
@@ -175,9 +175,9 @@ objenesis/2.5.1//objenesis-2.5.1.jar
okhttp/3.12.6//okhttp-3.12.6.jar
okio/1.15.0//okio-1.15.0.jar
opencsv/2.3//opencsv-2.3.jar
-orc-core/1.5.9//orc-core-1.5.9.jar
-orc-mapreduce/1.5.9//orc-mapreduce-1.5.9.jar
-orc-shims/1.5.9//orc-shims-1.5.9.jar
+orc-core/1.5.10//orc-core-1.5.10.jar
+orc-mapreduce/1.5.10//orc-mapreduce-1.5.10.jar
+orc-shims/1.5.10//orc-shims-1.5.10.jar
oro/2.0.8//oro-2.0.8.jar
osgi-resource-locator/1.0.3//osgi-resource-locator-1.0.3.jar
paranamer/2.8//paranamer-2.8.jar
diff --git a/dev/deps/spark-deps-hadoop-3.2-hive-2.3
b/dev/deps/spark-deps-hadoop-3.2-hive-2.3
index 69c7cdf..db74c89 100644
--- a/dev/deps/spark-deps-hadoop-3.2-hive-2.3
+++ b/dev/deps/spark-deps-hadoop-3.2-hive-2.3
@@ -190,9 +190,9 @@ okhttp/2.7.5//okhttp-2.7.5.jar
okhttp/3.12.6//okhttp-3.12.6.jar
okio/1.15.0//okio-1.15.0.jar
opencsv/2.3//opencsv-2.3.jar
-orc-core/1.5.9//orc-core-1.5.9.jar
-orc-mapreduce/1.5.9//orc-mapreduce-1.5.9.jar
-orc-shims/1.5.9//orc-shims-1.5.9.jar
+orc-core/1.5.10//orc-core-1.5.10.jar
+orc-mapreduce/1.5.10//orc-mapreduce-1.5.10.jar
+orc-shims/1.5.10//orc-shims-1.5.10.jar
oro/2.0.8//oro-2.0.8.jar
osgi-resource-locator/1.0.3//osgi-resource-locator-1.0.3.jar
paranamer/2.8//paranamer-2.8.jar
diff --git a/pom.xml b/pom.xml
index 0547973..f6eca3b 100644
--- a/pom.xml
+++ b/pom.xml
@@ -135,7 +135,7 @@
<kafka.version>2.4.1</kafka.version>
<derby.version>10.12.1.1</derby.version>
<parquet.version>1.10.1</parquet.version>
- <orc.version>1.5.9</orc.version>
+ <orc.version>1.5.10</orc.version>
<orc.classifier></orc.classifier>
<hive.parquet.group>com.twitter</hive.parquet.group>
<hive.parquet.version>1.6.0</hive.parquet.version>
diff --git
a/sql/core/src/test/resources/test-data/TestStringDictionary.testRowIndex.orc
b/sql/core/src/test/resources/test-data/TestStringDictionary.testRowIndex.orc
new file mode 100644
index 0000000..cba483d
Binary files /dev/null and
b/sql/core/src/test/resources/test-data/TestStringDictionary.testRowIndex.orc
differ
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala
index 806f0d4..7387368 100644
---
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala
+++
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala
@@ -589,4 +589,10 @@ class OrcSourceSuite extends OrcSuite with
SharedSparkSession {
test("SPARK-11412 read and merge orc schemas in parallel") {
testMergeSchemasInParallel(OrcUtils.readOrcSchemasInParallel)
}
+
+ test("SPARK-31580: Read a file written before ORC-569") {
+ // Test ORC file came from ORC-621
+ val df =
readResourceOrcFile("test-data/TestStringDictionary.testRowIndex.orc")
+ assert(df.where("str < 'row 001000'").count() === 1000)
+ }
}
diff --git
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala
index f3e712d..91fd8a4 100644
---
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala
+++
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala
@@ -320,4 +320,11 @@ class HiveOrcSourceSuite extends OrcSuite with
TestHiveSingleton {
}
}
}
+
+ test("SPARK-31580: Read a file written before ORC-569") {
+ assume(HiveUtils.isHive23) // Hive 1.2 doesn't use Apache ORC
+ // Test ORC file came from ORC-621
+ val df =
readResourceOrcFile("test-data/TestStringDictionary.testRowIndex.orc")
+ assert(df.where("str < 'row 001000'").count() === 1000)
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]