This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch branch-3.5
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.5 by this push:
new 4f9dbc33627e [SPARK-49066][SQL][TESTS][3.5] Refactor
`OrcEncryptionSuite` and make `spark.hadoop.hadoop.security.key.provider.path`
effective only within `OrcEncryptionSuite`
4f9dbc33627e is described below
commit 4f9dbc33627e956a83e757211a73a6895103d264
Author: yangjie01 <[email protected]>
AuthorDate: Thu Aug 1 12:22:44 2024 -0700
[SPARK-49066][SQL][TESTS][3.5] Refactor `OrcEncryptionSuite` and make
`spark.hadoop.hadoop.security.key.provider.path` effective only within
`OrcEncryptionSuite`
### What changes were proposed in this pull request?
This pr moves the global scope test configuration
`spark.hadoop.hadoop.security.key.provider.path`, which is configured in the
parent `pom.xml` and `SparkBuild.scala`, to `OrcEncryptionSuite` to ensure that
it is effective only within `OrcEncryptionSuite`.
To achieve this, the pr also refactors `OrcEncryptionSuite`:
1. Overrides `beforeAll` to back up the contents of
`CryptoUtils#keyProviderCache`.
2. Overrides `afterAll` to restore the contents of
`CryptoUtils#keyProviderCache`.
This ensures that `CryptoUtils#keyProviderCache` is isolated during the
test process of `OrcEncryptionSuite`.
### Why are the changes needed?
The test configuration `spark.hadoop.hadoop.security.key.provider.path` in
the parent `pom.xml` and `SparkBuild.scala` is effective globally, which leads
to the possibility that other Orc writing test cases, besides
`OrcEncryptionSuite`, might also be affected by this configuration and use
`test.org.apache.spark.sql.execution.datasources.orc.FakeKeyProvider.Factory`.
### Does this PR introduce _any_ user-facing change?
No, just for test.
### How was this patch tested?
Pass GitHub Actions
### Was this patch authored or co-authored using generative AI tooling?
No
Closes #47561 from LuciferYang/SPARK-49066-3.5.
Authored-by: yangjie01 <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
---
pom.xml | 3 --
project/SparkBuild.scala | 1 -
.../datasources/orc/OrcEncryptionSuite.scala | 34 +++++++++++++++++++++-
3 files changed, 33 insertions(+), 5 deletions(-)
diff --git a/pom.xml b/pom.xml
index 57bae938891d..61780803afb5 100644
--- a/pom.xml
+++ b/pom.xml
@@ -3046,7 +3046,6 @@
<spark.ui.showConsoleProgress>false</spark.ui.showConsoleProgress>
<spark.unsafe.exceptionOnMemoryLeak>true</spark.unsafe.exceptionOnMemoryLeak>
<spark.memory.debugFill>true</spark.memory.debugFill>
-
<spark.hadoop.hadoop.security.key.provider.path>test:///</spark.hadoop.hadoop.security.key.provider.path>
<!-- Needed by sql/hive tests. -->
<test.src.tables>src</test.src.tables>
<hive.conf.validation>false</hive.conf.validation>
@@ -3103,8 +3102,6 @@
<spark.test.docker.removePulledImage>${spark.test.docker.removePulledImage}</spark.test.docker.removePulledImage>
<!-- Needed by sql/hive tests. -->
<test.src.tables>__not_used__</test.src.tables>
- <!--SPARK-42934: Need by `OrcEncryptionSuite` -->
-
<spark.hadoop.hadoop.security.key.provider.path>test:///</spark.hadoop.hadoop.security.key.provider.path>
</systemProperties>
<tagsToExclude>${test.exclude.tags},${test.default.exclude.tags}</tagsToExclude>
<tagsToInclude>${test.include.tags}</tagsToInclude>
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index 25f04f7bff31..e8c52dc0aff3 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -1600,7 +1600,6 @@ object TestSettings {
(Test / javaOptions) += "-Dspark.ui.enabled=false",
(Test / javaOptions) += "-Dspark.ui.showConsoleProgress=false",
(Test / javaOptions) += "-Dspark.unsafe.exceptionOnMemoryLeak=true",
- (Test / javaOptions) +=
"-Dspark.hadoop.hadoop.security.key.provider.path=test:///",
(Test / javaOptions) += "-Dhive.conf.validation=false",
(Test / javaOptions) += "-Dsun.io.serialization.extendedDebugInfo=false",
(Test / javaOptions) += "-Dderby.system.durability=test",
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcEncryptionSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcEncryptionSuite.scala
index b7d29588f6bf..575f230729eb 100644
---
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcEncryptionSuite.scala
+++
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcEncryptionSuite.scala
@@ -17,20 +17,52 @@
package org.apache.spark.sql.execution.datasources.orc
+import java.util.{Map => JMap}
import java.util.Random
-import org.apache.orc.impl.HadoopShimsFactory
+import scala.collection.mutable
+import org.apache.orc.impl.{CryptoUtils, HadoopShimsFactory, KeyProvider}
+
+import org.apache.spark.SparkConf
import org.apache.spark.sql.Row
import org.apache.spark.sql.test.SharedSparkSession
class OrcEncryptionSuite extends OrcTest with SharedSparkSession {
import testImplicits._
+ override def sparkConf: SparkConf = {
+ super.sparkConf.set("spark.hadoop.hadoop.security.key.provider.path",
"test:///")
+ }
+
+ override def beforeAll(): Unit = {
+ // Backup `CryptoUtils#keyProviderCache` and clear it.
+ keyProviderCacheRef.entrySet()
+ .forEach(e => keyProviderCacheBackup.put(e.getKey, e.getValue))
+ keyProviderCacheRef.clear()
+ super.beforeAll()
+ }
+
+ override def afterAll(): Unit = {
+ super.afterAll()
+ // Restore `CryptoUtils#keyProviderCache`.
+ keyProviderCacheRef.clear()
+ keyProviderCacheBackup.foreach { case (k, v) => keyProviderCacheRef.put(k,
v) }
+ }
+
val originalData = Seq(("123456789", "[email protected]", "Dongjoon Hyun"))
val rowDataWithoutKey =
Row(null,
"841626795E7D351555B835A002E3BF10669DE9B81C95A3D59E10865AC37EA7C3", "Dongjoon
Hyun")
+ private val keyProviderCacheBackup: mutable.Map[String, KeyProvider] =
mutable.Map.empty
+
+ private val keyProviderCacheRef: JMap[String, KeyProvider] = {
+ val clazz = classOf[CryptoUtils]
+ val field = clazz.getDeclaredField("keyProviderCache")
+ field.setAccessible(true)
+ field.get(null).asInstanceOf[JMap[String, KeyProvider]]
+ }
+
test("Write and read an encrypted file") {
val conf = spark.sessionState.newHadoopConf()
val provider = HadoopShimsFactory.get.getHadoopKeyProvider(conf, new
Random)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]