This is an automated email from the ASF dual-hosted git repository.
vinoth pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push:
new 32ade368d89 [MINOR] Clean default Hadoop configuration values in tests (#10495)
32ade368d89 is described below
commit 32ade368d899ede5c8e7854863945864604b5692
Author: Lin Liu <[email protected]>
AuthorDate: Tue Jan 16 14:24:23 2024 -0800
[MINOR] Clean default Hadoop configuration values in tests (#10495)
* [MINOR] Clean default Hadoop configurations for SparkContext
These default Hadoop configurations are not used in Hudi tests.
* Consolidating the code into a helper class
---------
Co-authored-by: vinoth chandar <[email protected]>
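For context, the helper added below clears Spark's Hadoop configuration by swapping in new Configuration(false), which skips loading the default resources (core-default.xml, core-site.xml) and therefore starts out empty. The following is a minimal sketch of that behavior only, not code from this commit; the class name CleanHadoopConfSketch is hypothetical:

    import org.apache.hadoop.conf.Configuration;

    public class CleanHadoopConfSketch {
      public static void main(String[] args) {
        // loadDefaults = true pulls in core-default.xml / core-site.xml values
        Configuration withDefaults = new Configuration(true);
        // loadDefaults = false starts with no entries at all
        Configuration clean = new Configuration(false);
        System.out.println("with defaults: " + withDefaults.size() + " entries");
        System.out.println("clean: " + clean.size() + " entries");
      }
    }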
---
.../org/apache/hudi/testutils/HoodieClientTestUtils.java | 14 ++++++++++++++
.../hudi/testutils/HoodieSparkClientTestHarness.java | 9 ++++++---
.../hudi/testutils/SparkClientFunctionalTestHarness.java | 1 +
3 files changed, 21 insertions(+), 3 deletions(-)
diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieClientTestUtils.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieClientTestUtils.java
index 991c615c35d..55619a2a24b 100644
--- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieClientTestUtils.java
+++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieClientTestUtils.java
@@ -53,6 +53,7 @@ import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileScanner;
import org.apache.spark.SparkConf;
+import org.apache.spark.SparkContext;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
@@ -61,6 +62,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
+import java.lang.reflect.Field;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
@@ -125,6 +127,18 @@ public class HoodieClientTestUtils {
return SparkRDDReadClient.addHoodieSupport(sparkConf);
}
+  public static void overrideSparkHadoopConfiguration(SparkContext sparkContext) {
+    try {
+      // Clean the default Hadoop configurations since in our Hudi tests they are not used.
+      Field hadoopConfigurationField = sparkContext.getClass().getDeclaredField("_hadoopConfiguration");
+      hadoopConfigurationField.setAccessible(true);
+      Configuration testHadoopConfig = new Configuration(false);
+      hadoopConfigurationField.set(sparkContext, testHadoopConfig);
+    } catch (NoSuchFieldException | IllegalAccessException e) {
+      LOG.warn(e.getMessage());
+    }
+  }
+
private static HashMap<String, String> getLatestFileIDsToFullPath(String basePath, HoodieTimeline commitTimeline,
List<HoodieInstant> commitsToReturn) throws IOException {
HashMap<String, String> fileIdToFullPath = new HashMap<>();
diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieSparkClientTestHarness.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieSparkClientTestHarness.java
index 2a83baa018c..59cfcb4bb6d 100644
--- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieSparkClientTestHarness.java
+++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieSparkClientTestHarness.java
@@ -69,6 +69,8 @@ import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Path;
+import org.apache.spark.SparkConf;
+import org.apache.spark.SparkContext;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SQLContext;
@@ -191,11 +193,12 @@ public abstract class HoodieSparkClientTestHarness extends HoodieWriterClientTes
}
// Initialize a local spark env
-    jsc = new JavaSparkContext(HoodieClientTestUtils.getSparkConfForTest(appName + "#" + testMethodName));
+    SparkConf sc = HoodieClientTestUtils.getSparkConfForTest(appName + "#" + testMethodName);
+    SparkContext sparkContext = new SparkContext(sc);
+    HoodieClientTestUtils.overrideSparkHadoopConfiguration(sparkContext);
+    jsc = new JavaSparkContext(sparkContext);
jsc.setLogLevel("ERROR");
-
hadoopConf = jsc.hadoopConfiguration();
-
sparkSession = SparkSession.builder()
.withExtensions(JFunction.toScala(sparkSessionExtensions -> {
sparkSessionExtensionsInjector.ifPresent(injector -> injector.accept(sparkSessionExtensions));
diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/SparkClientFunctionalTestHarness.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/SparkClientFunctionalTestHarness.java
index 511613d9044..14d325bfdac 100644
--- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/SparkClientFunctionalTestHarness.java
+++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/SparkClientFunctionalTestHarness.java
@@ -201,6 +201,7 @@ public class SparkClientFunctionalTestHarness implements SparkProvider, HoodieMe
SparkRDDReadClient.addHoodieSupport(sparkConf);
spark = SparkSession.builder().config(sparkConf).getOrCreate();
sqlContext = spark.sqlContext();
+    HoodieClientTestUtils.overrideSparkHadoopConfiguration(spark.sparkContext());
jsc = new JavaSparkContext(spark.sparkContext());
context = new HoodieSparkEngineContext(jsc);
timelineService = HoodieClientTestUtils.initTimelineService(