This is an automated email from the ASF dual-hosted git repository. yihua pushed a commit to branch release-0.14.1-spark35-scala213 in repository https://gitbox.apache.org/repos/asf/hudi.git
commit 0063da8b0562edcf77216be2a193fae3e9fba496 Author: Lin Liu <[email protected]> AuthorDate: Tue Jan 16 14:24:23 2024 -0800 [MINOR] Clean default Hadoop configuration values in tests (#10495) * [MINOR] Clean default Hadoop configurations for SparkContext These default Hadoop configurations are not used in Hudi tests. * Consolidating the code into a helper class --------- Co-authored-by: vinoth chandar <[email protected]> --- .../org/apache/hudi/testutils/HoodieClientTestUtils.java | 14 ++++++++++++++ .../hudi/testutils/HoodieSparkClientTestHarness.java | 9 ++++++--- .../hudi/testutils/SparkClientFunctionalTestHarness.java | 1 + 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieClientTestUtils.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieClientTestUtils.java index a7808ea9382..d7eb97f31a8 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieClientTestUtils.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieClientTestUtils.java @@ -53,6 +53,7 @@ import org.apache.hadoop.hbase.io.hfile.CacheConfig; import org.apache.hadoop.hbase.io.hfile.HFile; import org.apache.hadoop.hbase.io.hfile.HFileScanner; import org.apache.spark.SparkConf; +import org.apache.spark.SparkContext; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; @@ -61,6 +62,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; +import java.lang.reflect.Field; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; @@ -124,6 +126,18 @@ public class HoodieClientTestUtils { return SparkRDDReadClient.addHoodieSupport(sparkConf); } + public static void overrideSparkHadoopConfiguration(SparkContext sparkContext) { + try { + // Clean the default Hadoop configurations since in our Hudi tests they are not used. + Field hadoopConfigurationField = sparkContext.getClass().getDeclaredField("_hadoopConfiguration"); + hadoopConfigurationField.setAccessible(true); + Configuration testHadoopConfig = new Configuration(false); + hadoopConfigurationField.set(sparkContext, testHadoopConfig); + } catch (NoSuchFieldException | IllegalAccessException e) { + LOG.warn(e.getMessage()); + } + } + private static HashMap<String, String> getLatestFileIDsToFullPath(String basePath, HoodieTimeline commitTimeline, List<HoodieInstant> commitsToReturn) throws IOException { HashMap<String, String> fileIdToFullPath = new HashMap<>(); diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieSparkClientTestHarness.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieSparkClientTestHarness.java index 299c4ab4b79..b9b2fe2c869 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieSparkClientTestHarness.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieSparkClientTestHarness.java @@ -70,6 +70,8 @@ import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.LocalFileSystem; import org.apache.hadoop.fs.Path; +import org.apache.spark.SparkConf; +import org.apache.spark.SparkContext; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.SQLContext; @@ -192,11 +194,12 @@ public abstract class HoodieSparkClientTestHarness extends HoodieWriterClientTes } // Initialize a local spark env - jsc = new JavaSparkContext(HoodieClientTestUtils.getSparkConfForTest(appName + "#" + testMethodName)); + SparkConf sc = HoodieClientTestUtils.getSparkConfForTest(appName + "#" + testMethodName); + SparkContext sparkContext = new SparkContext(sc); + HoodieClientTestUtils.overrideSparkHadoopConfiguration(sparkContext); + jsc = new JavaSparkContext(sparkContext); jsc.setLogLevel("ERROR"); - hadoopConf = jsc.hadoopConfiguration(); - sparkSession = SparkSession.builder() .withExtensions(JFunction.toScala(sparkSessionExtensions -> { sparkSessionExtensionsInjector.ifPresent(injector -> injector.accept(sparkSessionExtensions)); diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/SparkClientFunctionalTestHarness.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/SparkClientFunctionalTestHarness.java index 511613d9044..14d325bfdac 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/SparkClientFunctionalTestHarness.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/SparkClientFunctionalTestHarness.java @@ -201,6 +201,7 @@ public class SparkClientFunctionalTestHarness implements SparkProvider, HoodieMe SparkRDDReadClient.addHoodieSupport(sparkConf); spark = SparkSession.builder().config(sparkConf).getOrCreate(); sqlContext = spark.sqlContext(); + HoodieClientTestUtils.overrideSparkHadoopConfiguration(spark.sparkContext()); jsc = new JavaSparkContext(spark.sparkContext()); context = new HoodieSparkEngineContext(jsc); timelineService = HoodieClientTestUtils.initTimelineService(
