This is an automated email from the ASF dual-hosted git repository.

wenningd pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git


The following commit(s) were added to refs/heads/master by this push:
     new 473be87aa5 Disable EmrFS file metadata caching and EMR Spark's data prefetcher feature
     new f52b93fd10 Merge pull request #6154 from rahil-c/rahil-c/disable-emrSpark-properties
473be87aa5 is described below

commit 473be87aa5d71939c2e8a367851b0e3b96744bc0
Author: Rahil Chertara <[email protected]>
AuthorDate: Wed Jul 20 17:04:00 2022 -0700

    Disable EmrFS file metadata caching and EMR Spark's data prefetcher feature
---
 .../src/main/scala/org/apache/hudi/DefaultSource.scala                 | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DefaultSource.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DefaultSource.scala
index 484debbb81..af8fb1b61c 100644
--- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DefaultSource.scala
+++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DefaultSource.scala
@@ -56,6 +56,9 @@ class DefaultSource extends RelationProvider
       // Enable "passPartitionByAsOptions" to support "write.partitionBy(...)"
       spark.conf.set("spark.sql.legacy.sources.write.passPartitionByAsOptions", "true")
     }
+    // Revisit EMR Spark and EMRFS incompatibilities, for now disable
+    spark.conf.set("spark.sql.dataPrefetch.enabled", "false")
+    spark.sparkContext.hadoopConfiguration.set("fs.s3.metadata.cache.expiration.seconds", "0")
   }
 
   private val log = LogManager.getLogger(classOf[DefaultSource])

Reply via email to