This is an automated email from the ASF dual-hosted git repository.
wenningd pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push:
new 473be87aa5 Disable EmrFS file metadata caching and EMR Spark's data prefetcher feature
new f52b93fd10 Merge pull request #6154 from rahil-c/rahil-c/disable-emrSpark-properties
473be87aa5 is described below
commit 473be87aa5d71939c2e8a367851b0e3b96744bc0
Author: Rahil Chertara <[email protected]>
AuthorDate: Wed Jul 20 17:04:00 2022 -0700
Disable EmrFS file metadata caching and EMR Spark's data prefetcher feature
---
.../src/main/scala/org/apache/hudi/DefaultSource.scala | 3 +++
1 file changed, 3 insertions(+)
diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DefaultSource.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DefaultSource.scala
index 484debbb81..af8fb1b61c 100644
--- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DefaultSource.scala
+++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DefaultSource.scala
@@ -56,6 +56,9 @@ class DefaultSource extends RelationProvider
// Enable "passPartitionByAsOptions" to support "write.partitionBy(...)"
spark.conf.set("spark.sql.legacy.sources.write.passPartitionByAsOptions", "true")
}
+ // Revisit EMR Spark and EMRFS incompatibilities, for now disable
+ spark.conf.set("spark.sql.dataPrefetch.enabled", "false")
+ spark.sparkContext.hadoopConfiguration.set("fs.s3.metadata.cache.expiration.seconds", "0")
}
private val log = LogManager.getLogger(classOf[DefaultSource])