This is an automated email from the ASF dual-hosted git repository.
yangzy pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 91800b43fa [GLUTEN-8616] [VL] Make filescan limit for encrypted
fallback as configurable (#8621)
91800b43fa is described below
commit 91800b43fa1703517a92cf19150b7261891ced8b
Author: Arnav Balyan <[email protected]>
AuthorDate: Fri Feb 7 18:19:29 2025 +0530
[GLUTEN-8616] [VL] Make filescan limit for encrypted fallback as
configurable (#8621)
---
.../org/apache/gluten/backendsapi/velox/VeloxBackend.scala | 7 ++++++-
.../scala/org/apache/gluten/utils/ParquetMetadataUtils.scala | 5 +++--
.../main/scala/org/apache/gluten/config/GlutenConfig.scala | 12 ++++++++++++
3 files changed, 21 insertions(+), 3 deletions(-)
diff --git a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxBackend.scala b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxBackend.scala
index 61ed9a2de1..56d6fb2e65 100644
--- a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxBackend.scala
+++ b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxBackend.scala
@@ -200,8 +200,13 @@ object VeloxBackendSettings extends BackendSettingsApi {
return None
}
+ val fileLimit = GlutenConfig.get.parquetEncryptionValidationFileLimit
val encryptionResult =
- ParquetMetadataUtils.validateEncryption(format, rootPaths, serializableHadoopConf)
+ ParquetMetadataUtils.validateEncryption(
+ format,
+ rootPaths,
+ serializableHadoopConf,
+ fileLimit)
if (encryptionResult.ok()) {
None
} else {
diff --git a/backends-velox/src/main/scala/org/apache/gluten/utils/ParquetMetadataUtils.scala b/backends-velox/src/main/scala/org/apache/gluten/utils/ParquetMetadataUtils.scala
index 9f43575cf9..48d0629268 100644
--- a/backends-velox/src/main/scala/org/apache/gluten/utils/ParquetMetadataUtils.scala
+++ b/backends-velox/src/main/scala/org/apache/gluten/utils/ParquetMetadataUtils.scala
@@ -46,7 +46,8 @@ object ParquetMetadataUtils {
def validateEncryption(
format: ReadFileFormat,
rootPaths: Seq[String],
- serializableHadoopConf: Option[SerializableConfiguration]
+ serializableHadoopConf: Option[SerializableConfiguration],
+ fileLimit: Int
): ValidationResult = {
if (format != ParquetReadFormat || rootPaths.isEmpty) {
return ValidationResult.succeeded
@@ -59,7 +60,7 @@ object ParquetMetadataUtils {
val fs = new Path(rootPath).getFileSystem(conf)
try {
val encryptionDetected =
- checkForEncryptionWithLimit(fs, new Path(rootPath), conf, fileLimit = 10)
+ checkForEncryptionWithLimit(fs, new Path(rootPath), conf, fileLimit = fileLimit)
if (encryptionDetected) {
return ValidationResult.failed("Encrypted Parquet file detected.")
}
diff --git a/shims/common/src/main/scala/org/apache/gluten/config/GlutenConfig.scala b/shims/common/src/main/scala/org/apache/gluten/config/GlutenConfig.scala
index 1cdc3d552a..e9e2dbac54 100644
--- a/shims/common/src/main/scala/org/apache/gluten/config/GlutenConfig.scala
+++ b/shims/common/src/main/scala/org/apache/gluten/config/GlutenConfig.scala
@@ -503,6 +503,8 @@ class GlutenConfig(conf: SQLConf) extends Logging {
def autoAdjustStageFallenNodeThreshold: Double =
getConf(AUTO_ADJUST_STAGE_RESOURCES_FALLEN_NODE_RATIO_THRESHOLD)
+
+ def parquetEncryptionValidationFileLimit: Int = getConf(ENCRYPTED_PARQUET_FALLBACK_FILE_LIMIT)
}
object GlutenConfig {
@@ -2310,4 +2312,14 @@ object GlutenConfig {
"count exceeds the total node count ratio.")
.doubleConf
.createWithDefault(0.5d)
+
+ val ENCRYPTED_PARQUET_FALLBACK_FILE_LIMIT =
+ buildConf("spark.gluten.sql.fallbackEncryptedParquet.limit")
+ .internal()
+ .doc("If supplied, `limit` number of files will be checked to determine encryption " +
+ "and falling back java scan")
+ .intConf
+ .checkValue(_ > 0, s"must be positive.")
+ .createWithDefault(10)
+
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]