This is an automated email from the ASF dual-hosted git repository.

yangzy pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new 91800b43fa [GLUTEN-8616] [VL] Make filescan limit for encrypted fallback as configurable (#8621)
91800b43fa is described below

commit 91800b43fa1703517a92cf19150b7261891ced8b
Author: Arnav Balyan <[email protected]>
AuthorDate: Fri Feb 7 18:19:29 2025 +0530

    [GLUTEN-8616] [VL] Make filescan limit for encrypted fallback as configurable (#8621)
---
 .../org/apache/gluten/backendsapi/velox/VeloxBackend.scala   |  7 ++++++-
 .../scala/org/apache/gluten/utils/ParquetMetadataUtils.scala |  5 +++--
 .../main/scala/org/apache/gluten/config/GlutenConfig.scala   | 12 ++++++++++++
 3 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxBackend.scala b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxBackend.scala
index 61ed9a2de1..56d6fb2e65 100644
--- a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxBackend.scala
+++ b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxBackend.scala
@@ -200,8 +200,13 @@ object VeloxBackendSettings extends BackendSettingsApi {
         return None
       }
 
+      val fileLimit = GlutenConfig.get.parquetEncryptionValidationFileLimit
       val encryptionResult =
-        ParquetMetadataUtils.validateEncryption(format, rootPaths, serializableHadoopConf)
+        ParquetMetadataUtils.validateEncryption(
+          format,
+          rootPaths,
+          serializableHadoopConf,
+          fileLimit)
       if (encryptionResult.ok()) {
         None
       } else {
diff --git a/backends-velox/src/main/scala/org/apache/gluten/utils/ParquetMetadataUtils.scala b/backends-velox/src/main/scala/org/apache/gluten/utils/ParquetMetadataUtils.scala
index 9f43575cf9..48d0629268 100644
--- a/backends-velox/src/main/scala/org/apache/gluten/utils/ParquetMetadataUtils.scala
+++ b/backends-velox/src/main/scala/org/apache/gluten/utils/ParquetMetadataUtils.scala
@@ -46,7 +46,8 @@ object ParquetMetadataUtils {
   def validateEncryption(
       format: ReadFileFormat,
       rootPaths: Seq[String],
-      serializableHadoopConf: Option[SerializableConfiguration]
+      serializableHadoopConf: Option[SerializableConfiguration],
+      fileLimit: Int
   ): ValidationResult = {
     if (format != ParquetReadFormat || rootPaths.isEmpty) {
       return ValidationResult.succeeded
@@ -59,7 +60,7 @@ object ParquetMetadataUtils {
         val fs = new Path(rootPath).getFileSystem(conf)
         try {
           val encryptionDetected =
-            checkForEncryptionWithLimit(fs, new Path(rootPath), conf, fileLimit = 10)
+            checkForEncryptionWithLimit(fs, new Path(rootPath), conf, fileLimit = fileLimit)
           if (encryptionDetected) {
             return ValidationResult.failed("Encrypted Parquet file detected.")
           }
diff --git a/shims/common/src/main/scala/org/apache/gluten/config/GlutenConfig.scala b/shims/common/src/main/scala/org/apache/gluten/config/GlutenConfig.scala
index 1cdc3d552a..e9e2dbac54 100644
--- a/shims/common/src/main/scala/org/apache/gluten/config/GlutenConfig.scala
+++ b/shims/common/src/main/scala/org/apache/gluten/config/GlutenConfig.scala
@@ -503,6 +503,8 @@ class GlutenConfig(conf: SQLConf) extends Logging {
 
   def autoAdjustStageFallenNodeThreshold: Double =
     getConf(AUTO_ADJUST_STAGE_RESOURCES_FALLEN_NODE_RATIO_THRESHOLD)
+
+  def parquetEncryptionValidationFileLimit: Int = getConf(ENCRYPTED_PARQUET_FALLBACK_FILE_LIMIT)
 }
 
 object GlutenConfig {
@@ -2310,4 +2312,14 @@ object GlutenConfig {
         "count exceeds the total node count ratio.")
       .doubleConf
       .createWithDefault(0.5d)
+
+  val ENCRYPTED_PARQUET_FALLBACK_FILE_LIMIT =
+    buildConf("spark.gluten.sql.fallbackEncryptedParquet.limit")
+      .internal()
+      .doc("If supplied, `limit` number of files will be checked to determine encryption " +
+        "and falling back java scan")
+      .intConf
+      .checkValue(_ > 0, s"must be positive.")
+      .createWithDefault(10)
+
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to