This is an automated email from the ASF dual-hosted git repository.

ulyssesyou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new ec6db12dd [VL] Allow user to specify os to load corresponding 
third-party libraries (#5549)
ec6db12dd is described below

commit ec6db12dd9227d2154be9be71e6607237ded45df
Author: Xiduo You <[email protected]>
AuthorDate: Fri Apr 26 18:24:21 2024 +0800

    [VL] Allow user to specify os to load corresponding third-party libraries 
(#5549)
    
    ## What changes were proposed in this pull request?
    
    Some operating systems are derived from CentOS or Ubuntu, and it is hard to 
add each of them to the codebase one by one. This PR adds new configs to specify 
the system OS and its version manually.
    - spark.gluten.loadLibOS
    - spark.gluten.loadLibOSVersion
    
    ## How was this patch tested?
    
    N/A
---
 .../backendsapi/velox/VeloxListenerApi.scala       | 48 +++++++++++++++-------
 docs/Configuration.md                              |  2 +
 .../scala/org/apache/gluten/GlutenConfig.scala     |  2 +
 3 files changed, 37 insertions(+), 15 deletions(-)

diff --git 
a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxListenerApi.scala
 
b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxListenerApi.scala
index 9f1cde2be..1eaf92b5a 100644
--- 
a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxListenerApi.scala
+++ 
b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxListenerApi.scala
@@ -60,19 +60,11 @@ class VeloxListenerApi extends ListenerApi {
 
   override def onExecutorShutdown(): Unit = shutdown()
 
-  private def loadLibFromJar(load: JniLibLoader): Unit = {
-    val system = "cat /etc/os-release".!!
-    val systemNamePattern = "^NAME=\"?(.*)\"?".r
-    val systemVersionPattern = "^VERSION=\"?(.*)\"?".r
-    val systemInfoLines = system.stripMargin.split("\n")
-    val systemNamePattern(systemName) =
-      systemInfoLines.find(_.startsWith("NAME=")).getOrElse("")
-    val systemVersionPattern(systemVersion) =
-      systemInfoLines.find(_.startsWith("VERSION=")).getOrElse("")
-    if (systemName.isEmpty || systemVersion.isEmpty) {
-      throw new GlutenException("Failed to get OS name and version info.")
-    }
-    val loader = if (systemName.contains("Ubuntu") && 
systemVersion.startsWith("20.04")) {
+  private def getLibraryLoaderForOS(
+      systemName: String,
+      systemVersion: String,
+      system: String): SharedLibraryLoader = {
+    if (systemName.contains("Ubuntu") && systemVersion.startsWith("20.04")) {
       new SharedLibraryLoaderUbuntu2004
     } else if (systemName.contains("Ubuntu") && 
systemVersion.startsWith("22.04")) {
       new SharedLibraryLoaderUbuntu2204
@@ -100,11 +92,37 @@ class VeloxListenerApi extends ListenerApi {
       new SharedLibraryLoaderDebian12
     } else {
       throw new GlutenException(
-        "Found unsupported OS! Currently, Gluten's Velox backend" +
+        s"Found unsupported OS($systemName, $systemVersion)! Currently, 
Gluten's Velox backend" +
           " only supports Ubuntu 20.04/22.04, CentOS 7/8, " +
           "Alibaba Cloud Linux 2/3 & Anolis 7/8, tencentos 3.2, RedHat 7/8, " +
           "Debian 11/12.")
     }
+  }
+
+  private def loadLibFromJar(load: JniLibLoader, conf: SparkConf): Unit = {
+    val systemName = conf.getOption(GlutenConfig.GLUTEN_LOAD_LIB_OS)
+    val loader = if (systemName.isDefined) {
+      val systemVersion = 
conf.getOption(GlutenConfig.GLUTEN_LOAD_LIB_OS_VERSION)
+      if (systemVersion.isEmpty) {
+        throw new GlutenException(
+          s"${GlutenConfig.GLUTEN_LOAD_LIB_OS_VERSION} must be specified when 
specifies the " +
+            s"${GlutenConfig.GLUTEN_LOAD_LIB_OS}")
+      }
+      getLibraryLoaderForOS(systemName.get, systemVersion.get, "")
+    } else {
+      val system = "cat /etc/os-release".!!
+      val systemNamePattern = "^NAME=\"?(.*)\"?".r
+      val systemVersionPattern = "^VERSION=\"?(.*)\"?".r
+      val systemInfoLines = system.stripMargin.split("\n")
+      val systemNamePattern(systemName) =
+        systemInfoLines.find(_.startsWith("NAME=")).getOrElse("")
+      val systemVersionPattern(systemVersion) =
+        systemInfoLines.find(_.startsWith("VERSION=")).getOrElse("")
+      if (systemName.isEmpty || systemVersion.isEmpty) {
+        throw new GlutenException("Failed to get OS name and version info.")
+      }
+      getLibraryLoaderForOS(systemName, systemVersion, system)
+    }
     loader.loadLib(load)
   }
 
@@ -114,7 +132,7 @@ class VeloxListenerApi extends ListenerApi {
         GlutenConfig.GLUTEN_LOAD_LIB_FROM_JAR,
         GlutenConfig.GLUTEN_LOAD_LIB_FROM_JAR_DEFAULT)
     ) {
-      loadLibFromJar(loader)
+      loadLibFromJar(loader, conf)
     }
     loader
       .newTransaction()
diff --git a/docs/Configuration.md b/docs/Configuration.md
index 715419457..06a766f2d 100644
--- a/docs/Configuration.md
+++ b/docs/Configuration.md
@@ -58,6 +58,8 @@ You can add these configurations into spark-defaults.conf to 
enable or disable t
 | spark.gluten.sql.columnar.maxBatchSize                     | Set the number 
of rows for the output batch.                                                   
                                                                                
                                                                                
                                                                                
                                                                                
               [...]
 | spark.gluten.shuffleWriter.bufferSize                      | Set the number 
of buffer rows for the shuffle writer                                           
                                                                                
                                                                                
                                                                                
                                                                                
               [...]
 | spark.gluten.loadLibFromJar                                | Controls 
whether to load dynamic link library from a packed jar for gluten/cpp. Not 
applicable to static build and clickhouse backend.                              
                                                                                
                                                                                
                                                                                
                          [...]
+| spark.gluten.loadLibOS                                     | When 
`spark.gluten.loadLibFromJar` is true, manually specify the system OS to load 
libraries for, e.g., CentOS                                                     
                                                                                
                                                                                
                                                                                
                           [...]
+| spark.gluten.loadLibOSVersion                              | Manually 
specify the system OS version to load libraries for, e.g., if 
`spark.gluten.loadLibOS` is CentOS, this config can be 7                        
                                                                                
                                                                                
                                                                                
                                             [...]
 | spark.gluten.sql.columnar.force.hashagg                    | Force to use 
hash agg to replace sort agg.                                                   
                                                                                
                                                                                
                                                                                
                                                                                
                 [...]
 | spark.gluten.sql.columnar.vanillaReaders                   | Enable vanilla 
spark's vectorized reader. Please note it may bring perf. overhead due to extra 
data transition. We recommend to disable it if most queries can be fully 
offloaded to gluten.                                                            
                                                                                
                                                                                
                      [...]
 | spark.gluten.expression.blacklist                          | A black list of 
expression to skip transform, multiple values separated by commas.              
                                                                                
                                                                                
                                                                                
                                                                                
              [...]
diff --git a/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala 
b/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala
index a55fdc7f2..d19d8875f 100644
--- a/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala
+++ b/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala
@@ -511,6 +511,8 @@ object GlutenConfig {
   // where deployed gluten jar is generated through static build (e.g., 
Gluten's release jar).
   val GLUTEN_LOAD_LIB_FROM_JAR = "spark.gluten.loadLibFromJar"
   val GLUTEN_LOAD_LIB_FROM_JAR_DEFAULT = false
+  val GLUTEN_LOAD_LIB_OS = "spark.gluten.loadLibOS"
+  val GLUTEN_LOAD_LIB_OS_VERSION = "spark.gluten.loadLibOSVersion"
 
   // Expired time of execution with resource relation has cached
   val GLUTEN_RESOURCE_RELATION_EXPIRED_TIME = 
"spark.gluten.execution.resource.expired.time"


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to