(incubator-gluten) branch main updated: [GLUTEN-10275] Refine exception for not fully supported functions and update generate doc script (#10391)

philo Wed, 13 Aug 2025 17:29:47 -0700

This is an automated email from the ASF dual-hosted git repository.

philo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git



The following commit(s) were added to refs/heads/main by this push:
     new e7c7f74846 [GLUTEN-10275] Refine exception for not fully supported functions and update generate doc script (#10391)
e7c7f74846 is described below

commit e7c7f7484660ccaa384af17c2570f23d35fbbbde
Author: Rong Ma <[email protected]>
AuthorDate: Thu Aug 14 01:22:21 2025 +0100

    [GLUTEN-10275] Refine exception for not fully supported functions and update generate doc script (#10391)
---
 .../backendsapi/velox/VeloxSparkPlanExecApi.scala  |  71 ++++++--
 .../gluten/expression/ExpressionRestrictions.scala | 101 ++++++++++++
 docs/velox-backend-scalar-function-support.md      | 180 ++++++++++-----------
 .../gluten/exception/GlutenExceptionUtil.scala     |  25 +++
 .../gluten/backendsapi/SparkPlanExecApi.scala      |  15 ++
 .../gluten/expression/ExpressionConverter.scala    |  14 +-
 tools/scripts/gen-function-support-docs.py         |  98 ++++++++---
 7 files changed, 373 insertions(+), 131 deletions(-)

diff --git 
a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
 
b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
index cafea465fd..2bad45ee2e 100644
--- 
a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
+++ 
b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
@@ -18,7 +18,7 @@ package org.apache.gluten.backendsapi.velox
 
 import org.apache.gluten.backendsapi.SparkPlanExecApi
 import org.apache.gluten.config.{GlutenConfig, HashShuffleWriterType, 
ReservedKeys, RssSortShuffleWriterType, ShuffleWriterType, 
SortShuffleWriterType, VeloxConfig}
-import org.apache.gluten.exception.GlutenNotSupportException
+import org.apache.gluten.exception.{GlutenExceptionUtil, 
GlutenNotSupportException}
 import org.apache.gluten.execution._
 import org.apache.gluten.expression._
 import org.apache.gluten.expression.aggregate.{HLLAdapter, 
VeloxBloomFilterAggregate, VeloxCollectList, VeloxCollectSet}
@@ -37,6 +37,7 @@ import org.apache.spark.sql.catalyst.catalog.BucketSpec
 import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec
 import org.apache.spark.sql.catalyst.expressions._
 import 
org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, 
CollectList, CollectSet}
+import org.apache.spark.sql.catalyst.expressions.objects.StaticInvoke
 import org.apache.spark.sql.catalyst.optimizer.BuildSide
 import org.apache.spark.sql.catalyst.plans.JoinType
 import org.apache.spark.sql.catalyst.plans.physical._
@@ -495,6 +496,7 @@ class VeloxSparkPlanExecApi extends SparkPlanExecApi {
       isSkewJoin,
       projectList)
   }
+
   override def genCartesianProductExecTransformer(
       left: SparkPlan,
       right: SparkPlan,
@@ -731,7 +733,10 @@ class VeloxSparkPlanExecApi extends SparkPlanExecApi {
       SQLConf.get.getConf(SQLConf.MAP_KEY_DEDUP_POLICY)
         != SQLConf.MapKeyDedupPolicy.EXCEPTION.toString
     ) {
-      throw new GlutenNotSupportException("Only EXCEPTION policy is 
supported!")
+      GlutenExceptionUtil.throwsNotFullySupported(
+        ExpressionNames.STR_TO_MAP,
+        StrToMapRestrictions.ONLY_SUPPORT_MAP_KEY_DEDUP_POLICY
+      )
     }
     GenericExpressionTransformer(substraitExprName, children, expr)
   }
@@ -753,15 +758,20 @@ class VeloxSparkPlanExecApi extends SparkPlanExecApi {
     if (!enablePartialResults) {
       // Velox only supports partial results mode. We need to fall back this 
when
       // 'spark.sql.json.enablePartialResults' is set to false or not defined.
-      throw new GlutenNotSupportException(
-        s"'from_json' with 'spark.sql.json.enablePartialResults = false' is 
not supported in Velox")
+      GlutenExceptionUtil.throwsNotFullySupported(
+        ExpressionNames.FROM_JSON,
+        FromJsonRestrictions.MUST_ENABLE_PARTIAL_RESULTS
+      )
     }
-    if (!expr.options.isEmpty) {
-      throw new GlutenNotSupportException("'from_json' with options is not 
supported in Velox")
+    if (expr.options.nonEmpty) {
+      GlutenExceptionUtil.throwsNotFullySupported(
+        ExpressionNames.FROM_JSON,
+        FromJsonRestrictions.NOT_SUPPORT_WITH_OPTIONS)
     }
     if (SQLConf.get.caseSensitiveAnalysis) {
-      throw new GlutenNotSupportException(
-        "'from_json' with 'spark.sql.caseSensitive = true' is not supported in 
Velox")
+      GlutenExceptionUtil.throwsNotFullySupported(
+        ExpressionNames.FROM_JSON,
+        FromJsonRestrictions.NOT_SUPPORT_CASE_SENSITIVE)
     }
 
     val hasDuplicateKey = expr.schema match {
@@ -778,8 +788,9 @@ class VeloxSparkPlanExecApi extends SparkPlanExecApi {
         false
     }
     if (hasDuplicateKey) {
-      throw new GlutenNotSupportException(
-        "'from_json' with duplicate keys is not supported in Velox")
+      GlutenExceptionUtil.throwsNotFullySupported(
+        ExpressionNames.FROM_JSON,
+        FromJsonRestrictions.NOT_SUPPORT_DUPLICATE_KEYS)
     }
     val hasCorruptRecord = expr.schema match {
       case s: StructType =>
@@ -788,8 +799,9 @@ class VeloxSparkPlanExecApi extends SparkPlanExecApi {
         false
     }
     if (hasCorruptRecord) {
-      throw new GlutenNotSupportException(
-        "'from_json' with column corrupt record is not supported in Velox")
+      GlutenExceptionUtil.throwsNotFullySupported(
+        ExpressionNames.FROM_JSON,
+        FromJsonRestrictions.NOT_SUPPORT_COLUMN_CORRUPT_RECORD)
     }
     GenericExpressionTransformer(substraitExprName, children, expr)
   }
@@ -800,11 +812,44 @@ class VeloxSparkPlanExecApi extends SparkPlanExecApi {
       child: ExpressionTransformer,
       expr: StructsToJson): ExpressionTransformer = {
     if (!expr.options.isEmpty) {
-      throw new GlutenNotSupportException("'to_json' with options is not 
supported in Velox")
+      GlutenExceptionUtil.throwsNotFullySupported(
+        ExpressionNames.TO_JSON,
+        ToJsonRestrictions.NOT_SUPPORT_WITH_OPTIONS)
     }
     ToJsonTransformer(substraitExprName, child, expr)
   }
 
+  /**
+   * Generate an expression transformer for UnBase64. Conversion is rejected when the Spark
+   * shim reports that the expression fails on error.
+   */
+  override def genUnbase64Transformer(
+      substraitExprName: String,
+      child: ExpressionTransformer,
+      expr: UnBase64): ExpressionTransformer = {
+    val failsOnError = SparkShimLoader.getSparkShims.unBase64FunctionFailsOnError(expr)
+    if (failsOnError) {
+      // Throws GlutenNotSupportException, so no transformer is built in this case.
+      GlutenExceptionUtil.throwsNotFullySupported(
+        ExpressionNames.UNBASE64,
+        Unbase64Restrictions.NOT_SUPPORT_FAIL_ON_ERROR)
+    }
+    GenericExpressionTransformer(substraitExprName, child, expr)
+  }
+
+  /**
+   * Generate an expression transformer for a StaticInvoke that is converted as base64.
+   * Conversion is rejected when 'spark.sql.chunkBase64String.enabled' is set to false; the
+   * setting is treated as true when it is not defined.
+   */
+  override def genBase64StaticInvokeTransformer(
+      substraitExprName: String,
+      child: ExpressionTransformer,
+      expr: StaticInvoke): ExpressionTransformer = {
+    val chunkBase64Enabled =
+      SQLConf.get.getConfString("spark.sql.chunkBase64String.enabled", "true").toBoolean
+    if (!chunkBase64Enabled) {
+      GlutenExceptionUtil
+        .throwsNotFullySupported(
+          ExpressionNames.BASE64,
+          Base64Restrictions.NOT_SUPPORT_DISABLE_CHUNK_BASE64_STRING)
+    }
+    // NOTE(review): substraitExprName is not used here; ExpressionNames.BASE64 is always
+    // passed as the name. Presumably intentional for StaticInvoke - confirm.
+    GenericExpressionTransformer(
+      ExpressionNames.BASE64,
+      child,
+      expr
+    )
+  }
+
   /** Generate an expression transformer to transform NamedStruct to 
Substrait. */
   override def genNamedStructTransformer(
       substraitExprName: String,
diff --git 
a/backends-velox/src/main/scala/org/apache/gluten/expression/ExpressionRestrictions.scala
 
b/backends-velox/src/main/scala/org/apache/gluten/expression/ExpressionRestrictions.scala
new file mode 100644
index 0000000000..569da7d2fc
--- /dev/null
+++ 
b/backends-velox/src/main/scala/org/apache/gluten/expression/ExpressionRestrictions.scala
@@ -0,0 +1,101 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.gluten.expression
+
+import org.apache.spark.sql.internal.SQLConf
+
+/** Describes the restrictions under which a function is not fully supported. */
+trait ExpressionRestrictions {
+  /** Name of the function the restrictions apply to. */
+  val functionName: String
+  /** One message per restriction of the function. */
+  val restrictionMessages: Array[String]
+}
+
+/** Restrictions reported for `str_to_map`. */
+object StrToMapRestrictions extends ExpressionRestrictions {
+  /** Message for a map key dedup policy other than EXCEPTION. */
+  val ONLY_SUPPORT_MAP_KEY_DEDUP_POLICY: String =
+    s"Only ${SQLConf.MAP_KEY_DEDUP_POLICY.key} = " +
+      s"${SQLConf.MapKeyDedupPolicy.EXCEPTION.toString} is supported for Velox backend"
+
+  override val functionName: String = ExpressionNames.STR_TO_MAP
+
+  override val restrictionMessages: Array[String] = Array(
+    ONLY_SUPPORT_MAP_KEY_DEDUP_POLICY
+  )
+}
+
+/** Restrictions reported for `from_json`. */
+object FromJsonRestrictions extends ExpressionRestrictions {
+  /** Message for 'spark.sql.json.enablePartialResults = false'. */
+  val MUST_ENABLE_PARTIAL_RESULTS: String =
+    s"${ExpressionNames.FROM_JSON} with 'spark.sql.json.enablePartialResults = false' " +
+      s"is not supported in Velox"
+  /** Message for a call that passes options. */
+  val NOT_SUPPORT_WITH_OPTIONS: String =
+    s"${ExpressionNames.FROM_JSON} with options is not supported in Velox"
+  /** Message for case-sensitive analysis being enabled. */
+  val NOT_SUPPORT_CASE_SENSITIVE: String =
+    s"${ExpressionNames.FROM_JSON} with " +
+      s"'${SQLConf.CASE_SENSITIVE.key} = true' is not supported in Velox"
+  /** Message for duplicate keys. */
+  val NOT_SUPPORT_DUPLICATE_KEYS: String =
+    s"${ExpressionNames.FROM_JSON} with duplicate keys is not supported in Velox"
+  /** Message for a corrupt record column. */
+  val NOT_SUPPORT_COLUMN_CORRUPT_RECORD: String =
+    s"${ExpressionNames.FROM_JSON} with column corrupt record is not supported in Velox"
+
+  override val functionName: String = ExpressionNames.FROM_JSON
+
+  override val restrictionMessages: Array[String] = Array(
+    MUST_ENABLE_PARTIAL_RESULTS,
+    NOT_SUPPORT_WITH_OPTIONS,
+    NOT_SUPPORT_CASE_SENSITIVE,
+    NOT_SUPPORT_DUPLICATE_KEYS,
+    NOT_SUPPORT_COLUMN_CORRUPT_RECORD
+  )
+}
+
+/** Restrictions reported for `to_json`. */
+object ToJsonRestrictions extends ExpressionRestrictions {
+  /** Message for a call that passes options. */
+  val NOT_SUPPORT_WITH_OPTIONS: String =
+    s"${ExpressionNames.TO_JSON} with options is not supported in Velox"
+
+  override val functionName: String = ExpressionNames.TO_JSON
+
+  override val restrictionMessages: Array[String] = Array(NOT_SUPPORT_WITH_OPTIONS)
+}
+
+/** Restrictions reported for `unbase64`. */
+object Unbase64Restrictions extends ExpressionRestrictions {
+  /** Message for an unbase64 expression that fails on error. */
+  val NOT_SUPPORT_FAIL_ON_ERROR: String =
+    s"${ExpressionNames.UNBASE64} with failOnError is not supported"
+
+  override val functionName: String = ExpressionNames.UNBASE64
+
+  override val restrictionMessages: Array[String] = Array(NOT_SUPPORT_FAIL_ON_ERROR)
+}
+
+/** Restrictions reported for `base64`. */
+object Base64Restrictions extends ExpressionRestrictions {
+  /** Message for chunkBase64String being disabled. */
+  val NOT_SUPPORT_DISABLE_CHUNK_BASE64_STRING: String =
+    s"${ExpressionNames.BASE64} with chunkBase64String disabled is not supported"
+
+  override val functionName: String = ExpressionNames.BASE64
+
+  override val restrictionMessages: Array[String] = Array(NOT_SUPPORT_DISABLE_CHUNK_BASE64_STRING)
+}
+
+object ExpressionRestrictions {
+  /**
+   * Lists every restriction holder declared in this file. Called by
+   * gen-function-support-docs.py to get all restrictions.
+   */
+  def listAllRestrictions(): Array[ExpressionRestrictions] = Array(
+    StrToMapRestrictions,
+    FromJsonRestrictions,
+    ToJsonRestrictions,
+    Unbase64Restrictions,
+    Base64Restrictions
+  )
+}
diff --git a/docs/velox-backend-scalar-function-support.md 
b/docs/velox-backend-scalar-function-support.md
index d0e68e7108..5325b97e9b 100644
--- a/docs/velox-backend-scalar-function-support.md
+++ b/docs/velox-backend-scalar-function-support.md
@@ -1,6 +1,6 @@
 # Scalar Functions Support Status
 
-**Out of 357 scalar functions in Spark 3.5, Gluten currently fully supports 
243 functions and partially supports 19 functions.**
+**Out of 357 scalar functions in Spark 3.5, Gluten currently fully supports 
239 functions and partially supports 24 functions.**
 
 ## Array Functions
 
@@ -175,15 +175,15 @@
 
 ## JSON Functions
 
-| Spark Functions   | Spark Expressions   | Status   | Restrictions   |
-|-------------------|---------------------|----------|----------------|
-| from_json         | JsonToStructs       | S        |                |
-| get_json_object   | GetJsonObject       | S        |                |
-| json_array_length | LengthOfJsonArray   | S        |                |
-| json_object_keys  | JsonObjectKeys      | S        |                |
-| json_tuple        | JsonTuple           | S        |                |
-| schema_of_json    | SchemaOfJson        |          |                |
-| to_json           | StructsToJson       |          |                |
+| Spark Functions   | Spark Expressions   | Status   | Restrictions            
                                                                                
                                                                                
                                                                                
                                                                             |
+|-------------------|---------------------|----------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| from_json         | JsonToStructs       | PS       | from_json with 
'spark.sql.caseSensitive = true' is not supported in Velox<br>from_json with 
'spark.sql.json.enablePartialResults = false' is not supported in 
Velox<br>from_json with column corrupt record is not supported in 
Velox<br>from_json with duplicate keys is not supported in Velox<br>from_json 
with options is not supported in Velox |
+| get_json_object   | GetJsonObject       | S        |                         
                                                                                
                                                                                
                                                                                
                                                                             |
+| json_array_length | LengthOfJsonArray   | S        |                         
                                                                                
                                                                                
                                                                                
                                                                             |
+| json_object_keys  | JsonObjectKeys      | S        |                         
                                                                                
                                                                                
                                                                                
                                                                             |
+| json_tuple        | JsonTuple           | S        |                         
                                                                                
                                                                                
                                                                                
                                                                             |
+| schema_of_json    | SchemaOfJson        |          |                         
                                                                                
                                                                                
                                                                                
                                                                             |
+| to_json           | StructsToJson       | PS       | to_json with options is 
not supported in Velox                                                          
                                                                                
                                                                                
                                                                             |
 
 ## Lambda Functions
 
@@ -204,19 +204,19 @@
 
 ## Map Functions
 
-| Spark Functions   | Spark Expressions   | Status   | Restrictions   |
-|-------------------|---------------------|----------|----------------|
-| element_at        | ElementAt           | S        |                |
-| map               | CreateMap           | PS       |                |
-| map_concat        | MapConcat           | PS       |                |
-| map_contains_key  | MapContainsKey      | S        |                |
-| map_entries       | MapEntries          | S        |                |
-| map_from_arrays   | MapFromArrays       |          |                |
-| map_from_entries  | MapFromEntries      |          |                |
-| map_keys          | MapKeys             | S        |                |
-| map_values        | MapValues           | S        |                |
-| str_to_map        | StringToMap         | S        |                |
-| try_element_at    | TryElementAt        |          |                |
+| Spark Functions   | Spark Expressions   | Status   | Restrictions            
                                                    |
+|-------------------|---------------------|----------|-----------------------------------------------------------------------------|
+| element_at        | ElementAt           | S        |                         
                                                    |
+| map               | CreateMap           | PS       |                         
                                                    |
+| map_concat        | MapConcat           | PS       |                         
                                                    |
+| map_contains_key  | MapContainsKey      | S        |                         
                                                    |
+| map_entries       | MapEntries          | S        |                         
                                                    |
+| map_from_arrays   | MapFromArrays       |          |                         
                                                    |
+| map_from_entries  | MapFromEntries      |          |                         
                                                    |
+| map_keys          | MapKeys             | S        |                         
                                                    |
+| map_values        | MapValues           | S        |                         
                                                    |
+| str_to_map        | StringToMap         | PS       | Only 
spark.sql.mapKeyDedupPolicy = EXCEPTION is supported for Velox backend |
+| try_element_at    | TryElementAt        |          |                         
                                                    |
 
 ## Mathematical Functions
 
@@ -352,73 +352,73 @@
 
 ## String Functions
 
-| Spark Functions    | Spark Expressions           | Status   | Restrictions   
        |
-|--------------------|-----------------------------|----------|------------------------|
-| ascii              | Ascii                       | S        |                
        |
-| base64             | Base64                      | S        |                
        |
-| bit_length         | BitLength                   | S        |                
        |
-| btrim              | StringTrimBoth              | S        |                
        |
-| char               | Chr                         | S        |                
        |
-| char_length        | Length                      | S        |                
        |
-| character_length   | Length                      | S        |                
        |
-| chr                | Chr                         | S        |                
        |
-| concat_ws          | ConcatWs                    | S        |                
        |
-| contains           | ContainsExpressionBuilder   | PS       | BinaryType 
unsupported |
-| decode             | Decode                      |          |                
        |
-| elt                | Elt                         |          |                
        |
-| encode             | Encode                      |          |                
        |
-| endswith           | EndsWithExpressionBuilder   | PS       | BinaryType 
unsupported |
-| find_in_set        | FindInSet                   | S        |                
        |
-| format_number      | FormatNumber                |          |                
        |
-| format_string      | FormatString                |          |                
        |
-| initcap            | InitCap                     | S        |                
        |
-| instr              | StringInstr                 | S        |                
        |
-| lcase              | Lower                       | S        |                
        |
-| left               | Left                        | S        |                
        |
-| len                | Length                      | S        |                
        |
-| length             | Length                      | S        |                
        |
-| levenshtein        | Levenshtein                 | S        |                
        |
-| locate             | StringLocate                | S        |                
        |
-| lower              | Lower                       | S        |                
        |
-| lpad               | LPadExpressionBuilder       | PS       | BinaryType 
unsupported |
-| ltrim              | StringTrimLeft              | S        |                
        |
-| luhn_check         | Luhncheck                   | S        |                
        |
-| mask               | MaskExpressionBuilder       | S        |                
        |
-| octet_length       | OctetLength                 |          |                
        |
-| overlay            | Overlay                     | S        |                
        |
-| position           | StringLocate                | S        |                
        |
-| printf             | FormatString                |          |                
        |
-| regexp_count       | RegExpCount                 |          |                
        |
-| regexp_extract     | RegExpExtract               | PS       | Lookaround 
unsupported |
-| regexp_extract_all | RegExpExtractAll            | PS       | Lookaround 
unsupported |
-| regexp_instr       | RegExpInStr                 |          |                
        |
-| regexp_replace     | RegExpReplace               | PS       | Lookaround 
unsupported |
-| regexp_substr      | RegExpSubStr                |          |                
        |
-| repeat             | StringRepeat                | S        |                
        |
-| replace            | StringReplace               | S        |                
        |
-| right              | Right                       | S        |                
        |
-| rpad               | RPadExpressionBuilder       | PS       | BinaryType 
unsupported |
-| rtrim              | StringTrimRight             | S        |                
        |
-| sentences          | Sentences                   |          |                
        |
-| soundex            | SoundEx                     | S        |                
        |
-| space              | StringSpace                 |          |                
        |
-| split              | StringSplit                 | S        |                
        |
-| split_part         | SplitPart                   | S        |                
        |
-| startswith         | StartsWithExpressionBuilder | PS       | BinaryType 
unsupported |
-| substr             | Substring                   | PS       |                
        |
-| substring          | Substring                   | PS       |                
        |
-| substring_index    | SubstringIndex              | S        |                
        |
-| to_binary          | ToBinary                    |          |                
        |
-| to_char            | ToCharacter                 |          |                
        |
-| to_number          | ToNumber                    |          |                
        |
-| to_varchar         | ToCharacter                 |          |                
        |
-| translate          | StringTranslate             | S        |                
        |
-| trim               | StringTrim                  | S        |                
        |
-| try_to_binary      | TryToBinary                 |          |                
        |
-| try_to_number      | TryToNumber                 |          |                
        |
-| ucase              | Upper                       | S        |                
        |
-| unbase64           | UnBase64                    | S        |                
        |
-| upper              | Upper                       | S        |                
        |
+| Spark Functions    | Spark Expressions           | Status   | Restrictions   
                                         |
+|--------------------|-----------------------------|----------|---------------------------------------------------------|
+| ascii              | Ascii                       | S        |                
                                         |
+| base64             | Base64                      | PS       | base64 with 
chunkBase64String disabled is not supported |
+| bit_length         | BitLength                   | S        |                
                                         |
+| btrim              | StringTrimBoth              | S        |                
                                         |
+| char               | Chr                         | S        |                
                                         |
+| char_length        | Length                      | S        |                
                                         |
+| character_length   | Length                      | S        |                
                                         |
+| chr                | Chr                         | S        |                
                                         |
+| concat_ws          | ConcatWs                    | S        |                
                                         |
+| contains           | ContainsExpressionBuilder   | PS       | BinaryType 
unsupported                                  |
+| decode             | Decode                      |          |                
                                         |
+| elt                | Elt                         |          |                
                                         |
+| encode             | Encode                      |          |                
                                         |
+| endswith           | EndsWithExpressionBuilder   | PS       | BinaryType 
unsupported                                  |
+| find_in_set        | FindInSet                   | S        |                
                                         |
+| format_number      | FormatNumber                |          |                
                                         |
+| format_string      | FormatString                |          |                
                                         |
+| initcap            | InitCap                     | S        |                
                                         |
+| instr              | StringInstr                 | S        |                
                                         |
+| lcase              | Lower                       | S        |                
                                         |
+| left               | Left                        | S        |                
                                         |
+| len                | Length                      | S        |                
                                         |
+| length             | Length                      | S        |                
                                         |
+| levenshtein        | Levenshtein                 | S        |                
                                         |
+| locate             | StringLocate                | S        |                
                                         |
+| lower              | Lower                       | S        |                
                                         |
+| lpad               | LPadExpressionBuilder       | PS       | BinaryType 
unsupported                                  |
+| ltrim              | StringTrimLeft              | S        |                
                                         |
+| luhn_check         | Luhncheck                   | S        |                
                                         |
+| mask               | MaskExpressionBuilder       | S        |                
                                         |
+| octet_length       | OctetLength                 |          |                
                                         |
+| overlay            | Overlay                     | S        |                
                                         |
+| position           | StringLocate                | S        |                
                                         |
+| printf             | FormatString                |          |                
                                         |
+| regexp_count       | RegExpCount                 |          |                
                                         |
+| regexp_extract     | RegExpExtract               | PS       | Lookaround 
unsupported                                  |
+| regexp_extract_all | RegExpExtractAll            | PS       | Lookaround 
unsupported                                  |
+| regexp_instr       | RegExpInStr                 |          |                
                                         |
+| regexp_replace     | RegExpReplace               | PS       | Lookaround 
unsupported                                  |
+| regexp_substr      | RegExpSubStr                |          |                
                                         |
+| repeat             | StringRepeat                | S        |                
                                         |
+| replace            | StringReplace               | S        |                
                                         |
+| right              | Right                       | S        |                
                                         |
+| rpad               | RPadExpressionBuilder       | PS       | BinaryType 
unsupported                                  |
+| rtrim              | StringTrimRight             | S        |                
                                         |
+| sentences          | Sentences                   |          |                
                                         |
+| soundex            | SoundEx                     | S        |                
                                         |
+| space              | StringSpace                 |          |                
                                         |
+| split              | StringSplit                 | S        |                
                                         |
+| split_part         | SplitPart                   | S        |                
                                         |
+| startswith         | StartsWithExpressionBuilder | PS       | BinaryType 
unsupported                                  |
+| substr             | Substring                   | PS       |                
                                         |
+| substring          | Substring                   | PS       |                
                                         |
+| substring_index    | SubstringIndex              | S        |                
                                         |
+| to_binary          | ToBinary                    |          |                
                                         |
+| to_char            | ToCharacter                 |          |                
                                         |
+| to_number          | ToNumber                    |          |                
                                         |
+| to_varchar         | ToCharacter                 |          |                
                                         |
+| translate          | StringTranslate             | S        |                
                                         |
+| trim               | StringTrim                  | S        |                
                                         |
+| try_to_binary      | TryToBinary                 |          |                
                                         |
+| try_to_number      | TryToNumber                 |          |                
                                         |
+| ucase              | Upper                       | S        |                
                                         |
+| unbase64           | UnBase64                    | PS       | unbase64 with 
failOnError is not supported              |
+| upper              | Upper                       | S        |                
                                         |
 
 ## Struct Functions
 
diff --git 
a/gluten-substrait/src/main/java/org/apache/gluten/exception/GlutenExceptionUtil.scala
 
b/gluten-substrait/src/main/java/org/apache/gluten/exception/GlutenExceptionUtil.scala
new file mode 100644
index 0000000000..2dd0ebdfe8
--- /dev/null
+++ 
b/gluten-substrait/src/main/java/org/apache/gluten/exception/GlutenExceptionUtil.scala
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.gluten.exception
+
+object GlutenExceptionUtil {
+  // Reports a function that is only partially supported; never returns normally. The message
+  // format is matched by tools/scripts/gen-function-support-docs.py, so keep the two in sync.
+  def throwsNotFullySupported(function: String, cause: String): Nothing =
+    throw new GlutenNotSupportException(
+      s"Function '$function' is not fully supported. Cause: $cause")
+}
diff --git 
a/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala
 
b/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala
index c6cdcc3f32..1625cce062 100644
--- 
a/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala
+++ 
b/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala
@@ -32,6 +32,7 @@ import org.apache.spark.sql.catalyst.catalog.BucketSpec
 import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression
+import org.apache.spark.sql.catalyst.expressions.objects.StaticInvoke
 import org.apache.spark.sql.catalyst.optimizer.BuildSide
 import org.apache.spark.sql.catalyst.plans.JoinType
 import org.apache.spark.sql.catalyst.plans.physical.{BroadcastMode, 
Partitioning}
@@ -175,6 +176,20 @@ trait SparkPlanExecApi {
     GenericExpressionTransformer(substraitExprName, child, expr)
   }
 
+  /** Transform UnBase64 to Substrait. The default wraps it in a GenericExpressionTransformer. */
+  def genUnbase64Transformer(
+      substraitExprName: String,
+      child: ExpressionTransformer,
+      expr: UnBase64): ExpressionTransformer =
+    GenericExpressionTransformer(substraitExprName, child, expr)
+
+  /** Transform Base64 StaticInvoke to Substrait. Default: a GenericExpressionTransformer. */
+  def genBase64StaticInvokeTransformer(
+      substraitExprName: String,
+      child: ExpressionTransformer,
+      expr: StaticInvoke): ExpressionTransformer =
+    GenericExpressionTransformer(substraitExprName, child, expr)
+
   /** Transform GetArrayItem to Substrait. */
   def genGetArrayItemTransformer(
       substraitExprName: String,
diff --git 
a/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala
 
b/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala
index 7673b1a6f1..fc1de383d0 100644
--- 
a/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala
+++ 
b/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala
@@ -175,11 +175,7 @@ object ExpressionConverter extends SQLConfHelper with 
Logging {
           i)
       case i: StaticInvoke
           if Seq("encode", "decode").contains(i.functionName) && 
i.objectName.endsWith("Base64") =>
-        if (!SQLConf.get.getConfString("spark.sql.chunkBase64String.enabled", 
"true").toBoolean) {
-          throw new GlutenNotSupportException(
-            "Base64 with chunkBase64String disabled is not supported in 
gluten.")
-        }
-        return GenericExpressionTransformer(
+        return 
BackendsApiManager.getSparkPlanExecApiInstance.genBase64StaticInvokeTransformer(
           ExpressionNames.BASE64,
           replaceWithExpressionTransformer0(i.arguments.head, attributeSeq, 
expressionsMap),
           i
@@ -770,8 +766,12 @@ object ExpressionConverter extends SQLConfHelper with 
Logging {
           replaceWithExpressionTransformer0(s.child, attributeSeq, 
expressionsMap),
           s
         )
-      case u: UnBase64 if 
SparkShimLoader.getSparkShims.unBase64FunctionFailsOnError(u) =>
-        throw new GlutenNotSupportException("UnBase64 with failOnError is not 
supported in gluten.")
+      case u: UnBase64 =>
+        BackendsApiManager.getSparkPlanExecApiInstance.genUnbase64Transformer(
+          substraitExprName,
+          replaceWithExpressionTransformer0(u.child, attributeSeq, 
expressionsMap),
+          u
+        )
       case ce if 
BackendsApiManager.getSparkPlanExecApiInstance.expressionFlattenSupported(ce) =>
         replaceFlattenedExpressionWithExpressionTransformer(
           substraitExprName,
diff --git a/tools/scripts/gen-function-support-docs.py 
b/tools/scripts/gen-function-support-docs.py
index f6778403c9..4ba59b067e 100644
--- a/tools/scripts/gen-function-support-docs.py
+++ b/tools/scripts/gen-function-support-docs.py
@@ -520,19 +520,19 @@ STATIC_INVOKES = {
 # Known Restrictions in Gluten.
 LOOKAROUND_UNSUPPORTED = "Lookaround unsupported"
 BINARY_TYPE_UNSUPPORTED = "BinaryType unsupported"
-GLUTEN_RESTRICTIONS = {
+KNOWN_RESTRICTIONS = {
     "scalar": {
-        "regexp": LOOKAROUND_UNSUPPORTED,
-        "regexp_like": LOOKAROUND_UNSUPPORTED,
-        "rlike": LOOKAROUND_UNSUPPORTED,
-        "regexp_extract": LOOKAROUND_UNSUPPORTED,
-        "regexp_extract_all": LOOKAROUND_UNSUPPORTED,
-        "regexp_replace": LOOKAROUND_UNSUPPORTED,
-        "contains": BINARY_TYPE_UNSUPPORTED,
-        "startswith": BINARY_TYPE_UNSUPPORTED,
-        "endswith": BINARY_TYPE_UNSUPPORTED,
-        "lpad": BINARY_TYPE_UNSUPPORTED,
-        "rpad": BINARY_TYPE_UNSUPPORTED,
+        "regexp": {LOOKAROUND_UNSUPPORTED},
+        "regexp_like": {LOOKAROUND_UNSUPPORTED},
+        "rlike": {LOOKAROUND_UNSUPPORTED},
+        "regexp_extract": {LOOKAROUND_UNSUPPORTED},
+        "regexp_extract_all": {LOOKAROUND_UNSUPPORTED},
+        "regexp_replace": {LOOKAROUND_UNSUPPORTED},
+        "contains": {BINARY_TYPE_UNSUPPORTED},
+        "startswith": {BINARY_TYPE_UNSUPPORTED},
+        "endswith": {BINARY_TYPE_UNSUPPORTED},
+        "lpad": {BINARY_TYPE_UNSUPPORTED},
+        "rpad": {BINARY_TYPE_UNSUPPORTED},
     },
     "aggregate": {},
     "window": {},
@@ -760,6 +760,29 @@ def parse_logs(log_file):
 
     unresolved = []
 
+    pkg = jvm.org.apache.gluten.expression
+    cls = getattr(pkg, "ExpressionRestrictions$")
+    obj = getattr(cls, "MODULE$")
+
+    jrestrictions = {
+        r.functionName(): set(m for m in r.restrictionMessages())
+        for r in obj.listAllRestrictions()
+    }
+
+    restrictions = KNOWN_RESTRICTIONS.copy()
+    print(restrictions)
+    for f, v in jrestrictions.items():
+        print(v)
+        for c in FUNCTION_CATEGORIES:
+            if f in functions[c]:
+                if f in KNOWN_RESTRICTIONS[c]:
+                    restrictions[c][f].union(v)
+                else:
+                    restrictions[c][f] = v
+                break
+
+    print(restrictions)
+
     def filter_fallback_reasons():
         with open(log_file, "r") as f:
             lines = f.readlines()
@@ -832,7 +855,7 @@ def parse_logs(log_file):
                     )
                     
support_list[category]["unsupported"].add(function_name_tuple(f))
 
-        for f in GLUTEN_RESTRICTIONS[category].keys():
+        for f in restrictions[category].keys():
             support_list[category]["partial"].add(function_name_tuple(f))
 
     for r in filter_fallback_reasons():
@@ -905,6 +928,39 @@ def parse_logs(log_file):
             else:
                 function_not_found(r)
 
+        # Partially supported: throws not fully supported exception for 
certain conditions.
+        elif "is not fully supported" in r:
+            pattern = r"Function '([\w0-9]+)' is not fully supported. Cause: 
(.*)"
+
+            # Extract the function name and reason
+            match = re.search(pattern, r)
+
+            if match:
+                function_name = match.group(1)
+                if function_name in function_names:
+                    support_list["scalar"]["partial"].add(
+                        function_name_tuple(function_name)
+                    )
+                else:
+                    support_list["scalar"]["unknown"].add(
+                        function_name_tuple(function_name)
+                    )
+                cause = match.group(2)
+                not_listed = False
+                if function_name not in restrictions["scalar"]:
+                    restrictions["scalar"][function_name] = set()
+                    not_listed = True
+                elif cause not in restrictions["scalar"][function_name]:
+                    not_listed = True
+                if not_listed:
+                    restrictions["scalar"][function_name].add(cause)
+                    logging.log(
+                        logging.WARNING,
+                        f"Restriction for function {function_name} found in 
logs but not listed in the ExpressionRestrictions: {cause}",
+                    )
+            else:
+                function_not_found(r)
+
         # Not supported: Special case for unsupported expressions.
         elif "Not support expression" in r:
             pattern = r"Not support expression ([\w0-9]+)"
@@ -933,7 +989,7 @@ def parse_logs(log_file):
             else:
                 function_not_found(r)
 
-        # Not supported: Special case for unsupported functions.
+        # Not supported: Function is in the native blacklist.
         elif "Function is not supported:" in r:
             pattern = r"Function is not supported:\s+([\w0-9]+)"
 
@@ -1051,7 +1107,7 @@ def parse_logs(log_file):
         else:
             unresolved.append(r)
 
-    return support_list, unresolved
+    return support_list, unresolved, restrictions
 
 
 def generate_function_doc(category, output):
@@ -1131,16 +1187,16 @@ def generate_function_doc(category, output):
                     f = "&#124;"
                 elif f == "||":
                     f = "&#124;&#124;"
+
+                r = ""
+                if f in restrictions[category]:
+                    r = "<br>".join(sorted(restrictions[category][f]))
                 data.append(
                     [
                         f,
                         classname,
                         support,
-                        (
-                            ""
-                            if f not in GLUTEN_RESTRICTIONS[category]
-                            else GLUTEN_RESTRICTIONS[category][f]
-                        ),
+                        r,
                     ]
                 )
             table = tabulate.tabulate(data, headers, tablefmt="github")
@@ -1259,7 +1315,7 @@ if __name__ == "__main__":
 
     spark_function_map = create_spark_function_map()
 
-    support_list, unresolved = parse_logs(
+    support_list, unresolved, restrictions = parse_logs(
         os.path.join(
             gluten_home,
             "gluten-ut",


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(incubator-gluten) branch main updated: [GLUTEN-10275] Refine exception for not fully supported functions and update generate doc script (#10391)

Reply via email to