This is an automated email from the ASF dual-hosted git repository.
philo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new e7c7f74846 [GLUTEN-10275] Refine exception for not fully supported
functions and update generate doc script (#10391)
e7c7f74846 is described below
commit e7c7f7484660ccaa384af17c2570f23d35fbbbde
Author: Rong Ma <[email protected]>
AuthorDate: Thu Aug 14 01:22:21 2025 +0100
[GLUTEN-10275] Refine exception for not fully supported functions and
update generate doc script (#10391)
---
.../backendsapi/velox/VeloxSparkPlanExecApi.scala | 71 ++++++--
.../gluten/expression/ExpressionRestrictions.scala | 101 ++++++++++++
docs/velox-backend-scalar-function-support.md | 180 ++++++++++-----------
.../gluten/exception/GlutenExceptionUtil.scala | 25 +++
.../gluten/backendsapi/SparkPlanExecApi.scala | 15 ++
.../gluten/expression/ExpressionConverter.scala | 14 +-
tools/scripts/gen-function-support-docs.py | 98 ++++++++---
7 files changed, 373 insertions(+), 131 deletions(-)
diff --git
a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
index cafea465fd..2bad45ee2e 100644
---
a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
+++
b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
@@ -18,7 +18,7 @@ package org.apache.gluten.backendsapi.velox
import org.apache.gluten.backendsapi.SparkPlanExecApi
import org.apache.gluten.config.{GlutenConfig, HashShuffleWriterType,
ReservedKeys, RssSortShuffleWriterType, ShuffleWriterType,
SortShuffleWriterType, VeloxConfig}
-import org.apache.gluten.exception.GlutenNotSupportException
+import org.apache.gluten.exception.{GlutenExceptionUtil,
GlutenNotSupportException}
import org.apache.gluten.execution._
import org.apache.gluten.expression._
import org.apache.gluten.expression.aggregate.{HLLAdapter,
VeloxBloomFilterAggregate, VeloxCollectList, VeloxCollectSet}
@@ -37,6 +37,7 @@ import org.apache.spark.sql.catalyst.catalog.BucketSpec
import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec
import org.apache.spark.sql.catalyst.expressions._
import
org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression,
CollectList, CollectSet}
+import org.apache.spark.sql.catalyst.expressions.objects.StaticInvoke
import org.apache.spark.sql.catalyst.optimizer.BuildSide
import org.apache.spark.sql.catalyst.plans.JoinType
import org.apache.spark.sql.catalyst.plans.physical._
@@ -495,6 +496,7 @@ class VeloxSparkPlanExecApi extends SparkPlanExecApi {
isSkewJoin,
projectList)
}
+
override def genCartesianProductExecTransformer(
left: SparkPlan,
right: SparkPlan,
@@ -731,7 +733,10 @@ class VeloxSparkPlanExecApi extends SparkPlanExecApi {
SQLConf.get.getConf(SQLConf.MAP_KEY_DEDUP_POLICY)
!= SQLConf.MapKeyDedupPolicy.EXCEPTION.toString
) {
- throw new GlutenNotSupportException("Only EXCEPTION policy is
supported!")
+ GlutenExceptionUtil.throwsNotFullySupported(
+ ExpressionNames.STR_TO_MAP,
+ StrToMapRestrictions.ONLY_SUPPORT_MAP_KEY_DEDUP_POLICY
+ )
}
GenericExpressionTransformer(substraitExprName, children, expr)
}
@@ -753,15 +758,20 @@ class VeloxSparkPlanExecApi extends SparkPlanExecApi {
if (!enablePartialResults) {
// Velox only supports partial results mode. We need to fall back this
when
// 'spark.sql.json.enablePartialResults' is set to false or not defined.
- throw new GlutenNotSupportException(
- s"'from_json' with 'spark.sql.json.enablePartialResults = false' is
not supported in Velox")
+ GlutenExceptionUtil.throwsNotFullySupported(
+ ExpressionNames.FROM_JSON,
+ FromJsonRestrictions.MUST_ENABLE_PARTIAL_RESULTS
+ )
}
- if (!expr.options.isEmpty) {
- throw new GlutenNotSupportException("'from_json' with options is not
supported in Velox")
+ if (expr.options.nonEmpty) {
+ GlutenExceptionUtil.throwsNotFullySupported(
+ ExpressionNames.FROM_JSON,
+ FromJsonRestrictions.NOT_SUPPORT_WITH_OPTIONS)
}
if (SQLConf.get.caseSensitiveAnalysis) {
- throw new GlutenNotSupportException(
- "'from_json' with 'spark.sql.caseSensitive = true' is not supported in
Velox")
+ GlutenExceptionUtil.throwsNotFullySupported(
+ ExpressionNames.FROM_JSON,
+ FromJsonRestrictions.NOT_SUPPORT_CASE_SENSITIVE)
}
val hasDuplicateKey = expr.schema match {
@@ -778,8 +788,9 @@ class VeloxSparkPlanExecApi extends SparkPlanExecApi {
false
}
if (hasDuplicateKey) {
- throw new GlutenNotSupportException(
- "'from_json' with duplicate keys is not supported in Velox")
+ GlutenExceptionUtil.throwsNotFullySupported(
+ ExpressionNames.FROM_JSON,
+ FromJsonRestrictions.NOT_SUPPORT_DUPLICATE_KEYS)
}
val hasCorruptRecord = expr.schema match {
case s: StructType =>
@@ -788,8 +799,9 @@ class VeloxSparkPlanExecApi extends SparkPlanExecApi {
false
}
if (hasCorruptRecord) {
- throw new GlutenNotSupportException(
- "'from_json' with column corrupt record is not supported in Velox")
+ GlutenExceptionUtil.throwsNotFullySupported(
+ ExpressionNames.FROM_JSON,
+ FromJsonRestrictions.NOT_SUPPORT_COLUMN_CORRUPT_RECORD)
}
GenericExpressionTransformer(substraitExprName, children, expr)
}
@@ -800,11 +812,44 @@ class VeloxSparkPlanExecApi extends SparkPlanExecApi {
child: ExpressionTransformer,
expr: StructsToJson): ExpressionTransformer = {
if (!expr.options.isEmpty) {
- throw new GlutenNotSupportException("'to_json' with options is not
supported in Velox")
+ GlutenExceptionUtil.throwsNotFullySupported(
+ ExpressionNames.TO_JSON,
+ ToJsonRestrictions.NOT_SUPPORT_WITH_OPTIONS)
}
ToJsonTransformer(substraitExprName, child, expr)
}
+  /**
+   * Generate an expression transformer for UnBase64. The expression is reported as not fully
+   * supported when the Spark shim indicates that this UnBase64 fails on error.
+   */
+  override def genUnbase64Transformer(
+      substraitExprName: String,
+      child: ExpressionTransformer,
+      expr: UnBase64): ExpressionTransformer = {
+    val failsOnError = SparkShimLoader.getSparkShims.unBase64FunctionFailsOnError(expr)
+    if (failsOnError) {
+      GlutenExceptionUtil.throwsNotFullySupported(
+        ExpressionNames.UNBASE64,
+        Unbase64Restrictions.NOT_SUPPORT_FAIL_ON_ERROR)
+    }
+    GenericExpressionTransformer(substraitExprName, child, expr)
+  }
+
+  /**
+   * Generate an expression transformer for a Base64 StaticInvoke. The expression is reported
+   * as not fully supported when 'spark.sql.chunkBase64String.enabled' (read with a default of
+   * "true") is false.
+   */
+  override def genBase64StaticInvokeTransformer(
+      substraitExprName: String,
+      child: ExpressionTransformer,
+      expr: StaticInvoke): ExpressionTransformer = {
+    val chunkBase64Enabled =
+      SQLConf.get.getConfString("spark.sql.chunkBase64String.enabled", "true").toBoolean
+    if (!chunkBase64Enabled) {
+      GlutenExceptionUtil.throwsNotFullySupported(
+        ExpressionNames.BASE64,
+        Base64Restrictions.NOT_SUPPORT_DISABLE_CHUNK_BASE64_STRING)
+    }
+    // NOTE(review): the transformer name is fixed to BASE64; `substraitExprName` is not used.
+    GenericExpressionTransformer(ExpressionNames.BASE64, child, expr)
+  }
+
/** Generate an expression transformer to transform NamedStruct to
Substrait. */
override def genNamedStructTransformer(
substraitExprName: String,
diff --git
a/backends-velox/src/main/scala/org/apache/gluten/expression/ExpressionRestrictions.scala
b/backends-velox/src/main/scala/org/apache/gluten/expression/ExpressionRestrictions.scala
new file mode 100644
index 0000000000..569da7d2fc
--- /dev/null
+++
b/backends-velox/src/main/scala/org/apache/gluten/expression/ExpressionRestrictions.scala
@@ -0,0 +1,101 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.gluten.expression
+
+import org.apache.spark.sql.internal.SQLConf
+
+trait ExpressionRestrictions { // Restrictions of one function, as read by gen-function-support-docs.py.
+ val functionName: String // Name of the function the restrictions apply to.
+ val restrictionMessages: Array[String] // One message per restriction of that function.
+}
+
+/** Restriction messages for the function named by `ExpressionNames.STR_TO_MAP`. */
+object StrToMapRestrictions extends ExpressionRestrictions {
+  override val functionName: String = ExpressionNames.STR_TO_MAP
+
+  /** Only the EXCEPTION map-key dedup policy is supported. */
+  val ONLY_SUPPORT_MAP_KEY_DEDUP_POLICY: String = {
+    val confKey = SQLConf.MAP_KEY_DEDUP_POLICY.key
+    val supportedPolicy = SQLConf.MapKeyDedupPolicy.EXCEPTION.toString
+    s"Only $confKey = $supportedPolicy is supported for Velox backend"
+  }
+
+  override val restrictionMessages: Array[String] = Array(ONLY_SUPPORT_MAP_KEY_DEDUP_POLICY)
+}
+
+/** Restriction messages for the function named by `ExpressionNames.FROM_JSON`. */
+object FromJsonRestrictions extends ExpressionRestrictions {
+  override val functionName: String = ExpressionNames.FROM_JSON
+
+  val MUST_ENABLE_PARTIAL_RESULTS: String =
+    s"${ExpressionNames.FROM_JSON} with 'spark.sql.json.enablePartialResults = false' " +
+      "is not supported in Velox"
+
+  val NOT_SUPPORT_WITH_OPTIONS: String =
+    s"${ExpressionNames.FROM_JSON} with options is not supported in Velox"
+
+  val NOT_SUPPORT_CASE_SENSITIVE: String =
+    s"${ExpressionNames.FROM_JSON} with '${SQLConf.CASE_SENSITIVE.key} = true' " +
+      "is not supported in Velox"
+
+  val NOT_SUPPORT_DUPLICATE_KEYS: String =
+    s"${ExpressionNames.FROM_JSON} with duplicate keys is not supported in Velox"
+
+  val NOT_SUPPORT_COLUMN_CORRUPT_RECORD: String =
+    s"${ExpressionNames.FROM_JSON} with column corrupt record is not supported in Velox"
+
+  // Same order as the original listing.
+  override val restrictionMessages: Array[String] = Array(
+    MUST_ENABLE_PARTIAL_RESULTS,
+    NOT_SUPPORT_WITH_OPTIONS,
+    NOT_SUPPORT_CASE_SENSITIVE,
+    NOT_SUPPORT_DUPLICATE_KEYS,
+    NOT_SUPPORT_COLUMN_CORRUPT_RECORD
+  )
+}
+
+/** Restriction messages for the function named by `ExpressionNames.TO_JSON`. */
+object ToJsonRestrictions extends ExpressionRestrictions {
+  override val functionName: String = ExpressionNames.TO_JSON
+
+  val NOT_SUPPORT_WITH_OPTIONS: String =
+    s"${ExpressionNames.TO_JSON} with options is not supported in Velox"
+
+  override val restrictionMessages: Array[String] = Array(NOT_SUPPORT_WITH_OPTIONS)
+}
+
+/** Restriction messages for the function named by `ExpressionNames.UNBASE64`. */
+object Unbase64Restrictions extends ExpressionRestrictions {
+  override val functionName: String = ExpressionNames.UNBASE64
+
+  val NOT_SUPPORT_FAIL_ON_ERROR: String =
+    s"${ExpressionNames.UNBASE64} with failOnError is not supported"
+
+  override val restrictionMessages: Array[String] = Array(NOT_SUPPORT_FAIL_ON_ERROR)
+}
+
+/** Restriction messages for the function named by `ExpressionNames.BASE64`. */
+object Base64Restrictions extends ExpressionRestrictions {
+  override val functionName: String = ExpressionNames.BASE64
+
+  val NOT_SUPPORT_DISABLE_CHUNK_BASE64_STRING: String =
+    s"${ExpressionNames.BASE64} with chunkBase64String disabled is not supported"
+
+  override val restrictionMessages: Array[String] =
+    Array(NOT_SUPPORT_DISABLE_CHUNK_BASE64_STRING)
+}
+
+object ExpressionRestrictions {
+
+  /**
+   * Returns every restriction holder defined in this file. Called by
+   * gen-function-support-docs.py to get all restrictions.
+   */
+  def listAllRestrictions(): Array[ExpressionRestrictions] =
+    Array(
+      StrToMapRestrictions,
+      FromJsonRestrictions,
+      ToJsonRestrictions,
+      Unbase64Restrictions,
+      Base64Restrictions
+    )
+}
diff --git a/docs/velox-backend-scalar-function-support.md
b/docs/velox-backend-scalar-function-support.md
index d0e68e7108..5325b97e9b 100644
--- a/docs/velox-backend-scalar-function-support.md
+++ b/docs/velox-backend-scalar-function-support.md
@@ -1,6 +1,6 @@
# Scalar Functions Support Status
-**Out of 357 scalar functions in Spark 3.5, Gluten currently fully supports
243 functions and partially supports 19 functions.**
+**Out of 357 scalar functions in Spark 3.5, Gluten currently fully supports
239 functions and partially supports 24 functions.**
## Array Functions
@@ -175,15 +175,15 @@
## JSON Functions
-| Spark Functions | Spark Expressions | Status | Restrictions |
-|-------------------|---------------------|----------|----------------|
-| from_json | JsonToStructs | S | |
-| get_json_object | GetJsonObject | S | |
-| json_array_length | LengthOfJsonArray | S | |
-| json_object_keys | JsonObjectKeys | S | |
-| json_tuple | JsonTuple | S | |
-| schema_of_json | SchemaOfJson | | |
-| to_json | StructsToJson | | |
+| Spark Functions | Spark Expressions | Status | Restrictions
|
+|-------------------|---------------------|----------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| from_json | JsonToStructs | PS | from_json with
'spark.sql.caseSensitive = true' is not supported in Velox<br>from_json with
'spark.sql.json.enablePartialResults = false' is not supported in
Velox<br>from_json with column corrupt record is not supported in
Velox<br>from_json with duplicate keys is not supported in Velox<br>from_json
with options is not supported in Velox |
+| get_json_object | GetJsonObject | S |
|
+| json_array_length | LengthOfJsonArray | S |
|
+| json_object_keys | JsonObjectKeys | S |
|
+| json_tuple | JsonTuple | S |
|
+| schema_of_json | SchemaOfJson | |
|
+| to_json | StructsToJson | PS | to_json with options is
not supported in Velox
|
## Lambda Functions
@@ -204,19 +204,19 @@
## Map Functions
-| Spark Functions | Spark Expressions | Status | Restrictions |
-|-------------------|---------------------|----------|----------------|
-| element_at | ElementAt | S | |
-| map | CreateMap | PS | |
-| map_concat | MapConcat | PS | |
-| map_contains_key | MapContainsKey | S | |
-| map_entries | MapEntries | S | |
-| map_from_arrays | MapFromArrays | | |
-| map_from_entries | MapFromEntries | | |
-| map_keys | MapKeys | S | |
-| map_values | MapValues | S | |
-| str_to_map | StringToMap | S | |
-| try_element_at | TryElementAt | | |
+| Spark Functions | Spark Expressions | Status | Restrictions
|
+|-------------------|---------------------|----------|-----------------------------------------------------------------------------|
+| element_at | ElementAt | S |
|
+| map | CreateMap | PS |
|
+| map_concat | MapConcat | PS |
|
+| map_contains_key | MapContainsKey | S |
|
+| map_entries | MapEntries | S |
|
+| map_from_arrays | MapFromArrays | |
|
+| map_from_entries | MapFromEntries | |
|
+| map_keys | MapKeys | S |
|
+| map_values | MapValues | S |
|
+| str_to_map | StringToMap | PS | Only
spark.sql.mapKeyDedupPolicy = EXCEPTION is supported for Velox backend |
+| try_element_at | TryElementAt | |
|
## Mathematical Functions
@@ -352,73 +352,73 @@
## String Functions
-| Spark Functions | Spark Expressions | Status | Restrictions
|
-|--------------------|-----------------------------|----------|------------------------|
-| ascii | Ascii | S |
|
-| base64 | Base64 | S |
|
-| bit_length | BitLength | S |
|
-| btrim | StringTrimBoth | S |
|
-| char | Chr | S |
|
-| char_length | Length | S |
|
-| character_length | Length | S |
|
-| chr | Chr | S |
|
-| concat_ws | ConcatWs | S |
|
-| contains | ContainsExpressionBuilder | PS | BinaryType
unsupported |
-| decode | Decode | |
|
-| elt | Elt | |
|
-| encode | Encode | |
|
-| endswith | EndsWithExpressionBuilder | PS | BinaryType
unsupported |
-| find_in_set | FindInSet | S |
|
-| format_number | FormatNumber | |
|
-| format_string | FormatString | |
|
-| initcap | InitCap | S |
|
-| instr | StringInstr | S |
|
-| lcase | Lower | S |
|
-| left | Left | S |
|
-| len | Length | S |
|
-| length | Length | S |
|
-| levenshtein | Levenshtein | S |
|
-| locate | StringLocate | S |
|
-| lower | Lower | S |
|
-| lpad | LPadExpressionBuilder | PS | BinaryType
unsupported |
-| ltrim | StringTrimLeft | S |
|
-| luhn_check | Luhncheck | S |
|
-| mask | MaskExpressionBuilder | S |
|
-| octet_length | OctetLength | |
|
-| overlay | Overlay | S |
|
-| position | StringLocate | S |
|
-| printf | FormatString | |
|
-| regexp_count | RegExpCount | |
|
-| regexp_extract | RegExpExtract | PS | Lookaround
unsupported |
-| regexp_extract_all | RegExpExtractAll | PS | Lookaround
unsupported |
-| regexp_instr | RegExpInStr | |
|
-| regexp_replace | RegExpReplace | PS | Lookaround
unsupported |
-| regexp_substr | RegExpSubStr | |
|
-| repeat | StringRepeat | S |
|
-| replace | StringReplace | S |
|
-| right | Right | S |
|
-| rpad | RPadExpressionBuilder | PS | BinaryType
unsupported |
-| rtrim | StringTrimRight | S |
|
-| sentences | Sentences | |
|
-| soundex | SoundEx | S |
|
-| space | StringSpace | |
|
-| split | StringSplit | S |
|
-| split_part | SplitPart | S |
|
-| startswith | StartsWithExpressionBuilder | PS | BinaryType
unsupported |
-| substr | Substring | PS |
|
-| substring | Substring | PS |
|
-| substring_index | SubstringIndex | S |
|
-| to_binary | ToBinary | |
|
-| to_char | ToCharacter | |
|
-| to_number | ToNumber | |
|
-| to_varchar | ToCharacter | |
|
-| translate | StringTranslate | S |
|
-| trim | StringTrim | S |
|
-| try_to_binary | TryToBinary | |
|
-| try_to_number | TryToNumber | |
|
-| ucase | Upper | S |
|
-| unbase64 | UnBase64 | S |
|
-| upper | Upper | S |
|
+| Spark Functions | Spark Expressions | Status | Restrictions
|
+|--------------------|-----------------------------|----------|---------------------------------------------------------|
+| ascii | Ascii | S |
|
+| base64 | Base64 | PS | base64 with
chunkBase64String disabled is not supported |
+| bit_length | BitLength | S |
|
+| btrim | StringTrimBoth | S |
|
+| char | Chr | S |
|
+| char_length | Length | S |
|
+| character_length | Length | S |
|
+| chr | Chr | S |
|
+| concat_ws | ConcatWs | S |
|
+| contains | ContainsExpressionBuilder | PS | BinaryType
unsupported |
+| decode | Decode | |
|
+| elt | Elt | |
|
+| encode | Encode | |
|
+| endswith | EndsWithExpressionBuilder | PS | BinaryType
unsupported |
+| find_in_set | FindInSet | S |
|
+| format_number | FormatNumber | |
|
+| format_string | FormatString | |
|
+| initcap | InitCap | S |
|
+| instr | StringInstr | S |
|
+| lcase | Lower | S |
|
+| left | Left | S |
|
+| len | Length | S |
|
+| length | Length | S |
|
+| levenshtein | Levenshtein | S |
|
+| locate | StringLocate | S |
|
+| lower | Lower | S |
|
+| lpad | LPadExpressionBuilder | PS | BinaryType
unsupported |
+| ltrim | StringTrimLeft | S |
|
+| luhn_check | Luhncheck | S |
|
+| mask | MaskExpressionBuilder | S |
|
+| octet_length | OctetLength | |
|
+| overlay | Overlay | S |
|
+| position | StringLocate | S |
|
+| printf | FormatString | |
|
+| regexp_count | RegExpCount | |
|
+| regexp_extract | RegExpExtract | PS | Lookaround
unsupported |
+| regexp_extract_all | RegExpExtractAll | PS | Lookaround
unsupported |
+| regexp_instr | RegExpInStr | |
|
+| regexp_replace | RegExpReplace | PS | Lookaround
unsupported |
+| regexp_substr | RegExpSubStr | |
|
+| repeat | StringRepeat | S |
|
+| replace | StringReplace | S |
|
+| right | Right | S |
|
+| rpad | RPadExpressionBuilder | PS | BinaryType
unsupported |
+| rtrim | StringTrimRight | S |
|
+| sentences | Sentences | |
|
+| soundex | SoundEx | S |
|
+| space | StringSpace | |
|
+| split | StringSplit | S |
|
+| split_part | SplitPart | S |
|
+| startswith | StartsWithExpressionBuilder | PS | BinaryType
unsupported |
+| substr | Substring | PS |
|
+| substring | Substring | PS |
|
+| substring_index | SubstringIndex | S |
|
+| to_binary | ToBinary | |
|
+| to_char | ToCharacter | |
|
+| to_number | ToNumber | |
|
+| to_varchar | ToCharacter | |
|
+| translate | StringTranslate | S |
|
+| trim | StringTrim | S |
|
+| try_to_binary | TryToBinary | |
|
+| try_to_number | TryToNumber | |
|
+| ucase | Upper | S |
|
+| unbase64 | UnBase64 | PS | unbase64 with
failOnError is not supported |
+| upper | Upper | S |
|
## Struct Functions
diff --git
a/gluten-substrait/src/main/java/org/apache/gluten/exception/GlutenExceptionUtil.scala
b/gluten-substrait/src/main/java/org/apache/gluten/exception/GlutenExceptionUtil.scala
new file mode 100644
index 0000000000..2dd0ebdfe8
--- /dev/null
+++
b/gluten-substrait/src/main/java/org/apache/gluten/exception/GlutenExceptionUtil.scala
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.gluten.exception
+
+/** Utility methods for throwing exceptions for not fully supported functions. */
+object GlutenExceptionUtil {
+
+  /**
+   * Throws a [[GlutenNotSupportException]] reporting that `function` is not fully supported.
+   *
+   * The message has the form "Function '&lt;function&gt;' is not fully supported. Cause:
+   * &lt;cause&gt;". tools/scripts/gen-function-support-docs.py matches this wording in fallback
+   * logs, so the two must be kept in sync.
+   *
+   * @param function
+   *   name of the function that is not fully supported
+   * @param cause
+   *   description of the unsupported condition
+   * @return
+   *   never returns normally; the explicit `Nothing` result type documents that and lets the
+   *   call appear wherever a value of any type is expected
+   */
+  def throwsNotFullySupported(function: String, cause: String): Nothing =
+    throw new GlutenNotSupportException(
+      s"Function '$function' is not fully supported. Cause: $cause")
+}
diff --git
a/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala
b/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala
index c6cdcc3f32..1625cce062 100644
---
a/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala
+++
b/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala
@@ -32,6 +32,7 @@ import org.apache.spark.sql.catalyst.catalog.BucketSpec
import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression
+import org.apache.spark.sql.catalyst.expressions.objects.StaticInvoke
import org.apache.spark.sql.catalyst.optimizer.BuildSide
import org.apache.spark.sql.catalyst.plans.JoinType
import org.apache.spark.sql.catalyst.plans.physical.{BroadcastMode,
Partitioning}
@@ -175,6 +176,20 @@ trait SparkPlanExecApi {
GenericExpressionTransformer(substraitExprName, child, expr)
}
+  /** Transform UnBase64 to Substrait. The default wraps it in a generic transformer. */
+  def genUnbase64Transformer(
+      substraitExprName: String,
+      child: ExpressionTransformer,
+      expr: UnBase64): ExpressionTransformer =
+    GenericExpressionTransformer(substraitExprName, child, expr)
+
+  /** Transform a Base64 StaticInvoke to Substrait. The default wraps it in a generic transformer. */
+  def genBase64StaticInvokeTransformer(
+      substraitExprName: String,
+      child: ExpressionTransformer,
+      expr: StaticInvoke): ExpressionTransformer =
+    GenericExpressionTransformer(substraitExprName, child, expr)
+
/** Transform GetArrayItem to Substrait. */
def genGetArrayItemTransformer(
substraitExprName: String,
diff --git
a/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala
b/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala
index 7673b1a6f1..fc1de383d0 100644
---
a/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala
+++
b/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala
@@ -175,11 +175,7 @@ object ExpressionConverter extends SQLConfHelper with
Logging {
i)
case i: StaticInvoke
if Seq("encode", "decode").contains(i.functionName) &&
i.objectName.endsWith("Base64") =>
- if (!SQLConf.get.getConfString("spark.sql.chunkBase64String.enabled",
"true").toBoolean) {
- throw new GlutenNotSupportException(
- "Base64 with chunkBase64String disabled is not supported in
gluten.")
- }
- return GenericExpressionTransformer(
+ return
BackendsApiManager.getSparkPlanExecApiInstance.genBase64StaticInvokeTransformer(
ExpressionNames.BASE64,
replaceWithExpressionTransformer0(i.arguments.head, attributeSeq,
expressionsMap),
i
@@ -770,8 +766,12 @@ object ExpressionConverter extends SQLConfHelper with
Logging {
replaceWithExpressionTransformer0(s.child, attributeSeq,
expressionsMap),
s
)
- case u: UnBase64 if
SparkShimLoader.getSparkShims.unBase64FunctionFailsOnError(u) =>
- throw new GlutenNotSupportException("UnBase64 with failOnError is not
supported in gluten.")
+ case u: UnBase64 =>
+ BackendsApiManager.getSparkPlanExecApiInstance.genUnbase64Transformer(
+ substraitExprName,
+ replaceWithExpressionTransformer0(u.child, attributeSeq,
expressionsMap),
+ u
+ )
case ce if
BackendsApiManager.getSparkPlanExecApiInstance.expressionFlattenSupported(ce) =>
replaceFlattenedExpressionWithExpressionTransformer(
substraitExprName,
diff --git a/tools/scripts/gen-function-support-docs.py
b/tools/scripts/gen-function-support-docs.py
index f6778403c9..4ba59b067e 100644
--- a/tools/scripts/gen-function-support-docs.py
+++ b/tools/scripts/gen-function-support-docs.py
@@ -520,19 +520,19 @@ STATIC_INVOKES = {
# Known Restrictions in Gluten.
LOOKAROUND_UNSUPPORTED = "Lookaround unsupported"
BINARY_TYPE_UNSUPPORTED = "BinaryType unsupported"
-GLUTEN_RESTRICTIONS = {
+KNOWN_RESTRICTIONS = {
"scalar": {
- "regexp": LOOKAROUND_UNSUPPORTED,
- "regexp_like": LOOKAROUND_UNSUPPORTED,
- "rlike": LOOKAROUND_UNSUPPORTED,
- "regexp_extract": LOOKAROUND_UNSUPPORTED,
- "regexp_extract_all": LOOKAROUND_UNSUPPORTED,
- "regexp_replace": LOOKAROUND_UNSUPPORTED,
- "contains": BINARY_TYPE_UNSUPPORTED,
- "startswith": BINARY_TYPE_UNSUPPORTED,
- "endswith": BINARY_TYPE_UNSUPPORTED,
- "lpad": BINARY_TYPE_UNSUPPORTED,
- "rpad": BINARY_TYPE_UNSUPPORTED,
+ "regexp": {LOOKAROUND_UNSUPPORTED},
+ "regexp_like": {LOOKAROUND_UNSUPPORTED},
+ "rlike": {LOOKAROUND_UNSUPPORTED},
+ "regexp_extract": {LOOKAROUND_UNSUPPORTED},
+ "regexp_extract_all": {LOOKAROUND_UNSUPPORTED},
+ "regexp_replace": {LOOKAROUND_UNSUPPORTED},
+ "contains": {BINARY_TYPE_UNSUPPORTED},
+ "startswith": {BINARY_TYPE_UNSUPPORTED},
+ "endswith": {BINARY_TYPE_UNSUPPORTED},
+ "lpad": {BINARY_TYPE_UNSUPPORTED},
+ "rpad": {BINARY_TYPE_UNSUPPORTED},
},
"aggregate": {},
"window": {},
@@ -760,6 +760,29 @@ def parse_logs(log_file):
unresolved = []
+ pkg = jvm.org.apache.gluten.expression
+ cls = getattr(pkg, "ExpressionRestrictions$")
+ obj = getattr(cls, "MODULE$")
+
+ jrestrictions = {
+ r.functionName(): set(m for m in r.restrictionMessages())
+ for r in obj.listAllRestrictions()
+ }
+
+ restrictions = KNOWN_RESTRICTIONS.copy()
+ print(restrictions)
+ for f, v in jrestrictions.items():
+ print(v)
+ for c in FUNCTION_CATEGORIES:
+ if f in functions[c]:
+ if f in KNOWN_RESTRICTIONS[c]:
+ restrictions[c][f].union(v)
+ else:
+ restrictions[c][f] = v
+ break
+
+ print(restrictions)
+
def filter_fallback_reasons():
with open(log_file, "r") as f:
lines = f.readlines()
@@ -832,7 +855,7 @@ def parse_logs(log_file):
)
support_list[category]["unsupported"].add(function_name_tuple(f))
- for f in GLUTEN_RESTRICTIONS[category].keys():
+ for f in restrictions[category].keys():
support_list[category]["partial"].add(function_name_tuple(f))
for r in filter_fallback_reasons():
@@ -905,6 +928,39 @@ def parse_logs(log_file):
else:
function_not_found(r)
+ # Partially supported: throws not fully supported exception for
certain conditions.
+ elif "is not fully supported" in r:
+ pattern = r"Function '([\w0-9]+)' is not fully supported. Cause:
(.*)"
+
+ # Extract the function name and reason
+ match = re.search(pattern, r)
+
+ if match:
+ function_name = match.group(1)
+ if function_name in function_names:
+ support_list["scalar"]["partial"].add(
+ function_name_tuple(function_name)
+ )
+ else:
+ support_list["scalar"]["unknown"].add(
+ function_name_tuple(function_name)
+ )
+ cause = match.group(2)
+ not_listed = False
+ if function_name not in restrictions["scalar"]:
+ restrictions["scalar"][function_name] = set()
+ not_listed = True
+ elif cause not in restrictions["scalar"][function_name]:
+ not_listed = True
+ if not_listed:
+ restrictions["scalar"][function_name].add(cause)
+ logging.log(
+ logging.WARNING,
+ f"Restriction for function {function_name} found in
logs but not listed in the ExpressionRestrictions: {cause}",
+ )
+ else:
+ function_not_found(r)
+
# Not supported: Special case for unsupported expressions.
elif "Not support expression" in r:
pattern = r"Not support expression ([\w0-9]+)"
@@ -933,7 +989,7 @@ def parse_logs(log_file):
else:
function_not_found(r)
- # Not supported: Special case for unsupported functions.
+ # Not supported: Function is in the native blacklist.
elif "Function is not supported:" in r:
pattern = r"Function is not supported:\s+([\w0-9]+)"
@@ -1051,7 +1107,7 @@ def parse_logs(log_file):
else:
unresolved.append(r)
- return support_list, unresolved
+ return support_list, unresolved, restrictions
def generate_function_doc(category, output):
@@ -1131,16 +1187,16 @@ def generate_function_doc(category, output):
f = "|"
elif f == "||":
f = "||"
+
+ r = ""
+ if f in restrictions[category]:
+ r = "<br>".join(sorted(restrictions[category][f]))
data.append(
[
f,
classname,
support,
- (
- ""
- if f not in GLUTEN_RESTRICTIONS[category]
- else GLUTEN_RESTRICTIONS[category][f]
- ),
+ r,
]
)
table = tabulate.tabulate(data, headers, tablefmt="github")
@@ -1259,7 +1315,7 @@ if __name__ == "__main__":
spark_function_map = create_spark_function_map()
- support_list, unresolved = parse_logs(
+ support_list, unresolved, restrictions = parse_logs(
os.path.join(
gluten_home,
"gluten-ut",
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]