allisonwang-db commented on code in PR #41864:
URL: https://github.com/apache/spark/pull/41864#discussion_r1261749089


##########
sql/core/src/test/resources/sql-tests/results/named-function-arguments.sql.out:
##########
@@ -2,121 +2,258 @@
 -- !query
 SELECT mask('AbCD123-@$#', lowerChar => 'q', upperChar => 'Q', otherChar => 
'o', digitChar => 'd')
 -- !query schema
+struct<mask(AbCD123-@$#, Q, q, d, o):string>
+-- !query output
+QqQQdddoooo
+
+
+-- !query
+SELECT mask(lowerChar => 'q', upperChar => 'Q', otherChar => 'o', digitChar => 
'd', str => 'AbCD123-@$#')
+-- !query schema
+struct<mask(AbCD123-@$#, Q, q, d, o):string>
+-- !query output
+QqQQdddoooo
+
+
+-- !query
+SELECT mask('AbCD123-@$#', lowerChar => 'q', upperChar => 'Q', digitChar => 
'd')
+-- !query schema
+struct<mask(AbCD123-@$#, Q, q, d, NULL):string>
+-- !query output
+QqQQddd-@$#
+
+
+-- !query
+SELECT mask(lowerChar => 'q', upperChar => 'Q', digitChar => 'd', str => 
'AbCD123-@$#')
+-- !query schema
+struct<mask(AbCD123-@$#, Q, q, d, NULL):string>
+-- !query output
+QqQQddd-@$#
+
+
+-- !query
+create temporary view t2 as select * from values
+  ('val2a', 6S, 12, 14L, float(15), 20D, 20E2, timestamp '2014-04-04 
01:01:00.000', date '2014-04-04'),
+  ('val1b', 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 
01:01:00.000', date '2014-05-04'),
+  ('val1b', 8S, 16, 119L, float(17), 25D, 26E2, timestamp '2015-05-04 
01:01:00.000', date '2015-05-04'),
+  ('val1c', 12S, 16, 219L, float(17), 25D, 26E2, timestamp '2016-05-04 
01:01:00.000', date '2016-05-04'),
+  ('val1b', null, 16, 319L, float(17), 25D, 26E2, timestamp '2017-05-04 
01:01:00.000', null),
+  ('val2e', 8S, null, 419L, float(17), 25D, 26E2, timestamp '2014-06-04 
01:01:00.000', date '2014-06-04'),
+  ('val1f', 19S, null, 519L, float(17), 25D, 26E2, timestamp '2014-05-04 
01:01:00.000', date '2014-05-04'),
+  ('val1b', 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-06-04 
01:01:00.000', date '2014-06-04'),
+  ('val1b', 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04 
01:01:00.000', date '2014-07-04'),
+  ('val1c', 12S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-08-04 
01:01:00.000', date '2014-08-05'),
+  ('val1e', 8S, null, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 
01:01:00.000', date '2014-09-04'),
+  ('val1f', 19S, null, 19L, float(17), 25D, 26E2, timestamp '2014-10-04 
01:01:00.000', date '2014-10-04'),
+  ('val1b', null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 
01:01:00.000', null)
+  as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i)
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT hex(count_min_sketch(t2d, seed => 1, epsilon => 0.5d, confidence => 
0.5d)) FROM t2
+-- !query schema
+struct<hex(count_min_sketch(t2d, 0.5, 0.5, 1)):string>
+-- !query output
+00000001000000000000000D0000000100000004000000005D8D6AB90000000000000002000000000000000700000000000000010000000000000003
+
+
+-- !query
+SELECT hex(count_min_sketch(seed => 1, epsilon => 0.5d, confidence => 0.5d, 
column => t2d)) FROM t2
+-- !query schema
+struct<hex(count_min_sketch(t2d, 0.5, 0.5, 1)):string>
+-- !query output
+00000001000000000000000D0000000100000004000000005D8D6AB90000000000000002000000000000000700000000000000010000000000000003
+
+
+-- !query
+SELECT hex(count_min_sketch(t2d, 0.5d, seed => 1, confidence => 0.5d)) FROM t2
+-- !query schema
+struct<hex(count_min_sketch(t2d, 0.5, 0.5, 1)):string>
+-- !query output
+00000001000000000000000D0000000100000004000000005D8D6AB90000000000000002000000000000000700000000000000010000000000000003
+
+
+-- !query
+SELECT * FROM explode(collection => array(1, 2))
+-- !query schema
+struct<col:int>
+-- !query output
+1
+2
+
+
+-- !query
+SELECT * FROM explode_outer(collection => map('a', 1, 'b', 2))
+-- !query schema
+struct<key:string,value:int>
+-- !query output
+a      1
+b      2
+
+
+-- !query
+CREATE OR REPLACE TEMPORARY VIEW v AS SELECT id FROM range(0, 8)
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT * FROM explode(collection => TABLE v)
+-- !query schema
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
 {
-  "errorClass" : "DATATYPE_MISMATCH.NON_FOLDABLE_INPUT",
+  "errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE",
   "sqlState" : "42K09",
   "messageParameters" : {
-    "inputExpr" : "\"namedargumentexpression(q)\"",
-    "inputName" : "upperChar",
-    "inputType" : "\"STRING\"",
-    "sqlExpr" : "\"mask(AbCD123-@$#, namedargumentexpression(q), 
namedargumentexpression(Q), namedargumentexpression(o), 
namedargumentexpression(d))\""
+    "inputSql" : "\"outer(__auto_generated_subquery_name_0.c)\"",
+    "inputType" : "\"STRUCT<id: BIGINT>\"",
+    "paramIndex" : "1",
+    "requiredType" : "(\"ARRAY\" or \"MAP\")",
+    "sqlExpr" : "\"explode(outer(__auto_generated_subquery_name_0.c))\""
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 15,
+    "stopIndex" : 44,
+    "fragment" : "explode(collection => TABLE v)"
+  } ]
+}
+
+
+-- !query
+SELECT mask(lowerChar => 'q', 'AbCD123-@$#', upperChar => 'Q', otherChar => 
'o', digitChar => 'd')
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+  "errorClass" : "UNEXPECTED_POSITIONAL_ARGUMENT",
+  "sqlState" : "4274K",
+  "messageParameters" : {
+    "functionName" : "`mask`"
   },
   "queryContext" : [ {
     "objectType" : "",
     "objectName" : "",
     "startIndex" : 8,
     "stopIndex" : 98,
-    "fragment" : "mask('AbCD123-@$#', lowerChar => 'q', upperChar => 'Q', 
otherChar => 'o', digitChar => 'd')"
+    "fragment" : "mask(lowerChar => 'q', 'AbCD123-@$#', upperChar => 'Q', 
otherChar => 'o', digitChar => 'd')"
   } ]
 }
 
 
 -- !query
-SELECT mask(lowerChar => 'q', upperChar => 'Q', otherChar => 'o', digitChar => 
'd', str => 'AbCD123-@$#')
+SELECT mask('AbCD123-@$#', lowerChar => 'q', upperChar => 'Q', otherChar => 
'o', digitChar => 'd', digitChar => 'e')
 -- !query schema
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
 {
-  "errorClass" : "DATATYPE_MISMATCH.NON_FOLDABLE_INPUT",
-  "sqlState" : "42K09",
+  "errorClass" : 
"DUPLICATE_ROUTINE_PARAMETER_ASSIGNMENT.DOUBLE_NAMED_ARGUMENT_REFERENCE",
+  "sqlState" : "4274K",
   "messageParameters" : {
-    "inputExpr" : "\"namedargumentexpression(Q)\"",
-    "inputName" : "upperChar",
-    "inputType" : "\"STRING\"",
-    "sqlExpr" : "\"mask(namedargumentexpression(q), 
namedargumentexpression(Q), namedargumentexpression(o), 
namedargumentexpression(d), namedargumentexpression(AbCD123-@$#))\""
+    "functionName" : "`mask`",
+    "parameterName" : "`digitChar`"
   },
   "queryContext" : [ {
     "objectType" : "",
     "objectName" : "",
     "startIndex" : 8,
-    "stopIndex" : 105,
-    "fragment" : "mask(lowerChar => 'q', upperChar => 'Q', otherChar => 'o', 
digitChar => 'd', str => 'AbCD123-@$#')"
+    "stopIndex" : 116,
+    "fragment" : "mask('AbCD123-@$#', lowerChar => 'q', upperChar => 'Q', 
otherChar => 'o', digitChar => 'd', digitChar => 'e')"
   } ]
 }
 
 
 -- !query
-SELECT mask('AbCD123-@$#', lowerChar => 'q', upperChar => 'Q', digitChar => 
'd')
+SELECT mask('AbCD123-@$#', lowerChar => 'q', upperChar => 'Q', otherChar => 
'o', digitChar => 'd', str => 'AbC')
 -- !query schema
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
 {
-  "errorClass" : "DATATYPE_MISMATCH.NON_FOLDABLE_INPUT",
-  "sqlState" : "42K09",
+  "errorClass" : 
"DUPLICATE_ROUTINE_PARAMETER_ASSIGNMENT.POSITIONAL_AND_NAMED_ARGUMENT_DOUBLE_REFERENCE",
+  "sqlState" : "4274K",
   "messageParameters" : {
-    "inputExpr" : "\"namedargumentexpression(q)\"",
-    "inputName" : "upperChar",
-    "inputType" : "\"STRING\"",
-    "sqlExpr" : "\"mask(AbCD123-@$#, namedargumentexpression(q), 
namedargumentexpression(Q), namedargumentexpression(d), NULL)\""
+    "functionName" : "`mask`",
+    "parameterName" : "`str`"
   },
   "queryContext" : [ {
     "objectType" : "",
     "objectName" : "",
     "startIndex" : 8,
-    "stopIndex" : 80,
-    "fragment" : "mask('AbCD123-@$#', lowerChar => 'q', upperChar => 'Q', 
digitChar => 'd')"
+    "stopIndex" : 112,
+    "fragment" : "mask('AbCD123-@$#', lowerChar => 'q', upperChar => 'Q', 
otherChar => 'o', digitChar => 'd', str => 'AbC')"
   } ]
 }
 
 
 -- !query
-SELECT mask(lowerChar => 'q', upperChar => 'Q', digitChar => 'd', str => 
'AbCD123-@$#')
+SELECT mask(lowerChar => 'q', upperChar => 'Q', otherChar => 'o', digitChar => 
'd')
 -- !query schema
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
 {
-  "errorClass" : "DATATYPE_MISMATCH.NON_FOLDABLE_INPUT",
-  "sqlState" : "42K09",
+  "errorClass" : "REQUIRED_PARAMETER_NOT_FOUND",
+  "sqlState" : "4274K",
   "messageParameters" : {
-    "inputExpr" : "\"namedargumentexpression(Q)\"",
-    "inputName" : "upperChar",
-    "inputType" : "\"STRING\"",
-    "sqlExpr" : "\"mask(namedargumentexpression(q), 
namedargumentexpression(Q), namedargumentexpression(d), 
namedargumentexpression(AbCD123-@$#), NULL)\""
+    "functionName" : "`mask`",
+    "parameterName" : "`str`"
   },
   "queryContext" : [ {
     "objectType" : "",
     "objectName" : "",
     "startIndex" : 8,
-    "stopIndex" : 87,
-    "fragment" : "mask(lowerChar => 'q', upperChar => 'Q', digitChar => 'd', 
str => 'AbCD123-@$#')"
+    "stopIndex" : 83,
+    "fragment" : "mask(lowerChar => 'q', upperChar => 'Q', otherChar => 'o', 
digitChar => 'd')"
   } ]
 }
 
 
 -- !query
-SELECT mask(lowerChar => 'q', 'AbCD123-@$#', upperChar => 'Q', otherChar => 
'o', digitChar => 'd')
+SELECT mask('AbCD123-@$#', lowerChar => 'q', upperChar => 'Q', otherChar => 
'o', digitChar => 'd', cellular => 'automata')
 -- !query schema
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
 {
-  "errorClass" : "DATATYPE_MISMATCH.INPUT_SIZE_NOT_ONE",
-  "sqlState" : "42K09",
+  "errorClass" : "UNRECOGNIZED_PARAMETER_NAME",
+  "sqlState" : "4274K",
   "messageParameters" : {
-    "exprName" : "upperChar",
-    "sqlExpr" : "\"mask(namedargumentexpression(q), AbCD123-@$#, 
namedargumentexpression(Q), namedargumentexpression(o), 
namedargumentexpression(d))\""
+    "argumentName" : "`cellular`",
+    "functionName" : "`mask`",
+    "proposal" : "`str` `upperChar` `otherChar` "
   },
   "queryContext" : [ {
     "objectType" : "",
     "objectName" : "",
     "startIndex" : 8,
-    "stopIndex" : 98,
-    "fragment" : "mask(lowerChar => 'q', 'AbCD123-@$#', upperChar => 'Q', 
otherChar => 'o', digitChar => 'd')"
+    "stopIndex" : 122,
+    "fragment" : "mask('AbCD123-@$#', lowerChar => 'q', upperChar => 'Q', 
otherChar => 'o', digitChar => 'd', cellular => 'automata')"
   } ]
 }
+
+
+-- !query
+SELECT encode(str => 'a', charset => 'utf-8')
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.SparkException
+{
+  "errorClass" : "INTERNAL_ERROR",
+  "sqlState" : "XX000",
+  "messageParameters" : {
+    "message" : "Cannot generate code for expression: str => a"

Review Comment:
   Is this internal error "cannot generate code for expression" expected?



##########
sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/NamedArgumentFunctionSuite.scala:
##########
@@ -0,0 +1,88 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.catalyst.analysis
+
+// import org.apache.spark.SparkThrowable
+// import org.apache.spark.sql.catalyst.InternalRow
+// import org.apache.spark.sql.catalyst.expressions.{Expression, Literal, 
NamedArgumentExpression}
+// import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, 
ExprCode}
+// import org.apache.spark.sql.catalyst.plans.logical.{FixedArgumentType, 
FunctionSignature,
+// NamedArgument, SupportsNamedArguments}
+// import org.apache.spark.sql.catalyst.util.TypeUtils.toSQLId
+// import org.apache.spark.sql.types.{DataType, StringType}
+//
+// class NamedArgumentFunctionSuite extends AnalysisTest {

Review Comment:
   Do we need this file?



##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/SupportsNamedArguments.scala:
##########
@@ -0,0 +1,118 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.catalyst.plans.logical
+
+import org.apache.spark.sql.catalyst.expressions.{Expression, 
NamedArgumentExpression}
+import org.apache.spark.sql.errors.QueryCompilationErrors
+import org.apache.spark.sql.types.AbstractDataType
+
+object SupportsNamedArguments {
+  final def defaultRearrange(functionSignature: FunctionSignature,
+      args: Seq[Expression],
+      functionName: String): Seq[Expression] = {
+    val parameters: Seq[NamedArgument] = functionSignature.parameters
+    val firstNamedArgIdx: Int = 
args.indexWhere(_.isInstanceOf[NamedArgumentExpression])
+    val (positionalArgs, namedArgs) =
+      if (firstNamedArgIdx == -1) {
+        (args, Nil)
+      } else {
+        args.splitAt(firstNamedArgIdx)
+      }
+    val namedParameters: Seq[NamedArgument] = 
parameters.drop(positionalArgs.size)
+
+    // Performing some checking to ensure valid argument list
+    val allParameterNames: Seq[String] = parameters.map(_.name)
+    val parameterNamesSet: Set[String] = allParameterNames.toSet
+    val positionalParametersSet = 
allParameterNames.take(positionalArgs.size).toSet
+    val namedParametersSet = collection.mutable.Set[String]()
+
+    for (arg <- namedArgs) {
+      arg match {
+        case namedArg: NamedArgumentExpression =>
+          val parameterName = namedArg.key
+          if (!parameterNamesSet.contains(parameterName)) {
+            throw 
QueryCompilationErrors.unrecognizedParameterName(functionName, namedArg.key,
+              parameterNamesSet.toSeq)
+          }
+          if (positionalParametersSet.contains(parameterName)) {
+            throw 
QueryCompilationErrors.positionalAndNamedArgumentDoubleReference(
+              functionName, namedArg.key)
+          }
+          if (namedParametersSet.contains(parameterName)) {
+            throw QueryCompilationErrors.doubleNamedArgumentReference(
+              functionName, namedArg.key)
+          }
+          namedParametersSet.add(namedArg.key)
+        case _ =>
+          throw 
QueryCompilationErrors.unexpectedPositionalArgument(functionName)
+      }
+    }
+
+    // Construct a map from argument name to value for argument rearrangement
+    val namedArgMap = namedArgs.map { arg =>
+      val namedArg = arg.asInstanceOf[NamedArgumentExpression]
+      namedArg.key -> namedArg.value
+    }.toMap
+
+    // Rearrange named arguments to match their positional order
+    val rearrangedNamedArgs: Seq[Expression] = namedParameters.map { param =>
+      namedArgMap.getOrElse(
+        param.name,
+        if (param.default.isEmpty) {
+          throw QueryCompilationErrors.requiredParameterNotFound(functionName, 
param.name)
+        } else {
+          param.default.get
+        }
+      )
+    }
+    positionalArgs ++ rearrangedNamedArgs
+  }
+}
+
+/**
+ * Identifies which forms of provided argument values are expected for each 
call
+ * to the associated SQL function
+ */
+trait NamedArgumentType
+
+/**
+ * Represents a named argument that expects a scalar value of one specific 
DataType
+ *
+ * @param dataType The data type of some argument
+ */
+case class FixedArgumentType(dataType: AbstractDataType) extends 
NamedArgumentType

Review Comment:
   Do we still need this?



##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala:
##########
@@ -1015,6 +1050,62 @@ object TableFunctionRegistry {
   val functionSet: Set[FunctionIdentifier] = builtin.listFunction().toSet
 }
 
-trait ExpressionBuilder {
-  def build(funcName: String, expressions: Seq[Expression]): Expression
+trait Builder[T] {
+  /**
+   * A method that returns the signatures of overloads that are associated with 
this function
+   *
+   * @return a list of function signatures
+   */
+  def functionSignatures: Option[Seq[FunctionSignature]] = None
+
+  /**
+   * This function rearranges the arguments provided during function 
invocation in positional order
+   * according to the function signature. This method will fill in the default 
values if optional
+   * parameters do not have their values specified. Any function which 
supports named arguments
+   * will have this routine invoked, even if no named arguments are present in 
the argument list.
+   * This is done to eliminate constructor overloads in some methods which use 
them for default
+   * values prior to the implementation of the named argument framework. This 
function will also
+   * check if the number of arguments is correct. If that is not the case, 
then an error will be thrown.
+   *
+   * IMPORTANT: This method will be called before the [[Builder.build]] method 
is invoked. It is
+   * guaranteed that the expressions provided to the [[Builder.build]] 
functions forms a valid set
+   * of argument expressions that can be used in the construction of the 
function expression.
+   *
+   * @param expectedSignature The method signature which we rearrange our 
arguments according to
+   * @param providedArguments The list of arguments passed from function 
invocation
+   * @param functionName The name of the function
+   * @return The rearranged argument list with arguments in positional order
+   */
+  def rearrange(
+      expectedSignature: FunctionSignature,
+      providedArguments: Seq[Expression],
+      functionName: String) : Seq[Expression] = {
+    SupportsNamedArguments.defaultRearrange(expectedSignature, 
providedArguments, functionName)
+  }
+
+  def build(funcName: String, expressions: Seq[Expression]): T
+}
+
+/**
+ * A trait used for scalar valued functions that defines how their expression 
representations
+ * are constructed in [[FunctionRegistry]]
+ */
+trait ExpressionBuilder extends Builder[Expression]
+
+/**
+ * A trait used for table valued functions that defines how their expression 
representations
+ * are constructed in [[FunctionRegistry]]

Review Comment:
   TableFunctionRegistry



##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/SupportsNamedArguments.scala:
##########
@@ -0,0 +1,118 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.catalyst.plans.logical
+
+import org.apache.spark.sql.catalyst.expressions.{Expression, 
NamedArgumentExpression}
+import org.apache.spark.sql.errors.QueryCompilationErrors
+import org.apache.spark.sql.types.AbstractDataType
+
+object SupportsNamedArguments {
+  final def defaultRearrange(functionSignature: FunctionSignature,

Review Comment:
   Can we add some tests for this function?



##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala:
##########
@@ -431,6 +432,22 @@ case class Explode(child: Expression) extends ExplodeBase {
     copy(child = newChild)
 }
 
+trait ExplodeGeneratorBuilderBase extends GeneratorBuilder {

Review Comment:
   If we support the named argument for Explode, we should also update its 
ExpressionDescription and make sure `DESCRIBE FUNCTION` shows the correct 
information.



##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala:
##########
@@ -431,6 +432,22 @@ case class Explode(child: Expression) extends ExplodeBase {
     copy(child = newChild)
 }
 
+trait ExplodeGeneratorBuilderBase extends GeneratorBuilder {
+  override def functionSignatures: Option[Seq[FunctionSignature]] =
+    Some(Seq(FunctionSignature(Seq(NamedArgument("collection")))))

Review Comment:
   Does it have to be defined like this? Can this be simplified? 



##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala:
##########
@@ -1015,6 +1050,62 @@ object TableFunctionRegistry {
   val functionSet: Set[FunctionIdentifier] = builtin.listFunction().toSet
 }
 
-trait ExpressionBuilder {
-  def build(funcName: String, expressions: Seq[Expression]): Expression
+trait Builder[T] {
+  /**
+   * A method that returns the signatures of overloads that are associated with 
this function
+   *
+   * @return a list of function signatures
+   */
+  def functionSignatures: Option[Seq[FunctionSignature]] = None
+
+  /**
+   * This function rearranges the arguments provided during function 
invocation in positional order
+   * according to the function signature. This method will fill in the default 
values if optional
+   * parameters do not have their values specified. Any function which 
supports named arguments
+   * will have this routine invoked, even if no named arguments are present in 
the argument list.
+   * This is done to eliminate constructor overloads in some methods which use 
them for default
+   * values prior to the implementation of the named argument framework. This 
function will also
+   * check if the number of arguments is correct. If that is not the case, 
then an error will be thrown.
+   *
+   * IMPORTANT: This method will be called before the [[Builder.build]] method 
is invoked. It is
+   * guaranteed that the expressions provided to the [[Builder.build]] 
functions forms a valid set
+   * of argument expressions that can be used in the construction of the 
function expression.
+   *
+   * @param expectedSignature The method signature which we rearrange our 
arguments according to
+   * @param providedArguments The list of arguments passed from function 
invocation
+   * @param functionName The name of the function
+   * @return The rearranged argument list with arguments in positional order
+   */
+  def rearrange(
+      expectedSignature: FunctionSignature,
+      providedArguments: Seq[Expression],
+      functionName: String) : Seq[Expression] = {
+    SupportsNamedArguments.defaultRearrange(expectedSignature, 
providedArguments, functionName)
+  }
+
+  def build(funcName: String, expressions: Seq[Expression]): T
+}
+
+/**
+ * A trait used for scalar valued functions that defines how their expression 
representations
+ * are constructed in [[FunctionRegistry]]
+ */
+trait ExpressionBuilder extends Builder[Expression]
+
+/**
+ * A trait used for table valued functions that defines how their expression 
representations
+ * are constructed in [[FunctionRegistry]]
+ */
+trait GeneratorBuilder extends Builder[LogicalPlan] {
+  override final def build(funcName: String, expressions: Seq[Expression]) : 
LogicalPlan = {
+    Generate(
+      buildGenerator(funcName, expressions),
+      unrequiredChildIndex = Nil,
+      outer = isOuter,
+      qualifier = None,
+      generatorOutput = Nil,
+      child = OneRowRelation())
+  }
+  def isOuter: Boolean
+  def buildGenerator(funcName: String, expressions: Seq[Expression]) : 
Generator

Review Comment:
   If each table-valued generator function needs to extend this 
GeneratorBuilder, why do we need `funcName` here? 



##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala:
##########
@@ -973,6 +994,20 @@ object TableFunctionRegistry {
     (name, (info, (expressions: Seq[Expression]) => builder(expressions)))
   }
 
+  def generatorBuilder[T <: GeneratorBuilder : ClassTag](
+      name: String,
+      builder: T,
+      since: Option[String] = None): (String, (ExpressionInfo, 
TableFunctionBuilder)) = {
+    val info = FunctionRegistryBase.expressionInfo[T](name, since)
+    val funcBuilder = (expressions: Seq[Expression]) => {
+      assert(expressions.forall(_.resolved), "function arguments must be 
resolved.")
+      val rearrangedExpressions = FunctionRegistry.rearrangeExpressions(name, 
builder, expressions)
+      val expr = builder.build(name, rearrangedExpressions)
+      expr

Review Comment:
   `expr` -> `plan` this should be a logical plan



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to