dtenedor commented on code in PR #41864:
URL: https://github.com/apache/spark/pull/41864#discussion_r1262838682


##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala:
##########
@@ -980,6 +1014,20 @@ object TableFunctionRegistry {
     (name, (info, (expressions: Seq[Expression]) => builder(expressions)))
   }
 
+  def generatorBuilder[T <: GeneratorBuilder : ClassTag](
+      name: String,
+      builder: T,
+      since: Option[String] = None): (String, (ExpressionInfo, 
TableFunctionBuilder)) = {
+    val info = FunctionRegistryBase.expressionInfo[T](name, since)
+    val funcBuilder = (expressions: Seq[Expression]) => {
+      assert(expressions.forall(_.resolved), "function arguments must be 
resolved.")
+      val rearrangedExpressions = FunctionRegistry.rearrangeExpressions(name, 
builder, expressions)
+      val plan = builder.build(name, rearrangedExpressions)

Review Comment:
   you don't need the `val plan` here since you can just return 
`builder.build(...)`



##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala:
##########
@@ -1022,6 +1070,68 @@ object TableFunctionRegistry {
   val functionSet: Set[FunctionIdentifier] = builtin.listFunction().toSet
 }
 
-trait ExpressionBuilder {
-  def build(funcName: String, expressions: Seq[Expression]): Expression
+trait Builder[T] {

Review Comment:
   please put some kind of short general comment for this `Builder` class 
mentioning that it is an interface for describing the expected parameters for 
SQL functions and takes responsibility for validating provided arguments for 
tham.



##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala:
##########
@@ -1022,6 +1070,68 @@ object TableFunctionRegistry {
   val functionSet: Set[FunctionIdentifier] = builtin.listFunction().toSet
 }
 
-trait ExpressionBuilder {
-  def build(funcName: String, expressions: Seq[Expression]): Expression
+trait Builder[T] {
+  /**
+   * A method that returns the signatures of overloads that are associated 
with this function.
+   * Each function signature includes a list of parameters to which the 
analyzer can
+   * compare a function call with provided arguments to determine if that 
function
+   * call is a match for the function signature.
+   *
+   * @return a list of function signatures
+   */
+  def functionSignatures: Option[Seq[FunctionSignature]] = None
+
+  /**
+   * This function rearranges the arguments provided during function 
invocation in positional order
+   * according to the function signature. This method will fill in the default 
values if optional
+   * parameters do not have their values specified. Any function which 
supports named arguments
+   * will have this routine invoked, even if no named arguments are present in 
the argument list.
+   * This is done to eliminate constructor overloads in some methods which use 
them for default
+   * values prior to the implementation of the named argument framework. This 
function will also
+   * check if the number of arguments are correct. If that is not the case, 
then an error will be
+   * thrown.
+   *
+   * IMPORTANT: This method will be called before the [[Builder.build]] method 
is invoked. It is
+   * guaranteed that the expressions provided to the [[Builder.build]] 
functions forms a valid set
+   * of argument expressions that can be used in the construction of the 
function expression.
+   *
+   * @param expectedSignature The method signature which we rearrange our 
arguments according to
+   * @param providedArguments The list of arguments passed from function 
invocation
+   * @param functionName The name of the function
+   * @return The rearranged arugument list with arguments in positional order
+   */
+  def rearrange(
+      expectedSignature: FunctionSignature,
+      providedArguments: Seq[Expression],
+      functionName: String) : Seq[Expression] = {
+    SupportsNamedArguments.defaultRearrange(expectedSignature, 
providedArguments, functionName)
+  }
+
+  def build(funcName: String, expressions: Seq[Expression]): T
+}
+
+/**
+ * A trait used for scalar valued functions that defines how their expression 
representations

Review Comment:
   please change this to a complete sentence ending with a period (same for 
L1120 below)



##########
sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala:
##########
@@ -50,6 +50,90 @@ import org.apache.spark.sql.types._
  */
 private[sql] object QueryCompilationErrors extends QueryErrorsBase {
 
+  def unexpectedRequiredParameterInFunctionSignature(
+      functionName: String, functionSignature: FunctionSignature) : Throwable 
= {
+    val errorMessage = s"Function $functionName has an unexpected required 
argument for" +
+      s" the provided function signature $functionSignature. All required 
arguments should" +
+      s" come before optional arguments."
+    SparkException.internalError(errorMessage)
+  }
+
+  def multipleFunctionSignatures(functionName: String,
+      functionSignatures: Seq[FunctionSignature]): Throwable = {
+    var errorMessage = s"Function $functionName cannot have multiple method 
signatures." +
+      s" The function signatures found were: \n"
+    for (functionSignature <- functionSignatures) {
+      errorMessage = errorMessage + s"${functionSignature}\n"
+    }
+    SparkException.internalError(errorMessage)
+  }
+
+  def namedArgumentsNotSupported(functionName: String) : Throwable = {
+    new AnalysisException(
+      errorClass = "NAMED_ARGUMENTS_NOT_SUPPORTED",
+      messageParameters = Map("functionName" -> toSQLId(functionName))
+    )
+  }
+
+  def positionalAndNamedArgumentDoubleReference(
+      functionName: String, parameterName: String) : Throwable = {
+    val errorClass =
+      
"DUPLICATE_ROUTINE_PARAMETER_ASSIGNMENT.POSITIONAL_AND_NAMED_ARGUMENT_DOUBLE_REFERENCE"

Review Comment:
   maybe `DUPLICATE_ROUTINE_PARAMETER_ASSIGNMENT.BOTH_POSITIONAL_AND_NAMED`?



##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala:
##########
@@ -887,13 +887,46 @@ object FunctionRegistry {
       since: Option[String] = None): (String, (ExpressionInfo, 
FunctionBuilder)) = {
     val (expressionInfo, builder) = FunctionRegistryBase.build[T](name, since)
     val newBuilder = (expressions: Seq[Expression]) => {
+      if (expressions.exists(_.isInstanceOf[NamedArgumentExpression])) {
+        throw QueryCompilationErrors.namedArgumentsNotSupported(name)
+      }
       val expr = builder(expressions)
       if (setAlias) expr.setTagValue(FUNC_ALIAS, name)
       expr
     }
     (name, (expressionInfo, newBuilder))
   }
 
+  /**
+   * This method will be used to rearrange the arguments provided in function 
invocation
+   * in the order defined by the function signature given in the builder 
instance.
+   *
+   * @param name The name of the function
+   * @param builder The builder of the function expression
+   * @param expressions The argument list passed in function invocation
+   * @tparam T The class of the builder
+   * @return An argument list in positional order defined by the builder
+   */
+  def rearrangeExpressions[T <: Builder[_]](
+      name: String,
+      builder: T,
+      expressions: Seq[Expression]) : Seq[Expression] = {
+    val rearrangedExpressions = if (!builder.functionSignatures.isEmpty) {
+      val functionSignatures = builder.functionSignatures.get
+      if (functionSignatures.length != 1) {

Review Comment:
   Good question, I imagine we probably would not get around to prioritizing 
overloads soon with this framework. But if we did, would it become necessary to 
update all the places where these function signatures are defined, from an 
`Option[FunctionSignature]` to an `Option[Seq[FunctionSignature]]`? In 
contrast, if we go with the latter syntax of a `Seq` of just one function 
signature, we wouldn't have to update all those sites later.
   
   However, I imagine such an update would be a fairly mechanical change, and 
in the interim, we get better compiler enforcement. It seems like this 
suggestion is better for now to switch the API to return just an 
`Option[FunctionSignature]` for now.



##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala:
##########
@@ -980,6 +1014,20 @@ object TableFunctionRegistry {
     (name, (info, (expressions: Seq[Expression]) => builder(expressions)))
   }
 
+  def generatorBuilder[T <: GeneratorBuilder : ClassTag](

Review Comment:
   please add a comment for this method



##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/FunctionBuilderBase.scala:
##########
@@ -0,0 +1,166 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.catalyst.plans.logical
+
+import org.apache.spark.sql.catalyst.expressions.{Expression, 
NamedArgumentExpression}
+import org.apache.spark.sql.errors.QueryCompilationErrors
+
+trait FunctionBuilderBase[T] {
+  /**
+   * A method that returns the signatures of overloads that are associated 
with this function.
+   * Each function signature includes a list of parameters to which the 
analyzer can
+   * compare a function call with provided arguments to determine if that 
function
+   * call is a match for the function signature.
+   *
+   * @return a list of function signatures
+   */
+  def functionSignatures: Option[Seq[FunctionSignature]] = None
+
+  /**
+   * This function rearranges the arguments provided during function 
invocation in positional order
+   * according to the function signature. This method will fill in the default 
values if optional
+   * parameters do not have their values specified. Any function which 
supports named arguments
+   * will have this routine invoked, even if no named arguments are present in 
the argument list.
+   * This is done to eliminate constructor overloads in some methods which use 
them for default
+   * values prior to the implementation of the named argument framework. This 
function will also
+   * check if the number of arguments are correct. If that is not the case, 
then an error will be
+   * thrown.
+   *
+   * IMPORTANT: This method will be called before the 
[[FunctionBuilderBase.build]] method is
+   * invoked. It is guaranteed that the expressions provided to the 
[[FunctionBuilderBase.build]]
+   * functions forms a valid set of argument expressions that can be used in 
the construction of
+   * the function expression.
+   *
+   * @param expectedSignature The method signature which we rearrange our 
arguments according to
+   * @param providedArguments The list of arguments passed from function 
invocation
+   * @param functionName The name of the function
+   * @return The rearranged argument list with arguments in positional order
+   */
+  def rearrange(
+                 expectedSignature: FunctionSignature,
+                 providedArguments: Seq[Expression],
+                 functionName: String) : Seq[Expression] = {
+    NamedArgumentsSupport.defaultRearrange(expectedSignature, 
providedArguments, functionName)
+  }
+
+  def build(funcName: String, expressions: Seq[Expression]): T
+}
+
+object NamedArgumentsSupport {
+  /**
+   * This method is the default routine which rearranges the arguments in 
positional order according
+   * to the function signature provided. This will also fill in any default 
values that exists for
+   * optional arguments. This method will also be invoked even if there are no 
named arguments in
+   * the argument list.
+   *
+   * @param functionSignature The function signature that defines the 
positional ordering
+   * @param args The argument list provided in function invocation
+   * @param functionName The name of the function
+   * @return A list of arguments rearranged in positional order defined by the 
provided signature
+   */
+  final def defaultRearrange(
+      functionSignature: FunctionSignature,
+      args: Seq[Expression],
+      functionName: String): Seq[Expression] = {
+    val parameters: Seq[NamedArgument] = functionSignature.parameters
+    if (parameters.dropWhile(_.default.isEmpty).exists(_.default.isEmpty)) {
+      throw 
QueryCompilationErrors.unexpectedRequiredParameterInFunctionSignature(
+        functionName, functionSignature)
+    }
+
+    val (positionalArgs, namedArgs) = 
args.span(!_.isInstanceOf[NamedArgumentExpression])
+    val namedParameters: Seq[NamedArgument] = 
parameters.drop(positionalArgs.size)
+
+    // The following loop checks for the following:
+    // 1. Unrecognized parameter names
+    // 2. Duplicate routine parameter assignments
+    val allParameterNames: Seq[String] = parameters.map(_.name)
+    val parameterNamesSet: Set[String] = allParameterNames.toSet
+    val positionalParametersSet = 
allParameterNames.take(positionalArgs.size).toSet
+    val namedParametersSet = collection.mutable.Set[String]()
+
+    for (arg <- namedArgs) {
+      arg match {
+        case namedArg: NamedArgumentExpression =>
+          val parameterName = namedArg.key
+          if (!parameterNamesSet.contains(parameterName)) {
+            throw 
QueryCompilationErrors.unrecognizedParameterName(functionName, namedArg.key,
+              parameterNamesSet.toSeq)
+          }
+          if (positionalParametersSet.contains(parameterName)) {
+            throw 
QueryCompilationErrors.positionalAndNamedArgumentDoubleReference(
+              functionName, namedArg.key)
+          }
+          if (namedParametersSet.contains(parameterName)) {
+            throw QueryCompilationErrors.doubleNamedArgumentReference(
+              functionName, namedArg.key)
+          }
+          namedParametersSet.add(namedArg.key)
+        case _ =>
+          throw 
QueryCompilationErrors.unexpectedPositionalArgument(functionName)
+      }
+    }
+
+    // Check argument list size against provided parameter list length

Review Comment:
   ```suggestion
       // Check the argument list size against the provided parameter list 
length.
   ```



##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/FunctionBuilderBase.scala:
##########
@@ -0,0 +1,166 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.catalyst.plans.logical
+
+import org.apache.spark.sql.catalyst.expressions.{Expression, 
NamedArgumentExpression}
+import org.apache.spark.sql.errors.QueryCompilationErrors
+
+trait FunctionBuilderBase[T] {
+  /**
+   * A method that returns the signatures of overloads that are associated 
with this function.
+   * Each function signature includes a list of parameters to which the 
analyzer can
+   * compare a function call with provided arguments to determine if that 
function
+   * call is a match for the function signature.
+   *
+   * @return a list of function signatures

Review Comment:
   no need for this line since the above comment conveys that information.



##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/FunctionBuilderBase.scala:
##########
@@ -0,0 +1,166 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.catalyst.plans.logical
+
+import org.apache.spark.sql.catalyst.expressions.{Expression, 
NamedArgumentExpression}
+import org.apache.spark.sql.errors.QueryCompilationErrors
+
+trait FunctionBuilderBase[T] {

Review Comment:
   please add a general comment describing what this object represents (using 
full sentences). Same for `NamedArgumentsSupport` below.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to