dtenedor commented on code in PR #41864:
URL: https://github.com/apache/spark/pull/41864#discussion_r1253625316
##########
common/utils/src/main/resources/error/error-classes.json:
##########
@@ -674,6 +674,12 @@
],
"sqlState" : "23505"
},
+ "DUPLICATE_ROUTINE_PARAMETER_ASSIGNMENT" : {
+ "message" : [
+ "Call to function <functionName> is invalid because it includes multiple
argument assignments to the same name <parameterName>."
Review Comment:
```suggestion
"Call to function <functionName> is invalid because it includes
multiple argument assignments to the same parameter name <parameterName>."
```
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/maskExpressions.scala:
##########
@@ -320,4 +321,18 @@ object Mask {
case _ => maskedChar(c, maskOther)
}
}
+ override def functionSignatures: Seq[FunctionSignature] = {
Review Comment:
```suggestion
override def functionSignatures: Seq[FunctionSignature] = {
```
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/SupportsNamedArguments.scala:
##########
@@ -0,0 +1,204 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.catalyst.plans.logical
+
+import scala.reflect.ClassTag
+
+import org.apache.spark.sql.catalyst.expressions.{Expression,
NamedArgumentExpression}
+import org.apache.spark.sql.errors.QueryCompilationErrors
+import org.apache.spark.sql.types.AbstractDataType
+
+/**
+ * A general trait which is used to identify the DataType of the argument
+ */
+trait NamedArgumentType
+
+/**
+ * The standard case class used to represent a simple data type
+ *
+ * @param dataType The data type of some argument
+ */
+case class FixedArgumentType(dataType: AbstractDataType) extends
NamedArgumentType
+
+/**
+ * A named parameter
+ *
+ * @param name The name of the string.
+ * @param dataType The datatype of the argument.
+ * @param default The default value of the argument. If the default is none,
then that means the
+ * argument is required. If no argument is provided, an
exception is thrown.
+ */
+case class NamedArgument(
+ name: String,
+ dataType: NamedArgumentType,
+ default: Option[Expression] = None)
+
+/**
+ * Represents a method signature and the list of arguments it receives as
input.
+ * Currently, overloads are not supported and only one FunctionSignature is
allowed
+ * per function expression.
+ *
+ * @param parameters The list of arguments which the function takes
+ */
+case class FunctionSignature(parameters: Seq[NamedArgument])
+
+/**
+ * The class which companion objects of function expression implement to
+ * support named arguments for that function expression.
Review Comment:
Can you add a short example here as well?
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/SupportsNamedArguments.scala:
##########
@@ -0,0 +1,204 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.catalyst.plans.logical
+
+import scala.reflect.ClassTag
+
+import org.apache.spark.sql.catalyst.expressions.{Expression,
NamedArgumentExpression}
+import org.apache.spark.sql.errors.QueryCompilationErrors
+import org.apache.spark.sql.types.AbstractDataType
+
+/**
+ * A general trait which is used to identify the DataType of the argument
+ */
+trait NamedArgumentType
+
+/**
+ * The standard case class used to represent a simple data type
+ *
+ * @param dataType The data type of some argument
+ */
+case class FixedArgumentType(dataType: AbstractDataType) extends
NamedArgumentType
+
+/**
+ * A named parameter
+ *
+ * @param name The name of the string.
+ * @param dataType The datatype of the argument.
+ * @param default The default value of the argument. If the default is none,
then that means the
+ * argument is required. If no argument is provided, an
exception is thrown.
+ */
+case class NamedArgument(
+ name: String,
+ dataType: NamedArgumentType,
+ default: Option[Expression] = None)
+
+/**
+ * Represents a method signature and the list of arguments it receives as
input.
+ * Currently, overloads are not supported and only one FunctionSignature is
allowed
+ * per function expression.
+ *
+ * @param parameters The list of arguments which the function takes
+ */
+case class FunctionSignature(parameters: Seq[NamedArgument])
+
+/**
+ * The class which companion objects of function expression implement to
+ * support named arguments for that function expression.
+ */
+abstract class SupportsNamedArguments {
+ /**
+ * This is the method overridden by function expressions to define their
method signatures.
+ * Currently, we don't support overloads, so we restrict each function
expression to return
+ * only one FunctionSignature.
+ *
+ * @return the signature of the function expression
+ */
+ def functionSignatures: Seq[FunctionSignature]
+
+ /**
+ * This function rearranges the list of expressions according to the
function signature
+ * It is recommended to use the provided version rearrange as it is
consistent with
+ * the SQL standard. If absolutely necessary the developer can choose to
override the default
+ * behavior for additional flexibility.
+ *
+ * @param functionSignature Function signature that denotes positional order
of arguments
+ * @param args The sequence of expressions from function invocation
+ * @param functionName The name of the function invoked for debugging
purposes
+ * @return positional order of arguments according to FunctionSignature
+ */
+ protected def rearrange(functionSignature: FunctionSignature,
Review Comment:
Please start the parameter list on the next line with an indent of +4 spaces
per the style guide, i.e. end this line at `rearrange(` and put each parameter
on its own line below it.
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/SupportsNamedArguments.scala:
##########
@@ -0,0 +1,204 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.catalyst.plans.logical
+
+import scala.reflect.ClassTag
+
+import org.apache.spark.sql.catalyst.expressions.{Expression,
NamedArgumentExpression}
+import org.apache.spark.sql.errors.QueryCompilationErrors
+import org.apache.spark.sql.types.AbstractDataType
+
+/**
+ * A general trait which is used to identify the DataType of the argument
+ */
+trait NamedArgumentType
+
+/**
+ * The standard case class used to represent a simple data type
+ *
+ * @param dataType The data type of some argument
+ */
+case class FixedArgumentType(dataType: AbstractDataType) extends
NamedArgumentType
+
+/**
+ * A named parameter
+ *
+ * @param name The name of the string.
+ * @param dataType The datatype of the argument.
+ * @param default The default value of the argument. If the default is none,
then that means the
+ * argument is required. If no argument is provided, an
exception is thrown.
+ */
+case class NamedArgument(
+ name: String,
+ dataType: NamedArgumentType,
+ default: Option[Expression] = None)
+
+/**
+ * Represents a method signature and the list of arguments it receives as
input.
+ * Currently, overloads are not supported and only one FunctionSignature is
allowed
+ * per function expression.
+ *
+ * @param parameters The list of arguments which the function takes
+ */
+case class FunctionSignature(parameters: Seq[NamedArgument])
+
+/**
+ * The class which companion objects of function expression implement to
+ * support named arguments for that function expression.
+ */
+abstract class SupportsNamedArguments {
+ /**
+ * This is the method overridden by function expressions to define their
method signatures.
+ * Currently, we don't support overloads, so we restrict each function
expression to return
+ * only one FunctionSignature.
+ *
+ * @return the signature of the function expression
+ */
+ def functionSignatures: Seq[FunctionSignature]
+
+ /**
+ * This function rearranges the list of expressions according to the
function signature
+ * It is recommended to use the provided version rearrange as it is
consistent with
+ * the SQL standard. If absolutely necessary the developer can choose to
override the default
+ * behavior for additional flexibility.
+ *
+ * @param functionSignature Function signature that denotes positional order
of arguments
+ * @param args The sequence of expressions from function invocation
+ * @param functionName The name of the function invoked for debugging
purposes
+ * @return positional order of arguments according to FunctionSignature
+ */
+ protected def rearrange(functionSignature: FunctionSignature,
+ args: Seq[Expression],
+ functionName: String): Seq[Expression] = {
+ SupportsNamedArguments.defaultRearrange(functionSignature, args,
functionName)
+ }
+}
+
+object SupportsNamedArguments {
+
+ /**
+ * Given a generic type, we check if the companion object of said type
exists.
+ * If that object extends the trait [[SupportsNamedArguments]], then we
rearrange
+ * the expressions in the order specified by the object.
+ *
+ * It is here we resubstitute [[Unevaluable]] [[NamedArgumentExpression]]s
with
+ * normal expressions. This method will produce an positional argument list
which
+ * is equivalent to the original argumnet list, except the expressions are
now
+ * fit for consumption by [[ResolveFunctions]]
+ *
+ * @param expressions The list of positional and named argument expressions
+ * @tparam T The actual expression class.
+ * @return positional argument list
+ */
+ final def getRearrangedExpressions[T <: Expression : ClassTag](
+ expressions: Seq[Expression], functionName: String): Seq[Expression] = {
+
+ if (!expressions.exists(_.isInstanceOf[NamedArgumentExpression])) {
+ return expressions
+ }
+
+ import scala.reflect.runtime.currentMirror
+
+ // This code heavily utilizes Scala reflection which is unfamiliar to most
developers.
+ // Here are the steps of this function:
+ // 1. Obtain the module symbol for the companion object of the function
expression.
+ // 2. Obtain the module class symbol that represents the companion object.
+ // 3. Check if the base classes of the module class symbol contains
SupportsNamedArguments.
+ // This checks if the companion object is an implementor of
SupportsNamedArguments.
+ // 4. Check if the module class symbol is a top level object. Reflection
is unable to
+ // obtain a companion object instance if it is member of some enclosing
class unless
+ // instance of said enclosing class is provided which we do not have.
+ // 5. Use reflection to obtain instance of companion object and perform
immediate cast to
+ // SupportsNamedArguments as it is already verified the cast is safe.
+ // 6. Obtain function signature and rearrange expression according to the
given signature.
+ val runtimeClass = scala.reflect.classTag[T].runtimeClass
+ val targetModuleSymbol = currentMirror.classSymbol(runtimeClass).companion
+ val parentClass =
scala.reflect.classTag[SupportsNamedArguments].runtimeClass
+ val parentSymbol = currentMirror.classSymbol(parentClass)
+
+ targetModuleSymbol match {
+ case scala.reflect.runtime.universe.NoSymbol =>
Review Comment:
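you can just check `if (targetModuleSymbol ==
scala.reflect.runtime.universe.NoSymbol)` and throw the exception in that case,
removing the `case _` and de-denting the rest of the block. (Note that
`NoSymbol` is a value rather than a type, so it has to be compared with `==`
instead of being tested with `isInstanceOf`.)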
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/SupportsNamedArguments.scala:
##########
@@ -0,0 +1,204 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.catalyst.plans.logical
+
+import scala.reflect.ClassTag
+
+import org.apache.spark.sql.catalyst.expressions.{Expression,
NamedArgumentExpression}
+import org.apache.spark.sql.errors.QueryCompilationErrors
+import org.apache.spark.sql.types.AbstractDataType
+
+/**
+ * A general trait which is used to identify the DataType of the argument
+ */
+trait NamedArgumentType
+
+/**
+ * The standard case class used to represent a simple data type
+ *
+ * @param dataType The data type of some argument
+ */
+case class FixedArgumentType(dataType: AbstractDataType) extends
NamedArgumentType
+
+/**
+ * A named parameter
+ *
+ * @param name The name of the string.
+ * @param dataType The datatype of the argument.
+ * @param default The default value of the argument. If the default is none,
then that means the
+ * argument is required. If no argument is provided, an
exception is thrown.
+ */
+case class NamedArgument(
+ name: String,
+ dataType: NamedArgumentType,
+ default: Option[Expression] = None)
+
+/**
+ * Represents a method signature and the list of arguments it receives as
input.
+ * Currently, overloads are not supported and only one FunctionSignature is
allowed
+ * per function expression.
+ *
+ * @param parameters The list of arguments which the function takes
+ */
+case class FunctionSignature(parameters: Seq[NamedArgument])
+
+/**
+ * The class which companion objects of function expression implement to
+ * support named arguments for that function expression.
+ */
+abstract class SupportsNamedArguments {
+ /**
+ * This is the method overridden by function expressions to define their
method signatures.
+ * Currently, we don't support overloads, so we restrict each function
expression to return
+ * only one FunctionSignature.
+ *
+ * @return the signature of the function expression
+ */
+ def functionSignatures: Seq[FunctionSignature]
+
+ /**
+ * This function rearranges the list of expressions according to the
function signature
+ * It is recommended to use the provided version rearrange as it is
consistent with
+ * the SQL standard. If absolutely necessary the developer can choose to
override the default
+ * behavior for additional flexibility.
+ *
+ * @param functionSignature Function signature that denotes positional order
of arguments
+ * @param args The sequence of expressions from function invocation
+ * @param functionName The name of the function invoked for debugging
purposes
+ * @return positional order of arguments according to FunctionSignature
+ */
+ protected def rearrange(functionSignature: FunctionSignature,
+ args: Seq[Expression],
+ functionName: String): Seq[Expression] = {
+ SupportsNamedArguments.defaultRearrange(functionSignature, args,
functionName)
+ }
+}
+
+object SupportsNamedArguments {
+
+ /**
+ * Given a generic type, we check if the companion object of said type
exists.
+ * If that object extends the trait [[SupportsNamedArguments]], then we
rearrange
+ * the expressions in the order specified by the object.
+ *
+ * It is here we resubstitute [[Unevaluable]] [[NamedArgumentExpression]]s
with
+ * normal expressions. This method will produce an positional argument list
which
+ * is equivalent to the original argumnet list, except the expressions are
now
+ * fit for consumption by [[ResolveFunctions]]
+ *
+ * @param expressions The list of positional and named argument expressions
+ * @tparam T The actual expression class.
+ * @return positional argument list
+ */
+ final def getRearrangedExpressions[T <: Expression : ClassTag](
+ expressions: Seq[Expression], functionName: String): Seq[Expression] = {
+
+ if (!expressions.exists(_.isInstanceOf[NamedArgumentExpression])) {
+ return expressions
+ }
+
+ import scala.reflect.runtime.currentMirror
+
+ // This code heavily utilizes Scala reflection which is unfamiliar to most
developers.
+ // Here are the steps of this function:
+ // 1. Obtain the module symbol for the companion object of the function
expression.
+ // 2. Obtain the module class symbol that represents the companion object.
+ // 3. Check if the base classes of the module class symbol contains
SupportsNamedArguments.
+ // This checks if the companion object is an implementor of
SupportsNamedArguments.
+ // 4. Check if the module class symbol is a top level object. Reflection
is unable to
+ // obtain a companion object instance if it is member of some enclosing
class unless
+ // instance of said enclosing class is provided which we do not have.
+ // 5. Use reflection to obtain instance of companion object and perform
immediate cast to
+ // SupportsNamedArguments as it is already verified the cast is safe.
+ // 6. Obtain function signature and rearrange expression according to the
given signature.
+ val runtimeClass = scala.reflect.classTag[T].runtimeClass
+ val targetModuleSymbol = currentMirror.classSymbol(runtimeClass).companion
+ val parentClass =
scala.reflect.classTag[SupportsNamedArguments].runtimeClass
+ val parentSymbol = currentMirror.classSymbol(parentClass)
+
+ targetModuleSymbol match {
+ case scala.reflect.runtime.universe.NoSymbol =>
+ throw QueryCompilationErrors.namedArgumentsNotSupported(functionName)
+ case _ =>
+ val moduleClassSymbol = targetModuleSymbol.asModule.moduleClass.asClass
+ if (moduleClassSymbol.baseClasses.contains(parentSymbol)) {
+ if (currentMirror.runtimeClass(moduleClassSymbol).getEnclosingClass
!= null) {
+ throw
QueryCompilationErrors.cannotObtainCompanionObjectInstance(functionName)
+ }
+ val instance =
currentMirror.reflectModule(targetModuleSymbol.asModule)
+ .instance.asInstanceOf[SupportsNamedArguments]
+ if (instance.functionSignatures.size != 1) {
+ throw QueryCompilationErrors.multipleFunctionSignatures(
+ functionName, instance.functionSignatures)
+ }
+ instance.rearrange(instance.functionSignatures.head, expressions,
functionName)
+ } else {
+ throw QueryCompilationErrors.namedArgumentsNotSupported(functionName)
+ }
+ }
+ }
+
+ // Exposed for testing
+ final def defaultRearrange(functionSignature: FunctionSignature,
+ args: Seq[Expression],
+ functionName: String): Seq[Expression] = {
+ val parameters = functionSignature.parameters
Review Comment:
Could you please add explicit types for these `val`s for better readability?
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala:
##########
@@ -50,6 +50,65 @@ import org.apache.spark.sql.types._
*/
private[sql] object QueryCompilationErrors extends QueryErrorsBase {
+ def cannotObtainCompanionObjectInstance(functionName: String): Throwable = {
+ SparkException.internalError(s"Cannot obtain companion object for " +
+ s"function expression: $functionName. Companion must be top-level
object.")
Review Comment:
```suggestion
s"function expression: $functionName. Please note that this companion
must be a top-level object.")
```
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/SupportsNamedArguments.scala:
##########
@@ -0,0 +1,204 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.catalyst.plans.logical
+
+import scala.reflect.ClassTag
+
+import org.apache.spark.sql.catalyst.expressions.{Expression,
NamedArgumentExpression}
+import org.apache.spark.sql.errors.QueryCompilationErrors
+import org.apache.spark.sql.types.AbstractDataType
+
+/**
+ * A general trait which is used to identify the DataType of the argument
+ */
+trait NamedArgumentType
+
+/**
+ * The standard case class used to represent a simple data type
+ *
+ * @param dataType The data type of some argument
+ */
+case class FixedArgumentType(dataType: AbstractDataType) extends
NamedArgumentType
+
+/**
+ * A named parameter
+ *
+ * @param name The name of the string.
+ * @param dataType The datatype of the argument.
+ * @param default The default value of the argument. If the default is none,
then that means the
+ * argument is required. If no argument is provided, an
exception is thrown.
+ */
+case class NamedArgument(
+ name: String,
+ dataType: NamedArgumentType,
+ default: Option[Expression] = None)
+
+/**
+ * Represents a method signature and the list of arguments it receives as
input.
+ * Currently, overloads are not supported and only one FunctionSignature is
allowed
+ * per function expression.
+ *
+ * @param parameters The list of arguments which the function takes
+ */
+case class FunctionSignature(parameters: Seq[NamedArgument])
+
+/**
+ * The class which companion objects of function expression implement to
+ * support named arguments for that function expression.
+ */
+abstract class SupportsNamedArguments {
+ /**
+ * This is the method overridden by function expressions to define their
method signatures.
+ * Currently, we don't support overloads, so we restrict each function
expression to return
+ * only one FunctionSignature.
+ *
+ * @return the signature of the function expression
+ */
+ def functionSignatures: Seq[FunctionSignature]
+
+ /**
+ * This function rearranges the list of expressions according to the
function signature
+ * It is recommended to use the provided version rearrange as it is
consistent with
+ * the SQL standard. If absolutely necessary the developer can choose to
override the default
+ * behavior for additional flexibility.
+ *
+ * @param functionSignature Function signature that denotes positional order
of arguments
+ * @param args The sequence of expressions from function invocation
+ * @param functionName The name of the function invoked for debugging
purposes
+ * @return positional order of arguments according to FunctionSignature
+ */
+ protected def rearrange(functionSignature: FunctionSignature,
+ args: Seq[Expression],
+ functionName: String): Seq[Expression] = {
+ SupportsNamedArguments.defaultRearrange(functionSignature, args,
functionName)
+ }
+}
+
+object SupportsNamedArguments {
+
+ /**
+ * Given a generic type, we check if the companion object of said type
exists.
+ * If that object extends the trait [[SupportsNamedArguments]], then we
rearrange
+ * the expressions in the order specified by the object.
+ *
+ * It is here we resubstitute [[Unevaluable]] [[NamedArgumentExpression]]s
with
+ * normal expressions. This method will produce an positional argument list
which
+ * is equivalent to the original argumnet list, except the expressions are
now
+ * fit for consumption by [[ResolveFunctions]]
+ *
+ * @param expressions The list of positional and named argument expressions
+ * @tparam T The actual expression class.
+ * @return positional argument list
+ */
+ final def getRearrangedExpressions[T <: Expression : ClassTag](
+ expressions: Seq[Expression], functionName: String): Seq[Expression] = {
+
+ if (!expressions.exists(_.isInstanceOf[NamedArgumentExpression])) {
+ return expressions
+ }
+
+ import scala.reflect.runtime.currentMirror
+
+ // This code heavily utilizes Scala reflection which is unfamiliar to most
developers.
+ // Here are the steps of this function:
+ // 1. Obtain the module symbol for the companion object of the function
expression.
+ // 2. Obtain the module class symbol that represents the companion object.
+ // 3. Check if the base classes of the module class symbol contains
SupportsNamedArguments.
+ // This checks if the companion object is an implementor of
SupportsNamedArguments.
+ // 4. Check if the module class symbol is a top level object. Reflection
is unable to
+ // obtain a companion object instance if it is member of some enclosing
class unless
+ // instance of said enclosing class is provided which we do not have.
+ // 5. Use reflection to obtain instance of companion object and perform
immediate cast to
+ // SupportsNamedArguments as it is already verified the cast is safe.
+ // 6. Obtain function signature and rearrange expression according to the
given signature.
+ val runtimeClass = scala.reflect.classTag[T].runtimeClass
+ val targetModuleSymbol = currentMirror.classSymbol(runtimeClass).companion
+ val parentClass =
scala.reflect.classTag[SupportsNamedArguments].runtimeClass
+ val parentSymbol = currentMirror.classSymbol(parentClass)
+
+ targetModuleSymbol match {
+ case scala.reflect.runtime.universe.NoSymbol =>
+ throw QueryCompilationErrors.namedArgumentsNotSupported(functionName)
+ case _ =>
+ val moduleClassSymbol = targetModuleSymbol.asModule.moduleClass.asClass
+ if (moduleClassSymbol.baseClasses.contains(parentSymbol)) {
+ if (currentMirror.runtimeClass(moduleClassSymbol).getEnclosingClass
!= null) {
+ throw
QueryCompilationErrors.cannotObtainCompanionObjectInstance(functionName)
+ }
+ val instance =
currentMirror.reflectModule(targetModuleSymbol.asModule)
+ .instance.asInstanceOf[SupportsNamedArguments]
+ if (instance.functionSignatures.size != 1) {
+ throw QueryCompilationErrors.multipleFunctionSignatures(
+ functionName, instance.functionSignatures)
+ }
+ instance.rearrange(instance.functionSignatures.head, expressions,
functionName)
+ } else {
+ throw QueryCompilationErrors.namedArgumentsNotSupported(functionName)
+ }
+ }
+ }
+
+ // Exposed for testing
+ final def defaultRearrange(functionSignature: FunctionSignature,
+ args: Seq[Expression],
+ functionName: String): Seq[Expression] = {
+ val parameters = functionSignature.parameters
+ val firstNamedArgIdx =
args.indexWhere(_.isInstanceOf[NamedArgumentExpression])
+ val (positionalArgs, namedArgs) = args.splitAt(firstNamedArgIdx)
+ val namedParameters = parameters.drop(positionalArgs.size)
+
+ // Performing some checking to ensure valid argument list
+ val allParameterNames: Seq[String] = parameters.map(_.name)
+ val parameterNamesSet: Set[String] = allParameterNames.toSet
+ val assignedParameterSet = collection.mutable.Set[String](
+ allParameterNames.take(positionalArgs.size): _*)
+ for (arg <- namedArgs) {
+ arg match {
+ case namedArg: NamedArgumentExpression =>
+ if (assignedParameterSet.contains(namedArg.key)) {
+ throw QueryCompilationErrors.duplicateRoutineParameterAssignment(
+ functionName, namedArg.key)
+ }
+ if (!parameterNamesSet.contains(namedArg.key)) {
+ throw
QueryCompilationErrors.unrecognizedParameterName(functionName, namedArg.key)
+ }
+ assignedParameterSet.add(namedArg.key)
+ case _ =>
+ throw
QueryCompilationErrors.unexpectedPositionalArgument(functionName)
+ }
+ }
+
+ // Construct a map from argument name to value for argument rearrangement
+ val namedArgMap = namedArgs.map { arg =>
+ val namedArg = arg.asInstanceOf[NamedArgumentExpression]
+ namedArg.key -> namedArg.value
+ }.toMap
+
+ // Rearrange named arguments to match their positional order
+ val rearrangedNamedArgs = namedParameters.map { param =>
+ namedArgMap.getOrElse(
+ param.name,
+ if (param.default.isEmpty) {
+ throw QueryCompilationErrors.requiredParameterNotFound(functionName,
param.name)
+ } else {
Review Comment:
you can drop the `else` here since you throw an exception above.
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/SupportsNamedArguments.scala:
##########
@@ -0,0 +1,204 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.catalyst.plans.logical
+
+import scala.reflect.ClassTag
+
+import org.apache.spark.sql.catalyst.expressions.{Expression,
NamedArgumentExpression}
+import org.apache.spark.sql.errors.QueryCompilationErrors
+import org.apache.spark.sql.types.AbstractDataType
+
+/**
+ * A general trait which is used to identify the DataType of the argument
+ */
+trait NamedArgumentType
+
+/**
+ * The standard case class used to represent a simple data type
+ *
+ * @param dataType The data type of some argument
+ */
+case class FixedArgumentType(dataType: AbstractDataType) extends
NamedArgumentType
+
+/**
+ * A named parameter
+ *
+ * @param name The name of the string.
+ * @param dataType The datatype of the argument.
+ * @param default The default value of the argument. If the default is none,
then that means the
+ * argument is required. If no argument is provided, an
exception is thrown.
+ */
+case class NamedArgument(
Review Comment:
Let's put this class definition first in the file, with the
`NamedArgumentType` and `FixedArgumentType` following, so the reader sees the
former first (object definitions in Scala may appear in any order).
Also, can you please update `A named parameter` in the class comment to
something more descriptive, indicating how expression classes that implement
SQL functions are supposed to use this?
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/CountMinSketchAgg.scala:
##########
@@ -208,3 +209,13 @@ case class CountMinSketchAgg(
confidenceExpression = third,
seedExpression = fourth)
}
+object CountMinSketchAgg extends SupportsNamedArguments {
Review Comment:
```suggestion
object CountMinSketchAgg extends SupportsNamedArguments {
```
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/SupportsNamedArguments.scala:
##########
@@ -0,0 +1,204 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.catalyst.plans.logical
+
+import scala.reflect.ClassTag
+
+import org.apache.spark.sql.catalyst.expressions.{Expression,
NamedArgumentExpression}
+import org.apache.spark.sql.errors.QueryCompilationErrors
+import org.apache.spark.sql.types.AbstractDataType
+
+/**
+ * A general trait which is used to identify the DataType of the argument
+ */
+trait NamedArgumentType
+
+/**
+ * The standard case class used to represent a simple data type
+ *
+ * @param dataType The data type of some argument
+ */
+case class FixedArgumentType(dataType: AbstractDataType) extends
NamedArgumentType
+
+/**
+ * A named parameter
+ *
+ * @param name The name of the string.
+ * @param dataType The datatype of the argument.
+ * @param default The default value of the argument. If the default is none,
then that means the
+ * argument is required. If no argument is provided, an
exception is thrown.
+ */
+case class NamedArgument(
+ name: String,
+ dataType: NamedArgumentType,
+ default: Option[Expression] = None)
+
+/**
+ * Represents a method signature and the list of arguments it receives as
input.
+ * Currently, overloads are not supported and only one FunctionSignature is
allowed
+ * per function expression.
+ *
+ * @param parameters The list of arguments which the function takes
+ */
+case class FunctionSignature(parameters: Seq[NamedArgument])
+
+/**
+ * The class which companion objects of function expression implement to
Review Comment:
```suggestion
* The class which companion objects of function expression may implement to
```
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/SupportsNamedArguments.scala:
##########
@@ -0,0 +1,204 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.catalyst.plans.logical
+
+import scala.reflect.ClassTag
+
+import org.apache.spark.sql.catalyst.expressions.{Expression,
NamedArgumentExpression}
+import org.apache.spark.sql.errors.QueryCompilationErrors
+import org.apache.spark.sql.types.AbstractDataType
+
+/**
+ * A general trait which is used to identify the DataType of the argument
+ */
+trait NamedArgumentType
+
+/**
+ * The standard case class used to represent a simple data type
+ *
+ * @param dataType The data type of some argument
+ */
+case class FixedArgumentType(dataType: AbstractDataType) extends
NamedArgumentType
+
+/**
+ * A named parameter
+ *
+ * @param name The name of the string.
+ * @param dataType The datatype of the argument.
+ * @param default The default value of the argument. If the default is none,
then that means the
+ * argument is required. If no argument is provided, an
exception is thrown.
+ */
+case class NamedArgument(
+ name: String,
+ dataType: NamedArgumentType,
+ default: Option[Expression] = None)
+
+/**
+ * Represents a method signature and the list of arguments it receives as
input.
+ * Currently, overloads are not supported and only one FunctionSignature is
allowed
+ * per function expression.
+ *
+ * @param parameters The list of arguments which the function takes
+ */
+case class FunctionSignature(parameters: Seq[NamedArgument])
+
+/**
+ * The class which companion objects of function expression implement to
+ * support named arguments for that function expression.
+ */
+abstract class SupportsNamedArguments {
+ /**
+ * This is the method overridden by function expressions to define their
method signatures.
+ * Currently, we don't support overloads, so we restrict each function
expression to return
+ * only one FunctionSignature.
+ *
+ * @return the signature of the function expression
+ */
+ def functionSignatures: Seq[FunctionSignature]
+
+ /**
+ * This function rearranges the list of expressions according to the
function signature
+ * It is recommended to use the provided version rearrange as it is
consistent with
Review Comment:
```suggestion
* It is recommended to use this provided implementation as it is
consistent with
```
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/SupportsNamedArguments.scala:
##########
@@ -0,0 +1,204 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.catalyst.plans.logical
+
+import scala.reflect.ClassTag
+
+import org.apache.spark.sql.catalyst.expressions.{Expression,
NamedArgumentExpression}
+import org.apache.spark.sql.errors.QueryCompilationErrors
+import org.apache.spark.sql.types.AbstractDataType
+
+/**
+ * A general trait which is used to identify the DataType of the argument
+ */
+trait NamedArgumentType
+
+/**
+ * The standard case class used to represent a simple data type
Review Comment:
```suggestion
* Represents a named argument that expects a scalar value of one specific
DataType.
```
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/SupportsNamedArguments.scala:
##########
@@ -0,0 +1,204 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.catalyst.plans.logical
+
+import scala.reflect.ClassTag
+
+import org.apache.spark.sql.catalyst.expressions.{Expression,
NamedArgumentExpression}
+import org.apache.spark.sql.errors.QueryCompilationErrors
+import org.apache.spark.sql.types.AbstractDataType
+
+/**
+ * A general trait which is used to identify the DataType of the argument
Review Comment:
```suggestion
* Identifies which forms of provided argument values are expected for each
* call to the associated SQL function.
```
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/SupportsNamedArguments.scala:
##########
@@ -0,0 +1,204 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.catalyst.plans.logical
+
+import scala.reflect.ClassTag
+
+import org.apache.spark.sql.catalyst.expressions.{Expression,
NamedArgumentExpression}
+import org.apache.spark.sql.errors.QueryCompilationErrors
+import org.apache.spark.sql.types.AbstractDataType
+
+/**
+ * A general trait which is used to identify the DataType of the argument
+ */
+trait NamedArgumentType
+
+/**
+ * The standard case class used to represent a simple data type
+ *
+ * @param dataType The data type of some argument
+ */
+case class FixedArgumentType(dataType: AbstractDataType) extends
NamedArgumentType
+
+/**
+ * A named parameter
+ *
+ * @param name The name of the string.
+ * @param dataType The datatype of the argument.
+ * @param default The default value of the argument. If the default is none,
then that means the
+ * argument is required. If no argument is provided, an
exception is thrown.
+ */
+case class NamedArgument(
+ name: String,
+ dataType: NamedArgumentType,
+ default: Option[Expression] = None)
+
+/**
+ * Represents a method signature and the list of arguments it receives as
input.
+ * Currently, overloads are not supported and only one FunctionSignature is
allowed
+ * per function expression.
+ *
+ * @param parameters The list of arguments which the function takes
+ */
+case class FunctionSignature(parameters: Seq[NamedArgument])
+
+/**
+ * The class which companion objects of function expression implement to
+ * support named arguments for that function expression.
+ */
+abstract class SupportsNamedArguments {
+ /**
+ * This is the method overridden by function expressions to define their
method signatures.
+ * Currently, we don't support overloads, so we restrict each function
expression to return
+ * only one FunctionSignature.
+ *
+ * @return the signature of the function expression
+ */
+ def functionSignatures: Seq[FunctionSignature]
+
+ /**
+ * This function rearranges the list of expressions according to the
function signature
+ * It is recommended to use the provided version rearrange as it is
consistent with
+ * the SQL standard. If absolutely necessary the developer can choose to
override the default
+ * behavior for additional flexibility.
+ *
+ * @param functionSignature Function signature that denotes positional order
of arguments
+ * @param args The sequence of expressions from function invocation
+ * @param functionName The name of the function invoked for debugging
purposes
+ * @return positional order of arguments according to FunctionSignature
+ */
+ protected def rearrange(functionSignature: FunctionSignature,
+ args: Seq[Expression],
+ functionName: String): Seq[Expression] = {
+ SupportsNamedArguments.defaultRearrange(functionSignature, args,
functionName)
+ }
+}
+
+object SupportsNamedArguments {
+
+ /**
+ * Given a generic type, we check if the companion object of said type
exists.
+ * If that object extends the trait [[SupportsNamedArguments]], then we
rearrange
+ * the expressions in the order specified by the object.
+ *
+ * It is here we resubstitute [[Unevaluable]] [[NamedArgumentExpression]]s
with
+ * normal expressions. This method will produce an positional argument list
which
+ * is equivalent to the original argumnet list, except the expressions are
now
+ * fit for consumption by [[ResolveFunctions]]
+ *
+ * @param expressions The list of positional and named argument expressions
+ * @tparam T The actual expression class.
+ * @return positional argument list
+ */
+ final def getRearrangedExpressions[T <: Expression : ClassTag](
+ expressions: Seq[Expression], functionName: String): Seq[Expression] = {
+
+ if (!expressions.exists(_.isInstanceOf[NamedArgumentExpression])) {
+ return expressions
+ }
+
+ import scala.reflect.runtime.currentMirror
+
+ // This code heavily utilizes Scala reflection which is unfamiliar to most
developers.
+ // Here are the steps of this function:
+ // 1. Obtain the module symbol for the companion object of the function
expression.
+ // 2. Obtain the module class symbol that represents the companion object.
+ // 3. Check if the base classes of the module class symbol contains
SupportsNamedArguments.
+ // This checks if the companion object is an implementor of
SupportsNamedArguments.
+ // 4. Check if the module class symbol is a top level object. Reflection
is unable to
+ // obtain a companion object instance if it is member of some enclosing
class unless
+ // instance of said enclosing class is provided which we do not have.
+ // 5. Use reflection to obtain instance of companion object and perform
immediate cast to
+ // SupportsNamedArguments as it is already verified the cast is safe.
+ // 6. Obtain function signature and rearrange expression according to the
given signature.
+ val runtimeClass = scala.reflect.classTag[T].runtimeClass
+ val targetModuleSymbol = currentMirror.classSymbol(runtimeClass).companion
+ val parentClass =
scala.reflect.classTag[SupportsNamedArguments].runtimeClass
+ val parentSymbol = currentMirror.classSymbol(parentClass)
+
+ targetModuleSymbol match {
+ case scala.reflect.runtime.universe.NoSymbol =>
+ throw QueryCompilationErrors.namedArgumentsNotSupported(functionName)
+ case _ =>
+ val moduleClassSymbol = targetModuleSymbol.asModule.moduleClass.asClass
+ if (moduleClassSymbol.baseClasses.contains(parentSymbol)) {
Review Comment:
Same here: you can invert the condition and throw the exception early, removing
the `else` clause and de-denting the rest of the block.
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/SupportsNamedArguments.scala:
##########
@@ -0,0 +1,204 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.catalyst.plans.logical
+
+import scala.reflect.ClassTag
+
+import org.apache.spark.sql.catalyst.expressions.{Expression,
NamedArgumentExpression}
+import org.apache.spark.sql.errors.QueryCompilationErrors
+import org.apache.spark.sql.types.AbstractDataType
+
+/**
+ * A general trait which is used to identify the DataType of the argument
+ */
+trait NamedArgumentType
+
+/**
+ * The standard case class used to represent a simple data type
+ *
+ * @param dataType The data type of some argument
+ */
+case class FixedArgumentType(dataType: AbstractDataType) extends
NamedArgumentType
+
+/**
+ * A named parameter
+ *
+ * @param name The name of the string.
+ * @param dataType The datatype of the argument.
+ * @param default The default value of the argument. If the default is none,
then that means the
+ * argument is required. If no argument is provided, an
exception is thrown.
+ */
+case class NamedArgument(
+ name: String,
+ dataType: NamedArgumentType,
+ default: Option[Expression] = None)
+
+/**
+ * Represents a method signature and the list of arguments it receives as
input.
+ * Currently, overloads are not supported and only one FunctionSignature is
allowed
+ * per function expression.
+ *
+ * @param parameters The list of arguments which the function takes
+ */
+case class FunctionSignature(parameters: Seq[NamedArgument])
+
+/**
+ * The class which companion objects of function expression implement to
+ * support named arguments for that function expression.
+ */
+abstract class SupportsNamedArguments {
+ /**
+ * This is the method overridden by function expressions to define their
method signatures.
+ * Currently, we don't support overloads, so we restrict each function
expression to return
+ * only one FunctionSignature.
+ *
+ * @return the signature of the function expression
+ */
+ def functionSignatures: Seq[FunctionSignature]
+
+ /**
+ * This function rearranges the list of expressions according to the
function signature
+ * It is recommended to use the provided version rearrange as it is
consistent with
+ * the SQL standard. If absolutely necessary the developer can choose to
override the default
+ * behavior for additional flexibility.
+ *
+ * @param functionSignature Function signature that denotes positional order
of arguments
+ * @param args The sequence of expressions from function invocation
+ * @param functionName The name of the function invoked for debugging
purposes
+ * @return positional order of arguments according to FunctionSignature
Review Comment:
```suggestion
* @return positional order of arguments according to FunctionSignature
obtained
* by changing the order of the above provided arguments
```
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/SupportsNamedArguments.scala:
##########
@@ -0,0 +1,204 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.catalyst.plans.logical
+
+import scala.reflect.ClassTag
+
+import org.apache.spark.sql.catalyst.expressions.{Expression,
NamedArgumentExpression}
+import org.apache.spark.sql.errors.QueryCompilationErrors
+import org.apache.spark.sql.types.AbstractDataType
+
+/**
+ * A general trait which is used to identify the DataType of the argument
+ */
+trait NamedArgumentType
+
+/**
+ * The standard case class used to represent a simple data type
+ *
+ * @param dataType The data type of some argument
+ */
+case class FixedArgumentType(dataType: AbstractDataType) extends
NamedArgumentType
+
+/**
+ * A named parameter
+ *
+ * @param name The name of the string.
+ * @param dataType The datatype of the argument.
+ * @param default The default value of the argument. If the default is none,
then that means the
+ * argument is required. If no argument is provided, an
exception is thrown.
+ */
+case class NamedArgument(
+ name: String,
+ dataType: NamedArgumentType,
+ default: Option[Expression] = None)
+
+/**
+ * Represents a method signature and the list of arguments it receives as
input.
+ * Currently, overloads are not supported and only one FunctionSignature is
allowed
+ * per function expression.
+ *
+ * @param parameters The list of arguments which the function takes
+ */
+case class FunctionSignature(parameters: Seq[NamedArgument])
+
+/**
+ * The class which companion objects of function expression implement to
+ * support named arguments for that function expression.
+ */
+abstract class SupportsNamedArguments {
+ /**
+ * This is the method overridden by function expressions to define their
method signatures.
+ * Currently, we don't support overloads, so we restrict each function
expression to return
+ * only one FunctionSignature.
+ *
+ * @return the signature of the function expression
+ */
+ def functionSignatures: Seq[FunctionSignature]
+
+ /**
+ * This function rearranges the list of expressions according to the
function signature
+ * It is recommended to use the provided version rearrange as it is
consistent with
+ * the SQL standard. If absolutely necessary the developer can choose to
override the default
+ * behavior for additional flexibility.
+ *
+ * @param functionSignature Function signature that denotes positional order
of arguments
Review Comment:
Since it can be confusing after a while to read code in this area that deals
with a lot of expected function signatures and provided arguments, I suggest
renaming the method arguments to clarify their roles, e.g. `expectedSignature`,
`providedArguments`, `functionName`.
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/SupportsNamedArguments.scala:
##########
@@ -0,0 +1,204 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.catalyst.plans.logical
+
+import scala.reflect.ClassTag
+
+import org.apache.spark.sql.catalyst.expressions.{Expression,
NamedArgumentExpression}
+import org.apache.spark.sql.errors.QueryCompilationErrors
+import org.apache.spark.sql.types.AbstractDataType
+
+/**
+ * A general trait which is used to identify the DataType of the argument
+ */
+trait NamedArgumentType
+
+/**
+ * The standard case class used to represent a simple data type
+ *
+ * @param dataType The data type of some argument
+ */
+case class FixedArgumentType(dataType: AbstractDataType) extends
NamedArgumentType
+
+/**
+ * A named parameter
+ *
+ * @param name The name of the string.
+ * @param dataType The datatype of the argument.
+ * @param default The default value of the argument. If the default is none,
then that means the
+ * argument is required. If no argument is provided, an
exception is thrown.
+ */
+case class NamedArgument(
+ name: String,
+ dataType: NamedArgumentType,
+ default: Option[Expression] = None)
+
+/**
+ * Represents a method signature and the list of arguments it receives as
input.
+ * Currently, overloads are not supported and only one FunctionSignature is
allowed
+ * per function expression.
+ *
+ * @param parameters The list of arguments which the function takes
+ */
+case class FunctionSignature(parameters: Seq[NamedArgument])
+
+/**
+ * The class which companion objects of function expression implement to
+ * support named arguments for that function expression.
+ */
+abstract class SupportsNamedArguments {
+ /**
+ * This is the method overridden by function expressions to define their
method signatures.
+ * Currently, we don't support overloads, so we restrict each function
expression to return
+ * only one FunctionSignature.
+ *
+ * @return the signature of the function expression
+ */
+ def functionSignatures: Seq[FunctionSignature]
+
+ /**
+ * This function rearranges the list of expressions according to the
function signature
+ * It is recommended to use the provided version rearrange as it is
consistent with
+ * the SQL standard. If absolutely necessary the developer can choose to
override the default
+ * behavior for additional flexibility.
+ *
+ * @param functionSignature Function signature that denotes positional order
of arguments
+ * @param args The sequence of expressions from function invocation
+ * @param functionName The name of the function invoked for debugging
purposes
+ * @return positional order of arguments according to FunctionSignature
+ */
+ protected def rearrange(functionSignature: FunctionSignature,
+ args: Seq[Expression],
+ functionName: String): Seq[Expression] = {
+ SupportsNamedArguments.defaultRearrange(functionSignature, args,
functionName)
+ }
+}
+
+object SupportsNamedArguments {
+
+ /**
+ * Given a generic type, we check if the companion object of said type
exists.
+ * If that object extends the trait [[SupportsNamedArguments]], then we
rearrange
+ * the expressions in the order specified by the object.
+ *
+ * It is here we resubstitute [[Unevaluable]] [[NamedArgumentExpression]]s
with
+ * normal expressions. This method will produce an positional argument list
which
+ * is equivalent to the original argumnet list, except the expressions are
now
+ * fit for consumption by [[ResolveFunctions]]
+ *
+ * @param expressions The list of positional and named argument expressions
+ * @tparam T The actual expression class.
+ * @return positional argument list
+ */
+ final def getRearrangedExpressions[T <: Expression : ClassTag](
+ expressions: Seq[Expression], functionName: String): Seq[Expression] = {
+
+ if (!expressions.exists(_.isInstanceOf[NamedArgumentExpression])) {
+ return expressions
+ }
+
+ import scala.reflect.runtime.currentMirror
+
+ // This code heavily utilizes Scala reflection which is unfamiliar to most
developers.
+ // Here are the steps of this function:
+ // 1. Obtain the module symbol for the companion object of the function
expression.
+ // 2. Obtain the module class symbol that represents the companion object.
+ // 3. Check if the base classes of the module class symbol contains
SupportsNamedArguments.
+ // This checks if the companion object is an implementor of
SupportsNamedArguments.
+ // 4. Check if the module class symbol is a top level object. Reflection
is unable to
+ // obtain a companion object instance if it is member of some enclosing
class unless
+ // instance of said enclosing class is provided which we do not have.
+ // 5. Use reflection to obtain instance of companion object and perform
immediate cast to
+ // SupportsNamedArguments as it is already verified the cast is safe.
+ // 6. Obtain function signature and rearrange expression according to the
given signature.
+ val runtimeClass = scala.reflect.classTag[T].runtimeClass
+ val targetModuleSymbol = currentMirror.classSymbol(runtimeClass).companion
+ val parentClass =
scala.reflect.classTag[SupportsNamedArguments].runtimeClass
+ val parentSymbol = currentMirror.classSymbol(parentClass)
+
+ targetModuleSymbol match {
+ case scala.reflect.runtime.universe.NoSymbol =>
+ throw QueryCompilationErrors.namedArgumentsNotSupported(functionName)
+ case _ =>
+ val moduleClassSymbol = targetModuleSymbol.asModule.moduleClass.asClass
+ if (moduleClassSymbol.baseClasses.contains(parentSymbol)) {
+ if (currentMirror.runtimeClass(moduleClassSymbol).getEnclosingClass
!= null) {
+ throw
QueryCompilationErrors.cannotObtainCompanionObjectInstance(functionName)
+ }
+ val instance =
currentMirror.reflectModule(targetModuleSymbol.asModule)
+ .instance.asInstanceOf[SupportsNamedArguments]
+ if (instance.functionSignatures.size != 1) {
+ throw QueryCompilationErrors.multipleFunctionSignatures(
+ functionName, instance.functionSignatures)
+ }
+ instance.rearrange(instance.functionSignatures.head, expressions,
functionName)
+ } else {
+ throw QueryCompilationErrors.namedArgumentsNotSupported(functionName)
+ }
+ }
+ }
+
+ // Exposed for testing
Review Comment:
You can just delete this line; it is OK for this method to be public in case
others want to use it later.
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/SupportsNamedArguments.scala:
##########
@@ -0,0 +1,204 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.catalyst.plans.logical
+
+import scala.reflect.ClassTag
+
+import org.apache.spark.sql.catalyst.expressions.{Expression,
NamedArgumentExpression}
+import org.apache.spark.sql.errors.QueryCompilationErrors
+import org.apache.spark.sql.types.AbstractDataType
+
+/**
+ * A general trait which is used to identify the DataType of the argument
+ */
+trait NamedArgumentType
+
+/**
+ * The standard case class used to represent a simple data type
+ *
+ * @param dataType The data type of some argument
+ */
+case class FixedArgumentType(dataType: AbstractDataType) extends
NamedArgumentType
+
+/**
+ * A named parameter
+ *
+ * @param name The name of the string.
+ * @param dataType The datatype of the argument.
+ * @param default The default value of the argument. If the default is none,
then that means the
+ * argument is required. If no argument is provided, an
exception is thrown.
+ */
+case class NamedArgument(
+ name: String,
+ dataType: NamedArgumentType,
+ default: Option[Expression] = None)
+
+/**
+ * Represents a method signature and the list of arguments it receives as
input.
+ * Currently, overloads are not supported and only one FunctionSignature is
allowed
+ * per function expression.
+ *
+ * @param parameters The list of arguments which the function takes
+ */
+case class FunctionSignature(parameters: Seq[NamedArgument])
+
+/**
+ * The class which companion objects of function expression implement to
+ * support named arguments for that function expression.
+ */
+abstract class SupportsNamedArguments {
+ /**
+ * This is the method overridden by function expressions to define their
method signatures.
+ * Currently, we don't support overloads, so we restrict each function
expression to return
+ * only one FunctionSignature.
+ *
+ * @return the signature of the function expression
+ */
+ def functionSignatures: Seq[FunctionSignature]
+
+ /**
+ * This function rearranges the list of expressions according to the
function signature
+ * It is recommended to use the provided version rearrange as it is
consistent with
+ * the SQL standard. If absolutely necessary the developer can choose to
override the default
+ * behavior for additional flexibility.
+ *
+ * @param functionSignature Function signature that denotes positional order
of arguments
+ * @param args The sequence of expressions from function invocation
+ * @param functionName The name of the function invoked for debugging
purposes
+ * @return positional order of arguments according to FunctionSignature
+ */
+ protected def rearrange(functionSignature: FunctionSignature,
+ args: Seq[Expression],
+ functionName: String): Seq[Expression] = {
+ SupportsNamedArguments.defaultRearrange(functionSignature, args,
functionName)
+ }
+}
+
+object SupportsNamedArguments {
+
+ /**
+ * Given a generic type, we check if the companion object of said type
exists.
+ * If that object extends the trait [[SupportsNamedArguments]], then we
rearrange
+ * the expressions in the order specified by the object.
+ *
+ * It is here we resubstitute [[Unevaluable]] [[NamedArgumentExpression]]s
with
+ * normal expressions. This method will produce a positional argument list
which
+ * is equivalent to the original argument list, except the expressions are
now
+ * fit for consumption by [[ResolveFunctions]]
+ *
+ * @param expressions The list of positional and named argument expressions
+ * @tparam T The actual expression class.
+ * @return positional argument list
+ */
+ final def getRearrangedExpressions[T <: Expression : ClassTag](
+ expressions: Seq[Expression], functionName: String): Seq[Expression] = {
+
+ if (!expressions.exists(_.isInstanceOf[NamedArgumentExpression])) {
+ return expressions
+ }
+
+ import scala.reflect.runtime.currentMirror
+
+ // This code heavily utilizes Scala reflection which is unfamiliar to most
developers.
+ // Here are the steps of this function:
+ // 1. Obtain the module symbol for the companion object of the function
expression.
+ // 2. Obtain the module class symbol that represents the companion object.
+ // 3. Check if the base classes of the module class symbol contains
SupportsNamedArguments.
+ // This checks if the companion object is an implementor of
SupportsNamedArguments.
+ // 4. Check if the module class symbol is a top level object. Reflection
is unable to
+ // obtain a companion object instance if it is member of some enclosing
class unless
+ // instance of said enclosing class is provided which we do not have.
+ // 5. Use reflection to obtain instance of companion object and perform
immediate cast to
+ // SupportsNamedArguments as it is already verified the cast is safe.
+ // 6. Obtain function signature and rearrange expression according to the
given signature.
+ val runtimeClass = scala.reflect.classTag[T].runtimeClass
+ val targetModuleSymbol = currentMirror.classSymbol(runtimeClass).companion
+ val parentClass =
scala.reflect.classTag[SupportsNamedArguments].runtimeClass
+ val parentSymbol = currentMirror.classSymbol(parentClass)
+
+ targetModuleSymbol match {
+ case scala.reflect.runtime.universe.NoSymbol =>
+ throw QueryCompilationErrors.namedArgumentsNotSupported(functionName)
+ case _ =>
+ val moduleClassSymbol = targetModuleSymbol.asModule.moduleClass.asClass
+ if (moduleClassSymbol.baseClasses.contains(parentSymbol)) {
+ if (currentMirror.runtimeClass(moduleClassSymbol).getEnclosingClass
!= null) {
+ throw
QueryCompilationErrors.cannotObtainCompanionObjectInstance(functionName)
+ }
+ val instance =
currentMirror.reflectModule(targetModuleSymbol.asModule)
+ .instance.asInstanceOf[SupportsNamedArguments]
Review Comment:
How do we know this `asInstanceOf` will succeed?
##########
common/utils/src/main/resources/error/error-classes.json:
##########
@@ -1781,6 +1787,11 @@
"Not allowed to implement multiple UDF interfaces, UDF class
<className>."
]
},
+ "NAMED_ARGUMENTS_NOT_SUPPORTED" : {
+ "message" : [
+ "Named arguments are not supported for function <functionName>; please
retry the query with positional arguments to the function call instead."
Review Comment:
@MaxGekk note that even if the SQL config is enabled, this feature will only
work for the subset of SQL functions that have explicitly opted into support
for named arguments (by defining the argument names). We probably want to take
this fact into account in this error message string as well.
##########
sql/core/src/test/resources/sql-tests/inputs/named-function-arguments.sql:
##########
@@ -1,5 +1,36 @@
+-- Test for named arguments for Mask
SELECT mask('AbCD123-@$#', lowerChar => 'q', upperChar => 'Q', otherChar =>
'o', digitChar => 'd');
SELECT mask(lowerChar => 'q', upperChar => 'Q', otherChar => 'o', digitChar =>
'd', str => 'AbCD123-@$#');
SELECT mask('AbCD123-@$#', lowerChar => 'q', upperChar => 'Q', digitChar =>
'd');
SELECT mask(lowerChar => 'q', upperChar => 'Q', digitChar => 'd', str =>
'AbCD123-@$#');
+
+-- Test for named arguments for CountMinSketchAgg
+create temporary view t2 as select * from values
+ ('val2a', 6S, 12, 14L, float(15), 20D, 20E2, timestamp '2014-04-04
01:01:00.000', date '2014-04-04'),
+ ('val1b', 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04
01:01:00.000', date '2014-05-04'),
+ ('val1b', 8S, 16, 119L, float(17), 25D, 26E2, timestamp '2015-05-04
01:01:00.000', date '2015-05-04'),
+ ('val1c', 12S, 16, 219L, float(17), 25D, 26E2, timestamp '2016-05-04
01:01:00.000', date '2016-05-04'),
+ ('val1b', null, 16, 319L, float(17), 25D, 26E2, timestamp '2017-05-04
01:01:00.000', null),
+ ('val2e', 8S, null, 419L, float(17), 25D, 26E2, timestamp '2014-06-04
01:01:00.000', date '2014-06-04'),
+ ('val1f', 19S, null, 519L, float(17), 25D, 26E2, timestamp '2014-05-04
01:01:00.000', date '2014-05-04'),
+ ('val1b', 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-06-04
01:01:00.000', date '2014-06-04'),
+ ('val1b', 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04
01:01:00.000', date '2014-07-04'),
+ ('val1c', 12S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-08-04
01:01:00.000', date '2014-08-05'),
+ ('val1e', 8S, null, 19L, float(17), 25D, 26E2, timestamp '2014-09-04
01:01:00.000', date '2014-09-04'),
+ ('val1f', 19S, null, 19L, float(17), 25D, 26E2, timestamp '2014-10-04
01:01:00.000', date '2014-10-04'),
+ ('val1b', null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-05-04
01:01:00.000', null)
+ as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i);
+
+SELECT hex(count_min_sketch(t2d, seed => 1, epsilon => 0.5d, confidence =>
0.5d)) FROM t2;
+
+-- Unexpected positional argument
Review Comment:
Could you also add a test with an invalid function call that mixes positional
and named arguments, where one of the positional arguments corresponds to an
argument declared in the function signature with the same name as one of the
provided named arguments? If needed, add a new error class for this case.
##########
sql/core/src/test/resources/sql-tests/inputs/named-function-arguments.sql:
##########
@@ -1,5 +1,36 @@
+-- Test for named arguments for Mask
SELECT mask('AbCD123-@$#', lowerChar => 'q', upperChar => 'Q', otherChar =>
'o', digitChar => 'd');
SELECT mask(lowerChar => 'q', upperChar => 'Q', otherChar => 'o', digitChar =>
'd', str => 'AbCD123-@$#');
SELECT mask('AbCD123-@$#', lowerChar => 'q', upperChar => 'Q', digitChar =>
'd');
SELECT mask(lowerChar => 'q', upperChar => 'Q', digitChar => 'd', str =>
'AbCD123-@$#');
+
+-- Test for named arguments for CountMinSketchAgg
+create temporary view t2 as select * from values
+ ('val2a', 6S, 12, 14L, float(15), 20D, 20E2, timestamp '2014-04-04
01:01:00.000', date '2014-04-04'),
+ ('val1b', 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04
01:01:00.000', date '2014-05-04'),
+ ('val1b', 8S, 16, 119L, float(17), 25D, 26E2, timestamp '2015-05-04
01:01:00.000', date '2015-05-04'),
+ ('val1c', 12S, 16, 219L, float(17), 25D, 26E2, timestamp '2016-05-04
01:01:00.000', date '2016-05-04'),
+ ('val1b', null, 16, 319L, float(17), 25D, 26E2, timestamp '2017-05-04
01:01:00.000', null),
+ ('val2e', 8S, null, 419L, float(17), 25D, 26E2, timestamp '2014-06-04
01:01:00.000', date '2014-06-04'),
+ ('val1f', 19S, null, 519L, float(17), 25D, 26E2, timestamp '2014-05-04
01:01:00.000', date '2014-05-04'),
+ ('val1b', 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-06-04
01:01:00.000', date '2014-06-04'),
+ ('val1b', 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04
01:01:00.000', date '2014-07-04'),
+ ('val1c', 12S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-08-04
01:01:00.000', date '2014-08-05'),
+ ('val1e', 8S, null, 19L, float(17), 25D, 26E2, timestamp '2014-09-04
01:01:00.000', date '2014-09-04'),
+ ('val1f', 19S, null, 19L, float(17), 25D, 26E2, timestamp '2014-10-04
01:01:00.000', date '2014-10-04'),
+ ('val1b', null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-05-04
01:01:00.000', null)
+ as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i);
+
+SELECT hex(count_min_sketch(t2d, seed => 1, epsilon => 0.5d, confidence =>
0.5d)) FROM t2;
+
+-- Unexpected positional argument
SELECT mask(lowerChar => 'q', 'AbCD123-@$#', upperChar => 'Q', otherChar =>
'o', digitChar => 'd');
+-- Duplicate parameter assignment
Review Comment:
@ueshin FYI
We also now support the TABLE keyword for arguments for table-valued
functions (https://github.com/apache/spark/pull/41750).
Can you add some tests with calls to `mask` with provided arguments of the
form `TABLE name` where the `name` may or may not match the expected argument
names, to show the behavior? (It may be necessary to sync your PR to pick up
this change.)
To reduce the scope of this PR, it may be prudent to just explicitly return
an error in the event of any of these types of arguments if the function
accepts named args, with a specific error class, until we have the time to
look more closely later.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]