dtenedor commented on code in PR #41864:
URL: https://github.com/apache/spark/pull/41864#discussion_r1261740266
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala:
##########
@@ -887,6 +887,26 @@ object FunctionRegistry {
(name, (expressionInfo, newBuilder))
}
+ private[FunctionRegistry$] def rearrangeExpressions[T <: Builder[_]](
Review Comment:
please add a comment for this method?
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/SupportsNamedArguments.scala:
##########
@@ -0,0 +1,118 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.catalyst.plans.logical
+
+import org.apache.spark.sql.catalyst.expressions.{Expression,
NamedArgumentExpression}
+import org.apache.spark.sql.errors.QueryCompilationErrors
+import org.apache.spark.sql.types.AbstractDataType
+
+object SupportsNamedArguments {
+ final def defaultRearrange(functionSignature: FunctionSignature,
Review Comment:
please add a descriptive comment for this method? Also, move
`functionSignature: FunctionSignature` to the next line with an indent of +4
spaces per style recommendations?
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/SupportsNamedArguments.scala:
##########
@@ -0,0 +1,118 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.catalyst.plans.logical
+
+import org.apache.spark.sql.catalyst.expressions.{Expression,
NamedArgumentExpression}
+import org.apache.spark.sql.errors.QueryCompilationErrors
+import org.apache.spark.sql.types.AbstractDataType
+
+object SupportsNamedArguments {
+ final def defaultRearrange(functionSignature: FunctionSignature,
+ args: Seq[Expression],
+ functionName: String): Seq[Expression] = {
+ val parameters: Seq[NamedArgument] = functionSignature.parameters
+ val firstNamedArgIdx: Int =
args.indexWhere(_.isInstanceOf[NamedArgumentExpression])
+ val (positionalArgs, namedArgs) =
+ if (firstNamedArgIdx == -1) {
+ (args, Nil)
Review Comment:
It's safer to use `None` and `Option` types instead of `null` or `Nil` in
Scala, since the API prompts invokers to check for the object's presence before
extracting.
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/maskExpressions.scala:
##########
@@ -344,4 +346,13 @@ object Mask extends SupportsNamedArguments {
strArg, upperCharArg, lowerCharArg, digitCharArg, otherCharArg))
Seq(functionSignature)
}
+
+ override def build(funcName: String, expressions: Seq[Expression]):
Expression = {
+ if (expressions.length < 1 || expressions.length > 5) {
+ throw QueryCompilationErrors.wrongNumArgsError(
+ funcName, Seq(1, 2, 3, 4, 5), expressions.length)
+ }
+ new Mask(expressions(0), expressions(1), expressions(2), expressions(3),
expressions(4))
Review Comment:
Yes, +1 on this approach as well.
Eventually, we can considering putting more required properties in the
function signatures, such that most function expression classes do not have to
implement any matching logic between provided vs. required arguments at all,
and it will be handled generically by the analyzer instead. For now, keeping
just the argument names and optional default values in the function signature
lets us accomplish the goal of supporting named arguments without breaking any
existing use cases.
##########
sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/NamedArgumentFunctionSuite.scala:
##########
@@ -0,0 +1,88 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.catalyst.analysis
+
+// import org.apache.spark.SparkThrowable
Review Comment:
please uncomment or delete commented-out code
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/SupportsNamedArguments.scala:
##########
@@ -0,0 +1,118 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.catalyst.plans.logical
+
+import org.apache.spark.sql.catalyst.expressions.{Expression,
NamedArgumentExpression}
+import org.apache.spark.sql.errors.QueryCompilationErrors
+import org.apache.spark.sql.types.AbstractDataType
+
+object SupportsNamedArguments {
+ final def defaultRearrange(functionSignature: FunctionSignature,
+ args: Seq[Expression],
+ functionName: String): Seq[Expression] = {
+ val parameters: Seq[NamedArgument] = functionSignature.parameters
+ val firstNamedArgIdx: Int =
args.indexWhere(_.isInstanceOf[NamedArgumentExpression])
+ val (positionalArgs, namedArgs) =
+ if (firstNamedArgIdx == -1) {
+ (args, Nil)
+ } else {
+ args.splitAt(firstNamedArgIdx)
+ }
+ val namedParameters: Seq[NamedArgument] =
parameters.drop(positionalArgs.size)
+
+ // Performing some checking to ensure valid argument list
Review Comment:
nit: please make each comment a complete sentence and end it with a period
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/SupportsNamedArguments.scala:
##########
@@ -0,0 +1,118 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.catalyst.plans.logical
+
+import org.apache.spark.sql.catalyst.expressions.{Expression,
NamedArgumentExpression}
+import org.apache.spark.sql.errors.QueryCompilationErrors
+import org.apache.spark.sql.types.AbstractDataType
+
+object SupportsNamedArguments {
+ final def defaultRearrange(functionSignature: FunctionSignature,
+ args: Seq[Expression],
+ functionName: String): Seq[Expression] = {
+ val parameters: Seq[NamedArgument] = functionSignature.parameters
+ val firstNamedArgIdx: Int =
args.indexWhere(_.isInstanceOf[NamedArgumentExpression])
+ val (positionalArgs, namedArgs) =
+ if (firstNamedArgIdx == -1) {
+ (args, Nil)
+ } else {
+ args.splitAt(firstNamedArgIdx)
+ }
+ val namedParameters: Seq[NamedArgument] =
parameters.drop(positionalArgs.size)
+
+ // Performing some checking to ensure valid argument list
+ val allParameterNames: Seq[String] = parameters.map(_.name)
+ val parameterNamesSet: Set[String] = allParameterNames.toSet
+ val positionalParametersSet =
allParameterNames.take(positionalArgs.size).toSet
+ val namedParametersSet = collection.mutable.Set[String]()
+
+ for (arg <- namedArgs) {
+ arg match {
+ case namedArg: NamedArgumentExpression =>
+ val parameterName = namedArg.key
+ if (!parameterNamesSet.contains(parameterName)) {
+ throw
QueryCompilationErrors.unrecognizedParameterName(functionName, namedArg.key,
+ parameterNamesSet.toSeq)
+ }
+ if (positionalParametersSet.contains(parameterName)) {
+ throw
QueryCompilationErrors.positionalAndNamedArgumentDoubleReference(
+ functionName, namedArg.key)
+ }
+ if (namedParametersSet.contains(parameterName)) {
+ throw QueryCompilationErrors.doubleNamedArgumentReference(
+ functionName, namedArg.key)
+ }
+ namedParametersSet.add(namedArg.key)
+ case _ =>
+ throw
QueryCompilationErrors.unexpectedPositionalArgument(functionName)
+ }
+ }
+
+ // Construct a map from argument name to value for argument rearrangement
+ val namedArgMap = namedArgs.map { arg =>
+ val namedArg = arg.asInstanceOf[NamedArgumentExpression]
+ namedArg.key -> namedArg.value
+ }.toMap
+
+ // Rearrange named arguments to match their positional order
+ val rearrangedNamedArgs: Seq[Expression] = namedParameters.map { param =>
+ namedArgMap.getOrElse(
+ param.name,
+ if (param.default.isEmpty) {
+ throw QueryCompilationErrors.requiredParameterNotFound(functionName,
param.name)
+ } else {
+ param.default.get
+ }
+ )
+ }
+ positionalArgs ++ rearrangedNamedArgs
+ }
+}
+
+/**
+ * Identifies which forms of provided argument values are expected for each
call
+ * to the associated SQL function
+ */
+trait NamedArgumentType
+
+/**
+ * Represents a named argument that expects a scalar value of one specific
DataType
+ *
+ * @param dataType The data type of some argument
+ */
+case class FixedArgumentType(dataType: AbstractDataType) extends
NamedArgumentType
Review Comment:
this class is no longer referenced in this PR, since we removed the data
types from the function signatures. Maybe just delete it for now, until we
decide to add them back in a later PR?
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala:
##########
@@ -1015,6 +1050,62 @@ object TableFunctionRegistry {
val functionSet: Set[FunctionIdentifier] = builtin.listFunction().toSet
}
-trait ExpressionBuilder {
- def build(funcName: String, expressions: Seq[Expression]): Expression
+trait Builder[T] {
+ /**
+ * A method that returns the signatures of overloads that is associated with
this function
Review Comment:
```suggestion
* A method that returns the signatures of overloads that are associated
with this function.
* Each function signature includes a list of parameters to which the
analyzer can
* compare a function call with provided arguments to determine if that
function
* call is a match for the function signature.
```
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/SupportsNamedArguments.scala:
##########
@@ -0,0 +1,118 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.catalyst.plans.logical
+
+import org.apache.spark.sql.catalyst.expressions.{Expression,
NamedArgumentExpression}
+import org.apache.spark.sql.errors.QueryCompilationErrors
+import org.apache.spark.sql.types.AbstractDataType
+
+object SupportsNamedArguments {
+ final def defaultRearrange(functionSignature: FunctionSignature,
+ args: Seq[Expression],
+ functionName: String): Seq[Expression] = {
+ val parameters: Seq[NamedArgument] = functionSignature.parameters
+ val firstNamedArgIdx: Int =
args.indexWhere(_.isInstanceOf[NamedArgumentExpression])
+ val (positionalArgs, namedArgs) =
Review Comment:
```suggestion
val (positionalArgs: Seq[Expression], namedArgs:
Option[Seq[Expression]]) =
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]