dtenedor commented on code in PR #41864:
URL: https://github.com/apache/spark/pull/41864#discussion_r1258767723
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala:
##########
@@ -1016,5 +1029,14 @@ object TableFunctionRegistry {
}
trait ExpressionBuilder {
+ def functionSignatures: Seq[FunctionSignature] = Nil
+
+ def rearrange(
Review Comment:
Can you please add comments for each of these methods describing what they
are for and who calls them?
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/maskExpressions.scala:
##########
@@ -321,3 +322,37 @@ object Mask {
}
}
}
+
+object MaskExpressionBuilder extends ExpressionBuilder {
+ override def functionSignatures: Seq[FunctionSignature] = {
+ val strArg = NamedArgument("str", FixedArgumentType(StringType))
+ val upperCharArg = NamedArgument(
+ "upperChar",
+ FixedArgumentType(StringType),
+ Some(Literal(Mask.MASKED_UPPERCASE)))
+ val lowerCharArg = NamedArgument(
+ "lowerChar",
+ FixedArgumentType(StringType),
+ Some(Literal(Mask.MASKED_LOWERCASE)))
+ val digitCharArg = NamedArgument(
+ "digitChar",
+ FixedArgumentType(StringType),
+ Some(Literal(Mask.MASKED_DIGIT)))
+ val otherCharArg = NamedArgument(
+ "otherChar",
+ FixedArgumentType(StringType),
+ Some(Literal(Mask.MASKED_IGNORE)))
+ val functionSignature: FunctionSignature = FunctionSignature(Seq(
+ strArg, upperCharArg, lowerCharArg, digitCharArg, otherCharArg))
+ Seq(functionSignature)
+ }
+
+ override def build(funcName: String, expressions: Seq[Expression]):
Expression = {
+ if (expressions.length < 1 || expressions.length > 5) {
+ throw QueryCompilationErrors.wrongNumArgsError(
+ funcName, Seq(1, 2, 3, 4, 5), expressions.length)
+ }
+ new Mask(expressions(0), expressions(1), expressions(2), expressions(3),
expressions(4))
Review Comment:
here it seems like by this point, since we provided an override for
`functionSignatures`, that we have the guarantee that the `expressions` that
show up here are all 5 expressions from the function signature, even if one or
more of the provided arguments are missing (and we substitute the corresponding
default expressions instead). Can you comment this on the `build` method inside
the base `ExpressionBuilder` to make sure the semantics are clear?
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala:
##########
@@ -1016,5 +1029,14 @@ object TableFunctionRegistry {
}
trait ExpressionBuilder {
+ def functionSignatures: Seq[FunctionSignature] = Nil
Review Comment:
using null/Nil is generally not advisable because if the user forgets to
check, it leads to exceptions. Maybe make this return
`Option[Seq[FunctionSignature]]` instead with `None` as the default instead?
##########
sql/core/src/test/resources/sql-tests/inputs/named-function-arguments.sql:
##########
@@ -1,5 +1,36 @@
+-- Test for named arguments for Mask
SELECT mask('AbCD123-@$#', lowerChar => 'q', upperChar => 'Q', otherChar =>
'o', digitChar => 'd');
SELECT mask(lowerChar => 'q', upperChar => 'Q', otherChar => 'o', digitChar =>
'd', str => 'AbCD123-@$#');
SELECT mask('AbCD123-@$#', lowerChar => 'q', upperChar => 'Q', digitChar =>
'd');
SELECT mask(lowerChar => 'q', upperChar => 'Q', digitChar => 'd', str =>
'AbCD123-@$#');
+
+-- Test for named arguments for CountMinSketchAgg
+create temporary view t2 as select * from values
+ ('val2a', 6S, 12, 14L, float(15), 20D, 20E2, timestamp '2014-04-04
01:01:00.000', date '2014-04-04'),
+ ('val1b', 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04
01:01:00.000', date '2014-05-04'),
+ ('val1b', 8S, 16, 119L, float(17), 25D, 26E2, timestamp '2015-05-04
01:01:00.000', date '2015-05-04'),
+ ('val1c', 12S, 16, 219L, float(17), 25D, 26E2, timestamp '2016-05-04
01:01:00.000', date '2016-05-04'),
+ ('val1b', null, 16, 319L, float(17), 25D, 26E2, timestamp '2017-05-04
01:01:00.000', null),
+ ('val2e', 8S, null, 419L, float(17), 25D, 26E2, timestamp '2014-06-04
01:01:00.000', date '2014-06-04'),
+ ('val1f', 19S, null, 519L, float(17), 25D, 26E2, timestamp '2014-05-04
01:01:00.000', date '2014-05-04'),
+ ('val1b', 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-06-04
01:01:00.000', date '2014-06-04'),
+ ('val1b', 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04
01:01:00.000', date '2014-07-04'),
+ ('val1c', 12S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-08-04
01:01:00.000', date '2014-08-05'),
+ ('val1e', 8S, null, 19L, float(17), 25D, 26E2, timestamp '2014-09-04
01:01:00.000', date '2014-09-04'),
+ ('val1f', 19S, null, 19L, float(17), 25D, 26E2, timestamp '2014-10-04
01:01:00.000', date '2014-10-04'),
+ ('val1b', null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-05-04
01:01:00.000', null)
+ as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i);
+
+SELECT hex(count_min_sketch(t2d, seed => 1, epsilon => 0.5d, confidence =>
0.5d)) FROM t2;
+
+-- Unexpected positional argument
SELECT mask(lowerChar => 'q', 'AbCD123-@$#', upperChar => 'Q', otherChar =>
'o', digitChar => 'd');
+-- Duplicate parameter assignment
Review Comment:
Where is the testing for this?
##########
sql/core/src/test/resources/sql-tests/inputs/named-function-arguments.sql:
##########
@@ -1,5 +1,36 @@
+-- Test for named arguments for Mask
SELECT mask('AbCD123-@$#', lowerChar => 'q', upperChar => 'Q', otherChar =>
'o', digitChar => 'd');
SELECT mask(lowerChar => 'q', upperChar => 'Q', otherChar => 'o', digitChar =>
'd', str => 'AbCD123-@$#');
SELECT mask('AbCD123-@$#', lowerChar => 'q', upperChar => 'Q', digitChar =>
'd');
SELECT mask(lowerChar => 'q', upperChar => 'Q', digitChar => 'd', str =>
'AbCD123-@$#');
+
+-- Test for named arguments for CountMinSketchAgg
+create temporary view t2 as select * from values
+ ('val2a', 6S, 12, 14L, float(15), 20D, 20E2, timestamp '2014-04-04
01:01:00.000', date '2014-04-04'),
+ ('val1b', 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04
01:01:00.000', date '2014-05-04'),
+ ('val1b', 8S, 16, 119L, float(17), 25D, 26E2, timestamp '2015-05-04
01:01:00.000', date '2015-05-04'),
+ ('val1c', 12S, 16, 219L, float(17), 25D, 26E2, timestamp '2016-05-04
01:01:00.000', date '2016-05-04'),
+ ('val1b', null, 16, 319L, float(17), 25D, 26E2, timestamp '2017-05-04
01:01:00.000', null),
+ ('val2e', 8S, null, 419L, float(17), 25D, 26E2, timestamp '2014-06-04
01:01:00.000', date '2014-06-04'),
+ ('val1f', 19S, null, 519L, float(17), 25D, 26E2, timestamp '2014-05-04
01:01:00.000', date '2014-05-04'),
+ ('val1b', 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-06-04
01:01:00.000', date '2014-06-04'),
+ ('val1b', 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04
01:01:00.000', date '2014-07-04'),
+ ('val1c', 12S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-08-04
01:01:00.000', date '2014-08-05'),
+ ('val1e', 8S, null, 19L, float(17), 25D, 26E2, timestamp '2014-09-04
01:01:00.000', date '2014-09-04'),
+ ('val1f', 19S, null, 19L, float(17), 25D, 26E2, timestamp '2014-10-04
01:01:00.000', date '2014-10-04'),
+ ('val1b', null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-05-04
01:01:00.000', null)
+ as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i);
+
+SELECT hex(count_min_sketch(t2d, seed => 1, epsilon => 0.5d, confidence =>
0.5d)) FROM t2;
+
+-- Unexpected positional argument
Review Comment:
Ping on this comment :)
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/SupportsNamedArguments.scala:
##########
@@ -0,0 +1,162 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.catalyst.plans.logical
+
+import org.apache.spark.sql.catalyst.expressions.{Expression,
NamedArgumentExpression}
+import org.apache.spark.sql.errors.QueryCompilationErrors
+import org.apache.spark.sql.types.AbstractDataType
+
+/**
+ * The class which companion objects of function expression may implement to
+ * support named arguments for that function expression. Please note that
variadic final
+ * arguments are NOT supported for named arguments. Do not use for functions
that
+ * has variadic final arguments!
Review Comment:
```suggestion
* have variadic final arguments!
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]