dtenedor commented on code in PR #41864:
URL: https://github.com/apache/spark/pull/41864#discussion_r1258767723


##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala:
##########
@@ -1016,5 +1029,14 @@ object TableFunctionRegistry {
 }
 
 trait ExpressionBuilder {
+  def functionSignatures: Seq[FunctionSignature] = Nil
+
+  def rearrange(

Review Comment:
   Can you please add comments for each of these methods describing what they 
are for and who calls them?



##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/maskExpressions.scala:
##########
@@ -321,3 +322,37 @@ object Mask {
     }
   }
 }
+
+object MaskExpressionBuilder extends ExpressionBuilder {
+  override def functionSignatures: Seq[FunctionSignature] = {
+    val strArg = NamedArgument("str", FixedArgumentType(StringType))
+    val upperCharArg = NamedArgument(
+      "upperChar",
+      FixedArgumentType(StringType),
+      Some(Literal(Mask.MASKED_UPPERCASE)))
+    val lowerCharArg = NamedArgument(
+      "lowerChar",
+      FixedArgumentType(StringType),
+      Some(Literal(Mask.MASKED_LOWERCASE)))
+    val digitCharArg = NamedArgument(
+      "digitChar",
+      FixedArgumentType(StringType),
+      Some(Literal(Mask.MASKED_DIGIT)))
+    val otherCharArg = NamedArgument(
+      "otherChar",
+      FixedArgumentType(StringType),
+      Some(Literal(Mask.MASKED_IGNORE)))
+    val functionSignature: FunctionSignature = FunctionSignature(Seq(
+      strArg, upperCharArg, lowerCharArg, digitCharArg, otherCharArg))
+    Seq(functionSignature)
+  }
+
+  override def build(funcName: String, expressions: Seq[Expression]): 
Expression = {
+    if (expressions.length < 1 || expressions.length > 5) {
+      throw QueryCompilationErrors.wrongNumArgsError(
+        funcName, Seq(1, 2, 3, 4, 5), expressions.length)
+    }
+    new Mask(expressions(0), expressions(1), expressions(2), expressions(3), 
expressions(4))

Review Comment:
   here it seems like by this point, since we provided an override for 
`functionSignatures`, that we have the guarantee that the `expressions` that 
show up here are all 5 expressions from the function signature, even if one or 
more of the provided arguments are missing (and we substitute the corresponding 
default expressions instead). Can you comment this on the `build` method inside 
the base `ExpressionBuilder` to make sure the semantics are clear?



##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala:
##########
@@ -1016,5 +1029,14 @@ object TableFunctionRegistry {
 }
 
 trait ExpressionBuilder {
+  def functionSignatures: Seq[FunctionSignature] = Nil

Review Comment:
   using null/Nil is generally not advisable because if the user forgets to 
check, it leads to exceptions. Maybe make this return 
`Option[Seq[FunctionSignature]]` instead with `None` as the default instead?



##########
sql/core/src/test/resources/sql-tests/inputs/named-function-arguments.sql:
##########
@@ -1,5 +1,36 @@
+-- Test for named arguments for Mask
 SELECT mask('AbCD123-@$#', lowerChar => 'q', upperChar => 'Q', otherChar => 
'o', digitChar => 'd');
 SELECT mask(lowerChar => 'q', upperChar => 'Q', otherChar => 'o', digitChar => 
'd', str => 'AbCD123-@$#');
 SELECT mask('AbCD123-@$#', lowerChar => 'q', upperChar => 'Q', digitChar => 
'd');
 SELECT mask(lowerChar => 'q', upperChar => 'Q', digitChar => 'd', str => 
'AbCD123-@$#');
+
+-- Test for named arguments for CountMinSketchAgg
+create temporary view t2 as select * from values
+  ('val2a', 6S, 12, 14L, float(15), 20D, 20E2, timestamp '2014-04-04 
01:01:00.000', date '2014-04-04'),
+  ('val1b', 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 
01:01:00.000', date '2014-05-04'),
+  ('val1b', 8S, 16, 119L, float(17), 25D, 26E2, timestamp '2015-05-04 
01:01:00.000', date '2015-05-04'),
+  ('val1c', 12S, 16, 219L, float(17), 25D, 26E2, timestamp '2016-05-04 
01:01:00.000', date '2016-05-04'),
+  ('val1b', null, 16, 319L, float(17), 25D, 26E2, timestamp '2017-05-04 
01:01:00.000', null),
+  ('val2e', 8S, null, 419L, float(17), 25D, 26E2, timestamp '2014-06-04 
01:01:00.000', date '2014-06-04'),
+  ('val1f', 19S, null, 519L, float(17), 25D, 26E2, timestamp '2014-05-04 
01:01:00.000', date '2014-05-04'),
+  ('val1b', 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-06-04 
01:01:00.000', date '2014-06-04'),
+  ('val1b', 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04 
01:01:00.000', date '2014-07-04'),
+  ('val1c', 12S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-08-04 
01:01:00.000', date '2014-08-05'),
+  ('val1e', 8S, null, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 
01:01:00.000', date '2014-09-04'),
+  ('val1f', 19S, null, 19L, float(17), 25D, 26E2, timestamp '2014-10-04 
01:01:00.000', date '2014-10-04'),
+  ('val1b', null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 
01:01:00.000', null)
+  as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i);
+
+SELECT hex(count_min_sketch(t2d, seed => 1, epsilon => 0.5d, confidence => 
0.5d)) FROM t2;
+
+-- Unexpected positional argument
 SELECT mask(lowerChar => 'q', 'AbCD123-@$#', upperChar => 'Q', otherChar => 
'o', digitChar => 'd');
+-- Duplicate parameter assignment

Review Comment:
   Where is the testing for this?



##########
sql/core/src/test/resources/sql-tests/inputs/named-function-arguments.sql:
##########
@@ -1,5 +1,36 @@
+-- Test for named arguments for Mask
 SELECT mask('AbCD123-@$#', lowerChar => 'q', upperChar => 'Q', otherChar => 
'o', digitChar => 'd');
 SELECT mask(lowerChar => 'q', upperChar => 'Q', otherChar => 'o', digitChar => 
'd', str => 'AbCD123-@$#');
 SELECT mask('AbCD123-@$#', lowerChar => 'q', upperChar => 'Q', digitChar => 
'd');
 SELECT mask(lowerChar => 'q', upperChar => 'Q', digitChar => 'd', str => 
'AbCD123-@$#');
+
+-- Test for named arguments for CountMinSketchAgg
+create temporary view t2 as select * from values
+  ('val2a', 6S, 12, 14L, float(15), 20D, 20E2, timestamp '2014-04-04 
01:01:00.000', date '2014-04-04'),
+  ('val1b', 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 
01:01:00.000', date '2014-05-04'),
+  ('val1b', 8S, 16, 119L, float(17), 25D, 26E2, timestamp '2015-05-04 
01:01:00.000', date '2015-05-04'),
+  ('val1c', 12S, 16, 219L, float(17), 25D, 26E2, timestamp '2016-05-04 
01:01:00.000', date '2016-05-04'),
+  ('val1b', null, 16, 319L, float(17), 25D, 26E2, timestamp '2017-05-04 
01:01:00.000', null),
+  ('val2e', 8S, null, 419L, float(17), 25D, 26E2, timestamp '2014-06-04 
01:01:00.000', date '2014-06-04'),
+  ('val1f', 19S, null, 519L, float(17), 25D, 26E2, timestamp '2014-05-04 
01:01:00.000', date '2014-05-04'),
+  ('val1b', 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-06-04 
01:01:00.000', date '2014-06-04'),
+  ('val1b', 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04 
01:01:00.000', date '2014-07-04'),
+  ('val1c', 12S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-08-04 
01:01:00.000', date '2014-08-05'),
+  ('val1e', 8S, null, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 
01:01:00.000', date '2014-09-04'),
+  ('val1f', 19S, null, 19L, float(17), 25D, 26E2, timestamp '2014-10-04 
01:01:00.000', date '2014-10-04'),
+  ('val1b', null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 
01:01:00.000', null)
+  as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i);
+
+SELECT hex(count_min_sketch(t2d, seed => 1, epsilon => 0.5d, confidence => 
0.5d)) FROM t2;
+
+-- Unexpected positional argument

Review Comment:
   Ping on this comment :)



##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/SupportsNamedArguments.scala:
##########
@@ -0,0 +1,162 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.catalyst.plans.logical
+
+import org.apache.spark.sql.catalyst.expressions.{Expression, 
NamedArgumentExpression}
+import org.apache.spark.sql.errors.QueryCompilationErrors
+import org.apache.spark.sql.types.AbstractDataType
+
+/**
+ * The class which companion objects of function expression may implement to
+ * support named arguments for that function expression. Please note that 
variadic final
+ * arguments are NOT supported for named arguments. Do not use for functions 
that
+ * has variadic final arguments!

Review Comment:
   ```suggestion
    * have variadic final arguments!
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to