dtenedor commented on code in PR #39747:
URL: https://github.com/apache/spark/pull/39747#discussion_r1087116548


##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala:
##########
@@ -3039,3 +3039,113 @@ case class SplitPart (
       partNum = newChildren.apply(2))
   }
 }
+
+/**
+ * Function to check if a given number string is a valid Luhn number.
+ * Returns true, if the number string is a valid Luhn number, false otherwise
+ */
+@ExpressionDescription(
+  usage = """
+    _FUNC_(str ) - Checks that a string of digits is valid according to the 
Luhn algorithm

Review Comment:
   ```suggestion
       _FUNC_(str ) - Checks that a string of digits is valid according to the 
Luhn algorithm.
   ```



##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala:
##########
@@ -3039,3 +3039,113 @@ case class SplitPart (
       partNum = newChildren.apply(2))
   }
 }
+
+/**
+ * Function to check if a given number string is a valid Luhn number.
+ * Returns true, if the number string is a valid Luhn number, false otherwise
+ */
+@ExpressionDescription(
+  usage = """
+    _FUNC_(str ) - Checks that a string of digits is valid according to the 
Luhn algorithm
+    This checksum function is widely applied on credit card numbers and 
government identification
+    numbers to distinguish valid numbers from mistyped, incorrect numbers.
+  """,
+  examples = """
+    Examples:
+      > SELECT _FUNC_('8112189876');
+       true
+      > SELECT _FUNC_('79927398713');
+       true
+      > SELECT _FUNC_('79927398714');
+       false
+  """,
+  since = "3.4.0",
+  group = "string_funcs")
+case class Luhncheck(child: Expression) extends UnaryExpression with 
ExpectsInputTypes {
+
+  override def nullable: Boolean = false
+
+  override protected def withNewChildInternal(newChild: Expression): Luhncheck 
=
+    copy(child = newChild)
+
+  /**
+   * Expected input types from child expressions. The i-th position in the 
returned seq indicates
+   * the type requirement for the i-th child.
+   *
+   * The possible values at each position are:
+   *   1. a specific data type, for example, LongType, StringType.
+   *   2. a non-leaf abstract data type, for example, NumericType, 
IntegralType, FractionalType.
+   */
+  override def inputTypes: Seq[AbstractDataType] = Seq(StringType)
+
+  /**
+   * Returns the [[DataType]] of the result of evaluating this expression. It 
is invalid to query
+   * the dataType of an unresolved expression (i.e., when `resolved` == false).
+   */
+  override def dataType: DataType = BooleanType
+
+  /**
+   * Default behavior of evaluation according to the default nullability of 
UnaryExpression. If
+   * subclass of UnaryExpression override nullable, probably should also 
override this.
+   */
+  override def eval(input: InternalRow): Any = 
Luhncheck.isLuhnNumber(child.eval(input))
+
+  /**
+   * Returns Java source code that can be compiled to evaluate this 
expression. The default
+   * behavior is to call the eval method of the expression. Concrete 
expression implementations
+   * should override this to do actual code generation.
+   *
+   * @param ctx
+   *   a [[CodegenContext]]
+   * @param ev
+   *   an [[ExprCode]] with unique terms.
+   * @return
+   *   an [[ExprCode]] containing the Java source code to generate the given 
expression
+   */
+  override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): 
ExprCode = {
+    defineCodeGen(
+      ctx,
+      ev,
+      c => {
+        
s"""org.apache.spark.sql.catalyst.expressions.Luhncheck.isLuhnNumber($c)"""
+      })
+  }
+}
+
+object Luhncheck {
+
+  /**
+   * Function to check if a given number string is a valid Luhn number
+   *
+   * @param numberString
+   *   the number string to check
+   * @return
+   *   true if the number string is a valid Luhn number, false otherwise
+   */
+  def isLuhnNumber(numberString: Any): Boolean = {
+    if (numberString != null) {
+      val digits = numberString.asInstanceOf[UTF8String].toString
+      // check if all characters in the input string are digits
+      if (!digits.forall(_.isDigit)) {
+        return false // if not, return false
+      }
+      // reverse the string so that we can use a foldLeft function

Review Comment:
   please update the comment to start with a capital letter and end with 
punctuation (here and everywhere else in the PR)



##########
sql/core/src/test/resources/sql-tests/inputs/string-functions.sql:
##########
@@ -231,3 +231,29 @@ CREATE TEMPORARY VIEW fmtTable(fmtField) AS SELECT * FROM 
VALUES ('invalidFormat
 SELECT to_binary('abc', fmtField) FROM fmtTable;
 -- Clean up
 DROP VIEW IF EXISTS fmtTable;
+-- luhn_check
+select luhn_check('4111111111111111');
+select luhn_check('5500000000000004');
+select luhn_check('340000000000009');
+select luhn_check('6011000000000004');
+select luhn_check('378282246310005');
+select luhn_check('6011000990139424');
+select luhn_check('1234567890');
+select luhn_check('4111111111111');
+select luhn_check('4111111111111112');
+select luhn_check('371449635398431');
+select luhn_check('371449635398432');
+select luhn_check('30569309025904');
+select luhn_check('30569309025905');
+select luhn_check('6011111111111117');
+select luhn_check('6011111111111118');
+select luhn_check('3530111333300000');
+select luhn_check('3530111333300001');
+select luhn_check('5105105105105100');
+select luhn_check('5105105105105106');
+select luhn_check('510B105105105106');
+select luhn_check('E105105105105106');

Review Comment:
   also add some negative test cases where the input is the wrong type? and 
some positive tests where the input is an attribute reference instead of a 
constant string?



##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala:
##########
@@ -3039,3 +3039,113 @@ case class SplitPart (
       partNum = newChildren.apply(2))
   }
 }
+
+/**
+ * Function to check if a given number string is a valid Luhn number.
+ * Returns true, if the number string is a valid Luhn number, false otherwise
+ */
+@ExpressionDescription(
+  usage = """
+    _FUNC_(str ) - Checks that a string of digits is valid according to the 
Luhn algorithm
+    This checksum function is widely applied on credit card numbers and 
government identification
+    numbers to distinguish valid numbers from mistyped, incorrect numbers.
+  """,
+  examples = """
+    Examples:
+      > SELECT _FUNC_('8112189876');
+       true
+      > SELECT _FUNC_('79927398713');
+       true
+      > SELECT _FUNC_('79927398714');
+       false
+  """,
+  since = "3.4.0",
+  group = "string_funcs")
+case class Luhncheck(child: Expression) extends UnaryExpression with 
ExpectsInputTypes {
+
+  override def nullable: Boolean = false
+
+  override protected def withNewChildInternal(newChild: Expression): Luhncheck 
=
+    copy(child = newChild)
+
+  /**
+   * Expected input types from child expressions. The i-th position in the 
returned seq indicates
+   * the type requirement for the i-th child.
+   *
+   * The possible values at each position are:
+   *   1. a specific data type, for example, LongType, StringType.
+   *   2. a non-leaf abstract data type, for example, NumericType, 
IntegralType, FractionalType.
+   */
+  override def inputTypes: Seq[AbstractDataType] = Seq(StringType)
+
+  /**
+   * Returns the [[DataType]] of the result of evaluating this expression. It 
is invalid to query
+   * the dataType of an unresolved expression (i.e., when `resolved` == false).
+   */
+  override def dataType: DataType = BooleanType
+
+  /**
+   * Default behavior of evaluation according to the default nullability of 
UnaryExpression. If
+   * subclass of UnaryExpression override nullable, probably should also 
override this.
+   */
+  override def eval(input: InternalRow): Any = 
Luhncheck.isLuhnNumber(child.eval(input))
+
+  /**
+   * Returns Java source code that can be compiled to evaluate this 
expression. The default
+   * behavior is to call the eval method of the expression. Concrete 
expression implementations
+   * should override this to do actual code generation.
+   *
+   * @param ctx
+   *   a [[CodegenContext]]
+   * @param ev
+   *   an [[ExprCode]] with unique terms.
+   * @return
+   *   an [[ExprCode]] containing the Java source code to generate the given 
expression
+   */
+  override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): 
ExprCode = {
+    defineCodeGen(
+      ctx,
+      ev,
+      c => {
+        
s"""org.apache.spark.sql.catalyst.expressions.Luhncheck.isLuhnNumber($c)"""
+      })
+  }
+}
+
+object Luhncheck {
+
+  /**
+   * Function to check if a given number string is a valid Luhn number
+   *
+   * @param numberString
+   *   the number string to check
+   * @return
+   *   true if the number string is a valid Luhn number, false otherwise
+   */
+  def isLuhnNumber(numberString: Any): Boolean = {
+    if (numberString != null) {
+      val digits = numberString.asInstanceOf[UTF8String].toString
+      // check if all characters in the input string are digits
+      if (!digits.forall(_.isDigit)) {
+        return false // if not, return false

Review Comment:
   `return` is generally unsafe in Scala/Spark, please use an `if` block instead



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to