cloud-fan commented on code in PR #36154:
URL: https://github.com/apache/spark/pull/36154#discussion_r849114431


##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ToNumberParser.scala:
##########
@@ -340,21 +322,65 @@ class ToNumberParser(numberFormat: String, errorOnFail: 
Boolean) extends Seriali
             false
         })
     }) {
-      "Thousands separators (,) must have digits in between them " +
+      return "Thousands separators (,) must have digits in between them " +
         s"in the number format: '$numberFormat'"
     }
-    // Thousands separators are not allowed after the decimal point, if any.
-    else if (digitGroupsAfterDecimalPoint.exists {
+    // Make sure that thousands separators does not appear after the decimal 
point, if any.
+    if (digitGroupsAfterDecimalPoint.exists {
       case DigitGroups(tokens, digits) =>
         tokens.length > digits.length
     }) {
-      "Thousands separators (,) may not appear after the decimal point " +
+      return "Thousands separators (,) may not appear after the decimal point 
" +
         s"in the number format: '$numberFormat'"
     }
-    // Validation of the format string finished successfully.
-    else {
-      ""
+    // Make sure that the format string does not contain any prohibited 
duplicate tokens.
+    val inputTokenCounts = formatTokens.groupBy(identity).mapValues(_.size)
+    Seq(DecimalPoint(),
+      OptionalPlusOrMinusSign(),
+      OptionalMinusSign(),
+      DollarSign(),
+      ClosingAngleBracket()).foreach {
+      token => if (inputTokenCounts.getOrElse(token, 0) > 1) {
+        return s"At most one ${token.toString} is allowed in the number 
format: '$numberFormat'"
+      }
+    }
+    // Enforce the ordering of tokens in the format string according to this 
specification:
+    // [ MI ] [ S ] [ L | $ ]
+    // [ 0 | 9 | G | , ] [...]
+    // [ . | D ]
+    // [ 0 | 9 ] [...]
+    // [ L | $ ] [ PR | MI | S ]
+    val allowedFormatTokens: Seq[Seq[InputToken]] = Seq(
+      Seq(OpeningAngleBracket()),
+      Seq(OptionalMinusSign()),
+      Seq(OptionalPlusOrMinusSign()),
+      Seq(DollarSign()),
+      Seq(DigitGroups(Seq(), Seq())),
+      Seq(DecimalPoint()),
+      Seq(DigitGroups(Seq(), Seq())),
+      Seq(DollarSign()),
+      Seq(OptionalMinusSign(), OptionalPlusOrMinusSign(), 
ClosingAngleBracket())
+    )
+    var formatTokenIndex = 0
+    for (allowed <- allowedFormatTokens) {
+      def tokensMatch(lhs: InputToken, rhs: InputToken): Boolean = {
+        lhs match {
+          case _: DigitGroups => rhs.isInstanceOf[DigitGroups]
+          case _ => lhs == rhs
+        }
+      }
+      if (formatTokenIndex < formatTokens.length &&
+        allowed.exists(tokensMatch(_, formatTokens(formatTokenIndex)))) {
+        formatTokenIndex += 1
+      }
     }
+    if (formatTokenIndex < formatTokens.length) {
+      return s"Unexpected ${formatTokens(formatTokenIndex).toString} found in 
the format string " +
+        s"'$numberFormat'; the structure of the format string must match: " +
+        "[MI] [S] [$] [0|9|G|,]* [.|D] [0|9]* [$] [PR|MI|S]"

Review Comment:
   Do we require the ordering of `MI`, `S` and `$`?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to