cloud-fan commented on code in PR #36154:
URL: https://github.com/apache/spark/pull/36154#discussion_r849114431
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ToNumberParser.scala:
##########
@@ -340,21 +322,65 @@ class ToNumberParser(numberFormat: String, errorOnFail:
Boolean) extends Seriali
false
})
}) {
- "Thousands separators (,) must have digits in between them " +
+ return "Thousands separators (,) must have digits in between them " +
s"in the number format: '$numberFormat'"
}
- // Thousands separators are not allowed after the decimal point, if any.
- else if (digitGroupsAfterDecimalPoint.exists {
+ // Make sure that thousands separators does not appear after the decimal
point, if any.
+ if (digitGroupsAfterDecimalPoint.exists {
case DigitGroups(tokens, digits) =>
tokens.length > digits.length
}) {
- "Thousands separators (,) may not appear after the decimal point " +
+ return "Thousands separators (,) may not appear after the decimal point
" +
s"in the number format: '$numberFormat'"
}
- // Validation of the format string finished successfully.
- else {
- ""
+ // Make sure that the format string does not contain any prohibited
duplicate tokens.
+ val inputTokenCounts = formatTokens.groupBy(identity).mapValues(_.size)
+ Seq(DecimalPoint(),
+ OptionalPlusOrMinusSign(),
+ OptionalMinusSign(),
+ DollarSign(),
+ ClosingAngleBracket()).foreach {
+ token => if (inputTokenCounts.getOrElse(token, 0) > 1) {
+ return s"At most one ${token.toString} is allowed in the number
format: '$numberFormat'"
+ }
+ }
+ // Enforce the ordering of tokens in the format string according to this
specification:
+ // [ MI ] [ S ] [ L | $ ]
+ // [ 0 | 9 | G | , ] [...]
+ // [ . | D ]
+ // [ 0 | 9 ] [...]
+ // [ L | $ ] [ PR | MI | S ]
+ val allowedFormatTokens: Seq[Seq[InputToken]] = Seq(
+ Seq(OpeningAngleBracket()),
+ Seq(OptionalMinusSign()),
+ Seq(OptionalPlusOrMinusSign()),
+ Seq(DollarSign()),
+ Seq(DigitGroups(Seq(), Seq())),
+ Seq(DecimalPoint()),
+ Seq(DigitGroups(Seq(), Seq())),
+ Seq(DollarSign()),
+ Seq(OptionalMinusSign(), OptionalPlusOrMinusSign(),
ClosingAngleBracket())
+ )
+ var formatTokenIndex = 0
+ for (allowed <- allowedFormatTokens) {
+ def tokensMatch(lhs: InputToken, rhs: InputToken): Boolean = {
+ lhs match {
+ case _: DigitGroups => rhs.isInstanceOf[DigitGroups]
+ case _ => lhs == rhs
+ }
+ }
+ if (formatTokenIndex < formatTokens.length &&
+ allowed.exists(tokensMatch(_, formatTokens(formatTokenIndex)))) {
+ formatTokenIndex += 1
+ }
}
+ if (formatTokenIndex < formatTokens.length) {
+ return s"Unexpected ${formatTokens(formatTokenIndex).toString} found in
the format string " +
+ s"'$numberFormat'; the structure of the format string must match: " +
+ "[MI] [S] [$] [0|9|G|,]* [.|D] [0|9]* [$] [PR|MI|S]"
Review Comment:
Do we require the ordering of `MI`, `S` and `$`?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]