cloud-fan commented on code in PR #36365:
URL: https://github.com/apache/spark/pull/36365#discussion_r868996671
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ToNumberParser.scala:
##########
@@ -599,4 +614,254 @@ class ToNumberParser(numberFormat: String, errorOnFail: Boolean) extends Seriali
Decimal(javaDecimal, precision, scale)
}
}
+
+ /**
+ * Converts a decimal value to a string based on the given number format.
+ *
+ * Iterates through the [[formatTokens]] obtained from processing the format
string, while also
+ * inspecting the input decimal value.
+ *
+ * @param input the decimal value that needs to be converted
+ * @return the result String value obtained from string formatting
+ */
+ def format(input: Decimal): UTF8String = {
+ val result = new StringBuilder()
+ // These are string representations of the input Decimal value.
+ val (inputBeforeDecimalPoint: String,
+ inputAfterDecimalPoint: String) =
+ formatSplitInputBeforeAndAfterDecimalPoint(input)
+ // These are indexes into the characters of the input string before and
after the decimal point.
+ formattingBeforeDecimalPointIndex = 0
+ formattingAfterDecimalPointIndex = 0
+ var reachedDecimalPoint = false
+
+ // Iterate through the tokens representing the provided format string, in
order.
+ for (formatToken: InputToken <- formatTokens) {
+ formatToken match {
+ case groups: DigitGroups =>
+ formatDigitGroups(
+ groups, inputBeforeDecimalPoint, inputAfterDecimalPoint,
reachedDecimalPoint, result)
+ case DecimalPoint() =>
+ // If the last character so far is a space, change it to a zero.
This means the input
+ // decimal does not have an integer part.
+ if (result.nonEmpty && result.last == SPACE) {
+ result(result.length - 1) = ZERO_DIGIT
+ }
+ result.append(POINT_SIGN)
+ reachedDecimalPoint = true
+ case DollarSign() =>
+ result.append(DOLLAR_SIGN)
+ case _: OptionalPlusOrMinusSign =>
+ stripTrailingLoneDecimalPoint(result)
+ if (input < Decimal.ZERO) {
+ addCharacterCheckingTrailingSpaces(result, MINUS_SIGN)
+ } else {
+ addCharacterCheckingTrailingSpaces(result, PLUS_SIGN)
+ }
+ case _: OptionalMinusSign =>
+ if (input < Decimal.ZERO) {
+ stripTrailingLoneDecimalPoint(result)
+ addCharacterCheckingTrailingSpaces(result, MINUS_SIGN)
+ // Add a second space to account for the "MI" sequence comprising
two characters in the
+ // format string.
+ result.append(SPACE)
+ } else {
+ result.append(SPACE)
+ result.append(SPACE)
+ }
+ case OpeningAngleBracket() =>
+ if (input < Decimal.ZERO) {
+ result.append(ANGLE_BRACKET_OPEN)
+ }
+ case ClosingAngleBracket() =>
+ stripTrailingLoneDecimalPoint(result)
+ if (input < Decimal.ZERO) {
+ addCharacterCheckingTrailingSpaces(result, ANGLE_BRACKET_CLOSE)
+ } else {
+ result.append(SPACE)
+ result.append(SPACE)
+ }
+ }
+ }
+
+ if (formattingBeforeDecimalPointIndex < inputBeforeDecimalPoint.length ||
+ formattingAfterDecimalPointIndex < inputAfterDecimalPoint.length) {
+ // Remaining digits before or after the decimal point exist in the
decimal value but not in
+ // the format string.
+ formatMatchFailure(input, numberFormat)
+ } else {
+ stripTrailingLoneDecimalPoint(result)
+ if (result.isEmpty || result.toString == "+" || result.toString == "-") {
+ result.clear()
+ result.append('0')
+ }
+ UTF8String.fromString(result.toString())
+ }
+ }
+
+ /**
+ * Splits the provided Decimal value's string representation by the decimal
point, if any.
+ * @param input the Decimal value to consume
+ * @return two strings representing the contents before and after the
decimal point (if any)
+ */
+ private def formatSplitInputBeforeAndAfterDecimalPoint(input: Decimal):
(String, String) = {
+ // Convert the input Decimal value to a string (without exponent notation).
+ val inputString = input.toJavaBigDecimal.toPlainString
+ // Split the digits before and after the decimal point.
+ val tokens: Array[String] = inputString.split(POINT_SIGN)
+ var beforeDecimalPoint: String = tokens(0)
+ var afterDecimalPoint: String = if (tokens.length > 1) tokens(1) else ""
+ // Strip any leading minus sign to consider the digits only.
+ // Strip leading and trailing zeros to match cases when the format string
begins with a decimal
+ // point.
+ beforeDecimalPoint = beforeDecimalPoint.dropWhile(c => c == MINUS_SIGN ||
c == ZERO_DIGIT)
+ afterDecimalPoint = afterDecimalPoint.reverse.dropWhile(_ ==
ZERO_DIGIT).reverse
+
+ // If the format string specifies more digits than the
'beforeDecimalPoint', prepend leading
+ // spaces to make them the same length. Likewise, if the format string
specifies more digits
+ // than the 'afterDecimalPoint', append trailing spaces to make them the
same length. This step
+ // simplifies logic consuming the format tokens later.
+ var reachedDecimalPoint = false
+ var numFormatDigitsBeforeDecimalPoint = 0
+ var numFormatDigitsAfterDecimalPoint = 0
+ formatTokens.foreach {
+ case digitGroups: DigitGroups =>
+ digitGroups.digits.foreach { digits =>
+ val numDigits = digits match {
+ case ExactlyAsManyDigits(num) => num
+ case AtMostAsManyDigits(num) => num
+ }
+ for (_ <- 0 until numDigits) {
+ if (!reachedDecimalPoint) {
+ numFormatDigitsBeforeDecimalPoint += 1
+ } else {
+ numFormatDigitsAfterDecimalPoint += 1
+ }
+ }
+ }
+ case _: DecimalPoint =>
+ reachedDecimalPoint = true
+ case _ =>
+ }
+ // If there were more digits in the provided input string (before or after
the decimal point)
+ // than specified in the format string, this is an overflow.
+ if (numFormatDigitsBeforeDecimalPoint < beforeDecimalPoint.length ||
+ numFormatDigitsAfterDecimalPoint < afterDecimalPoint.length) {
+ beforeDecimalPoint = "#" * numFormatDigitsBeforeDecimalPoint
+ afterDecimalPoint = "#" * numFormatDigitsAfterDecimalPoint
+ }
+ val leadingSpaces = " " * (numFormatDigitsBeforeDecimalPoint -
beforeDecimalPoint.length)
+ val trailingSpaces = " " * (numFormatDigitsAfterDecimalPoint -
afterDecimalPoint.length)
+ (leadingSpaces + beforeDecimalPoint, afterDecimalPoint + trailingSpaces)
+ }
+
+ /**
+ * Performs format processing on the digits in [[groups]], updating
[[result]].
+ *
+ * @param groups the token representing a group of digits from the format
string
+ * @param inputBeforeDecimalPoint string representation of the input decimal
value before the
+ * decimal point
+ * @param inputAfterDecimalPoint string representation of the input decimal
value after the
+ * decimal point
+ * @param reachedDecimalPoint true if we have reached the decimal point so
far during processing
+ * @param result the result of formatting is built here as a string during
iteration
+ */
+ private def formatDigitGroups(
+ groups: DigitGroups,
+ inputBeforeDecimalPoint: String,
+ inputAfterDecimalPoint: String,
+ reachedDecimalPoint: Boolean,
+ result: StringBuilder): Unit = {
+ // Iterate through the tokens in the DigitGroups. Reverse the order of the
tokens so we
+ // consume them in the left-to-right order that they originally appeared
in the format
+ // string.
+ for (digitGroupToken <- groups.tokens.reverse) {
+ digitGroupToken match {
+ case digits: Digits if !reachedDecimalPoint =>
+ val numDigits = digits match {
+ case ExactlyAsManyDigits(num) => num
+ case AtMostAsManyDigits(num) => num
+ }
+ for (_ <- 0 until numDigits) {
+ inputBeforeDecimalPoint(formattingBeforeDecimalPointIndex) match {
+ case SPACE if digits.isInstanceOf[ExactlyAsManyDigits] =>
+ // The format string started with a zero and had more digits
than the provided
+ // input string, so we prepend a zero to the result.
+ result.append(ZERO_DIGIT)
+ case SPACE =>
+ addSpaceCheckingTrailingCharacters(result)
+ case c: Char =>
+ result.append(c)
+ }
+ formattingBeforeDecimalPointIndex += 1
+ }
+ case digits: Digits if reachedDecimalPoint =>
+ val numDigits = digits match {
+ case ExactlyAsManyDigits(num) => num
+ case AtMostAsManyDigits(num) => num
+ }
+ for (_ <- 0 until numDigits) {
+ inputAfterDecimalPoint(formattingAfterDecimalPointIndex) match {
+ case SPACE =>
+ addSpaceCheckingTrailingCharacters(result)
+ case c: Char =>
+ result.append(c)
+ }
+ formattingAfterDecimalPointIndex += 1
+ }
+ case _: ThousandsSeparator =>
+ if (result.nonEmpty && result.last >= ZERO_DIGIT && result.last <=
NINE_DIGIT) {
Review Comment:
nit: `Character.isDigit(result.last)` is clearer
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]