dtenedor commented on code in PR #36365:
URL: https://github.com/apache/spark/pull/36365#discussion_r860053078
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ToNumberParser.scala:
##########
@@ -599,4 +612,227 @@ class ToNumberParser(numberFormat: String, errorOnFail:
Boolean) extends Seriali
Decimal(javaDecimal, precision, scale)
}
}
+
+ /**
+ * Converts a decimal value to a string based on the given number format.
+ *
+ * Iterates through the [[formatTokens]] obtained from processing the format
string, while also
+ * inspecting the input decimal value.
+ *
+ * @param input the decimal value that needs to be converted
+ * @return the result String value obtained from string formatting
+ */
+ def format(input: Decimal): UTF8String = {
+ val result = new StringBuilder()
+ // These are string representations of the input Decimal value.
+ val (inputBeforeDecimalPoint: String,
+ inputAfterDecimalPoint: String) =
+ formatSplitInputBeforeAndAfterDecimalPoint(input).getOrElse(
+ return formatMatchFailure(input, numberFormat))
+ // These are indexes into the characters of the input string before and
after the decimal point.
+ formattingBeforeDecimalPointIndex = 0
+ formattingAfterDecimalPointIndex = 0
+ var reachedDecimalPoint = false
+
+ // Iterate through the tokens representing the provided format string, in
order.
+ for (formatToken: InputToken <- formatTokens) {
+ formatToken match {
+ case groups: DigitGroups =>
+ formatDigitGroups(
+ groups, inputBeforeDecimalPoint, inputAfterDecimalPoint,
reachedDecimalPoint, result)
+ case DecimalPoint() =>
+ result.append('.')
+ reachedDecimalPoint = true
+ case DollarSign() =>
+ result.append('$')
+ case _: OptionalPlusOrMinusSign | _: OptionalMinusSign =>
+ if (input < Decimal.ZERO) {
+ result.append('-')
+ } else {
+ result.append(' ')
+ }
+ case OpeningAngleBracket() =>
+ if (input < Decimal.ZERO) {
+ result.append('<')
+ } else {
+ result.append(' ')
+ }
+ case ClosingAngleBracket() =>
+ if (input < Decimal.ZERO) {
+ if (result.nonEmpty && result.last == ' ') {
+ result.setCharAt(result.length - 1, '>')
+ result.append(' ')
+ } else {
+ result.append('>')
+ }
+ } else {
+ result.append(' ')
+ }
+ }
+ }
+
+ if (formattingBeforeDecimalPointIndex < inputBeforeDecimalPoint.length ||
+ formattingAfterDecimalPointIndex < inputAfterDecimalPoint.length) {
+ // Remaining digits before or after the decimal point exist in the
decimal value but not in
+ // the format string.
+ formatMatchFailure(input, numberFormat)
+ } else {
+ UTF8String.fromString(result.toString())
+ }
+ }
+
+ /**
+ * Splits the provided Decimal value's string representation by the decimal
point, if any.
+ * @param input the Decimal value to consume
+ * @return two strings representing the contents before and after the
decimal point (if any),
+ * respectively, or None if the input string did not match the
format string.
+ */
+ private def formatSplitInputBeforeAndAfterDecimalPoint(
+ input: Decimal): Option[(String, String)] = {
+ val beforeDecimalPointBuilder = new StringBuilder()
+ val afterDecimalPointBuilder = new StringBuilder()
+ var numInputDigitsBeforeDecimalPoint: Int = 0
+ var numInputDigitsAfterDecimalPoint: Int = 0
+ var reachedDecimalPoint = false
+ var negateResult = false
+ // Convert the input Decimal value to a string (without exponent
notation). Strip leading zeros
+ // in order to match cases when the format string begins with a decimal
point.
+ val inputString = input.toJavaBigDecimal.toPlainString.dropWhile(_ == '0')
+ for (c: Char <- inputString) {
+ c match {
+ case _ if c >= ZERO_DIGIT && c <= NINE_DIGIT =>
+ if (reachedDecimalPoint) {
+ afterDecimalPointBuilder.append(c)
+ numInputDigitsAfterDecimalPoint += 1
+ } else {
+ beforeDecimalPointBuilder.append(c)
+ numInputDigitsBeforeDecimalPoint += 1
+ }
+ case POINT_SIGN =>
+ reachedDecimalPoint = true
+ case MINUS_SIGN =>
+ negateResult = true
+ }
+ }
+ // If the format string specifies more digits than the
'beforeDecimalPointBuilder', prepend
+ // leading spaces to make them the same length. Likewise, if the format
string specifies more
+ // digits than the 'afterDecimalPointBuilder', append trailing spaces to
make them the same
+ // length. This step simplifies logic consuming the format tokens later.
+ reachedDecimalPoint = false
+ var numFormatDigitsBeforeDecimalPoint: Int = 0
+ var numFormatDigitsAfterDecimalPoint: Int = 0
+ formatTokens.foreach {
+ case digitGroups: DigitGroups =>
+ digitGroups.digits.foreach { digits =>
+ val numDigits = digits match {
+ case ExactlyAsManyDigits(num) => num
+ case AtMostAsManyDigits(num) => num
+ }
+ for (_ <- 0 until numDigits) {
+ if (!reachedDecimalPoint) {
+ numFormatDigitsBeforeDecimalPoint += 1
+ } else {
+ numFormatDigitsAfterDecimalPoint += 1
+ }
+ }
+ }
+ case _: DecimalPoint =>
+ reachedDecimalPoint = true
+ case _ =>
+ }
+ // If there were more digits in the provided input string (before or after
the decimal point)
+ // than specified in the format string, the input string does not match
the format.
+ if (numFormatDigitsBeforeDecimalPoint < numInputDigitsBeforeDecimalPoint ||
+ numFormatDigitsAfterDecimalPoint < numInputDigitsAfterDecimalPoint) {
+ return None
+ }
+ val leadingSpaces = " " * (numFormatDigitsBeforeDecimalPoint -
numInputDigitsBeforeDecimalPoint)
+ val trailingSpaces = " " * (numFormatDigitsAfterDecimalPoint -
numInputDigitsAfterDecimalPoint)
+ Some((leadingSpaces + beforeDecimalPointBuilder.toString,
+ afterDecimalPointBuilder.toString + trailingSpaces))
+ }
+
+ /**
+ * Performs format processing on the digits in [[groups]], updating
[[result]].
+ *
+ * @param groups the token representing a group of digits from the format
string
+ * @param inputBeforeDecimalPoint string representation of the input decimal
value before the
+ * decimal point
+ * @param inputAfterDecimalPoint string representation of the input decimal
value after the
+ * decimal point
+ * @param reachedDecimalPoint true if we have reached the decimal point so
far during processing
+ * @param result the result of formatting is built here as a string during
iteration
+ */
+ def formatDigitGroups(
+ groups: DigitGroups,
+ inputBeforeDecimalPoint: String,
+ inputAfterDecimalPoint: String,
+ reachedDecimalPoint: Boolean,
+ result: StringBuilder): Unit = {
+ // Iterate through the tokens in the DigitGroups. Reverse the order of the
tokens so we
+ // consume them in the left-to-right order that they originally appeared
in the format
+ // string.
+ for (digitGroupToken <- groups.tokens.reverse) {
+ digitGroupToken match {
+ case digits: Digits if !reachedDecimalPoint =>
+ val numDigits = digits match {
+ case ExactlyAsManyDigits(num) => num
+ case AtMostAsManyDigits(num) => num
+ }
+ for (_ <- 0 until numDigits) {
+ inputBeforeDecimalPoint(formattingBeforeDecimalPointIndex) match {
+ case c: Char if c != ' ' =>
+ result.append(c)
+ case _ => digits match {
+ case _: ExactlyAsManyDigits =>
+ // The format string started with a 0 and had more digits
than the provided
+ // input string, so we prepend a 0 to the result.
+ result.append('0')
+ case _: AtMostAsManyDigits =>
+ // The format string started with a 9 and had more digits
than the provided
+ // input string, so we prepend a space to the result.
+ if (result.nonEmpty && result.last == '<') {
Review Comment:
The goal is simply to check whether we are adding a space right after a '<' or
'-' and, if so, to swap the two characters. I added a comment to clarify this.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]