dtenedor commented on code in PR #36365: URL: https://github.com/apache/spark/pull/36365#discussion_r871723101
########## sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ToNumberParser.scala: ########## @@ -599,4 +614,250 @@ class ToNumberParser(numberFormat: String, errorOnFail: Boolean) extends Seriali Decimal(javaDecimal, precision, scale) } } + + /** + * Converts a decimal value to a string based on the given number format. + * + * Iterates through the [[formatTokens]] obtained from processing the format string, while also + * inspecting the input decimal value. + * + * @param input the decimal value that needs to be converted + * @return the result String value obtained from string formatting + */ + def format(input: Decimal): UTF8String = { + val result = new StringBuilder() + // These are string representations of the input Decimal value. + val (inputBeforeDecimalPoint: String, + inputAfterDecimalPoint: String) = + formatSplitInputBeforeAndAfterDecimalPoint(input) + // These are indexes into the characters of the input string before and after the decimal point. + formattingBeforeDecimalPointIndex = 0 + formattingAfterDecimalPointIndex = 0 + var reachedDecimalPoint = false + + // Iterate through the tokens representing the provided format string, in order. + for (formatToken: InputToken <- formatTokens) { + formatToken match { + case groups: DigitGroups => + formatDigitGroups( + groups, inputBeforeDecimalPoint, inputAfterDecimalPoint, reachedDecimalPoint, result) + case DecimalPoint() => + // If the last character so far is a space, change it to a zero. This means the input + // decimal does not have an integer part. + if (result.nonEmpty && result.last == SPACE) { + result(result.length - 1) = ZERO_DIGIT + } + result.append(POINT_SIGN) + reachedDecimalPoint = true + case DollarSign() => + result.append(DOLLAR_SIGN) + case _: OptionalPlusOrMinusSign => + stripTrailingLoneDecimalPoint(result) + if (input < Decimal.ZERO) { + addCharacterCheckingTrailingSpaces(result, MINUS_SIGN) + } else { + addCharacterCheckingTrailingSpaces(result, PLUS_SIGN) + } + case _: OptionalMinusSign => + if (input < Decimal.ZERO) { + stripTrailingLoneDecimalPoint(result) + addCharacterCheckingTrailingSpaces(result, MINUS_SIGN) + // Add a second space to account for the "MI" sequence comprising two characters in the + // format string. + result.append(SPACE) + } else { + result.append(SPACE) + result.append(SPACE) + } + case OpeningAngleBracket() => + if (input < Decimal.ZERO) { + result.append(ANGLE_BRACKET_OPEN) + } + case ClosingAngleBracket() => + stripTrailingLoneDecimalPoint(result) + if (input < Decimal.ZERO) { + addCharacterCheckingTrailingSpaces(result, ANGLE_BRACKET_CLOSE) + } else { + result.append(SPACE) + result.append(SPACE) + } + } + } + + if (formattingBeforeDecimalPointIndex < inputBeforeDecimalPoint.length || + formattingAfterDecimalPointIndex < inputAfterDecimalPoint.length) { + // Remaining digits before or after the decimal point exist in the decimal value but not in + // the format string. + formatMatchFailure(input, numberFormat) + } else { + stripTrailingLoneDecimalPoint(result) + val str = result.toString + if (result.isEmpty || str == "+" || str == "-") { + UTF8String.fromString("0") + } else { + UTF8String.fromString(str) + } + } + } + + /** + * Splits the provided Decimal value's string representation by the decimal point, if any. + * @param input the Decimal value to consume + * @return two strings representing the contents before and after the decimal point (if any) + */ + private def formatSplitInputBeforeAndAfterDecimalPoint(input: Decimal): (String, String) = { + // Convert the input Decimal value to a string (without exponent notation). + val inputString = input.toJavaBigDecimal.toPlainString + // Split the digits before and after the decimal point. + val tokens: Array[String] = inputString.split(POINT_SIGN) + var beforeDecimalPoint: String = tokens(0) + var afterDecimalPoint: String = if (tokens.length > 1) tokens(1) else "" + // Strip any leading minus sign to consider the digits only. + // Strip leading and trailing zeros to match cases when the format string begins with a decimal + // point. + beforeDecimalPoint = beforeDecimalPoint.dropWhile(c => c == MINUS_SIGN || c == ZERO_DIGIT) + afterDecimalPoint = afterDecimalPoint.reverse.dropWhile(_ == ZERO_DIGIT).reverse + + // If the format string specifies more digits than the 'beforeDecimalPoint', prepend leading + // spaces to make them the same length. Likewise, if the format string specifies more digits + // than the 'afterDecimalPoint', append trailing spaces to make them the same length. This step + // simplifies logic consuming the format tokens later. + var reachedDecimalPoint = false + var numFormatDigitsBeforeDecimalPoint = 0 + var numFormatDigitsAfterDecimalPoint = 0 + formatTokens.foreach { + case digitGroups: DigitGroups => + digitGroups.digits.foreach { digits => + val numDigits = digits match { + case ExactlyAsManyDigits(num) => num + case AtMostAsManyDigits(num) => num + } + for (_ <- 0 until numDigits) { + if (!reachedDecimalPoint) { + numFormatDigitsBeforeDecimalPoint += 1 + } else { + numFormatDigitsAfterDecimalPoint += 1 + } + } + } + case _: DecimalPoint => + reachedDecimalPoint = true + case _ => + } + // If there were more digits in the provided input string (before or after the decimal point) + // than specified in the format string, this is an overflow. + if (numFormatDigitsBeforeDecimalPoint < beforeDecimalPoint.length || + numFormatDigitsAfterDecimalPoint < afterDecimalPoint.length) { + beforeDecimalPoint = "#" * numFormatDigitsBeforeDecimalPoint + afterDecimalPoint = "#" * numFormatDigitsAfterDecimalPoint + } + val leadingSpaces = " " * (numFormatDigitsBeforeDecimalPoint - beforeDecimalPoint.length) + val trailingSpaces = "0" * (numFormatDigitsAfterDecimalPoint - afterDecimalPoint.length) Review Comment: Nice catch, done. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org