dtenedor commented on code in PR #36365:
URL: https://github.com/apache/spark/pull/36365#discussion_r861170782


##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ToNumberParser.scala:
##########
@@ -599,4 +617,240 @@ class ToNumberParser(numberFormat: String, errorOnFail: Boolean) extends Seriali
       Decimal(javaDecimal, precision, scale)
     }
   }
+
+  /**
+   * Converts a decimal value to a string based on the given number format.
+   *
+   * Iterates through the [[formatTokens]] obtained from processing the format string, while also
+   * inspecting the input decimal value.
+   *
+   * @param input the decimal value that needs to be converted
+   * @return the result String value obtained from string formatting
+   */
+  def format(input: Decimal): UTF8String = {
+    val result = new StringBuilder()
+    // These are string representations of the input Decimal value.
+    val (inputBeforeDecimalPoint: String,
+      inputAfterDecimalPoint: String) =
+      formatSplitInputBeforeAndAfterDecimalPoint(input).getOrElse(
+        return formatMatchFailure(input, numberFormat))
+    // These are indexes into the characters of the input string before and after the decimal point.
+    formattingBeforeDecimalPointIndex = 0
+    formattingAfterDecimalPointIndex = 0
+    var reachedDecimalPoint = false
+
+    // Iterate through the tokens representing the provided format string, in order.
+    for (formatToken: InputToken <- formatTokens) {
+      formatToken match {
+        case groups: DigitGroups =>
+          formatDigitGroups(
+            groups, inputBeforeDecimalPoint, inputAfterDecimalPoint, reachedDecimalPoint, result)
+        case DecimalPoint() =>
+          // If the last character so far is a space, change it to a zero.
+          if (result.nonEmpty && result.last == SPACE) {
+            result(result.length - 1) = ZERO_DIGIT
+          }
+          result.append(POINT_SIGN)
+          reachedDecimalPoint = true
+        case DollarSign() =>
+          result.append(DOLLAR_SIGN)
+        case _: OptionalPlusOrMinusSign | _: OptionalMinusSign =>
+          if (input < Decimal.ZERO) {
+            stripTrailingLoneDecimalPoint(result)
+            addCharacterCheckingTrailingSpaces(result, MINUS_SIGN)
+          } else {
+            result.append(SPACE)
+          }
+        case OpeningAngleBracket() =>
+          if (input < Decimal.ZERO) {
+            result.append(ANGLE_BRACKET_OPEN)
+          } else {
+            result.append(SPACE)
+          }
+        case ClosingAngleBracket() =>
+          if (input < Decimal.ZERO) {
+            stripTrailingLoneDecimalPoint(result)
+            addCharacterCheckingTrailingSpaces(result, ANGLE_BRACKET_CLOSE)
+          } else {
+            result.append(SPACE)
+          }
+      }
+    }
+
+    if (formattingBeforeDecimalPointIndex < inputBeforeDecimalPoint.length ||
+      formattingAfterDecimalPointIndex < inputAfterDecimalPoint.length) {
+      // Remaining digits before or after the decimal point exist in the decimal value but not in
+      // the format string.
+      formatMatchFailure(input, numberFormat)
+    } else {
+      stripTrailingLoneDecimalPoint(result)
+      UTF8String.fromString(result.toString())
+    }
+  }
+
+  /**
+   * Splits the provided Decimal value's string representation by the decimal point, if any.
+   * @param input the Decimal value to consume
+   * @return two strings representing the contents before and after the decimal point (if any),
+   *         respectively, or None if the input string did not match the format string.
+   */
+  private def formatSplitInputBeforeAndAfterDecimalPoint(
+      input: Decimal): Option[(String, String)] = {
+    // Convert the input Decimal value to a string (without exponent notation).
+    val inputString = input.toJavaBigDecimal.toPlainString
+    // Split the digits before and after the decimal point.
+    val tokens = inputString.split(POINT_SIGN)
+    var beforeDecimalPoint = tokens(0)
+    var afterDecimalPoint = if (tokens.length > 1) tokens(1) else ""
+    // Strip any leading minus sign to consider the digits only.
+    // Strip leading and trailing zeros to match cases when the format string begins with a decimal
+    // point.
+    beforeDecimalPoint = beforeDecimalPoint.dropWhile(c => c == MINUS_SIGN || c == ZERO_DIGIT)
+    afterDecimalPoint = afterDecimalPoint.reverse.dropWhile(_ == ZERO_DIGIT).reverse
+
+    // If the format string specifies more digits than the 'beforeDecimalPoint', prepend leading
+    // spaces to make them the same length. Likewise, if the format string specifies more digits
+    // than the 'afterDecimalPoint', append trailing spaces to make them the same length. This step
+    // simplifies logic consuming the format tokens later.
+    var reachedDecimalPoint = false
+    var numFormatDigitsBeforeDecimalPoint = 0
+    var numFormatDigitsAfterDecimalPoint = 0
+    formatTokens.foreach {
+      case digitGroups: DigitGroups =>
+        digitGroups.digits.foreach { digits =>
+          val numDigits = digits match {
+            case ExactlyAsManyDigits(num) => num
+            case AtMostAsManyDigits(num) => num
+          }
+          for (_ <- 0 until numDigits) {
+            if (!reachedDecimalPoint) {
+              numFormatDigitsBeforeDecimalPoint += 1
+            } else {
+              numFormatDigitsAfterDecimalPoint += 1
+            }
+          }
+        }
+      case _: DecimalPoint =>
+        reachedDecimalPoint = true
+      case _ =>
+    }
+    // If there were more digits in the provided input string (before or after the decimal point)
+    // than specified in the format string, the input string does not match the format.
+    if (numFormatDigitsBeforeDecimalPoint < beforeDecimalPoint.length ||
+        numFormatDigitsAfterDecimalPoint < afterDecimalPoint.length) {
+      return None
+    }
+    val leadingSpaces = " " * (numFormatDigitsBeforeDecimalPoint - beforeDecimalPoint.length)
+    val trailingSpaces = " " * (numFormatDigitsAfterDecimalPoint - afterDecimalPoint.length)
+    Some((leadingSpaces + beforeDecimalPoint, afterDecimalPoint + trailingSpaces))
+  }
+
+  /**
+   * Performs format processing on the digits in [[groups]], updating [[result]].
+   *
+   * @param groups the token representing a group of digits from the format string
+   * @param inputBeforeDecimalPoint string representation of the input decimal value before the
+   *                                decimal point
+   * @param inputAfterDecimalPoint string representation of the input decimal value after the
+   *                               decimal point
+   * @param reachedDecimalPoint true if we have reached the decimal point so far during processing
+   * @param result the result of formatting is built here as a string during iteration
+   */
+  private def formatDigitGroups(
+      groups: DigitGroups,
+      inputBeforeDecimalPoint: String,
+      inputAfterDecimalPoint: String,
+      reachedDecimalPoint: Boolean,
+      result: StringBuilder): Unit = {
+    // Iterate through the tokens in the DigitGroups. Reverse the order of the tokens so we
+    // consume them in the left-to-right order that they originally appeared in the format
+    // string.
+    for (digitGroupToken <- groups.tokens.reverse) {
+      digitGroupToken match {
+        case digits: Digits if !reachedDecimalPoint =>
+          val numDigits = digits match {
+            case ExactlyAsManyDigits(num) => num
+            case AtMostAsManyDigits(num) => num
+          }
+          for (_ <- 0 until numDigits) {
+            inputBeforeDecimalPoint(formattingBeforeDecimalPointIndex) match {
+              case c: Char if c != SPACE =>
+                result.append(c)
+              case _ => digits match {
+                case _: ExactlyAsManyDigits =>
+                  // The format string started with a 0 and had more digits than the provided
+                  // input string, so we prepend a 0 to the result.
+                  result.append(ZERO_DIGIT)
+                case _: AtMostAsManyDigits =>
+                  // The format string started with a 9 and had more digits than the provided
+                  // input string, so we prepend a space to the result.
+                  addSpaceCheckingOpenBracketOrMinusSign(result)
+              }
+            }
+            formattingBeforeDecimalPointIndex += 1
+          }
+        case digits: Digits if reachedDecimalPoint =>
+          val numDigits = digits match {
+            case ExactlyAsManyDigits(num) => num
+            case AtMostAsManyDigits(num) => num

Review Comment:
   I tried this, but it causes obscure serialization/deserialization errors for the expression: it complains that there is no way to handle the subclasses.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to