dtenedor commented on code in PR #36365:
URL: https://github.com/apache/spark/pull/36365#discussion_r871723101


##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ToNumberParser.scala:
##########
@@ -599,4 +614,250 @@ class ToNumberParser(numberFormat: String, errorOnFail: Boolean) extends Seriali
       Decimal(javaDecimal, precision, scale)
     }
   }
+
+  /**
+   * Converts a decimal value to a string based on the given number format.
+   *
+   * Iterates through the [[formatTokens]] obtained from processing the format string, while also
+   * inspecting the input decimal value.
+   *
+   * @param input the decimal value that needs to be converted
+   * @return the result String value obtained from string formatting
+   */
+  def format(input: Decimal): UTF8String = {
+    val result = new StringBuilder()
+    // These are string representations of the input Decimal value.
+    val (inputBeforeDecimalPoint: String,
+      inputAfterDecimalPoint: String) =
+      formatSplitInputBeforeAndAfterDecimalPoint(input)
+    // These are indexes into the characters of the input string before and after the decimal point.
+    formattingBeforeDecimalPointIndex = 0
+    formattingAfterDecimalPointIndex = 0
+    var reachedDecimalPoint = false
+
+    // Iterate through the tokens representing the provided format string, in order.
+    for (formatToken: InputToken <- formatTokens) {
+      formatToken match {
+        case groups: DigitGroups =>
+          formatDigitGroups(
+            groups, inputBeforeDecimalPoint, inputAfterDecimalPoint, reachedDecimalPoint, result)
+        case DecimalPoint() =>
+          // If the last character so far is a space, change it to a zero. This means the input
+          // decimal does not have an integer part.
+          if (result.nonEmpty && result.last == SPACE) {
+            result(result.length - 1) = ZERO_DIGIT
+          }
+          result.append(POINT_SIGN)
+          reachedDecimalPoint = true
+        case DollarSign() =>
+          result.append(DOLLAR_SIGN)
+        case _: OptionalPlusOrMinusSign =>
+          stripTrailingLoneDecimalPoint(result)
+          if (input < Decimal.ZERO) {
+            addCharacterCheckingTrailingSpaces(result, MINUS_SIGN)
+          } else {
+            addCharacterCheckingTrailingSpaces(result, PLUS_SIGN)
+          }
+        case _: OptionalMinusSign =>
+          if (input < Decimal.ZERO) {
+            stripTrailingLoneDecimalPoint(result)
+            addCharacterCheckingTrailingSpaces(result, MINUS_SIGN)
+            // Add a second space to account for the "MI" sequence comprising two characters in the
+            // format string.
+            result.append(SPACE)
+          } else {
+            result.append(SPACE)
+            result.append(SPACE)
+          }
+        case OpeningAngleBracket() =>
+          if (input < Decimal.ZERO) {
+            result.append(ANGLE_BRACKET_OPEN)
+          }
+        case ClosingAngleBracket() =>
+          stripTrailingLoneDecimalPoint(result)
+          if (input < Decimal.ZERO) {
+            addCharacterCheckingTrailingSpaces(result, ANGLE_BRACKET_CLOSE)
+          } else {
+            result.append(SPACE)
+            result.append(SPACE)
+          }
+      }
+    }
+
+    if (formattingBeforeDecimalPointIndex < inputBeforeDecimalPoint.length ||
+      formattingAfterDecimalPointIndex < inputAfterDecimalPoint.length) {
+      // Remaining digits before or after the decimal point exist in the decimal value but not in
+      // the format string.
+      formatMatchFailure(input, numberFormat)
+    } else {
+      stripTrailingLoneDecimalPoint(result)
+      val str = result.toString
+      if (result.isEmpty || str == "+" || str == "-") {
+        UTF8String.fromString("0")
+      } else {
+        UTF8String.fromString(str)
+      }
+    }
+  }
+
+  /**
+   * Splits the provided Decimal value's string representation by the decimal point, if any.
+   * @param input the Decimal value to consume
+   * @return two strings representing the contents before and after the decimal point (if any)
+   */
+  private def formatSplitInputBeforeAndAfterDecimalPoint(input: Decimal): (String, String) = {
+    // Convert the input Decimal value to a string (without exponent notation).
+    val inputString = input.toJavaBigDecimal.toPlainString
+    // Split the digits before and after the decimal point.
+    val tokens: Array[String] = inputString.split(POINT_SIGN)
+    var beforeDecimalPoint: String = tokens(0)
+    var afterDecimalPoint: String = if (tokens.length > 1) tokens(1) else ""
+    // Strip any leading minus sign to consider the digits only.
+    // Strip leading and trailing zeros to match cases when the format string begins with a decimal
+    // point.
+    beforeDecimalPoint = beforeDecimalPoint.dropWhile(c => c == MINUS_SIGN || c == ZERO_DIGIT)
+    afterDecimalPoint = afterDecimalPoint.reverse.dropWhile(_ == ZERO_DIGIT).reverse
+
+    // If the format string specifies more digits than the 'beforeDecimalPoint', prepend leading
+    // spaces to make them the same length. Likewise, if the format string specifies more digits
+    // than the 'afterDecimalPoint', append trailing spaces to make them the same length. This step
+    // simplifies logic consuming the format tokens later.
+    var reachedDecimalPoint = false
+    var numFormatDigitsBeforeDecimalPoint = 0
+    var numFormatDigitsAfterDecimalPoint = 0
+    formatTokens.foreach {
+      case digitGroups: DigitGroups =>
+        digitGroups.digits.foreach { digits =>
+          val numDigits = digits match {
+            case ExactlyAsManyDigits(num) => num
+            case AtMostAsManyDigits(num) => num
+          }
+          for (_ <- 0 until numDigits) {
+            if (!reachedDecimalPoint) {
+              numFormatDigitsBeforeDecimalPoint += 1
+            } else {
+              numFormatDigitsAfterDecimalPoint += 1
+            }
+          }
+        }
+      case _: DecimalPoint =>
+        reachedDecimalPoint = true
+      case _ =>
+    }
+    // If there were more digits in the provided input string (before or after the decimal point)
+    // than specified in the format string, this is an overflow.
+    if (numFormatDigitsBeforeDecimalPoint < beforeDecimalPoint.length ||
+      numFormatDigitsAfterDecimalPoint < afterDecimalPoint.length) {
+      beforeDecimalPoint = "#" * numFormatDigitsBeforeDecimalPoint
+      afterDecimalPoint = "#" * numFormatDigitsAfterDecimalPoint
+    }
+    val leadingSpaces = " " * (numFormatDigitsBeforeDecimalPoint - beforeDecimalPoint.length)
+    val trailingSpaces = "0" * (numFormatDigitsAfterDecimalPoint - afterDecimalPoint.length)

Review Comment:
   Nice catch, done.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to