beliefer commented on a change in pull request #25963: [SPARK-28137][SQL] Add
Postgresql function to_number.
URL: https://github.com/apache/spark/pull/25963#discussion_r336821355
##########
File path:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
##########
@@ -2267,6 +2267,156 @@ case class FormatNumber(x: Expression, d: Expression)
override def prettyName: String = "format_number"
}
+/**
+ * Helper for the `to_number` expression: extracts the numeric value of an input string by
+ * walking it alongside a PostgreSQL-style number format pattern.
+ */
+object ToNumber {
+
+  /**
+   * Consumes `pattern` one character at a time, copying the digits and the decimal point of
+   * `input` that line up with it, and renders the collected text as a plain decimal string.
+   *
+   * @param input the string to convert
+   * @param pattern the format pattern; it is upper-cased and every "FM" is removed before use
+   * @return the non-scientific decimal representation with trailing zeros stripped (negated
+   *         when a sign was detected), or an empty string when nothing numeric was collected
+   */
+  def transfer(input: UTF8String, pattern: UTF8String): UTF8String = {
+    val inputStr = input.toString
+    val patternStr = pattern.toString.toUpperCase(Locale.ROOT).replaceAll("FM", "")
+    // The value is negative when the input starts with '-', when the pattern has an S or MI
+    // sign and the input contains '-' anywhere, or when the pattern has PR and the input
+    // starts with '<'.
+    val hasSign = inputStr.startsWith("-") ||
+      (patternStr.contains('S') && inputStr.contains('-')) ||
+      (patternStr.contains("MI") && inputStr.contains('-')) ||
+      (patternStr.contains("PR") && inputStr.startsWith("<"))
+    // Once the sign is known, drop the sign characters from the input and the sign markers
+    // from the pattern so that the remaining characters line up position by position.
+    val (newInputStr, newPatternStr) = if (hasSign) {
+      (inputStr.replaceAll("-|<|>", ""), patternStr.replaceAll("S|(MI)|(PR)|(FM)", ""))
+    } else {
+      (inputStr, patternStr.replaceAll("FM", ""))
+    }
+    val inputChars = newInputStr.toCharArray
+    // Digits and decimal point collected so far.
+    val digits = new java.lang.StringBuilder
+    // Position of the next unconsumed input character.
+    var indexOfString = 0
+    newPatternStr.foreach { c =>
+      // Pattern characters left over after the input is exhausted are ignored.
+      if (indexOfString < inputChars.length) {
+        val currentChar = inputChars(indexOfString)
+        c match {
+          case '9' | '0' if Character.isDigit(currentChar) =>
+            digits.append(currentChar)
+            indexOfString += 1
+          case '9' | '0' | ' ' if currentChar == '.' =>
+            // A digit position facing the decimal point: the point is left for a later
+            // pattern character, but an empty integer part is seeded with "0".
+            if (digits.length == 0) {
+              digits.append('0')
+            }
+          case '9' | '0' if currentChar == ',' =>
+            // A digit position facing a group separator: consume nothing.
+          case '.' | 'D' if currentChar == '.' =>
+            digits.append(currentChar)
+            indexOfString += 1
+          case ',' | 'G' if currentChar == ',' =>
+            // Group separators are skipped, never copied.
+            indexOfString += 1
+          case ',' | 'G' if Character.isDigit(currentChar) =>
+            // The separator is absent from the input: keep the digit for the next position.
+          case 'L' =>
+            // Skip the whole currency symbol. The bounds check stops the scan when the
+            // symbol runs to the end of the input instead of indexing past the array.
+            while (indexOfString < inputChars.length &&
+                (Character.isLetter(inputChars(indexOfString)) ||
+                  inputChars(indexOfString) == '$')) {
+              indexOfString += 1
+            }
+          case _ =>
+            // Anything else consumes one input character without copying it.
+            indexOfString += 1
+        }
+      }
+    }
+
+    if (digits.length > 0) {
+      // Parse the collected text directly; a round trip through Double would round values
+      // with more than about 17 significant digits.
+      val parsed = new java.math.BigDecimal(digits.toString).stripTrailingZeros
+      val decimal = if (hasSign) parsed.negate else parsed
+      UTF8String.fromString(decimal.toPlainString)
+    } else {
+      UTF8String.fromString("")
+    }
+  }
+}
+
+/**
+ * A function that converts string to numeric.
+ */
+@ExpressionDescription(
+ usage = """
+ _FUNC_(strExpr, patternExpr) - Convert `strExpr` to a number based on the
`patternExpr`.
+ The pattern can consist of the following characters:
+ '9': digit position (can be dropped if insignificant)
+ '0': digit position (will not be dropped, even if insignificant)
+ '.': decimal point (only allowed once)
+ ',': group (thousands) separator
+ 'PR': negative value in angle brackets
+ 'S': sign anchored to number (uses locale)
+ 'L': currency symbol (uses locale)
+ 'D': decimal point (uses locale)
+ 'G': group separator (uses locale)
+ 'MI': minus sign in specified position (if number < 0)
+ """,
+ examples = """
+ Examples:
+ > SELECT _FUNC_('4540', '999');
+ 454
+ > SELECT _FUNC_('454.00', '000D00');
+ 454
+ > SELECT _FUNC_('12,454.8-', '99G999D9S');
+ -12454.8
+ > SELECT _FUNC_('CNY234234.4350', 'L999999.0000');
+ 234234.435
+ """)
+case class ToNumber(left: Expression, right: Expression)
+ extends BinaryExpression with ImplicitCastInputTypes with NullIntolerant {
+
+ // scalastyle:off caselocale
+ @transient private lazy val pattern: Option[String] = {
+ if (right.foldable) {
+
Option(right.eval()).map(_.toString.toUpperCase(Locale.ROOT).replaceAll("FM",
""))
+ } else None
+ }
+ // scalastyle:on caselocale
+
+ override def dataType: DataType = StringType
+ override def inputTypes: Seq[DataType] = Seq(StringType, StringType)
+
+ override def checkInputDataTypes(): TypeCheckResult = {
+ def checkDecimalPointNum(c: Char): Boolean = {
+ c == '.' || c == 'D'
+ }
+
+ def checkSignNum(c: Char): Boolean = {
Review comment:
OK. I will remove this method.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]