Github user chenghao-intel commented on a diff in the pull request:

    https://github.com/apache/spark/pull/6872#discussion_r33112140
  
    --- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/math.scala
 ---
    @@ -154,6 +156,216 @@ case class Cos(child: Expression) extends 
UnaryMathExpression(math.cos, "COS")
     
     case class Cosh(child: Expression) extends UnaryMathExpression(math.cosh, 
"COSH")
     
    +// This logic is borrowed from hive
    +class ConvUtil {
    +
    +  private val value = new Array[Byte](64)
    +
    +  /**
    +   * Divide x by m as if x is an unsigned 64-bit integer. Examples:
    +   * unsignedLongDiv(-1, 2) == Long.MAX_VALUE unsignedLongDiv(6, 3) == 2
    +   * unsignedLongDiv(0, 5) == 0
    +   *
    +   * @param x
    +   *          is treated as unsigned
    +   * @param m
    +   *          is treated as signed
    +   */
    +  private def unsignedLongDiv(x: Long, m: Int): Long = {
    +    if (x >= 0) {
    +      x / m
    +    } else {
    +      // Let uval be the value of the unsigned long with the same bits as x
    +      // Two's complement => x = uval - 2*MAX - 2
    +      // => uval = x + 2*MAX + 2
    +      // Now, use the fact: (a+b)/c = a/c + b/c + (a%c+b%c)/c
    +      (x / m + 2 * (Long.MaxValue / m) + 2 / m + (x % m + 2 * 
(Long.MaxValue % m) + 2 % m) / m)
    +    }
    +  }
    +
    +  /**
    +   * Decode v into value[].
    +   *
    +   * @param v
    +   *          is treated as an unsigned 64-bit integer
    +   * @param radix
    +   *          must be between MIN_RADIX and MAX_RADIX
    +   */
    +  private def decode(v: Long, radix: Int): Unit = {
    +    var tmpV = v
    +    Arrays.fill(value, 0.asInstanceOf[Byte])
    +    var i = value.length - 1
    +    while (tmpV != 0) {
    +      val q = unsignedLongDiv(tmpV, radix)
    +      value(i) = (tmpV - q * radix).asInstanceOf[Byte]
    +      tmpV = q
    +      i -= 1
    +    }
    +  }
    +
    +  /**
    +   * Convert value[] into a long. On overflow, return -1 (as mySQL does). 
If a
    +   * negative digit is found, ignore the suffix starting there.
    +   *
    +   * @param radix
    +   *          must be between MIN_RADIX and MAX_RADIX
    +   * @param fromPos
    +   *          is the first element that should be conisdered
    +   * @return the result should be treated as an unsigned 64-bit integer.
    +   */
    +  private def encode(radix: Int, fromPos: Int): Long = {
    +    var v: Long = 0L
    +    val bound = unsignedLongDiv(-1 - radix, radix) // Possible overflow 
once
    +    // val
    +    // exceeds this value
    +    var i = fromPos
    +    while (i < value.length && value(i) >= 0) {
    +      if (v >= bound) {
    +        // Check for overflow
    +        if (unsignedLongDiv(-1 - value(i), radix) < v) {
    +          return -1
    +        }
    +      }
    +      v = v * radix + value(i)
    +      i += 1
    +    }
    +    return v
    +  }
    +
    +  /**
    +   * Convert the bytes in value[] to the corresponding chars.
    +   *
    +   * @param radix
    +   *          must be between MIN_RADIX and MAX_RADIX
    +   * @param fromPos
    +   *          is the first nonzero element
    +   */
    +  private def byte2char(radix: Int, fromPos: Int): Unit = {
    +    var i = fromPos
    +    while (i < value.length) {
    +      value(i) = Character.toUpperCase(Character.forDigit(value(i), 
radix)).asInstanceOf[Byte]
    +      i += 1
    +    }
    +  }
    +
    +  /**
    +   * Convert the chars in value[] to the corresponding integers. Convert 
invalid
    +   * characters to -1.
    +   *
    +   * @param radix
    +   *          must be between MIN_RADIX and MAX_RADIX
    +   * @param fromPos
    +   *          is the first nonzero element
    +   */
    +  private def char2byte(radix: Int, fromPos: Int): Unit = {
    +    var i = fromPos
    +    while ( i < value.length) {
    +      value(i) = Character.digit(value(i), radix).asInstanceOf[Byte]
    +      i += 1
    +    }
    +  }
    +
    +  /**
    +   * Convert numbers between different number bases. If toBase>0 the 
result is
    +   * unsigned, otherwise it is signed.
    +   *
    +   */
    +   def conv(n: Array[Byte] , fromBase: Int , toBase: Int ): UTF8String = {
    +    if (n == null || fromBase == null || toBase == null) {
    +      return null
    +    }
    +
    +    if (fromBase < Character.MIN_RADIX || fromBase > Character.MAX_RADIX
    +      || Math.abs(toBase) < Character.MIN_RADIX
    +      || Math.abs(toBase) > Character.MAX_RADIX) {
    +      return null
    +    }
    +
    +    var (negative, first) = if (n(0) == '-') (true, 1) else (false, 0)
    +
    +    // Copy the digits in the right side of the array
    +    var i = 1
    +    while (i <= n.length - first) {
    +      value(value.length - i) = n(n.length - i)
    +      i += 1
    +    }
    +    char2byte(fromBase, value.length - n.length + first)
    +
    +    // Do the conversion by going through a 64 bit integer
    +    var v = encode(fromBase, value.length - n.length + first)
    +    if (negative && toBase > 0) {
    +      if (v < 0) {
    +        v = -1
    +      } else {
    +        v = -v
    +      }
    +    }
    +    if (toBase < 0 && v < 0) {
    +      v = -v
    +      negative = true
    +    }
    +    decode(v, Math.abs(toBase))
    +
    +    // Find the first non-zero digit or the last digits if all are zero.
    +    val firstNonZeroPos = {
    +      val firstNonZero = value.indexWhere( _ != 0)
    +      if (firstNonZero != -1) firstNonZero else value.length - 1
    +    }
    +
    +    byte2char(Math.abs(toBase), firstNonZeroPos)
    +
    +    var resultStartPos = firstNonZeroPos
    +    if (negative && toBase < 0) {
    +      resultStartPos = firstNonZeroPos - 1
    +      value(resultStartPos) = '-'
    +    }
    +    UTF8String.fromBytes( Arrays.copyOfRange(value, resultStartPos, 
value.length))
    +  }
    +}
    +
    +case class Conv(numExpr: Expression, fromBaseExpr: Expression, toBaseExpr: 
Expression)
    +  extends TernaryExpression {
    +
    +  override def symbol: String = "conv"
    +
    +  override def toString: String = s"(conv($first, $second, $third))"
    +
    +  /** Returns the result of evaluating this expression on a given input 
Row */
    +  override def eval(input: InternalRow): Any = {
    +    val num = numExpr.eval(input)
    +    val fromBase = fromBaseExpr.eval(input)
    +    val toBase = toBaseExpr.eval(input)
    +    if (num == null || fromBase == null || toBase == null) {
    +      null
    +    } else {
    +      numExpr.dataType match {
    +        case ShortType | IntegerType | LongType | StringType =>
    +          new ConvUtil().conv(num.toString.getBytes(),
    +            fromBase.asInstanceOf[Integer], toBase.asInstanceOf[Integer])
    +        case _ => null
    --- End diff --
    
    This probably needs to be resolved during data type checking; is this an 
illegal data type?


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to