dtenedor commented on code in PR #48004:
URL: https://github.com/apache/spark/pull/48004#discussion_r1759212891


##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/randomExpressions.scala:
##########
@@ -181,3 +189,218 @@ case class Randn(child: Expression, hideSeed: Boolean = 
false) extends RDG {
 object Randn {
   def apply(seed: Long): Randn = Randn(Literal(seed, LongType))
 }
+
+@ExpressionDescription(
+  usage = """
+    _FUNC_(min, max[, seed]) - Returns a random value with independent and 
identically
+      distributed (i.i.d.) values with the specified range of numbers. The 
random seed is optional.
+      The provided numbers specifying the minimum and maximum values of the 
range must be constant.
+      If both of these numbers are integers, then the result will also be an 
integer. Otherwise if
+      one or both of these are floating-point numbers, then the result will 
also be a floating-point
+      number.
+  """,
+  examples = """
+    Examples:
+      > SELECT _FUNC_(10, 20, 0) > 0 AS result;
+      true
+  """,
+  since = "4.0.0",
+  group = "math_funcs")
+case class Uniform(min: Expression, max: Expression, seedExpression: 
Expression)
+  extends RuntimeReplaceable with TernaryLike[Expression] with RDG {
+  def this(min: Expression, max: Expression) =
+    this(min, max, Literal(Uniform.random.nextLong(), LongType))
+
+  final override lazy val deterministic: Boolean = false
+  override val nodePatterns: Seq[TreePattern] =
+    Seq(RUNTIME_REPLACEABLE, EXPRESSION_WITH_RANDOM_SEED)
+
+  override val dataType: DataType = {
+    val first = min.dataType
+    val second = max.dataType
+    (min.dataType, max.dataType) match {
+      case _ if !valid(min) || !valid(max) => NullType
+      case (_, LongType) | (LongType, _) if Seq(first, second).forall(integer) 
=> LongType
+      case (_, IntegerType) | (IntegerType, _) if Seq(first, 
second).forall(integer) => IntegerType
+      case (_, ShortType) | (ShortType, _) if Seq(first, 
second).forall(integer) => ShortType
+      case (_, DoubleType) | (DoubleType, _) => DoubleType
+      case (_, FloatType) | (FloatType, _) => FloatType
+      case _ => NullType

Review Comment:
   Generally, yes. I suppose since `NullType` is actually a valid input 
argument (literal `NULL`), I added it to `checkInputDataTypes` and left it here.



##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/randomExpressions.scala:
##########
@@ -181,3 +189,218 @@ case class Randn(child: Expression, hideSeed: Boolean = 
false) extends RDG {
 object Randn {
   def apply(seed: Long): Randn = Randn(Literal(seed, LongType))
 }
+
+@ExpressionDescription(
+  usage = """
+    _FUNC_(min, max[, seed]) - Returns a random value with independent and 
identically
+      distributed (i.i.d.) values with the specified range of numbers. The 
random seed is optional.
+      The provided numbers specifying the minimum and maximum values of the 
range must be constant.
+      If both of these numbers are integers, then the result will also be an 
integer. Otherwise if
+      one or both of these are floating-point numbers, then the result will 
also be a floating-point
+      number.
+  """,
+  examples = """
+    Examples:
+      > SELECT _FUNC_(10, 20, 0) > 0 AS result;
+      true
+  """,
+  since = "4.0.0",
+  group = "math_funcs")
+case class Uniform(min: Expression, max: Expression, seedExpression: 
Expression)
+  extends RuntimeReplaceable with TernaryLike[Expression] with RDG {
+  def this(min: Expression, max: Expression) =
+    this(min, max, Literal(Uniform.random.nextLong(), LongType))
+
+  final override lazy val deterministic: Boolean = false
+  override val nodePatterns: Seq[TreePattern] =
+    Seq(RUNTIME_REPLACEABLE, EXPRESSION_WITH_RANDOM_SEED)
+
+  override val dataType: DataType = {
+    val first = min.dataType
+    val second = max.dataType
+    (min.dataType, max.dataType) match {
+      case _ if !valid(min) || !valid(max) => NullType

Review Comment:
   Sounds good, done.



##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/randomExpressions.scala:
##########
@@ -181,3 +189,218 @@ case class Randn(child: Expression, hideSeed: Boolean = 
false) extends RDG {
 object Randn {
   def apply(seed: Long): Randn = Randn(Literal(seed, LongType))
 }
+
+@ExpressionDescription(
+  usage = """
+    _FUNC_(min, max[, seed]) - Returns a random value with independent and 
identically
+      distributed (i.i.d.) values with the specified range of numbers. The 
random seed is optional.
+      The provided numbers specifying the minimum and maximum values of the 
range must be constant.
+      If both of these numbers are integers, then the result will also be an 
integer. Otherwise if
+      one or both of these are floating-point numbers, then the result will 
also be a floating-point
+      number.
+  """,
+  examples = """
+    Examples:
+      > SELECT _FUNC_(10, 20, 0) > 0 AS result;
+      true
+  """,
+  since = "4.0.0",
+  group = "math_funcs")
+case class Uniform(min: Expression, max: Expression, seedExpression: 
Expression)
+  extends RuntimeReplaceable with TernaryLike[Expression] with RDG {
+  def this(min: Expression, max: Expression) =
+    this(min, max, Literal(Uniform.random.nextLong(), LongType))
+
+  final override lazy val deterministic: Boolean = false
+  override val nodePatterns: Seq[TreePattern] =
+    Seq(RUNTIME_REPLACEABLE, EXPRESSION_WITH_RANDOM_SEED)
+
+  override val dataType: DataType = {
+    val first = min.dataType
+    val second = max.dataType
+    (min.dataType, max.dataType) match {
+      case _ if !valid(min) || !valid(max) => NullType
+      case (_, LongType) | (LongType, _) if Seq(first, second).forall(integer) 
=> LongType
+      case (_, IntegerType) | (IntegerType, _) if Seq(first, 
second).forall(integer) => IntegerType
+      case (_, ShortType) | (ShortType, _) if Seq(first, 
second).forall(integer) => ShortType
+      case (_, DoubleType) | (DoubleType, _) => DoubleType
+      case (_, FloatType) | (FloatType, _) => FloatType
+      case _ => NullType
+    }
+  }
+
+  private def valid(e: Expression): Boolean = e.dataType match {
+    case _: ShortType | _: IntegerType | _: LongType | _: FloatType | _: 
DoubleType => true
+    case _ => false
+  }
+
+  private def integer(t: DataType): Boolean = t match {
+    case _: ShortType | _: IntegerType | _: LongType => true
+    case _ => false
+  }
+
+  override def checkInputDataTypes(): TypeCheckResult = {
+    var result: TypeCheckResult = TypeCheckResult.TypeCheckSuccess
+    def requiredType = "integer or floating-point"
+    Seq((min, "min", 0),
+      (max, "max", 1),
+      (seedExpression, "seed", 2)).foreach {
+      case (expr: Expression, name: String, index: Int) =>
+        if (!expr.foldable && result == TypeCheckResult.TypeCheckSuccess) {
+          result = DataTypeMismatch(
+            errorSubClass = "NON_FOLDABLE_INPUT",
+            messageParameters = Map(
+              "inputName" -> name,
+              "inputType" -> requiredType,
+              "inputExpr" -> toSQLExpr(expr)))
+        } else if (!valid(expr) && result == TypeCheckResult.TypeCheckSuccess) 
{
+          result = DataTypeMismatch(
+            errorSubClass = "UNEXPECTED_INPUT_TYPE",
+            messageParameters = Map(
+              "paramIndex" -> ordinalNumber(index),
+              "requiredType" -> requiredType,
+              "inputSql" -> toSQLExpr(expr),
+              "inputType" -> toSQLType(expr.dataType)))
+        }
+    }
+    result
+  }
+
+  override def first: Expression = min
+  override def second: Expression = max
+  override def third: Expression = seedExpression
+
+  override def withNewSeed(newSeed: Long): Expression =
+    Uniform(min, max, Literal(newSeed, LongType))
+
+  override def withNewChildrenInternal(
+      newFirst: Expression, newSecond: Expression, newThird: Expression): 
Expression =
+    Uniform(newFirst, newSecond, newThird)
+
+  override def replacement: Expression = {
+    def cast(e: Expression, to: DataType): Expression = if (e.dataType == to) 
e else Cast(e, to)
+    cast(Add(
+      cast(min, DoubleType),
+      Multiply(
+        Subtract(
+          cast(max, DoubleType),
+          cast(min, DoubleType)),
+        Rand(seed))),
+      dataType)
+  }
+}
+
+object Uniform {

Review Comment:
   Sounds good, this is done



##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/randomExpressions.scala:
##########
@@ -181,3 +189,218 @@ case class Randn(child: Expression, hideSeed: Boolean = 
false) extends RDG {
 object Randn {
   def apply(seed: Long): Randn = Randn(Literal(seed, LongType))
 }
+
+@ExpressionDescription(
+  usage = """
+    _FUNC_(min, max[, seed]) - Returns a random value with independent and 
identically
+      distributed (i.i.d.) values with the specified range of numbers. The 
random seed is optional.
+      The provided numbers specifying the minimum and maximum values of the 
range must be constant.
+      If both of these numbers are integers, then the result will also be an 
integer. Otherwise if
+      one or both of these are floating-point numbers, then the result will 
also be a floating-point
+      number.
+  """,
+  examples = """
+    Examples:
+      > SELECT _FUNC_(10, 20, 0) > 0 AS result;
+      true
+  """,
+  since = "4.0.0",
+  group = "math_funcs")
+case class Uniform(min: Expression, max: Expression, seedExpression: 
Expression)
+  extends RuntimeReplaceable with TernaryLike[Expression] with RDG {
+  def this(min: Expression, max: Expression) =
+    this(min, max, Literal(Uniform.random.nextLong(), LongType))
+
+  final override lazy val deterministic: Boolean = false
+  override val nodePatterns: Seq[TreePattern] =
+    Seq(RUNTIME_REPLACEABLE, EXPRESSION_WITH_RANDOM_SEED)
+
+  override val dataType: DataType = {
+    val first = min.dataType
+    val second = max.dataType
+    (min.dataType, max.dataType) match {
+      case _ if !valid(min) || !valid(max) => NullType
+      case (_, LongType) | (LongType, _) if Seq(first, second).forall(integer) 
=> LongType
+      case (_, IntegerType) | (IntegerType, _) if Seq(first, 
second).forall(integer) => IntegerType
+      case (_, ShortType) | (ShortType, _) if Seq(first, 
second).forall(integer) => ShortType
+      case (_, DoubleType) | (DoubleType, _) => DoubleType
+      case (_, FloatType) | (FloatType, _) => FloatType
+      case _ => NullType
+    }
+  }
+
+  private def valid(e: Expression): Boolean = e.dataType match {
+    case _: ShortType | _: IntegerType | _: LongType | _: FloatType | _: 
DoubleType => true
+    case _ => false
+  }
+
+  private def integer(t: DataType): Boolean = t match {
+    case _: ShortType | _: IntegerType | _: LongType => true
+    case _ => false
+  }
+
+  override def checkInputDataTypes(): TypeCheckResult = {
+    var result: TypeCheckResult = TypeCheckResult.TypeCheckSuccess
+    def requiredType = "integer or floating-point"
+    Seq((min, "min", 0),
+      (max, "max", 1),
+      (seedExpression, "seed", 2)).foreach {
+      case (expr: Expression, name: String, index: Int) =>
+        if (!expr.foldable && result == TypeCheckResult.TypeCheckSuccess) {
+          result = DataTypeMismatch(
+            errorSubClass = "NON_FOLDABLE_INPUT",
+            messageParameters = Map(
+              "inputName" -> name,
+              "inputType" -> requiredType,
+              "inputExpr" -> toSQLExpr(expr)))
+        } else if (!valid(expr) && result == TypeCheckResult.TypeCheckSuccess) 
{
+          result = DataTypeMismatch(
+            errorSubClass = "UNEXPECTED_INPUT_TYPE",
+            messageParameters = Map(
+              "paramIndex" -> ordinalNumber(index),
+              "requiredType" -> requiredType,
+              "inputSql" -> toSQLExpr(expr),
+              "inputType" -> toSQLType(expr.dataType)))
+        }
+    }
+    result
+  }
+
+  override def first: Expression = min
+  override def second: Expression = max
+  override def third: Expression = seedExpression
+
+  override def withNewSeed(newSeed: Long): Expression =
+    Uniform(min, max, Literal(newSeed, LongType))
+
+  override def withNewChildrenInternal(
+      newFirst: Expression, newSecond: Expression, newThird: Expression): 
Expression =
+    Uniform(newFirst, newSecond, newThird)
+
+  override def replacement: Expression = {
+    def cast(e: Expression, to: DataType): Expression = if (e.dataType == to) 
e else Cast(e, to)
+    cast(Add(
+      cast(min, DoubleType),
+      Multiply(
+        Subtract(
+          cast(max, DoubleType),
+          cast(min, DoubleType)),
+        Rand(seed))),
+      dataType)
+  }
+}
+
+object Uniform {
+  lazy val random = new Random()
+}
+
+@ExpressionDescription(
+  usage = """
+    _FUNC_(length[, seed]) - Returns a string of the specified length whose 
characters are chosen
+      uniformly at random from the following pool of characters: 0-9, a-z, 
A-Z. The random seed is
+      optional. The string length must be a constant two-byte or four-byte 
integer (SMALLINT or INT,
+      respectively).
+  """,
+  examples =
+    """
+    Examples:
+      > SELECT _FUNC_(3, 0) AS result;
+       8i7
+  """,
+  since = "4.0.0",
+  group = "string_funcs")
+case class RandStr(length: Expression, override val seedExpression: Expression)
+  extends ExpressionWithRandomSeed with BinaryLike[Expression] with 
Nondeterministic {
+  def this(length: Expression) = this(length, 
Literal(Uniform.random.nextLong(), LongType))
+
+  override def nullable: Boolean = false
+  override def dataType: DataType = StringType
+  override def stateful: Boolean = true
+  override def left: Expression = length
+  override def right: Expression = seedExpression
+
+  /**
+   * Record ID within each partition. By being transient, the Random Number 
Generator is
+   * reset every time we serialize and deserialize and initialize it.
+   */
+  @transient protected var rng: XORShiftRandom = _
+
+  @transient protected lazy val seed: Long = seedExpression match {
+    case e if e.dataType == IntegerType => e.eval().asInstanceOf[Int]
+    case e if e.dataType == LongType => e.eval().asInstanceOf[Long]
+  }
+  override protected def initializeInternal(partitionIndex: Int): Unit = {
+    rng = new XORShiftRandom(seed + partitionIndex)
+  }
+
+  override def withNewSeed(newSeed: Long): Expression = RandStr(length, 
Literal(newSeed, LongType))
+  override def withNewChildrenInternal(newFirst: Expression, newSecond: 
Expression): Expression =
+    RandStr(newFirst, newSecond)
+
+  override def checkInputDataTypes(): TypeCheckResult = {
+    var result: TypeCheckResult = TypeCheckResult.TypeCheckSuccess
+    Seq(length, seedExpression).zipWithIndex.foreach { case (expr: Expression, 
index: Int) =>
+      val valid = expr.dataType match {
+        case _ if !expr.foldable => false

Review Comment:
   Sounds good, done.



##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/randomExpressions.scala:
##########
@@ -181,3 +189,218 @@ case class Randn(child: Expression, hideSeed: Boolean = 
false) extends RDG {
 object Randn {
   def apply(seed: Long): Randn = Randn(Literal(seed, LongType))
 }
+
+@ExpressionDescription(
+  usage = """
+    _FUNC_(min, max[, seed]) - Returns a random value with independent and 
identically
+      distributed (i.i.d.) values with the specified range of numbers. The 
random seed is optional.
+      The provided numbers specifying the minimum and maximum values of the 
range must be constant.
+      If both of these numbers are integers, then the result will also be an 
integer. Otherwise if
+      one or both of these are floating-point numbers, then the result will 
also be a floating-point
+      number.
+  """,
+  examples = """
+    Examples:
+      > SELECT _FUNC_(10, 20, 0) > 0 AS result;
+      true
+  """,
+  since = "4.0.0",
+  group = "math_funcs")
+case class Uniform(min: Expression, max: Expression, seedExpression: 
Expression)
+  extends RuntimeReplaceable with TernaryLike[Expression] with RDG {
+  def this(min: Expression, max: Expression) =
+    this(min, max, Literal(Uniform.random.nextLong(), LongType))
+
+  final override lazy val deterministic: Boolean = false
+  override val nodePatterns: Seq[TreePattern] =
+    Seq(RUNTIME_REPLACEABLE, EXPRESSION_WITH_RANDOM_SEED)
+
+  override val dataType: DataType = {
+    val first = min.dataType
+    val second = max.dataType
+    (min.dataType, max.dataType) match {
+      case _ if !valid(min) || !valid(max) => NullType
+      case (_, LongType) | (LongType, _) if Seq(first, second).forall(integer) 
=> LongType
+      case (_, IntegerType) | (IntegerType, _) if Seq(first, 
second).forall(integer) => IntegerType
+      case (_, ShortType) | (ShortType, _) if Seq(first, 
second).forall(integer) => ShortType
+      case (_, DoubleType) | (DoubleType, _) => DoubleType
+      case (_, FloatType) | (FloatType, _) => FloatType
+      case _ => NullType
+    }
+  }
+
+  private def valid(e: Expression): Boolean = e.dataType match {
+    case _: ShortType | _: IntegerType | _: LongType | _: FloatType | _: 
DoubleType => true
+    case _ => false
+  }
+
+  private def integer(t: DataType): Boolean = t match {
+    case _: ShortType | _: IntegerType | _: LongType => true
+    case _ => false
+  }
+
+  override def checkInputDataTypes(): TypeCheckResult = {
+    var result: TypeCheckResult = TypeCheckResult.TypeCheckSuccess
+    def requiredType = "integer or floating-point"
+    Seq((min, "min", 0),
+      (max, "max", 1),
+      (seedExpression, "seed", 2)).foreach {
+      case (expr: Expression, name: String, index: Int) =>
+        if (!expr.foldable && result == TypeCheckResult.TypeCheckSuccess) {
+          result = DataTypeMismatch(
+            errorSubClass = "NON_FOLDABLE_INPUT",
+            messageParameters = Map(
+              "inputName" -> name,
+              "inputType" -> requiredType,
+              "inputExpr" -> toSQLExpr(expr)))
+        } else if (!valid(expr) && result == TypeCheckResult.TypeCheckSuccess) 
{
+          result = DataTypeMismatch(
+            errorSubClass = "UNEXPECTED_INPUT_TYPE",
+            messageParameters = Map(
+              "paramIndex" -> ordinalNumber(index),
+              "requiredType" -> requiredType,
+              "inputSql" -> toSQLExpr(expr),
+              "inputType" -> toSQLType(expr.dataType)))
+        }
+    }
+    result
+  }
+
+  override def first: Expression = min
+  override def second: Expression = max
+  override def third: Expression = seedExpression
+
+  override def withNewSeed(newSeed: Long): Expression =
+    Uniform(min, max, Literal(newSeed, LongType))
+
+  override def withNewChildrenInternal(
+      newFirst: Expression, newSecond: Expression, newThird: Expression): 
Expression =
+    Uniform(newFirst, newSecond, newThird)
+
+  override def replacement: Expression = {
+    def cast(e: Expression, to: DataType): Expression = if (e.dataType == to) 
e else Cast(e, to)
+    cast(Add(
+      cast(min, DoubleType),
+      Multiply(
+        Subtract(
+          cast(max, DoubleType),
+          cast(min, DoubleType)),
+        Rand(seed))),
+      dataType)
+  }
+}
+
+object Uniform {
+  lazy val random = new Random()
+}
+
+@ExpressionDescription(
+  usage = """
+    _FUNC_(length[, seed]) - Returns a string of the specified length whose 
characters are chosen
+      uniformly at random from the following pool of characters: 0-9, a-z, 
A-Z. The random seed is
+      optional. The string length must be a constant two-byte or four-byte 
integer (SMALLINT or INT,
+      respectively).
+  """,
+  examples =
+    """
+    Examples:
+      > SELECT _FUNC_(3, 0) AS result;
+       8i7
+  """,
+  since = "4.0.0",
+  group = "string_funcs")
+case class RandStr(length: Expression, override val seedExpression: Expression)
+  extends ExpressionWithRandomSeed with BinaryLike[Expression] with 
Nondeterministic {
+  def this(length: Expression) = this(length, 
Literal(Uniform.random.nextLong(), LongType))
+
+  override def nullable: Boolean = false
+  override def dataType: DataType = StringType
+  override def stateful: Boolean = true
+  override def left: Expression = length
+  override def right: Expression = seedExpression
+
+  /**
+   * Record ID within each partition. By being transient, the Random Number 
Generator is
+   * reset every time we serialize and deserialize and initialize it.
+   */
+  @transient protected var rng: XORShiftRandom = _
+
+  @transient protected lazy val seed: Long = seedExpression match {
+    case e if e.dataType == IntegerType => e.eval().asInstanceOf[Int]
+    case e if e.dataType == LongType => e.eval().asInstanceOf[Long]
+  }
+  override protected def initializeInternal(partitionIndex: Int): Unit = {
+    rng = new XORShiftRandom(seed + partitionIndex)
+  }
+
+  override def withNewSeed(newSeed: Long): Expression = RandStr(length, 
Literal(newSeed, LongType))
+  override def withNewChildrenInternal(newFirst: Expression, newSecond: 
Expression): Expression =
+    RandStr(newFirst, newSecond)
+
+  override def checkInputDataTypes(): TypeCheckResult = {
+    var result: TypeCheckResult = TypeCheckResult.TypeCheckSuccess
+    Seq(length, seedExpression).zipWithIndex.foreach { case (expr: Expression, 
index: Int) =>
+      val valid = expr.dataType match {
+        case _ if !expr.foldable => false
+        case _: ShortType | _: IntegerType => true
+        case _: LongType if index == 1 => true
+        case _ => false
+      }
+      if (!valid) {
+        result = DataTypeMismatch(
+          errorSubClass = "UNEXPECTED_INPUT_TYPE",
+          messageParameters = Map(
+            "paramIndex" -> ordinalNumber(index),
+            "requiredType" -> "constant value of INT or SMALLINT",
+            "inputSql" -> toSQLExpr(expr),
+            "inputType" -> toSQLType(expr.dataType)))
+      }
+    }
+    result
+  }
+
+  override def evalInternal(input: InternalRow): Any = {

Review Comment:
   Busted :) I added some separate testing in `RandomSuite.scala` for the 
non-codegen case.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to