srowen closed pull request #21055: [SPARK-23693][SQL] Functions to generate 
UUIDs
URL: https://github.com/apache/spark/pull/21055
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance.
(Because this pull request originates from a fork, the diff would not
otherwise be visible on GitHub after the merge, so it is supplied below.)

diff --git a/pom.xml b/pom.xml
index 0a711f287a53f..8118d389385d4 100644
--- a/pom.xml
+++ b/pom.xml
@@ -160,6 +160,7 @@
     <codehaus.jackson.version>1.9.13</codehaus.jackson.version>
     <fasterxml.jackson.version>2.6.7</fasterxml.jackson.version>
     
<fasterxml.jackson.databind.version>2.6.7.1</fasterxml.jackson.databind.version>
+    
<fasterxml.java.uuid.generator.version>3.1.5</fasterxml.java.uuid.generator.version>
     <snappy.version>1.1.7.1</snappy.version>
     <netlib.java.version>1.1.2</netlib.java.version>
     <calcite.version>1.2.0-incubating</calcite.version>
@@ -654,6 +655,11 @@
         <artifactId>jackson-module-jaxb-annotations</artifactId>
         <version>${fasterxml.jackson.version}</version>
       </dependency>
+      <dependency>
+        <groupId>com.fasterxml.uuid</groupId>
+        <artifactId>java-uuid-generator</artifactId>
+        <version>${fasterxml.java.uuid.generator.version}</version>
+      </dependency>
       <dependency>
         <groupId>org.glassfish.jersey.core</groupId>
         <artifactId>jersey-server</artifactId>
diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml
index 7d23637e28342..0463f6076d2c4 100644
--- a/sql/catalyst/pom.xml
+++ b/sql/catalyst/pom.xml
@@ -103,6 +103,10 @@
       <groupId>commons-codec</groupId>
       <artifactId>commons-codec</artifactId>
     </dependency>
+    <dependency>
+      <groupId>com.fasterxml.uuid</groupId>
+      <artifactId>java-uuid-generator</artifactId>
+    </dependency>
   </dependencies>
   <build>
     
<outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index e821e96522f7c..b05d0c886a808 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -1997,7 +1997,7 @@ class Analyzer(
   }
 
   /**
-   * Set the seed for random number generation in Uuid expressions.
+   * Set the seed for random number generation in random-based UUID 
expressions.
    */
   object ResolvedUuidExpressions extends Rule[LogicalPlan] {
     private lazy val random = new Random()
@@ -2005,7 +2005,7 @@ class Analyzer(
     override def apply(plan: LogicalPlan): LogicalPlan = plan.transformUp {
       case p if p.resolved => p
       case p => p transformExpressionsUp {
-        case Uuid(None) => Uuid(Some(random.nextLong()))
+        case RandomBasedUuid(None) => RandomBasedUuid(Some(random.nextLong()))
       }
     }
   }
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
index 747016beb06e7..e891a0f332f94 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
@@ -414,7 +414,9 @@ object FunctionRegistry {
     expression[AssertTrue]("assert_true"),
     expression[Crc32]("crc32"),
     expression[Md5]("md5"),
-    expression[Uuid]("uuid"),
+    expression[TimeBasedUuid]("time_based_uuid"),
+    expression[RandomBasedUuid]("random_based_uuid"),
+    expression[RandomBasedUuid]("uuid"),
     expression[Murmur3Hash]("hash"),
     expression[Sha1]("sha"),
     expression[Sha1]("sha1"),
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/RandomBasedUuid.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/RandomBasedUuid.scala
new file mode 100644
index 0000000000000..54d0321d5d502
--- /dev/null
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/RandomBasedUuid.scala
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.expressions
+
+import java.util.Random
+
+import com.fasterxml.uuid.Generators
+
+import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, 
ExprCode, FalseLiteral}
+
+@ExpressionDescription(
+  usage = "_FUNC_() - Returns a random-based universally unique identifier 
(UUID)." +
+    " The value is returned as a canonical UUID 36-character string.",
+  examples = """
+    Examples:
+      > SELECT _FUNC_();
+       46707d92-02f4-4817-8116-a4c3b23e6266
+  """,
+  since = "2.4.0"
+)
+case class RandomBasedUuid(randomSeed: Option[Long] = None) extends 
UuidExpression {
+
+  def this() = this(None)
+
+  override lazy val resolved: Boolean = randomSeed.isDefined
+
+  override protected def initializeInternal(partitionIndex: Int): Unit = {
+    generator = Generators.randomBasedGenerator(new Random(randomSeed.get + 
partitionIndex))
+  }
+
+  override def freshCopy(): RandomBasedUuid = RandomBasedUuid(randomSeed)
+
+  override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): 
ExprCode = {
+    val gen = ctx.freshName("gen")
+    ctx.addMutableState("com.fasterxml.uuid.NoArgGenerator",
+      gen,
+      forceInline = true,
+      useFreshName = false)
+    ctx.addPartitionInitializationStatement(s"$gen = " +
+      "com.fasterxml.uuid.Generators.randomBasedGenerator(" +
+      s"new java.util.Random(${randomSeed.get}L + partitionIndex)" +
+      ");")
+    ev.copy(code = s"final UTF8String ${ev.value} = " +
+      s"UTF8String.fromString(${gen}.generate().toString());",
+      isNull = FalseLiteral)
+  }
+}
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/TimeBasedUuid.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/TimeBasedUuid.scala
new file mode 100644
index 0000000000000..c1527955da035
--- /dev/null
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/TimeBasedUuid.scala
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.expressions
+
+import com.fasterxml.uuid.{EthernetAddress, Generators}
+
+import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, 
ExprCode, FalseLiteral}
+
+@ExpressionDescription(
+  usage = "_FUNC_() - Returns a time-based universally unique identifier 
(UUID)." +
+    " The value is returned as a canonical UUID 36-character string.",
+  examples = """
+    Examples:
+      > SELECT _FUNC_();
+       46707d92-02f4-4817-8116-a4c3b23e6266
+  """,
+  since = "2.4.0"
+)
+case class TimeBasedUuid() extends UuidExpression {
+
+  override protected def initializeInternal(partitionIndex: Int): Unit = {
+    generator = Generators.timeBasedGenerator(EthernetAddress.fromInterface())
+  }
+
+  override def freshCopy(): TimeBasedUuid = TimeBasedUuid()
+
+  override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): 
ExprCode = {
+    val gen = ctx.freshName("gen")
+    ctx.addMutableState("com.fasterxml.uuid.NoArgGenerator",
+      gen,
+      forceInline = true,
+      useFreshName = false)
+    ctx.addPartitionInitializationStatement(s"$gen = " +
+      "com.fasterxml.uuid.Generators.timeBasedGenerator(" +
+      "com.fasterxml.uuid.EthernetAddress.fromInterface()" +
+      ");")
+    ev.copy(code = s"final UTF8String ${ev.value} = " +
+      s"UTF8String.fromString(${gen}.generate().toString());",
+      isNull = FalseLiteral)
+  }
+
+}
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/RandomUUIDGenerator.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/UuidExpression.scala
similarity index 51%
rename from 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/RandomUUIDGenerator.scala
rename to 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/UuidExpression.scala
index 4fe07a071c1ca..61bf8331dd3ae 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/RandomUUIDGenerator.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/UuidExpression.scala
@@ -15,29 +15,27 @@
  * limitations under the License.
  */
 
-package org.apache.spark.sql.catalyst.util
+package org.apache.spark.sql.catalyst.expressions
 
-import java.util.UUID
-
-import org.apache.commons.math3.random.MersenneTwister
+import com.fasterxml.uuid.NoArgGenerator
 
+import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.types.{DataType, StringType}
 import org.apache.spark.unsafe.types.UTF8String
 
-/**
- * This class is used to generate a UUID from Pseudo-Random Numbers.
- *
- * For the algorithm, see RFC 4122: A Universally Unique IDentifier (UUID) URN 
Namespace,
- * section 4.4 "Algorithms for Creating a UUID from Truly Random or 
Pseudo-Random Numbers".
- */
-case class RandomUUIDGenerator(randomSeed: Long) {
-  private val random = new MersenneTwister(randomSeed)
+abstract class UuidExpression extends LeafExpression with Stateful {
 
-  def getNextUUID(): UUID = {
-    val mostSigBits = (random.nextLong() & 0xFFFFFFFFFFFF0FFFL) | 
0x0000000000004000L
-    val leastSigBits = (random.nextLong() | 0x8000000000000000L) & 
0xBFFFFFFFFFFFFFFFL
+  @transient protected var generator: NoArgGenerator = _
 
-    new UUID(mostSigBits, leastSigBits)
+  override protected def evalInternal(input: InternalRow): Any = {
+    val uuid = generator
+      .generate()
+      .toString
+    UTF8String.fromString(uuid)
   }
 
-  def getNextUUIDUTF8String(): UTF8String = 
UTF8String.fromString(getNextUUID().toString())
+  override def nullable: Boolean = false
+
+  override def dataType: DataType = StringType
+
 }
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala
index 7eda65a867028..d03728728f106 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala
@@ -17,13 +17,9 @@
 
 package org.apache.spark.sql.catalyst.expressions
 
-import java.util.UUID
-
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.expressions.codegen._
-import org.apache.spark.sql.catalyst.util.RandomUUIDGenerator
 import org.apache.spark.sql.types._
-import org.apache.spark.unsafe.types.UTF8String
 
 /**
  * Print the result of an expression to stderr (used for debugging codegen).
@@ -114,45 +110,3 @@ case class CurrentDatabase() extends LeafExpression with 
Unevaluable {
   override def nullable: Boolean = false
   override def prettyName: String = "current_database"
 }
-
-// scalastyle:off line.size.limit
-@ExpressionDescription(
-  usage = "_FUNC_() - Returns an universally unique identifier (UUID) string. 
The value is returned as a canonical UUID 36-character string.",
-  examples = """
-    Examples:
-      > SELECT _FUNC_();
-       46707d92-02f4-4817-8116-a4c3b23e6266
-  """)
-// scalastyle:on line.size.limit
-case class Uuid(randomSeed: Option[Long] = None) extends LeafExpression with 
Stateful {
-
-  def this() = this(None)
-
-  override lazy val resolved: Boolean = randomSeed.isDefined
-
-  override def nullable: Boolean = false
-
-  override def dataType: DataType = StringType
-
-  @transient private[this] var randomGenerator: RandomUUIDGenerator = _
-
-  override protected def initializeInternal(partitionIndex: Int): Unit =
-    randomGenerator = RandomUUIDGenerator(randomSeed.get + partitionIndex)
-
-  override protected def evalInternal(input: InternalRow): Any =
-    randomGenerator.getNextUUIDUTF8String()
-
-  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
-    val randomGen = ctx.freshName("randomGen")
-    
ctx.addMutableState("org.apache.spark.sql.catalyst.util.RandomUUIDGenerator", 
randomGen,
-      forceInline = true,
-      useFreshName = false)
-    ctx.addPartitionInitializationStatement(s"$randomGen = " +
-      "new org.apache.spark.sql.catalyst.util.RandomUUIDGenerator(" +
-      s"${randomSeed.get}L + partitionIndex);")
-    ev.copy(code = s"final UTF8String ${ev.value} = 
$randomGen.getNextUUIDUTF8String();",
-      isNull = FalseLiteral)
-  }
-
-  override def freshCopy(): Uuid = Uuid(randomSeed)
-}
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolvedUuidExpressionsSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolvedUuidExpressionsSuite.scala
index fe57c199b8744..b5a4e4eff9c7d 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolvedUuidExpressionsSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolvedUuidExpressionsSuite.scala
@@ -23,29 +23,28 @@ import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan}
 
 /**
- * Test suite for resolving Uuid expressions.
+ * Test suite for resolving random-based UUID expressions.
  */
 class ResolvedUuidExpressionsSuite extends AnalysisTest {
 
   private lazy val a = 'a.int
   private lazy val r = LocalRelation(a)
-  private lazy val uuid1 = Uuid().as('_uuid1)
-  private lazy val uuid2 = Uuid().as('_uuid2)
-  private lazy val uuid3 = Uuid().as('_uuid3)
+  private lazy val uuid1 = RandomBasedUuid().as('_uuid1)
+  private lazy val uuid2 = RandomBasedUuid().as('_uuid2)
+  private lazy val uuid3 = RandomBasedUuid().as('_uuid3)
   private lazy val uuid1Ref = uuid1.toAttribute
 
   private val analyzer = getAnalyzer(caseSensitive = true)
 
-  private def getUuidExpressions(plan: LogicalPlan): Seq[Uuid] = {
-    plan.flatMap {
-      case p =>
-        p.expressions.flatMap(_.collect {
-          case u: Uuid => u
-        })
+  private def getUuidExpressions(plan: LogicalPlan): Seq[RandomBasedUuid] = {
+    plan.flatMap { p =>
+      p.expressions.flatMap(_.collect {
+        case u: RandomBasedUuid => u
+      })
     }
   }
 
-  test("analyzed plan sets random seed for Uuid expression") {
+  test("analyzed plan sets random seed for random-based UUID expression") {
     val plan = r.select(a, uuid1)
     val resolvedPlan = analyzer.executeAndCheck(plan)
     getUuidExpressions(resolvedPlan).foreach { u =>
@@ -54,13 +53,13 @@ class ResolvedUuidExpressionsSuite extends AnalysisTest {
     }
   }
 
-  test("Uuid expressions should have different random seeds") {
+  test("Random-based UUID expressions should have different random seeds") {
     val plan = r.select(a, uuid1).groupBy(uuid1Ref)(uuid2, uuid3)
     val resolvedPlan = analyzer.executeAndCheck(plan)
     
assert(getUuidExpressions(resolvedPlan).map(_.randomSeed.get).distinct.length 
== 3)
   }
 
-  test("Different analyzed plans should have different random seeds in Uuids") 
{
+  test("Different analyzed plans should have different random seeds in 
random-based UUIDs") {
     val plan = r.select(a, uuid1).groupBy(uuid1Ref)(uuid2, uuid3)
     val resolvedPlan1 = analyzer.executeAndCheck(plan)
     val resolvedPlan2 = analyzer.executeAndCheck(plan)
@@ -68,6 +67,6 @@ class ResolvedUuidExpressionsSuite extends AnalysisTest {
     val uuids2 = getUuidExpressions(resolvedPlan2)
     assert(uuids1.distinct.length == 3)
     assert(uuids2.distinct.length == 3)
-    assert(uuids1.intersect(uuids2).length == 0)
+    assert(uuids1.intersect(uuids2).isEmpty)
   }
 }
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MiscExpressionsSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MiscExpressionsSuite.scala
index b6c269348b002..11fe284ae95d8 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MiscExpressionsSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MiscExpressionsSuite.scala
@@ -19,8 +19,6 @@ package org.apache.spark.sql.catalyst.expressions
 
 import java.io.PrintStream
 
-import scala.util.Random
-
 import org.apache.spark.SparkFunSuite
 import org.apache.spark.sql.types._
 
@@ -43,30 +41,6 @@ class MiscExpressionsSuite extends SparkFunSuite with 
ExpressionEvalHelper {
     checkEvaluation(AssertTrue(Cast(Literal(1), BooleanType)), null)
   }
 
-  test("uuid") {
-    checkEvaluation(Length(Uuid(Some(0))), 36)
-    val r = new Random()
-    val seed1 = Some(r.nextLong())
-    assert(evaluateWithoutCodegen(Uuid(seed1)) === 
evaluateWithoutCodegen(Uuid(seed1)))
-    assert(evaluateWithGeneratedMutableProjection(Uuid(seed1)) ===
-      evaluateWithGeneratedMutableProjection(Uuid(seed1)))
-    assert(evaluateWithUnsafeProjection(Uuid(seed1)) ===
-      evaluateWithUnsafeProjection(Uuid(seed1)))
-
-    val seed2 = Some(r.nextLong())
-    assert(evaluateWithoutCodegen(Uuid(seed1)) !== 
evaluateWithoutCodegen(Uuid(seed2)))
-    assert(evaluateWithGeneratedMutableProjection(Uuid(seed1)) !==
-      evaluateWithGeneratedMutableProjection(Uuid(seed2)))
-    assert(evaluateWithUnsafeProjection(Uuid(seed1)) !==
-      evaluateWithUnsafeProjection(Uuid(seed2)))
-
-    val uuid = Uuid(seed1)
-    assert(uuid.fastEquals(uuid))
-    assert(!uuid.fastEquals(Uuid(seed1)))
-    assert(!uuid.fastEquals(uuid.freshCopy()))
-    assert(!uuid.fastEquals(Uuid(seed2)))
-  }
-
   test("PrintToStderr") {
     val inputExpr = Literal(1)
     val systemErr = System.err
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RandomBasedUuidSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RandomBasedUuidSuite.scala
new file mode 100644
index 0000000000000..74cd65ad86732
--- /dev/null
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RandomBasedUuidSuite.scala
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.expressions
+
+import org.apache.spark.SparkFunSuite
+
+class RandomBasedUuidSuite extends SparkFunSuite with ExpressionEvalHelper {
+
+  test("uuid length") {
+    checkEvaluation(Length(RandomBasedUuid(Some(0))), 36)
+  }
+
+  test("uuid equals") {
+    val seed1 = Some(5L)
+    val seed2 = Some(10L)
+    val uuid = RandomBasedUuid(seed1)
+    assert(uuid.fastEquals(uuid))
+    assert(!uuid.fastEquals(RandomBasedUuid(seed1)))
+    assert(!uuid.fastEquals(RandomBasedUuid(seed2)))
+    assert(!uuid.fastEquals(uuid.freshCopy()))
+  }
+
+  test("uuid evaluate") {
+    val seed1 = Some(5L)
+    assert(evaluateWithoutCodegen(RandomBasedUuid(seed1)) ===
+      evaluateWithoutCodegen(RandomBasedUuid(seed1)))
+    assert(evaluateWithGeneratedMutableProjection(RandomBasedUuid(seed1)) ===
+      evaluateWithGeneratedMutableProjection(RandomBasedUuid(seed1)))
+    assert(evaluateWithUnsafeProjection(RandomBasedUuid(seed1)) ===
+      evaluateWithUnsafeProjection(RandomBasedUuid(seed1)))
+
+    val seed2 = Some(10L)
+    assert(evaluateWithoutCodegen(RandomBasedUuid(seed1)) !==
+      evaluateWithoutCodegen(RandomBasedUuid(seed2)))
+    assert(evaluateWithGeneratedMutableProjection(RandomBasedUuid(seed1)) !==
+      evaluateWithGeneratedMutableProjection(RandomBasedUuid(seed2)))
+    assert(evaluateWithUnsafeProjection(RandomBasedUuid(seed1)) !==
+      evaluateWithUnsafeProjection(RandomBasedUuid(seed2)))
+  }
+}
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/TimeBasedUuidSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/TimeBasedUuidSuite.scala
new file mode 100644
index 0000000000000..2519e42fc2a0e
--- /dev/null
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/TimeBasedUuidSuite.scala
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.expressions
+
+import org.apache.spark.SparkFunSuite
+
+class TimeBasedUuidSuite extends SparkFunSuite with ExpressionEvalHelper {
+
+  test("uuid length") {
+    checkEvaluation(Length(TimeBasedUuid()), 36)
+  }
+
+  test("uuid equals") {
+    val uuid = TimeBasedUuid()
+    assert(uuid.fastEquals(uuid))
+    assert(!uuid.fastEquals(TimeBasedUuid()))
+    assert(!uuid.fastEquals(uuid.freshCopy()))
+  }
+}
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/RandomUUIDGeneratorSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/RandomUUIDGeneratorSuite.scala
deleted file mode 100644
index b75739e5a3a65..0000000000000
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/RandomUUIDGeneratorSuite.scala
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.catalyst.util
-
-import scala.util.Random
-
-import org.apache.spark.SparkFunSuite
-
-class RandomUUIDGeneratorSuite extends SparkFunSuite {
-  test("RandomUUIDGenerator should generate version 4, variant 2 UUIDs") {
-    val generator = RandomUUIDGenerator(new Random().nextLong())
-    for (_ <- 0 to 100) {
-      val uuid = generator.getNextUUID()
-      assert(uuid.version() == 4)
-      assert(uuid.variant() == 2)
-    }
-  }
-
- test("UUID from RandomUUIDGenerator should be deterministic") {
-   val r1 = new Random(100)
-   val generator1 = RandomUUIDGenerator(r1.nextLong())
-   val r2 = new Random(100)
-   val generator2 = RandomUUIDGenerator(r2.nextLong())
-   val r3 = new Random(101)
-   val generator3 = RandomUUIDGenerator(r3.nextLong())
-
-   for (_ <- 0 to 100) {
-      val uuid1 = generator1.getNextUUID()
-      val uuid2 = generator2.getNextUUID()
-      val uuid3 = generator3.getNextUUID()
-      assert(uuid1 == uuid2)
-      assert(uuid1 != uuid3)
-   }
- }
-
- test("Get UTF8String UUID") {
-   val generator = RandomUUIDGenerator(new Random().nextLong())
-   val utf8StringUUID = generator.getNextUUIDUTF8String()
-   val uuid = java.util.UUID.fromString(utf8StringUUID.toString)
-   assert(uuid.version() == 4 && uuid.variant() == 2 && 
utf8StringUUID.toString == uuid.toString)
- }
-}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index c658f25ced053..124597eae3522 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -1127,6 +1127,52 @@ object functions {
    */
   def monotonically_increasing_id(): Column = withExpr { 
MonotonicallyIncreasingID() }
 
+  /**
+   * A column expression that generates time-based UUIDs (variant 1).
+   *
+   * Universally unique identifiers
+   * (<a 
href="https://en.wikipedia.org/wiki/Universally_unique_identifier";>UUIDs</a>)
+   * are generated in the standard string format with total 36 characters, for 
example:
+   * {{{
+   * 8919d47c-34bb-11e8-b81c-6cc2172fd3c0
+   * }}}
+   * This function does not produce same UUID values for retries on the same 
data frame.
+   *
+   * @group normal_funcs
+   * @since 2.4.0
+   */
+  def time_based_uuid(): Column = withExpr {
+    TimeBasedUuid()
+  }
+
+  /**
+   * A column expression that generates random-based UUIDs (variant 4).
+   *
+   * Universally unique identifiers
+   * (<a 
href="https://en.wikipedia.org/wiki/Universally_unique_identifier";>UUIDs</a>)
+   * are generated in the standard string format with total 36 characters, for 
example:
+   * {{{
+   * 8919d47c-34bb-11e8-b81c-6cc2172fd3c0
+   * }}}
+   * This function produces same UUID values for retries on the same data 
frame.
+   *
+   * @group normal_funcs
+   * @since 2.4.0
+   */
+  def random_based_uuid(): Column = withExpr {
+    RandomBasedUuid()
+  }
+
+  /**
+   * A column expression that generates random-based UUIDs (variant 4).
+   *
+   * Alias for [[random_based_uuid()]].
+   *
+   * @group normal_funcs
+   * @since 2.4.0
+   */
+  def uuid(): Column = random_based_uuid()
+
   /**
    * Returns col1 if it is not NaN, or col2 if col1 is NaN.
    *
diff --git a/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql 
b/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql
index 4113734e1707e..6a91dfcc03b65 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql
@@ -13,8 +13,8 @@ FROM (SELECT id col1, id col2, id col3, id col4 FROM 
range(10));
 select replace('abc', 'b', '123');
 select replace('abc', 'b');
 
--- uuid
-select length(uuid()), (uuid() <> uuid());
+-- random_based_uuid
+select length(random_based_uuid()), (random_based_uuid() <> 
random_based_uuid());
 
 -- position
 select position('bar' in 'foobarbar'), position(null, 'foobarbar'), 
position('aaads', null);
diff --git 
a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out 
b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out
index d5f8705a35ed6..b0001ba1d1340 100644
--- a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out
@@ -73,9 +73,9 @@ ac
 
 
 -- !query 6
-select length(uuid()), (uuid() <> uuid())
+select length(random_based_uuid()), (random_based_uuid() <> 
random_based_uuid())
 -- !query 6 schema
-struct<length(uuid()):int,(NOT (uuid() = uuid())):boolean>
+struct<length(randombaseduuid()):int,(NOT (randombaseduuid() = 
randombaseduuid())):boolean>
 -- !query 6 output
 36     true
 
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala
index 7c45be21961d3..4bd96a60f1e19 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala
@@ -519,6 +519,42 @@ class ColumnExpressionSuite extends QueryTest with 
SharedSQLContext {
     )
   }
 
+  test("time_based_uuid") {
+    // Make sure we have 2 partitions, each with 2 records.
+    val df = sparkContext.parallelize(Seq[Int](), 2)
+      .mapPartitions(_ => Iterator(1, 2))
+      .toDF("value")
+    val dataSize = df.count()
+
+    val uuids = df.select(time_based_uuid())
+      .distinct()
+      .collect()
+    assert(uuids.length === dataSize,
+      "Number of unique identifiers must be equal to the number of records in 
the dataset")
+    uuids.foreach { uuidRow =>
+      val uuid = uuidRow.getAs[String](0)
+      assert(uuid.length === 36, s"Length of uuid $uuid")
+    }
+  }
+
+  test("random_based_uuid") {
+    // Make sure we have 2 partitions, each with 2 records.
+    val df = sparkContext.parallelize(Seq[Int](), 2)
+      .mapPartitions(_ => Iterator(1, 2))
+      .toDF("value")
+    val dataSize = df.count()
+
+    val uuids = df.select(random_based_uuid())
+      .distinct()
+      .collect()
+    assert(uuids.length === dataSize,
+      "Number of unique identifiers must be equal to the number of records in 
the dataset")
+    uuids.foreach { uuidRow =>
+      val uuid = uuidRow.getAs[String](0)
+      assert(uuid.length === 36, s"Length of uuid $uuid")
+    }
+  }
+
   test("spark_partition_id") {
     // Make sure we have 2 partitions, each with 2 records.
     val df = sparkContext.parallelize(Seq[Int](), 2).mapPartitions { _ =>
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
index 60e84e6ee7504..4df84034b5d7a 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
@@ -28,7 +28,7 @@ import org.scalatest.Matchers._
 
 import org.apache.spark.SparkException
 import org.apache.spark.sql.catalyst.TableIdentifier
-import org.apache.spark.sql.catalyst.expressions.Uuid
+import org.apache.spark.sql.catalyst.expressions.RandomBasedUuid
 import org.apache.spark.sql.catalyst.plans.logical.{Filter, OneRowRelation, 
Union}
 import org.apache.spark.sql.execution.{FilterExec, QueryExecution, 
WholeStageCodegenExec}
 import org.apache.spark.sql.execution.aggregate.HashAggregateExec
@@ -2261,8 +2261,8 @@ class DataFrameSuite extends QueryTest with 
SharedSQLContext {
     assert(df.queryExecution.executedPlan.isInstanceOf[WholeStageCodegenExec])
   }
 
-  test("Uuid expressions should produce same results at retries in the same 
DataFrame") {
-    val df = spark.range(1).select($"id", new Column(Uuid()))
+  test("Random-based UUIDs should produce same results at retries in the same 
DataFrame") {
+    val df = spark.range(1).select($"id", new Column(RandomBasedUuid()))
     checkAnswer(df, df.collect())
   }
 }


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to