zhengruifeng commented on code in PR #41463:
URL: https://github.com/apache/spark/pull/41463#discussion_r1218824504


##########
sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala:
##########
@@ -545,6 +545,50 @@ class DateFunctionsSuite extends QueryTest with SharedSparkSession {
      df1.select(to_date(col("x"))), Row(Date.valueOf("2016-02-29")) :: Row(null) :: Nil)
   }
 
+  test("function unix_date") {
+    val d1 = Date.valueOf("2015-07-22")
+    val d2 = Date.valueOf("2015-07-01")
+    val d3 = Date.valueOf("2014-12-31")
+    val df = Seq(d1, d2, d3).toDF("d")
+
+    checkAnswer(
+      df.select(unix_date(col("d"))),
+      Seq(Row(16435), Row(16617), Row(16638)))

Review Comment:
   Can we compare the result with `df.selectExpr("unix_date(d)")`? The hard-coded numbers here are hard to understand.
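   
   For example, something like this (just a rough, untested sketch) would derive the expected rows from the SQL expression instead of hard-coding the epoch-day values:
   ```
   checkAnswer(
     df.select(unix_date(col("d"))),
     // derive the expected rows from the equivalent SQL expression rather than literals
     df.selectExpr("unix_date(d)").collect().toSeq)
   ```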



##########
python/pyspark/sql/functions.py:
##########
@@ -4981,6 +4981,64 @@ def to_date(col: "ColumnOrName", format: Optional[str] = None) -> Column:
         return _invoke_function("to_date", _to_java_column(col), format)
 
 
+@try_remote_functions
+def unix_date(col: "ColumnOrName") -> Column:
+    """Returns the number of days since 1970-01-01.
+
+    Examples
+    --------
+    >>> spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles")
+    >>> df = spark.createDataFrame([('1970-01-02',)], ['t'])
+    >>> df.select(unix_date(to_date(df.t)).alias('n')).collect()
+    [Row(n=1)]

Review Comment:
   We should unset the config after this doctest to make sure it does not pollute the environment:
   ```
   spark.conf.unset("spark.sql.session.timeZone")
   ```
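   
   For example (just a sketch; the only change is the trailing unset), the doctest could end with:
   ```
   >>> spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles")
   >>> df = spark.createDataFrame([('1970-01-02',)], ['t'])
   >>> df.select(unix_date(to_date(df.t)).alias('n')).collect()
   [Row(n=1)]
   >>> # unset so the session time zone does not leak into later doctests
   >>> spark.conf.unset("spark.sql.session.timeZone")
   ```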



##########
python/pyspark/sql/functions.py:
##########
@@ -4981,6 +4981,64 @@ def to_date(col: "ColumnOrName", format: Optional[str] = None) -> Column:
         return _invoke_function("to_date", _to_java_column(col), format)
 
 
+@try_remote_functions
+def unix_date(col: "ColumnOrName") -> Column:
+    """Returns the number of days since 1970-01-01.
+
+    Examples
+    --------
+    >>> spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles")
+    >>> df = spark.createDataFrame([('1970-01-02',)], ['t'])
+    >>> df.select(unix_date(to_date(df.t)).alias('n')).collect()
+    [Row(n=1)]
+    """
+    return _invoke_function_over_columns("unix_date", col)
+
+
+@try_remote_functions
+def unix_micros(col: "ColumnOrName") -> Column:
+    """Returns the number of microseconds since 1970-01-01 00:00:00 UTC.
+
+    Examples
+    --------
+    >>> spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles")
+    >>> df = spark.createDataFrame([('2015-07-22 10:00:00',)], ['t'])
+    >>> df.select(unix_micros(to_timestamp(df.t)).alias('n')).collect()
+    [Row(n=1437584400000000)]

Review Comment:
   ditto



##########
python/pyspark/sql/functions.py:
##########
@@ -4981,6 +4981,64 @@ def to_date(col: "ColumnOrName", format: Optional[str] = None) -> Column:
         return _invoke_function("to_date", _to_java_column(col), format)
 
 
+@try_remote_functions
+def unix_date(col: "ColumnOrName") -> Column:
+    """Returns the number of days since 1970-01-01.
+
+    Examples
+    --------
+    >>> spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles")
+    >>> df = spark.createDataFrame([('1970-01-02',)], ['t'])
+    >>> df.select(unix_date(to_date(df.t)).alias('n')).collect()
+    [Row(n=1)]
+    """
+    return _invoke_function_over_columns("unix_date", col)
+
+
+@try_remote_functions
+def unix_micros(col: "ColumnOrName") -> Column:
+    """Returns the number of microseconds since 1970-01-01 00:00:00 UTC.
+
+    Examples
+    --------
+    >>> spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles")
+    >>> df = spark.createDataFrame([('2015-07-22 10:00:00',)], ['t'])
+    >>> df.select(unix_micros(to_timestamp(df.t)).alias('n')).collect()
+    [Row(n=1437584400000000)]
+    """
+    return _invoke_function_over_columns("unix_micros", col)
+
+
+@try_remote_functions
+def unix_millis(col: "ColumnOrName") -> Column:
+    """Returns the number of milliseconds since 1970-01-01 00:00:00 UTC.
+    Truncates higher levels of precision.
+
+    Examples
+    --------
+    >>> spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles")
+    >>> df = spark.createDataFrame([('2015-07-22 10:00:00',)], ['t'])
+    >>> df.select(unix_millis(to_timestamp(df.t)).alias('n')).collect()
+    [Row(n=1437584400000)]
+    """
+    return _invoke_function_over_columns("unix_millis", col)
+
+
+@try_remote_functions
+def unix_seconds(col: "ColumnOrName") -> Column:
+    """Returns the number of seconds since 1970-01-01 00:00:00 UTC.
+    Truncates higher levels of precision.
+
+    Examples
+    --------
+    >>> spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles")
+    >>> df = spark.createDataFrame([('2015-07-22 10:00:00',)], ['t'])
+    >>> df.select(unix_seconds(to_timestamp(df.t)).alias('n')).collect()
+    [Row(n=1437584400)]

Review Comment:
   ditto



##########
sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala:
##########
@@ -545,6 +545,50 @@ class DateFunctionsSuite extends QueryTest with SharedSparkSession {
      df1.select(to_date(col("x"))), Row(Date.valueOf("2016-02-29")) :: Row(null) :: Nil)
   }
 
+  test("function unix_date") {
+    val d1 = Date.valueOf("2015-07-22")
+    val d2 = Date.valueOf("2015-07-01")
+    val d3 = Date.valueOf("2014-12-31")
+    val df = Seq(d1, d2, d3).toDF("d")
+
+    checkAnswer(
+      df.select(unix_date(col("d"))),
+      Seq(Row(16435), Row(16617), Row(16638)))
+  }
+
+  test("function unix_micros") {
+    val t1 = Timestamp.valueOf("2015-07-22 10:00:00")
+    val t2 = Timestamp.valueOf("2014-12-31 23:59:59")
+    val t3 = Timestamp.valueOf("2014-12-31 23:59:59")
+    val df = Seq(t1, t2, t3).toDF("t")
+
+    checkAnswer(
+      df.select(unix_micros(col("t"))),
      Seq(Row(1420099199000000L), Row(1420099199000000L), Row(1437584400000000L)))

Review Comment:
   ditto



##########
sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala:
##########
@@ -545,6 +545,50 @@ class DateFunctionsSuite extends QueryTest with SharedSparkSession {
      df1.select(to_date(col("x"))), Row(Date.valueOf("2016-02-29")) :: Row(null) :: Nil)
   }
 
+  test("function unix_date") {
+    val d1 = Date.valueOf("2015-07-22")
+    val d2 = Date.valueOf("2015-07-01")
+    val d3 = Date.valueOf("2014-12-31")
+    val df = Seq(d1, d2, d3).toDF("d")
+
+    checkAnswer(
+      df.select(unix_date(col("d"))),
+      Seq(Row(16435), Row(16617), Row(16638)))
+  }
+
+  test("function unix_micros") {
+    val t1 = Timestamp.valueOf("2015-07-22 10:00:00")
+    val t2 = Timestamp.valueOf("2014-12-31 23:59:59")
+    val t3 = Timestamp.valueOf("2014-12-31 23:59:59")
+    val df = Seq(t1, t2, t3).toDF("t")
+
+    checkAnswer(
+      df.select(unix_micros(col("t"))),
      Seq(Row(1420099199000000L), Row(1420099199000000L), Row(1437584400000000L)))
+  }
+
+  test("function unix_millis") {
+    val t1 = Timestamp.valueOf("2015-07-22 10:00:00")
+    val t2 = Timestamp.valueOf("2014-12-31 23:59:59")
+    val t3 = Timestamp.valueOf("2014-12-31 23:59:59")
+    val df = Seq(t1, t2, t3).toDF("t")
+
+    checkAnswer(
+      df.select(unix_millis(col("t"))),
+      Seq(Row(1420099199000L), Row(1420099199000L), Row(1437584400000L)))

Review Comment:
   ditto



##########
python/pyspark/sql/functions.py:
##########
@@ -4981,6 +4981,64 @@ def to_date(col: "ColumnOrName", format: Optional[str] = None) -> Column:
         return _invoke_function("to_date", _to_java_column(col), format)
 
 
+@try_remote_functions
+def unix_date(col: "ColumnOrName") -> Column:
+    """Returns the number of days since 1970-01-01.
+
+    Examples
+    --------
+    >>> spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles")
+    >>> df = spark.createDataFrame([('1970-01-02',)], ['t'])
+    >>> df.select(unix_date(to_date(df.t)).alias('n')).collect()
+    [Row(n=1)]
+    """
+    return _invoke_function_over_columns("unix_date", col)
+
+
+@try_remote_functions
+def unix_micros(col: "ColumnOrName") -> Column:
+    """Returns the number of microseconds since 1970-01-01 00:00:00 UTC.
+
+    Examples
+    --------
+    >>> spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles")
+    >>> df = spark.createDataFrame([('2015-07-22 10:00:00',)], ['t'])
+    >>> df.select(unix_micros(to_timestamp(df.t)).alias('n')).collect()
+    [Row(n=1437584400000000)]
+    """
+    return _invoke_function_over_columns("unix_micros", col)
+
+
+@try_remote_functions
+def unix_millis(col: "ColumnOrName") -> Column:
+    """Returns the number of milliseconds since 1970-01-01 00:00:00 UTC.
+    Truncates higher levels of precision.
+
+    Examples
+    --------
+    >>> spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles")
+    >>> df = spark.createDataFrame([('2015-07-22 10:00:00',)], ['t'])
+    >>> df.select(unix_millis(to_timestamp(df.t)).alias('n')).collect()
+    [Row(n=1437584400000)]

Review Comment:
   ditto



##########
sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala:
##########
@@ -545,6 +545,50 @@ class DateFunctionsSuite extends QueryTest with SharedSparkSession {
      df1.select(to_date(col("x"))), Row(Date.valueOf("2016-02-29")) :: Row(null) :: Nil)
   }
 
+  test("function unix_date") {
+    val d1 = Date.valueOf("2015-07-22")
+    val d2 = Date.valueOf("2015-07-01")
+    val d3 = Date.valueOf("2014-12-31")
+    val df = Seq(d1, d2, d3).toDF("d")
+
+    checkAnswer(
+      df.select(unix_date(col("d"))),
+      Seq(Row(16435), Row(16617), Row(16638)))
+  }
+
+  test("function unix_micros") {
+    val t1 = Timestamp.valueOf("2015-07-22 10:00:00")
+    val t2 = Timestamp.valueOf("2014-12-31 23:59:59")
+    val t3 = Timestamp.valueOf("2014-12-31 23:59:59")
+    val df = Seq(t1, t2, t3).toDF("t")
+
+    checkAnswer(
+      df.select(unix_micros(col("t"))),
      Seq(Row(1420099199000000L), Row(1420099199000000L), Row(1437584400000000L)))
+  }
+
+  test("function unix_millis") {
+    val t1 = Timestamp.valueOf("2015-07-22 10:00:00")
+    val t2 = Timestamp.valueOf("2014-12-31 23:59:59")
+    val t3 = Timestamp.valueOf("2014-12-31 23:59:59")
+    val df = Seq(t1, t2, t3).toDF("t")
+
+    checkAnswer(
+      df.select(unix_millis(col("t"))),
+      Seq(Row(1420099199000L), Row(1420099199000L), Row(1437584400000L)))
+  }
+
+  test("function unix_seconds") {
+    val t1 = Timestamp.valueOf("2015-07-22 10:00:00")
+    val t2 = Timestamp.valueOf("2014-12-31 23:59:59")
+    val t3 = Timestamp.valueOf("2014-12-31 23:59:59")
+    val df = Seq(t1, t2, t3).toDF("t")
+
+    checkAnswer(
+      df.select(unix_seconds(col("t"))),
+      Seq(Row(1420099199L), Row(1420099199L), Row(1437584400L)))

Review Comment:
   ditto



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org
