zhengruifeng commented on code in PR #41653:
URL: https://github.com/apache/spark/pull/41653#discussion_r1234701774
##########
connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala:
##########
@@ -1807,6 +1807,75 @@ object functions {
*/
def sqrt(colName: String): Column = sqrt(Column(colName))
+ /**
+ * Returns the sum of `left` and `right` and the result is null on overflow.
The acceptable
+ * input types are the same as those of the `+` operator.
+ *
+ * @note
Review Comment:
it should be supported naturally in Connect
##########
python/pyspark/sql/functions.py:
##########
@@ -350,6 +350,159 @@ def sqrt(col: "ColumnOrName") -> Column:
return _invoke_function_over_columns("sqrt", col)
+@try_remote_functions
+def try_add(left: "ColumnOrName", right: "ColumnOrName") -> Column:
+ """
+ Returns the sum of `left` and `right` and the result is null on overflow.
+ The acceptable input types are the same as those of the `+` operator.
+
+ .. versionadded:: 3.5.0
+
+ Notes
+ -----
+ Only Numeric type is supported in this function, while `try_add` in SQL
supports Numeric,
+ DATE, TIMESTAMP, and INTERVAL.
+
+ Parameters
+ ----------
+ left : :class:`~pyspark.sql.Column` or str
+ right : :class:`~pyspark.sql.Column` or str
+
+ Examples
+ --------
+ >>> df = spark.createDataFrame([(1982, 15), (1990, 2)], ["birth", "age"])
+ >>> df.select(try_add(df.birth, df.age).alias('r')).collect()
+ [Row(r=1997), Row(r=1992)]
+ """
+ return _invoke_function_over_columns("try_add", left, right)
+
+
+@try_remote_functions
+def try_avg(col: "ColumnOrName") -> Column:
+ """
+ Returns the mean calculated from values of a group and the result is null
on overflow.
+
+ .. versionadded:: 3.5.0
+
+ Parameters
+ ----------
+ col : :class:`~pyspark.sql.Column` or str
+
+ Examples
+ --------
+ >>> df = spark.createDataFrame([(1982, 15), (1990, 2)], ["birth", "age"])
+ >>> df.select(try_avg(df.age).alias('r')).collect()
+ [Row(r=8.5)]
+ """
+ return _invoke_function_over_columns("try_avg", col)
+
+
+@try_remote_functions
+def try_divide(left: "ColumnOrName", right: "ColumnOrName") -> Column:
+ """
+ Returns `dividend`/`divisor`. It always performs floating point division.
Its result is
+ always null if `divisor` is 0.
+
+ .. versionadded:: 3.5.0
+
+ Notes
Review Comment:
ditto
##########
python/pyspark/sql/functions.py:
##########
@@ -350,6 +350,159 @@ def sqrt(col: "ColumnOrName") -> Column:
return _invoke_function_over_columns("sqrt", col)
+@try_remote_functions
+def try_add(left: "ColumnOrName", right: "ColumnOrName") -> Column:
+ """
+ Returns the sum of `left` and `right` and the result is null on overflow.
+ The acceptable input types are the same as those of the `+` operator.
+
+ .. versionadded:: 3.5.0
+
+ Notes
+ -----
+ Only Numeric type is supported in this function, while `try_add` in SQL
supports Numeric,
+ DATE, TIMESTAMP, and INTERVAL.
+
+ Parameters
+ ----------
+ left : :class:`~pyspark.sql.Column` or str
+ right : :class:`~pyspark.sql.Column` or str
+
+ Examples
+ --------
+ >>> df = spark.createDataFrame([(1982, 15), (1990, 2)], ["birth", "age"])
+ >>> df.select(try_add(df.birth, df.age).alias('r')).collect()
+ [Row(r=1997), Row(r=1992)]
+ """
+ return _invoke_function_over_columns("try_add", left, right)
+
+
+@try_remote_functions
+def try_avg(col: "ColumnOrName") -> Column:
+ """
+ Returns the mean calculated from values of a group and the result is null
on overflow.
+
+ .. versionadded:: 3.5.0
+
+ Parameters
+ ----------
+ col : :class:`~pyspark.sql.Column` or str
+
+ Examples
+ --------
+ >>> df = spark.createDataFrame([(1982, 15), (1990, 2)], ["birth", "age"])
+ >>> df.select(try_avg(df.age).alias('r')).collect()
+ [Row(r=8.5)]
+ """
+ return _invoke_function_over_columns("try_avg", col)
+
+
+@try_remote_functions
+def try_divide(left: "ColumnOrName", right: "ColumnOrName") -> Column:
+ """
+ Returns `dividend`/`divisor`. It always performs floating point division.
Its result is
+ always null if `divisor` is 0.
+
+ .. versionadded:: 3.5.0
+
+ Notes
+ -----
+ The `dividend` must be a numeric, `divisor` must be a numeric in this
function. While the
+ `dividend` can be a numeric or an interval, `divisor` must be a numeric in
SQL function
+ `try_divide`.
+
+ Parameters
+ ----------
+ left : :class:`~pyspark.sql.Column` or str
+ dividend
+ right : :class:`~pyspark.sql.Column` or str
+ divisor
+
+ Examples
+ --------
+ >>> df = spark.createDataFrame([(6000, 15), (1990, 2)], ["a", "b"])
+ >>> df.select(try_divide(df.a, df.b).alias('r')).collect()
+ [Row(r=400.0), Row(r=995.0)]
+ """
+ return _invoke_function_over_columns("try_divide", left, right)
+
+
+@try_remote_functions
+def try_multiply(left: "ColumnOrName", right: "ColumnOrName") -> Column:
+ """
+ Returns `left`*`right` and the result is null on overflow. The acceptable
input types are the
+ same as those of the `*` operator.
+
+ .. versionadded:: 3.5.0
+
+ Notes
Review Comment:
ditto
##########
sql/core/src/main/scala/org/apache/spark/sql/functions.scala:
##########
@@ -1872,6 +1872,87 @@ object functions {
*/
def sqrt(colName: String): Column = sqrt(Column(colName))
+ /**
+ * Returns the sum of `left` and `right` and the result is null on overflow.
The acceptable
+ * input types are the same as those of the `+` operator.
+ *
+ * @note
Review Comment:
please check the similar functions in the vanilla Scala APIs
##########
connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala:
##########
@@ -1807,6 +1807,75 @@ object functions {
*/
def sqrt(colName: String): Column = sqrt(Column(colName))
+ /**
+ * Returns the sum of `left` and `right` and the result is null on overflow.
The acceptable
+ * input types are the same as those of the `+` operator.
+ *
+ * @note
+ * Only Numeric type is supported in this function, while `try_add` in SQL
supports Numeric,
+ * DATE, TIMESTAMP, and INTERVAL.
+ *
+ * @group math_funcs
+ * @since 3.5.0
+ */
+ def try_add(left: Column, right: Column): Column = Column.fn("try_add",
left, right)
+
+ /**
+ * Returns the mean calculated from values of a group and the result is null
on overflow.
+ *
+ * @group math_funcs
+ * @since 3.5.0
+ */
+ def try_avg(e: Column): Column = Column.fn("try_avg", e)
+
+ /**
+ * Returns `dividend``/``divisor`. It always performs floating point
division. Its result is
+ * always null if `divisor` is 0.
+ *
+ * @note
+ * The `dividend` must be a numeric, `divisor` must be a numeric in this
function. While the
+ * `dividend` can be a numeric or an interval, `divisor` must be a numeric
in SQL function
+ * `try_divide`.
+ *
+ * @group math_funcs
+ * @since 3.5.0
+ */
+ def try_divide(left: Column, right: Column): Column =
Column.fn("try_divide", left, right)
+
+ /**
+ * Returns `left``*``right` and the result is null on overflow. The
acceptable input types are the
+ * same as those of the `*` operator.
+ *
+ * @note
+ * Only Numeric type is supported in this function, while `try_multiply`
in SQL supports
+ * Numeric and INTERVAL.
+ *
+ * @group math_funcs
+ * @since 3.5.0
+ */
+ def try_multiply(left: Column, right: Column): Column =
Column.fn("try_multiply", left, right)
+
+ /**
+ * Returns `left`-`right` and the result is null on overflow. The acceptable
input types are the
+ * same as those of the `-` operator.
+ *
+ * @note
Review Comment:
ditto
##########
python/pyspark/sql/functions.py:
##########
@@ -350,6 +350,159 @@ def sqrt(col: "ColumnOrName") -> Column:
return _invoke_function_over_columns("sqrt", col)
+@try_remote_functions
+def try_add(left: "ColumnOrName", right: "ColumnOrName") -> Column:
+ """
+ Returns the sum of `left` and `right` and the result is null on overflow.
+ The acceptable input types are the same as those of the `+` operator.
+
+ .. versionadded:: 3.5.0
+
+ Notes
+ -----
+ Only Numeric type is supported in this function, while `try_add` in SQL
supports Numeric,
+ DATE, TIMESTAMP, and INTERVAL.
+
+ Parameters
+ ----------
+ left : :class:`~pyspark.sql.Column` or str
+ right : :class:`~pyspark.sql.Column` or str
+
+ Examples
+ --------
+ >>> df = spark.createDataFrame([(1982, 15), (1990, 2)], ["birth", "age"])
+ >>> df.select(try_add(df.birth, df.age).alias('r')).collect()
+ [Row(r=1997), Row(r=1992)]
+ """
+ return _invoke_function_over_columns("try_add", left, right)
+
+
+@try_remote_functions
+def try_avg(col: "ColumnOrName") -> Column:
+ """
+ Returns the mean calculated from values of a group and the result is null
on overflow.
+
+ .. versionadded:: 3.5.0
+
+ Parameters
+ ----------
+ col : :class:`~pyspark.sql.Column` or str
+
+ Examples
+ --------
+ >>> df = spark.createDataFrame([(1982, 15), (1990, 2)], ["birth", "age"])
+ >>> df.select(try_avg(df.age).alias('r')).collect()
+ [Row(r=8.5)]
+ """
+ return _invoke_function_over_columns("try_avg", col)
+
+
+@try_remote_functions
+def try_divide(left: "ColumnOrName", right: "ColumnOrName") -> Column:
+ """
+ Returns `dividend`/`divisor`. It always performs floating point division.
Its result is
+ always null if `divisor` is 0.
+
+ .. versionadded:: 3.5.0
+
+ Notes
+ -----
+ The `dividend` must be a numeric, `divisor` must be a numeric in this
function. While the
+ `dividend` can be a numeric or an interval, `divisor` must be a numeric in
SQL function
+ `try_divide`.
+
+ Parameters
+ ----------
+ left : :class:`~pyspark.sql.Column` or str
+ dividend
+ right : :class:`~pyspark.sql.Column` or str
+ divisor
+
+ Examples
+ --------
+ >>> df = spark.createDataFrame([(6000, 15), (1990, 2)], ["a", "b"])
+ >>> df.select(try_divide(df.a, df.b).alias('r')).collect()
+ [Row(r=400.0), Row(r=995.0)]
+ """
+ return _invoke_function_over_columns("try_divide", left, right)
+
+
+@try_remote_functions
+def try_multiply(left: "ColumnOrName", right: "ColumnOrName") -> Column:
+ """
+ Returns `left`*`right` and the result is null on overflow. The acceptable
input types are the
+ same as those of the `*` operator.
+
+ .. versionadded:: 3.5.0
+
+ Notes
+ -----
+ Only Numeric type is supported in this function, while `try_multiply` in
SQL supports
+ Numeric and INTERVAL.
+
+ Parameters
+ ----------
+ left : :class:`~pyspark.sql.Column` or str
+ multiplicand
+ right : :class:`~pyspark.sql.Column` or str
+ multiplier
+
+ Examples
+ --------
+ >>> df = spark.createDataFrame([(6000, 15), (1990, 2)], ["a", "b"])
+ >>> df.select(try_multiply(df.a, df.b).alias('r')).collect()
+ [Row(r=90000), Row(r=3980)]
+ """
+ return _invoke_function_over_columns("try_multiply", left, right)
+
+
+@try_remote_functions
+def try_subtract(left: "ColumnOrName", right: "ColumnOrName") -> Column:
+ """
+ Returns `left`-`right` and the result is null on overflow. The acceptable
input types are the
+ same as those of the `-` operator.
+
+ .. versionadded:: 3.5.0
+
+ Notes
Review Comment:
ditto
##########
connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala:
##########
@@ -1807,6 +1807,75 @@ object functions {
*/
def sqrt(colName: String): Column = sqrt(Column(colName))
+ /**
+ * Returns the sum of `left` and `right` and the result is null on overflow.
The acceptable
+ * input types are the same as those of the `+` operator.
+ *
+ * @note
+ * Only Numeric type is supported in this function, while `try_add` in SQL
supports Numeric,
+ * DATE, TIMESTAMP, and INTERVAL.
+ *
+ * @group math_funcs
+ * @since 3.5.0
+ */
+ def try_add(left: Column, right: Column): Column = Column.fn("try_add",
left, right)
+
+ /**
+ * Returns the mean calculated from values of a group and the result is null
on overflow.
+ *
+ * @group math_funcs
+ * @since 3.5.0
+ */
+ def try_avg(e: Column): Column = Column.fn("try_avg", e)
+
+ /**
+ * Returns `dividend``/``divisor`. It always performs floating point
division. Its result is
+ * always null if `divisor` is 0.
+ *
+ * @note
+ * The `dividend` must be a numeric, `divisor` must be a numeric in this
function. While the
+ * `dividend` can be a numeric or an interval, `divisor` must be a numeric
in SQL function
+ * `try_divide`.
+ *
+ * @group math_funcs
+ * @since 3.5.0
+ */
+ def try_divide(left: Column, right: Column): Column =
Column.fn("try_divide", left, right)
+
+ /**
+ * Returns `left``*``right` and the result is null on overflow. The
acceptable input types are the
+ * same as those of the `*` operator.
+ *
+ * @note
Review Comment:
ditto
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]