HyukjinKwon commented on a change in pull request #33992:
URL: https://github.com/apache/spark/pull/33992#discussion_r708347128



##########
File path: python/pyspark/sql/functions.py
##########
@@ -3098,6 +3098,52 @@ def length(col):
     return Column(sc._jvm.functions.length(_to_java_column(col)))
 
 
+def octet_length(col):
+    """
+    Calculates the byte length for the specified string column.
+
+    .. versionadded:: 3.3.0

Review comment:
       ```suggestion
       .. versionadded:: 3.3.0
   
   ```
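
Note: the suggestion above (and the similar ones below) is whitespace-only; it adds a blank line after the `.. versionadded::` directive so the following numpydoc section headings render as sections instead of running into the directive. A minimal sketch of the spacing being asked for, reusing the docstring text from the diff quoted further down (the body is elided here, not part of the suggestion):

```python
# Layout sketch only: blank lines after ``.. versionadded::`` and between
# numpydoc sections, as the whitespace-only suggestions in this review request.
def octet_length(col):
    """
    Calculates the byte length for the specified string column.

    .. versionadded:: 3.3.0

    Parameters
    ----------
    col : :class:`~pyspark.sql.Column` or str
        Source column or strings

    Returns
    -------
    :class:`~pyspark.sql.Column`
        Byte length of the col
    """
    ...  # implementation as in the diff quoted below
```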

##########
File path: R/pkg/R/functions.R
##########
@@ -646,6 +646,19 @@ setMethod("bin",
             column(jc)
           })
 
+#' @details
+#' \code{bit_length}:  Calculates the bit length for the specified string column.

Review comment:
       ```suggestion
   #' \code{bit_length}: Calculates the bit length for the specified string column.
   ```

##########
File path: R/pkg/R/functions.R
##########
@@ -1569,6 +1582,19 @@ setMethod("negate",
             column(jc)
           })
 
+#' @details
+#' \code{octet_length}:  Calculates the byte length for the specified string column.

Review comment:
       ```suggestion
   #' \code{octet_length}: Calculates the byte length for the specified string column.
   ```

##########
File path: python/pyspark/sql/functions.py
##########
@@ -3098,6 +3098,52 @@ def length(col):
     return Column(sc._jvm.functions.length(_to_java_column(col)))
 
 
+def octet_length(col):
+    """
+    Calculates the byte length for the specified string column.
+
+    .. versionadded:: 3.3.0
+    Parameters
+    ----------
+    col : :class:`~pyspark.sql.Column` or str
+        Source column or strings

Review comment:
       ```suggestion
           Source column or strings
   
   ```

##########
File path: python/pyspark/sql/functions.py
##########
@@ -3098,6 +3098,52 @@ def length(col):
     return Column(sc._jvm.functions.length(_to_java_column(col)))
 
 
+def octet_length(col):
+    """
+    Calculates the byte length for the specified string column.
+
+    .. versionadded:: 3.3.0
+    Parameters
+    ----------
+    col : :class:`~pyspark.sql.Column` or str
+        Source column or strings
+    Returns
+    -------
+    :class:`~pyspark.sql.Column`
+        Byte length of the col

Review comment:
       ```suggestion
           Byte length of the col
   
   ```

##########
File path: python/pyspark/sql/functions.py
##########
@@ -3098,6 +3098,52 @@ def length(col):
     return Column(sc._jvm.functions.length(_to_java_column(col)))
 
 
+def octet_length(col):
+    """
+    Calculates the byte length for the specified string column.
+
+    .. versionadded:: 3.3.0
+    Parameters
+    ----------
+    col : :class:`~pyspark.sql.Column` or str
+        Source column or strings
+    Returns
+    -------
+    :class:`~pyspark.sql.Column`
+        Byte length of the col
+    Examples
+    -------
+    >>> from pyspark.sql.functions import octet_length
+    >>> spark.createDataFrame([('cat',), ( '\U0001F408',)], ['cat']) \
+            .select(octet_length('cat')).collect()
+        [Row(octet_length(cat)=3), Row(octet_length(cat)=4)]
+    """
+    return _invoke_function_over_column("octet_length", col)
+
+
+def bit_length(col):
+    """
+    Calculates the bit length for the specified string column.
+
+    .. versionadded:: 3.3.0
+    Parameters
+    ----------
+    col : :class:`~pyspark.sql.Column` or str
+        Source column or strings
+    Returns
+    -------
+    :class:`~pyspark.sql.Column`
+        Bit length of the col

Review comment:
       ```suggestion
           Bit length of the col
   
   ```

##########
File path: python/pyspark/sql/functions.py
##########
@@ -3098,6 +3098,52 @@ def length(col):
     return Column(sc._jvm.functions.length(_to_java_column(col)))
 
 
+def octet_length(col):
+    """
+    Calculates the byte length for the specified string column.
+
+    .. versionadded:: 3.3.0
+    Parameters
+    ----------
+    col : :class:`~pyspark.sql.Column` or str
+        Source column or strings
+    Returns
+    -------
+    :class:`~pyspark.sql.Column`
+        Byte length of the col
+    Examples
+    -------
+    >>> from pyspark.sql.functions import octet_length
+    >>> spark.createDataFrame([('cat',), ( '\U0001F408',)], ['cat']) \
+            .select(octet_length('cat')).collect()
+        [Row(octet_length(cat)=3), Row(octet_length(cat)=4)]
+    """
+    return _invoke_function_over_column("octet_length", col)
+
+
+def bit_length(col):
+    """
+    Calculates the bit length for the specified string column.
+
+    .. versionadded:: 3.3.0

Review comment:
       ```suggestion
       .. versionadded:: 3.3.0
   
   ```
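
For reference, a short usage sketch of the two functions under review, assuming an active SparkSession bound to `spark`. The octet values follow the doctest in the diff; the bit values are simply 8x the byte counts ('cat' is 3 bytes in UTF-8, U+1F408 is 4 bytes):

```python
# Usage sketch (not part of the PR): contrast octet_length and bit_length
# on an ASCII string and a 4-byte UTF-8 code point.
from pyspark.sql.functions import octet_length, bit_length

df = spark.createDataFrame([('cat',), ('\U0001F408',)], ['cat'])
df.select(octet_length('cat'), bit_length('cat')).collect()
# [Row(octet_length(cat)=3, bit_length(cat)=24),
#  Row(octet_length(cat)=4, bit_length(cat)=32)]
```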

##########
File path: python/pyspark/sql/tests/test_functions.py
##########
@@ -197,6 +197,18 @@ def test_string_functions(self):
                 df.select(getattr(functions, name)("name")).first()[0],
                 df.select(getattr(functions, name)(col("name"))).first()[0])
 
+    def test_octet_length_function(self):
+        # SPARK-36751: add octet/bit length api for python

Review comment:
       ```suggestion
           # SPARK-36751: add octet length api for python
   ```

##########
File path: python/pyspark/sql/tests/test_functions.py
##########
@@ -197,6 +197,18 @@ def test_string_functions(self):
                 df.select(getattr(functions, name)("name")).first()[0],
                 df.select(getattr(functions, name)(col("name"))).first()[0])
 
+    def test_octet_length_function(self):
+        # SPARK-36751: add octet/bit length api for python
+        df = self.spark.createDataFrame([('cat',), ('\U0001F408',)], ['cat'])
+        actual = df.select(octet_length('cat')).collect()
+        self.assertEqual([Row(3), Row(4)], actual)
+
+    def test_bit_length_function(self):
+        # SPARK-36751: add octet/bit length api for python

Review comment:
       ```suggestion
           # SPARK-36751: add bit length api for python
   ```
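
The quoted hunk cuts off before the body of `test_bit_length_function`; the actual test code is not shown in this comment. Purely as a hedged sketch, a parallel assertion mirroring `test_octet_length_function` above might look like this (expected bit counts of 24 and 32 follow from the 3-byte and 4-byte UTF-8 encodings):

```python
    def test_bit_length_function(self):
        # SPARK-36751: add bit length api for python
        # Sketch only -- the real test body is not quoted in this comment.
        df = self.spark.createDataFrame([('cat',), ('\U0001F408',)], ['cat'])
        actual = df.select(bit_length('cat')).collect()
        self.assertEqual([Row(24), Row(32)], actual)
```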

##########
File path: python/pyspark/sql/functions.py
##########
@@ -3098,6 +3098,52 @@ def length(col):
     return Column(sc._jvm.functions.length(_to_java_column(col)))
 
 
+def octet_length(col):
+    """
+    Calculates the byte length for the specified string column.
+
+    .. versionadded:: 3.3.0
+    Parameters
+    ----------
+    col : :class:`~pyspark.sql.Column` or str
+        Source column or strings
+    Returns
+    -------
+    :class:`~pyspark.sql.Column`
+        Byte length of the col
+    Examples
+    -------
+    >>> from pyspark.sql.functions import octet_length
+    >>> spark.createDataFrame([('cat',), ( '\U0001F408',)], ['cat']) \
+            .select(octet_length('cat')).collect()
+        [Row(octet_length(cat)=3), Row(octet_length(cat)=4)]
+    """
+    return _invoke_function_over_column("octet_length", col)
+
+
+def bit_length(col):
+    """
+    Calculates the bit length for the specified string column.
+
+    .. versionadded:: 3.3.0
+    Parameters
+    ----------
+    col : :class:`~pyspark.sql.Column` or str
+        Source column or strings

Review comment:
       ```suggestion
           Source column or strings
   
   ```




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


