HyukjinKwon commented on a change in pull request #33992:
URL: https://github.com/apache/spark/pull/33992#discussion_r708347128
##########
File path: python/pyspark/sql/functions.py
##########
@@ -3098,6 +3098,52 @@ def length(col):
return Column(sc._jvm.functions.length(_to_java_column(col)))
+def octet_length(col):
+ """
+ Calculates the byte length for the specified string column.
+
+ .. versionadded:: 3.3.0
Review comment:
```suggestion
.. versionadded:: 3.3.0
```
##########
File path: R/pkg/R/functions.R
##########
@@ -646,6 +646,19 @@ setMethod("bin",
column(jc)
})
+#' @details
+#' \code{bit_length}: Calculates the bit length for the specified string column.
Review comment:
```suggestion
#' \code{bit_length}: Calculates the bit length for the specified string column.
```
##########
File path: R/pkg/R/functions.R
##########
@@ -1569,6 +1582,19 @@ setMethod("negate",
column(jc)
})
+#' @details
+#' \code{octet_length}: Calculates the byte length for the specified string column.
Review comment:
```suggestion
#' \code{octet_length}: Calculates the byte length for the specified string column.
```
##########
File path: python/pyspark/sql/functions.py
##########
@@ -3098,6 +3098,52 @@ def length(col):
return Column(sc._jvm.functions.length(_to_java_column(col)))
+def octet_length(col):
+ """
+ Calculates the byte length for the specified string column.
+
+ .. versionadded:: 3.3.0
+ Parameters
+ ----------
+ col : :class:`~pyspark.sql.Column` or str
+ Source column or strings
Review comment:
```suggestion
Source column or strings
```
##########
File path: python/pyspark/sql/functions.py
##########
@@ -3098,6 +3098,52 @@ def length(col):
return Column(sc._jvm.functions.length(_to_java_column(col)))
+def octet_length(col):
+ """
+ Calculates the byte length for the specified string column.
+
+ .. versionadded:: 3.3.0
+ Parameters
+ ----------
+ col : :class:`~pyspark.sql.Column` or str
+ Source column or strings
+ Returns
+ -------
+ :class:`~pyspark.sql.Column`
+ Byte length of the col
Review comment:
```suggestion
Byte length of the col
```
##########
File path: python/pyspark/sql/functions.py
##########
@@ -3098,6 +3098,52 @@ def length(col):
return Column(sc._jvm.functions.length(_to_java_column(col)))
+def octet_length(col):
+ """
+ Calculates the byte length for the specified string column.
+
+ .. versionadded:: 3.3.0
+ Parameters
+ ----------
+ col : :class:`~pyspark.sql.Column` or str
+ Source column or strings
+ Returns
+ -------
+ :class:`~pyspark.sql.Column`
+ Byte length of the col
+ Examples
+ -------
+ >>> from pyspark.sql.functions import octet_length
+ >>> spark.createDataFrame([('cat',), ( '\U0001F408',)], ['cat']) \
+ .select(octet_length('cat')).collect()
+ [Row(octet_length(cat)=3), Row(octet_length(cat)=4)]
+ """
+ return _invoke_function_over_column("octet_length", col)
+
+
+def bit_length(col):
+ """
+ Calculates the bit length for the specified string column.
+
+ .. versionadded:: 3.3.0
+ Parameters
+ ----------
+ col : :class:`~pyspark.sql.Column` or str
+ Source column or strings
+ Returns
+ -------
+ :class:`~pyspark.sql.Column`
+ Bit length of the col
Review comment:
```suggestion
Bit length of the col
```
##########
File path: python/pyspark/sql/functions.py
##########
@@ -3098,6 +3098,52 @@ def length(col):
return Column(sc._jvm.functions.length(_to_java_column(col)))
+def octet_length(col):
+ """
+ Calculates the byte length for the specified string column.
+
+ .. versionadded:: 3.3.0
+ Parameters
+ ----------
+ col : :class:`~pyspark.sql.Column` or str
+ Source column or strings
+ Returns
+ -------
+ :class:`~pyspark.sql.Column`
+ Byte length of the col
+ Examples
+ -------
+ >>> from pyspark.sql.functions import octet_length
+ >>> spark.createDataFrame([('cat',), ( '\U0001F408',)], ['cat']) \
+ .select(octet_length('cat')).collect()
+ [Row(octet_length(cat)=3), Row(octet_length(cat)=4)]
+ """
+ return _invoke_function_over_column("octet_length", col)
+
+
+def bit_length(col):
+ """
+ Calculates the bit length for the specified string column.
+
+ .. versionadded:: 3.3.0
Review comment:
```suggestion
.. versionadded:: 3.3.0
```
##########
File path: python/pyspark/sql/tests/test_functions.py
##########
@@ -197,6 +197,18 @@ def test_string_functions(self):
df.select(getattr(functions, name)("name")).first()[0],
df.select(getattr(functions, name)(col("name"))).first()[0])
+ def test_octet_length_function(self):
+ # SPARK-36751: add octet/bit length api for python
Review comment:
```suggestion
# SPARK-36751: add octet length api for python
```
##########
File path: python/pyspark/sql/tests/test_functions.py
##########
@@ -197,6 +197,18 @@ def test_string_functions(self):
df.select(getattr(functions, name)("name")).first()[0],
df.select(getattr(functions, name)(col("name"))).first()[0])
+ def test_octet_length_function(self):
+ # SPARK-36751: add octet/bit length api for python
+ df = self.spark.createDataFrame([('cat',), ('\U0001F408',)], ['cat'])
+ actual = df.select(octet_length('cat')).collect()
+ self.assertEqual([Row(3), Row(4)], actual)
+
+ def test_bit_length_function(self):
+ # SPARK-36751: add octet/bit length api for python
Review comment:
```suggestion
# SPARK-36751: add bit length api for python
```
##########
File path: python/pyspark/sql/functions.py
##########
@@ -3098,6 +3098,52 @@ def length(col):
return Column(sc._jvm.functions.length(_to_java_column(col)))
+def octet_length(col):
+ """
+ Calculates the byte length for the specified string column.
+
+ .. versionadded:: 3.3.0
+ Parameters
+ ----------
+ col : :class:`~pyspark.sql.Column` or str
+ Source column or strings
+ Returns
+ -------
+ :class:`~pyspark.sql.Column`
+ Byte length of the col
+ Examples
+ -------
+ >>> from pyspark.sql.functions import octet_length
+ >>> spark.createDataFrame([('cat',), ( '\U0001F408',)], ['cat']) \
+ .select(octet_length('cat')).collect()
+ [Row(octet_length(cat)=3), Row(octet_length(cat)=4)]
+ """
+ return _invoke_function_over_column("octet_length", col)
+
+
+def bit_length(col):
+ """
+ Calculates the bit length for the specified string column.
+
+ .. versionadded:: 3.3.0
+ Parameters
+ ----------
+ col : :class:`~pyspark.sql.Column` or str
+ Source column or strings
Review comment:
```suggestion
Source column or strings
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]