zhengruifeng commented on code in PR #41561:
URL: https://github.com/apache/spark/pull/41561#discussion_r1232946021
##########
python/pyspark/sql/functions.py:
##########
@@ -8810,6 +8812,391 @@ def to_number(col: "ColumnOrName", format:
"ColumnOrName") -> Column:
return _invoke_function_over_columns("to_number", col, format)
+@try_remote_functions
+def char(col: "ColumnOrName") -> Column:
+ """
+ Returns the ASCII character having the binary equivalent to `col`. If col is larger than 256 the
+ result is equivalent to char(col % 256)
+
+ .. versionadded:: 3.5.0
+
+ Parameters
+ ----------
+ col : :class:`~pyspark.sql.Column` or str
+ Input column or strings.
+
+ Examples
+ --------
+ >>> df = spark.createDataFrame([(65,)], ['a'])
+ >>> df.select(char(df.a).alias('r')).collect()
+ [Row(r='A')]
+ """
+ return _invoke_function_over_columns("char", col)
+
+
+@try_remote_functions
+def btrim(str: "ColumnOrName", trim: Optional["ColumnOrName"] = None) ->
Column:
+ """
+ Remove the leading and trailing `trim` characters from `str`.
+
+ .. versionadded:: 3.5.0
+
+ Parameters
+ ----------
+ str : :class:`~pyspark.sql.Column` or str
+ Input column or strings.
+ trim : :class:`~pyspark.sql.Column` or str
+ The trim string characters to trim, the default value is a single space
+
+ Examples
+ --------
+ >>> df = spark.createDataFrame([("SSparkSQLS", "SL", )], ['a', 'b'])
+ >>> df.select(btrim(df.a, df.b).alias('r')).collect()
+ [Row(r='parkSQ')]
+
+ >>> df = spark.createDataFrame([(" SparkSQL ",)], ['a'])
+ >>> df.select(btrim(df.a).alias('r')).collect()
+ [Row(r='SparkSQL')]
+ """
+ if trim is not None:
+ return _invoke_function_over_columns("btrim", str, trim)
+ else:
+ return _invoke_function_over_columns("btrim", str)
+
+
+@try_remote_functions
+def char_length(str: "ColumnOrName") -> Column:
+ """
+ Returns the character length of string data or number of bytes of binary data.
+ The length of string data includes the trailing spaces.
+ The length of binary data includes binary zeros.
+
+ .. versionadded:: 3.5.0
+
+ Parameters
+ ----------
+ str : :class:`~pyspark.sql.Column` or str
+ Input column or strings.
+
+ Examples
+ --------
+ >>> df = spark.createDataFrame([("SparkSQL",)], ['a'])
+ >>> df.select(char_length(df.a).alias('r')).collect()
+ [Row(r=8)]
+ """
+ return _invoke_function_over_columns("char_length", str)
+
+
+@try_remote_functions
+def character_length(str: "ColumnOrName") -> Column:
+ """
+ Returns the character length of string data or number of bytes of binary data.
+ The length of string data includes the trailing spaces.
+ The length of binary data includes binary zeros.
+
+ .. versionadded:: 3.5.0
+
+ Parameters
+ ----------
+ str : :class:`~pyspark.sql.Column` or str
+ Input column or strings.
+
+ Examples
+ --------
+ >>> df = spark.createDataFrame([("SparkSQL",)], ['a'])
+ >>> df.select(character_length(df.a).alias('r')).collect()
+ [Row(r=8)]
+ """
+ return _invoke_function_over_columns("character_length", str)
+
+
+@try_remote_functions
+def chr(col: "ColumnOrName") -> Column:
+ """
+ Returns the ASCII character having the binary equivalent to `col`.
+ If col is larger than 256 the result is equivalent to chr(col % 256)
+
+ .. versionadded:: 3.5.0
+
+ Parameters
+ ----------
+ col : :class:`~pyspark.sql.Column` or str
+ Input column or strings.
+
+ Examples
+ --------
+ >>> df = spark.createDataFrame([(65,)], ['a'])
+ >>> df.select(chr(df.a).alias('r')).collect()
+ [Row(r='A')]
+ """
+ return _invoke_function_over_columns("chr", col)
+
+
+@try_remote_functions
+def contains(left: "ColumnOrName", right: "ColumnOrName") -> Column:
+ """
+ Returns a boolean. The value is True if right is found inside left.
+ Returns NULL if either input expression is NULL. Otherwise, returns False.
+ Both left or right must be of STRING or BINARY type.
Review Comment:
+1. Let's also add a `Notes` section here; you can refer to
https://github.com/apache/spark/blob/1b12094d2d182ccd103be6bef33b568c3ecee3e5/python/pyspark/sql/functions.py#L9318-L9321
##########
sql/core/src/main/scala/org/apache/spark/sql/functions.scala:
##########
@@ -3815,6 +3815,200 @@ object functions {
ToNumber(e.expr, format.expr)
}
+ /**
+ * Returns the ASCII character having the binary equivalent to `n`.
+ * If n is larger than 256 the result is equivalent to char(n % 256)
+ *
+ * @group string_funcs
+ * @since 3.5.0
+ */
+ def char(n: Column): Column = withExpr {
+ Chr(n.expr)
+ }
+
+ /**
+ * Removes the leading and trailing space characters from `str`.
+ *
+ * @group string_funcs
+ * @since 3.5.0
+ */
+ def btrim(str: Column): Column = withExpr {
+ new StringTrimBoth(str.expr)
+ }
+
+ /**
+ * Remove the leading and trailing `trim` characters from `str`.
+ *
+ * @group string_funcs
+ * @since 3.5.0
+ */
+ def btrim(str: Column, trim: Column): Column = withExpr {
+ new StringTrimBoth(str.expr, trim.expr)
+ }
+
+ /**
+ * Returns the character length of string data or number of bytes of binary data.
+ * The length of string data includes the trailing spaces.
+ * The length of binary data includes binary zeros.
+ *
+ * @group string_funcs
+ * @since 3.5.0
+ */
+ def char_length(str: Column): Column = withExpr {
+ Length(str.expr)
+ }
+
+ /**
+ * Returns the character length of string data or number of bytes of binary data.
+ * The length of string data includes the trailing spaces.
+ * The length of binary data includes binary zeros.
+ *
+ * @group string_funcs
+ * @since 3.5.0
+ */
+ def character_length(str: Column): Column = withExpr {
+ Length(str.expr)
+ }
+
+ /**
+ * Returns the ASCII character having the binary equivalent to `n`.
+ * If n is larger than 256 the result is equivalent to chr(n % 256)
+ *
+ * @group string_funcs
+ * @since 3.5.0
+ */
+ def chr(n: Column): Column = withExpr {
+ Chr(n.expr)
+ }
+
+ /**
+ * Returns a boolean. The value is True if right is found inside left.
+ * Returns NULL if either input expression is NULL. Otherwise, returns False.
+ * Both left or right must be of STRING type.
+ *
+ * @note
+ * This is different from the `contains` method in SQL that supports both STRING and BINARY
+ * type.
Review Comment:
```suggestion
* @note Only STRING type is supported in this function, while `contains` in SQL supports both STRING and BINARY.
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]