HyukjinKwon commented on a change in pull request #32566:
URL: https://github.com/apache/spark/pull/32566#discussion_r634063862
##########
File path: python/pyspark/sql/functions.py
##########
@@ -2681,6 +2681,45 @@ def overlay(src, replace, pos, len=-1):
))
+def sentences(string, language=None, country=None):
+ """
+ Splits a string into arrays of sentences, where each sentence is an array of words.
+ The 'language' and 'country' arguments are optional, and if omitted, the default locale is used.
+
+ .. versionadded:: 3.2.0
+
+ Parameters
+ ----------
+ string : str
+ a string to be split
+ language : str
Review comment:
```suggestion
language : :class:`~pyspark.sql.Column` or str, optional
```
##########
File path: sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala
##########
@@ -589,6 +596,5 @@ class StringFunctionsSuite extends QueryTest with SharedSparkSession {
df2.selectExpr("str_to_map(a)"),
Seq(Row(Map("a" -> "1", "b" -> "2", "c" -> "3")))
)
Review comment:
```suggestion
)
```
##########
File path: python/pyspark/sql/functions.py
##########
@@ -2681,6 +2681,45 @@ def overlay(src, replace, pos, len=-1):
))
+def sentences(string, language=None, country=None):
+ """
+ Splits a string into arrays of sentences, where each sentence is an array of words.
+ The 'language' and 'country' arguments are optional, and if omitted, the default locale is used.
+
+ .. versionadded:: 3.2.0
+
+ Parameters
+ ----------
+ string : str
+ a string to be split
+ language : str
+ a language of the locale
+ country : str
Review comment:
```suggestion
country : :class:`~pyspark.sql.Column` or str, optional
```
##########
File path: python/pyspark/sql/functions.py
##########
@@ -2681,6 +2681,45 @@ def overlay(src, replace, pos, len=-1):
))
+def sentences(string, language=None, country=None):
+ """
+ Splits a string into arrays of sentences, where each sentence is an array of words.
+ The 'language' and 'country' arguments are optional, and if omitted, the default locale is used.
+
+ .. versionadded:: 3.2.0
+
+ Parameters
+ ----------
+ string : str
Review comment:
```suggestion
string : :class:`~pyspark.sql.Column` or str
```
##########
File path: python/pyspark/sql/functions.pyi
##########
@@ -221,6 +221,7 @@ def map_from_entries(col: ColumnOrName) -> Column: ...
def array_repeat(col: ColumnOrName, count: Union[Column, int]) -> Column: ...
def arrays_zip(*cols: ColumnOrName) -> Column: ...
def map_concat(*cols: ColumnOrName) -> Column: ...
+def sentences(col: ColumnOrName, language: ColumnOrName, country: ColumnOrName) -> Column: ...
Review comment:
```suggestion
def sentences(col: ColumnOrName, language: Optional[ColumnOrName], country: Optional[ColumnOrName]) -> Column: ...
```
##########
File path: sql/core/src/main/scala/org/apache/spark/sql/functions.scala
##########
@@ -2867,6 +2867,25 @@ object functions {
new Overlay(src.expr, replace.expr, pos.expr)
}
+ /**
+ * Splits a string into arrays of sentences, where each sentence is an array of words.
+ * @group string_funcs
+ * @since 3.2.0
+ */
+ def sentences(string: Column, language: Column, country: Column): Column = withExpr {
+ Sentences(string.expr, language.expr, country.expr)
+ }
+
+ /**
+ * Splits a string into arrays of sentences, where each sentence is an array of words.
+ * The default locale is used.
+ * @group string_funcs
+ * @since 3.2.0
+ */
+ def sentences(str: Column): Column = withExpr {
Review comment:
```suggestion
def sentences(string: Column): Column = withExpr {
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]