MaxGekk commented on code in PR #48454:
URL: https://github.com/apache/spark/pull/48454#discussion_r1799505336


##########
sql/api/src/main/scala/org/apache/spark/sql/functions.scala:
##########
@@ -3911,6 +3911,44 @@ object functions {
   def encode(value: Column, charset: String): Column =
     Column.fn("encode", value, lit(charset))
 
+  /**
+   * Returns true if the input is a valid UTF-8 string, otherwise returns false.
+   *
+   * @group string_funcs
+   * @since 4.0.0
+   */
+  def is_valid_utf8(str: Column): Column =
+    Column.fn("is_valid_utf8", str)
+
+  /**
+   * Returns a new string in which all invalid UTF-8 byte sequences, if any, are replaced by the
+   * Unicode replacement character (U+FFFD).
+   *
+   * @group string_funcs
+   * @since 4.0.0
+   */
+  def make_valid_utf8(str: Column): Column =
+    Column.fn("make_valid_utf8", str)
+
+  /**
+   * Returns the input value if it corresponds to a valid UTF-8 string, or emits an error

Review Comment:
   Let's reflect that in the comment.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to