HaoYang670 commented on code in PR #1577: URL: https://github.com/apache/arrow-rs/pull/1577#discussion_r851909480
##########
arrow/src/compute/kernels/substring.rs:
##########
@@ -107,29 +127,26 @@ fn generic_substring<OffsetSize: StringOffsetSizeTrait>(
 ///
 /// Attention: Both `start` and `length` are counted by byte, not by char.
 ///
-/// # Warning
-///
-/// This function **might** return in invalid utf-8 format if the
-/// character length falls on a non-utf8 boundary, which we
-/// [hope to fix](https://github.com/apache/arrow-rs/issues/1531)
-/// in a future release.
-///
-/// ## Example of getting an invalid substring
+/// # Basic usage
 /// ```
-/// # // Doesn't pass due to https://github.com/apache/arrow-rs/issues/1531
-/// # #[cfg(not(feature = "force_validate"))]
-/// # {
 /// # use arrow::array::StringArray;
 /// # use arrow::compute::kernels::substring::substring;
-/// let array = StringArray::from(vec![Some("E=mc²")]);
-/// let result = substring(&array, -1, None).unwrap();
+/// let array = StringArray::from(vec![Some("arrow"), None, Some("rust")]);
+/// let result = substring(&array, 1, Some(&4)).unwrap();
 /// let result = result.as_any().downcast_ref::<StringArray>().unwrap();
-/// assert_eq!(result.value(0).as_bytes(), &[0x00B2]); // invalid utf-8 format
-/// # }
+/// assert_eq!(result, &StringArray::from(vec![Some("rrow"), None, Some("ust")]));
 /// ```
 ///
 /// # Error
-/// this function errors when the passed array is not a \[Large\]String array.
+/// - The function errors when the passed array is not a \[Large\]String array.
+/// - The function errors when you try to create a substring in the middle of a multibyte character.

Review Comment:
Done!

--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]
