This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 08f386a13 update the doc of `substring` (#1529)
08f386a13 is described below
commit 08f386a1324c632230e108c44c55a6370397e1c5
Author: Remzi Yang <[email protected]>
AuthorDate: Sun Apr 10 19:15:27 2022 +0800
update the doc of `substring` (#1529)
* update doc
Signed-off-by: remzi <[email protected]>
* update doc
Signed-off-by: remzi <[email protected]>
* Update arrow/src/compute/kernels/substring.rs
Co-authored-by: Andrew Lamb <[email protected]>
Co-authored-by: Andrew Lamb <[email protected]>
---
arrow/src/compute/kernels/substring.rs | 29 +++++++++++++++++++++++++++--
1 file changed, 27 insertions(+), 2 deletions(-)
diff --git a/arrow/src/compute/kernels/substring.rs
b/arrow/src/compute/kernels/substring.rs
index 1be829f72..6e75f74ef 100644
--- a/arrow/src/compute/kernels/substring.rs
+++ b/arrow/src/compute/kernels/substring.rs
@@ -94,8 +94,33 @@ fn generic_substring<OffsetSize: StringOffsetSizeTrait>(
Ok(make_array(data))
}
-/// Returns an ArrayRef with a substring starting from `start` and with
optional length `length` of each of the elements in `array`.
-/// `start` can be negative, in which case the start counts from the end of
the string.
+/// Returns an ArrayRef with substrings of all the elements in `array`.
+///
+/// # Arguments
+///
+/// * `start` - The start index of all substrings.
+/// If `start >= 0`, then count from the start of the string,
+/// otherwise count from the end of the string.
+///
+/// * `length` (optional) - The length of all substrings.
+/// If `length` is `None`, then the substring is from `start` to the end of
the string.
+///
+/// Note: both `start` and `length` are counted in bytes, not in characters.
+///
+/// # Warning
+///
+/// This function **might** return invalid UTF-8 data if `start` or `length`
falls in the middle of a multi-byte character.
+/// ## Example of getting an invalid substring
+/// ```
+/// # use arrow::array::StringArray;
+/// # use arrow::compute::kernels::substring::substring;
+/// let array = StringArray::from(vec![Some("E=mc²")]);
+/// let result = substring(&array, -1, &None).unwrap();
+/// let result = result.as_any().downcast_ref::<StringArray>().unwrap();
+/// assert_eq!(result.value(0).as_bytes(), &[0x00B2]); // invalid utf-8 format
+/// ```
+///
+/// # Errors
/// This function returns an error when the passed array is not a \[Large\]String array.
pub fn substring(
array: &dyn Array,