This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 20f6c7e  support LargeUtf8 in sort kernel (#26)
20f6c7e is described below

commit 20f6c7e26d14d6df461d17c95d9a254c71bb0c72
Author: Ritchie Vink <[email protected]>
AuthorDate: Mon Apr 26 23:49:52 2021 +0200

    support LargeUtf8 in sort kernel (#26)
---
 .gitignore                        |  2 +-
 arrow/src/compute/kernels/sort.rs | 25 ++++++++++++++++++++-----
 2 files changed, 21 insertions(+), 6 deletions(-)

diff --git a/.gitignore b/.gitignore
index 5b3bf6c..e8d9955 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,6 +3,6 @@ target
 rusty-tags.vi
 .history
 .flatbuffers/
-
+.idea/
 .vscode
 venv/*
diff --git a/arrow/src/compute/kernels/sort.rs 
b/arrow/src/compute/kernels/sort.rs
index bf8eda3..30341b6 100644
--- a/arrow/src/compute/kernels/sort.rs
+++ b/arrow/src/compute/kernels/sort.rs
@@ -257,7 +257,8 @@ pub fn sort_to_indices(
                 values, v, n, cmp, &options, limit,
             )
         }
-        DataType::Utf8 => sort_string(values, v, n, &options, limit),
+        DataType::Utf8 => sort_string::<i32>(values, v, n, &options, limit),
+        DataType::LargeUtf8 => sort_string::<i64>(values, v, n, &options, 
limit),
         DataType::List(field) => match field.data_type() {
             DataType::Int8 => sort_list::<i32, Int8Type>(values, v, n, 
&options, limit),
             DataType::Int16 => sort_list::<i32, Int16Type>(values, v, n, 
&options, limit),
@@ -545,14 +546,17 @@ fn insert_valid_values<T>(result_slice: &mut [u32], 
offset: usize, valids: &[(u3
 }
 
 /// Sort strings
-fn sort_string(
+fn sort_string<Offset: StringOffsetSizeTrait>(
     values: &ArrayRef,
     value_indices: Vec<u32>,
     null_indices: Vec<u32>,
     options: &SortOptions,
     limit: Option<usize>,
 ) -> Result<UInt32Array> {
-    let values = as_string_array(values);
+    let values = values
+        .as_any()
+        .downcast_ref::<GenericStringArray<Offset>>()
+        .unwrap();
 
     sort_string_helper(
         values,
@@ -958,14 +962,25 @@ mod tests {
         assert_eq!(output, expected)
     }
 
+    /// Tests both Utf8 and LargeUtf8
     fn test_sort_string_arrays(
         data: Vec<Option<&str>>,
         options: Option<SortOptions>,
         limit: Option<usize>,
         expected_data: Vec<Option<&str>>,
     ) {
-        let output = StringArray::from(data);
-        let expected = Arc::new(StringArray::from(expected_data)) as ArrayRef;
+        let output = StringArray::from(data.clone());
+        let expected = Arc::new(StringArray::from(expected_data.clone())) as 
ArrayRef;
+        let output = match limit {
+            Some(_) => {
+                sort_limit(&(Arc::new(output) as ArrayRef), options, 
limit).unwrap()
+            }
+            _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(),
+        };
+        assert_eq!(&output, &expected);
+
+        let output = LargeStringArray::from(data);
+        let expected = Arc::new(LargeStringArray::from(expected_data)) as 
ArrayRef;
         let output = match limit {
             Some(_) => {
                 sort_limit(&(Arc::new(output) as ArrayRef), options, 
limit).unwrap()

Reply via email to