psvri commented on code in PR #4473:
URL: https://github.com/apache/arrow-rs/pull/4473#discussion_r1251111511
##########
arrow-ord/src/sort.rs:
##########
@@ -57,11 +60,211 @@ pub fn sort(
values: &dyn Array,
options: Option<SortOptions>,
) -> Result<ArrayRef, ArrowError> {
- if let DataType::RunEndEncoded(_, _) = values.data_type() {
- return sort_run(values, options, None);
+ match values.data_type() {
+ DataType::Int8 => sort_native_type::<Int8Type, i8>(values, options),
+ DataType::Int16 => sort_native_type::<Int16Type, i16>(values, options),
+ DataType::Int32 => sort_native_type::<Int32Type, i32>(values, options),
+ DataType::Int64 => sort_native_type::<Int64Type, i64>(values, options),
+ DataType::UInt8 => sort_native_type::<UInt8Type, u8>(values, options),
+ DataType::UInt16 => sort_native_type::<UInt16Type, u16>(values, options),
+ DataType::UInt32 => sort_native_type::<UInt32Type, u32>(values, options),
+ DataType::UInt64 => sort_native_type::<UInt64Type, u64>(values, options),
+ DataType::Float32 => sort_native_type::<Float32Type, f32>(values, options),
+ DataType::Float64 => sort_native_type::<Float64Type, f64>(values, options),
+ DataType::Date32 => sort_native_type::<Date32Type, i32>(values, options),
+ DataType::Date64 => sort_native_type::<Date64Type, i64>(values, options),
+ DataType::Time32(TimeUnit::Second) => {
+ sort_native_type::<Time32SecondType, i32>(values, options)
+ }
+ DataType::Time32(TimeUnit::Millisecond) => {
+ sort_native_type::<Time32MillisecondType, i32>(values, options)
+ }
+ DataType::Time64(TimeUnit::Microsecond) => {
+ sort_native_type::<Time64MicrosecondType, i64>(values, options)
+ }
+ DataType::Time64(TimeUnit::Nanosecond) => {
+ sort_native_type::<Time64NanosecondType, i64>(values, options)
+ }
+ DataType::Timestamp(TimeUnit::Second, _) => {
+ sort_native_type::<TimestampSecondType, i64>(values, options)
+ }
+ DataType::Timestamp(TimeUnit::Millisecond, _) => {
+ sort_native_type::<TimestampMillisecondType, i64>(values, options)
+ }
+ DataType::Timestamp(TimeUnit::Microsecond, _) => {
+ sort_native_type::<TimestampMicrosecondType, i64>(values, options)
+ }
+ DataType::Timestamp(TimeUnit::Nanosecond, _) => {
+ sort_native_type::<TimestampNanosecondType, i64>(values, options)
+ }
+ DataType::Interval(IntervalUnit::YearMonth) => {
+ sort_native_type::<IntervalYearMonthType, i32>(values, options)
+ }
+ DataType::Interval(IntervalUnit::DayTime) => {
+ sort_native_type::<IntervalDayTimeType, i64>(values, options)
+ }
+ DataType::Interval(IntervalUnit::MonthDayNano) => {
+ sort_native_type::<IntervalMonthDayNanoType, i128>(values, options)
+ }
+ DataType::Duration(TimeUnit::Second) => {
+ sort_native_type::<DurationSecondType, i64>(values, options)
+ }
+ DataType::Duration(TimeUnit::Millisecond) => {
+ sort_native_type::<DurationMillisecondType, i64>(values, options)
+ }
+ DataType::Duration(TimeUnit::Microsecond) => {
+ sort_native_type::<DurationMicrosecondType, i64>(values, options)
+ }
+ DataType::Duration(TimeUnit::Nanosecond) => {
+ sort_native_type::<DurationNanosecondType, i64>(values, options)
+ }
+ DataType::RunEndEncoded(_, _) => sort_run(values, options, None),
+ _ => {
+ let indices = sort_to_indices(values, options, None)?;
+ take(values, &indices, None)
+ }
}
- let indices = sort_to_indices(values, options, None)?;
- take(values, &indices, None)
+}
+
+fn compress_store<U>(input: *const U, mut output: *mut U, mask: u8) -> isize
+where
+ U: ArrowNativeType,
+{
+ let mut offset = 0;
+ if mask != 0 {
+ for i in 0..8 {
+ if (mask & (1 << i)) != 0 {
+ // This is safe since a valid bit i.e bit set to 1 indicates a valid value
+ unsafe {
+ *output = *input.offset(i);
+ offset += 1;
+ output = output.offset(1);
+ }
+ }
+ }
+ }
+ offset
+}
+
+fn create_null_buffer(
+ valid_count: usize,
+ nulls_count: usize,
+ length: usize,
+ sort_options: SortOptions,
+) -> Option<Buffer> {
+ let null_capacity = (length / 8) + (length % 8 != 0) as usize;
Review Comment:
Thanks for this pointer. I have used it in the latest commit.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]