This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 931c6fcc4 Simplify FixedLengthEncoding (#2812)
931c6fcc4 is described below

commit 931c6fcc45f4aca441191f007cf31daa19eb66d2
Author: Raphael Taylor-Davies <[email protected]>
AuthorDate: Mon Oct 3 14:53:53 2022 +0100

    Simplify FixedLengthEncoding (#2812)
---
 arrow/src/row/fixed.rs | 56 +++++++++++++++++++++++++++++++-------------------
 1 file changed, 35 insertions(+), 21 deletions(-)

diff --git a/arrow/src/row/fixed.rs b/arrow/src/row/fixed.rs
index 781082742..04b9a30ec 100644
--- a/arrow/src/row/fixed.rs
+++ b/arrow/src/row/fixed.rs
@@ -24,13 +24,17 @@ use half::f16;
 
 /// Encodes a value of a particular fixed width type into bytes according to the rules
 /// described on [`super::RowConverter`]
-pub trait FixedLengthEncoding<const N: usize>: Copy {
-    const ENCODED_LEN: usize = 1 + N;
+pub trait FixedLengthEncoding: Copy {
+    const ENCODED_LEN: usize = 1 + std::mem::size_of::<Self::Encoded>();
 
-    fn encode(self) -> [u8; N];
+    type Encoded: Sized + Copy + AsRef<[u8]> + AsMut<[u8]>;
+
+    fn encode(self) -> Self::Encoded;
 }
 
-impl FixedLengthEncoding<1> for bool {
+impl FixedLengthEncoding for bool {
+    type Encoded = [u8; 1];
+
     fn encode(self) -> [u8; 1] {
         [self as u8]
     }
@@ -38,7 +42,9 @@ impl FixedLengthEncoding<1> for bool {
 
 macro_rules! encode_signed {
     ($n:expr, $t:ty) => {
-        impl FixedLengthEncoding<$n> for $t {
+        impl FixedLengthEncoding for $t {
+            type Encoded = [u8; $n];
+
             fn encode(self) -> [u8; $n] {
                 let mut b = self.to_be_bytes();
                 // Toggle top "sign" bit to ensure consistent sort order
@@ -57,7 +63,9 @@ encode_signed!(16, i128);
 
 macro_rules! encode_unsigned {
     ($n:expr, $t:ty) => {
-        impl FixedLengthEncoding<$n> for $t {
+        impl FixedLengthEncoding for $t {
+            type Encoded = [u8; $n];
+
             fn encode(self) -> [u8; $n] {
                 self.to_be_bytes()
             }
@@ -70,7 +78,9 @@ encode_unsigned!(2, u16);
 encode_unsigned!(4, u32);
 encode_unsigned!(8, u64);
 
-impl FixedLengthEncoding<2> for f16 {
+impl FixedLengthEncoding for f16 {
+    type Encoded = [u8; 2];
+
     fn encode(self) -> [u8; 2] {
         // https://github.com/rust-lang/rust/blob/9c20b2a8cc7588decb6de25ac6a7912dcef24d65/library/core/src/num/f32.rs#L1176-L1260
         let s = self.to_bits() as i16;
@@ -79,7 +89,9 @@ impl FixedLengthEncoding<2> for f16 {
     }
 }
 
-impl FixedLengthEncoding<4> for f32 {
+impl FixedLengthEncoding for f32 {
+    type Encoded = [u8; 4];
+
     fn encode(self) -> [u8; 4] {
         // https://github.com/rust-lang/rust/blob/9c20b2a8cc7588decb6de25ac6a7912dcef24d65/library/core/src/num/f32.rs#L1176-L1260
         let s = self.to_bits() as i32;
@@ -88,7 +100,9 @@ impl FixedLengthEncoding<4> for f32 {
     }
 }
 
-impl FixedLengthEncoding<8> for f64 {
+impl FixedLengthEncoding for f64 {
+    type Encoded = [u8; 8];
+
     fn encode(self) -> [u8; 8] {
         // https://github.com/rust-lang/rust/blob/9c20b2a8cc7588decb6de25ac6a7912dcef24d65/library/core/src/num/f32.rs#L1176-L1260
         let s = self.to_bits() as i64;
@@ -97,7 +111,9 @@ impl FixedLengthEncoding<8> for f64 {
     }
 }
 
-impl FixedLengthEncoding<16> for Decimal128 {
+impl FixedLengthEncoding for Decimal128 {
+    type Encoded = [u8; 16];
+
     fn encode(self) -> [u8; 16] {
         let mut val = *self.raw_value();
         // Convert to big endian representation
@@ -108,7 +124,9 @@ impl FixedLengthEncoding<16> for Decimal128 {
     }
 }
 
-impl FixedLengthEncoding<32> for Decimal256 {
+impl FixedLengthEncoding for Decimal256 {
+    type Encoded = [u8; 32];
+
     fn encode(self) -> [u8; 32] {
         let mut val = *self.raw_value();
         // Convert to big endian representation
@@ -120,10 +138,10 @@ impl FixedLengthEncoding<32> for Decimal256 {
 }
 
 /// Returns the total encoded length (including null byte) for a value of type `T::Native`
-pub const fn encoded_len<const N: usize, T>(_col: &PrimitiveArray<T>) -> usize
+pub const fn encoded_len<T>(_col: &PrimitiveArray<T>) -> usize
 where
     T: ArrowPrimitiveType,
-    T::Native: FixedLengthEncoding<N>,
+    T::Native: FixedLengthEncoding,
 {
     T::Native::ENCODED_LEN
 }
@@ -132,26 +150,22 @@ where
 ///
 /// - 1 byte `0` if null or `1` if valid
 /// - bytes of [`FixedLengthEncoding`]
-pub fn encode<
-    const N: usize,
-    T: FixedLengthEncoding<N>,
-    I: IntoIterator<Item = Option<T>>,
->(
+pub fn encode<T: FixedLengthEncoding, I: IntoIterator<Item = Option<T>>>(
     out: &mut Rows,
     i: I,
     opts: SortOptions,
 ) {
     for (offset, maybe_val) in out.offsets.iter_mut().skip(1).zip(i) {
-        let end_offset = *offset + N + 1;
+        let end_offset = *offset + T::ENCODED_LEN;
         if let Some(val) = maybe_val {
             let to_write = &mut out.buffer[*offset..end_offset];
             to_write[0] = 1;
             let mut encoded = val.encode();
             if opts.descending {
                 // Flip bits to reverse order
-                encoded.iter_mut().for_each(|v| *v = !*v)
+                encoded.as_mut().iter_mut().for_each(|v| *v = !*v)
             }
-            to_write[1..].copy_from_slice(&encoded)
+            to_write[1..].copy_from_slice(encoded.as_ref())
         } else if !opts.nulls_first {
             out.buffer[*offset] = 0xFF;
         }

Reply via email to