tyrelr commented on a change in pull request #9304:
URL: https://github.com/apache/arrow/pull/9304#discussion_r563336446



##########
File path: rust/arrow/src/buffer.rs
##########
@@ -1188,6 +1209,57 @@ impl Drop for SetLenOnDrop<'_> {
     }
 }
 
+/// Creating a `MutableBuffer` instance by setting bits according to the 
boolean values
+impl std::iter::FromIterator<bool> for MutableBuffer {
+    fn from_iter<I>(iter: I) -> Self
+    where
+        I: IntoIterator<Item = bool>,
+    {
+        let mut iterator = iter.into_iter();
+        let mut result = {
+            let byte_capacity: usize = 
iterator.size_hint().0.saturating_add(7) / 8;
+            MutableBuffer::new(byte_capacity)
+        };
+
+        while let Some(value) = iterator.next() {
+            //we have the first bit of the byte
+            let mut exhausted = false;
+            let mut byte_accum: u8 = match value {

Review comment:
       That approach seems to be worse in the micro-benchmark, but better for 
the comparison kernel.
   ```
   critcmp master-67d0c2e3 bool-38d4e395 bool-11dd14af -t 10
   group                                    bool-11dd14af                       
   bool-38d4e395                          master-67d0c2e3
   -----                                    -------------                       
   -------------                          ---------------
   Buffer::from_iter bool                   1.66     10.2±0.02ms        ? B/sec 
   1.00      6.2±0.01ms        ? B/sec  
   MutableBuffer iter bitset                1.00     56.0±0.11ms        ? B/sec 
   1.12     62.5±0.13ms        ? B/sec  
   MutableBuffer::from_iter bool            1.45      8.9±0.02ms        ? B/sec 
   1.00      6.1±0.01ms        ? B/sec  
   add 512                                  1.06    225.1±1.05ns        ? B/sec 
   1.00    212.9±0.56ns        ? B/sec    1.38    293.0±1.54ns        ? B/sec
   array_from_vec 256                       1.00    629.6±2.56ns        ? B/sec 
   1.03    649.4±1.78ns        ? B/sec    1.11    701.7±1.29ns        ? B/sec
   cast int64 to int32 512                  1.00      2.2±0.01µs        ? B/sec 
   1.14      2.5±0.01µs        ? B/sec    1.02      2.3±0.01µs        ? B/sec
   cast time32s to time32ms 512             1.23    901.3±2.65ns        ? B/sec 
   1.01    736.1±1.17ns        ? B/sec    1.00    731.5±1.15ns        ? B/sec
   cast timestamp_ms to timestamp_ns 512    1.17   1211.8±2.48ns        ? B/sec 
   1.00   1036.8±1.71ns        ? B/sec    1.11   1149.7±2.41ns        ? B/sec
   eq Float32                               1.00     84.6±0.08µs        ? B/sec 
   1.05     89.2±0.28µs        ? B/sec    1.49    125.9±0.23µs        ? B/sec
   eq scalar Float32                        1.02     71.2±0.08µs        ? B/sec 
   1.12     78.2±0.18µs        ? B/sec    1.00     69.8±0.07µs        ? B/sec
   from_slice                               1.00    488.4±0.92µs        ? B/sec 
   1.81    883.0±1.48µs        ? B/sec    1.76    861.2±1.55µs        ? B/sec
   from_slice prepared                      1.00    480.7±1.00µs        ? B/sec 
   1.15    552.1±1.44µs        ? B/sec    1.18    564.8±0.85µs        ? B/sec
   gt scalar Float32                        1.24     64.8±0.06µs        ? B/sec 
   1.37     71.5±0.07µs        ? B/sec    1.00     52.4±0.06µs        ? B/sec
   gt_eq Float32                            1.00     72.3±0.26µs        ? B/sec 
   1.04     75.3±0.09µs        ? B/sec    1.72    124.2±0.09µs        ? B/sec
   lt scalar Float32                        1.07     65.5±0.09µs        ? B/sec 
   1.15     70.9±0.13µs        ? B/sec    1.00     61.4±0.09µs        ? B/sec
   lt_eq Float32                            1.00     72.0±0.07µs        ? B/sec 
   1.03     74.2±0.08µs        ? B/sec    1.71    123.1±0.23µs        ? B/sec
   lt_eq scalar Float32                     1.00     58.0±0.10µs        ? B/sec 
   1.05     61.1±0.07µs        ? B/sec    1.52     88.3±0.15µs        ? B/sec
   min nulls 512                            1.11      2.1±0.01µs        ? B/sec 
   1.03  1961.9±13.05ns        ? B/sec    1.00  1908.7±19.95ns        ? B/sec
   multiply 512                             1.34    350.1±0.66ns        ? B/sec 
   1.30    339.1±0.32ns        ? B/sec    1.00    261.6±0.30ns        ? B/sec
   mutable                                  3.79   1630.3±2.29µs        ? B/sec 
   1.00    429.8±1.01µs        ? B/sec    1.02    436.7±1.19µs        ? B/sec
   mutable extend                           2.45      2.0±0.00ms        ? B/sec 
   1.00    835.7±3.31µs        ? B/sec    1.01    846.4±1.40µs        ? B/sec
   mutable iter extend_from_slice           2.20      2.2±0.00ms        ? B/sec 
   1.00   1007.3±1.72µs        ? B/sec  
   mutable str nulls 1024                   1.16      5.4±0.01ms        ? B/sec 
   1.00      4.7±0.01ms        ? B/sec    1.05      4.9±0.01ms        ? B/sec
   neq scalar Float32                       1.12     71.3±0.12µs        ? B/sec 
   1.23     78.2±0.08µs        ? B/sec    1.00     63.6±0.18µs        ? B/sec
   not                                      1.13   1145.6±1.91ns        ? B/sec 
   1.13   1146.5±1.20ns        ? B/sec    1.00   1014.1±0.90ns        ? B/sec
   subtract 512                             1.02    258.8±1.15ns        ? B/sec 
   1.40    354.9±0.66ns        ? B/sec 
   ```
   
   I may try playing with the baselines to see if I'm missing a black-box 
that's causing an unrealistic optimization...

##########
File path: rust/arrow/src/buffer.rs
##########
@@ -1188,6 +1209,57 @@ impl Drop for SetLenOnDrop<'_> {
     }
 }
 
+/// Creating a `MutableBuffer` instance by setting bits according to the 
boolean values
+impl std::iter::FromIterator<bool> for MutableBuffer {
+    fn from_iter<I>(iter: I) -> Self
+    where
+        I: IntoIterator<Item = bool>,
+    {
+        let mut iterator = iter.into_iter();
+        let mut result = {
+            let byte_capacity: usize = 
iterator.size_hint().0.saturating_add(7) / 8;
+            MutableBuffer::new(byte_capacity)
+        };
+
+        while let Some(value) = iterator.next() {
+            //we have the first bit of the byte
+            let mut exhausted = false;
+            let mut byte_accum: u8 = match value {

Review comment:
       That approach seems to be worse in the micro-benchmark, but better for 
the comparison kernel.
   ```
   critcmp master-67d0c2e3 bool-38d4e395 bool-11dd14af -t 10
   group                                    bool-11dd14af                       
   bool-38d4e395                          master-67d0c2e3
   -----                                    -------------                       
   -------------                          ---------------
   Buffer::from_iter bool                   1.66     10.2±0.02ms        ? B/sec 
   1.00      6.2±0.01ms        ? B/sec  
   MutableBuffer iter bitset                1.00     56.0±0.11ms        ? B/sec 
   1.12     62.5±0.13ms        ? B/sec  
   MutableBuffer::from_iter bool            1.45      8.9±0.02ms        ? B/sec 
   1.00      6.1±0.01ms        ? B/sec  
   add 512                                  1.06    225.1±1.05ns        ? B/sec 
   1.00    212.9±0.56ns        ? B/sec    1.38    293.0±1.54ns        ? B/sec
   array_from_vec 256                       1.00    629.6±2.56ns        ? B/sec 
   1.03    649.4±1.78ns        ? B/sec    1.11    701.7±1.29ns        ? B/sec
   cast int64 to int32 512                  1.00      2.2±0.01µs        ? B/sec 
   1.14      2.5±0.01µs        ? B/sec    1.02      2.3±0.01µs        ? B/sec
   cast time32s to time32ms 512             1.23    901.3±2.65ns        ? B/sec 
   1.01    736.1±1.17ns        ? B/sec    1.00    731.5±1.15ns        ? B/sec
   cast timestamp_ms to timestamp_ns 512    1.17   1211.8±2.48ns        ? B/sec 
   1.00   1036.8±1.71ns        ? B/sec    1.11   1149.7±2.41ns        ? B/sec
   eq Float32                               1.00     84.6±0.08µs        ? B/sec 
   1.05     89.2±0.28µs        ? B/sec    1.49    125.9±0.23µs        ? B/sec
   eq scalar Float32                        1.02     71.2±0.08µs        ? B/sec 
   1.12     78.2±0.18µs        ? B/sec    1.00     69.8±0.07µs        ? B/sec
   from_slice                               1.00    488.4±0.92µs        ? B/sec 
   1.81    883.0±1.48µs        ? B/sec    1.76    861.2±1.55µs        ? B/sec
   from_slice prepared                      1.00    480.7±1.00µs        ? B/sec 
   1.15    552.1±1.44µs        ? B/sec    1.18    564.8±0.85µs        ? B/sec
   gt scalar Float32                        1.24     64.8±0.06µs        ? B/sec 
   1.37     71.5±0.07µs        ? B/sec    1.00     52.4±0.06µs        ? B/sec
   gt_eq Float32                            1.00     72.3±0.26µs        ? B/sec 
   1.04     75.3±0.09µs        ? B/sec    1.72    124.2±0.09µs        ? B/sec
   lt scalar Float32                        1.07     65.5±0.09µs        ? B/sec 
   1.15     70.9±0.13µs        ? B/sec    1.00     61.4±0.09µs        ? B/sec
   lt_eq Float32                            1.00     72.0±0.07µs        ? B/sec 
   1.03     74.2±0.08µs        ? B/sec    1.71    123.1±0.23µs        ? B/sec
   lt_eq scalar Float32                     1.00     58.0±0.10µs        ? B/sec 
   1.05     61.1±0.07µs        ? B/sec    1.52     88.3±0.15µs        ? B/sec
   min nulls 512                            1.11      2.1±0.01µs        ? B/sec 
   1.03  1961.9±13.05ns        ? B/sec    1.00  1908.7±19.95ns        ? B/sec
   multiply 512                             1.34    350.1±0.66ns        ? B/sec 
   1.30    339.1±0.32ns        ? B/sec    1.00    261.6±0.30ns        ? B/sec
   mutable                                  3.79   1630.3±2.29µs        ? B/sec 
   1.00    429.8±1.01µs        ? B/sec    1.02    436.7±1.19µs        ? B/sec
   mutable extend                           2.45      2.0±0.00ms        ? B/sec 
   1.00    835.7±3.31µs        ? B/sec    1.01    846.4±1.40µs        ? B/sec
   mutable iter extend_from_slice           2.20      2.2±0.00ms        ? B/sec 
   1.00   1007.3±1.72µs        ? B/sec  
   mutable str nulls 1024                   1.16      5.4±0.01ms        ? B/sec 
   1.00      4.7±0.01ms        ? B/sec    1.05      4.9±0.01ms        ? B/sec
   neq scalar Float32                       1.12     71.3±0.12µs        ? B/sec 
   1.23     78.2±0.08µs        ? B/sec    1.00     63.6±0.18µs        ? B/sec
   not                                      1.13   1145.6±1.91ns        ? B/sec 
   1.13   1146.5±1.20ns        ? B/sec    1.00   1014.1±0.90ns        ? B/sec
   subtract 512                             1.02    258.8±1.15ns        ? B/sec 
   1.40    354.9±0.66ns        ? B/sec 
   ```
   
   I may try playing with the benchmarks to see if I'm missing a black-box 
that's causing an unrealistic optimization...

##########
File path: rust/arrow/src/buffer.rs
##########
@@ -1188,6 +1209,57 @@ impl Drop for SetLenOnDrop<'_> {
     }
 }
 
+/// Creating a `MutableBuffer` instance by setting bits according to the 
boolean values
+impl std::iter::FromIterator<bool> for MutableBuffer {
+    fn from_iter<I>(iter: I) -> Self
+    where
+        I: IntoIterator<Item = bool>,
+    {
+        let mut iterator = iter.into_iter();
+        let mut result = {
+            let byte_capacity: usize = 
iterator.size_hint().0.saturating_add(7) / 8;
+            MutableBuffer::new(byte_capacity)
+        };
+
+        while let Some(value) = iterator.next() {
+            //we have the first bit of the byte
+            let mut exhausted = false;
+            let mut byte_accum: u8 = match value {

Review comment:
       After making that change, it seems like this could be achieved without 
requiring any changes to the previous Buffer API's: The byte-accumulation loop 
could be exposed as a PackedBoolIterator that wraps an Iter<bool>, exposing it 
as Iter<u8>().  I'm not sure if that would a better or worse approach...

##########
File path: rust/arrow/src/buffer.rs
##########
@@ -1188,6 +1209,57 @@ impl Drop for SetLenOnDrop<'_> {
     }
 }
 
+/// Creating a `MutableBuffer` instance by setting bits according to the 
boolean values
+impl std::iter::FromIterator<bool> for MutableBuffer {
+    fn from_iter<I>(iter: I) -> Self
+    where
+        I: IntoIterator<Item = bool>,
+    {
+        let mut iterator = iter.into_iter();
+        let mut result = {
+            let byte_capacity: usize = 
iterator.size_hint().0.saturating_add(7) / 8;
+            MutableBuffer::new(byte_capacity)
+        };
+
+        while let Some(value) = iterator.next() {
+            //we have the first bit of the byte
+            let mut exhausted = false;
+            let mut byte_accum: u8 = match value {

Review comment:
       After making that change, it seems like this could be achieved without 
requiring any changes to the previous Buffer API's: The byte-accumulation loop 
could be exposed as a ```PackedBoolIterator``` that wraps an 
```Iterator<Item=bool>```, exposing it as ``Iterator<Item=u8>()```.  I'm not 
sure if that would a better or worse approach...




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to