nevi-me commented on a change in pull request #8938:
URL: https://github.com/apache/arrow/pull/8938#discussion_r544609090



##########
File path: rust/arrow/src/json/reader.rs
##########
@@ -888,265 +855,362 @@ impl Decoder {
         ))
     }
 
-    fn build_list_array<T: ArrowPrimitiveType>(
+    /// Build a nested GenericListArray from a list of unnested `Value`s
+    fn build_nested_list_array<OffsetSize: OffsetSizeTrait>(
         &self,
         rows: &[Value],
-        col_name: &str,
-    ) -> Result<ArrayRef>
-    where
-        T::Native: num::NumCast,
-    {
-        let values_builder: PrimitiveBuilder<T> = 
PrimitiveBuilder::new(rows.len());
-        let mut builder = ListBuilder::new(values_builder);
-        for row in rows {
-            if let Some(value) = row.get(&col_name) {
-                // value can be an array or a scalar
-                let vals: Vec<Option<f64>> = if let Value::Number(value) = 
value {
-                    vec![value.as_f64()]
-                } else if let Value::Array(n) = value {
-                    n.iter().map(|v: &Value| v.as_f64()).collect()
-                } else if let Value::Null = value {
-                    vec![None]
-                } else {
-                    return Err(ArrowError::JsonError(
-                        "3Only scalars are currently supported in JSON arrays"
-                            .to_string(),
-                    ));
-                };
-                for val in vals {
-                    match val {
-                        Some(v) => match num::cast::cast(v) {
-                            Some(v) => builder.values().append_value(v)?,
-                            None => builder.values().append_null()?,
-                        },
-                        None => builder.values().append_null()?,
-                    };
-                }
+        list_field: &Field,
+    ) -> Result<ArrayRef> {
+        // build list offsets
+        let mut cur_offset = OffsetSize::zero();
+        let list_len = rows.len();
+        let num_list_bytes = bit_util::ceil(list_len, 8);
+        let mut offsets = Vec::with_capacity(list_len + 1);
+        let mut list_nulls =
+            MutableBuffer::new(num_list_bytes).with_bitset(num_list_bytes, 
false);
+        offsets.push(cur_offset);
+        rows.iter().enumerate().for_each(|(i, v)| {
+            if let Value::Array(a) = v {
+                cur_offset = cur_offset + 
OffsetSize::from_usize(a.len()).unwrap();
+                bit_util::set_bit(list_nulls.data_mut(), i);
+            } else if let Value::Null = v {
+                // value is null, not incremented
+            } else {
+                cur_offset = cur_offset + OffsetSize::one();
             }
-            builder.append(true)?
-        }
-        Ok(Arc::new(builder.finish()))
+            offsets.push(cur_offset);
+        });
+        let valid_len = cur_offset.to_usize().unwrap();
+        let array_data = match list_field.data_type() {
+            DataType::Null => NullArray::new(valid_len).data(),
+            DataType::Boolean => {
+                let num_bytes = bit_util::ceil(valid_len, 8);
+                let mut bool_values =
+                    MutableBuffer::new(num_bytes).with_bitset(num_bytes, 
false);
+                let mut bool_nulls =
+                    MutableBuffer::new(num_bytes).with_bitset(num_bytes, true);
+                let mut curr_index = 0;
+                rows.iter().for_each(|v| {
+                    if let Value::Array(vs) = v {
+                        vs.iter().for_each(|value| {
+                            if let Value::Bool(child) = value {
+                                // if valid boolean, append value
+                                if *child {
+                                    bit_util::set_bit(bool_values.data_mut(), 
curr_index);
+                                }
+                            } else {
+                                // null slot
+                                bit_util::unset_bit(bool_nulls.data_mut(), 
curr_index);
+                            }
+                            curr_index += 1;

Review comment:
       That doesn't work: I have to keep track of the index, but I iterate twice
(a nested loop), so `enumerate` won't have the same effect.
   At least, that's what I'm seeing from looking at the code. Look at line 895.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to