alamb commented on code in PR #7627:
URL: https://github.com/apache/arrow-rs/pull/7627#discussion_r2283155956


##########
arrow-row/src/lib.rs:
##########
@@ -1668,8 +1668,21 @@ unsafe fn decode_column(
             rows.iter_mut().for_each(|row| *row = &row[1..]);
             let children = converter.convert_raw(rows, validate_utf8)?;
 
-            let child_data = children.iter().map(|c| c.to_data()).collect();
-            let builder = ArrayDataBuilder::new(field.data_type.clone())
+            let child_data: Vec<ArrayData> = children.iter().map(|c| 
c.to_data()).collect();
+            // Since RowConverter flattens certian data types (i.e. 
Dictionary),
+            // we need to use updated data type instead of original field
+            let corrected_fields: Vec<Field> = match &field.data_type {
+                DataType::Struct(struct_fields) => struct_fields
+                    .iter()
+                    .zip(child_data.iter())
+                    .map(|(orig_field, child_array)| {
+                        rebuild_field_with_child_type(orig_field, child_array)

Review Comment:
   I think this is the same as using `Field::with_data_type` which I think is 
more explicit
   
   ```suggestion
                           
orig_field.clone().with_data_type(child_array.data_type().clone())
   ```



##########
arrow-row/src/lib.rs:
##########
@@ -2148,6 +2172,166 @@ mod tests {
         back[0].to_data().validate_full().unwrap();
     }
 
+    #[test]
+    fn test_dictionary_in_struct() {
+        let ty = DataType::Struct(
+            vec![Field::new_dictionary(
+                "foo",
+                DataType::Int32,
+                DataType::Int32,
+                false,
+            )]
+            .into(),
+        );
+        // Test Case 1. empty array
+        let s = arrow_array::new_empty_array(&ty);
+
+        let sort_fields = vec![SortField::new(s.data_type().clone())];
+        let converter = RowConverter::new(sort_fields).unwrap();
+        let r = converter.convert_columns(&[Arc::clone(&s)]).unwrap();
+
+        let back = converter.convert_rows(&r).unwrap();
+        let [s2] = back.try_into().unwrap();
+
+        // RowConverter flattens Dictionary
+        // s.ty = Struct(foo Dictionary(Int32, Int32)), s2.ty = Struct(foo 
Int32)
+        assert_ne!(&s.data_type(), &s2.data_type());
+        s2.to_data().validate_full().unwrap();
+        assert_eq!(s.len(), s2.len());
+
+        // Test Case 2. None empty array
+        let builder = PrimitiveDictionaryBuilder::<Int32Type, 
Int32Type>::new();
+        let mut struct_builder = StructBuilder::new(
+            vec![Field::new_dictionary(
+                "foo",
+                DataType::Int32,
+                DataType::Int32,
+                false,
+            )],
+            vec![Box::new(builder)],
+        );
+        let dict_builder = struct_builder
+            .field_builder::<PrimitiveDictionaryBuilder<Int32Type, 
Int32Type>>(0)

Review Comment:
   Can we please change this so it uses different types for the keys and values 
-- otherwise it is hard to keep track of the key and values in the code below



##########
arrow-row/src/lib.rs:
##########
@@ -1668,8 +1668,21 @@ unsafe fn decode_column(
             rows.iter_mut().for_each(|row| *row = &row[1..]);
             let children = converter.convert_raw(rows, validate_utf8)?;
 
-            let child_data = children.iter().map(|c| c.to_data()).collect();
-            let builder = ArrayDataBuilder::new(field.data_type.clone())
+            let child_data: Vec<ArrayData> = children.iter().map(|c| 
c.to_data()).collect();
+            // Since RowConverter flattens certian data types (i.e. 
Dictionary),

Review Comment:
   ```suggestion
               // Since RowConverter flattens certain data types (i.e. 
Dictionary),
   ```



##########
arrow-row/src/list.rs:
##########
@@ -179,7 +182,20 @@ pub unsafe fn decode<O: OffsetSizeTrait>(
 
     let child_data = child[0].to_data();
 
-    let builder = ArrayDataBuilder::new(field.data_type.clone())
+    // Since RowConverter flattens certian data types (i.e. Dictionary),

Review Comment:
   ```suggestion
       // Since RowConverter flattens certain data types (i.e. Dictionary),
   ```



##########
arrow-row/src/lib.rs:
##########
@@ -2148,6 +2172,166 @@ mod tests {
         back[0].to_data().validate_full().unwrap();
     }
 
+    #[test]
+    fn test_dictionary_in_struct() {
+        let ty = DataType::Struct(
+            vec![Field::new_dictionary(
+                "foo",
+                DataType::Int32,
+                DataType::Int32,
+                false,
+            )]
+            .into(),
+        );
+        // Test Case 1. empty array
+        let s = arrow_array::new_empty_array(&ty);
+
+        let sort_fields = vec![SortField::new(s.data_type().clone())];
+        let converter = RowConverter::new(sort_fields).unwrap();
+        let r = converter.convert_columns(&[Arc::clone(&s)]).unwrap();
+
+        let back = converter.convert_rows(&r).unwrap();
+        let [s2] = back.try_into().unwrap();
+
+        // RowConverter flattens Dictionary
+        // s.ty = Struct(foo Dictionary(Int32, Int32)), s2.ty = Struct(foo 
Int32)
+        assert_ne!(&s.data_type(), &s2.data_type());
+        s2.to_data().validate_full().unwrap();
+        assert_eq!(s.len(), s2.len());
+
+        // Test Case 2. None empty array

Review Comment:
   Can you please break test case 2 into its own `#test` function so it is 
easier to see?



##########
arrow-row/src/lib.rs:
##########
@@ -1863,6 +1887,64 @@ mod tests {
         back[0].to_data().validate_full().unwrap();
     }
 
+    #[test]
+    fn test_dictionary_in_struct() {
+        let ty = DataType::Struct(
+            vec![Field::new_dictionary(
+                "foo",
+                DataType::Int32,
+                DataType::Int32,
+                false,
+            )]
+            .into(),
+        );
+        let s = arrow_array::new_empty_array(&ty);
+
+        let sort_fields = vec![SortField::new(s.data_type().clone())];
+        let converter = RowConverter::new(sort_fields).unwrap();
+        let r = converter.convert_columns(&[Arc::clone(&s)]).unwrap();
+
+        let back = converter.convert_rows(&r).unwrap();
+        let [s2] = back.try_into().unwrap();
+
+        // RowConverter flattens Dictionary
+        // s.ty = Struct(foo Dictionary(Int32, Int32)), s2.ty = Struct(foo 
Int32)
+        assert_ne!(&s.data_type(), &s2.data_type());
+
+        s2.to_data().validate_full().unwrap();

Review Comment:
   I don't see an assert that the values came through correctly -- it is only 
checking the length)



##########
arrow-row/src/lib.rs:
##########
@@ -1668,8 +1668,21 @@ unsafe fn decode_column(
             rows.iter_mut().for_each(|row| *row = &row[1..]);
             let children = converter.convert_raw(rows, validate_utf8)?;
 
-            let child_data = children.iter().map(|c| c.to_data()).collect();
-            let builder = ArrayDataBuilder::new(field.data_type.clone())
+            let child_data: Vec<ArrayData> = children.iter().map(|c| 
c.to_data()).collect();
+            // Since RowConverter flattens certian data types (i.e. 
Dictionary),
+            // we need to use updated data type instead of original field
+            let corrected_fields: Vec<Field> = match &field.data_type {
+                DataType::Struct(struct_fields) => struct_fields
+                    .iter()
+                    .zip(child_data.iter())
+                    .map(|(orig_field, child_array)| {
+                        rebuild_field_with_child_type(orig_field, child_array)

Review Comment:
   The same comment applies below as well



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to