ding-young commented on code in PR #7627: URL: https://github.com/apache/arrow-rs/pull/7627#discussion_r2281500608
########## arrow-row/src/lib.rs: ########## @@ -2148,6 +2172,166 @@ mod tests { back[0].to_data().validate_full().unwrap(); } + #[test] + fn test_dictionary_in_struct() { + let ty = DataType::Struct( + vec![Field::new_dictionary( + "foo", + DataType::Int32, + DataType::Int32, + false, + )] + .into(), + ); + // Test Case 1. empty array + let s = arrow_array::new_empty_array(&ty); + + let sort_fields = vec![SortField::new(s.data_type().clone())]; + let converter = RowConverter::new(sort_fields).unwrap(); + let r = converter.convert_columns(&[Arc::clone(&s)]).unwrap(); + + let back = converter.convert_rows(&r).unwrap(); + let [s2] = back.try_into().unwrap(); + + // RowConverter flattens Dictionary + // s.ty = Struct(foo Dictionary(Int32, Int32)), s2.ty = Struct(foo Int32) + assert_ne!(&s.data_type(), &s2.data_type()); + s2.to_data().validate_full().unwrap(); + assert_eq!(s.len(), s2.len()); + + // Test Case 2. None empty array + let builder = PrimitiveDictionaryBuilder::<Int32Type, Int32Type>::new(); + let mut struct_builder = StructBuilder::new( + vec![Field::new_dictionary( + "foo", + DataType::Int32, + DataType::Int32, + false, + )], + vec![Box::new(builder)], + ); + let dict_builder = struct_builder + .field_builder::<PrimitiveDictionaryBuilder<Int32Type, Int32Type>>(0) + .unwrap(); + + dict_builder.append(0).unwrap(); + dict_builder.append(1).unwrap(); + dict_builder.append(0).unwrap(); + dict_builder.append(-1).unwrap(); + + for _ in 0..4 { + struct_builder.append(true); + } + + let s = Arc::new(struct_builder.finish()) as ArrayRef; + let sort_fields = vec![SortField::new(s.data_type().clone())]; + let converter = RowConverter::new(sort_fields).unwrap(); + let r = converter.convert_columns(&[Arc::clone(&s)]).unwrap(); + + let back = converter.convert_rows(&r).unwrap(); + let [s2] = back.try_into().unwrap(); + + // RowConverter flattens Dictionary + // s.ty = Struct(foo Dictionary(Int32, Int32)), s2.ty = Struct(foo Int32) + assert_ne!(&s.data_type(), &s2.data_type()); + s2.to_data().validate_full().unwrap(); + + // Check if the logical data remains the same + let s1_struct = s.as_struct(); + let s2_struct = s2.as_struct(); + let s1_0 = s1_struct.column(0); + let s2_0 = s2_struct.column(0); + let s1_idx_0 = s1_0.as_dictionary::<Int32Type>(); + let s2_idx_0 = s2_0.as_primitive::<Int32Type>(); + let keys = s1_idx_0.keys(); + let values = s1_idx_0.values().as_primitive::<Int32Type>(); + + for i in 0..keys.len() { + let dict_index = keys.value(i) as usize; + assert_eq!(values.value(dict_index), s2_idx_0.value(i)); + } + } + + #[test] + fn test_list_of_primitive_dictionary() { + let mut builder = + ListBuilder::<PrimitiveDictionaryBuilder<Int32Type, Int32Type>>::default(); + // List[0] = [2, 3, 0, null, 5, 3, -1 (dict)] + builder.values().append(2).unwrap(); + builder.values().append(3).unwrap(); + builder.values().append(0).unwrap(); + builder.values().append_null(); + builder.values().append(5).unwrap(); + builder.values().append(3).unwrap(); + builder.values().append(-1).unwrap(); + builder.append(true); + // List[1] = null + builder.append(false); + // List[2] = [7, 0, 8 (dict)] + builder.values().append(7).unwrap(); + builder.values().append(0).unwrap(); + builder.values().append(8).unwrap(); + builder.append(true); + + let a = Arc::new(builder.finish()) as ArrayRef; + let data_type = a.data_type().clone(); + + let field = SortField::new(data_type.clone()); + let converter = RowConverter::new(vec![field]).unwrap(); + let rows = converter.convert_columns(&[Arc::clone(&a)]).unwrap(); + + let back = converter.convert_rows(&rows).unwrap(); + assert_eq!(back.len(), 1); + let [a2] = back.try_into().unwrap(); + + // RowConverter flattens Dictionary + // a.ty: List(Dictionary(Int32, Int32)), a2.ty: List(Int32) + assert_ne!(&a.data_type(), &a2.data_type()); + + a2.to_data().validate_full().unwrap(); + + let a2_list = a2.as_list::<i32>(); + let a1_list = a.as_list::<i32>(); + + // Check if the logical data remains the same + // List[0] = [2, 3, 0, null, 5, 3, -1] + let a1_0 = a1_list.value(0); + let a2_0 = a2_list.value(0); + let a1_idx_0 = a1_0.as_dictionary::<Int32Type>(); + let a2_idx_0 = a2_0.as_primitive::<Int32Type>(); + let keys = a1_idx_0.keys(); + let values = a1_idx_0.values().as_primitive::<Int32Type>(); + + for i in 0..keys.len() { + if keys.is_null(i) { + assert!(a2_idx_0.is_null(i)); + } else { + let dict_index = keys.value(i) as usize; + assert_eq!(values.value(dict_index), a2_idx_0.value(i)); + } + } Review Comment: Here ########## arrow-row/src/lib.rs: ########## @@ -2148,6 +2172,166 @@ mod tests { back[0].to_data().validate_full().unwrap(); } + #[test] + fn test_dictionary_in_struct() { + let ty = DataType::Struct( + vec![Field::new_dictionary( + "foo", + DataType::Int32, + DataType::Int32, + false, + )] + .into(), + ); + // Test Case 1. empty array + let s = arrow_array::new_empty_array(&ty); + + let sort_fields = vec![SortField::new(s.data_type().clone())]; + let converter = RowConverter::new(sort_fields).unwrap(); + let r = converter.convert_columns(&[Arc::clone(&s)]).unwrap(); + + let back = converter.convert_rows(&r).unwrap(); + let [s2] = back.try_into().unwrap(); + + // RowConverter flattens Dictionary + // s.ty = Struct(foo Dictionary(Int32, Int32)), s2.ty = Struct(foo Int32) + assert_ne!(&s.data_type(), &s2.data_type()); + s2.to_data().validate_full().unwrap(); + assert_eq!(s.len(), s2.len()); + + // Test Case 2. None empty array + let builder = PrimitiveDictionaryBuilder::<Int32Type, Int32Type>::new(); + let mut struct_builder = StructBuilder::new( + vec![Field::new_dictionary( + "foo", + DataType::Int32, + DataType::Int32, + false, + )], + vec![Box::new(builder)], + ); + let dict_builder = struct_builder + .field_builder::<PrimitiveDictionaryBuilder<Int32Type, Int32Type>>(0) + .unwrap(); + + dict_builder.append(0).unwrap(); + dict_builder.append(1).unwrap(); + dict_builder.append(0).unwrap(); + dict_builder.append(-1).unwrap(); + + for _ in 0..4 { + struct_builder.append(true); + } + + let s = Arc::new(struct_builder.finish()) as ArrayRef; + let sort_fields = vec![SortField::new(s.data_type().clone())]; + let converter = RowConverter::new(sort_fields).unwrap(); + let r = converter.convert_columns(&[Arc::clone(&s)]).unwrap(); + + let back = converter.convert_rows(&r).unwrap(); + let [s2] = back.try_into().unwrap(); + + // RowConverter flattens Dictionary + // s.ty = Struct(foo Dictionary(Int32, Int32)), s2.ty = Struct(foo Int32) + assert_ne!(&s.data_type(), &s2.data_type()); + s2.to_data().validate_full().unwrap(); + + // Check if the logical data remains the same Review Comment: And here too -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org