tustvold commented on code in PR #1746:
URL: https://github.com/apache/arrow-rs/pull/1746#discussion_r883099251


##########
parquet/src/arrow/levels.rs:
##########
@@ -1039,210 +649,200 @@ mod tests {
         // If the first values of a list are null due to a parent, we have to 
still account for them
         // while indexing, because they would affect the way the child is 
indexed
         // i.e. in the above example, we have to know that [0, 1] has to be 
skipped
-        let parent_levels = LevelInfo {
-            definition: vec![0, 1, 0, 1, 1],
-            repetition: None,
-            array_offsets: vec![0, 1, 2, 3, 4, 5],
-            array_mask: vec![false, true, false, true, true],
-            max_definition: 1,
-            level_type: LevelType::Struct(true),
-            offset: 0,
-            length: 5,
-        };
-        let array_offsets = vec![0, 2, 2, 4, 8, 11];
-        let array_mask = vec![true, false, true, true, true];
+        let leaf = Int32Array::from_iter(0..11);
+        let leaf_field = Field::new("leaf", DataType::Int32, false);
+
+        let list_type = DataType::List(Box::new(leaf_field.clone()));
+        let list = ArrayData::builder(list_type.clone())
+            .len(5)
+            .add_child_data(leaf.data().clone())
+            .add_buffer(Buffer::from_iter([0_i32, 2, 2, 4, 8, 11]))
+            .build()
+            .unwrap();
+
+        let list = make_array(list);
+        let list_field = Field::new("list", list_type, true);
+
+        let struct_array =
+            StructArray::from((vec![(list_field, list)], 
Buffer::from([0b00011010])));
+        let array = Arc::new(struct_array) as ArrayRef;
+
+        let struct_field = Field::new("struct", array.data_type().clone(), 
true);
+
+        let levels = calculate_array_levels(&array, &struct_field).unwrap();
+        assert_eq!(levels.len(), 1);
 
-        let levels = parent_levels.calculate_child_levels(
-            array_offsets.clone(),
-            array_mask,
-            LevelType::List(true),
-        );
         let expected_levels = LevelInfo {
-            // 0 1 [2] are 0 (not defined at level 1)
-            // [2] is 1, but has 0 slots so is not populated (defined at level 
1 only)
-            // 2 3 [4] are 0
-            // 4 5 6 7 [8] are 1 (defined at level 1 only)
-            // 8 9 10 [11] are 2 (defined at both levels)
-            definition: vec![0, 0, 1, 0, 0, 3, 3, 3, 3, 3, 3, 3],
-            repetition: Some(vec![0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1]),
-            array_offsets,
-            array_mask: vec![
-                false, false, false, false, false, true, true, true, true, 
true, true,
-                true,
-            ],
-            max_definition: 3,
-            level_type: LevelType::List(true),
-            offset: 0,
-            length: 11,
+            def_levels: Some(vec![0, 2, 0, 3, 3, 3, 3, 3, 3, 3]),
+            rep_levels: Some(vec![0, 0, 0, 0, 1, 1, 1, 0, 1, 1]),
+            non_null_indices: (4..11).collect(),
+            max_def_level: 3,
+            max_rep_level: 1,
         };
-        assert_eq!(&levels.definition, &expected_levels.definition);
-        assert_eq!(&levels.repetition, &expected_levels.repetition);
-        assert_eq!(&levels.array_offsets, &expected_levels.array_offsets);
-        assert_eq!(&levels.max_definition, &expected_levels.max_definition);
-        assert_eq!(&levels.level_type, &expected_levels.level_type);
-        assert_eq!(&levels, &expected_levels);
-
-        // nested lists (using previous test)
-        let nested_parent_levels = levels;
-        let array_offsets = vec![0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22];
-        let array_mask = vec![
-            true, true, true, true, true, true, true, true, true, true, true,
-        ];
-        let levels = nested_parent_levels.calculate_child_levels(
-            array_offsets.clone(),
-            array_mask,
-            LevelType::List(true),
-        );
+
+        assert_eq!(&levels[0], &expected_levels);
+
+        // nested lists
+
+        // 0: [[100, 101], [102, 103]]
+        // 1: []
+        // 2: [[104, 105], [106, 107]]
+        // 3: [[108, 109], [110, 111], [112, 113], [114, 115]]
+        // 4: [[116, 117], [118, 119], [120, 121]]
+
+        let leaf = Int32Array::from_iter(100..122);
+        let leaf_field = Field::new("leaf", DataType::Int32, true);
+
+        let l1_type = DataType::List(Box::new(leaf_field.clone()));
+        let offsets = Buffer::from_iter([0_i32, 2, 4, 6, 8, 10, 12, 14, 16, 
18, 20, 22]);
+        let l1 = ArrayData::builder(l1_type.clone())
+            .len(11)
+            .add_child_data(leaf.data().clone())
+            .add_buffer(offsets)
+            .build()
+            .unwrap();
+
+        let l1_field = Field::new("l1", l1_type, true);
+        let l2_type = DataType::List(Box::new(l1_field.clone()));
+        let l2 = ArrayData::builder(l2_type.clone())
+            .len(5)
+            .add_child_data(l1)
+            .add_buffer(Buffer::from_iter([0, 2, 2, 4, 8, 11]))
+            .build()
+            .unwrap();
+
+        let l2 = make_array(l2);
+        let l2_field = Field::new("l2", l2.data_type().clone(), true);
+
+        let levels = calculate_array_levels(&l2, &l2_field).unwrap();
+        assert_eq!(levels.len(), 1);
+
         let expected_levels = LevelInfo {
-            // (def: 0) 0 1 [2] are 0 (take parent)
-            // (def: 0) 2 3 [4] are 0 (take parent)
-            // (def: 0) 4 5 [6] are 0 (take parent)
-            // (def: 0) 6 7 [8] are 0 (take parent)
-            // (def: 1) 8 9 [10] are 1 (take parent)
-            // (def: 1) 10 11 [12] are 1 (take parent)
-            // (def: 1) 12 23 [14] are 1 (take parent)
-            // (def: 1) 14 15 [16] are 1 (take parent)
-            // (def: 2) 16 17 [18] are 2 (defined at all levels)
-            // (def: 2) 18 19 [20] are 2 (defined at all levels)
-            // (def: 2) 20 21 [22] are 2 (defined at all levels)
-            //
-            // 0 1 [2] are 0 (not defined at level 1)
-            // [2] is 1, but has 0 slots so is not populated (defined at level 
1 only)
-            // 2 3 [4] are 0
-            // 4 5 6 7 [8] are 1 (defined at level 1 only)
-            // 8 9 10 [11] are 2 (defined at both levels)
-            //
-            // 0: [[100, 101], [102, 103]]
-            // 1: []
-            // 2: [[104, 105], [106, 107]]
-            // 3: [[108, 109], [110, 111], [112, 113], [114, 115]]
-            // 4: [[116, 117], [118, 119], [120, 121]]
-            definition: vec![
-                0, 0, 0, 0, 1, 0, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
5, 5,

Review Comment:
   Again, these definition levels are wrong. In the structure above the only 
non-max definition level will come from the empty slice.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to