helgikrs commented on a change in pull request #1166:
URL: https://github.com/apache/arrow-rs/pull/1166#discussion_r784256363



##########
File path: parquet/src/arrow/levels.rs
##########
@@ -1675,4 +1682,95 @@ mod tests {
         };
         assert_eq!(list_level, &expected_level);
     }
+
+    #[test]
+    fn test_list_of_struct() {
+        // define schema
+        let int_field = Field::new("a", DataType::Int32, true);
+        let item_field =
+            Field::new("item", DataType::Struct(vec![int_field.clone()]), 
true);
+        let list_field = Field::new("list", 
DataType::List(Box::new(item_field)), true);
+
+        let int_builder = Int32Builder::new(10);
+        let struct_builder =
+            StructBuilder::new(vec![int_field], vec![Box::new(int_builder)]);
+        let mut list_builder = ListBuilder::new(struct_builder);
+
+        // [{a: 1}], [], null, [null, null], [{a: null}], [{a: 2}]
+        //
+        // [{a: 1}]
+        let values = list_builder.values();
+        values
+            .field_builder::<Int32Builder>(0)
+            .unwrap()
+            .append_value(1)
+            .unwrap();
+        values.append(true).unwrap();
+        list_builder.append(true).unwrap();
+
+        // []
+        list_builder.append(true).unwrap();
+
+        // null
+        list_builder.append(false).unwrap();
+
+        // [null, null]
+        let values = list_builder.values();
+        values
+            .field_builder::<Int32Builder>(0)
+            .unwrap()
+            .append_null()
+            .unwrap();
+        values.append(false).unwrap();
+        values
+            .field_builder::<Int32Builder>(0)
+            .unwrap()
+            .append_null()
+            .unwrap();
+        values.append(false).unwrap();
+        list_builder.append(true).unwrap();
+
+        // [{a: null}]
+        let values = list_builder.values();
+        values
+            .field_builder::<Int32Builder>(0)
+            .unwrap()
+            .append_null()
+            .unwrap();
+        values.append(true).unwrap();
+        list_builder.append(true).unwrap();
+
+        // [{a: 2}]
+        let values = list_builder.values();
+        values
+            .field_builder::<Int32Builder>(0)
+            .unwrap()
+            .append_value(2)
+            .unwrap();
+        values.append(true).unwrap();
+        list_builder.append(true).unwrap();
+
+        let array = Arc::new(list_builder.finish());
+
+        let schema = Arc::new(Schema::new(vec![list_field]));
+
+        let rb = RecordBatch::try_new(schema, vec![array]).unwrap();
+
+        let batch_level = LevelInfo::new(0, rb.num_rows());
+        let list_level =
+            &batch_level.calculate_array_levels(rb.column(0), 
rb.schema().field(0))[0];
+
+        let expected_level = LevelInfo {

Review comment:
       I'm not super confident in this either--it would be great if someone 
with knowledge about the details of this code could chime in.
   
   I compared the definition and repetition levels with what the C++ parquet 
writer produces. I exported the above record batch and used the C++ parquet 
writer to generate a parquet file. I then ran `parquet-dump` on the resulting 
file, which produced the following:
   ```
   value 1: R:0 D:4 V:1
   value 2: R:0 D:1 V:<null>
   value 3: R:0 D:0 V:<null>
   value 4: R:0 D:2 V:<null>
   value 5: R:1 D:2 V:<null>
   value 6: R:0 D:3 V:<null>
   value 7: R:0 D:4 V:2
   ```




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to