alamb commented on code in PR #3906:
URL: https://github.com/apache/arrow-rs/pull/3906#discussion_r1147419834


##########
arrow-array/src/array/boolean_array.rs:
##########
@@ -105,15 +105,10 @@ impl BooleanArray {
     pub fn true_count(&self) -> usize {
         match self.data.nulls() {
             Some(nulls) => {
-                let null_chunks = nulls.inner().bit_chunks();
-                let value_chunks = self.values().bit_chunks();
+                let null_chunks = nulls.inner().bit_chunks().iter_padded();
+                let value_chunks = self.values().bit_chunks().iter_padded();
                 null_chunks
-                    .iter()
-                    .zip(value_chunks.iter())
-                    .chain(std::iter::once((
-                        null_chunks.remainder_bits(),
-                        value_chunks.remainder_bits(),
-                    )))
+                    .zip(value_chunks)

Review Comment:
   drive by consolidation it look like 👍 



##########
arrow-json/src/raw/mod.rs:
##########
@@ -1004,4 +1004,84 @@ mod tests {
         test_time::<Time64MicrosecondType>();
         test_time::<Time64NanosecondType>();
     }
+
+    #[test]
+    fn test_delta_checkpoint() {
+        let json = 
"{\"protocol\":{\"minReaderVersion\":1,\"minWriterVersion\":2}}";
+        let schema = Arc::new(Schema::new(vec![
+            Field::new(
+                "protocol",
+                DataType::Struct(vec![
+                    Field::new("minReaderVersion", DataType::Int32, true),
+                    Field::new("minWriterVersion", DataType::Int32, true),
+                ]),
+                true,
+            ),
+            Field::new(
+                "add",
+                DataType::Struct(vec![Field::new(
+                    "partitionValues",
+                    DataType::Map(
+                        Box::new(Field::new(
+                            "key_value",
+                            DataType::Struct(vec![
+                                Field::new("key", DataType::Utf8, false),
+                                Field::new("value", DataType::Utf8, true),
+                            ]),
+                            false,
+                        )),
+                        false,
+                    ),
+                    false,
+                )]),
+                true,
+            ),
+        ]));
+
+        let batches = do_read(json, 1024, true, schema);
+        assert_eq!(batches.len(), 1);

Review Comment:
   I wonder if asserting the contents with `assert_batches_eq` or similar would 
be helpful here 
   
   Also would it help to verify that the key is not null is enforced. Like that 
this document errors` { add: { partition_values: [ foo: bar, null: baz ] } } `?



##########
arrow-json/src/raw/mod.rs:
##########
@@ -514,8 +514,8 @@ mod tests {
             Field::new(
                 "nested",
                 DataType::Struct(vec![
-                    Field::new("a", DataType::Int32, false),
-                    Field::new("b", DataType::Int32, false),
+                    Field::new("a", DataType::Int32, true),

Review Comment:
   This is because
   
   ```
              {"list": null, "nested": {"a": null}}
   ```
   
   has a `null` for `nested:a`, right? So clearly the field needs to be nullable



##########
arrow-json/src/raw/mod.rs:
##########
@@ -594,7 +594,7 @@ mod tests {
                     "list2",
                     DataType::List(Box::new(Field::new(
                         "element",
-                        DataType::Struct(vec![Field::new("d", DataType::Int32, 
false)]),
+                        DataType::Struct(vec![Field::new("d", DataType::Int32, 
true)]),

Review Comment:
   Is the issue that in
   ```
              {"list": [], "nested": {"a": 1, "b": 2}, "nested_list": {"list2": 
[{"c": 3, "d": 5}, {"c": 4}]}}
   ```
   
   The second list element, `{"c": 4}`,  has an implict null in `d`?



##########
arrow-json/src/raw/mod.rs:
##########
@@ -1004,4 +1004,84 @@ mod tests {
         test_time::<Time64MicrosecondType>();
         test_time::<Time64NanosecondType>();
     }
+
+    #[test]
+    fn test_delta_checkpoint() {
+        let json = 
"{\"protocol\":{\"minReaderVersion\":1,\"minWriterVersion\":2}}";
+        let schema = Arc::new(Schema::new(vec![
+            Field::new(
+                "protocol",
+                DataType::Struct(vec![
+                    Field::new("minReaderVersion", DataType::Int32, true),
+                    Field::new("minWriterVersion", DataType::Int32, true),
+                ]),
+                true,
+            ),
+            Field::new(
+                "add",
+                DataType::Struct(vec![Field::new(
+                    "partitionValues",
+                    DataType::Map(
+                        Box::new(Field::new(
+                            "key_value",
+                            DataType::Struct(vec![
+                                Field::new("key", DataType::Utf8, false),
+                                Field::new("value", DataType::Utf8, true),
+                            ]),
+                            false,
+                        )),
+                        false,
+                    ),
+                    false,
+                )]),
+                true,
+            ),
+        ]));
+
+        let batches = do_read(json, 1024, true, schema);
+        assert_eq!(batches.len(), 1);
+    }
+
+    #[test]
+    fn struct_nullability() {
+        let do_test = |child: DataType| {
+            // Test correctly enforced nullability
+            let non_null = r#"{"foo": {}}"#;
+            let schema = Arc::new(Schema::new(vec![Field::new(
+                "foo",
+                DataType::Struct(vec![Field::new("bar", child, false)]),
+                true,
+            )]));
+            let mut reader = RawReaderBuilder::new(schema.clone())
+                .build(Cursor::new(non_null.as_bytes()))
+                .unwrap();
+            assert!(reader.next().unwrap().is_err()); // Should error as not 
nullable
+
+            // Test nulls in nullable parent can mask nulls in non-nullable 
child

Review Comment:
   Should we also verify that this case errors too?
   
   ```
               let null = r#"{"foo": {bar: null}}"#;



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to