sdf-jkl commented on code in PR #9598:
URL: https://github.com/apache/arrow-rs/pull/9598#discussion_r3048457085


##########
parquet-variant-compute/src/variant_get.rs:
##########
@@ -4376,4 +4434,167 @@ mod test {
             );
         }
     }
+
+    /// Test extracting a struct with mixed typed and variant fields.
+    /// Fields with VariantType extension metadata should be extracted as 
VariantArrays.
+    #[test]
+    fn test_struct_extraction_with_variant_fields() {
+        // Create test data: [{"id": 1, "name": "Alice", "data": {"score": 
95}},
+        //                   {"id": 2, "name": "Bob", "data": null}]
+        let json_strings = vec![
+            r#"{"id": 1, "name": "Alice", "data": {"score": 95}}"#,
+            r#"{"id": 2, "name": "Bob", "data": null}"#,
+            r#"{"id": 3, "name": null, "data": {"level": 5}}"#,
+        ];
+        let string_array: Arc<dyn Array> = 
Arc::new(StringArray::from(json_strings));
+        let variant_array = json_to_variant(&string_array).unwrap();
+
+        // Request struct where:
+        // - "id" is extracted as Int32
+        // - "name" is extracted as String (Utf8)
+        // - "data" is extracted as Variant (using VariantType extension 
metadata)
+        let struct_fields = Fields::from(vec![
+            Field::new("id", DataType::Int32, true),
+            Field::new("name", DataType::Utf8, true),
+            // Use VariantType extension metadata to request extraction as 
VariantArray.
+            // The data type must be Struct to satisfy 
VariantType::supports_data_type.
+            Field::new("data", DataType::Struct(Fields::empty()), true)
+                .with_extension_type(VariantType),
+        ]);
+        let struct_type = DataType::Struct(struct_fields);
+
+        let options = GetOptions {
+            path: VariantPath::default(),
+            as_type: Some(Arc::new(Field::new("result", struct_type, true))),
+            cast_options: CastOptions::default(),
+        };
+
+        let variant_array_ref = ArrayRef::from(variant_array);
+        let result = variant_get(&variant_array_ref, options).unwrap();
+
+        // Verify the result is a StructArray with 3 fields
+        let struct_result = 
result.as_any().downcast_ref::<StructArray>().unwrap();
+        assert_eq!(struct_result.len(), 3);
+        assert_eq!(struct_result.num_columns(), 3);
+
+        // Verify "id" field (Int32)
+        let id_field = struct_result
+            .column(0)
+            .as_any()
+            .downcast_ref::<Int32Array>()
+            .unwrap();
+        assert_eq!(id_field.value(0), 1);
+        assert_eq!(id_field.value(1), 2);
+        assert_eq!(id_field.value(2), 3);
+
+        // Verify "name" field (String/Utf8)
+        let name_field = struct_result
+            .column(1)
+            .as_any()
+            .downcast_ref::<StringArray>()
+            .unwrap();
+        assert_eq!(name_field.value(0), "Alice");
+        assert_eq!(name_field.value(1), "Bob");
+        assert!(name_field.is_null(2)); // null name in row 2
+
+        // Verify "data" field schema has VariantType extension metadata
+        let data_schema_field = struct_result
+            .fields()
+            .iter()
+            .find(|f| f.name() == "data")
+            .unwrap();
+        assert!(
+            data_schema_field
+                .try_extension_type::<VariantType>()
+                .is_ok(),
+            "data field should have VariantType extension metadata"
+        );
+
+        // Verify "data" field (VariantArray)
+        let data_field = struct_result.column(2);
+        // The data field should be a StructArray representing VariantArray's 
internal structure
+        // It has columns: metadata, value (optional), typed_value (optional)
+        let data_as_struct = data_field.as_any().downcast_ref::<StructArray>();
+        assert!(
+            data_as_struct.is_some(),
+            "data field should be a VariantArray (represented as StructArray)"
+        );
+
+        // Verify we can access the variant values
+        let data_variant_array = VariantArray::try_new(data_field).unwrap();
+        assert_eq!(data_variant_array.len(), 3);
+
+        // Row 0: data = {"score": 95}
+        let data0 = data_variant_array.value(0);
+        let obj0 = data0.as_object().expect("row 0 data should be an object");
+        let score = obj0.get("score").expect("row 0 data should have 'score'");
+        assert_eq!(score.as_int16(), Some(95));
+
+        // Row 1: data = null
+        assert!(
+            data_variant_array.is_null(1) || 
matches!(data_variant_array.value(1), Variant::Null)
+        );
+
+        // Row 2: data = {"level": 5}
+        let data2 = data_variant_array.value(2);
+        let obj2 = data2.as_object().expect("row 2 data should be an object");
+        let level = obj2.get("level").expect("row 2 data should have 'level'");
+        assert_eq!(level.as_int8(), Some(5));
+    }
+
+    /// Test that requesting a variant field absent in all rows does not panic.
+    /// Regression test: with_extension_type(VariantType) used to panic on 
NullArray.
+    #[test]
+    fn test_struct_extraction_missing_variant_field_no_panic() {
+        // Data has "id" but NOT "missing_field"
+        let json_strings = vec![r#"{"id": 1}"#, r#"{"id": 2}"#];
+        let string_array: Arc<dyn Array> = 
Arc::new(StringArray::from(json_strings));
+        let variant_array = json_to_variant(&string_array).unwrap();
+
+        // Request struct with a variant field that doesn't exist in any row
+        let struct_fields = Fields::from(vec![
+            Field::new("id", DataType::Int32, true),
+            Field::new("missing_field", DataType::Struct(Fields::empty()), 
true)
+                .with_extension_type(VariantType),
+        ]);
+        let struct_type = DataType::Struct(struct_fields);
+
+        let options = GetOptions {
+            path: VariantPath::default(),
+            as_type: Some(Arc::new(Field::new("result", struct_type, true))),
+            cast_options: CastOptions::default(),
+        };
+
+        let variant_array_ref = ArrayRef::from(variant_array);
+        // This should not panic
+        let result = variant_get(&variant_array_ref, options).unwrap();
+
+        let struct_result = 
result.as_any().downcast_ref::<StructArray>().unwrap();
+        assert_eq!(struct_result.len(), 2);
+        assert_eq!(struct_result.num_columns(), 2);
+
+        // The missing variant field should be all nulls
+        let missing_col = struct_result.column(1);
+        assert_eq!(missing_col.null_count(), missing_col.len());
+
+        // The missing variant field should preserve VariantType extension 
metadata
+        let missing_schema_field = struct_result
+            .fields()
+            .iter()
+            .find(|f| f.name() == "missing_field")
+            .unwrap();
+        assert!(
+            missing_schema_field
+                .try_extension_type::<VariantType>()
+                .is_ok(),
+            "missing variant field should preserve VariantType extension 
metadata"
+        );
+
+        // The missing variant field should be a valid VariantArray
+        let missing_variant = VariantArray::try_new(missing_col);
+        assert!(
+            missing_variant.is_ok(),
+            "missing variant field should be a valid VariantArray"
+        );
+    }

Review Comment:
   We should add a test that distinguishes handling json `Null` and missing 
value.
   ```suggestion
       }
       /// VariantType field extraction should distinguish:
       /// - explicit JSON null  => present Variant::Null
       /// - missing path        => SQL NULL
       #[test]
       fn test_struct_variant_field_distinguishes_missing_and_variant_null() {
           let json_strings = vec![
               r#"{"id": 1, "data": null}"#,
               r#"{"id": 2}"#,
               r#"{"id": 3, "data": {"score": 95}}"#,
           ];
   ```



##########
parquet-variant-compute/src/variant_get.rs:
##########
@@ -4376,4 +4434,167 @@ mod test {
             );
         }
     }
+
+    /// Test extracting a struct with mixed typed and variant fields.
+    /// Fields with VariantType extension metadata should be extracted as 
VariantArrays.
+    #[test]
+    fn test_struct_extraction_with_variant_fields() {
+        // Create test data: [{"id": 1, "name": "Alice", "data": {"score": 
95}},
+        //                   {"id": 2, "name": "Bob", "data": null}]
+        let json_strings = vec![
+            r#"{"id": 1, "name": "Alice", "data": {"score": 95}}"#,
+            r#"{"id": 2, "name": "Bob", "data": null}"#,
+            r#"{"id": 3, "name": null, "data": {"level": 5}}"#,

Review Comment:
   I think it's self explanatory, or at least there's no need to show the 
values.
   ```suggestion
           // Create test data
           let json_strings = vec![
               r#"{"id": 1, "name": "Alice", "data": {"score": 95}}"#,
               r#"{"id": 2, "name": "Bob", "data": null}"#,
               r#"{"id": 3, "name": null, "data": {"level": 5}}"#,
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to