alamb commented on code in PR #7896:
URL: https://github.com/apache/arrow-rs/pull/7896#discussion_r2202886096


##########
parquet-variant/src/variant/object.rs:
##########
@@ -618,4 +620,112 @@ mod tests {
             ArrowError::InvalidArgumentError(ref msg) if msg.contains("Tried 
to extract byte(s) ..16 from 15-byte buffer")
         ));
     }
+
+    fn test_variant_object_with_count(count: i32, expected_field_id_size: 
OffsetSizeBytes) {
+        let mut builder = VariantBuilder::new();
+        let mut obj = builder.new_object();
+        for val in 0..count {
+            let key = format!("id_{}", val);
+            obj.insert(&key, val);
+        }
+
+        obj.finish().unwrap();
+        let (metadata, value) = builder.finish();
+        let variant = Variant::try_new(&metadata, &value).unwrap();
+
+        if let Variant::Object(obj) = variant {
+            assert_eq!(obj.len(), count as usize);
+            assert_eq!(obj.get(&format!("id_{}", 0)).unwrap(), 
Variant::Int32(0));
+            assert_eq!(
+                obj.get(&format!("id_{}", count - 1)).unwrap(),
+                Variant::Int32(count - 1)
+            );
+
+            let header_byte = first_byte_from_slice(&value).unwrap();
+            let header = VariantObjectHeader::try_new(header_byte).unwrap();
+            assert_eq!(
+                header.field_id_size, expected_field_id_size,
+                "Expected {}-byte field IDs, got {}-byte field IDs",
+                expected_field_id_size as usize, header.field_id_size as usize
+            );
+        } else {
+            panic!("Expected object variant");
+        }
+    }
+
+    #[test]
+    fn test_variant_object_257_elements() {
+        test_variant_object_with_count(2_i32.pow(8) + 1, 
OffsetSizeBytes::Two); // 2^8 + 1, expected 2-byte field IDs
+    }
+
+    #[test]
+    fn test_variant_object_65537_elements() {
+        test_variant_object_with_count(2_i32.pow(16) + 1, 
OffsetSizeBytes::Three);
+        // 2^16 + 1, expected 3-byte field IDs
+    }
+
+    #[test]
+    fn test_variant_object_16777217_elements() {

Review Comment:
   My brief profiling suggests that a large amount of the time was spent 
inserting the field names (basically doing the hash table lookup). I am not 
sure how we would fix that -- you could try pre-populating the field names in 
the builder, I suppose.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to