carpecodeum commented on code in PR #7849:
URL: https://github.com/apache/arrow-rs/pull/7849#discussion_r2184228028
##########
parquet-variant/tests/variant_interop.rs:
##########
@@ -275,3 +278,309 @@ fn variant_object_builder() {
}
// TODO: Add tests for object_nested and array_nested
+
+//
+// Validation Fuzzing Tests
+//
+// 1. Generate valid variants using the builder
+// 2. Randomly corrupt bytes in the serialized data
+// 3. Test both validation pathways:
+// - If validation succeeds -> verify infallible APIs don't panic
+// - If validation fails -> verify fallible APIs handle errors gracefully
+//
+
+#[test]
+fn test_validation_fuzz_integration() {
+ let mut rng = StdRng::seed_from_u64(42);
+
+ for _ in 0..1000 {
+ // Generate a random valid variant
+ let (metadata, value) = generate_random_variant(&mut rng);
+
+ // Corrupt it
+ let (corrupted_metadata, corrupted_value) = corrupt_variant_data(&mut
rng, metadata, value);
+
+ // Test the validation workflow
+ test_validation_workflow(&corrupted_metadata, &corrupted_value);
+ }
+}
+
+fn generate_random_variant(rng: &mut StdRng) -> (Vec<u8>, Vec<u8>) {
+ let mut builder = VariantBuilder::new();
+ generate_random_value(rng, &mut builder, 3); // Max depth of 3
+ builder.finish()
+}
+
+fn generate_random_value(rng: &mut StdRng, builder: &mut VariantBuilder,
max_depth: u32) {
+ if max_depth == 0 {
+ // Force simple values at max depth
+ builder.append_value(rng.random::<i32>());
+ return;
+ }
+
+ match rng.random_range(0..15) {
+ 0 => builder.append_value(()),
+ 1 => builder.append_value(rng.random::<bool>()),
+ 2 => builder.append_value(rng.random::<i8>()),
+ 3 => builder.append_value(rng.random::<i16>()),
+ 4 => builder.append_value(rng.random::<i32>()),
+ 5 => builder.append_value(rng.random::<i64>()),
+ 6 => builder.append_value(rng.random::<f32>()),
+ 7 => builder.append_value(rng.random::<f64>()),
+ 8 => {
+ let len = rng.random_range(0..50);
+ let s: String = (0..len).map(|_| rng.random::<char>()).collect();
+ builder.append_value(s.as_str());
+ }
+ 9 => {
+ let len = rng.random_range(0..50);
+ let bytes: Vec<u8> = (0..len).map(|_| rng.random()).collect();
+ builder.append_value(bytes.as_slice());
+ }
+ 10 => {
+ if let Ok(decimal) = VariantDecimal4::try_new(rng.random(),
rng.random_range(0..10)) {
+ builder.append_value(decimal);
+ } else {
+ builder.append_value(0i32);
+ }
+ }
+ 11 => {
+ if let Ok(decimal) = VariantDecimal8::try_new(rng.random(),
rng.random_range(0..19)) {
+ builder.append_value(decimal);
+ } else {
+ builder.append_value(0i64);
+ }
+ }
+ 12 => {
+ if let Ok(decimal) = VariantDecimal16::try_new(rng.random(),
rng.random_range(0..39)) {
+ builder.append_value(decimal);
+ } else {
+ builder.append_value(0i64); // Use i64 instead of i128
+ }
+ }
+ 13 => {
+ // Generate a list
+ let mut list_builder = builder.new_list();
+ let list_len = rng.random_range(0..10);
+
+ for _ in 0..list_len {
+ list_builder.append_value(rng.random::<i32>());
+ }
+ list_builder.finish();
+ }
+ 14 => {
+ // Generate an object
+ let mut object_builder = builder.new_object();
+ let obj_size = rng.random_range(0..10);
+
+ for i in 0..obj_size {
+ let key = format!("field_{i}");
+ object_builder.insert(&key, rng.random::<i32>());
+ }
+ object_builder.finish().unwrap();
+ }
+ _ => unreachable!(),
+ }
+}
+
+fn corrupt_variant_data(
+ rng: &mut StdRng,
+ mut metadata: Vec<u8>,
+ mut value: Vec<u8>,
+) -> (Vec<u8>, Vec<u8>) {
+ // Randomly decide what to corrupt
+ let corrupt_metadata = rng.random_bool(0.3);
+ let corrupt_value = rng.random_bool(0.7);
+
+ if corrupt_metadata && !metadata.is_empty() {
+ let num_corruptions = rng.random_range(1..=(metadata.len().min(5)));
Review Comment:
I changed to exactly 1 corruption is that fine?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]