This is an automated email from the ASF dual-hosted git repository.
jeffreyvo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new ddde66de91 Add a test for reading nested REE data in json (#9634)
ddde66de91 is described below
commit ddde66de9139af42448d775cc2b49c77570d2386
Author: Andrew Lamb <[email protected]>
AuthorDate: Thu Apr 16 01:54:06 2026 -0400
Add a test for reading nested REE data in json (#9634)
# Which issue does this PR close?
- related to https://github.com/apache/arrow-rs/pull/9497
# Rationale for this change
I (well, Codex) found a regression in the changes in
https://github.com/apache/arrow-rs/pull/9497, but the regressed behavior is
not covered by a test, so we should add one.
# What changes are included in this PR?
Add a test for reading nested run-end encoded (REE) data from JSON (it
currently passes on main but fails on https://github.com/apache/arrow-rs/pull/9497).
# Are these changes tested?
Only tests
# Are there any user-facing changes?
No, this is just a test.
Co-authored-by: Jeffrey Vo <[email protected]>
---
arrow-json/src/reader/mod.rs | 39 +++++++++++++++++++++++++++++++++++++++
1 file changed, 39 insertions(+)
diff --git a/arrow-json/src/reader/mod.rs b/arrow-json/src/reader/mod.rs
index 62c13c70ed..32ac243283 100644
--- a/arrow-json/src/reader/mod.rs
+++ b/arrow-json/src/reader/mod.rs
@@ -3457,4 +3457,43 @@ mod tests {
assert_eq!(run_array.len(), 3);
assert_eq!(run_array.run_ends().values(), &[2i16, 3]);
}
+
+ #[test]
+ fn test_read_nested_run_end_encoded() {
+ let buf = r#"
+ {"a": "x"}
+ {"a": "x"}
+ {"a": "y"}
+ "#;
+
+ // The outer REE compresses whole rows, while the inner REE compresses the
+ // repeated string values produced by decoding those rows.
+ let inner_type = DataType::RunEndEncoded(
+ Arc::new(Field::new("run_ends", DataType::Int64, false)),
+ Arc::new(Field::new("values", DataType::Utf8, true)),
+ );
+ let outer_type = DataType::RunEndEncoded(
+ Arc::new(Field::new("run_ends", DataType::Int64, false)),
+ Arc::new(Field::new("values", inner_type, true)),
+ );
+ let schema = Arc::new(Schema::new(vec![Field::new("a", outer_type, true)]));
+ let batches = do_read(buf, 1024, false, false, schema);
+ assert_eq!(batches.len(), 1);
+
+ let col = batches[0].column(0);
+ let outer = col.as_run::<arrow_array::types::Int64Type>();
+ // Three logical rows compress to two outer runs: ["x", "x"] and ["y"].
+ assert_eq!(outer.len(), 3);
+ assert_eq!(outer.run_ends().values(), &[2, 3]);
+
+ let nested = outer.values().as_run::<arrow_array::types::Int64Type>();
+ // The physical values of the outer REE are themselves a two-element REE.
+ assert_eq!(nested.len(), 2);
+ assert_eq!(nested.run_ends().values(), &[1, 2]);
+
+ let nested_values = nested.values().as_string::<i32>();
+ assert_eq!(nested_values.len(), 2);
+ assert_eq!(nested_values.value(0), "x");
+ assert_eq!(nested_values.value(1), "y");
+ }
}