Abhisheklearn12 commented on code in PR #9379:
URL: https://github.com/apache/arrow-rs/pull/9379#discussion_r2789471351
##########
arrow-json/src/reader/mod.rs:
##########
@@ -2857,4 +2860,187 @@ mod tests {
"Json error: whilst decoding field 'a': failed to parse \"a\" as
Int32".to_owned()
);
}
+
+ #[test]
+ fn test_read_run_end_encoded() {
+ let buf = r#"
+ {"a": "x"}
+ {"a": "x"}
+ {"a": "y"}
+ {"a": "y"}
+ {"a": "y"}
+ "#;
+
+ let ree_type = DataType::RunEndEncoded(
+ Arc::new(Field::new("run_ends", DataType::Int32, false)),
+ Arc::new(Field::new("values", DataType::Utf8, true)),
+ );
+ let schema = Arc::new(Schema::new(vec![Field::new("a", ree_type,
true)]));
+ let batches = do_read(buf, 1024, false, false, schema);
+ assert_eq!(batches.len(), 1);
+
+ let col = batches[0].column(0);
+ let run_array = col
Review Comment:
Gotcha, will switch to `as_run` instead of the manual `as_any()` / `downcast_ref` chain.
##########
arrow-json/src/reader/mod.rs:
##########
@@ -2857,4 +2860,187 @@ mod tests {
"Json error: whilst decoding field 'a': failed to parse \"a\" as
Int32".to_owned()
);
}
+
+ #[test]
+ fn test_read_run_end_encoded() {
+ let buf = r#"
+ {"a": "x"}
+ {"a": "x"}
+ {"a": "y"}
+ {"a": "y"}
+ {"a": "y"}
+ "#;
+
+ let ree_type = DataType::RunEndEncoded(
+ Arc::new(Field::new("run_ends", DataType::Int32, false)),
+ Arc::new(Field::new("values", DataType::Utf8, true)),
+ );
+ let schema = Arc::new(Schema::new(vec![Field::new("a", ree_type,
true)]));
+ let batches = do_read(buf, 1024, false, false, schema);
+ assert_eq!(batches.len(), 1);
+
+ let col = batches[0].column(0);
+ let run_array = col
+ .as_any()
+
.downcast_ref::<arrow_array::RunArray<arrow_array::types::Int32Type>>()
+ .unwrap();
+
+ // 5 logical values compressed into 2 runs
+ assert_eq!(run_array.len(), 5);
+ assert_eq!(run_array.run_ends().values(), &[2, 5]);
+
+ let values = run_array
+ .values()
+ .as_any()
+ .downcast_ref::<StringArray>()
+ .unwrap();
+ assert_eq!(values.len(), 2);
+ assert_eq!(values.value(0), "x");
+ assert_eq!(values.value(1), "y");
+ }
+
+ #[test]
+ fn test_run_end_encoded_roundtrip() {
+ let run_ends = arrow_array::Int32Array::from(vec![3, 5, 7]);
+ let values = StringArray::from(vec![Some("a"), None, Some("b")]);
+ let ree =
+
arrow_array::RunArray::<arrow_array::types::Int32Type>::try_new(&run_ends,
&values)
+ .unwrap();
+
+ let schema = Arc::new(Schema::new(vec![Field::new(
+ "c",
+ ree.data_type().clone(),
+ true,
+ )]));
+ let batch = RecordBatch::try_new(schema.clone(),
vec![Arc::new(ree)]).unwrap();
+
+ // Write to JSON
+ let mut buf = Vec::new();
+ {
+ let mut writer = crate::writer::LineDelimitedWriter::new(&mut buf);
+ writer.write_batches(&[&batch]).unwrap();
Review Comment:
Makes sense, will move this round-trip test to the writer tests.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]