alamb opened a new issue, #4799:
URL: https://github.com/apache/arrow-rs/issues/4799
**Describe the bug**
When concatenating multiple `RecordBatch`es together, if the batches differ
only in metadata, `concat_batches` returns an error.
**To Reproduce**
Run this test:
```diff
diff --git a/arrow-select/src/concat.rs b/arrow-select/src/concat.rs
index 31846ee1fd..045bb313bc 100644
--- a/arrow-select/src/concat.rs
+++ b/arrow-select/src/concat.rs
@@ -142,7 +142,7 @@ mod tests {
use super::*;
use arrow_array::cast::AsArray;
use arrow_schema::{Field, Schema};
- use std::sync::Arc;
+ use std::{sync::Arc, collections::HashMap};
#[test]
fn test_concat_empty_vec() {
@@ -604,6 +604,41 @@ mod tests {
assert!(!new.values().to_data().ptr_eq(&com.values().to_data()));
}
+ #[test]
+ fn concat_record_batches_different_metadata() {
+ let metadata = HashMap::from([("foo".to_string(), "bar".to_string())]);
+ let field = Field::new("a", DataType::Int32, false);
+
+ let schema1 = Arc::new(Schema::new(vec![
+ field.clone(),
+ ]));
+
+ let batch1 = RecordBatch::try_new(
+ schema1,
+ vec![
+ Arc::new(Int32Array::from(vec![1])),
+ ],
+ )
+ .unwrap();
+
+ let schema2 = Arc::new(Schema::new(vec![
+ field.with_metadata(metadata)
+ ]));
+
+ let batch2 = RecordBatch::try_new(
+ schema2,
+ vec![
+ Arc::new(Int32Array::from(vec![3])),
+ ],
+ )
+ .unwrap();
+
+ // should be able to concat batches with different metadata
+ let new_batch = concat_batches(&batch1.schema(), [&batch1, &batch2]).unwrap();
+ assert_eq!(new_batch.schema(), batch1.schema());
+ assert_eq!(2, new_batch.num_rows());
+ }
+
#[test]
fn concat_record_batches() {
let schema = Arc::new(Schema::new(vec![
```
This fails with this error:
```
thread 'concat::tests::concat_record_batches_different_metadata' panicked at
'called `Result::unwrap()` on an `Err` value: InvalidArgumentError("batches[1]
schema is different with argument schema.\n batches[1] schema:
Schema { fields: [Field { name: \"a\", data_type: Int32, nullable: false,
dict_id: 0, dict_is_ordered: false, metadata: {\"foo\": \"bar\"} }], metadata:
{} },\n argument schema: Schema { fields: [Field { name: \"a\",
data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false,
metadata: {} }], metadata: {} }\n ")',
arrow-select/src/concat.rs:636:78
```
**Expected behavior**
I expect the test to pass
**Additional context**
<!--
Add any other context about the problem here.
-->
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]