This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new c9029c5388 Don't omit schema metadata when removing column (#5328)
c9029c5388 is described below
commit c9029c53883d984bdb7421d0d37ad7c65c8fe95a
Author: Kyle Barron <[email protected]>
AuthorDate: Mon Feb 12 06:57:11 2024 -0500
Don't omit schema metadata when removing column (#5328)
* Don't omit schema metadata when removing column
* Add test
* Update arrow-schema/src/schema.rs
Co-authored-by: Andrew Lamb <[email protected]>
---------
Co-authored-by: Andrew Lamb <[email protected]>
---
arrow-array/src/record_batch.rs | 31 ++++++++++++++++++++++++++++++-
arrow-schema/src/schema.rs | 6 ++++++
2 files changed, 36 insertions(+), 1 deletion(-)
diff --git a/arrow-array/src/record_batch.rs b/arrow-array/src/record_batch.rs
index dab6ae343a..d89020a656 100644
--- a/arrow-array/src/record_batch.rs
+++ b/arrow-array/src/record_batch.rs
@@ -355,7 +355,7 @@ impl RecordBatch {
/// assert_eq!(batch.num_columns(), 1);
/// ```
pub fn remove_column(&mut self, index: usize) -> ArrayRef {
- let mut builder = SchemaBuilder::from(self.schema.fields());
+ let mut builder = SchemaBuilder::from(self.schema.as_ref());
builder.remove(index);
self.schema = Arc::new(builder.finish());
self.columns.remove(index)
@@ -618,6 +618,8 @@ where
#[cfg(test)]
mod tests {
+ use std::collections::HashMap;
+
use super::*;
use crate::{BooleanArray, Int32Array, Int64Array, Int8Array, ListArray, StringArray};
use arrow_buffer::{Buffer, ToByteSlice};
@@ -1155,4 +1157,31 @@ mod tests {
let size = get_size(reader);
assert_eq!(size, 0);
}
+
+ #[test]
+ fn test_remove_column_maintains_schema_metadata() {
+ let id_array = Int32Array::from(vec![1, 2, 3, 4, 5]);
+ let bool_array = BooleanArray::from(vec![true, false, false, true, true]);
+
+ let mut metadata = HashMap::new();
+ metadata.insert("foo".to_string(), "bar".to_string());
+ let schema = Schema::new(vec![
+ Field::new("id", DataType::Int32, false),
+ Field::new("bool", DataType::Boolean, false),
+ ])
+ .with_metadata(metadata);
+
+ let mut batch = RecordBatch::try_new(
+ Arc::new(schema),
+ vec![Arc::new(id_array), Arc::new(bool_array)],
+ )
+ .unwrap();
+
+ let _removed_column = batch.remove_column(0);
+ assert_eq!(batch.schema().metadata().len(), 1);
+ assert_eq!(
+ batch.schema().metadata().get("foo").unwrap().as_str(),
+ "bar"
+ );
+ }
}
diff --git a/arrow-schema/src/schema.rs b/arrow-schema/src/schema.rs
index e547e5df3a..ede158fcf2 100644
--- a/arrow-schema/src/schema.rs
+++ b/arrow-schema/src/schema.rs
@@ -140,6 +140,12 @@ impl From<Fields> for SchemaBuilder {
}
}
+impl From<&Schema> for SchemaBuilder {
+ fn from(value: &Schema) -> Self {
+ Self::from(value.clone())
+ }
+}
+
impl From<Schema> for SchemaBuilder {
fn from(value: Schema) -> Self {
Self {