This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 5601b7a8c8 Add `Field::remove()`, `Schema::remove()`, and
`RecordBatch::remove_column()` APIs (#4959)
5601b7a8c8 is described below
commit 5601b7a8c8fa7ebdd34a7ab0a90aff7958913143
Author: Folyd <[email protected]>
AuthorDate: Thu Oct 26 23:05:08 2023 +0800
Add `Field::remove()`, `Schema::remove()`, and
`RecordBatch::remove_column()` APIs (#4959)
* Add `Field::remove()`, `Schema::remove_field()`, and
`RecordBatch::remove_column()` APIs
* Update arrow-schema/src/fields.rs
Co-authored-by: Raphael Taylor-Davies
<[email protected]>
* Update arrow-schema/src/schema.rs
Co-authored-by: Raphael Taylor-Davies
<[email protected]>
* Fix docs testing
* Use `SchemaBuilder` to build the new `Schema`
* Recommend `SchemaBuilder`
* Apply review suggestions
* Update arrow-schema/src/schema.rs
Co-authored-by: Raphael Taylor-Davies
<[email protected]>
---------
Co-authored-by: Raphael Taylor-Davies
<[email protected]>
---
arrow-array/src/record_batch.rs | 34 ++++++++++++++++++++++++++++++++++
arrow-schema/src/fields.rs | 27 ++++++++++++++++++++++++++-
arrow-schema/src/schema.rs | 24 ++++++++++++++++++++++++
3 files changed, 84 insertions(+), 1 deletion(-)
diff --git a/arrow-array/src/record_batch.rs b/arrow-array/src/record_batch.rs
index 1f3e1df847..4e859fdfe7 100644
--- a/arrow-array/src/record_batch.rs
+++ b/arrow-array/src/record_batch.rs
@@ -327,6 +327,40 @@ impl RecordBatch {
&self.columns[..]
}
+ /// Remove column by index and return it.
+ ///
+ /// Returns the removed column as an `ArrayRef`.
+ ///
+ /// # Panics
+ ///
+ /// Panics if `index` is out of bounds.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use std::sync::Arc;
+ /// use arrow_array::{BooleanArray, Int32Array, RecordBatch};
+ /// use arrow_schema::{DataType, Field, Schema};
+ /// let id_array = Int32Array::from(vec![1, 2, 3, 4, 5]);
+ /// let bool_array = BooleanArray::from(vec![true, false, false, true, true]);
+ /// let schema = Schema::new(vec![
+ /// Field::new("id", DataType::Int32, false),
+ /// Field::new("bool", DataType::Boolean, false),
+ /// ]);
+ ///
+ /// let mut batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(id_array), Arc::new(bool_array)]).unwrap();
+ ///
+ /// let removed_column = batch.remove_column(0);
+ /// assert_eq!(removed_column.as_any().downcast_ref::<Int32Array>().unwrap(), &Int32Array::from(vec![1, 2, 3, 4, 5]));
+ /// assert_eq!(batch.num_columns(), 1);
+ /// ```
+ pub fn remove_column(&mut self, index: usize) -> ArrayRef {
+ let mut builder = SchemaBuilder::from(self.schema.fields());
+ builder.remove(index);
+ self.schema = Arc::new(builder.finish());
+ self.columns.remove(index)
+ }
+
/// Return a new RecordBatch where each column is sliced
/// according to `offset` and `length`
///
diff --git a/arrow-schema/src/fields.rs b/arrow-schema/src/fields.rs
index 368ecabbf3..70cb1968e9 100644
--- a/arrow-schema/src/fields.rs
+++ b/arrow-schema/src/fields.rs
@@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.
-use crate::{ArrowError, Field, FieldRef};
+use crate::{ArrowError, Field, FieldRef, SchemaBuilder};
use std::ops::Deref;
use std::sync::Arc;
@@ -98,6 +98,31 @@ impl Fields {
.zip(other.iter())
.all(|(a, b)| Arc::ptr_eq(a, b) || a.contains(b))
}
+
+ /// Remove a field by index and return it.
+ ///
+ /// # Panics
+ ///
+ /// Panics if `index` is out of bounds.
+ ///
+ /// # Example
+ /// ```
+ /// use arrow_schema::{DataType, Field, Fields};
+ /// let mut fields = Fields::from(vec![
+ /// Field::new("a", DataType::Boolean, false),
+ /// Field::new("b", DataType::Int8, false),
+ /// Field::new("c", DataType::Utf8, false),
+ /// ]);
+ /// assert_eq!(fields.len(), 3);
+ /// assert_eq!(fields.remove(1), Field::new("b", DataType::Int8, false).into());
+ /// assert_eq!(fields.len(), 2);
+ /// ```
+ pub fn remove(&mut self, index: usize) -> FieldRef {
+ let mut builder = SchemaBuilder::from(Fields::from(&*self.0));
+ let field = builder.remove(index);
+ *self = builder.finish().fields;
+ field
+ }
}
impl Default for Fields {
diff --git a/arrow-schema/src/schema.rs b/arrow-schema/src/schema.rs
index c0f58e077a..711e4cb331 100644
--- a/arrow-schema/src/schema.rs
+++ b/arrow-schema/src/schema.rs
@@ -381,6 +381,30 @@ impl Schema {
.iter()
.all(|(k, v1)| self.metadata.get(k).map(|v2| v1 ==
v2).unwrap_or_default())
}
+
+ /// Remove field by index and return it. Prefer [`SchemaBuilder`]
+ /// if you need to remove multiple fields, as this will save allocations.
+ ///
+ /// # Panics
+ ///
+ /// Panics if `index` is out of bounds.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use arrow_schema::{DataType, Field, Schema};
+ /// let mut schema = Schema::new(vec![
+ /// Field::new("a", DataType::Boolean, false),
+ /// Field::new("b", DataType::Int8, false),
+ /// Field::new("c", DataType::Utf8, false),
+ /// ]);
+ /// assert_eq!(schema.fields.len(), 3);
+ /// assert_eq!(schema.remove(1), Field::new("b", DataType::Int8, false).into());
+ /// assert_eq!(schema.fields.len(), 2);
+ /// ```
+ pub fn remove(&mut self, index: usize) -> FieldRef {
+ self.fields.remove(index)
+ }
}
impl fmt::Display for Schema {