This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 5601b7a8c8 Add `Field::remove()`, `Schema::remove()`, and 
`RecordBatch::remove_column()` APIs (#4959)
5601b7a8c8 is described below

commit 5601b7a8c8fa7ebdd34a7ab0a90aff7958913143
Author: Folyd <[email protected]>
AuthorDate: Thu Oct 26 23:05:08 2023 +0800

    Add `Field::remove()`, `Schema::remove()`, and 
`RecordBatch::remove_column()` APIs (#4959)
    
    * Add `Field::remove()`, `Schema::remove_field()`, and 
`RecordBatch::remove_column()` APIs
    
    * Update arrow-schema/src/fields.rs
    
    Co-authored-by: Raphael Taylor-Davies 
<[email protected]>
    
    * Update arrow-schema/src/schema.rs
    
    Co-authored-by: Raphael Taylor-Davies 
<[email protected]>
    
    * Fix docs testing
    
    * Use `SchemaBuilder` to build the new `Schema`
    
    * Recommend `SchemaBuilder`
    
    * Apply review suggestions
    
    * Update arrow-schema/src/schema.rs
    
    Co-authored-by: Raphael Taylor-Davies 
<[email protected]>
    
    ---------
    
    Co-authored-by: Raphael Taylor-Davies 
<[email protected]>
---
 arrow-array/src/record_batch.rs | 34 ++++++++++++++++++++++++++++++++++
 arrow-schema/src/fields.rs      | 27 ++++++++++++++++++++++++++-
 arrow-schema/src/schema.rs      | 24 ++++++++++++++++++++++++
 3 files changed, 84 insertions(+), 1 deletion(-)

diff --git a/arrow-array/src/record_batch.rs b/arrow-array/src/record_batch.rs
index 1f3e1df847..4e859fdfe7 100644
--- a/arrow-array/src/record_batch.rs
+++ b/arrow-array/src/record_batch.rs
@@ -327,6 +327,40 @@ impl RecordBatch {
         &self.columns[..]
     }
 
+    /// Remove column by index and return it.
+    ///
+    /// Return the `ArrayRef` if the column is removed.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `index` is out of bounds.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use std::sync::Arc;
+    /// use arrow_array::{BooleanArray, Int32Array, RecordBatch};
+    /// use arrow_schema::{DataType, Field, Schema};
+    /// let id_array = Int32Array::from(vec![1, 2, 3, 4, 5]);
+    /// let bool_array = BooleanArray::from(vec![true, false, false, true, 
true]);
+    /// let schema = Schema::new(vec![
+    ///     Field::new("id", DataType::Int32, false),
+    ///     Field::new("bool", DataType::Boolean, false),
+    /// ]);
+    ///
+    /// let mut batch = RecordBatch::try_new(Arc::new(schema), 
vec![Arc::new(id_array), Arc::new(bool_array)]).unwrap();
+    ///
+    /// let removed_column = batch.remove_column(0);
+    /// 
assert_eq!(removed_column.as_any().downcast_ref::<Int32Array>().unwrap(), 
&Int32Array::from(vec![1, 2, 3, 4, 5]));
+    /// assert_eq!(batch.num_columns(), 1);
+    /// ```
+    pub fn remove_column(&mut self, index: usize) -> ArrayRef {
+        let mut builder = SchemaBuilder::from(self.schema.fields());
+        builder.remove(index);
+        self.schema = Arc::new(builder.finish());
+        self.columns.remove(index)
+    }
+
     /// Return a new RecordBatch where each column is sliced
     /// according to `offset` and `length`
     ///
diff --git a/arrow-schema/src/fields.rs b/arrow-schema/src/fields.rs
index 368ecabbf3..70cb1968e9 100644
--- a/arrow-schema/src/fields.rs
+++ b/arrow-schema/src/fields.rs
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use crate::{ArrowError, Field, FieldRef};
+use crate::{ArrowError, Field, FieldRef, SchemaBuilder};
 use std::ops::Deref;
 use std::sync::Arc;
 
@@ -98,6 +98,31 @@ impl Fields {
                 .zip(other.iter())
                 .all(|(a, b)| Arc::ptr_eq(a, b) || a.contains(b))
     }
+
+    /// Remove a field by index and return it.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `index` is out of bounds.
+    ///
+    /// # Example
+    /// ```
+    /// use arrow_schema::{DataType, Field, Fields};
+    /// let mut fields = Fields::from(vec![
+    ///   Field::new("a", DataType::Boolean, false),
+    ///   Field::new("b", DataType::Int8, false),
+    ///   Field::new("c", DataType::Utf8, false),
+    /// ]);
+    /// assert_eq!(fields.len(), 3);
+    /// assert_eq!(fields.remove(1), Field::new("b", DataType::Int8, 
false).into());
+    /// assert_eq!(fields.len(), 2);
+    /// ```
+    pub fn remove(&mut self, index: usize) -> FieldRef {
+        let mut builder = SchemaBuilder::from(Fields::from(&*self.0));
+        let field = builder.remove(index);
+        *self = builder.finish().fields;
+        field
+    }
 }
 
 impl Default for Fields {
diff --git a/arrow-schema/src/schema.rs b/arrow-schema/src/schema.rs
index c0f58e077a..711e4cb331 100644
--- a/arrow-schema/src/schema.rs
+++ b/arrow-schema/src/schema.rs
@@ -381,6 +381,30 @@ impl Schema {
                 .iter()
                 .all(|(k, v1)| self.metadata.get(k).map(|v2| v1 == 
v2).unwrap_or_default())
     }
+
+    /// Remove field by index and return it. Consider using [`SchemaBuilder`]
+    /// if you are looking to remove multiple columns, as this will save allocations.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `index` is out of bounds.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use arrow_schema::{DataType, Field, Schema};
+    /// let mut schema = Schema::new(vec![
+    ///   Field::new("a", DataType::Boolean, false),
+    ///   Field::new("b", DataType::Int8, false),
+    ///   Field::new("c", DataType::Utf8, false),
+    /// ]);
+    /// assert_eq!(schema.fields.len(), 3);
+    /// assert_eq!(schema.remove(1), Field::new("b", DataType::Int8, 
false).into());
+    /// assert_eq!(schema.fields.len(), 2);
+    /// ```
+    pub fn remove(&mut self, index: usize) -> FieldRef {
+        self.fields.remove(index)
+    }
 }
 
 impl fmt::Display for Schema {

Reply via email to