This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 8e9d7132f3 fix: Return an error on type mismatch rather than panic (#4995) (#5341)
8e9d7132f3 is described below

commit 8e9d7132f3f66611a2888f2568ef52dd72cbc10f
Author: Carol (Nichols || Goulding) <[email protected]>
AuthorDate: Tue Jan 30 04:30:00 2024 -0500

    fix: Return an error on type mismatch rather than panic (#4995) (#5341)
    
    * fix: Return an error on type mismatch rather than panic (#4995)
    
    * test: ArrowWriter and batch schema mismatch is an error
    
    * docs: Clarify that ArrowWriter expects the batch's schema to match
---
 parquet/src/arrow/arrow_writer/levels.rs | 26 +++++++++++++++++++++++++-
 parquet/src/arrow/arrow_writer/mod.rs    | 29 ++++++++++++++++++++++++++++-
 2 files changed, 53 insertions(+), 2 deletions(-)

diff --git a/parquet/src/arrow/arrow_writer/levels.rs b/parquet/src/arrow/arrow_writer/levels.rs
index 4fa04aa150..1f30782241 100644
--- a/parquet/src/arrow/arrow_writer/levels.rs
+++ b/parquet/src/arrow/arrow_writer/levels.rs
@@ -101,6 +101,7 @@ struct LevelContext {
 }
 
 /// A helper to construct [`ArrayLevels`] from a potentially nested [`Field`]
+#[derive(Debug)]
 enum LevelInfoBuilder {
     /// A primitive, leaf array
     Primitive(ArrayLevels),
@@ -132,7 +133,15 @@ enum LevelInfoBuilder {
 impl LevelInfoBuilder {
     /// Create a new [`LevelInfoBuilder`] for the given [`Field`] and parent [`LevelContext`]
     fn try_new(field: &Field, parent_ctx: LevelContext, array: &ArrayRef) -> Result<Self> {
-        assert_eq!(field.data_type(), array.data_type());
+        if field.data_type() != array.data_type() {
+            return Err(arrow_err!(format!(
+                "Incompatible type. Field '{}' has type {}, array has type {}",
+                field.name(),
+                field.data_type(),
+                array.data_type(),
+            )));
+        }
+
         let is_nullable = field.is_nullable();
 
         match array.data_type() {
@@ -1835,6 +1844,21 @@ mod tests {
         assert_eq!(levels[0], expected_level);
     }
 
+    #[test]
+    fn mismatched_types() {
+        let array = Arc::new(Int32Array::from_iter(0..10)) as ArrayRef;
+        let field = Field::new("item", DataType::Float64, false);
+
+        let err = LevelInfoBuilder::try_new(&field, Default::default(), &array)
+            .unwrap_err()
+            .to_string();
+
+        assert_eq!(
+            err,
+            "Arrow: Incompatible type. Field 'item' has type Float64, array has type Int32",
+        );
+    }
+
     fn levels<T: Array + 'static>(field: &Field, array: T) -> LevelInfoBuilder {
         let v = Arc::new(array) as ArrayRef;
         LevelInfoBuilder::try_new(field, Default::default(), &v).unwrap()
diff --git a/parquet/src/arrow/arrow_writer/mod.rs b/parquet/src/arrow/arrow_writer/mod.rs
index d9771838ad..f3f190c01f 100644
--- a/parquet/src/arrow/arrow_writer/mod.rs
+++ b/parquet/src/arrow/arrow_writer/mod.rs
@@ -183,7 +183,9 @@ impl<W: Write + Send> ArrowWriter<W> {
     ///
     /// If this would cause the current row group to exceed [`WriterProperties::max_row_group_size`]
     /// rows, the contents of `batch` will be written to one or more row groups such that all but
-    /// the final row group in the file contain [`WriterProperties::max_row_group_size`] rows
+    /// the final row group in the file contain [`WriterProperties::max_row_group_size`] rows.
+    ///
+    /// This will fail if the `batch`'s schema does not match the writer's schema.
     pub fn write(&mut self, batch: &RecordBatch) -> Result<()> {
         if batch.num_rows() == 0 {
             return Ok(());
@@ -2963,4 +2965,29 @@ mod tests {
                 .any(|kv| kv.key.as_str() == ARROW_SCHEMA_META_KEY));
         }
     }
+
+    #[test]
+    fn mismatched_schemas() {
+        let batch_schema = Schema::new(vec![Field::new("count", DataType::Int32, false)]);
+        let file_schema = Arc::new(Schema::new(vec![Field::new(
+            "temperature",
+            DataType::Float64,
+            false,
+        )]));
+
+        let batch = RecordBatch::try_new(
+            Arc::new(batch_schema),
+            vec![Arc::new(Int32Array::from(vec![1, 2, 3, 4])) as _],
+        )
+        .unwrap();
+
+        let mut buf = Vec::with_capacity(1024);
+        let mut writer = ArrowWriter::try_new(&mut buf, file_schema.clone(), None).unwrap();
+
+        let err = writer.write(&batch).unwrap_err().to_string();
+        assert_eq!(
+            err,
+            "Arrow: Incompatible type. Field 'temperature' has type Float64, array has type Int32"
+        );
+    }
 }

Reply via email to