This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 8e9d7132f3 fix: Return an error on type mismatch rather than panic (#4995) (#5341)
8e9d7132f3 is described below
commit 8e9d7132f3f66611a2888f2568ef52dd72cbc10f
Author: Carol (Nichols || Goulding) <[email protected]>
AuthorDate: Tue Jan 30 04:30:00 2024 -0500
fix: Return an error on type mismatch rather than panic (#4995) (#5341)
* fix: Return an error on type mismatch rather than panic (#4995)
* test: ArrowWriter and batch schema mismatch is an error
* docs: Clarify that ArrowWriter expects the batch's schema to match
---
parquet/src/arrow/arrow_writer/levels.rs | 26 +++++++++++++++++++++++++-
parquet/src/arrow/arrow_writer/mod.rs | 29 ++++++++++++++++++++++++++++-
2 files changed, 53 insertions(+), 2 deletions(-)
diff --git a/parquet/src/arrow/arrow_writer/levels.rs b/parquet/src/arrow/arrow_writer/levels.rs
index 4fa04aa150..1f30782241 100644
--- a/parquet/src/arrow/arrow_writer/levels.rs
+++ b/parquet/src/arrow/arrow_writer/levels.rs
@@ -101,6 +101,7 @@ struct LevelContext {
}
/// A helper to construct [`ArrayLevels`] from a potentially nested [`Field`]
+#[derive(Debug)]
enum LevelInfoBuilder {
/// A primitive, leaf array
Primitive(ArrayLevels),
@@ -132,7 +133,15 @@ enum LevelInfoBuilder {
impl LevelInfoBuilder {
/// Create a new [`LevelInfoBuilder`] for the given [`Field`] and parent [`LevelContext`]
fn try_new(field: &Field, parent_ctx: LevelContext, array: &ArrayRef) -> Result<Self> {
- assert_eq!(field.data_type(), array.data_type());
+ if field.data_type() != array.data_type() {
+ return Err(arrow_err!(format!(
+ "Incompatible type. Field '{}' has type {}, array has type {}",
+ field.name(),
+ field.data_type(),
+ array.data_type(),
+ )));
+ }
+
let is_nullable = field.is_nullable();
match array.data_type() {
@@ -1835,6 +1844,21 @@ mod tests {
assert_eq!(levels[0], expected_level);
}
+ #[test]
+ fn mismatched_types() {
+ let array = Arc::new(Int32Array::from_iter(0..10)) as ArrayRef;
+ let field = Field::new("item", DataType::Float64, false);
+
+ let err = LevelInfoBuilder::try_new(&field, Default::default(), &array)
+ .unwrap_err()
+ .to_string();
+
+ assert_eq!(
+ err,
+ "Arrow: Incompatible type. Field 'item' has type Float64, array has type Int32",
+ );
+ }
+
fn levels<T: Array + 'static>(field: &Field, array: T) -> LevelInfoBuilder {
let v = Arc::new(array) as ArrayRef;
LevelInfoBuilder::try_new(field, Default::default(), &v).unwrap()
diff --git a/parquet/src/arrow/arrow_writer/mod.rs b/parquet/src/arrow/arrow_writer/mod.rs
index d9771838ad..f3f190c01f 100644
--- a/parquet/src/arrow/arrow_writer/mod.rs
+++ b/parquet/src/arrow/arrow_writer/mod.rs
@@ -183,7 +183,9 @@ impl<W: Write + Send> ArrowWriter<W> {
///
/// If this would cause the current row group to exceed [`WriterProperties::max_row_group_size`]
/// rows, the contents of `batch` will be written to one or more row groups such that all but
- /// the final row group in the file contain [`WriterProperties::max_row_group_size`] rows
+ /// the final row group in the file contain [`WriterProperties::max_row_group_size`] rows.
+ ///
+ /// This will fail if the `batch`'s schema does not match the writer's schema.
pub fn write(&mut self, batch: &RecordBatch) -> Result<()> {
if batch.num_rows() == 0 {
return Ok(());
@@ -2963,4 +2965,29 @@ mod tests {
.any(|kv| kv.key.as_str() == ARROW_SCHEMA_META_KEY));
}
}
+
+ #[test]
+ fn mismatched_schemas() {
+ let batch_schema = Schema::new(vec![Field::new("count", DataType::Int32, false)]);
+ let file_schema = Arc::new(Schema::new(vec![Field::new(
+ "temperature",
+ DataType::Float64,
+ false,
+ )]));
+
+ let batch = RecordBatch::try_new(
+ Arc::new(batch_schema),
+ vec![Arc::new(Int32Array::from(vec![1, 2, 3, 4])) as _],
+ )
+ .unwrap();
+
+ let mut buf = Vec::with_capacity(1024);
+ let mut writer = ArrowWriter::try_new(&mut buf, file_schema.clone(), None).unwrap();
+
+ let err = writer.write(&batch).unwrap_err().to_string();
+ assert_eq!(
+ err,
+ "Arrow: Incompatible type. Field 'temperature' has type Float64, array has type Int32"
+ );
+ }
}