This is an automated email from the ASF dual-hosted git repository.
xushiyan pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/hudi-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 72b376e build(deps): upgrade apache-avro to v0.21.0 (#503)
72b376e is described below
commit 72b376e4413c8a70598dda4c17bdaf60aff95105
Author: Shiyan Xu <[email protected]>
AuthorDate: Thu Jan 1 15:37:50 2026 -0600
build(deps): upgrade apache-avro to v0.21.0 (#503)
Update apache-avro and apache-avro-derive from v0.17.0 to v0.21.0.
---
Cargo.toml | 4 +--
.../core/src/avro_to_arrow/arrow_array_reader.rs | 11 ++++----
crates/core/src/avro_to_arrow/schema.rs | 4 ++-
crates/core/src/file_group/log_file/avro.rs | 30 +++++++++++++++++++++-
4 files changed, 40 insertions(+), 9 deletions(-)
diff --git a/Cargo.toml b/Cargo.toml
index 2fb01c9..52133a7 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -51,8 +51,8 @@ object_store = { version = "~0.11.2", features = ["aws", "azure", "gcp"] }
parquet = { version = "~54.2.0", features = ["async", "object_store"] }
# avro
-apache-avro = { version = "~0.17.0", features = ["derive"] }
-apache-avro-derive = { version = "~0.17.0" }
+apache-avro = { version = "~0.21.0", features = ["derive"] }
+apache-avro-derive = { version = "~0.21.0" }
# datafusion
datafusion = { version = "~46.0.0" }
diff --git a/crates/core/src/avro_to_arrow/arrow_array_reader.rs b/crates/core/src/avro_to_arrow/arrow_array_reader.rs
index 4ada3ad..10c3abc 100644
--- a/crates/core/src/avro_to_arrow/arrow_array_reader.rs
+++ b/crates/core/src/avro_to_arrow/arrow_array_reader.rs
@@ -21,6 +21,7 @@ use crate::avro_to_arrow::to_arrow_schema;
use crate::error::{CoreError, Result};
use apache_avro::schema::RecordSchema;
use apache_avro::{
+ error::Details as AvroDetails,
schema::{Schema as AvroSchema, SchemaKind},
types::Value,
AvroResult, Error as AvroError,
@@ -856,11 +857,11 @@ fn resolve_string(v: &Value) -> ArrowResult<Option<String>> {
match v {
Value::String(s) => Ok(Some(s.clone())),
Value::Bytes(bytes) => String::from_utf8(bytes.to_vec())
- .map_err(AvroError::ConvertToUtf8)
+ .map_err(|e| AvroError::new(AvroDetails::ConvertToUtf8(e)))
.map(Some),
Value::Enum(_, s) => Ok(Some(s.clone())),
Value::Null => Ok(None),
- other => Err(AvroError::GetString(other.into())),
+ other => Err(AvroError::new(AvroDetails::GetString(other.clone()))),
}
.map_err(|e| SchemaError(format!("expected resolvable string : {e:?}")))
}
@@ -869,7 +870,7 @@ fn resolve_u8(v: &Value) -> AvroResult<u8> {
let int = match v {
Value::Int(n) => Ok(Value::Int(*n)),
Value::Long(n) => Ok(Value::Int(*n as i32)),
- other => Err(AvroError::GetU8(other.into())),
+ other => Err(AvroError::new(AvroDetails::GetU8(other.clone()))),
}?;
if let Value::Int(n) = int {
if n >= 0 && n <= u8::MAX as i32 {
@@ -877,7 +878,7 @@ fn resolve_u8(v: &Value) -> AvroResult<u8> {
}
}
- Err(AvroError::GetU8(int.into()))
+ Err(AvroError::new(AvroDetails::GetU8(int)))
}
fn resolve_bytes(v: &Value) -> Option<Vec<u8>> {
@@ -892,7 +893,7 @@ fn resolve_bytes(v: &Value) -> Option<Vec<u8>> {
.collect::<Result<Vec<_>, _>>()
.ok()?,
)),
- other => Err(AvroError::GetBytes(other.into())),
+ other => Err(AvroError::new(AvroDetails::GetBytes(other.clone()))),
}
.ok()
.and_then(|v| match v {
diff --git a/crates/core/src/avro_to_arrow/schema.rs b/crates/core/src/avro_to_arrow/schema.rs
index afb0773..baf67fd 100644
--- a/crates/core/src/avro_to_arrow/schema.rs
+++ b/crates/core/src/avro_to_arrow/schema.rs
@@ -100,7 +100,9 @@ fn schema_to_field_with_props(
.data_type()
.clone()
} else {
- return Err(CoreError::from(apache_avro::Error::GetUnionDuplicate));
+ return Err(CoreError::from(apache_avro::Error::new(
+ apache_avro::error::Details::GetUnionDuplicate,
+ )));
}
} else {
let fields = sub_schemas
diff --git a/crates/core/src/file_group/log_file/avro.rs b/crates/core/src/file_group/log_file/avro.rs
index 8c1d6f1..062c1a9 100644
--- a/crates/core/src/file_group/log_file/avro.rs
+++ b/crates/core/src/file_group/log_file/avro.rs
@@ -50,7 +50,11 @@ impl<R: Read> Iterator for AvroDataBlockContentReader<R> {
let mut record_content_length = [0u8; 4];
match self.reader.read_exact(&mut record_content_length) {
Ok(_) => {}
- Err(e) => return Some(Err(apache_avro::Error::ReadBytes(e))),
+ Err(e) => {
+ return Some(Err(apache_avro::Error::new(
+ apache_avro::error::Details::ReadBytes(e),
+ )))
+ }
}
let record_content_length = u32::from_be_bytes(record_content_length);
@@ -62,3 +66,27 @@ impl<R: Read> Iterator for AvroDataBlockContentReader<R> {
Some(result)
}
}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use std::io::Cursor;
+
+ #[test]
+ fn test_read_error_on_truncated_data() {
+ // Create a simple Avro schema
+ let schema = AvroSchema::parse_str(r#"{"type": "null"}"#).unwrap();
+
+ // Create a reader with only 2 bytes when we need 4 bytes for record length
+ let truncated_data = vec![0u8, 1u8];
+ let reader = Cursor::new(truncated_data);
+
+ // Create reader expecting 1 record but with insufficient data
+ let mut avro_reader = AvroDataBlockContentReader::new(reader, &schema, 1);
+
+ // Should return an error because read_exact fails
+ let result = avro_reader.next();
+ assert!(result.is_some());
+ assert!(result.unwrap().is_err());
+ }
+}