This is an automated email from the ASF dual-hosted git repository.

xushiyan pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/hudi-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new 72b376e  build(deps): upgrade apache-avro to v0.21.0 (#503)
72b376e is described below

commit 72b376e4413c8a70598dda4c17bdaf60aff95105
Author: Shiyan Xu <[email protected]>
AuthorDate: Thu Jan 1 15:37:50 2026 -0600

    build(deps): upgrade apache-avro to v0.21.0 (#503)
    
    Update apache-avro and apache-avro-derive from v0.17.0 to v0.21.0.
---
 Cargo.toml                                         |  4 +--
 .../core/src/avro_to_arrow/arrow_array_reader.rs   | 11 ++++----
 crates/core/src/avro_to_arrow/schema.rs            |  4 ++-
 crates/core/src/file_group/log_file/avro.rs        | 30 +++++++++++++++++++++-
 4 files changed, 40 insertions(+), 9 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index 2fb01c9..52133a7 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -51,8 +51,8 @@ object_store = { version = "~0.11.2", features = ["aws", "azure", "gcp"] }
 parquet = { version = "~54.2.0", features = ["async", "object_store"] }
 
 # avro
-apache-avro = { version = "~0.17.0", features = ["derive"] }
-apache-avro-derive = { version = "~0.17.0" }
+apache-avro = { version = "~0.21.0", features = ["derive"] }
+apache-avro-derive = { version = "~0.21.0" }
 
 # datafusion
 datafusion = { version = "~46.0.0" }
diff --git a/crates/core/src/avro_to_arrow/arrow_array_reader.rs b/crates/core/src/avro_to_arrow/arrow_array_reader.rs
index 4ada3ad..10c3abc 100644
--- a/crates/core/src/avro_to_arrow/arrow_array_reader.rs
+++ b/crates/core/src/avro_to_arrow/arrow_array_reader.rs
@@ -21,6 +21,7 @@ use crate::avro_to_arrow::to_arrow_schema;
 use crate::error::{CoreError, Result};
 use apache_avro::schema::RecordSchema;
 use apache_avro::{
+    error::Details as AvroDetails,
     schema::{Schema as AvroSchema, SchemaKind},
     types::Value,
     AvroResult, Error as AvroError,
@@ -856,11 +857,11 @@ fn resolve_string(v: &Value) -> ArrowResult<Option<String>> {
     match v {
         Value::String(s) => Ok(Some(s.clone())),
         Value::Bytes(bytes) => String::from_utf8(bytes.to_vec())
-            .map_err(AvroError::ConvertToUtf8)
+            .map_err(|e| AvroError::new(AvroDetails::ConvertToUtf8(e)))
             .map(Some),
         Value::Enum(_, s) => Ok(Some(s.clone())),
         Value::Null => Ok(None),
-        other => Err(AvroError::GetString(other.into())),
+        other => Err(AvroError::new(AvroDetails::GetString(other.clone()))),
     }
     .map_err(|e| SchemaError(format!("expected resolvable string : {e:?}")))
 }
@@ -869,7 +870,7 @@ fn resolve_u8(v: &Value) -> AvroResult<u8> {
     let int = match v {
         Value::Int(n) => Ok(Value::Int(*n)),
         Value::Long(n) => Ok(Value::Int(*n as i32)),
-        other => Err(AvroError::GetU8(other.into())),
+        other => Err(AvroError::new(AvroDetails::GetU8(other.clone()))),
     }?;
     if let Value::Int(n) = int {
         if n >= 0 && n <= u8::MAX as i32 {
@@ -877,7 +878,7 @@ fn resolve_u8(v: &Value) -> AvroResult<u8> {
         }
     }
 
-    Err(AvroError::GetU8(int.into()))
+    Err(AvroError::new(AvroDetails::GetU8(int)))
 }
 
 fn resolve_bytes(v: &Value) -> Option<Vec<u8>> {
@@ -892,7 +893,7 @@ fn resolve_bytes(v: &Value) -> Option<Vec<u8>> {
                 .collect::<Result<Vec<_>, _>>()
                 .ok()?,
         )),
-        other => Err(AvroError::GetBytes(other.into())),
+        other => Err(AvroError::new(AvroDetails::GetBytes(other.clone()))),
     }
     .ok()
     .and_then(|v| match v {
diff --git a/crates/core/src/avro_to_arrow/schema.rs b/crates/core/src/avro_to_arrow/schema.rs
index afb0773..baf67fd 100644
--- a/crates/core/src/avro_to_arrow/schema.rs
+++ b/crates/core/src/avro_to_arrow/schema.rs
@@ -100,7 +100,9 @@ fn schema_to_field_with_props(
                         .data_type()
                         .clone()
                 } else {
-                    return Err(CoreError::from(apache_avro::Error::GetUnionDuplicate));
+                    return Err(CoreError::from(apache_avro::Error::new(
+                        apache_avro::error::Details::GetUnionDuplicate,
+                    )));
                 }
             } else {
                 let fields = sub_schemas
diff --git a/crates/core/src/file_group/log_file/avro.rs b/crates/core/src/file_group/log_file/avro.rs
index 8c1d6f1..062c1a9 100644
--- a/crates/core/src/file_group/log_file/avro.rs
+++ b/crates/core/src/file_group/log_file/avro.rs
@@ -50,7 +50,11 @@ impl<R: Read> Iterator for AvroDataBlockContentReader<R> {
         let mut record_content_length = [0u8; 4];
         match self.reader.read_exact(&mut record_content_length) {
             Ok(_) => {}
-            Err(e) => return Some(Err(apache_avro::Error::ReadBytes(e))),
+            Err(e) => {
+                return Some(Err(apache_avro::Error::new(
+                    apache_avro::error::Details::ReadBytes(e),
+                )))
+            }
         }
 
         let record_content_length = u32::from_be_bytes(record_content_length);
@@ -62,3 +66,27 @@ impl<R: Read> Iterator for AvroDataBlockContentReader<R> {
         Some(result)
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::io::Cursor;
+
+    #[test]
+    fn test_read_error_on_truncated_data() {
+        // Create a simple Avro schema
+        let schema = AvroSchema::parse_str(r#"{"type": "null"}"#).unwrap();
+
+        // Create a reader with only 2 bytes when we need 4 bytes for record length
+        let truncated_data = vec![0u8, 1u8];
+        let reader = Cursor::new(truncated_data);
+
+        // Create reader expecting 1 record but with insufficient data
+        let mut avro_reader = AvroDataBlockContentReader::new(reader, &schema, 1);
+
+        // Should return an error because read_exact fails
+        let result = avro_reader.next();
+        assert!(result.is_some());
+        assert!(result.unwrap().is_err());
+    }
+}

Reply via email to