This is an automated email from the ASF dual-hosted git repository.
mgrigorov pushed a commit to branch branch-1.11
in repository https://gitbox.apache.org/repos/asf/avro.git
The following commit(s) were added to refs/heads/branch-1.11 by this push:
new 1b3591d0e AVRO-3912: big dec fix1 11 (#2607)
1b3591d0e is described below
commit 1b3591d0e78ffb5a2e6449553a0e11cf857e5ee2
Author: Christophe Le Saec <[email protected]>
AuthorDate: Mon Dec 4 14:06:54 2023 +0100
AVRO-3912: big dec fix1 11 (#2607)
* AVRO-3912: Fix Big decimal deser (#2599)
* AVRO-3912: Fix Big decimal deser
Co-authored-by: Martin Tzvetanov Grigorov <[email protected]>
* AVRO-3912: error comment
---
lang/rust/avro/src/bigdecimal.rs | 203 +++++++++++++++++++++++++++++++++++++++
lang/rust/avro/src/decimal.rs | 85 +---------------
lang/rust/avro/src/decode.rs | 3 +-
lang/rust/avro/src/encode.rs | 6 +-
lang/rust/avro/src/error.rs | 8 +-
lang/rust/avro/src/lib.rs | 1 +
lang/rust/avro/src/types.rs | 3 +-
lang/rust/avro/tests/bigdec.avro | Bin 0 -> 189 bytes
lang/rust/avro/tests/shared.rs | 13 ++-
9 files changed, 223 insertions(+), 99 deletions(-)
diff --git a/lang/rust/avro/src/bigdecimal.rs b/lang/rust/avro/src/bigdecimal.rs
new file mode 100644
index 000000000..93ddae48d
--- /dev/null
+++ b/lang/rust/avro/src/bigdecimal.rs
@@ -0,0 +1,203 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::{
+ decode::{decode_len, decode_long},
+ encode::{encode_bytes, encode_long},
+ types::Value,
+ Error,
+};
+use bigdecimal::BigDecimal;
+use num_bigint::BigInt;
+use std::io::Read;
+
+pub(crate) fn serialize_big_decimal(decimal: &BigDecimal) -> Vec<u8> {
+ // encode big decimal, without global size
+ let mut buffer: Vec<u8> = Vec::new();
+ let (big_int, exponent): (BigInt, i64) = decimal.as_bigint_and_exponent();
+ let big_endian_value: Vec<u8> = big_int.to_signed_bytes_be();
+ encode_bytes(&big_endian_value, &mut buffer);
+ encode_long(exponent, &mut buffer);
+
+ // encode global size and content
+ let mut final_buffer: Vec<u8> = Vec::new();
+ encode_bytes(&buffer, &mut final_buffer);
+ final_buffer
+}
+
+pub(crate) fn deserialize_big_decimal(bytes: &Vec<u8>) -> Result<BigDecimal,
Error> {
+ let mut bytes: &[u8] = bytes.as_slice();
+ let mut big_decimal_buffer = match decode_len(&mut bytes) {
+ Ok(size) => vec![0u8; size],
+ Err(err) => return Err(Error::BigDecimalLen(Box::new(err))),
+ };
+
+ bytes
+ .read_exact(&mut big_decimal_buffer[..])
+ .map_err(Error::ReadDouble)?;
+
+ match decode_long(&mut bytes) {
+ Ok(Value::Long(scale_value)) => {
+ let big_int: BigInt =
BigInt::from_signed_bytes_be(&big_decimal_buffer);
+ let decimal = BigDecimal::new(big_int, scale_value);
+ Ok(decimal)
+ }
+ _ => Err(Error::BigDecimalScale),
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use crate::{
+ types::{Record, Value},
+ Codec, Reader, Schema, Writer,
+ };
+ use apache_avro_test_helper::TestResult;
+ use bigdecimal::{One, Zero};
+ use pretty_assertions::assert_eq;
+ use std::{
+ fs::File,
+ io::BufReader,
+ ops::{Div, Mul},
+ str::FromStr,
+ };
+
+ #[test]
+ fn test_avro_3779_bigdecimal_serial() -> TestResult {
+ let value: BigDecimal =
+
bigdecimal::BigDecimal::from(-1421).div(bigdecimal::BigDecimal::from(2));
+ let mut current: BigDecimal = BigDecimal::one();
+
+ for iter in 1..180 {
+ let buffer: Vec<u8> = serialize_big_decimal(¤t);
+
+ let mut as_slice = buffer.as_slice();
+ decode_long(&mut as_slice)?;
+
+ let mut result: Vec<u8> = Vec::new();
+ result.extend_from_slice(as_slice);
+
+ let deserialize_big_decimal: Result<BigDecimal, Error> =
+ deserialize_big_decimal(&result);
+ assert!(
+ deserialize_big_decimal.is_ok(),
+ "can't deserialize for iter {iter}"
+ );
+ assert_eq!(current, deserialize_big_decimal?, "not equals for
{iter}");
+ current = current.mul(&value);
+ }
+
+ let buffer: Vec<u8> = serialize_big_decimal(&BigDecimal::zero());
+ let mut as_slice = buffer.as_slice();
+ decode_long(&mut as_slice)?;
+
+ let mut result: Vec<u8> = Vec::new();
+ result.extend_from_slice(as_slice);
+
+ let deserialize_big_decimal: Result<BigDecimal, Error> =
deserialize_big_decimal(&result);
+ assert!(
+ deserialize_big_decimal.is_ok(),
+ "can't deserialize for zero"
+ );
+ assert_eq!(
+ BigDecimal::zero(),
+ deserialize_big_decimal?,
+ "not equals for zero"
+ );
+
+ Ok(())
+ }
+
+ #[test]
+ fn test_avro_3779_record_with_bg() -> TestResult {
+ let schema_str = r#"
+ {
+ "type": "record",
+ "name": "test",
+ "fields": [
+ {
+ "name": "field_name",
+ "type": "bytes",
+ "logicalType": "big-decimal"
+ }
+ ]
+ }
+ "#;
+ let schema = Schema::parse_str(schema_str)?;
+
+ // build record with big decimal value
+ let mut record = Record::new(&schema).unwrap();
+ let val = BigDecimal::new(BigInt::from(12), 2);
+ record.put("field_name", val.clone());
+
+ // write a record
+ let codec = Codec::Null;
+ let mut writer = Writer::builder()
+ .schema(&schema)
+ .codec(codec)
+ .writer(Vec::new())
+ .build();
+
+ writer.append(record.clone())?;
+ writer.flush()?;
+
+ // read record
+ let wrote_data = writer.into_inner()?;
+ let mut reader = Reader::new(&wrote_data[..])?;
+
+ let value = reader.next().unwrap()?;
+
+ // extract field value
+ let big_decimal_value: &Value = match value {
+ Value::Record(ref fields) => Ok(&fields[0].1),
+ other => Err(format!("Expected a Value::Record, got: {other:?}")),
+ }?;
+
+ let x1res: &BigDecimal = match big_decimal_value {
+ Value::BigDecimal(ref s) => Ok(s),
+ other => Err(format!("Expected Value::BigDecimal, got:
{other:?}")),
+ }?;
+ assert_eq!(&val, x1res);
+
+ Ok(())
+ }
+
+ #[test]
+ fn test_avro_3779_from_java_file() -> TestResult {
+ // Open file generated with Java code to ensure compatibility
+ // with Java big decimal logical type.
+ let file: File = File::open("./tests/bigdec.avro")?;
+ let mut reader = Reader::new(BufReader::new(&file))?;
+ let next_element = reader.next();
+ assert!(next_element.is_some());
+ let value = next_element.unwrap()?;
+ let bg = match value {
+ Value::Record(ref fields) => Ok(&fields[0].1),
+ other => Err(format!("Expected a Value::Record, got: {other:?}")),
+ }?;
+ let value_big_decimal = match bg {
+ Value::BigDecimal(val) => Ok(val),
+ other => Err(format!("Expected a Value::BigDecimal, got:
{other:?}")),
+ }?;
+
+ let ref_value = BigDecimal::from_str("2.24")?;
+ assert_eq!(&ref_value, value_big_decimal);
+
+ Ok(())
+ }
+}
diff --git a/lang/rust/avro/src/decimal.rs b/lang/rust/avro/src/decimal.rs
index 5a88f8670..0139e3719 100644
--- a/lang/rust/avro/src/decimal.rs
+++ b/lang/rust/avro/src/decimal.rs
@@ -15,15 +15,8 @@
// specific language governing permissions and limitations
// under the License.
-use crate::{
- decode::{decode_len, decode_long},
- encode::{encode_bytes, encode_long},
- types::Value,
- AvroResult, Error,
-};
-use bigdecimal::BigDecimal;
+use crate::{AvroResult, Error};
use num_bigint::{BigInt, Sign};
-use std::io::Read;
#[derive(Debug, Clone)]
pub struct Decimal {
@@ -112,47 +105,12 @@ impl<T: AsRef<[u8]>> From<T> for Decimal {
}
}
-pub(crate) fn serialize_big_decimal(decimal: &BigDecimal) -> Vec<u8> {
- let mut buffer: Vec<u8> = Vec::new();
- let (big_int, exponent): (BigInt, i64) = decimal.as_bigint_and_exponent();
- let big_endian_value: Vec<u8> = big_int.to_signed_bytes_be();
- encode_bytes(&big_endian_value, &mut buffer);
- encode_long(exponent, &mut buffer);
-
- buffer
-}
-
-pub(crate) fn deserialize_big_decimal(bytes: &Vec<u8>) -> Result<BigDecimal,
Error> {
- let mut bytes: &[u8] = bytes.as_slice();
- let mut big_decimal_buffer = match decode_len(&mut bytes) {
- Ok(size) => vec![0u8; size],
- Err(_err) => return Err(Error::BigDecimalLen),
- };
-
- bytes
- .read_exact(&mut big_decimal_buffer[..])
- .map_err(Error::ReadDouble)?;
-
- match decode_long(&mut bytes) {
- Ok(Value::Long(scale_value)) => {
- let big_int: BigInt =
BigInt::from_signed_bytes_be(&big_decimal_buffer);
- let decimal = BigDecimal::new(big_int, scale_value);
- Ok(decimal)
- }
- _ => Err(Error::BigDecimalScale),
- }
-}
-
#[cfg(test)]
mod tests {
use super::*;
use apache_avro_test_helper::TestResult;
- use bigdecimal::{One, Zero};
use pretty_assertions::assert_eq;
- use std::{
- convert::TryFrom,
- ops::{Div, Mul},
- };
+ use std::convert::TryFrom;
#[test]
fn test_decimal_from_bytes_from_ref_decimal() -> TestResult {
@@ -175,43 +133,4 @@ mod tests {
Ok(())
}
-
- #[test]
- fn test_avro_3779_bigdecimal_serial() -> TestResult {
- let value: bigdecimal::BigDecimal =
-
bigdecimal::BigDecimal::from(-1421).div(bigdecimal::BigDecimal::from(2));
- let mut current: bigdecimal::BigDecimal =
bigdecimal::BigDecimal::one();
-
- for iter in 1..180 {
- let result: Vec<u8> = serialize_big_decimal(¤t);
-
- let deserialize_big_decimal: Result<bigdecimal::BigDecimal, Error>
=
- deserialize_big_decimal(&result);
- assert!(
- deserialize_big_decimal.is_ok(),
- "can't deserialize for iter {iter}"
- );
- assert_eq!(
- current,
- deserialize_big_decimal.unwrap(),
- "not equals for ${iter}"
- );
- current = current.mul(&value);
- }
-
- let result: Vec<u8> = serialize_big_decimal(&BigDecimal::zero());
- let deserialize_big_decimal: Result<bigdecimal::BigDecimal, Error> =
- deserialize_big_decimal(&result);
- assert!(
- deserialize_big_decimal.is_ok(),
- "can't deserialize for zero"
- );
- assert_eq!(
- BigDecimal::zero(),
- deserialize_big_decimal.unwrap(),
- "not equals for zero"
- );
-
- Ok(())
- }
}
diff --git a/lang/rust/avro/src/decode.rs b/lang/rust/avro/src/decode.rs
index 7857bbec5..8c50e7702 100644
--- a/lang/rust/avro/src/decode.rs
+++ b/lang/rust/avro/src/decode.rs
@@ -16,7 +16,8 @@
// under the License.
use crate::{
- decimal::{deserialize_big_decimal, Decimal},
+ bigdecimal::deserialize_big_decimal,
+ decimal::Decimal,
duration::Duration,
schema::{
DecimalSchema, EnumSchema, FixedSchema, Name, Namespace, RecordSchema,
ResolvedSchema,
diff --git a/lang/rust/avro/src/encode.rs b/lang/rust/avro/src/encode.rs
index 1f7e40da9..5dd087771 100644
--- a/lang/rust/avro/src/encode.rs
+++ b/lang/rust/avro/src/encode.rs
@@ -16,7 +16,7 @@
// under the License.
use crate::{
- decimal::serialize_big_decimal,
+ bigdecimal::serialize_big_decimal,
schema::{
DecimalSchema, EnumSchema, FixedSchema, Name, Namespace, RecordSchema,
ResolvedSchema,
Schema, SchemaKind, UnionSchema,
@@ -130,8 +130,8 @@ pub(crate) fn encode_internal<S: Borrow<Schema>>(
buffer,
),
Value::BigDecimal(bg) => {
- let mut buf: Vec<u8> = serialize_big_decimal(bg);
- buffer.append(&mut buf);
+ let buf: Vec<u8> = serialize_big_decimal(bg);
+ buffer.extend_from_slice(buf.as_slice());
}
Value::Bytes(bytes) => match *schema {
Schema::Bytes => encode_bytes(bytes, buffer),
diff --git a/lang/rust/avro/src/error.rs b/lang/rust/avro/src/error.rs
index a7960656c..13cec65c7 100644
--- a/lang/rust/avro/src/error.rs
+++ b/lang/rust/avro/src/error.rs
@@ -292,13 +292,13 @@ pub enum Error {
#[error("The decimal precision ({precision}) must be a positive number")]
DecimalPrecisionMuBePositive { precision: usize },
- #[error("Unreadable decimal sign")]
+ #[error("Unreadable big decimal sign")]
BigDecimalSign,
- #[error("Unreadable length for decimal inner bytes")]
- BigDecimalLen,
+ #[error("Unreadable length for big decimal inner bytes: {0}")]
+ BigDecimalLen(#[source] Box<Error>),
- #[error("Unreadable decimal scale")]
+ #[error("Unreadable big decimal scale")]
BigDecimalScale,
#[error("Unexpected `type` {0} variant for `logicalType`")]
diff --git a/lang/rust/avro/src/lib.rs b/lang/rust/avro/src/lib.rs
index 35b1b431a..f1b9c5132 100644
--- a/lang/rust/avro/src/lib.rs
+++ b/lang/rust/avro/src/lib.rs
@@ -763,6 +763,7 @@
//! assert_eq!(false, SchemaCompatibility::can_read(&writers_schema,
&readers_schema));
//! ```
+mod bigdecimal;
mod codec;
mod de;
mod decimal;
diff --git a/lang/rust/avro/src/types.rs b/lang/rust/avro/src/types.rs
index c90e69933..88300c1a6 100644
--- a/lang/rust/avro/src/types.rs
+++ b/lang/rust/avro/src/types.rs
@@ -17,7 +17,8 @@
//! Logic handling the intermediate representation of Avro values.
use crate::{
- decimal::{deserialize_big_decimal, serialize_big_decimal, Decimal},
+ bigdecimal::{deserialize_big_decimal, serialize_big_decimal},
+ decimal::Decimal,
duration::Duration,
schema::{
DecimalSchema, EnumSchema, FixedSchema, Name, Namespace, Precision,
RecordField,
diff --git a/lang/rust/avro/tests/bigdec.avro b/lang/rust/avro/tests/bigdec.avro
new file mode 100644
index 000000000..641db7e2a
Binary files /dev/null and b/lang/rust/avro/tests/bigdec.avro differ
diff --git a/lang/rust/avro/tests/shared.rs b/lang/rust/avro/tests/shared.rs
index 9790ddfe4..3915d5d12 100644
--- a/lang/rust/avro/tests/shared.rs
+++ b/lang/rust/avro/tests/shared.rs
@@ -49,7 +49,7 @@ fn test_schema() -> TestResult {
ROOT_DIRECTORY.to_owned() + "/" +
entry.file_name().to_str().unwrap();
let dir_result = test_folder(sub_folder.as_str());
- if let Result::Err(ed) = dir_result {
+ if let Err(ed) = dir_result {
result = match result {
Ok(()) => Err(ed),
Err(e) => Err(e.merge(&ed)),
@@ -58,9 +58,8 @@ fn test_schema() -> TestResult {
}
}
}
- if let Err(e) = result {
- core::panic!("{}", e)
- }
+ result?;
+
Ok(())
}
@@ -99,16 +98,16 @@ impl fmt::Display for ErrorsDesc {
fn test_folder(folder: &str) -> Result<(), ErrorsDesc> {
let file_name = folder.to_owned() + "/schema.json";
- let content = std::fs::read_to_string(file_name).expect("Unable to find
schema.jon file");
+ let content = std::fs::read_to_string(file_name).expect("Unable to find
schema.json file");
let schema: Schema = Schema::parse_str(content.as_str()).expect("Can't
read schema");
let data_file_name = folder.to_owned() + "/data.avro";
let data_path: &Path = Path::new(data_file_name.as_str());
- let mut result = Result::Ok(());
+ let mut result = Ok(());
if !data_path.exists() {
log::error!("{}", format!("folder {folder} does not exist"));
- return Result::Err(ErrorsDesc::new(
+ return Err(ErrorsDesc::new(
format!("folder {folder} does not exist").as_str(),
));
} else {