This is an automated email from the ASF dual-hosted git repository. kriskras99 pushed a commit to branch fix/array_map_default in repository https://gitbox.apache.org/repos/asf/avro-rs.git
commit 8fd495e891f9a2e3460950a8bc122aa559505e62 Author: Kriskras99 <[email protected]> AuthorDate: Sat Mar 7 19:53:00 2026 +0100 fix: Remove `MapSchema` and `ArraySchema` `default` fields --- avro/src/schema/builders.rs | 13 +- avro/src/schema/mod.rs | 350 ++++++-------------------------------------- avro/src/schema/parser.rs | 46 +----- 3 files changed, 46 insertions(+), 363 deletions(-) diff --git a/avro/src/schema/builders.rs b/avro/src/schema/builders.rs index dd16b3e..e1b8c21 100644 --- a/avro/src/schema/builders.rs +++ b/avro/src/schema/builders.rs @@ -19,42 +19,35 @@ use crate::schema::{ Alias, ArraySchema, EnumSchema, FixedSchema, MapSchema, Name, RecordField, RecordSchema, UnionSchema, }; -use crate::types::Value; use crate::{AvroResult, Schema}; use bon::bon; use serde_json::Value as JsonValue; -use std::collections::{BTreeMap, HashMap}; +use std::collections::BTreeMap; #[bon] impl Schema { - /// Returns a `Schema::Map` with the given types and optional default - /// and custom attributes. + /// Returns a `Schema::Map` with the given types and custom attributes. #[builder(finish_fn = build)] pub fn map( #[builder(start_fn)] types: Schema, - default: Option<HashMap<String, Value>>, attributes: Option<BTreeMap<String, JsonValue>>, ) -> Self { let attributes = attributes.unwrap_or_default(); Schema::Map(MapSchema { types: Box::new(types), - default, attributes, }) } - /// Returns a `Schema::Array` with the given items and optional default - /// and custom attributes. + /// Returns a `Schema::Array` with the given items and custom attributes. #[builder(finish_fn = build)] pub fn array( #[builder(start_fn)] items: Schema, - default: Option<Vec<Value>>, attributes: Option<BTreeMap<String, JsonValue>>, ) -> Self { let attributes = attributes.unwrap_or_default(); Schema::Array(ArraySchema { items: Box::new(items), - default, attributes, }) } diff --git a/avro/src/schema/mod.rs b/avro/src/schema/mod.rs index 893b13c..a85d96e 100644 --- a/avro/src/schema/mod.rs +++ b/avro/src/schema/mod.rs @@ -37,13 +37,12 @@ use crate::{ AvroResult, error::{Details, Error}, schema::parser::Parser, - schema_equality, - types::{self, Value}, + schema_equality, types, }; use digest::Digest; use serde::{ Serialize, Serializer, - ser::{Error as _, SerializeMap, SerializeSeq}, + ser::{SerializeMap, SerializeSeq}, }; use serde_json::{Map, Value as JsonValue}; use std::fmt::Formatter; @@ -166,7 +165,6 @@ pub enum Schema { #[derive(Clone, PartialEq)] pub struct MapSchema { pub types: Box<Schema>, - pub default: Option<HashMap<String, Value>>, pub attributes: BTreeMap<String, JsonValue>, } @@ -174,13 +172,10 @@ impl Debug for MapSchema { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { let mut debug = f.debug_struct("MapSchema"); debug.field("types", &self.types); - if let Some(default) = &self.default { - debug.field("default", default); - } if !self.attributes.is_empty() { debug.field("attributes", &self.attributes); } - if self.default.is_none() || self.attributes.is_empty() { + if self.attributes.is_empty() { debug.finish_non_exhaustive() } else { debug.finish() @@ -191,7 +186,6 @@ impl Debug for MapSchema { #[derive(Clone, PartialEq)] pub struct ArraySchema { pub items: Box<Schema>, - pub default: Option<Vec<Value>>, pub attributes: BTreeMap<String, JsonValue>, } @@ -199,13 +193,10 @@ impl Debug for ArraySchema { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { let mut debug = f.debug_struct("ArraySchema"); debug.field("items", &self.items); - if let Some(default) = &self.default { - debug.field("default", default); - } if !self.attributes.is_empty() { debug.field("attributes", &self.attributes); } - if self.default.is_none() || self.attributes.is_empty() { + if self.attributes.is_empty() { debug.finish_non_exhaustive() } else { debug.finish() @@ -810,41 +801,19 @@ impl Serialize for Schema { Schema::Double => serializer.serialize_str("double"), Schema::Bytes => serializer.serialize_str("bytes"), Schema::String => serializer.serialize_str("string"), - Schema::Array(ArraySchema { - items, - default, - attributes, - }) => { - let mut map = serializer.serialize_map(Some( - 2 + attributes.len() + if default.is_some() { 1 } else { 0 }, - ))?; + Schema::Array(ArraySchema { items, attributes }) => { + let mut map = serializer.serialize_map(Some(2 + attributes.len()))?; map.serialize_entry("type", "array")?; map.serialize_entry("items", items)?; - if let Some(default) = default { - let value = JsonValue::try_from(Value::Array(default.clone())) - .map_err(S::Error::custom)?; - map.serialize_entry("default", &value)?; - } for (key, value) in attributes { map.serialize_entry(key, value)?; } map.end() } - Schema::Map(MapSchema { - types, - default, - attributes, - }) => { - let mut map = serializer.serialize_map(Some( - 2 + attributes.len() + if default.is_some() { 1 } else { 0 }, - ))?; + Schema::Map(MapSchema { types, attributes }) => { + let mut map = serializer.serialize_map(Some(2 + attributes.len()))?; map.serialize_entry("type", "map")?; map.serialize_entry("values", types)?; - if let Some(default) = default { - let value = JsonValue::try_from(Value::Map(default.clone())) - .map_err(S::Error::custom)?; - map.serialize_entry("default", &value)?; - } for (key, value) in attributes { map.serialize_entry(key, value)?; } @@ -4790,11 +4759,7 @@ mod tests { let schema1 = Schema::parse_str(raw_schema)?; match &schema1 { - Schema::Array(ArraySchema { - items, - default: _, - attributes, - }) => { + Schema::Array(ArraySchema { items, attributes }) => { assert!(attributes.is_empty()); match **items { @@ -4813,7 +4778,6 @@ mod tests { match &fields[0].schema { Schema::Array(ArraySchema { items: _, - default: _, attributes, }) => { assert!(attributes.is_empty()); @@ -4865,11 +4829,7 @@ mod tests { let schema1 = Schema::parse_str(raw_schema)?; match &schema1 { - Schema::Array(ArraySchema { - items, - default: _, - attributes, - }) => { + Schema::Array(ArraySchema { items, attributes }) => { assert!(attributes.is_empty()); match **items { @@ -4888,7 +4848,6 @@ mod tests { match &fields[0].schema { Schema::Map(MapSchema { types: _, - default: _, attributes, }) => { assert!(attributes.is_empty()); @@ -5102,289 +5061,62 @@ mod tests { } #[test] - fn avro_rs_467_array_default() -> TestResult { - let schema = Schema::parse_str( - r#"{ - "type": "array", - "items": "string", - "default": [] - }"#, - )?; - - let Schema::Array(array) = schema else { - panic!("Expected Schema::Array, got {schema:?}"); - }; - - assert_eq!(array.attributes, BTreeMap::new()); - assert_eq!(array.default, Some(Vec::new())); - - let json = serde_json::to_string(&Schema::Array(array))?; - assert!(json.contains(r#""default":[]"#)); - - Ok(()) - } - - #[test] - fn avro_rs_467_array_default_with_actual_values() -> TestResult { - let schema = Schema::parse_str( - r#"{ - "type": "array", - "items": "string", - "default": ["foo", "bar"] - }"#, - )?; - - let Schema::Array(array) = schema else { - panic!("Expected Schema::Array, got {schema:?}"); - }; - - assert_eq!(array.attributes, BTreeMap::new()); - assert_eq!( - array.default, - Some(vec![ - Value::String("foo".into()), - Value::String("bar".into()) - ]) - ); - - let json = serde_json::to_string(&Schema::Array(array))?; - assert!(json.contains(r#""default":["foo","bar"]"#)); - - Ok(()) - } - - #[test] - fn avro_rs_467_array_default_with_invalid_values() -> TestResult { - let err = Schema::parse_str( - r#"{ - "type": "array", - "items": "string", - "default": [false, true] - }"#, - ) - .unwrap_err(); - - assert_eq!( - err.to_string(), - "Default value for an array must be an array of String! Found: Boolean(false)" - ); - - Ok(()) - } - - #[test] - fn avro_rs_467_array_default_with_mixed_values() -> TestResult { - let err = Schema::parse_str( - r#"{ - "type": "array", - "items": "string", - "default": ["foo", true] - }"#, - ) - .unwrap_err(); - - assert_eq!( - err.to_string(), - "Default value for an array must be an array of String! Found: Boolean(true)" - ); - - Ok(()) - } - - #[test] - fn avro_rs_467_array_default_with_reference() -> TestResult { - let schema = Schema::parse_str( - r#"{ + fn avro_rs_476_enum_cannot_be_directly_in_field() -> TestResult { + let schema_str = r#"{ "type": "record", - "name": "Something", + "name": "ExampleEnum", + "namespace": "com.schema", "fields": [ { - "name": "one", - "type": { - "type": "enum", - "name": "ABC", - "symbols": ["A", "B", "C"] - } - }, - { - "name": "two", - "type": { - "type": "array", - "items": "ABC", - "default": ["A", "B", "C"] - } + "name": "wrong_enum", + "type": "enum", + "symbols": ["INSERT", "UPDATE"] } ] - }"#, - )?; - - let Schema::Record(record) = schema else { - panic!("Expected Schema::Record, got {schema:?}"); - }; - let Schema::Array(array) = &record.fields[1].schema else { - panic!("Expected Schema::Array, got {:?}", record.fields[1].schema); - }; - - assert_eq!(array.attributes, BTreeMap::new()); - assert_eq!( - array.default, - Some(vec![ - Value::String("A".into()), - Value::String("B".into()), - Value::String("C".into()) - ]) - ); - - Ok(()) - } - - #[test] - fn avro_rs_467_map_default() -> TestResult { - let schema = Schema::parse_str( - r#"{ - "type": "map", - "values": "string", - "default": {} - }"#, - )?; - - let Schema::Map(map) = schema else { - panic!("Expected Schema::Map, got {schema:?}"); - }; - - assert_eq!(map.attributes, BTreeMap::new()); - assert_eq!(map.default, Some(HashMap::new())); - - let json = serde_json::to_string(&Schema::Map(map))?; - assert!(json.contains(r#""default":{}"#)); - - Ok(()) - } - - #[test] - fn avro_rs_467_map_default_with_actual_values() -> TestResult { - let schema = Schema::parse_str( - r#"{ - "type": "map", - "values": "string", - "default": {"foo": "bar"} - }"#, - )?; - - let Schema::Map(map) = schema else { - panic!("Expected Schema::Map, got {schema:?}"); - }; - - let mut hashmap = HashMap::new(); - hashmap.insert("foo".to_string(), Value::String("bar".into())); - assert_eq!(map.attributes, BTreeMap::new()); - assert_eq!(map.default, Some(hashmap)); - - let json = serde_json::to_string(&Schema::Map(map))?; - assert!(json.contains(r#""default":{"foo":"bar"}"#)); - - Ok(()) - } - - #[test] - fn avro_rs_467_map_default_with_invalid_values() -> TestResult { - let err = Schema::parse_str( - r#"{ - "type": "map", - "values": "string", - "default": {"foo": true} - }"#, - ) - .unwrap_err(); - - assert_eq!( - err.to_string(), - "Default value for a map must be an object with (String, String)! Found: (String, Boolean(true))" - ); - - Ok(()) - } - - #[test] - fn avro_rs_467_map_default_with_mixed_values() -> TestResult { - let err = Schema::parse_str( - r#"{ - "type": "map", - "values": "string", - "default": {"foo": "bar", "spam": true} - }"#, - ) - .unwrap_err(); - + }"#; + let result = Schema::parse_str(schema_str).unwrap_err(); assert_eq!( - err.to_string(), - "Default value for a map must be an object with (String, String)! Found: (String, Boolean(true))" + result.to_string(), + "Invalid schema: There is no type called 'enum', if you meant to define a non-primitive schema, it should be defined inside `type` attribute." ); - Ok(()) } #[test] - fn avro_rs_467_map_default_with_reference() -> TestResult { + fn avro_rs_xxx_default_must_be_in_custom_attributes_for_map_and_enum() -> TestResult { let schema = Schema::parse_str( r#"{ + "name": "ignore_defaults", "type": "record", - "name": "Something", "fields": [ - { - "name": "one", - "type": { - "type": "enum", - "name": "ABC", - "symbols": ["A", "B", "C"] - } - }, - { - "name": "two", - "type": { - "type": "map", - "values": "ABC", - "default": {"foo": "A"} - } - } + {"name": "a", "type": { "type": "map", "values": "string", "default": null }}, + {"name": "b", "type": { "type": "array", "items": "string", "default": null }} ] }"#, )?; let Schema::Record(record) = schema else { - panic!("Expected Schema::Record, got {schema:?}"); + panic!("Expect Schema::Record for {schema:?}"); }; - let Schema::Map(map) = &record.fields[1].schema else { - panic!("Expected Schema::Map, got {:?}", record.fields[1].schema); + println!("{:?}", record); + let Schema::Map(map) = &record.fields[0].schema else { + panic!("Expect Schema::Map for first field of {record:?}"); }; + assert_eq!(map.attributes.len(), 1); + assert_eq!( + map.attributes.get("default"), + Some(&serde_json::Value::Null) + ); - let mut hashmap = HashMap::new(); - hashmap.insert("foo".to_string(), Value::String("A".into())); - assert_eq!(map.attributes, BTreeMap::new()); - assert_eq!(map.default, Some(hashmap)); - - Ok(()) - } - - #[test] - fn avro_rs_476_enum_cannot_be_directly_in_field() -> TestResult { - let schema_str = r#"{ - "type": "record", - "name": "ExampleEnum", - "namespace": "com.schema", - "fields": [ - { - "name": "wrong_enum", - "type": "enum", - "symbols": ["INSERT", "UPDATE"] - } - ] - }"#; - let result = Schema::parse_str(schema_str).unwrap_err(); + let Schema::Array(array) = &record.fields[1].schema else { + panic!("Expect Schema::Array for second field of {record:?}"); + }; + assert_eq!(array.attributes.len(), 1); assert_eq!( - result.to_string(), - "Invalid schema: There is no type called 'enum', if you meant to define a non-primitive schema, it should be defined inside `type` attribute." + array.attributes.get("default"), + Some(&serde_json::Value::Null) ); + Ok(()) } } diff --git a/avro/src/schema/parser.rs b/avro/src/schema/parser.rs index 0f89018..6809fd2 100644 --- a/avro/src/schema/parser.rs +++ b/avro/src/schema/parser.rs @@ -693,30 +693,9 @@ impl Parser { .get("items") .ok_or_else(|| Details::GetArrayItemsField.into()) .and_then(|items| self.parse(items, enclosing_namespace))?; - let default = if let Some(default) = complex.get("default").cloned() { - if let Value::Array(_) = default { - let crate::types::Value::Array(array) = crate::types::Value::try_from(default)? - else { - unreachable!("JsonValue::Array can only become a Value::Array") - }; - // Check that the default type matches the schema type - if let Some(value) = array.iter().find(|v| { - v.validate_internal(&items, &self.parsed_schemas, enclosing_namespace) - .is_some() - }) { - return Err(Details::ArrayDefaultWrongInnerType(items, value.clone()).into()); - } - Some(array) - } else { - return Err(Details::ArrayDefaultWrongType(default).into()); - } - } else { - None - }; Ok(Schema::Array(ArraySchema { items: Box::new(items), - default, - attributes: self.get_custom_attributes(complex, vec!["items", "default"]), + attributes: self.get_custom_attributes(complex, vec!["items"]), })) } @@ -731,30 +710,9 @@ impl Parser { .ok_or_else(|| Details::GetMapValuesField.into()) .and_then(|types| self.parse(types, enclosing_namespace))?; - let default = if let Some(default) = complex.get("default").cloned() { - if let Value::Object(_) = default { - let crate::types::Value::Map(map) = crate::types::Value::try_from(default)? else { - unreachable!("JsonValue::Object can only become a Value::Map") - }; - // Check that the default type matches the schema type - if let Some(value) = map.values().find(|v| { - v.validate_internal(&types, &self.parsed_schemas, enclosing_namespace) - .is_some() - }) { - return Err(Details::MapDefaultWrongInnerType(types, value.clone()).into()); - } - Some(map) - } else { - return Err(Details::MapDefaultWrongType(default).into()); - } - } else { - None - }; - Ok(Schema::Map(MapSchema { types: Box::new(types), - default, - attributes: self.get_custom_attributes(complex, vec!["values", "default"]), + attributes: self.get_custom_attributes(complex, vec!["values"]), })) }
