martin-g commented on a change in pull request #1602:
URL: https://github.com/apache/avro/pull/1602#discussion_r826328402



##########
File path: lang/rust/avro/src/util.rs
##########
@@ -44,6 +49,19 @@ impl MapHelper for Map<String, Value> {
             .and_then(|v| v.as_str())
             .map(|v| v.to_string())
     }
+
+    fn aliases(&self) -> Aliases {
+        // FIXME no warning with aliases isn't a json array of json strings

Review comment:
       ```suggestion
           // FIXME no warning when aliases aren't a json array of json strings
   ```

##########
File path: lang/rust/avro/src/schema.rs
##########
@@ -271,44 +275,51 @@ impl Name {
             _ => None,
         };
 
-        let aliases: Option<Vec<String>> = complex
-            .get("aliases")
-            .and_then(|aliases| aliases.as_array())
-            .and_then(|aliases| {
-                aliases
-                    .iter()
-                    .map(|alias| alias.as_str())
-                    .map(|alias| alias.map(|a| a.to_string()))
-                    .collect::<Option<_>>()
-            });
-
         Ok(Name {
             name: type_name.unwrap_or(name),
             namespace: namespace_from_name.or_else(|| 
complex.string("namespace")),
-            aliases,
         })
     }
 
     /// Return the `fullname` of this `Name`
     ///
     /// More information about fullnames can be found in the
     /// [Avro 
specification](https://avro.apache.org/docs/current/spec.html#names)
-    pub fn fullname(&self, default_namespace: Option<&str>) -> String {
+    pub fn fullname(&self, default_namespace: Namespace) -> String {
         if self.name.contains('.') {
             self.name.clone()
         } else {
-            let namespace = self
-                .namespace
-                .as_ref()
-                .map(|s| s.as_ref())
-                .or(default_namespace);
+            let namespace = self.namespace.clone().or(default_namespace);
 
             match namespace {
                 Some(ref namespace) => format!("{}.{}", namespace, self.name),
                 None => self.name.clone(),
             }
         }
     }
+
+    /// Return the fully qualified name needed for indexing or searching for 
the schema within a schema/schema env context. Puts the enclosing namespace 
into the name's namespace for clarity in schema/schema env parsing
+    /// ```
+    /// use apache_avro::schema::Name;
+    ///
+    /// assert_eq!(
+    /// 
Name::new("some_name").unwrap().fully_qualified_name(&Some("some_namespace".into())),
+    /// Name::new("some_namespace.some_name").unwrap()
+    /// );
+    /// assert_eq!(
+    /// 
Name::new("some_namespace.some_name").unwrap().fully_qualified_name(&Some("other_namespace".into())),
+    /// Name::new("some_namespace.some_name").unwrap()
+    /// );
+    /// ```
+    pub fn fully_qualified_name(&self, enclosing_namespace: &Namespace) -> 
Name {

Review comment:
       `pub(crate)` ?!

##########
File path: lang/rust/avro/src/schema.rs
##########
@@ -317,17 +328,73 @@ impl From<&str> for Name {
     }
 }
 
-impl Hash for Name {
-    fn hash<H: Hasher>(&self, state: &mut H) {
-        self.fullname(None).hash(state);
+pub struct ResolvedSchema<'s> {
+    names: HashMap<Name, &'s Schema>,
+    schema: &'s Schema,
+}
+
+impl<'s> From<&'s Schema> for ResolvedSchema<'s> {
+    fn from(schema: &'s Schema) -> Self {
+        let names = HashMap::new();
+        let mut rs = ResolvedSchema { names, schema };
+        Self::from_internal(rs.schema, &mut rs.names, &None);
+        rs
     }
 }
 
-impl Eq for Name {}
+impl<'s> ResolvedSchema<'s> {
+    pub fn get_names(&self) -> &HashMap<Name, &'s Schema> {
+        &self.names
+    }
 
-impl PartialEq for Name {
-    fn eq(&self, other: &Name) -> bool {
-        self.fullname(None).eq(&other.fullname(None))
+    pub fn get_schema(&self) -> &Schema {
+        self.schema
+    }
+
+    fn from_internal(
+        schema: &'s Schema,
+        idx: &mut HashMap<Name, &'s Schema>,
+        enclosing_namespace: &Namespace,
+    ) {
+        match schema {
+            Schema::Array(schema) | Schema::Map(schema) => {
+                Self::from_internal(schema, idx, enclosing_namespace)
+            }
+            Schema::Union(UnionSchema { schemas, .. }) => {
+                for schema in schemas {
+                    Self::from_internal(schema, idx, enclosing_namespace)
+                }
+            }
+            Schema::Enum { name, .. } | Schema::Fixed { name, .. } => {
+                let fully_qualified_name = 
name.fully_qualified_name(enclosing_namespace);
+                if idx.insert(fully_qualified_name.clone(), schema).is_some() {
+                    panic!(

Review comment:
       I try to reduce the usage of `panic!()` as much as possible.
   How about returning an `AvroResult<Self>` and use `TryFrom` instead of 
`From` at line 336 ?

##########
File path: lang/rust/avro/src/schema.rs
##########
@@ -2365,4 +2513,560 @@ mod tests {
             _ => panic!("Expected an Error::InvalidSchemaName!"),
         }
     }
+
+    #[test]
+    fn avro_3448_test_proper_resolution_inner_record_inherited_namespace() {
+        let schema = r#"
+        {
+          "name": "record_name",
+          "namespace": "space",
+          "type": "record",
+          "fields": [
+            {
+              "name": "outer_field_1",
+              "type": [
+                        "null",
+                        {
+                            "type":"record",
+                            "name":"inner_record_name",
+                            "fields":[
+                                {
+                                    "name":"inner_field_1",
+                                    "type":"double"
+                                }
+                            ]
+                        }
+                    ]
+            },
+            {
+                "name": "outer_field_2",
+                "type" : "inner_record_name"
+            }
+          ]
+        }
+        "#;
+        let schema = Schema::parse_str(schema).unwrap();
+        let rs = ResolvedSchema::from(&schema);

Review comment:
       I think it would be good to assert here that `rs.get_names()` has size = 
2.
   Same for the following tests.

##########
File path: lang/rust/avro/src/schema.rs
##########
@@ -317,17 +328,73 @@ impl From<&str> for Name {
     }
 }
 
-impl Hash for Name {
-    fn hash<H: Hasher>(&self, state: &mut H) {
-        self.fullname(None).hash(state);
+pub struct ResolvedSchema<'s> {
+    names: HashMap<Name, &'s Schema>,
+    schema: &'s Schema,

Review comment:
       Is it correct to call/name this `root_schema` ?

##########
File path: lang/rust/avro/src/schema.rs
##########
@@ -271,44 +275,51 @@ impl Name {
             _ => None,
         };
 
-        let aliases: Option<Vec<String>> = complex
-            .get("aliases")
-            .and_then(|aliases| aliases.as_array())
-            .and_then(|aliases| {
-                aliases
-                    .iter()
-                    .map(|alias| alias.as_str())
-                    .map(|alias| alias.map(|a| a.to_string()))
-                    .collect::<Option<_>>()
-            });
-
         Ok(Name {
             name: type_name.unwrap_or(name),
             namespace: namespace_from_name.or_else(|| 
complex.string("namespace")),
-            aliases,
         })
     }
 
     /// Return the `fullname` of this `Name`
     ///
     /// More information about fullnames can be found in the
     /// [Avro 
specification](https://avro.apache.org/docs/current/spec.html#names)
-    pub fn fullname(&self, default_namespace: Option<&str>) -> String {
+    pub fn fullname(&self, default_namespace: Namespace) -> String {
         if self.name.contains('.') {

Review comment:
       I believe this is no more possible and the `if` branch can be safely 
removed.

##########
File path: lang/rust/avro/src/schema.rs
##########
@@ -317,17 +328,73 @@ impl From<&str> for Name {
     }
 }
 
-impl Hash for Name {
-    fn hash<H: Hasher>(&self, state: &mut H) {
-        self.fullname(None).hash(state);
+pub struct ResolvedSchema<'s> {

Review comment:
       `pub(crate)` ?!

##########
File path: lang/rust/avro/src/schema.rs
##########
@@ -225,30 +228,31 @@ impl<'a> From<&'a types::Value> for SchemaKind {
 ///
 /// More information about schema names can be found in the
 /// [Avro specification](https://avro.apache.org/docs/current/spec.html#names)
-#[derive(Clone, Debug, Deserialize)]
+#[derive(Clone, Debug, Deserialize, Hash, PartialEq, Eq)]
 pub struct Name {
     pub name: String,
-    pub namespace: Option<String>,
-    pub aliases: Option<Vec<String>>,
+    pub namespace: Namespace,
 }
 
 /// Represents documentation for complex Avro schemas.
 pub type Documentation = Option<String>;
+/// Represents the aliases for Named Schema
+pub type Aliases = Option<Vec<String>>;
+/// Represents Schema lookup within a schema env
+pub type Names = HashMap<Name, Schema>;

Review comment:
       `pub(crate)` ?
   Does this need to be visible outside of the crate ?

##########
File path: lang/rust/avro/src/schema.rs
##########
@@ -640,20 +720,23 @@ impl Parser {
         }
 
         let name = Name::new(name)?;
+        let fully_qualified_name = 
name.fully_qualified_name(enclosing_namespace);
 
-        if let Some(parsed) = self.parsed_schemas.get(&name) {
-            return Ok(get_schema_ref(parsed));
+        if self.parsed_schemas.get(&fully_qualified_name).is_some() {
+            return Ok(Schema::Ref { name });
         }
-        if let Some(resolving_schema) = self.resolving_schemas.get(&name) {
+        if let Some(resolving_schema) = 
self.resolving_schemas.get(&fully_qualified_name) {
             return Ok(resolving_schema.clone());
         }
 
         let value = self
             .input_schemas
-            .remove(&name)
+            .remove(&fully_qualified_name)
+            // TODO make a better descriptive error message here that conveys 
that a names schema cannot be found

Review comment:
       ```suggestion
               // TODO make a better descriptive error message here that 
conveys that a named schema cannot be found
   ```

##########
File path: lang/rust/avro/src/encode.rs
##########
@@ -50,24 +51,25 @@ fn encode_int(i: i32, buffer: &mut Vec<u8>) {
 /// **NOTE** This will not perform schema validation. The value is assumed to
 /// be valid with regards to the schema. Schema are needed only to guide the
 /// encoding for complex type values.
-pub fn encode_ref(value: &Value, schema: &Schema, buffer: &mut Vec<u8>) {
+pub fn encode_ref(

Review comment:
       Can we remove `pub` now ?
   The wrong usage in `decode.rs` is fixed now

##########
File path: lang/rust/avro/src/schema.rs
##########
@@ -317,17 +328,73 @@ impl From<&str> for Name {
     }
 }
 
-impl Hash for Name {
-    fn hash<H: Hasher>(&self, state: &mut H) {
-        self.fullname(None).hash(state);
+pub struct ResolvedSchema<'s> {
+    names: HashMap<Name, &'s Schema>,
+    schema: &'s Schema,
+}
+
+impl<'s> From<&'s Schema> for ResolvedSchema<'s> {
+    fn from(schema: &'s Schema) -> Self {
+        let names = HashMap::new();
+        let mut rs = ResolvedSchema { names, schema };
+        Self::from_internal(rs.schema, &mut rs.names, &None);
+        rs
     }
 }
 
-impl Eq for Name {}
+impl<'s> ResolvedSchema<'s> {
+    pub fn get_names(&self) -> &HashMap<Name, &'s Schema> {
+        &self.names
+    }
 
-impl PartialEq for Name {
-    fn eq(&self, other: &Name) -> bool {
-        self.fullname(None).eq(&other.fullname(None))
+    pub fn get_schema(&self) -> &Schema {
+        self.schema
+    }
+
+    fn from_internal(
+        schema: &'s Schema,
+        idx: &mut HashMap<Name, &'s Schema>,
+        enclosing_namespace: &Namespace,
+    ) {
+        match schema {
+            Schema::Array(schema) | Schema::Map(schema) => {
+                Self::from_internal(schema, idx, enclosing_namespace)
+            }
+            Schema::Union(UnionSchema { schemas, .. }) => {
+                for schema in schemas {
+                    Self::from_internal(schema, idx, enclosing_namespace)
+                }
+            }
+            Schema::Enum { name, .. } | Schema::Fixed { name, .. } => {
+                let fully_qualified_name = 
name.fully_qualified_name(enclosing_namespace);
+                if idx.insert(fully_qualified_name.clone(), schema).is_some() {
+                    panic!(
+                        "Invalid Schema: Two schemas found with identical 
fullname: {}.",
+                        fully_qualified_name.fullname(None)
+                    );
+                }
+            }
+            Schema::Record { name, fields, .. } => {
+                let fully_qualified_name = 
name.fully_qualified_name(enclosing_namespace);
+                if idx.insert(fully_qualified_name.clone(), schema).is_some() {
+                    panic!(
+                        "Invalid Schema: Two schemas found with identical 
fullname: {}.",
+                        fully_qualified_name.fullname(None)
+                    );
+                }
+                let enclosing_namespace = 
name.fully_qualified_name(enclosing_namespace).namespace;
+                for field in fields {
+                    Self::from_internal(&field.schema, idx, 
&enclosing_namespace)
+                }
+            }
+            Schema::Ref { name } => {
+                let fully_qualified_name = 
name.fully_qualified_name(enclosing_namespace);
+                if !idx.contains_key(&fully_qualified_name) {
+                    panic!("Invalid schema: Unable to find schema for 
reference {}. Make sure to use inherited namespace as needed.", 
fully_qualified_name.fullname(None))

Review comment:
       interesting that the code style check does not complain on this long line

##########
File path: lang/rust/avro/src/encode.rs
##########
@@ -50,24 +51,25 @@ fn encode_int(i: i32, buffer: &mut Vec<u8>) {
 /// **NOTE** This will not perform schema validation. The value is assumed to
 /// be valid with regards to the schema. Schema are needed only to guide the
 /// encoding for complex type values.
-pub fn encode_ref(value: &Value, schema: &Schema, buffer: &mut Vec<u8>) {
+pub fn encode_ref(
+    value: &Value,
+    schema: &Schema,
+    names: &HashMap<Name, &Schema>,
+    enclosing_namespace: &Namespace,
+    buffer: &mut Vec<u8>,
+) {
     fn encode_ref0(
         value: &Value,
         schema: &Schema,
+        names: &HashMap<Name, &Schema>,
         buffer: &mut Vec<u8>,
-        schemas_by_name: &mut HashMap<Name, Schema>,
+        enclosing_namespace: &Namespace,
     ) {
-        match &schema {
-            Schema::Ref { ref name } => {
-                let resolved = schemas_by_name.get(name).unwrap();
-                return encode_ref0(value, resolved, buffer, &mut 
schemas_by_name.clone());
-            }
-            Schema::Record { ref name, .. }
-            | Schema::Enum { ref name, .. }
-            | Schema::Fixed { ref name, .. } => {
-                schemas_by_name.insert(name.clone(), schema.clone());
-            }
-            _ => (),
+        if let Schema::Ref { ref name } = schema {
+            let resolved = *names
+                .get(&name.fully_qualified_name(enclosing_namespace))
+                .unwrap();

Review comment:
       let's improve the error reporting here. It is not guaranteed that the 
FQN is in Names. `unwrap()` will just panic without details what is being 
looked up.

##########
File path: lang/rust/avro/src/schema.rs
##########
@@ -317,17 +328,73 @@ impl From<&str> for Name {
     }
 }
 
-impl Hash for Name {
-    fn hash<H: Hasher>(&self, state: &mut H) {
-        self.fullname(None).hash(state);
+pub struct ResolvedSchema<'s> {
+    names: HashMap<Name, &'s Schema>,
+    schema: &'s Schema,
+}
+
+impl<'s> From<&'s Schema> for ResolvedSchema<'s> {
+    fn from(schema: &'s Schema) -> Self {
+        let names = HashMap::new();
+        let mut rs = ResolvedSchema { names, schema };
+        Self::from_internal(rs.schema, &mut rs.names, &None);
+        rs
     }
 }
 
-impl Eq for Name {}
+impl<'s> ResolvedSchema<'s> {
+    pub fn get_names(&self) -> &HashMap<Name, &'s Schema> {
+        &self.names
+    }
 
-impl PartialEq for Name {
-    fn eq(&self, other: &Name) -> bool {
-        self.fullname(None).eq(&other.fullname(None))
+    pub fn get_schema(&self) -> &Schema {
+        self.schema
+    }
+
+    fn from_internal(
+        schema: &'s Schema,
+        idx: &mut HashMap<Name, &'s Schema>,
+        enclosing_namespace: &Namespace,
+    ) {
+        match schema {
+            Schema::Array(schema) | Schema::Map(schema) => {
+                Self::from_internal(schema, idx, enclosing_namespace)
+            }
+            Schema::Union(UnionSchema { schemas, .. }) => {
+                for schema in schemas {
+                    Self::from_internal(schema, idx, enclosing_namespace)
+                }
+            }
+            Schema::Enum { name, .. } | Schema::Fixed { name, .. } => {
+                let fully_qualified_name = 
name.fully_qualified_name(enclosing_namespace);
+                if idx.insert(fully_qualified_name.clone(), schema).is_some() {
+                    panic!(
+                        "Invalid Schema: Two schemas found with identical 
fullname: {}.",
+                        fully_qualified_name.fullname(None)
+                    );
+                }
+            }
+            Schema::Record { name, fields, .. } => {
+                let fully_qualified_name = 
name.fully_qualified_name(enclosing_namespace);
+                if idx.insert(fully_qualified_name.clone(), schema).is_some() {
+                    panic!(
+                        "Invalid Schema: Two schemas found with identical 
fullname: {}.",
+                        fully_qualified_name.fullname(None)
+                    );
+                }
+                let enclosing_namespace = 
name.fully_qualified_name(enclosing_namespace).namespace;

Review comment:
       ```suggestion
                   let enclosing_namespace = 
fully_qualified_name(enclosing_namespace).namespace;
   ```

##########
File path: lang/rust/avro/src/schema.rs
##########
@@ -640,20 +720,23 @@ impl Parser {
         }
 
         let name = Name::new(name)?;
+        let fully_qualified_name = 
name.fully_qualified_name(enclosing_namespace);
 
-        if let Some(parsed) = self.parsed_schemas.get(&name) {
-            return Ok(get_schema_ref(parsed));
+        if self.parsed_schemas.get(&fully_qualified_name).is_some() {
+            return Ok(Schema::Ref { name });
         }
-        if let Some(resolving_schema) = self.resolving_schemas.get(&name) {
+        if let Some(resolving_schema) = 
self.resolving_schemas.get(&fully_qualified_name) {
             return Ok(resolving_schema.clone());
         }
 
         let value = self
             .input_schemas
-            .remove(&name)
+            .remove(&fully_qualified_name)
+            // TODO make a better descriptive error message here that conveys 
that a names schema cannot be found
             .ok_or_else(|| Error::ParsePrimitive(name.fullname(None)))?;

Review comment:
       ```suggestion
               .ok_or_else(|| Error::ParsePrimitive(fully_qualified_name))?;
   ```

##########
File path: lang/rust/avro/src/encode.rs
##########
@@ -496,4 +495,268 @@ mod tests {
         encode(&outer_value2, &schema, &mut buf);
         assert!(!buf.is_empty());
     }
+
+    #[test]
+    fn test_avro_3448_proper_multi_level_encoding_outer_namespace() {
+        let schema = r#"
+        {
+          "name": "record_name",
+          "namespace": "space",
+          "type": "record",
+          "fields": [
+            {
+              "name": "outer_field_1",
+              "type": [
+                        "null",
+                        {
+                            "type": "record",
+                            "name": "middle_record_name",
+                            "fields":[
+                                {
+                                    "name":"middle_field_1",
+                                    "type":[
+                                        "null",
+                                        {
+                                            "type":"record",
+                                            "name":"inner_record_name",
+                                            "fields":[
+                                                {
+                                                    "name":"inner_field_1",
+                                                    "type":"double"
+                                                }
+                                            ]
+                                        }
+                                    ]
+                                }
+                            ]
+                        }
+                    ]
+            },
+            {
+                "name": "outer_field_2",
+                "type" : "space.inner_record_name"
+            }
+          ]
+        }
+        "#;
+        let schema = Schema::parse_str(schema).unwrap();
+        let inner_record = Value::Record(vec![("inner_field_1".into(), 
Value::Double(5.4))]);
+        let middle_record_variation_1 = Value::Record(vec![(
+            "middle_field_1".into(),
+            Value::Union(0, Box::new(Value::Null)),
+        )]);
+        let middle_record_variation_2 = Value::Record(vec![(
+            "middle_field_1".into(),
+            Value::Union(1, Box::new(inner_record.clone())),
+        )]);
+        let outer_record_variation_1 = Value::Record(vec![
+            (
+                "outer_field_1".into(),
+                Value::Union(0, Box::new(Value::Null)),
+            ),
+            ("outer_field_2".into(), inner_record.clone()),
+        ]);
+        let outer_record_variation_2 = Value::Record(vec![
+            (
+                "outer_field_1".into(),
+                Value::Union(1, Box::new(middle_record_variation_1)),
+            ),
+            ("outer_field_2".into(), inner_record.clone()),
+        ]);
+        let outer_record_variation_3 = Value::Record(vec![
+            (
+                "outer_field_1".into(),
+                Value::Union(1, Box::new(middle_record_variation_2)),
+            ),
+            ("outer_field_2".into(), inner_record),
+        ]);
+
+        // TODO add in error result wrapping to encoding flow

Review comment:
       This comment is not very clear to me. What do you mean ?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to