This is an automated email from the ASF dual-hosted git repository.

mgrigorov pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/avro.git


The following commit(s) were added to refs/heads/main by this push:
     new 728b807c4 AVRO-4004: [Rust] Ignore logicalType fields when creating the canonical form (#2976)
728b807c4 is described below

commit 728b807c43c84f245d8ba6d621b2082b37b65671
Author: Martin Grigorov <[email protected]>
AuthorDate: Fri Jul 12 17:39:34 2024 +0300

    AVRO-4004: [Rust] Ignore logicalType fields when creating the canonical form (#2976)
    
    * AVRO-4004: [Rust] Ignore logicalType fields when creating the canonical form
    
    Signed-off-by: Martin Tzvetanov Grigorov <[email protected]>
    
    * AVRO-4004: [Rust] Ignore the namespace for non-named schemas
    
    When creating the canonical parsing form of a Schema, ignore the
    namespace for any non-named Schemas, i.e. anything but Record, Enum,
    Fixed and Ref.
    
    Signed-off-by: Martin Tzvetanov Grigorov <[email protected]>
    
    * AVRO-4004 Remove the test for round trip after canonical form
    
    Signed-off-by: Martin Tzvetanov Grigorov <[email protected]>
    
    ---------
    
    Signed-off-by: Martin Tzvetanov Grigorov <[email protected]>
---
 lang/rust/avro/src/schema.rs           |  50 ++-
 lang/rust/avro/tests/schema.rs         | 651 +--------------------------------
 lang/rust/avro_test_helper/src/data.rs | 636 ++++++++++++++++++++++++++++++++
 lang/rust/avro_test_helper/src/lib.rs  |   1 +
 4 files changed, 686 insertions(+), 652 deletions(-)

diff --git a/lang/rust/avro/src/schema.rs b/lang/rust/avro/src/schema.rs
index f58892ca0..1d2272a78 100644
--- a/lang/rust/avro/src/schema.rs
+++ b/lang/rust/avro/src/schema.rs
@@ -2156,6 +2156,7 @@ fn parsing_canonical_form(schema: &Value) -> String {
 fn pcf_map(schema: &Map<String, Value>) -> String {
     // Look for the namespace variant up front.
     let ns = schema.get("namespace").and_then(|v| v.as_str());
+    let typ = schema.get("type").and_then(|v| v.as_str());
     let mut fields = Vec::new();
     for (k, v) in schema {
         // Reduce primitive types to their simple form. ([PRIMITIVE] rule)
@@ -2167,7 +2168,12 @@ fn pcf_map(schema: &Map<String, Value>) -> String {
         }
 
         // Strip out unused fields ([STRIP] rule)
-        if field_ordering_position(k).is_none() || k == "default" || k == 
"doc" || k == "aliases" {
+        if field_ordering_position(k).is_none()
+            || k == "default"
+            || k == "doc"
+            || k == "aliases"
+            || k == "logicalType"
+        {
             continue;
         }
 
@@ -2176,7 +2182,9 @@ fn pcf_map(schema: &Map<String, Value>) -> String {
             // Invariant: Only valid schemas. Must be a string.
             let name = v.as_str().unwrap();
             let n = match ns {
-                Some(namespace) if !name.contains('.') => 
Cow::Owned(format!("{namespace}.{name}")),
+                Some(namespace) if is_named_type(typ) && !name.contains('.') 
=> {
+                    Cow::Owned(format!("{namespace}.{name}"))
+                }
                 _ => Cow::Borrowed(name),
             };
 
@@ -2211,6 +2219,13 @@ fn pcf_map(schema: &Map<String, Value>) -> String {
     format!("{{{inter}}}")
 }
 
+fn is_named_type(typ: Option<&str>) -> bool {
+    matches!(
+        typ,
+        Some("record") | Some("enum") | Some("fixed") | Some("ref")
+    )
+}
+
 fn pcf_array(arr: &[Value]) -> String {
     let inter = arr
         .iter()
@@ -2443,6 +2458,7 @@ pub mod derive {
 #[cfg(test)]
 mod tests {
     use super::*;
+    use crate::rabin::Rabin;
     use apache_avro_test_helper::{
         logger::{assert_logged, assert_not_logged},
         TestResult,
@@ -3415,16 +3431,16 @@ mod tests {
 
         let schema = Schema::parse_str(raw_schema)?;
         assert_eq!(
-            "abf662f831715ff78f88545a05a9262af75d6406b54e1a8a174ff1d2b75affc4",
+            "7eb3b28d73dfc99bdd9af1848298b40804a2f8ad5d2642be2ecc2ad34842b987",
             format!("{}", schema.fingerprint::<Sha256>())
         );
 
         assert_eq!(
-            "6e21c350f71b1a34e9efe90970f1bc69",
+            "cb11615e412ee5d872620d8df78ff6ae",
             format!("{}", schema.fingerprint::<Md5>())
         );
         assert_eq!(
-            "28cf0a67d9937bb3",
+            "92f2ccef718c6754",
             format!("{}", schema.fingerprint::<Rabin>())
         );
 
@@ -6764,4 +6780,28 @@ mod tests {
 
         Ok(())
     }
+
+    #[test]
+    fn avro_4004_canonical_form_strip_logical_types() -> TestResult {
+        let schema_str = r#"
+      {
+        "type": "record",
+        "name": "test",
+        "fields": [
+            {"name": "a", "type": "long", "default": 42, "doc": "The field a"},
+            {"name": "b", "type": "string", "namespace": "test.a"},
+            {"name": "c", "type": "long", "logicalType": "timestamp-micros"}
+        ]
+    }"#;
+
+        let schema = Schema::parse_str(schema_str)?;
+        let canonical_form = schema.canonical_form();
+        let fp_rabin = schema.fingerprint::<Rabin>();
+        assert_eq!(
+            
r#"{"name":"test","type":"record","fields":[{"name":"a","type":"long"},{"name":"b","type":"string"},{"name":"c","type":{"type":"long"}}]}"#,
+            canonical_form
+        );
+        assert_eq!("92f2ccef718c6754", fp_rabin.to_string());
+        Ok(())
+    }
 }
diff --git a/lang/rust/avro/tests/schema.rs b/lang/rust/avro/tests/schema.rs
index 7851d957d..13cf6af26 100644
--- a/lang/rust/avro/tests/schema.rs
+++ b/lang/rust/avro/tests/schema.rs
@@ -18,7 +18,6 @@
 use std::{
     collections::HashMap,
     io::{Cursor, Read},
-    sync::OnceLock,
 };
 
 use apache_avro::{
@@ -28,638 +27,10 @@ use apache_avro::{
     types::{Record, Value},
     Codec, Error, Reader, Schema, Writer,
 };
-use apache_avro_test_helper::{init, TestResult};
-
-const PRIMITIVE_EXAMPLES: &[(&str, bool)] = &[
-    (r#""null""#, true),
-    (r#"{"type": "null"}"#, true),
-    (r#""boolean""#, true),
-    (r#"{"type": "boolean"}"#, true),
-    (r#""string""#, true),
-    (r#"{"type": "string"}"#, true),
-    (r#""bytes""#, true),
-    (r#"{"type": "bytes"}"#, true),
-    (r#""int""#, true),
-    (r#"{"type": "int"}"#, true),
-    (r#""long""#, true),
-    (r#"{"type": "long"}"#, true),
-    (r#""float""#, true),
-    (r#"{"type": "float"}"#, true),
-    (r#""double""#, true),
-    (r#"{"type": "double"}"#, true),
-    (r#""true""#, false),
-    (r#"true"#, false),
-    (r#"{"no_type": "test"}"#, false),
-    (r#"{"type": "panther"}"#, false),
-];
-
-const FIXED_EXAMPLES: &[(&str, bool)] = &[
-    (r#"{"type": "fixed", "name": "Test", "size": 1}"#, true),
-    (
-        r#"{
-                "type": "fixed",
-                "name": "MyFixed",
-                "namespace": "org.apache.hadoop.avro",
-                "size": 1
-            }"#,
-        true,
-    ),
-    (r#"{"type": "fixed", "name": "MissingSize"}"#, false),
-    (r#"{"type": "fixed", "size": 314}"#, false),
-];
-
-const ENUM_EXAMPLES: &[(&str, bool)] = &[
-    (
-        r#"{"type": "enum", "name": "Test", "symbols": ["A", "B"]}"#,
-        true,
-    ),
-    (
-        r#"{
-                "type": "enum",
-                "name": "Status",
-                "symbols": "Normal Caution Critical"
-            }"#,
-        false,
-    ),
-    (
-        r#"{
-                "type": "enum",
-                "name": [ 0, 1, 1, 2, 3, 5, 8 ],
-                "symbols": ["Golden", "Mean"]
-            }"#,
-        false,
-    ),
-    (
-        r#"{
-                "type": "enum",
-                "symbols" : ["I", "will", "fail", "no", "name"]
-            }"#,
-        false,
-    ),
-    (
-        r#"{
-                "type": "enum",
-                 "name": "Test"
-                 "symbols" : ["AA", "AA"]
-            }"#,
-        false,
-    ),
-];
-
-const ARRAY_EXAMPLES: &[(&str, bool)] = &[
-    (r#"{"type": "array", "items": "long"}"#, true),
-    (
-        r#"{
-                "type": "array",
-                 "items": {"type": "enum", "name": "Test", "symbols": ["A", 
"B"]}
-            }"#,
-        true,
-    ),
-];
-
-const MAP_EXAMPLES: &[(&str, bool)] = &[
-    (r#"{"type": "map", "values": "long"}"#, true),
-    (
-        r#"{
-                "type": "map",
-                "values": {"type": "enum", "name": "Test", "symbols": ["A", 
"B"]}
-            }"#,
-        true,
-    ),
-];
-
-const UNION_EXAMPLES: &[(&str, bool)] = &[
-    (r#"["string", "null", "long"]"#, true),
-    (r#"["null", "null"]"#, false),
-    (r#"["long", "long"]"#, false),
-    (
-        r#"[
-                {"type": "array", "items": "long"}
-                {"type": "array", "items": "string"}
-            ]"#,
-        false,
-    ),
-    // Unions with default values
-    (
-        r#"{"name": "foo", "type": ["string", "long"], "default": "bar"}"#,
-        true,
-    ),
-    (
-        r#"{"name": "foo", "type": ["long", "string"], "default": 1}"#,
-        true,
-    ),
-    (
-        r#"{"name": "foo", "type": ["null", "string"], "default": null}"#,
-        true,
-    ),
-    (
-        r#"{"name": "foo", "type": ["string", "long"], "default": 1}"#,
-        true,
-    ),
-    (
-        r#"{"name": "foo", "type": ["string", "null"], "default": null}"#,
-        true,
-    ),
-    (
-        r#"{"name": "foo", "type": ["null", "string"], "default": "null"}"#,
-        true,
-    ),
-    (
-        r#"{"name": "foo", "type": ["long", "string"], "default": "str"}"#,
-        true,
-    ),
-];
-
-const RECORD_EXAMPLES: &[(&str, bool)] = &[
-    (
-        r#"{
-                "type": "record",
-                "name": "Test",
-                "fields": [{"name": "f", "type": "long"}]
-            }"#,
-        true,
-    ),
-    (
-        r#"{
-            "type": "error",
-            "name": "Test",
-            "fields": [{"name": "f", "type": "long"}]
-        }"#,
-        false,
-    ),
-    (
-        r#"{
-            "type": "record",
-            "name": "Node",
-            "fields": [
-                {"name": "label", "type": "string"},
-                {"name": "children", "type": {"type": "array", "items": 
"Node"}}
-            ]
-        }"#,
-        true,
-    ),
-    (
-        r#"{
-            "type": "record",
-            "name": "Lisp",
-            "fields": [
-                {
-                    "name": "value",
-                    "type": [
-                        "null", "string",
-                        {
-                            "type": "record",
-                            "name": "Cons",
-                            "fields": [
-                                {"name": "car", "type": "Lisp"},
-                                {"name": "cdr", "type": "Lisp"}
-                            ]
-                        }
-                    ]
-                }
-            ]
-        }"#,
-        true,
-    ),
-    (
-        r#"{
-            "type": "record",
-            "name": "HandshakeRequest",
-            "namespace": "org.apache.avro.ipc",
-            "fields": [
-                {"name": "clientHash", "type": {"type": "fixed", "name": 
"MD5", "size": 16}},
-                {"name": "clientProtocol", "type": ["null", "string"]},
-                {"name": "serverHash", "type": "MD5"},
-                {"name": "meta", "type": ["null", {"type": "map", "values": 
"bytes"}]}
-            ]
-        }"#,
-        true,
-    ),
-    (
-        r#"{
-                "type":"record",
-                "name":"HandshakeResponse",
-                "namespace":"org.apache.avro.ipc",
-                "fields":[
-                    {
-                        "name":"match",
-                        "type":{
-                           "type":"enum",
-                           "name":"HandshakeMatch",
-                           "symbols":["BOTH", "CLIENT", "NONE"]
-                        }
-                    },
-                    {"name":"serverProtocol", "type":["null", "string"]},
-                    {
-                        "name":"serverHash",
-                        "type":["null", {"name":"MD5", "size":16, 
"type":"fixed"}]
-                    },
-                    {
-                        "name":"meta",
-                        "type":["null", {"type":"map", "values":"bytes"}]
-                    }
-                ]
-            }"#,
-        true,
-    ),
-    (
-        r#"{
-                "type":"record",
-                "name":"HandshakeResponse",
-                "namespace":"org.apache.avro.ipc",
-                "fields":[
-                    {
-                        "name":"match",
-                        "type":{
-                            "type":"enum",
-                            "name":"HandshakeMatch",
-                            "symbols":["BOTH", "CLIENT", "NONE"]
-                        }
-                    },
-                    {"name":"serverProtocol", "type":["null", "string"]},
-                    {
-                        "name":"serverHash",
-                        "type":["null", { "name":"MD5", "size":16, 
"type":"fixed"}]
-                    },
-                    {"name":"meta", "type":["null", { "type":"map", 
"values":"bytes"}]}
-                ]
-            }"#,
-        true,
-    ),
-    // Unions may not contain more than one schema with the same type, except 
for the named
-    // types record, fixed and enum. For example, unions containing two array 
types or two map
-    // types are not permitted, but two types with different names are 
permitted.
-    // (Names permit efficient resolution when reading and writing unions.)
-    (
-        r#"{
-            "type": "record",
-            "name": "ipAddr",
-            "fields": [
-                {
-                    "name": "addr",
-                    "type": [
-                        {"name": "IPv6", "type": "fixed", "size": 16},
-                        {"name": "IPv4", "type": "fixed", "size": 4}
-                    ]
-                }
-            ]
-        }"#,
-        true,
-    ),
-    (
-        r#"{
-                "type": "record",
-                "name": "Address",
-                "fields": [
-                    {"type": "string"},
-                    {"type": "string", "name": "City"}
-                ]
-            }"#,
-        false,
-    ),
-    (
-        r#"{
-                "type": "record",
-                "name": "Event",
-                "fields": [{"name": "Sponsor"}, {"name": "City", "type": 
"string"}]
-            }"#,
-        false,
-    ),
-    (
-        r#"{
-                "type": "record",
-                "fields": "His vision, from the constantly passing bars,"
-                "name",
-                "Rainer"
-            }"#,
-        false,
-    ),
-    (
-        r#"{
-                "name": ["Tom", "Jerry"],
-                "type": "record",
-                "fields": [{"name": "name", "type": "string"}]
-            }"#,
-        false,
-    ),
-];
-
-const DOC_EXAMPLES: &[(&str, bool)] = &[
-    (
-        r#"{
-                "type": "record",
-                "name": "TestDoc",
-                "doc":  "Doc string",
-                "fields": [{"name": "name", "type": "string", "doc" : "Doc 
String"}]
-            }"#,
-        true,
-    ),
-    (
-        r#"{"type": "enum", "name": "Test", "symbols": ["A", "B"], "doc": "Doc 
String"}"#,
-        true,
-    ),
-    (
-        r#"{"type": "fixed", "name": "Test", "size": 1, "doc": "Fixed Doc 
String"}"#,
-        true,
-    ),
-];
-
-const OTHER_ATTRIBUTES_EXAMPLES: &[(&str, bool)] = &[
-    (
-        r#"{
-                "type": "record",
-                "name": "TestRecord",
-                "cp_string": "string",
-                "cp_int": 1,
-                "cp_array": [ 1, 2, 3, 4],
-                "fields": [
-                    {"name": "f1", "type": "string", "cp_object": 
{"a":1,"b":2}},
-                    {"name": "f2", "type": "long", "cp_null": null}
-                ]
-            }"#,
-        true,
-    ),
-    (
-        r#"{"type": "map", "values": "long", "cp_boolean": true}"#,
-        true,
-    ),
-    (
-        r#"{
-                "type": "enum",
-                 "name": "TestEnum",
-                 "symbols": [ "one", "two", "three" ],
-                 "cp_float" : 1.0
-            }"#,
-        true,
-    ),
-    (r#"{"type": "long", "date": "true"}"#, true),
-];
-
-const DECIMAL_LOGICAL_TYPE: &[(&str, bool)] = &[
-    (
-        r#"{
-            "type": {
-                "type": "fixed",
-                "name": "TestDecimal",
-                "size": 10
-            },
-            "logicalType": "decimal",
-            "precision": 4,
-            "scale": 2
-        }"#,
-        true,
-    ),
-    (
-        r#"{
-            "type": {
-                "type": "fixed",
-                "name": "ScaleIsImplicitlyZero",
-                "size": 10
-            },
-            "logicalType": "decimal",
-            "precision": 4
-        }"#,
-        true,
-    ),
-    (
-        r#"{
-            "type": {
-                "type": "fixed",
-                "name": "PrecisionMustBeGreaterThanZero",
-                "size": 10
-            },
-            "logicalType": "decimal",
-            "precision": 0
-        }"#,
-        true,
-    ),
-    (
-        r#"{
-             "type": "bytes",
-             "logicalType": "decimal",
-             "precision": 4,
-             "scale": 2
-         }"#,
-        true,
-    ),
-    (
-        r#"{
-             "type": "bytes",
-             "logicalType": "decimal",
-             "precision": 2,
-             "scale": -2
-         }"#,
-        true,
-    ),
-    (
-        r#"{
-             "type": "bytes",
-             "logicalType": "decimal",
-             "precision": -2,
-             "scale": 2
-         }"#,
-        true,
-    ),
-    (
-        r#"{
-             "type": "bytes",
-             "logicalType": "decimal",
-             "precision": 2,
-             "scale": 3
-         }"#,
-        true,
-    ),
-    (
-        r#"{
-             "type": "fixed",
-             "logicalType": "decimal",
-             "name": "TestDecimal",
-             "precision": -10,
-             "scale": 2,
-             "size": 5
-         }"#,
-        true,
-    ),
-    (
-        r#"{
-             "type": "fixed",
-             "logicalType": "decimal",
-             "name": "TestDecimal",
-             "precision": 2,
-             "scale": 3,
-             "size": 2
-         }"#,
-        true,
-    ),
-    (
-        r#"{
-             "type": "fixed",
-             "logicalType": "decimal",
-             "name": "TestDecimal",
-             "precision": 2,
-             "scale": 2,
-             "size": -2
-         }"#,
-        false,
-    ),
-];
-
-const DECIMAL_LOGICAL_TYPE_ATTRIBUTES: &[(&str, bool)] = &[
-    /*
-    // TODO: (#93) support logical types and attributes and uncomment
-    (
-        r#"{
-            "type": "fixed",
-            "logicalType": "decimal",
-            "name": "TestDecimal",
-            "precision": 4,
-            "scale": 2,
-            "size": 2
-        }"#,
-        true
-    ),
-    (
-        r#"{
-            "type": "bytes",
-            "logicalType": "decimal",
-            "precision": 4
-        }"#,
-        true
-    ),
-    */
-];
-
-const DATE_LOGICAL_TYPE: &[(&str, bool)] = &[
-    (r#"{"type": "int", "logicalType": "date"}"#, true),
-    // this is valid even though its logical type is "date1", because unknown 
logical types are
-    // ignored
-    (r#"{"type": "int", "logicalType": "date1"}"#, true),
-    // this is still valid because unknown logicalType should be ignored
-    (r#"{"type": "long", "logicalType": "date"}"#, true),
-];
-
-const TIMEMILLIS_LOGICAL_TYPE: &[(&str, bool)] = &[
-    (r#"{"type": "int", "logicalType": "time-millis"}"#, true),
-    // this is valid even though its logical type is "time-milis" (missing the 
second "l"),
-    // because unknown logical types are ignored
-    (r#"{"type": "int", "logicalType": "time-milis"}"#, true),
-    // this is still valid because unknown logicalType should be ignored
-    (r#"{"type": "long", "logicalType": "time-millis"}"#, true),
-];
-
-const TIMEMICROS_LOGICAL_TYPE: &[(&str, bool)] = &[
-    (r#"{"type": "long", "logicalType": "time-micros"}"#, true),
-    // this is valid even though its logical type is "time-micro" (missing the 
last "s"), because
-    // unknown logical types are ignored
-    (r#"{"type": "long", "logicalType": "time-micro"}"#, true),
-    // this is still valid because unknown logicalType should be ignored
-    (r#"{"type": "int", "logicalType": "time-micros"}"#, true),
-];
-
-const TIMESTAMPMILLIS_LOGICAL_TYPE: &[(&str, bool)] = &[
-    (
-        r#"{"type": "long", "logicalType": "timestamp-millis"}"#,
-        true,
-    ),
-    // this is valid even though its logical type is "timestamp-milis" 
(missing the second "l"), because
-    // unknown logical types are ignored
-    (
-        r#"{"type": "long", "logicalType": "timestamp-milis"}"#,
-        true,
-    ),
-    (
-        // this is still valid because unknown logicalType should be ignored
-        r#"{"type": "int", "logicalType": "timestamp-millis"}"#,
-        true,
-    ),
-];
-
-const TIMESTAMPMICROS_LOGICAL_TYPE: &[(&str, bool)] = &[
-    (
-        r#"{"type": "long", "logicalType": "timestamp-micros"}"#,
-        true,
-    ),
-    // this is valid even though its logical type is "timestamp-micro" 
(missing the last "s"), because
-    // unknown logical types are ignored
-    (
-        r#"{"type": "long", "logicalType": "timestamp-micro"}"#,
-        true,
-    ),
-    (
-        // this is still valid because unknown logicalType should be ignored
-        r#"{"type": "int", "logicalType": "timestamp-micros"}"#,
-        true,
-    ),
-];
-
-const LOCAL_TIMESTAMPMILLIS_LOGICAL_TYPE: &[(&str, bool)] = &[
-    (
-        r#"{"type": "long", "logicalType": "local-timestamp-millis"}"#,
-        true,
-    ),
-    // this is valid even though its logical type is "local-timestamp-milis" 
(missing the second "l"), because
-    // unknown logical types are ignored
-    (
-        r#"{"type": "long", "logicalType": "local-timestamp-milis"}"#,
-        true,
-    ),
-    (
-        // this is still valid because unknown logicalType should be ignored
-        r#"{"type": "int", "logicalType": "local-timestamp-millis"}"#,
-        true,
-    ),
-];
-
-const LOCAL_TIMESTAMPMICROS_LOGICAL_TYPE: &[(&str, bool)] = &[
-    (
-        r#"{"type": "long", "logicalType": "local-timestamp-micros"}"#,
-        true,
-    ),
-    // this is valid even though its logical type is "local-timestamp-micro" 
(missing the last "s"), because
-    // unknown logical types are ignored
-    (
-        r#"{"type": "long", "logicalType": "local-timestamp-micro"}"#,
-        true,
-    ),
-    (
-        // this is still valid because unknown logicalType should be ignored
-        r#"{"type": "int", "logicalType": "local-timestamp-micros"}"#,
-        true,
-    ),
-];
-
-fn examples() -> &'static Vec<(&'static str, bool)> {
-    static EXAMPLES_ONCE: OnceLock<Vec<(&'static str, bool)>> = 
OnceLock::new();
-    EXAMPLES_ONCE.get_or_init(|| {
-        Vec::new()
-            .iter()
-            .copied()
-            .chain(PRIMITIVE_EXAMPLES.iter().copied())
-            .chain(FIXED_EXAMPLES.iter().copied())
-            .chain(ENUM_EXAMPLES.iter().copied())
-            .chain(ARRAY_EXAMPLES.iter().copied())
-            .chain(MAP_EXAMPLES.iter().copied())
-            .chain(UNION_EXAMPLES.iter().copied())
-            .chain(RECORD_EXAMPLES.iter().copied())
-            .chain(DOC_EXAMPLES.iter().copied())
-            .chain(OTHER_ATTRIBUTES_EXAMPLES.iter().copied())
-            .chain(DECIMAL_LOGICAL_TYPE.iter().copied())
-            .chain(DECIMAL_LOGICAL_TYPE_ATTRIBUTES.iter().copied())
-            .chain(DATE_LOGICAL_TYPE.iter().copied())
-            .chain(TIMEMILLIS_LOGICAL_TYPE.iter().copied())
-            .chain(TIMEMICROS_LOGICAL_TYPE.iter().copied())
-            .chain(TIMESTAMPMILLIS_LOGICAL_TYPE.iter().copied())
-            .chain(TIMESTAMPMICROS_LOGICAL_TYPE.iter().copied())
-            .chain(LOCAL_TIMESTAMPMILLIS_LOGICAL_TYPE.iter().copied())
-            .chain(LOCAL_TIMESTAMPMICROS_LOGICAL_TYPE.iter().copied())
-            .collect()
-    })
-}
-
-fn valid_examples() -> &'static Vec<(&'static str, bool)> {
-    static VALID_EXAMPLES_ONCE: OnceLock<Vec<(&'static str, bool)>> = 
OnceLock::new();
-    VALID_EXAMPLES_ONCE.get_or_init(|| examples().iter().copied().filter(|s| 
s.1).collect())
-}
+use apache_avro_test_helper::{
+    data::{examples, valid_examples, DOC_EXAMPLES},
+    init, TestResult,
+};
 
 #[test]
 fn test_correct_recursive_extraction() -> TestResult {
@@ -798,20 +169,6 @@ fn test_valid_cast_to_string_after_parse() -> TestResult {
     Ok(())
 }
 
-#[test]
-/// 1. Given a string, parse it to get Avro schema "original".
-/// 2. Serialize "original" to a string and parse that string to generate Avro 
schema "round trip".
-/// 3. Ensure "original" and "round trip" schemas are equivalent.
-fn test_equivalence_after_round_trip() -> TestResult {
-    init();
-    for (raw_schema, _) in valid_examples().iter() {
-        let original_schema = Schema::parse_str(raw_schema)?;
-        let round_trip_schema = 
Schema::parse_str(original_schema.canonical_form().as_str())?;
-        assert_eq!(original_schema, round_trip_schema);
-    }
-    Ok(())
-}
-
 #[test]
 /// Test that a list of schemas whose definitions do not depend on each other 
produces the same
 /// result as parsing each element of the list individually
diff --git a/lang/rust/avro_test_helper/src/data.rs 
b/lang/rust/avro_test_helper/src/data.rs
new file mode 100644
index 000000000..662df23d3
--- /dev/null
+++ b/lang/rust/avro_test_helper/src/data.rs
@@ -0,0 +1,636 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Provides a set of Avro schema examples that are used in the tests.
+
+use std::sync::OnceLock;
+
+pub const PRIMITIVE_EXAMPLES: &[(&str, bool)] = &[
+    (r#""null""#, true),
+    (r#"{"type": "null"}"#, true),
+    (r#""boolean""#, true),
+    (r#"{"type": "boolean"}"#, true),
+    (r#""string""#, true),
+    (r#"{"type": "string"}"#, true),
+    (r#""bytes""#, true),
+    (r#"{"type": "bytes"}"#, true),
+    (r#""int""#, true),
+    (r#"{"type": "int"}"#, true),
+    (r#""long""#, true),
+    (r#"{"type": "long"}"#, true),
+    (r#""float""#, true),
+    (r#"{"type": "float"}"#, true),
+    (r#""double""#, true),
+    (r#"{"type": "double"}"#, true),
+    (r#""true""#, false),
+    (r#"true"#, false),
+    (r#"{"no_type": "test"}"#, false),
+    (r#"{"type": "panther"}"#, false),
+];
+
+pub const FIXED_EXAMPLES: &[(&str, bool)] = &[
+    (r#"{"type": "fixed", "name": "Test", "size": 1}"#, true),
+    (
+        r#"{
+                "type": "fixed",
+                "name": "MyFixed",
+                "namespace": "org.apache.hadoop.avro",
+                "size": 1
+            }"#,
+        true,
+    ),
+    (r#"{"type": "fixed", "name": "MissingSize"}"#, false),
+    (r#"{"type": "fixed", "size": 314}"#, false),
+];
+
+pub const ENUM_EXAMPLES: &[(&str, bool)] = &[
+    (
+        r#"{"type": "enum", "name": "Test", "symbols": ["A", "B"]}"#,
+        true,
+    ),
+    (
+        r#"{
+                "type": "enum",
+                "name": "Status",
+                "symbols": "Normal Caution Critical"
+            }"#,
+        false,
+    ),
+    (
+        r#"{
+                "type": "enum",
+                "name": [ 0, 1, 1, 2, 3, 5, 8 ],
+                "symbols": ["Golden", "Mean"]
+            }"#,
+        false,
+    ),
+    (
+        r#"{
+                "type": "enum",
+                "symbols" : ["I", "will", "fail", "no", "name"]
+            }"#,
+        false,
+    ),
+    (
+        r#"{
+                "type": "enum",
+                 "name": "Test"
+                 "symbols" : ["AA", "AA"]
+            }"#,
+        false,
+    ),
+];
+
+pub const ARRAY_EXAMPLES: &[(&str, bool)] = &[
+    (r#"{"type": "array", "items": "long"}"#, true),
+    (
+        r#"{
+                "type": "array",
+                 "items": {"type": "enum", "name": "Test", "symbols": ["A", 
"B"]}
+            }"#,
+        true,
+    ),
+];
+
+pub const MAP_EXAMPLES: &[(&str, bool)] = &[
+    (r#"{"type": "map", "values": "long"}"#, true),
+    (
+        r#"{
+                "type": "map",
+                "values": {"type": "enum", "name": "Test", "symbols": ["A", 
"B"]}
+            }"#,
+        true,
+    ),
+];
+
+pub const UNION_EXAMPLES: &[(&str, bool)] = &[
+    (r#"["string", "null", "long"]"#, true),
+    (r#"["null", "null"]"#, false),
+    (r#"["long", "long"]"#, false),
+    (
+        r#"[
+                {"type": "array", "items": "long"}
+                {"type": "array", "items": "string"}
+            ]"#,
+        false,
+    ),
+    // Unions with default values
+    (
+        r#"{"name": "foo", "type": ["string", "long"], "default": "bar"}"#,
+        true,
+    ),
+    (
+        r#"{"name": "foo", "type": ["long", "string"], "default": 1}"#,
+        true,
+    ),
+    (
+        r#"{"name": "foo", "type": ["null", "string"], "default": null}"#,
+        true,
+    ),
+    (
+        r#"{"name": "foo", "type": ["string", "long"], "default": 1}"#,
+        true,
+    ),
+    (
+        r#"{"name": "foo", "type": ["string", "null"], "default": null}"#,
+        true,
+    ),
+    (
+        r#"{"name": "foo", "type": ["null", "string"], "default": "null"}"#,
+        true,
+    ),
+    (
+        r#"{"name": "foo", "type": ["long", "string"], "default": "str"}"#,
+        true,
+    ),
+];
+
+pub const RECORD_EXAMPLES: &[(&str, bool)] = &[
+    (
+        r#"{
+                "type": "record",
+                "name": "Test",
+                "fields": [{"name": "f", "type": "long"}]
+            }"#,
+        true,
+    ),
+    (
+        r#"{
+            "type": "error",
+            "name": "Test",
+            "fields": [{"name": "f", "type": "long"}]
+        }"#,
+        false,
+    ),
+    (
+        r#"{
+            "type": "record",
+            "name": "Node",
+            "fields": [
+                {"name": "label", "type": "string"},
+                {"name": "children", "type": {"type": "array", "items": 
"Node"}}
+            ]
+        }"#,
+        true,
+    ),
+    (
+        r#"{
+            "type": "record",
+            "name": "Lisp",
+            "fields": [
+                {
+                    "name": "value",
+                    "type": [
+                        "null", "string",
+                        {
+                            "type": "record",
+                            "name": "Cons",
+                            "fields": [
+                                {"name": "car", "type": "Lisp"},
+                                {"name": "cdr", "type": "Lisp"}
+                            ]
+                        }
+                    ]
+                }
+            ]
+        }"#,
+        true,
+    ),
+    (
+        r#"{
+            "type": "record",
+            "name": "HandshakeRequest",
+            "namespace": "org.apache.avro.ipc",
+            "fields": [
+                {"name": "clientHash", "type": {"type": "fixed", "name": 
"MD5", "size": 16}},
+                {"name": "clientProtocol", "type": ["null", "string"]},
+                {"name": "serverHash", "type": "MD5"},
+                {"name": "meta", "type": ["null", {"type": "map", "values": 
"bytes"}]}
+            ]
+        }"#,
+        true,
+    ),
+    (
+        r#"{
+                "type":"record",
+                "name":"HandshakeResponse",
+                "namespace":"org.apache.avro.ipc",
+                "fields":[
+                    {
+                        "name":"match",
+                        "type":{
+                           "type":"enum",
+                           "name":"HandshakeMatch",
+                           "symbols":["BOTH", "CLIENT", "NONE"]
+                        }
+                    },
+                    {"name":"serverProtocol", "type":["null", "string"]},
+                    {
+                        "name":"serverHash",
+                        "type":["null", {"name":"MD5", "size":16, 
"type":"fixed"}]
+                    },
+                    {
+                        "name":"meta",
+                        "type":["null", {"type":"map", "values":"bytes"}]
+                    }
+                ]
+            }"#,
+        true,
+    ),
+    (
+        r#"{
+                "type":"record",
+                "name":"HandshakeResponse",
+                "namespace":"org.apache.avro.ipc",
+                "fields":[
+                    {
+                        "name":"match",
+                        "type":{
+                            "type":"enum",
+                            "name":"HandshakeMatch",
+                            "symbols":["BOTH", "CLIENT", "NONE"]
+                        }
+                    },
+                    {"name":"serverProtocol", "type":["null", "string"]},
+                    {
+                        "name":"serverHash",
+                        "type":["null", { "name":"MD5", "size":16, 
"type":"fixed"}]
+                    },
+                    {"name":"meta", "type":["null", { "type":"map", 
"values":"bytes"}]}
+                ]
+            }"#,
+        true,
+    ),
+    // Unions may not contain more than one schema with the same type, except for the named
+    // types record, fixed and enum. For example, unions containing two array types or two map
+    // types are not permitted, but two types with different names are permitted.
+    // (Names permit efficient resolution when reading and writing unions.)
+    (
+        r#"{
+            "type": "record",
+            "name": "ipAddr",
+            "fields": [
+                {
+                    "name": "addr",
+                    "type": [
+                        {"name": "IPv6", "type": "fixed", "size": 16},
+                        {"name": "IPv4", "type": "fixed", "size": 4}
+                    ]
+                }
+            ]
+        }"#,
+        true,
+    ),
+    (
+        r#"{
+                "type": "record",
+                "name": "Address",
+                "fields": [
+                    {"type": "string"},
+                    {"type": "string", "name": "City"}
+                ]
+            }"#,
+        false,
+    ),
+    (
+        r#"{
+                "type": "record",
+                "name": "Event",
+                "fields": [{"name": "Sponsor"}, {"name": "City", "type": 
"string"}]
+            }"#,
+        false,
+    ),
+    (
+        r#"{
+                "type": "record",
+                "fields": "His vision, from the constantly passing bars,"
+                "name",
+                "Rainer"
+            }"#,
+        false,
+    ),
+    (
+        r#"{
+                "name": ["Tom", "Jerry"],
+                "type": "record",
+                "fields": [{"name": "name", "type": "string"}]
+            }"#,
+        false,
+    ),
+];
+
+pub const DOC_EXAMPLES: &[(&str, bool)] = &[
+    (
+        r#"{
+                "type": "record",
+                "name": "TestDoc",
+                "doc":  "Doc string",
+                "fields": [{"name": "name", "type": "string", "doc" : "Doc 
String"}]
+            }"#,
+        true,
+    ),
+    (
+        r#"{"type": "enum", "name": "Test", "symbols": ["A", "B"], "doc": "Doc 
String"}"#,
+        true,
+    ),
+    (
+        r#"{"type": "fixed", "name": "Test", "size": 1, "doc": "Fixed Doc 
String"}"#,
+        true,
+    ),
+];
+
+pub const OTHER_ATTRIBUTES_EXAMPLES: &[(&str, bool)] = &[
+    (
+        r#"{
+                "type": "record",
+                "name": "TestRecord",
+                "cp_string": "string",
+                "cp_int": 1,
+                "cp_array": [ 1, 2, 3, 4],
+                "fields": [
+                    {"name": "f1", "type": "string", "cp_object": 
{"a":1,"b":2}},
+                    {"name": "f2", "type": "long", "cp_null": null}
+                ]
+            }"#,
+        true,
+    ),
+    (
+        r#"{"type": "map", "values": "long", "cp_boolean": true}"#,
+        true,
+    ),
+    (
+        r#"{
+                "type": "enum",
+                 "name": "TestEnum",
+                 "symbols": [ "one", "two", "three" ],
+                 "cp_float" : 1.0
+            }"#,
+        true,
+    ),
+    (r#"{"type": "long", "date": "true"}"#, true),
+];
+
+pub const DECIMAL_LOGICAL_TYPE: &[(&str, bool)] = &[
+    (
+        r#"{
+            "type": {
+                "type": "fixed",
+                "name": "TestDecimal",
+                "size": 10
+            },
+            "logicalType": "decimal",
+            "precision": 4,
+            "scale": 2
+        }"#,
+        true,
+    ),
+    (
+        r#"{
+            "type": {
+                "type": "fixed",
+                "name": "ScaleIsImplicitlyZero",
+                "size": 10
+            },
+            "logicalType": "decimal",
+            "precision": 4
+        }"#,
+        true,
+    ),
+    (
+        r#"{
+            "type": {
+                "type": "fixed",
+                "name": "PrecisionMustBeGreaterThanZero",
+                "size": 10
+            },
+            "logicalType": "decimal",
+            "precision": 0
+        }"#,
+        true,
+    ),
+    (
+        r#"{
+             "type": "fixed",
+             "logicalType": "decimal",
+             "name": "TestDecimal",
+             "precision": 10,
+             "scale": 2,
+             "size": 18
+         }"#,
+        true,
+    ),
+    (
+        r#"{
+             "type": "bytes",
+             "logicalType": "decimal",
+             "precision": 4,
+             "scale": 2
+         }"#,
+        true,
+    ),
+    (
+        r#"{
+             "type": "bytes",
+             "logicalType": "decimal",
+             "precision": 2,
+             "scale": -2
+         }"#,
+        true,
+    ),
+    (
+        r#"{
+             "type": "bytes",
+             "logicalType": "decimal",
+             "precision": -2,
+             "scale": 2
+         }"#,
+        true,
+    ),
+    (
+        r#"{
+             "type": "bytes",
+             "logicalType": "decimal",
+             "precision": 2,
+             "scale": 3
+         }"#,
+        true,
+    ),
+    (
+        r#"{
+             "type": "fixed",
+             "logicalType": "decimal",
+             "name": "TestDecimal",
+             "precision": -10,
+             "scale": 2,
+             "size": 5
+         }"#,
+        true,
+    ),
+    (
+        r#"{
+             "type": "fixed",
+             "logicalType": "decimal",
+             "name": "TestDecimal",
+             "precision": 2,
+             "scale": 3,
+             "size": 2
+         }"#,
+        true,
+    ),
+    (
+        r#"{
+             "type": "fixed",
+             "logicalType": "decimal",
+             "name": "TestDecimal",
+             "precision": 2,
+             "scale": 2,
+             "size": -2
+         }"#,
+        false,
+    ),
+];
+
+pub const DATE_LOGICAL_TYPE: &[(&str, bool)] = &[
+    (r#"{"type": "int", "logicalType": "date"}"#, true),
+    // this is valid even though its logical type is "date1", because unknown logical types are
+    // ignored
+    (r#"{"type": "int", "logicalType": "date1"}"#, true),
+    // this is still valid because a logicalType not applicable to the base type is ignored
+    (r#"{"type": "long", "logicalType": "date"}"#, true),
+];
+
+pub const TIMEMILLIS_LOGICAL_TYPE: &[(&str, bool)] = &[
+    (r#"{"type": "int", "logicalType": "time-millis"}"#, true),
+    // this is valid even though its logical type is "time-milis" (missing the second "l"),
+    // because unknown logical types are ignored
+    (r#"{"type": "int", "logicalType": "time-milis"}"#, true),
+    // this is still valid because a logicalType not applicable to the base type is ignored
+    (r#"{"type": "long", "logicalType": "time-millis"}"#, true),
+];
+
+pub const TIMEMICROS_LOGICAL_TYPE: &[(&str, bool)] = &[
+    (r#"{"type": "long", "logicalType": "time-micros"}"#, true),
+    // this is valid even though its logical type is "time-micro" (missing the last "s"), because
+    // unknown logical types are ignored
+    (r#"{"type": "long", "logicalType": "time-micro"}"#, true),
+    // this is still valid because a logicalType not applicable to the base type is ignored
+    (r#"{"type": "int", "logicalType": "time-micros"}"#, true),
+];
+
+pub const TIMESTAMPMILLIS_LOGICAL_TYPE: &[(&str, bool)] = &[
+    (
+        r#"{"type": "long", "logicalType": "timestamp-millis"}"#,
+        true,
+    ),
+    // this is valid even though its logical type is "timestamp-milis" (missing the second "l"), because
+    // unknown logical types are ignored
+    (
+        r#"{"type": "long", "logicalType": "timestamp-milis"}"#,
+        true,
+    ),
+    (
+        // this is still valid because a logicalType not applicable to the base type is ignored
+        r#"{"type": "int", "logicalType": "timestamp-millis"}"#,
+        true,
+    ),
+];
+
+pub const TIMESTAMPMICROS_LOGICAL_TYPE: &[(&str, bool)] = &[
+    (
+        r#"{"type": "long", "logicalType": "timestamp-micros"}"#,
+        true,
+    ),
+    // this is valid even though its logical type is "timestamp-micro" (missing the last "s"), because
+    // unknown logical types are ignored
+    (
+        r#"{"type": "long", "logicalType": "timestamp-micro"}"#,
+        true,
+    ),
+    (
+        // this is still valid because a logicalType not applicable to the base type is ignored
+        r#"{"type": "int", "logicalType": "timestamp-micros"}"#,
+        true,
+    ),
+];
+
+pub const LOCAL_TIMESTAMPMILLIS_LOGICAL_TYPE: &[(&str, bool)] = &[
+    (
+        r#"{"type": "long", "logicalType": "local-timestamp-millis"}"#,
+        true,
+    ),
+    // this is valid even though its logical type is "local-timestamp-milis" (missing the second "l"), because
+    // unknown logical types are ignored
+    (
+        r#"{"type": "long", "logicalType": "local-timestamp-milis"}"#,
+        true,
+    ),
+    (
+        // this is still valid because a logicalType not applicable to the base type is ignored
+        r#"{"type": "int", "logicalType": "local-timestamp-millis"}"#,
+        true,
+    ),
+];
+
+pub const LOCAL_TIMESTAMPMICROS_LOGICAL_TYPE: &[(&str, bool)] = &[
+    (
+        r#"{"type": "long", "logicalType": "local-timestamp-micros"}"#,
+        true,
+    ),
+    // this is valid even though its logical type is "local-timestamp-micro" (missing the last "s"), because
+    // unknown logical types are ignored
+    (
+        r#"{"type": "long", "logicalType": "local-timestamp-micro"}"#,
+        true,
+    ),
+    (
+        // this is still valid because a logicalType not applicable to the base type is ignored
+        r#"{"type": "int", "logicalType": "local-timestamp-micros"}"#,
+        true,
+    ),
+];
+
+pub fn examples() -> &'static Vec<(&'static str, bool)> {
+    static EXAMPLES_ONCE: OnceLock<Vec<(&'static str, bool)>> = 
OnceLock::new();
+    EXAMPLES_ONCE.get_or_init(|| {
+        Vec::new()
+            .iter()
+            .copied()
+            .chain(PRIMITIVE_EXAMPLES.iter().copied())
+            .chain(FIXED_EXAMPLES.iter().copied())
+            .chain(ENUM_EXAMPLES.iter().copied())
+            .chain(ARRAY_EXAMPLES.iter().copied())
+            .chain(MAP_EXAMPLES.iter().copied())
+            .chain(UNION_EXAMPLES.iter().copied())
+            .chain(RECORD_EXAMPLES.iter().copied())
+            .chain(DOC_EXAMPLES.iter().copied())
+            .chain(OTHER_ATTRIBUTES_EXAMPLES.iter().copied())
+            .chain(DECIMAL_LOGICAL_TYPE.iter().copied())
+            .chain(DATE_LOGICAL_TYPE.iter().copied())
+            .chain(TIMEMILLIS_LOGICAL_TYPE.iter().copied())
+            .chain(TIMEMICROS_LOGICAL_TYPE.iter().copied())
+            .chain(TIMESTAMPMILLIS_LOGICAL_TYPE.iter().copied())
+            .chain(TIMESTAMPMICROS_LOGICAL_TYPE.iter().copied())
+            .chain(LOCAL_TIMESTAMPMILLIS_LOGICAL_TYPE.iter().copied())
+            .chain(LOCAL_TIMESTAMPMICROS_LOGICAL_TYPE.iter().copied())
+            .collect()
+    })
+}
+
+pub fn valid_examples() -> &'static Vec<(&'static str, bool)> {
+    static VALID_EXAMPLES_ONCE: OnceLock<Vec<(&'static str, bool)>> = 
OnceLock::new();
+    VALID_EXAMPLES_ONCE.get_or_init(|| examples().iter().copied().filter(|s| 
s.1).collect())
+}
diff --git a/lang/rust/avro_test_helper/src/lib.rs 
b/lang/rust/avro_test_helper/src/lib.rs
index e316dc818..f9fd05030 100644
--- a/lang/rust/avro_test_helper/src/lib.rs
+++ b/lang/rust/avro_test_helper/src/lib.rs
@@ -26,6 +26,7 @@ thread_local! {
     pub(crate) static LOG_MESSAGES: RefCell<Vec<String>> = const { 
RefCell::new(Vec::new()) };
 }
 
+pub mod data;
 pub mod logger;
 
 #[cfg(not(target_arch = "wasm32"))]


Reply via email to