This is an automated email from the ASF dual-hosted git repository.
mgrigorov pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/avro.git
The following commit(s) were added to refs/heads/main by this push:
new 728b807c4 AVRO-4004: [Rust] Ignore logicalType fields when creating
the canonical form (#2976)
728b807c4 is described below
commit 728b807c43c84f245d8ba6d621b2082b37b65671
Author: Martin Grigorov <[email protected]>
AuthorDate: Fri Jul 12 17:39:34 2024 +0300
AVRO-4004: [Rust] Ignore logicalType fields when creating the canonical
form (#2976)
* AVRO-4004: [Rust] Ignore logicalType fields when creating the canonical
form
Signed-off-by: Martin Tzvetanov Grigorov <[email protected]>
* AVRO-4004: [Rust] Ignore the namespace for non-named schemas
When creating the canonical parsing form of a Schema, ignore the
namespace for any non-named Schemas, i.e. anything but Record, Enum,
Fixed and Ref.
Signed-off-by: Martin Tzvetanov Grigorov <[email protected]>
* AVRO-4004: Remove the test for round trip after canonical form
Signed-off-by: Martin Tzvetanov Grigorov <[email protected]>
---------
Signed-off-by: Martin Tzvetanov Grigorov <[email protected]>
---
lang/rust/avro/src/schema.rs | 50 ++-
lang/rust/avro/tests/schema.rs | 651 +--------------------------------
lang/rust/avro_test_helper/src/data.rs | 636 ++++++++++++++++++++++++++++++++
lang/rust/avro_test_helper/src/lib.rs | 1 +
4 files changed, 686 insertions(+), 652 deletions(-)
diff --git a/lang/rust/avro/src/schema.rs b/lang/rust/avro/src/schema.rs
index f58892ca0..1d2272a78 100644
--- a/lang/rust/avro/src/schema.rs
+++ b/lang/rust/avro/src/schema.rs
@@ -2156,6 +2156,7 @@ fn parsing_canonical_form(schema: &Value) -> String {
fn pcf_map(schema: &Map<String, Value>) -> String {
// Look for the namespace variant up front.
let ns = schema.get("namespace").and_then(|v| v.as_str());
+ let typ = schema.get("type").and_then(|v| v.as_str());
let mut fields = Vec::new();
for (k, v) in schema {
// Reduce primitive types to their simple form. ([PRIMITIVE] rule)
@@ -2167,7 +2168,12 @@ fn pcf_map(schema: &Map<String, Value>) -> String {
}
// Strip out unused fields ([STRIP] rule)
- if field_ordering_position(k).is_none() || k == "default" || k == "doc" || k == "aliases" {
+ if field_ordering_position(k).is_none()
+ || k == "default"
+ || k == "doc"
+ || k == "aliases"
+ || k == "logicalType"
+ {
continue;
}
@@ -2176,7 +2182,9 @@ fn pcf_map(schema: &Map<String, Value>) -> String {
// Invariant: Only valid schemas. Must be a string.
let name = v.as_str().unwrap();
let n = match ns {
- Some(namespace) if !name.contains('.') => Cow::Owned(format!("{namespace}.{name}")),
+ Some(namespace) if is_named_type(typ) && !name.contains('.') => {
+ Cow::Owned(format!("{namespace}.{name}"))
+ }
_ => Cow::Borrowed(name),
};
@@ -2211,6 +2219,13 @@ fn pcf_map(schema: &Map<String, Value>) -> String {
format!("{{{inter}}}")
}
+fn is_named_type(typ: Option<&str>) -> bool {
+ matches!(
+ typ,
+ Some("record") | Some("enum") | Some("fixed") | Some("ref")
+ )
+}
+
fn pcf_array(arr: &[Value]) -> String {
let inter = arr
.iter()
@@ -2443,6 +2458,7 @@ pub mod derive {
#[cfg(test)]
mod tests {
use super::*;
+ use crate::rabin::Rabin;
use apache_avro_test_helper::{
logger::{assert_logged, assert_not_logged},
TestResult,
@@ -3415,16 +3431,16 @@ mod tests {
let schema = Schema::parse_str(raw_schema)?;
assert_eq!(
- "abf662f831715ff78f88545a05a9262af75d6406b54e1a8a174ff1d2b75affc4",
+ "7eb3b28d73dfc99bdd9af1848298b40804a2f8ad5d2642be2ecc2ad34842b987",
format!("{}", schema.fingerprint::<Sha256>())
);
assert_eq!(
- "6e21c350f71b1a34e9efe90970f1bc69",
+ "cb11615e412ee5d872620d8df78ff6ae",
format!("{}", schema.fingerprint::<Md5>())
);
assert_eq!(
- "28cf0a67d9937bb3",
+ "92f2ccef718c6754",
format!("{}", schema.fingerprint::<Rabin>())
);
@@ -6764,4 +6780,28 @@ mod tests {
Ok(())
}
+
+ #[test]
+ fn avro_4004_canonical_form_strip_logical_types() -> TestResult {
+ let schema_str = r#"
+ {
+ "type": "record",
+ "name": "test",
+ "fields": [
+ {"name": "a", "type": "long", "default": 42, "doc": "The field a"},
+ {"name": "b", "type": "string", "namespace": "test.a"},
+ {"name": "c", "type": "long", "logicalType": "timestamp-micros"}
+ ]
+ }"#;
+
+ let schema = Schema::parse_str(schema_str)?;
+ let canonical_form = schema.canonical_form();
+ let fp_rabin = schema.fingerprint::<Rabin>();
+ assert_eq!(
+ r#"{"name":"test","type":"record","fields":[{"name":"a","type":"long"},{"name":"b","type":"string"},{"name":"c","type":{"type":"long"}}]}"#,
+ canonical_form
+ );
+ assert_eq!("92f2ccef718c6754", fp_rabin.to_string());
+ Ok(())
+ }
}
diff --git a/lang/rust/avro/tests/schema.rs b/lang/rust/avro/tests/schema.rs
index 7851d957d..13cf6af26 100644
--- a/lang/rust/avro/tests/schema.rs
+++ b/lang/rust/avro/tests/schema.rs
@@ -18,7 +18,6 @@
use std::{
collections::HashMap,
io::{Cursor, Read},
- sync::OnceLock,
};
use apache_avro::{
@@ -28,638 +27,10 @@ use apache_avro::{
types::{Record, Value},
Codec, Error, Reader, Schema, Writer,
};
-use apache_avro_test_helper::{init, TestResult};
-
-const PRIMITIVE_EXAMPLES: &[(&str, bool)] = &[
- (r#""null""#, true),
- (r#"{"type": "null"}"#, true),
- (r#""boolean""#, true),
- (r#"{"type": "boolean"}"#, true),
- (r#""string""#, true),
- (r#"{"type": "string"}"#, true),
- (r#""bytes""#, true),
- (r#"{"type": "bytes"}"#, true),
- (r#""int""#, true),
- (r#"{"type": "int"}"#, true),
- (r#""long""#, true),
- (r#"{"type": "long"}"#, true),
- (r#""float""#, true),
- (r#"{"type": "float"}"#, true),
- (r#""double""#, true),
- (r#"{"type": "double"}"#, true),
- (r#""true""#, false),
- (r#"true"#, false),
- (r#"{"no_type": "test"}"#, false),
- (r#"{"type": "panther"}"#, false),
-];
-
-const FIXED_EXAMPLES: &[(&str, bool)] = &[
- (r#"{"type": "fixed", "name": "Test", "size": 1}"#, true),
- (
- r#"{
- "type": "fixed",
- "name": "MyFixed",
- "namespace": "org.apache.hadoop.avro",
- "size": 1
- }"#,
- true,
- ),
- (r#"{"type": "fixed", "name": "MissingSize"}"#, false),
- (r#"{"type": "fixed", "size": 314}"#, false),
-];
-
-const ENUM_EXAMPLES: &[(&str, bool)] = &[
- (
- r#"{"type": "enum", "name": "Test", "symbols": ["A", "B"]}"#,
- true,
- ),
- (
- r#"{
- "type": "enum",
- "name": "Status",
- "symbols": "Normal Caution Critical"
- }"#,
- false,
- ),
- (
- r#"{
- "type": "enum",
- "name": [ 0, 1, 1, 2, 3, 5, 8 ],
- "symbols": ["Golden", "Mean"]
- }"#,
- false,
- ),
- (
- r#"{
- "type": "enum",
- "symbols" : ["I", "will", "fail", "no", "name"]
- }"#,
- false,
- ),
- (
- r#"{
- "type": "enum",
- "name": "Test"
- "symbols" : ["AA", "AA"]
- }"#,
- false,
- ),
-];
-
-const ARRAY_EXAMPLES: &[(&str, bool)] = &[
- (r#"{"type": "array", "items": "long"}"#, true),
- (
- r#"{
- "type": "array",
- "items": {"type": "enum", "name": "Test", "symbols": ["A",
"B"]}
- }"#,
- true,
- ),
-];
-
-const MAP_EXAMPLES: &[(&str, bool)] = &[
- (r#"{"type": "map", "values": "long"}"#, true),
- (
- r#"{
- "type": "map",
- "values": {"type": "enum", "name": "Test", "symbols": ["A",
"B"]}
- }"#,
- true,
- ),
-];
-
-const UNION_EXAMPLES: &[(&str, bool)] = &[
- (r#"["string", "null", "long"]"#, true),
- (r#"["null", "null"]"#, false),
- (r#"["long", "long"]"#, false),
- (
- r#"[
- {"type": "array", "items": "long"}
- {"type": "array", "items": "string"}
- ]"#,
- false,
- ),
- // Unions with default values
- (
- r#"{"name": "foo", "type": ["string", "long"], "default": "bar"}"#,
- true,
- ),
- (
- r#"{"name": "foo", "type": ["long", "string"], "default": 1}"#,
- true,
- ),
- (
- r#"{"name": "foo", "type": ["null", "string"], "default": null}"#,
- true,
- ),
- (
- r#"{"name": "foo", "type": ["string", "long"], "default": 1}"#,
- true,
- ),
- (
- r#"{"name": "foo", "type": ["string", "null"], "default": null}"#,
- true,
- ),
- (
- r#"{"name": "foo", "type": ["null", "string"], "default": "null"}"#,
- true,
- ),
- (
- r#"{"name": "foo", "type": ["long", "string"], "default": "str"}"#,
- true,
- ),
-];
-
-const RECORD_EXAMPLES: &[(&str, bool)] = &[
- (
- r#"{
- "type": "record",
- "name": "Test",
- "fields": [{"name": "f", "type": "long"}]
- }"#,
- true,
- ),
- (
- r#"{
- "type": "error",
- "name": "Test",
- "fields": [{"name": "f", "type": "long"}]
- }"#,
- false,
- ),
- (
- r#"{
- "type": "record",
- "name": "Node",
- "fields": [
- {"name": "label", "type": "string"},
- {"name": "children", "type": {"type": "array", "items":
"Node"}}
- ]
- }"#,
- true,
- ),
- (
- r#"{
- "type": "record",
- "name": "Lisp",
- "fields": [
- {
- "name": "value",
- "type": [
- "null", "string",
- {
- "type": "record",
- "name": "Cons",
- "fields": [
- {"name": "car", "type": "Lisp"},
- {"name": "cdr", "type": "Lisp"}
- ]
- }
- ]
- }
- ]
- }"#,
- true,
- ),
- (
- r#"{
- "type": "record",
- "name": "HandshakeRequest",
- "namespace": "org.apache.avro.ipc",
- "fields": [
- {"name": "clientHash", "type": {"type": "fixed", "name":
"MD5", "size": 16}},
- {"name": "clientProtocol", "type": ["null", "string"]},
- {"name": "serverHash", "type": "MD5"},
- {"name": "meta", "type": ["null", {"type": "map", "values":
"bytes"}]}
- ]
- }"#,
- true,
- ),
- (
- r#"{
- "type":"record",
- "name":"HandshakeResponse",
- "namespace":"org.apache.avro.ipc",
- "fields":[
- {
- "name":"match",
- "type":{
- "type":"enum",
- "name":"HandshakeMatch",
- "symbols":["BOTH", "CLIENT", "NONE"]
- }
- },
- {"name":"serverProtocol", "type":["null", "string"]},
- {
- "name":"serverHash",
- "type":["null", {"name":"MD5", "size":16,
"type":"fixed"}]
- },
- {
- "name":"meta",
- "type":["null", {"type":"map", "values":"bytes"}]
- }
- ]
- }"#,
- true,
- ),
- (
- r#"{
- "type":"record",
- "name":"HandshakeResponse",
- "namespace":"org.apache.avro.ipc",
- "fields":[
- {
- "name":"match",
- "type":{
- "type":"enum",
- "name":"HandshakeMatch",
- "symbols":["BOTH", "CLIENT", "NONE"]
- }
- },
- {"name":"serverProtocol", "type":["null", "string"]},
- {
- "name":"serverHash",
- "type":["null", { "name":"MD5", "size":16,
"type":"fixed"}]
- },
- {"name":"meta", "type":["null", { "type":"map",
"values":"bytes"}]}
- ]
- }"#,
- true,
- ),
- // Unions may not contain more than one schema with the same type, except
for the named
- // types record, fixed and enum. For example, unions containing two array
types or two map
- // types are not permitted, but two types with different names are
permitted.
- // (Names permit efficient resolution when reading and writing unions.)
- (
- r#"{
- "type": "record",
- "name": "ipAddr",
- "fields": [
- {
- "name": "addr",
- "type": [
- {"name": "IPv6", "type": "fixed", "size": 16},
- {"name": "IPv4", "type": "fixed", "size": 4}
- ]
- }
- ]
- }"#,
- true,
- ),
- (
- r#"{
- "type": "record",
- "name": "Address",
- "fields": [
- {"type": "string"},
- {"type": "string", "name": "City"}
- ]
- }"#,
- false,
- ),
- (
- r#"{
- "type": "record",
- "name": "Event",
- "fields": [{"name": "Sponsor"}, {"name": "City", "type":
"string"}]
- }"#,
- false,
- ),
- (
- r#"{
- "type": "record",
- "fields": "His vision, from the constantly passing bars,"
- "name",
- "Rainer"
- }"#,
- false,
- ),
- (
- r#"{
- "name": ["Tom", "Jerry"],
- "type": "record",
- "fields": [{"name": "name", "type": "string"}]
- }"#,
- false,
- ),
-];
-
-const DOC_EXAMPLES: &[(&str, bool)] = &[
- (
- r#"{
- "type": "record",
- "name": "TestDoc",
- "doc": "Doc string",
- "fields": [{"name": "name", "type": "string", "doc" : "Doc
String"}]
- }"#,
- true,
- ),
- (
- r#"{"type": "enum", "name": "Test", "symbols": ["A", "B"], "doc": "Doc
String"}"#,
- true,
- ),
- (
- r#"{"type": "fixed", "name": "Test", "size": 1, "doc": "Fixed Doc
String"}"#,
- true,
- ),
-];
-
-const OTHER_ATTRIBUTES_EXAMPLES: &[(&str, bool)] = &[
- (
- r#"{
- "type": "record",
- "name": "TestRecord",
- "cp_string": "string",
- "cp_int": 1,
- "cp_array": [ 1, 2, 3, 4],
- "fields": [
- {"name": "f1", "type": "string", "cp_object":
{"a":1,"b":2}},
- {"name": "f2", "type": "long", "cp_null": null}
- ]
- }"#,
- true,
- ),
- (
- r#"{"type": "map", "values": "long", "cp_boolean": true}"#,
- true,
- ),
- (
- r#"{
- "type": "enum",
- "name": "TestEnum",
- "symbols": [ "one", "two", "three" ],
- "cp_float" : 1.0
- }"#,
- true,
- ),
- (r#"{"type": "long", "date": "true"}"#, true),
-];
-
-const DECIMAL_LOGICAL_TYPE: &[(&str, bool)] = &[
- (
- r#"{
- "type": {
- "type": "fixed",
- "name": "TestDecimal",
- "size": 10
- },
- "logicalType": "decimal",
- "precision": 4,
- "scale": 2
- }"#,
- true,
- ),
- (
- r#"{
- "type": {
- "type": "fixed",
- "name": "ScaleIsImplicitlyZero",
- "size": 10
- },
- "logicalType": "decimal",
- "precision": 4
- }"#,
- true,
- ),
- (
- r#"{
- "type": {
- "type": "fixed",
- "name": "PrecisionMustBeGreaterThanZero",
- "size": 10
- },
- "logicalType": "decimal",
- "precision": 0
- }"#,
- true,
- ),
- (
- r#"{
- "type": "bytes",
- "logicalType": "decimal",
- "precision": 4,
- "scale": 2
- }"#,
- true,
- ),
- (
- r#"{
- "type": "bytes",
- "logicalType": "decimal",
- "precision": 2,
- "scale": -2
- }"#,
- true,
- ),
- (
- r#"{
- "type": "bytes",
- "logicalType": "decimal",
- "precision": -2,
- "scale": 2
- }"#,
- true,
- ),
- (
- r#"{
- "type": "bytes",
- "logicalType": "decimal",
- "precision": 2,
- "scale": 3
- }"#,
- true,
- ),
- (
- r#"{
- "type": "fixed",
- "logicalType": "decimal",
- "name": "TestDecimal",
- "precision": -10,
- "scale": 2,
- "size": 5
- }"#,
- true,
- ),
- (
- r#"{
- "type": "fixed",
- "logicalType": "decimal",
- "name": "TestDecimal",
- "precision": 2,
- "scale": 3,
- "size": 2
- }"#,
- true,
- ),
- (
- r#"{
- "type": "fixed",
- "logicalType": "decimal",
- "name": "TestDecimal",
- "precision": 2,
- "scale": 2,
- "size": -2
- }"#,
- false,
- ),
-];
-
-const DECIMAL_LOGICAL_TYPE_ATTRIBUTES: &[(&str, bool)] = &[
- /*
- // TODO: (#93) support logical types and attributes and uncomment
- (
- r#"{
- "type": "fixed",
- "logicalType": "decimal",
- "name": "TestDecimal",
- "precision": 4,
- "scale": 2,
- "size": 2
- }"#,
- true
- ),
- (
- r#"{
- "type": "bytes",
- "logicalType": "decimal",
- "precision": 4
- }"#,
- true
- ),
- */
-];
-
-const DATE_LOGICAL_TYPE: &[(&str, bool)] = &[
- (r#"{"type": "int", "logicalType": "date"}"#, true),
- // this is valid even though its logical type is "date1", because unknown
logical types are
- // ignored
- (r#"{"type": "int", "logicalType": "date1"}"#, true),
- // this is still valid because unknown logicalType should be ignored
- (r#"{"type": "long", "logicalType": "date"}"#, true),
-];
-
-const TIMEMILLIS_LOGICAL_TYPE: &[(&str, bool)] = &[
- (r#"{"type": "int", "logicalType": "time-millis"}"#, true),
- // this is valid even though its logical type is "time-milis" (missing the
second "l"),
- // because unknown logical types are ignored
- (r#"{"type": "int", "logicalType": "time-milis"}"#, true),
- // this is still valid because unknown logicalType should be ignored
- (r#"{"type": "long", "logicalType": "time-millis"}"#, true),
-];
-
-const TIMEMICROS_LOGICAL_TYPE: &[(&str, bool)] = &[
- (r#"{"type": "long", "logicalType": "time-micros"}"#, true),
- // this is valid even though its logical type is "time-micro" (missing the
last "s"), because
- // unknown logical types are ignored
- (r#"{"type": "long", "logicalType": "time-micro"}"#, true),
- // this is still valid because unknown logicalType should be ignored
- (r#"{"type": "int", "logicalType": "time-micros"}"#, true),
-];
-
-const TIMESTAMPMILLIS_LOGICAL_TYPE: &[(&str, bool)] = &[
- (
- r#"{"type": "long", "logicalType": "timestamp-millis"}"#,
- true,
- ),
- // this is valid even though its logical type is "timestamp-milis"
(missing the second "l"), because
- // unknown logical types are ignored
- (
- r#"{"type": "long", "logicalType": "timestamp-milis"}"#,
- true,
- ),
- (
- // this is still valid because unknown logicalType should be ignored
- r#"{"type": "int", "logicalType": "timestamp-millis"}"#,
- true,
- ),
-];
-
-const TIMESTAMPMICROS_LOGICAL_TYPE: &[(&str, bool)] = &[
- (
- r#"{"type": "long", "logicalType": "timestamp-micros"}"#,
- true,
- ),
- // this is valid even though its logical type is "timestamp-micro"
(missing the last "s"), because
- // unknown logical types are ignored
- (
- r#"{"type": "long", "logicalType": "timestamp-micro"}"#,
- true,
- ),
- (
- // this is still valid because unknown logicalType should be ignored
- r#"{"type": "int", "logicalType": "timestamp-micros"}"#,
- true,
- ),
-];
-
-const LOCAL_TIMESTAMPMILLIS_LOGICAL_TYPE: &[(&str, bool)] = &[
- (
- r#"{"type": "long", "logicalType": "local-timestamp-millis"}"#,
- true,
- ),
- // this is valid even though its logical type is "local-timestamp-milis"
(missing the second "l"), because
- // unknown logical types are ignored
- (
- r#"{"type": "long", "logicalType": "local-timestamp-milis"}"#,
- true,
- ),
- (
- // this is still valid because unknown logicalType should be ignored
- r#"{"type": "int", "logicalType": "local-timestamp-millis"}"#,
- true,
- ),
-];
-
-const LOCAL_TIMESTAMPMICROS_LOGICAL_TYPE: &[(&str, bool)] = &[
- (
- r#"{"type": "long", "logicalType": "local-timestamp-micros"}"#,
- true,
- ),
- // this is valid even though its logical type is "local-timestamp-micro"
(missing the last "s"), because
- // unknown logical types are ignored
- (
- r#"{"type": "long", "logicalType": "local-timestamp-micro"}"#,
- true,
- ),
- (
- // this is still valid because unknown logicalType should be ignored
- r#"{"type": "int", "logicalType": "local-timestamp-micros"}"#,
- true,
- ),
-];
-
-fn examples() -> &'static Vec<(&'static str, bool)> {
- static EXAMPLES_ONCE: OnceLock<Vec<(&'static str, bool)>> =
OnceLock::new();
- EXAMPLES_ONCE.get_or_init(|| {
- Vec::new()
- .iter()
- .copied()
- .chain(PRIMITIVE_EXAMPLES.iter().copied())
- .chain(FIXED_EXAMPLES.iter().copied())
- .chain(ENUM_EXAMPLES.iter().copied())
- .chain(ARRAY_EXAMPLES.iter().copied())
- .chain(MAP_EXAMPLES.iter().copied())
- .chain(UNION_EXAMPLES.iter().copied())
- .chain(RECORD_EXAMPLES.iter().copied())
- .chain(DOC_EXAMPLES.iter().copied())
- .chain(OTHER_ATTRIBUTES_EXAMPLES.iter().copied())
- .chain(DECIMAL_LOGICAL_TYPE.iter().copied())
- .chain(DECIMAL_LOGICAL_TYPE_ATTRIBUTES.iter().copied())
- .chain(DATE_LOGICAL_TYPE.iter().copied())
- .chain(TIMEMILLIS_LOGICAL_TYPE.iter().copied())
- .chain(TIMEMICROS_LOGICAL_TYPE.iter().copied())
- .chain(TIMESTAMPMILLIS_LOGICAL_TYPE.iter().copied())
- .chain(TIMESTAMPMICROS_LOGICAL_TYPE.iter().copied())
- .chain(LOCAL_TIMESTAMPMILLIS_LOGICAL_TYPE.iter().copied())
- .chain(LOCAL_TIMESTAMPMICROS_LOGICAL_TYPE.iter().copied())
- .collect()
- })
-}
-
-fn valid_examples() -> &'static Vec<(&'static str, bool)> {
- static VALID_EXAMPLES_ONCE: OnceLock<Vec<(&'static str, bool)>> =
OnceLock::new();
- VALID_EXAMPLES_ONCE.get_or_init(|| examples().iter().copied().filter(|s|
s.1).collect())
-}
+use apache_avro_test_helper::{
+ data::{examples, valid_examples, DOC_EXAMPLES},
+ init, TestResult,
+};
#[test]
fn test_correct_recursive_extraction() -> TestResult {
@@ -798,20 +169,6 @@ fn test_valid_cast_to_string_after_parse() -> TestResult {
Ok(())
}
-#[test]
-/// 1. Given a string, parse it to get Avro schema "original".
-/// 2. Serialize "original" to a string and parse that string to generate Avro
schema "round trip".
-/// 3. Ensure "original" and "round trip" schemas are equivalent.
-fn test_equivalence_after_round_trip() -> TestResult {
- init();
- for (raw_schema, _) in valid_examples().iter() {
- let original_schema = Schema::parse_str(raw_schema)?;
- let round_trip_schema =
Schema::parse_str(original_schema.canonical_form().as_str())?;
- assert_eq!(original_schema, round_trip_schema);
- }
- Ok(())
-}
-
#[test]
/// Test that a list of schemas whose definitions do not depend on each other produces the same
/// result as parsing each element of the list individually
diff --git a/lang/rust/avro_test_helper/src/data.rs b/lang/rust/avro_test_helper/src/data.rs
new file mode 100644
index 000000000..662df23d3
--- /dev/null
+++ b/lang/rust/avro_test_helper/src/data.rs
@@ -0,0 +1,636 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Provides a set of Avro schema examples that are used in the tests.
+
+use std::sync::OnceLock;
+
+pub const PRIMITIVE_EXAMPLES: &[(&str, bool)] = &[
+ (r#""null""#, true),
+ (r#"{"type": "null"}"#, true),
+ (r#""boolean""#, true),
+ (r#"{"type": "boolean"}"#, true),
+ (r#""string""#, true),
+ (r#"{"type": "string"}"#, true),
+ (r#""bytes""#, true),
+ (r#"{"type": "bytes"}"#, true),
+ (r#""int""#, true),
+ (r#"{"type": "int"}"#, true),
+ (r#""long""#, true),
+ (r#"{"type": "long"}"#, true),
+ (r#""float""#, true),
+ (r#"{"type": "float"}"#, true),
+ (r#""double""#, true),
+ (r#"{"type": "double"}"#, true),
+ (r#""true""#, false),
+ (r#"true"#, false),
+ (r#"{"no_type": "test"}"#, false),
+ (r#"{"type": "panther"}"#, false),
+];
+
+pub const FIXED_EXAMPLES: &[(&str, bool)] = &[
+ (r#"{"type": "fixed", "name": "Test", "size": 1}"#, true),
+ (
+ r#"{
+ "type": "fixed",
+ "name": "MyFixed",
+ "namespace": "org.apache.hadoop.avro",
+ "size": 1
+ }"#,
+ true,
+ ),
+ (r#"{"type": "fixed", "name": "MissingSize"}"#, false),
+ (r#"{"type": "fixed", "size": 314}"#, false),
+];
+
+pub const ENUM_EXAMPLES: &[(&str, bool)] = &[
+ (
+ r#"{"type": "enum", "name": "Test", "symbols": ["A", "B"]}"#,
+ true,
+ ),
+ (
+ r#"{
+ "type": "enum",
+ "name": "Status",
+ "symbols": "Normal Caution Critical"
+ }"#,
+ false,
+ ),
+ (
+ r#"{
+ "type": "enum",
+ "name": [ 0, 1, 1, 2, 3, 5, 8 ],
+ "symbols": ["Golden", "Mean"]
+ }"#,
+ false,
+ ),
+ (
+ r#"{
+ "type": "enum",
+ "symbols" : ["I", "will", "fail", "no", "name"]
+ }"#,
+ false,
+ ),
+ (
+ r#"{
+ "type": "enum",
+ "name": "Test"
+ "symbols" : ["AA", "AA"]
+ }"#,
+ false,
+ ),
+];
+
+pub const ARRAY_EXAMPLES: &[(&str, bool)] = &[
+ (r#"{"type": "array", "items": "long"}"#, true),
+ (
+ r#"{
+ "type": "array",
+ "items": {"type": "enum", "name": "Test", "symbols": ["A",
"B"]}
+ }"#,
+ true,
+ ),
+];
+
+pub const MAP_EXAMPLES: &[(&str, bool)] = &[
+ (r#"{"type": "map", "values": "long"}"#, true),
+ (
+ r#"{
+ "type": "map",
+ "values": {"type": "enum", "name": "Test", "symbols": ["A",
"B"]}
+ }"#,
+ true,
+ ),
+];
+
+pub const UNION_EXAMPLES: &[(&str, bool)] = &[
+ (r#"["string", "null", "long"]"#, true),
+ (r#"["null", "null"]"#, false),
+ (r#"["long", "long"]"#, false),
+ (
+ r#"[
+ {"type": "array", "items": "long"}
+ {"type": "array", "items": "string"}
+ ]"#,
+ false,
+ ),
+ // Unions with default values
+ (
+ r#"{"name": "foo", "type": ["string", "long"], "default": "bar"}"#,
+ true,
+ ),
+ (
+ r#"{"name": "foo", "type": ["long", "string"], "default": 1}"#,
+ true,
+ ),
+ (
+ r#"{"name": "foo", "type": ["null", "string"], "default": null}"#,
+ true,
+ ),
+ (
+ r#"{"name": "foo", "type": ["string", "long"], "default": 1}"#,
+ true,
+ ),
+ (
+ r#"{"name": "foo", "type": ["string", "null"], "default": null}"#,
+ true,
+ ),
+ (
+ r#"{"name": "foo", "type": ["null", "string"], "default": "null"}"#,
+ true,
+ ),
+ (
+ r#"{"name": "foo", "type": ["long", "string"], "default": "str"}"#,
+ true,
+ ),
+];
+
+pub const RECORD_EXAMPLES: &[(&str, bool)] = &[
+ (
+ r#"{
+ "type": "record",
+ "name": "Test",
+ "fields": [{"name": "f", "type": "long"}]
+ }"#,
+ true,
+ ),
+ (
+ r#"{
+ "type": "error",
+ "name": "Test",
+ "fields": [{"name": "f", "type": "long"}]
+ }"#,
+ false,
+ ),
+ (
+ r#"{
+ "type": "record",
+ "name": "Node",
+ "fields": [
+ {"name": "label", "type": "string"},
+ {"name": "children", "type": {"type": "array", "items":
"Node"}}
+ ]
+ }"#,
+ true,
+ ),
+ (
+ r#"{
+ "type": "record",
+ "name": "Lisp",
+ "fields": [
+ {
+ "name": "value",
+ "type": [
+ "null", "string",
+ {
+ "type": "record",
+ "name": "Cons",
+ "fields": [
+ {"name": "car", "type": "Lisp"},
+ {"name": "cdr", "type": "Lisp"}
+ ]
+ }
+ ]
+ }
+ ]
+ }"#,
+ true,
+ ),
+ (
+ r#"{
+ "type": "record",
+ "name": "HandshakeRequest",
+ "namespace": "org.apache.avro.ipc",
+ "fields": [
+ {"name": "clientHash", "type": {"type": "fixed", "name":
"MD5", "size": 16}},
+ {"name": "clientProtocol", "type": ["null", "string"]},
+ {"name": "serverHash", "type": "MD5"},
+ {"name": "meta", "type": ["null", {"type": "map", "values":
"bytes"}]}
+ ]
+ }"#,
+ true,
+ ),
+ (
+ r#"{
+ "type":"record",
+ "name":"HandshakeResponse",
+ "namespace":"org.apache.avro.ipc",
+ "fields":[
+ {
+ "name":"match",
+ "type":{
+ "type":"enum",
+ "name":"HandshakeMatch",
+ "symbols":["BOTH", "CLIENT", "NONE"]
+ }
+ },
+ {"name":"serverProtocol", "type":["null", "string"]},
+ {
+ "name":"serverHash",
+ "type":["null", {"name":"MD5", "size":16,
"type":"fixed"}]
+ },
+ {
+ "name":"meta",
+ "type":["null", {"type":"map", "values":"bytes"}]
+ }
+ ]
+ }"#,
+ true,
+ ),
+ (
+ r#"{
+ "type":"record",
+ "name":"HandshakeResponse",
+ "namespace":"org.apache.avro.ipc",
+ "fields":[
+ {
+ "name":"match",
+ "type":{
+ "type":"enum",
+ "name":"HandshakeMatch",
+ "symbols":["BOTH", "CLIENT", "NONE"]
+ }
+ },
+ {"name":"serverProtocol", "type":["null", "string"]},
+ {
+ "name":"serverHash",
+ "type":["null", { "name":"MD5", "size":16,
"type":"fixed"}]
+ },
+ {"name":"meta", "type":["null", { "type":"map",
"values":"bytes"}]}
+ ]
+ }"#,
+ true,
+ ),
+ // Unions may not contain more than one schema with the same type, except for the named
+ // types record, fixed and enum. For example, unions containing two array types or two map
+ // types are not permitted, but two types with different names are permitted.
+ // (Names permit efficient resolution when reading and writing unions.)
+ (
+ r#"{
+ "type": "record",
+ "name": "ipAddr",
+ "fields": [
+ {
+ "name": "addr",
+ "type": [
+ {"name": "IPv6", "type": "fixed", "size": 16},
+ {"name": "IPv4", "type": "fixed", "size": 4}
+ ]
+ }
+ ]
+ }"#,
+ true,
+ ),
+ (
+ r#"{
+ "type": "record",
+ "name": "Address",
+ "fields": [
+ {"type": "string"},
+ {"type": "string", "name": "City"}
+ ]
+ }"#,
+ false,
+ ),
+ (
+ r#"{
+ "type": "record",
+ "name": "Event",
+ "fields": [{"name": "Sponsor"}, {"name": "City", "type":
"string"}]
+ }"#,
+ false,
+ ),
+ (
+ r#"{
+ "type": "record",
+ "fields": "His vision, from the constantly passing bars,"
+ "name",
+ "Rainer"
+ }"#,
+ false,
+ ),
+ (
+ r#"{
+ "name": ["Tom", "Jerry"],
+ "type": "record",
+ "fields": [{"name": "name", "type": "string"}]
+ }"#,
+ false,
+ ),
+];
+
+pub const DOC_EXAMPLES: &[(&str, bool)] = &[
+ (
+ r#"{
+ "type": "record",
+ "name": "TestDoc",
+ "doc": "Doc string",
+ "fields": [{"name": "name", "type": "string", "doc" : "Doc
String"}]
+ }"#,
+ true,
+ ),
+ (
+ r#"{"type": "enum", "name": "Test", "symbols": ["A", "B"], "doc": "Doc
String"}"#,
+ true,
+ ),
+ (
+ r#"{"type": "fixed", "name": "Test", "size": 1, "doc": "Fixed Doc
String"}"#,
+ true,
+ ),
+];
+
+pub const OTHER_ATTRIBUTES_EXAMPLES: &[(&str, bool)] = &[
+ (
+ r#"{
+ "type": "record",
+ "name": "TestRecord",
+ "cp_string": "string",
+ "cp_int": 1,
+ "cp_array": [ 1, 2, 3, 4],
+ "fields": [
+ {"name": "f1", "type": "string", "cp_object":
{"a":1,"b":2}},
+ {"name": "f2", "type": "long", "cp_null": null}
+ ]
+ }"#,
+ true,
+ ),
+ (
+ r#"{"type": "map", "values": "long", "cp_boolean": true}"#,
+ true,
+ ),
+ (
+ r#"{
+ "type": "enum",
+ "name": "TestEnum",
+ "symbols": [ "one", "two", "three" ],
+ "cp_float" : 1.0
+ }"#,
+ true,
+ ),
+ (r#"{"type": "long", "date": "true"}"#, true),
+];
+
+pub const DECIMAL_LOGICAL_TYPE: &[(&str, bool)] = &[
+ (
+ r#"{
+ "type": {
+ "type": "fixed",
+ "name": "TestDecimal",
+ "size": 10
+ },
+ "logicalType": "decimal",
+ "precision": 4,
+ "scale": 2
+ }"#,
+ true,
+ ),
+ (
+ r#"{
+ "type": {
+ "type": "fixed",
+ "name": "ScaleIsImplicitlyZero",
+ "size": 10
+ },
+ "logicalType": "decimal",
+ "precision": 4
+ }"#,
+ true,
+ ),
+ (
+ r#"{
+ "type": {
+ "type": "fixed",
+ "name": "PrecisionMustBeGreaterThanZero",
+ "size": 10
+ },
+ "logicalType": "decimal",
+ "precision": 0
+ }"#,
+ true,
+ ),
+ (
+ r#"{
+ "type": "fixed",
+ "logicalType": "decimal",
+ "name": "TestDecimal",
+ "precision": 10,
+ "scale": 2,
+ "size": 18
+ }"#,
+ true,
+ ),
+ (
+ r#"{
+ "type": "bytes",
+ "logicalType": "decimal",
+ "precision": 4,
+ "scale": 2
+ }"#,
+ true,
+ ),
+ (
+ r#"{
+ "type": "bytes",
+ "logicalType": "decimal",
+ "precision": 2,
+ "scale": -2
+ }"#,
+ true,
+ ),
+ (
+ r#"{
+ "type": "bytes",
+ "logicalType": "decimal",
+ "precision": -2,
+ "scale": 2
+ }"#,
+ true,
+ ),
+ (
+ r#"{
+ "type": "bytes",
+ "logicalType": "decimal",
+ "precision": 2,
+ "scale": 3
+ }"#,
+ true,
+ ),
+ (
+ r#"{
+ "type": "fixed",
+ "logicalType": "decimal",
+ "name": "TestDecimal",
+ "precision": -10,
+ "scale": 2,
+ "size": 5
+ }"#,
+ true,
+ ),
+ (
+ r#"{
+ "type": "fixed",
+ "logicalType": "decimal",
+ "name": "TestDecimal",
+ "precision": 2,
+ "scale": 3,
+ "size": 2
+ }"#,
+ true,
+ ),
+ (
+ r#"{
+ "type": "fixed",
+ "logicalType": "decimal",
+ "name": "TestDecimal",
+ "precision": 2,
+ "scale": 2,
+ "size": -2
+ }"#,
+ false,
+ ),
+];
+
+pub const DATE_LOGICAL_TYPE: &[(&str, bool)] = &[
+ (r#"{"type": "int", "logicalType": "date"}"#, true),
+ // this is valid even though its logical type is "date1", because unknown logical types are
+ // ignored
+ (r#"{"type": "int", "logicalType": "date1"}"#, true),
+ // this is still valid because unknown logicalType should be ignored
+ (r#"{"type": "long", "logicalType": "date"}"#, true),
+];
+
+pub const TIMEMILLIS_LOGICAL_TYPE: &[(&str, bool)] = &[
+ (r#"{"type": "int", "logicalType": "time-millis"}"#, true),
+ // this is valid even though its logical type is "time-milis" (missing the second "l"),
+ // because unknown logical types are ignored
+ (r#"{"type": "int", "logicalType": "time-milis"}"#, true),
+ // this is still valid because unknown logicalType should be ignored
+ (r#"{"type": "long", "logicalType": "time-millis"}"#, true),
+];
+
+pub const TIMEMICROS_LOGICAL_TYPE: &[(&str, bool)] = &[
+ (r#"{"type": "long", "logicalType": "time-micros"}"#, true),
+ // this is valid even though its logical type is "time-micro" (missing the last "s"), because
+ // unknown logical types are ignored
+ (r#"{"type": "long", "logicalType": "time-micro"}"#, true),
+ // this is still valid because unknown logicalType should be ignored
+ (r#"{"type": "int", "logicalType": "time-micros"}"#, true),
+];
+
+pub const TIMESTAMPMILLIS_LOGICAL_TYPE: &[(&str, bool)] = &[
+ (
+ r#"{"type": "long", "logicalType": "timestamp-millis"}"#,
+ true,
+ ),
+ // this is valid even though its logical type is "timestamp-milis" (missing the second "l"), because
+ // unknown logical types are ignored
+ (
+ r#"{"type": "long", "logicalType": "timestamp-milis"}"#,
+ true,
+ ),
+ (
+ // this is still valid because unknown logicalType should be ignored
+ r#"{"type": "int", "logicalType": "timestamp-millis"}"#,
+ true,
+ ),
+];
+
+pub const TIMESTAMPMICROS_LOGICAL_TYPE: &[(&str, bool)] = &[
+ (
+ r#"{"type": "long", "logicalType": "timestamp-micros"}"#,
+ true,
+ ),
+ // this is valid even though its logical type is "timestamp-micro" (missing the last "s"), because
+ // unknown logical types are ignored
+ (
+ r#"{"type": "long", "logicalType": "timestamp-micro"}"#,
+ true,
+ ),
+ (
+ // this is still valid because unknown logicalType should be ignored
+ r#"{"type": "int", "logicalType": "timestamp-micros"}"#,
+ true,
+ ),
+];
+
+pub const LOCAL_TIMESTAMPMILLIS_LOGICAL_TYPE: &[(&str, bool)] = &[
+ (
+ r#"{"type": "long", "logicalType": "local-timestamp-millis"}"#,
+ true,
+ ),
+ // this is valid even though its logical type is "local-timestamp-milis" (missing the second "l"), because
+ // unknown logical types are ignored
+ (
+ r#"{"type": "long", "logicalType": "local-timestamp-milis"}"#,
+ true,
+ ),
+ (
+ // this is still valid because unknown logicalType should be ignored
+ r#"{"type": "int", "logicalType": "local-timestamp-millis"}"#,
+ true,
+ ),
+];
+
+pub const LOCAL_TIMESTAMPMICROS_LOGICAL_TYPE: &[(&str, bool)] = &[
+ (
+ r#"{"type": "long", "logicalType": "local-timestamp-micros"}"#,
+ true,
+ ),
+ // this is valid even though its logical type is "local-timestamp-micro" (missing the last "s"), because
+ // unknown logical types are ignored
+ (
+ r#"{"type": "long", "logicalType": "local-timestamp-micro"}"#,
+ true,
+ ),
+ (
+ // this is still valid because unknown logicalType should be ignored
+ r#"{"type": "int", "logicalType": "local-timestamp-micros"}"#,
+ true,
+ ),
+];
+
+pub fn examples() -> &'static Vec<(&'static str, bool)> {
+ static EXAMPLES_ONCE: OnceLock<Vec<(&'static str, bool)>> = OnceLock::new();
+ EXAMPLES_ONCE.get_or_init(|| {
+ Vec::new()
+ .iter()
+ .copied()
+ .chain(PRIMITIVE_EXAMPLES.iter().copied())
+ .chain(FIXED_EXAMPLES.iter().copied())
+ .chain(ENUM_EXAMPLES.iter().copied())
+ .chain(ARRAY_EXAMPLES.iter().copied())
+ .chain(MAP_EXAMPLES.iter().copied())
+ .chain(UNION_EXAMPLES.iter().copied())
+ .chain(RECORD_EXAMPLES.iter().copied())
+ .chain(DOC_EXAMPLES.iter().copied())
+ .chain(OTHER_ATTRIBUTES_EXAMPLES.iter().copied())
+ .chain(DECIMAL_LOGICAL_TYPE.iter().copied())
+ .chain(DATE_LOGICAL_TYPE.iter().copied())
+ .chain(TIMEMILLIS_LOGICAL_TYPE.iter().copied())
+ .chain(TIMEMICROS_LOGICAL_TYPE.iter().copied())
+ .chain(TIMESTAMPMILLIS_LOGICAL_TYPE.iter().copied())
+ .chain(TIMESTAMPMICROS_LOGICAL_TYPE.iter().copied())
+ .chain(LOCAL_TIMESTAMPMILLIS_LOGICAL_TYPE.iter().copied())
+ .chain(LOCAL_TIMESTAMPMICROS_LOGICAL_TYPE.iter().copied())
+ .collect()
+ })
+}
+
+pub fn valid_examples() -> &'static Vec<(&'static str, bool)> {
+ static VALID_EXAMPLES_ONCE: OnceLock<Vec<(&'static str, bool)>> = OnceLock::new();
+ VALID_EXAMPLES_ONCE.get_or_init(|| examples().iter().copied().filter(|s| s.1).collect())
+}
diff --git a/lang/rust/avro_test_helper/src/lib.rs b/lang/rust/avro_test_helper/src/lib.rs
index e316dc818..f9fd05030 100644
--- a/lang/rust/avro_test_helper/src/lib.rs
+++ b/lang/rust/avro_test_helper/src/lib.rs
@@ -26,6 +26,7 @@ thread_local! {
pub(crate) static LOG_MESSAGES: RefCell<Vec<String>> = const { RefCell::new(Vec::new()) };
}
+pub mod data;
pub mod logger;
#[cfg(not(target_arch = "wasm32"))]