This is an automated email from the ASF dual-hosted git repository. mgrigorov pushed a commit to branch avro-3900-custom-name-validators in repository https://gitbox.apache.org/repos/asf/avro.git
commit 5b9df53e941369513a2507a47e5faf6f7f8f6574 Author: Martin Tzvetanov Grigorov <[email protected]> AuthorDate: Wed Dec 20 16:13:06 2023 +0200 AVRO-3900: [Rust] Make it possible to use custom validators WIP TODO: - Extract validators for enum symbols and record field names - Polish the APIs Signed-off-by: Martin Tzvetanov Grigorov <[email protected]> --- lang/rust/avro/src/lib.rs | 1 + lang/rust/avro/src/schema.rs | 61 ++----------- lang/rust/avro/src/validator.rs | 186 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 196 insertions(+), 52 deletions(-) diff --git a/lang/rust/avro/src/lib.rs b/lang/rust/avro/src/lib.rs index 2d9d79a76..64d7d8240 100644 --- a/lang/rust/avro/src/lib.rs +++ b/lang/rust/avro/src/lib.rs @@ -782,6 +782,7 @@ pub mod rabin; pub mod schema; pub mod schema_compatibility; pub mod types; +pub mod validator; pub use codec::Codec; pub use de::from_value; diff --git a/lang/rust/avro/src/schema.rs b/lang/rust/avro/src/schema.rs index 258ea87b0..d56657700 100644 --- a/lang/rust/avro/src/schema.rs +++ b/lang/rust/avro/src/schema.rs @@ -16,7 +16,13 @@ // under the License. //! Logic for parsing and interacting with schemas in Avro format. -use crate::{error::Error, types, util::MapHelper, AvroResult}; +use crate::{ + error::Error, + types, + util::MapHelper, + validator::{validate_name, validate_namespace}, + AvroResult, +}; use digest::Digest; use regex_lite::Regex; use serde::{ @@ -42,29 +48,11 @@ fn enum_symbol_name_r() -> &'static Regex { ENUM_SYMBOL_NAME_ONCE.get_or_init(|| Regex::new(r"^[A-Za-z_][A-Za-z0-9_]*$").unwrap()) } -// An optional namespace (with optional dots) followed by a name without any dots in it. -fn schema_name_r() -> &'static Regex { - static SCHEMA_NAME_ONCE: OnceLock<Regex> = OnceLock::new(); - SCHEMA_NAME_ONCE.get_or_init(|| { - Regex::new( - r"^((?P<namespace>([A-Za-z_][A-Za-z0-9_]*(\.[A-Za-z_][A-Za-z0-9_]*)*)?)\.)?(?P<name>[A-Za-z_][A-Za-z0-9_]*)$", - ) - .unwrap() - }) -} - fn field_name_r() -> &'static Regex { static FIELD_NAME_ONCE: OnceLock<Regex> = OnceLock::new(); FIELD_NAME_ONCE.get_or_init(|| Regex::new(r"^[A-Za-z_][A-Za-z0-9_]*$").unwrap()) } -fn namespace_r() -> &'static Regex { - static NAMESPACE_ONCE: OnceLock<Regex> = OnceLock::new(); - NAMESPACE_ONCE.get_or_init(|| { - Regex::new(r"^([A-Za-z_][A-Za-z0-9_]*(\.[A-Za-z_][A-Za-z0-9_]*)*)?$").unwrap() - }) -} - /// Represents an Avro schema fingerprint /// More information about Avro schema fingerprints can be found in the /// [Avro Schema Fingerprint documentation](https://avro.apache.org/docs/current/spec.html#schema_fingerprints) @@ -268,13 +256,7 @@ impl Name { } fn get_name_and_namespace(name: &str) -> AvroResult<(String, Namespace)> { - let caps = schema_name_r() - .captures(name) - .ok_or_else(|| Error::InvalidSchemaName(name.to_string(), schema_name_r().as_str()))?; - Ok(( - caps["name"].to_string(), - caps.name("namespace").map(|s| s.as_str().to_string()), - )) + validate_name(name) } /// Parse a `serde_json::Value` into a `Name`. @@ -301,12 +283,7 @@ impl Name { .filter(|ns| !ns.is_empty()); if let Some(ref ns) = namespace { - if !namespace_r().is_match(ns) { - return Err(Error::InvalidNamespace( - ns.to_string(), - namespace_r().as_str(), - )); - } + validate_namespace(ns)?; } Ok(Self { @@ -6203,26 +6180,6 @@ mod tests { Ok(()) } - #[test] - fn test_avro_3897_disallow_invalid_namespaces_in_fully_qualified_name() -> TestResult { - let full_name = "ns.0.record1"; - let name = Name::new(full_name); - assert!(name.is_err()); - let expected = - Error::InvalidSchemaName(full_name.to_string(), schema_name_r().as_str()).to_string(); - let err = name.map_err(|e| e.to_string()).err().unwrap(); - assert_eq!(expected, err); - - let full_name = "ns..record1"; - let name = Name::new(full_name); - assert!(name.is_err()); - let expected = - Error::InvalidSchemaName(full_name.to_string(), schema_name_r().as_str()).to_string(); - let err = name.map_err(|e| e.to_string()).err().unwrap(); - assert_eq!(expected, err); - Ok(()) - } - /// A test cases showing that names and namespaces can be constructed /// entirely by underscores. #[test] diff --git a/lang/rust/avro/src/validator.rs b/lang/rust/avro/src/validator.rs new file mode 100644 index 000000000..0bc604cbb --- /dev/null +++ b/lang/rust/avro/src/validator.rs @@ -0,0 +1,186 @@ +use crate::{schema::Namespace, AvroResult, Error}; +use regex_lite::Regex; +use std::{fmt::Debug, sync::OnceLock}; + +pub trait NameValidator { + fn regex(&self) -> &'static Regex; + + fn validate(&self, name: &str) -> AvroResult<(String, Namespace)>; +} + +struct DefaultNameValidator; + +impl NameValidator for DefaultNameValidator { + fn regex(&self) -> &'static Regex { + static SCHEMA_NAME_ONCE: OnceLock<Regex> = OnceLock::new(); + SCHEMA_NAME_ONCE.get_or_init(|| { + Regex::new( + // An optional namespace (with optional dots) followed by a name without any dots in it. + r"^((?P<namespace>([A-Za-z_][A-Za-z0-9_]*(\.[A-Za-z_][A-Za-z0-9_]*)*)?)\.)?(?P<name>[A-Za-z_][A-Za-z0-9_]*)$", + ) + .unwrap() + }) + } + + fn validate(&self, schema_name: &str) -> AvroResult<(String, Namespace)> { + let regex = self.regex(); + let caps = regex + .captures(schema_name) + .ok_or_else(|| Error::InvalidSchemaName(schema_name.to_string(), regex.as_str()))?; + Ok(( + caps["name"].to_string(), + caps.name("namespace").map(|s| s.as_str().to_string()), + )) + } +} + +static NAME_VALIDATOR_ONCE: OnceLock<Box<dyn NameValidator + Send + Sync>> = OnceLock::new(); + +#[allow(dead_code)] +pub fn set_name_validator( + validator: Box<dyn NameValidator + Send + Sync>, +) -> Result<(), Box<dyn NameValidator + Send + Sync>> { + NAME_VALIDATOR_ONCE.set(validator) +} + +pub(crate) fn validate_name(schema_name: &str) -> AvroResult<(String, Namespace)> { + NAME_VALIDATOR_ONCE + .get_or_init(|| { + debug!("Going to use the default name validator."); + Box::new(DefaultNameValidator) + }) + .validate(schema_name) +} + +pub trait NamespaceValidator: Sync + Debug { + fn validate(&self, name: &str) -> AvroResult<()>; +} + +#[derive(Debug)] +pub(crate) struct DefaultNamespaceValidator; + +impl NamespaceValidator for DefaultNamespaceValidator { + fn validate(&self, ns: &str) -> AvroResult<()> { + static NAMESPACE_ONCE: OnceLock<Regex> = OnceLock::new(); + let regex = NAMESPACE_ONCE.get_or_init(|| { + Regex::new(r"^([A-Za-z_][A-Za-z0-9_]*(\.[A-Za-z_][A-Za-z0-9_]*)*)?$").unwrap() + }); + + if !regex.is_match(ns) { + return Err(Error::InvalidNamespace(ns.to_string(), regex.as_str())); + } else { + Ok(()) + } + } +} + +static NAMESPACE_VALIDATOR_ONCE: OnceLock<Box<dyn NamespaceValidator + Send + Sync>> = + OnceLock::new(); + +#[allow(dead_code)] +pub fn set_namespace_validator( + validator: Box<dyn NamespaceValidator + Send + Sync>, +) -> Result<(), Box<dyn NamespaceValidator + Send + Sync>> { + NAMESPACE_VALIDATOR_ONCE.set(validator) +} + +pub(crate) fn validate_namespace(ns: &str) -> AvroResult<()> { + NAMESPACE_VALIDATOR_ONCE + .get_or_init(|| { + debug!("Going to use the default namespace validator."); + Box::new(DefaultNamespaceValidator) + }) + .validate(ns) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::schema::Name; + use apache_avro_test_helper::TestResult; + + #[test] + fn avro_3900_default_name_validator_with_valid_ns() -> TestResult { + validate_name("example")?; + + Ok(()) + } + + #[test] + fn avro_3900_default_name_validator_with_invalid_ns() -> TestResult { + assert!(validate_name("com-example").is_err()); + + Ok(()) + } + + #[test] + fn avro_3900_custom_name_validator_with_spec_invalid_ns() -> TestResult { + #[derive(Debug)] + struct CustomNameValidator; + impl NameValidator for CustomNameValidator { + fn regex(&self) -> &'static Regex { + unimplemented!() + } + + fn validate(&self, schema_name: &str) -> AvroResult<(String, Namespace)> { + Ok((schema_name.to_string(), None)) + } + } + + assert!(set_name_validator(Box::new(CustomNameValidator)).is_ok()); + validate_name("com-example")?; + + Ok(()) + } + + #[test] + fn test_avro_3897_disallow_invalid_namespaces_in_fully_qualified_name() -> TestResult { + let full_name = "ns.0.record1"; + let name = Name::new(full_name); + assert!(name.is_err()); + let validator = DefaultNameValidator; + let expected = + Error::InvalidSchemaName(full_name.to_string(), validator.regex().as_str()).to_string(); + let err = name.map_err(|e| e.to_string()).err().unwrap(); + pretty_assertions::assert_eq!(expected, err); + + let full_name = "ns..record1"; + let name = Name::new(full_name); + assert!(name.is_err()); + let expected = + Error::InvalidSchemaName(full_name.to_string(), validator.regex().as_str()).to_string(); + let err = name.map_err(|e| e.to_string()).err().unwrap(); + pretty_assertions::assert_eq!(expected, err); + Ok(()) + } + + #[test] + fn avro_3900_default_namespace_validator_with_valid_ns() -> TestResult { + validate_namespace("com.example")?; + + Ok(()) + } + + #[test] + fn avro_3900_default_namespace_validator_with_invalid_ns() -> TestResult { + assert!(validate_namespace("com-example").is_err()); + + Ok(()) + } + + #[test] + fn avro_3900_custom_namespace_validator_with_spec_invalid_ns() -> TestResult { + #[derive(Debug)] + struct CustomNamespaceValidator; + impl NamespaceValidator for CustomNamespaceValidator { + fn validate(&self, _ns: &str) -> AvroResult<()> { + Ok(()) + } + } + + assert!(set_namespace_validator(Box::new(CustomNamespaceValidator)).is_ok()); + validate_namespace("com-example")?; + + Ok(()) + } +}
