This is an automated email from the ASF dual-hosted git repository.

mgrigorov pushed a commit to branch avro-3900-custom-name-validators
in repository https://gitbox.apache.org/repos/asf/avro.git

commit 5b9df53e941369513a2507a47e5faf6f7f8f6574
Author: Martin Tzvetanov Grigorov <[email protected]>
AuthorDate: Wed Dec 20 16:13:06 2023 +0200

    AVRO-3900: [Rust] Make it possible to use custom validators
    
    WIP
    
    TODO:
    - Extract validators for enum symbols and record field names
    - Polish the APIs
    
    Signed-off-by: Martin Tzvetanov Grigorov <[email protected]>
---
 lang/rust/avro/src/lib.rs       |   1 +
 lang/rust/avro/src/schema.rs    |  61 ++-----------
 lang/rust/avro/src/validator.rs | 186 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 196 insertions(+), 52 deletions(-)

diff --git a/lang/rust/avro/src/lib.rs b/lang/rust/avro/src/lib.rs
index 2d9d79a76..64d7d8240 100644
--- a/lang/rust/avro/src/lib.rs
+++ b/lang/rust/avro/src/lib.rs
@@ -782,6 +782,7 @@ pub mod rabin;
 pub mod schema;
 pub mod schema_compatibility;
 pub mod types;
+pub mod validator;
 
 pub use codec::Codec;
 pub use de::from_value;
diff --git a/lang/rust/avro/src/schema.rs b/lang/rust/avro/src/schema.rs
index 258ea87b0..d56657700 100644
--- a/lang/rust/avro/src/schema.rs
+++ b/lang/rust/avro/src/schema.rs
@@ -16,7 +16,13 @@
 // under the License.
 
 //! Logic for parsing and interacting with schemas in Avro format.
-use crate::{error::Error, types, util::MapHelper, AvroResult};
+use crate::{
+    error::Error,
+    types,
+    util::MapHelper,
+    validator::{validate_name, validate_namespace},
+    AvroResult,
+};
 use digest::Digest;
 use regex_lite::Regex;
 use serde::{
@@ -42,29 +48,11 @@ fn enum_symbol_name_r() -> &'static Regex {
     ENUM_SYMBOL_NAME_ONCE.get_or_init(|| 
Regex::new(r"^[A-Za-z_][A-Za-z0-9_]*$").unwrap())
 }
 
-// An optional namespace (with optional dots) followed by a name without any 
dots in it.
-fn schema_name_r() -> &'static Regex {
-    static SCHEMA_NAME_ONCE: OnceLock<Regex> = OnceLock::new();
-    SCHEMA_NAME_ONCE.get_or_init(|| {
-        Regex::new(
-            
r"^((?P<namespace>([A-Za-z_][A-Za-z0-9_]*(\.[A-Za-z_][A-Za-z0-9_]*)*)?)\.)?(?P<name>[A-Za-z_][A-Za-z0-9_]*)$",
-        )
-        .unwrap()
-    })
-}
-
 fn field_name_r() -> &'static Regex {
     static FIELD_NAME_ONCE: OnceLock<Regex> = OnceLock::new();
     FIELD_NAME_ONCE.get_or_init(|| 
Regex::new(r"^[A-Za-z_][A-Za-z0-9_]*$").unwrap())
 }
 
-fn namespace_r() -> &'static Regex {
-    static NAMESPACE_ONCE: OnceLock<Regex> = OnceLock::new();
-    NAMESPACE_ONCE.get_or_init(|| {
-        
Regex::new(r"^([A-Za-z_][A-Za-z0-9_]*(\.[A-Za-z_][A-Za-z0-9_]*)*)?$").unwrap()
-    })
-}
-
 /// Represents an Avro schema fingerprint
 /// More information about Avro schema fingerprints can be found in the
 /// [Avro Schema Fingerprint 
documentation](https://avro.apache.org/docs/current/spec.html#schema_fingerprints)
@@ -268,13 +256,7 @@ impl Name {
     }
 
     fn get_name_and_namespace(name: &str) -> AvroResult<(String, Namespace)> {
-        let caps = schema_name_r()
-            .captures(name)
-            .ok_or_else(|| Error::InvalidSchemaName(name.to_string(), 
schema_name_r().as_str()))?;
-        Ok((
-            caps["name"].to_string(),
-            caps.name("namespace").map(|s| s.as_str().to_string()),
-        ))
+        validate_name(name)
     }
 
     /// Parse a `serde_json::Value` into a `Name`.
@@ -301,12 +283,7 @@ impl Name {
             .filter(|ns| !ns.is_empty());
 
         if let Some(ref ns) = namespace {
-            if !namespace_r().is_match(ns) {
-                return Err(Error::InvalidNamespace(
-                    ns.to_string(),
-                    namespace_r().as_str(),
-                ));
-            }
+            validate_namespace(ns)?;
         }
 
         Ok(Self {
@@ -6203,26 +6180,6 @@ mod tests {
         Ok(())
     }
 
-    #[test]
-    fn test_avro_3897_disallow_invalid_namespaces_in_fully_qualified_name() -> 
TestResult {
-        let full_name = "ns.0.record1";
-        let name = Name::new(full_name);
-        assert!(name.is_err());
-        let expected =
-            Error::InvalidSchemaName(full_name.to_string(), 
schema_name_r().as_str()).to_string();
-        let err = name.map_err(|e| e.to_string()).err().unwrap();
-        assert_eq!(expected, err);
-
-        let full_name = "ns..record1";
-        let name = Name::new(full_name);
-        assert!(name.is_err());
-        let expected =
-            Error::InvalidSchemaName(full_name.to_string(), 
schema_name_r().as_str()).to_string();
-        let err = name.map_err(|e| e.to_string()).err().unwrap();
-        assert_eq!(expected, err);
-        Ok(())
-    }
-
     /// A test cases showing that names and namespaces can be constructed
     /// entirely by underscores.
     #[test]
diff --git a/lang/rust/avro/src/validator.rs b/lang/rust/avro/src/validator.rs
new file mode 100644
index 000000000..0bc604cbb
--- /dev/null
+++ b/lang/rust/avro/src/validator.rs
@@ -0,0 +1,186 @@
+use crate::{schema::Namespace, AvroResult, Error};
+use regex_lite::Regex;
+use std::{fmt::Debug, sync::OnceLock};
+
+pub trait NameValidator {
+    fn regex(&self) -> &'static Regex;
+
+    fn validate(&self, name: &str) -> AvroResult<(String, Namespace)>;
+}
+
+struct DefaultNameValidator;
+
+impl NameValidator for DefaultNameValidator {
+    fn regex(&self) -> &'static Regex {
+        static SCHEMA_NAME_ONCE: OnceLock<Regex> = OnceLock::new();
+        SCHEMA_NAME_ONCE.get_or_init(|| {
+            Regex::new(
+                // An optional namespace (with optional dots) followed by a 
name without any dots in it.
+                
r"^((?P<namespace>([A-Za-z_][A-Za-z0-9_]*(\.[A-Za-z_][A-Za-z0-9_]*)*)?)\.)?(?P<name>[A-Za-z_][A-Za-z0-9_]*)$",
+            )
+            .unwrap()
+        })
+    }
+
+    fn validate(&self, schema_name: &str) -> AvroResult<(String, Namespace)> {
+        let regex = self.regex();
+        let caps = regex
+            .captures(schema_name)
+            .ok_or_else(|| Error::InvalidSchemaName(schema_name.to_string(), 
regex.as_str()))?;
+        Ok((
+            caps["name"].to_string(),
+            caps.name("namespace").map(|s| s.as_str().to_string()),
+        ))
+    }
+}
+
+static NAME_VALIDATOR_ONCE: OnceLock<Box<dyn NameValidator + Send + Sync>> = 
OnceLock::new();
+
+#[allow(dead_code)]
+pub fn set_name_validator(
+    validator: Box<dyn NameValidator + Send + Sync>,
+) -> Result<(), Box<dyn NameValidator + Send + Sync>> {
+    NAME_VALIDATOR_ONCE.set(validator)
+}
+
+pub(crate) fn validate_name(schema_name: &str) -> AvroResult<(String, 
Namespace)> {
+    NAME_VALIDATOR_ONCE
+        .get_or_init(|| {
+            debug!("Going to use the default name validator.");
+            Box::new(DefaultNameValidator)
+        })
+        .validate(schema_name)
+}
+
+pub trait NamespaceValidator: Sync + Debug {
+    fn validate(&self, name: &str) -> AvroResult<()>;
+}
+
+#[derive(Debug)]
+pub(crate) struct DefaultNamespaceValidator;
+
+impl NamespaceValidator for DefaultNamespaceValidator {
+    fn validate(&self, ns: &str) -> AvroResult<()> {
+        static NAMESPACE_ONCE: OnceLock<Regex> = OnceLock::new();
+        let regex = NAMESPACE_ONCE.get_or_init(|| {
+            
Regex::new(r"^([A-Za-z_][A-Za-z0-9_]*(\.[A-Za-z_][A-Za-z0-9_]*)*)?$").unwrap()
+        });
+
+        if !regex.is_match(ns) {
+            return Err(Error::InvalidNamespace(ns.to_string(), 
regex.as_str()));
+        } else {
+            Ok(())
+        }
+    }
+}
+
+static NAMESPACE_VALIDATOR_ONCE: OnceLock<Box<dyn NamespaceValidator + Send + 
Sync>> =
+    OnceLock::new();
+
+#[allow(dead_code)]
+pub fn set_namespace_validator(
+    validator: Box<dyn NamespaceValidator + Send + Sync>,
+) -> Result<(), Box<dyn NamespaceValidator + Send + Sync>> {
+    NAMESPACE_VALIDATOR_ONCE.set(validator)
+}
+
+pub(crate) fn validate_namespace(ns: &str) -> AvroResult<()> {
+    NAMESPACE_VALIDATOR_ONCE
+        .get_or_init(|| {
+            debug!("Going to use the default namespace validator.");
+            Box::new(DefaultNamespaceValidator)
+        })
+        .validate(ns)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::schema::Name;
+    use apache_avro_test_helper::TestResult;
+
+    #[test]
+    fn avro_3900_default_name_validator_with_valid_ns() -> TestResult {
+        validate_name("example")?;
+
+        Ok(())
+    }
+
+    #[test]
+    fn avro_3900_default_name_validator_with_invalid_ns() -> TestResult {
+        assert!(validate_name("com-example").is_err());
+
+        Ok(())
+    }
+
+    #[test]
+    fn avro_3900_custom_name_validator_with_spec_invalid_ns() -> TestResult {
+        #[derive(Debug)]
+        struct CustomNameValidator;
+        impl NameValidator for CustomNameValidator {
+            fn regex(&self) -> &'static Regex {
+                unimplemented!()
+            }
+
+            fn validate(&self, schema_name: &str) -> AvroResult<(String, 
Namespace)> {
+                Ok((schema_name.to_string(), None))
+            }
+        }
+
+        assert!(set_name_validator(Box::new(CustomNameValidator)).is_ok());
+        validate_name("com-example")?;
+
+        Ok(())
+    }
+
+    #[test]
+    fn test_avro_3897_disallow_invalid_namespaces_in_fully_qualified_name() -> 
TestResult {
+        let full_name = "ns.0.record1";
+        let name = Name::new(full_name);
+        assert!(name.is_err());
+        let validator = DefaultNameValidator;
+        let expected =
+            Error::InvalidSchemaName(full_name.to_string(), 
validator.regex().as_str()).to_string();
+        let err = name.map_err(|e| e.to_string()).err().unwrap();
+        pretty_assertions::assert_eq!(expected, err);
+
+        let full_name = "ns..record1";
+        let name = Name::new(full_name);
+        assert!(name.is_err());
+        let expected =
+            Error::InvalidSchemaName(full_name.to_string(), 
validator.regex().as_str()).to_string();
+        let err = name.map_err(|e| e.to_string()).err().unwrap();
+        pretty_assertions::assert_eq!(expected, err);
+        Ok(())
+    }
+
+    #[test]
+    fn avro_3900_default_namespace_validator_with_valid_ns() -> TestResult {
+        validate_namespace("com.example")?;
+
+        Ok(())
+    }
+
+    #[test]
+    fn avro_3900_default_namespace_validator_with_invalid_ns() -> TestResult {
+        assert!(validate_namespace("com-example").is_err());
+
+        Ok(())
+    }
+
+    #[test]
+    fn avro_3900_custom_namespace_validator_with_spec_invalid_ns() -> 
TestResult {
+        #[derive(Debug)]
+        struct CustomNamespaceValidator;
+        impl NamespaceValidator for CustomNamespaceValidator {
+            fn validate(&self, _ns: &str) -> AvroResult<()> {
+                Ok(())
+            }
+        }
+
+        
assert!(set_namespace_validator(Box::new(CustomNamespaceValidator)).is_ok());
+        validate_namespace("com-example")?;
+
+        Ok(())
+    }
+}

Reply via email to