This is an automated email from the ASF dual-hosted git repository.

mgrigorov pushed a commit to branch branch-1.11
in repository https://gitbox.apache.org/repos/asf/avro.git


The following commit(s) were added to refs/heads/branch-1.11 by this push:
     new dcb6f1319 AVRO-3900: [Rust] Make it possible to use custom names 
validators (#2643)
dcb6f1319 is described below

commit dcb6f131993f1530cdc0a7bdec7bf1c7807c37fb
Author: Martin Grigorov <[email protected]>
AuthorDate: Thu Jan 25 09:21:15 2024 +0200

    AVRO-3900: [Rust] Make it possible to use custom names validators (#2643)
    
    * AVRO-3900: [Rust] Make it possible to use custom validators
    
    * AVRO-3900: Add validator trait for enum symbol names
    
    * AVRO-3900: Introduce validator for RecordField's name
    
    * AVRO-3900: Extract the custom validators tests as integration tests
    
    This way they use the public APIs of the crate, as a real user application.
    
    * AVRO-3900: Polish APIs
    
    * AVRO-3900: Split the validator integration tests
    
    Otherwise there are timing issues with the initialization of the
    OnceLocks for the different validators.
    Parsing a Schema leads to initialization of the default validator,
    after which it is impossible to set a custom one.
    
    * AVRO-3900: Simplify generate_interop_data.rs example to not use 
avro_test_helper crate
    
    For some reason clearing the log messages in the tests destructor
    (#[dtor]) was failing for this example.
    
    Simplify the TestLogger to use the std thread_local!() instead of the
    third party ref_thread_local!()
    
    * AVRO-3900: Merge the validators integration tests in one test
    
    Just make sure the setup of the custom validators is done before the
    parsing of any schema, to prevent registering the default validator
    (SpecificationValidator).
    
    * AVRO-3900: Improve the documentation
    
    Signed-off-by: Martin Tzvetanov Grigorov <[email protected]>
    (cherry picked from commit 61df187141081c47d7cbdb304644ebf426695bf4)
---
 lang/rust/Cargo.lock                             |   7 -
 lang/rust/avro/README.md                         |  34 +++
 lang/rust/avro/examples/generate_interop_data.rs |   6 +-
 lang/rust/avro/examples/test_interop_data.rs     |   4 +-
 lang/rust/avro/src/lib.rs                        |  36 +++
 lang/rust/avro/src/schema.rs                     |  84 +-----
 lang/rust/avro/src/validator.rs                  | 318 +++++++++++++++++++++++
 lang/rust/avro/tests/validators.rs               |  85 ++++++
 lang/rust/avro_test_helper/Cargo.toml            |   1 -
 lang/rust/avro_test_helper/src/lib.rs            |   7 +-
 lang/rust/avro_test_helper/src/logger.rs         |  15 +-
 11 files changed, 504 insertions(+), 93 deletions(-)

diff --git a/lang/rust/Cargo.lock b/lang/rust/Cargo.lock
index b2d6ab812..5c0a5cfaf 100644
--- a/lang/rust/Cargo.lock
+++ b/lang/rust/Cargo.lock
@@ -120,7 +120,6 @@ dependencies = [
  "ctor",
  "env_logger",
  "log",
- "ref_thread_local",
 ]
 
 [[package]]
@@ -999,12 +998,6 @@ dependencies = [
  "bitflags 1.3.2",
 ]
 
-[[package]]
-name = "ref_thread_local"
-version = "0.1.1"
-source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "a0d51660a68078997855ba5602f73ab3a5031bd7ad480a9d4c90fbbf04e1fff0"
-
 [[package]]
 name = "regex"
 version = "1.10.2"
diff --git a/lang/rust/avro/README.md b/lang/rust/avro/README.md
index 4dc43aa97..256a7c3f2 100644
--- a/lang/rust/avro/README.md
+++ b/lang/rust/avro/README.md
@@ -651,6 +651,40 @@ let writers_schema = Schema::parse_str(r#"{"type": 
"array", "items":"long"}"#).u
 let readers_schema = Schema::parse_str(r#"{"type": "array", 
"items":"int"}"#).unwrap();
 assert_eq!(false, SchemaCompatibility::can_read(&writers_schema, 
&readers_schema));
 ```
+### Custom names validators
+
+By default the library follows the rules by the
+[Avro specification](https://avro.apache.org/docs/1.11.1/specification/#names)!
+
+Some of the other Apache Avro language SDKs are not that strict and allow more
+characters in names. For interoperability with those SDKs, the library provides
+a way to customize the names validation.
+
+```rust
+use apache_avro::AvroResult;
+use apache_avro::schema::Namespace;
+use apache_avro::validator::{SchemaNameValidator, set_schema_name_validator};
+
+struct MyCustomValidator;
+
+impl SchemaNameValidator for MyCustomValidator {
+    fn validate(&self, name: &str) -> AvroResult<(String, Namespace)> {
+        todo!()
+    }
+}
+
+// don't parse any schema before registering the custom validator(s) !
+
+set_schema_name_validator(Box::new(MyCustomValidator));
+
+// ... use the library
+```
+
+Similar logic could be applied to the schema namespace, enum symbols and field 
names validation.
+
+**Note**: the library allows to set a validator only once per the application 
lifetime!
+If the application parses schemas before setting a validator, the default 
validator will be
+registered and used!
 
 <!-- cargo-rdme end -->
 
diff --git a/lang/rust/avro/examples/generate_interop_data.rs 
b/lang/rust/avro/examples/generate_interop_data.rs
index 35a6dc7c0..29d50a144 100644
--- a/lang/rust/avro/examples/generate_interop_data.rs
+++ b/lang/rust/avro/examples/generate_interop_data.rs
@@ -20,9 +20,9 @@ use apache_avro::{
     types::{Record, Value},
     Codec, Writer,
 };
-use apache_avro_test_helper::TestResult;
 use std::{
     collections::HashMap,
+    error::Error,
     io::{BufWriter, Write},
 };
 use strum::IntoEnumIterator;
@@ -75,7 +75,7 @@ fn create_datum(schema: &Schema) -> Record {
     datum
 }
 
-fn main() -> TestResult {
+fn main() -> Result<(), Box<dyn Error>> {
     let schema_str = 
std::fs::read_to_string("../../share/test/schemas/interop.avsc")
         .expect("Unable to read the interop Avro schema");
     let schema = Schema::parse_str(schema_str.as_str())?;
@@ -105,7 +105,7 @@ fn main() -> TestResult {
     Ok(())
 }
 
-fn write_user_metadata<W: Write>(writer: &mut Writer<BufWriter<W>>) -> 
TestResult {
+fn write_user_metadata<W: Write>(writer: &mut Writer<BufWriter<W>>) -> 
Result<(), Box<dyn Error>> {
     writer.add_user_metadata("user_metadata".to_string(), b"someByteArray")?;
 
     Ok(())
diff --git a/lang/rust/avro/examples/test_interop_data.rs 
b/lang/rust/avro/examples/test_interop_data.rs
index 736b1fd7d..39c97d064 100644
--- a/lang/rust/avro/examples/test_interop_data.rs
+++ b/lang/rust/avro/examples/test_interop_data.rs
@@ -16,14 +16,14 @@
 // under the License.
 
 use apache_avro::Reader;
-use apache_avro_test_helper::TestResult;
 use std::{
     collections::HashMap,
+    error::Error,
     ffi::OsStr,
     io::{BufReader, Read},
 };
 
-fn main() -> TestResult {
+fn main() -> Result<(), Box<dyn Error>> {
     let mut expected_user_metadata: HashMap<String, Vec<u8>> = HashMap::new();
     expected_user_metadata.insert("user_metadata".to_string(), 
b"someByteArray".to_vec());
 
diff --git a/lang/rust/avro/src/lib.rs b/lang/rust/avro/src/lib.rs
index dbce3ed0b..c6d1dc1e3 100644
--- a/lang/rust/avro/src/lib.rs
+++ b/lang/rust/avro/src/lib.rs
@@ -764,6 +764,41 @@
 //! let readers_schema = Schema::parse_str(r#"{"type": "array", 
"items":"int"}"#).unwrap();
 //! assert_eq!(false, SchemaCompatibility::can_read(&writers_schema, 
&readers_schema));
 //! ```
+//! ## Custom names validators
+//!
+//! By default the library follows the rules by the
+//! [Avro 
specification](https://avro.apache.org/docs/1.11.1/specification/#names)!
+//!
+//! Some of the other Apache Avro language SDKs are not that strict and allow 
more
+//! characters in names. For interoperability with those SDKs, the library 
provides
+//! a way to customize the names validation.
+//!
+//! ```rust
+//! use apache_avro::AvroResult;
+//! use apache_avro::schema::Namespace;
+//! use apache_avro::validator::{SchemaNameValidator, 
set_schema_name_validator};
+//!
+//! struct MyCustomValidator;
+//!
+//! impl SchemaNameValidator for MyCustomValidator {
+//!     fn validate(&self, name: &str) -> AvroResult<(String, Namespace)> {
+//!         todo!()
+//!     }
+//! }
+//!
+//! // don't parse any schema before registering the custom validator(s) !
+//!
+//! set_schema_name_validator(Box::new(MyCustomValidator));
+//!
+//! // ... use the library
+//! ```
+//!
+//! Similar logic could be applied to the schema namespace, enum symbols and 
field names validation.
+//!
+//! **Note**: the library allows to set a validator only once per the 
application lifetime!
+//! If the application parses schemas before setting a validator, the default 
validator will be
+//! registered and used!
+//!
 
 mod bigdecimal;
 mod codec;
@@ -782,6 +817,7 @@ pub mod rabin;
 pub mod schema;
 pub mod schema_compatibility;
 pub mod types;
+pub mod validator;
 
 pub use codec::Codec;
 pub use de::from_value;
diff --git a/lang/rust/avro/src/schema.rs b/lang/rust/avro/src/schema.rs
index f792ae6f3..8f16c8170 100644
--- a/lang/rust/avro/src/schema.rs
+++ b/lang/rust/avro/src/schema.rs
@@ -16,9 +16,17 @@
 // under the License.
 
 //! Logic for parsing and interacting with schemas in Avro format.
-use crate::{error::Error, types, util::MapHelper, AvroResult};
+use crate::{
+    error::Error,
+    types,
+    util::MapHelper,
+    validator::{
+        validate_enum_symbol_name, validate_namespace, 
validate_record_field_name,
+        validate_schema_name,
+    },
+    AvroResult,
+};
 use digest::Digest;
-use regex_lite::Regex;
 use serde::{
     ser::{SerializeMap, SerializeSeq},
     Deserialize, Serialize, Serializer,
@@ -33,37 +41,9 @@ use std::{
     hash::Hash,
     io::Read,
     str::FromStr,
-    sync::OnceLock,
 };
 use strum_macros::{EnumDiscriminants, EnumString};
 
-fn enum_symbol_name_r() -> &'static Regex {
-    static ENUM_SYMBOL_NAME_ONCE: OnceLock<Regex> = OnceLock::new();
-    ENUM_SYMBOL_NAME_ONCE.get_or_init(|| 
Regex::new(r"^[A-Za-z_][A-Za-z0-9_]*$").unwrap())
-}
-
-// An optional namespace (with optional dots) followed by a name without any 
dots in it.
-fn schema_name_r() -> &'static Regex {
-    static SCHEMA_NAME_ONCE: OnceLock<Regex> = OnceLock::new();
-    SCHEMA_NAME_ONCE.get_or_init(|| {
-        Regex::new(
-            
r"^((?P<namespace>([A-Za-z_][A-Za-z0-9_]*(\.[A-Za-z_][A-Za-z0-9_]*)*)?)\.)?(?P<name>[A-Za-z_][A-Za-z0-9_]*)$",
-        ).unwrap()
-    })
-}
-
-fn field_name_r() -> &'static Regex {
-    static FIELD_NAME_ONCE: OnceLock<Regex> = OnceLock::new();
-    FIELD_NAME_ONCE.get_or_init(|| 
Regex::new(r"^[A-Za-z_][A-Za-z0-9_]*$").unwrap())
-}
-
-fn namespace_r() -> &'static Regex {
-    static NAMESPACE_ONCE: OnceLock<Regex> = OnceLock::new();
-    NAMESPACE_ONCE.get_or_init(|| {
-        
Regex::new(r"^([A-Za-z_][A-Za-z0-9_]*(\.[A-Za-z_][A-Za-z0-9_]*)*)?$").unwrap()
-    })
-}
-
 /// Represents an Avro schema fingerprint
 /// More information about Avro schema fingerprints can be found in the
 /// [Avro Schema Fingerprint 
documentation](https://avro.apache.org/docs/current/spec.html#schema_fingerprints)
@@ -279,13 +259,7 @@ impl Name {
     }
 
     fn get_name_and_namespace(name: &str) -> AvroResult<(String, Namespace)> {
-        let caps = schema_name_r()
-            .captures(name)
-            .ok_or_else(|| Error::InvalidSchemaName(name.to_string(), 
schema_name_r().as_str()))?;
-        Ok((
-            caps["name"].to_string(),
-            caps.name("namespace").map(|s| s.as_str().to_string()),
-        ))
+        validate_schema_name(name)
     }
 
     /// Parse a `serde_json::Value` into a `Name`.
@@ -312,12 +286,7 @@ impl Name {
             .filter(|ns| !ns.is_empty());
 
         if let Some(ref ns) = namespace {
-            if !namespace_r().is_match(ns) {
-                return Err(Error::InvalidNamespace(
-                    ns.to_string(),
-                    namespace_r().as_str(),
-                ));
-            }
+            validate_namespace(ns)?;
         }
 
         Ok(Self {
@@ -686,9 +655,7 @@ impl RecordField {
     ) -> AvroResult<Self> {
         let name = field.name().ok_or(Error::GetNameFieldFromRecord)?;
 
-        if !field_name_r().is_match(&name) {
-            return Err(Error::FieldName(name));
-        }
+        validate_record_field_name(&name)?;
 
         // TODO: "type" = "<record name>"
         let schema = parser.parse_complex(field, &enclosing_record.namespace)?;
@@ -1713,10 +1680,7 @@ impl Parser {
 
         let mut existing_symbols: HashSet<&String> = 
HashSet::with_capacity(symbols.len());
         for symbol in symbols.iter() {
-            // Ensure enum symbol names match [A-Za-z_][A-Za-z0-9_]*
-            if !enum_symbol_name_r().is_match(symbol) {
-                return Err(Error::EnumSymbolName(symbol.to_string()));
-            }
+            validate_enum_symbol_name(symbol)?;
 
             // Ensure there are no duplicate symbols
             if existing_symbols.contains(&symbol) {
@@ -6275,26 +6239,6 @@ mod tests {
         Ok(())
     }
 
-    #[test]
-    fn test_avro_3897_disallow_invalid_namespaces_in_fully_qualified_name() -> 
TestResult {
-        let full_name = "ns.0.record1";
-        let name = Name::new(full_name);
-        assert!(name.is_err());
-        let expected =
-            Error::InvalidSchemaName(full_name.to_string(), 
schema_name_r().as_str()).to_string();
-        let err = name.map_err(|e| e.to_string()).err().unwrap();
-        assert_eq!(expected, err);
-
-        let full_name = "ns..record1";
-        let name = Name::new(full_name);
-        assert!(name.is_err());
-        let expected =
-            Error::InvalidSchemaName(full_name.to_string(), 
schema_name_r().as_str()).to_string();
-        let err = name.map_err(|e| e.to_string()).err().unwrap();
-        assert_eq!(expected, err);
-        Ok(())
-    }
-
     /// A test cases showing that names and namespaces can be constructed
     /// entirely by underscores.
     #[test]
diff --git a/lang/rust/avro/src/validator.rs b/lang/rust/avro/src/validator.rs
new file mode 100644
index 000000000..2b4967d7f
--- /dev/null
+++ b/lang/rust/avro/src/validator.rs
@@ -0,0 +1,318 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::{schema::Namespace, AvroResult, Error};
+use regex_lite::Regex;
+use std::sync::OnceLock;
+
+/// A validator that validates names and namespaces according to the Avro 
specification.
+struct SpecificationValidator;
+
+/// A trait that validates schema names.
+/// To register a custom one use [set_schema_name_validator].
+pub trait SchemaNameValidator: Send + Sync {
+    /// Returns the regex used to validate the schema name
+    /// according to the Avro specification.
+    fn regex(&self) -> &'static Regex {
+        static SCHEMA_NAME_ONCE: OnceLock<Regex> = OnceLock::new();
+        SCHEMA_NAME_ONCE.get_or_init(|| {
+            Regex::new(
+                // An optional namespace (with optional dots) followed by a 
name without any dots in it.
+                
r"^((?P<namespace>([A-Za-z_][A-Za-z0-9_]*(\.[A-Za-z_][A-Za-z0-9_]*)*)?)\.)?(?P<name>[A-Za-z_][A-Za-z0-9_]*)$",
+            )
+                .unwrap()
+        })
+    }
+
+    /// Validates the schema name and returns the name and the optional 
namespace,
+    /// or [Error::InvalidSchemaName] if it is invalid.
+    fn validate(&self, schema_name: &str) -> AvroResult<(String, Namespace)>;
+}
+
+impl SchemaNameValidator for SpecificationValidator {
+    fn validate(&self, schema_name: &str) -> AvroResult<(String, Namespace)> {
+        let regex = SchemaNameValidator::regex(self);
+        let caps = regex
+            .captures(schema_name)
+            .ok_or_else(|| Error::InvalidSchemaName(schema_name.to_string(), 
regex.as_str()))?;
+        Ok((
+            caps["name"].to_string(),
+            caps.name("namespace").map(|s| s.as_str().to_string()),
+        ))
+    }
+}
+
+static NAME_VALIDATOR_ONCE: OnceLock<Box<dyn SchemaNameValidator + Send + 
Sync>> = OnceLock::new();
+
+/// Sets a custom schema name validator.
+///
+/// Returns a unit if the registration was successful or the already
+/// registered validator if the registration failed.
+///
+/// **Note**: This function must be called before parsing any schema because 
this will
+/// register the default validator and the registration is one time only!
+pub fn set_schema_name_validator(
+    validator: Box<dyn SchemaNameValidator + Send + Sync>,
+) -> Result<(), Box<dyn SchemaNameValidator + Send + Sync>> {
+    debug!("Setting a custom schema name validator.");
+    NAME_VALIDATOR_ONCE.set(validator)
+}
+
+pub(crate) fn validate_schema_name(schema_name: &str) -> AvroResult<(String, 
Namespace)> {
+    NAME_VALIDATOR_ONCE
+        .get_or_init(|| {
+            debug!("Going to use the default name validator.");
+            Box::new(SpecificationValidator)
+        })
+        .validate(schema_name)
+}
+
+/// A trait that validates schema namespaces.
+/// To register a custom one use [set_schema_namespace_validator].
+pub trait SchemaNamespaceValidator: Send + Sync {
+    /// Returns the regex used to validate the schema namespace
+    /// according to the Avro specification.
+    fn regex(&self) -> &'static Regex {
+        static NAMESPACE_ONCE: OnceLock<Regex> = OnceLock::new();
+        NAMESPACE_ONCE.get_or_init(|| {
+            
Regex::new(r"^([A-Za-z_][A-Za-z0-9_]*(\.[A-Za-z_][A-Za-z0-9_]*)*)?$").unwrap()
+        })
+    }
+
+    /// Validates the schema namespace or [Error::InvalidNamespace] if it is 
invalid.
+    fn validate(&self, namespace: &str) -> AvroResult<()>;
+}
+
+impl SchemaNamespaceValidator for SpecificationValidator {
+    fn validate(&self, ns: &str) -> AvroResult<()> {
+        let regex = SchemaNamespaceValidator::regex(self);
+        if !regex.is_match(ns) {
+            return Err(Error::InvalidNamespace(ns.to_string(), 
regex.as_str()));
+        } else {
+            Ok(())
+        }
+    }
+}
+
+static NAMESPACE_VALIDATOR_ONCE: OnceLock<Box<dyn SchemaNamespaceValidator + 
Send + Sync>> =
+    OnceLock::new();
+
+/// Sets a custom schema namespace validator.
+///
+/// Returns a unit if the registration was successful or the already
+/// registered validator if the registration failed.
+///
+/// **Note**: This function must be called before parsing any schema because 
this will
+/// register the default validator and the registration is one time only!
+pub fn set_schema_namespace_validator(
+    validator: Box<dyn SchemaNamespaceValidator + Send + Sync>,
+) -> Result<(), Box<dyn SchemaNamespaceValidator + Send + Sync>> {
+    NAMESPACE_VALIDATOR_ONCE.set(validator)
+}
+
+pub(crate) fn validate_namespace(ns: &str) -> AvroResult<()> {
+    NAMESPACE_VALIDATOR_ONCE
+        .get_or_init(|| {
+            debug!("Going to use the default namespace validator.");
+            Box::new(SpecificationValidator)
+        })
+        .validate(ns)
+}
+
+/// A trait that validates enum symbol names.
+/// To register a custom one use [set_enum_symbol_name_validator].
+pub trait EnumSymbolNameValidator: Send + Sync {
+    /// Returns the regex used to validate the symbols of enum schema
+    /// according to the Avro specification.
+    fn regex(&self) -> &'static Regex {
+        static ENUM_SYMBOL_NAME_ONCE: OnceLock<Regex> = OnceLock::new();
+        ENUM_SYMBOL_NAME_ONCE.get_or_init(|| 
Regex::new(r"^[A-Za-z_][A-Za-z0-9_]*$").unwrap())
+    }
+
+    /// Validates the symbols of an Enum schema name and returns nothing 
(unit),
+    /// or [Error::EnumSymbolName] if it is invalid.
+    fn validate(&self, name: &str) -> AvroResult<()>;
+}
+
+impl EnumSymbolNameValidator for SpecificationValidator {
+    fn validate(&self, symbol: &str) -> AvroResult<()> {
+        let regex = EnumSymbolNameValidator::regex(self);
+        if !regex.is_match(symbol) {
+            return Err(Error::EnumSymbolName(symbol.to_string()));
+        }
+
+        Ok(())
+    }
+}
+
+static ENUM_SYMBOL_NAME_VALIDATOR_ONCE: OnceLock<Box<dyn 
EnumSymbolNameValidator + Send + Sync>> =
+    OnceLock::new();
+
+/// Sets a custom enum symbol name validator.
+///
+/// Returns a unit if the registration was successful or the already
+/// registered validator if the registration failed.
+///
+/// **Note**: This function must be called before parsing any schema because 
this will
+/// register the default validator and the registration is one time only!
+pub fn set_enum_symbol_name_validator(
+    validator: Box<dyn EnumSymbolNameValidator + Send + Sync>,
+) -> Result<(), Box<dyn EnumSymbolNameValidator + Send + Sync>> {
+    ENUM_SYMBOL_NAME_VALIDATOR_ONCE.set(validator)
+}
+
+pub(crate) fn validate_enum_symbol_name(symbol: &str) -> AvroResult<()> {
+    ENUM_SYMBOL_NAME_VALIDATOR_ONCE
+        .get_or_init(|| {
+            debug!("Going to use the default enum symbol name validator.");
+            Box::new(SpecificationValidator)
+        })
+        .validate(symbol)
+}
+
+/// A trait that validates record field names.
+/// To register a custom one use [set_record_field_name_validator].
+pub trait RecordFieldNameValidator: Send + Sync {
+    /// Returns the regex used to validate the record field names
+    /// according to the Avro specification.
+    fn regex(&self) -> &'static Regex {
+        static FIELD_NAME_ONCE: OnceLock<Regex> = OnceLock::new();
+        FIELD_NAME_ONCE.get_or_init(|| 
Regex::new(r"^[A-Za-z_][A-Za-z0-9_]*$").unwrap())
+    }
+
+    /// Validates the record field's names and returns nothing (unit),
+    /// or [Error::FieldName] if it is invalid.
+    fn validate(&self, name: &str) -> AvroResult<()>;
+}
+
+impl RecordFieldNameValidator for SpecificationValidator {
+    fn validate(&self, field_name: &str) -> AvroResult<()> {
+        let regex = RecordFieldNameValidator::regex(self);
+        if !regex.is_match(field_name) {
+            return Err(Error::FieldName(field_name.to_string()));
+        }
+
+        Ok(())
+    }
+}
+
+static RECORD_FIELD_NAME_VALIDATOR_ONCE: OnceLock<Box<dyn 
RecordFieldNameValidator + Send + Sync>> =
+    OnceLock::new();
+
+/// Sets a custom record field name validator.
+///
+/// Returns a unit if the registration was successful or the already
+/// registered validator if the registration failed.
+///
+/// **Note**: This function must be called before parsing any schema because 
this will
+/// register the default validator and the registration is one time only!
+pub fn set_record_field_name_validator(
+    validator: Box<dyn RecordFieldNameValidator + Send + Sync>,
+) -> Result<(), Box<dyn RecordFieldNameValidator + Send + Sync>> {
+    RECORD_FIELD_NAME_VALIDATOR_ONCE.set(validator)
+}
+
+pub(crate) fn validate_record_field_name(field_name: &str) -> AvroResult<()> {
+    RECORD_FIELD_NAME_VALIDATOR_ONCE
+        .get_or_init(|| {
+            debug!("Going to use the default record field name validator.");
+            Box::new(SpecificationValidator)
+        })
+        .validate(field_name)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::schema::Name;
+    use apache_avro_test_helper::TestResult;
+
+    #[test]
+    fn avro_3900_default_name_validator_with_valid_ns() -> TestResult {
+        validate_schema_name("example")?;
+        Ok(())
+    }
+
+    #[test]
+    fn avro_3900_default_name_validator_with_invalid_ns() -> TestResult {
+        assert!(validate_schema_name("com-example").is_err());
+        Ok(())
+    }
+
+    #[test]
+    fn test_avro_3897_disallow_invalid_namespaces_in_fully_qualified_name() -> 
TestResult {
+        let full_name = "ns.0.record1";
+        let name = Name::new(full_name);
+        assert!(name.is_err());
+        let validator = SpecificationValidator;
+        let expected = Error::InvalidSchemaName(
+            full_name.to_string(),
+            SchemaNameValidator::regex(&validator).as_str(),
+        )
+        .to_string();
+        let err = name.map_err(|e| e.to_string()).err().unwrap();
+        pretty_assertions::assert_eq!(expected, err);
+
+        let full_name = "ns..record1";
+        let name = Name::new(full_name);
+        assert!(name.is_err());
+        let expected = Error::InvalidSchemaName(
+            full_name.to_string(),
+            SchemaNameValidator::regex(&validator).as_str(),
+        )
+        .to_string();
+        let err = name.map_err(|e| e.to_string()).err().unwrap();
+        pretty_assertions::assert_eq!(expected, err);
+        Ok(())
+    }
+
+    #[test]
+    fn avro_3900_default_namespace_validator_with_valid_ns() -> TestResult {
+        validate_namespace("com.example")?;
+        Ok(())
+    }
+
+    #[test]
+    fn avro_3900_default_namespace_validator_with_invalid_ns() -> TestResult {
+        assert!(validate_namespace("com-example").is_err());
+        Ok(())
+    }
+
+    #[test]
+    fn avro_3900_default_enum_symbol_validator_with_valid_symbol_name() -> 
TestResult {
+        validate_enum_symbol_name("spades")?;
+        Ok(())
+    }
+
+    #[test]
+    fn avro_3900_default_enum_symbol_validator_with_invalid_symbol_name() -> 
TestResult {
+        assert!(validate_enum_symbol_name("com-example").is_err());
+        Ok(())
+    }
+
+    #[test]
+    fn avro_3900_default_record_field_validator_with_valid_name() -> 
TestResult {
+        validate_record_field_name("test")?;
+        Ok(())
+    }
+
+    #[test]
+    fn avro_3900_default_record_field_validator_with_invalid_name() -> 
TestResult {
+        assert!(validate_record_field_name("com-example").is_err());
+        Ok(())
+    }
+}
diff --git a/lang/rust/avro/tests/validators.rs 
b/lang/rust/avro/tests/validators.rs
new file mode 100644
index 000000000..941ffc3e7
--- /dev/null
+++ b/lang/rust/avro/tests/validators.rs
@@ -0,0 +1,85 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use apache_avro::{
+    schema::Namespace,
+    validator::{
+        set_enum_symbol_name_validator, set_record_field_name_validator, 
set_schema_name_validator,
+        set_schema_namespace_validator, EnumSymbolNameValidator, 
RecordFieldNameValidator,
+        SchemaNameValidator, SchemaNamespaceValidator,
+    },
+    AvroResult,
+};
+use apache_avro_test_helper::TestResult;
+
+struct CustomValidator;
+
+#[test]
+fn avro_3900_custom_validator_with_spec_invalid_names() -> TestResult {
+    // Setup the custom validators before the schema is parsed
+    // because the parsing will trigger the validation and will
+    // setup the default validator (SpecificationValidator)!
+    impl SchemaNameValidator for CustomValidator {
+        fn validate(&self, schema_name: &str) -> AvroResult<(String, 
Namespace)> {
+            Ok((schema_name.to_string(), None))
+        }
+    }
+
+    impl SchemaNamespaceValidator for CustomValidator {
+        fn validate(&self, _ns: &str) -> AvroResult<()> {
+            Ok(())
+        }
+    }
+
+    impl EnumSymbolNameValidator for CustomValidator {
+        fn validate(&self, _ns: &str) -> AvroResult<()> {
+            Ok(())
+        }
+    }
+
+    impl RecordFieldNameValidator for CustomValidator {
+        fn validate(&self, _ns: &str) -> AvroResult<()> {
+            Ok(())
+        }
+    }
+
+    assert!(set_schema_name_validator(Box::new(CustomValidator)).is_ok());
+    assert!(set_schema_namespace_validator(Box::new(CustomValidator)).is_ok());
+    assert!(set_enum_symbol_name_validator(Box::new(CustomValidator)).is_ok());
+    
assert!(set_record_field_name_validator(Box::new(CustomValidator)).is_ok());
+
+    let invalid_schema = r#"{
+        "name": "invalid-schema-name",
+        "namespace": "invalid-namespace",
+        "type": "record",
+        "fields": [
+            {
+                "name": "invalid-field-name",
+                "type": "int"
+            },
+            {
+                "type": "enum",
+                "name": "Test",
+                "symbols": ["A-B", "B-A"]
+            }
+        ]
+    }"#;
+
+    apache_avro::Schema::parse_str(invalid_schema)?;
+
+    Ok(())
+}
diff --git a/lang/rust/avro_test_helper/Cargo.toml 
b/lang/rust/avro_test_helper/Cargo.toml
index 376dce33b..37d31c63d 100644
--- a/lang/rust/avro_test_helper/Cargo.toml
+++ b/lang/rust/avro_test_helper/Cargo.toml
@@ -36,4 +36,3 @@ better-panic = { default-features = false, version = "0.3.0" }
 ctor = { default-features = false, version = "0.2.6" }
 env_logger = { default-features = false, version = "0.10.2" }
 log = { workspace = true }
-ref_thread_local = { default-features = false, version = "0.1.1" }
diff --git a/lang/rust/avro_test_helper/src/lib.rs 
b/lang/rust/avro_test_helper/src/lib.rs
index 7e56c1261..5e1e04b50 100644
--- a/lang/rust/avro_test_helper/src/lib.rs
+++ b/lang/rust/avro_test_helper/src/lib.rs
@@ -17,14 +17,13 @@
 
 #[cfg(not(target_arch = "wasm32"))]
 use ctor::{ctor, dtor};
+use std::cell::RefCell;
 
-use ref_thread_local::ref_thread_local;
-
-ref_thread_local! {
+thread_local! {
     // The unit tests run in parallel
     // We need to keep the log messages in a thread-local variable
     // and clear them after assertion
-    pub(crate) static managed LOG_MESSAGES: Vec<String> = Vec::new();
+    pub(crate) static LOG_MESSAGES: RefCell<Vec<String>> = 
RefCell::new(Vec::new());
 }
 
 pub mod logger;
diff --git a/lang/rust/avro_test_helper/src/logger.rs 
b/lang/rust/avro_test_helper/src/logger.rs
index 505e42541..f1bb5f84d 100644
--- a/lang/rust/avro_test_helper/src/logger.rs
+++ b/lang/rust/avro_test_helper/src/logger.rs
@@ -17,7 +17,6 @@
 
 use crate::LOG_MESSAGES;
 use log::{LevelFilter, Log, Metadata};
-use ref_thread_local::RefThreadLocal;
 use std::sync::OnceLock;
 
 struct TestLogger {
@@ -32,7 +31,7 @@ impl Log for TestLogger {
 
     fn log(&self, record: &log::Record) {
         if self.enabled(record.metadata()) {
-            LOG_MESSAGES.borrow_mut().push(format!("{}", record.args()));
+            LOG_MESSAGES.with(|msgs| msgs.borrow_mut().push(format!("{}", 
record.args())));
 
             self.delegate.log(record);
         }
@@ -53,20 +52,24 @@ fn test_logger() -> &'static TestLogger {
 }
 
 pub fn clear_log_messages() {
-    LOG_MESSAGES.borrow_mut().clear();
+    LOG_MESSAGES.with(|msgs| match msgs.try_borrow_mut() {
+        Ok(mut log_messages) => log_messages.clear(),
+        Err(err) => panic!("Failed to clear log messages: {err:?}"),
+    });
 }
 
 pub fn assert_not_logged(unexpected_message: &str) {
-    match LOG_MESSAGES.borrow().last() {
+    LOG_MESSAGES.with(|msgs| match msgs.borrow().last() {
         Some(last_log) if last_log == unexpected_message => {
             panic!("The following log message should not have been logged: 
'{unexpected_message}'")
         }
         _ => (),
-    }
+    });
 }
 
 pub fn assert_logged(expected_message: &str) {
-    assert_eq!(LOG_MESSAGES.borrow_mut().pop().unwrap(), expected_message);
+    let last_message = LOG_MESSAGES.with(|msgs| 
msgs.borrow_mut().pop().unwrap());
+    assert_eq!(last_message, expected_message);
 }
 
 #[cfg(not(target_arch = "wasm32"))]

Reply via email to