This is an automated email from the ASF dual-hosted git repository.

clesaec pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/avro.git


The following commit(s) were added to refs/heads/master by this push:
     new 0c862cab2 AVRO-3704: name validator interface (#2053)
0c862cab2 is described below

commit 0c862cab22e8e7164e0ac42d1b5d3585db3f7869
Author: Christophe Le Saec <[email protected]>
AuthorDate: Tue Sep 12 08:37:21 2023 +0200

    AVRO-3704: name validator interface (#2053)
    
    * AVRO-3704: name validator interface
---
 .../avro/src/main/java/org/apache/avro/Schema.java | 138 ++++++++++++++++-----
 .../java/org/apache/avro/file/DataFileStream.java  |   2 +-
 .../org/apache/avro/SchemaNameValidatorTest.java   |  59 +++++++++
 .../java/org/apache/avro/TestDataFileReader.java   |   6 +-
 .../src/test/java/org/apache/avro/TestSchema.java  |  11 +-
 .../java/org/apache/avro/TestSchemaBuilder.java    |  23 ++++
 .../java/org/apache/avro/reflect/TestReflect.java  |   4 +-
 .../src/test/java/org/apache/avro/TestSchema.java  |   6 +-
 8 files changed, 199 insertions(+), 50 deletions(-)

diff --git a/lang/java/avro/src/main/java/org/apache/avro/Schema.java 
b/lang/java/avro/src/main/java/org/apache/avro/Schema.java
index cec128ba6..8933f20f0 100644
--- a/lang/java/avro/src/main/java/org/apache/avro/Schema.java
+++ b/lang/java/avro/src/main/java/org/apache/avro/Schema.java
@@ -990,8 +990,8 @@ public abstract class Schema extends JsonProperties 
implements Serializable {
     }
   }
 
-  private static final ThreadLocal<Set> SEEN_EQUALS = 
ThreadLocalWithInitial.of(HashSet::new);
-  private static final ThreadLocal<Map> SEEN_HASHCODE = 
ThreadLocalWithInitial.of(IdentityHashMap::new);
+  private static final ThreadLocal<Set<SeenPair>> SEEN_EQUALS = 
ThreadLocalWithInitial.of(HashSet::new);
+  private static final ThreadLocal<Map<Schema, Schema>> SEEN_HASHCODE = 
ThreadLocalWithInitial.of(IdentityHashMap::new);
 
   @SuppressWarnings(value = "unchecked")
   private static class RecordSchema extends NamedSchema {
@@ -1087,7 +1087,7 @@ public abstract class Schema extends JsonProperties 
implements Serializable {
 
     @Override
     int computeHash() {
-      Map seen = SEEN_HASHCODE.get();
+      Map<Schema, Schema> seen = SEEN_HASHCODE.get();
       if (seen.containsKey(this))
         return 0; // prevent stack overflow
       boolean first = seen.isEmpty();
@@ -1109,8 +1109,8 @@ public abstract class Schema extends JsonProperties 
implements Serializable {
       gen.writeStringField("type", isError ? "error" : "record");
       writeName(names, gen);
       names.space = name.space; // set default namespace
-      if (getDoc() != null)
-        gen.writeStringField("doc", getDoc());
+      if (this.getDoc() != null)
+        gen.writeStringField("doc", this.getDoc());
 
       if (fields != null) {
         gen.writeFieldName("fields");
@@ -1480,9 +1480,17 @@ public abstract class Schema extends JsonProperties 
implements Serializable {
    */
   public static class Parser {
     private Names names = new Names();
-    private boolean validate = true;
+    private final Schema.NameValidator validate;
     private boolean validateDefaults = true;
 
+    public Parser() {
+      this(NameValidator.UTF_VALIDATOR);
+    }
+
+    public Parser(final NameValidator validate) {
+      this.validate = validate;
+    }
+
     /**
      * Adds the provided types to the set of defined, named types known to this
      * parser. deprecated: use addTypes(Iterable<Schema> types)
@@ -1510,17 +1518,6 @@ public abstract class Schema extends JsonProperties 
implements Serializable {
       return result;
     }
 
-    /** Enable or disable name validation. */
-    public Parser setValidate(boolean validate) {
-      this.validate = validate;
-      return this;
-    }
-
-    /** True iff names are validated. True by default. */
-    public boolean getValidate() {
-      return this.validate;
-    }
-
     /** Enable or disable default value validation. */
     public Parser setValidateDefaults(boolean validateDefaults) {
       this.validateDefaults = validateDefaults;
@@ -1587,7 +1584,7 @@ public abstract class Schema extends JsonProperties 
implements Serializable {
     }
 
     private Schema runParser(JsonParser parser, ParseFunction f) throws 
IOException {
-      boolean saved = validateNames.get();
+      NameValidator saved = validateNames.get();
       boolean savedValidateDefaults = VALIDATE_DEFAULTS.get();
       try {
         validateNames.set(validate);
@@ -1683,7 +1680,8 @@ public abstract class Schema extends JsonProperties 
implements Serializable {
    */
   @Deprecated
   public static Schema parse(String jsonSchema, boolean validate) {
-    return new Parser().setValidate(validate).parse(jsonSchema);
+    final NameValidator validator = validate ? NameValidator.UTF_VALIDATOR : 
NameValidator.NO_VALIDATION;
+    return new Parser(validator).parse(jsonSchema);
   }
 
   static final Map<String, Type> PRIMITIVES = new HashMap<>();
@@ -1752,27 +1750,21 @@ public abstract class Schema extends JsonProperties 
implements Serializable {
     }
   }
 
-  private static ThreadLocal<Boolean> validateNames = 
ThreadLocalWithInitial.of(() -> true);
+  private static ThreadLocal<Schema.NameValidator> validateNames = 
ThreadLocalWithInitial
+      .of(() -> NameValidator.UTF_VALIDATOR);
 
   private static String validateName(String name) {
-    if (!validateNames.get())
-      return name; // not validating names
-    if (name == null)
-      throw new SchemaParseException("Null name");
-    int length = name.length();
-    if (length == 0)
-      throw new SchemaParseException("Empty name");
-    char first = name.charAt(0);
-    if (!(Character.isLetter(first) || first == '_'))
-      throw new SchemaParseException("Illegal initial character: " + name);
-    for (int i = 1; i < length; i++) {
-      char c = name.charAt(i);
-      if (!(Character.isLetterOrDigit(c) || c == '_'))
-        throw new SchemaParseException("Illegal character in: " + name);
+    NameValidator.Result result = validateNames.get().validate(name);
+    if (!result.isOK()) {
+      throw new SchemaParseException(result.errors);
     }
     return name;
   }
 
+  public static void setNameValidator(final Schema.NameValidator validator) {
+    Schema.validateNames.set(validator);
+  }
+
   private static final ThreadLocal<Boolean> VALIDATE_DEFAULTS = 
ThreadLocalWithInitial.of(() -> true);
 
   private static JsonNode validateDefault(String fieldName, Schema schema, 
JsonNode defaultValue) {
@@ -2299,6 +2291,84 @@ public abstract class Schema extends JsonProperties 
implements Serializable {
     return alias;
   }
 
+  public interface NameValidator {
+
+    class Result {
+      private final String errors;
+
+      public Result(final String errors) {
+        this.errors = errors;
+      }
+
+      public boolean isOK() {
+        return this == NameValidator.OK;
+      }
+
+      public String getErrors() {
+        return errors;
+      }
+    }
+
+    Result OK = new Result(null);
+
+    default Result validate(String name) {
+      return OK;
+    }
+
+    NameValidator NO_VALIDATION = new NameValidator() {
+    };
+
+    NameValidator UTF_VALIDATOR = new NameValidator() {
+      @Override
+      public Result validate(final String name) {
+        if (name == null)
+          return new Result("Null name");
+        int length = name.length();
+        if (length == 0)
+          return new Result("Empty name");
+        char first = name.charAt(0);
+        if (!(Character.isLetter(first) || first == '_'))
+          return new Result("Illegal initial character: " + name);
+        for (int i = 1; i < length; i++) {
+          char c = name.charAt(i);
+          if (!(Character.isLetterOrDigit(c) || c == '_'))
+            return new Result("Illegal character in: " + name);
+        }
+        return OK;
+      }
+    };
+
+    NameValidator STRICT_VALIDATOR = new NameValidator() {
+      @Override
+      public Result validate(final String name) {
+        if (name == null)
+          return new Result("Null name");
+        int length = name.length();
+        if (length == 0)
+          return new Result("Empty name");
+        char first = name.charAt(0);
+        if (!(isLetter(first) || first == '_'))
+          return new Result("Illegal initial character: " + name);
+        for (int i = 1; i < length; i++) {
+          char c = name.charAt(i);
+          if (!(isLetter(c) || isDigit(c) || c == '_'))
+            return new Result("Illegal character in: " + name);
+        }
+        return OK;
+      }
+
+      private boolean isLetter(char c) {
+        return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
+      }
+
+      private boolean isDigit(char c) {
+        return c >= '0' && c <= '9';
+      }
+
+    };
+
+  }
+
   /**
    * No change is permitted on LockableArrayList once lock() has been called on
    * it.
diff --git 
a/lang/java/avro/src/main/java/org/apache/avro/file/DataFileStream.java 
b/lang/java/avro/src/main/java/org/apache/avro/file/DataFileStream.java
index a2b517225..150d2ace9 100644
--- a/lang/java/avro/src/main/java/org/apache/avro/file/DataFileStream.java
+++ b/lang/java/avro/src/main/java/org/apache/avro/file/DataFileStream.java
@@ -139,7 +139,7 @@ public class DataFileStream<D> implements Iterator<D>, 
Iterable<D>, Closeable {
 
     // finalize the header
     header.metaKeyList = Collections.unmodifiableList(header.metaKeyList);
-    header.schema = new 
Schema.Parser().setValidate(false).setValidateDefaults(false)
+    header.schema = new 
Schema.Parser(Schema.NameValidator.NO_VALIDATION).setValidateDefaults(false)
         .parse(getMetaString(DataFileConstants.SCHEMA));
     this.codec = resolveCodec();
     reader.setSchema(header.schema);
diff --git 
a/lang/java/avro/src/test/java/org/apache/avro/SchemaNameValidatorTest.java 
b/lang/java/avro/src/test/java/org/apache/avro/SchemaNameValidatorTest.java
new file mode 100644
index 000000000..6846c4434
--- /dev/null
+++ b/lang/java/avro/src/test/java/org/apache/avro/SchemaNameValidatorTest.java
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.avro;
+
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.Arguments;
+import org.junit.jupiter.params.provider.MethodSource;
+
+import java.util.stream.Stream;
+
+class SchemaNameValidatorTest {
+
+  @ParameterizedTest
+  @MethodSource("data")
+  void validator(Schema.NameValidator validator, String input, boolean 
expectedResult) {
+    Schema.NameValidator.Result result = validator.validate(input);
+    Assertions.assertEquals(expectedResult, result.isOK(), result.getErrors());
+  }
+
+  static Stream<Arguments> data() {
+    return Stream.of(Arguments.of(Schema.NameValidator.UTF_VALIDATOR, null, 
false), // null not accepted
+        Arguments.of(Schema.NameValidator.STRICT_VALIDATOR, null, false), // 
null not accepted
+        Arguments.of(Schema.NameValidator.UTF_VALIDATOR, "", false), // empty 
not accepted
+        Arguments.of(Schema.NameValidator.STRICT_VALIDATOR, "", false), // 
empty not accepted
+        Arguments.of(Schema.NameValidator.UTF_VALIDATOR, "Hello world", 
false), // space not accepted
+        Arguments.of(Schema.NameValidator.STRICT_VALIDATOR, "Hello world", 
false), // space not accepted
+        Arguments.of(Schema.NameValidator.UTF_VALIDATOR, "H&", false), // non 
letter or digit not accepted
+        Arguments.of(Schema.NameValidator.STRICT_VALIDATOR, "H&", false), // 
non letter or digit not accepted
+        Arguments.of(Schema.NameValidator.UTF_VALIDATOR, "H=", false), // non 
letter or digit not accepted
+        Arguments.of(Schema.NameValidator.STRICT_VALIDATOR, "H=", false), // 
non letter or digit not accepted
+        Arguments.of(Schema.NameValidator.UTF_VALIDATOR, "H]", false), // non 
letter or digit not accepted
+        Arguments.of(Schema.NameValidator.STRICT_VALIDATOR, "H]", false), // 
non letter or digit not accepted
+        Arguments.of(Schema.NameValidator.UTF_VALIDATOR, "Hello_world", true),
+        Arguments.of(Schema.NameValidator.STRICT_VALIDATOR, "Hello_world", 
true),
+        Arguments.of(Schema.NameValidator.UTF_VALIDATOR, "éàçô", true), // 
Accept accent
+        Arguments.of(Schema.NameValidator.STRICT_VALIDATOR, "éàçô", false), // 
Not Accept accent
+        Arguments.of(Schema.NameValidator.UTF_VALIDATOR, "5éàçô", false), // 
can't start with number
+        Arguments.of(Schema.NameValidator.STRICT_VALIDATOR, "5éàçô", false), 
// can't start with number
+        Arguments.of(Schema.NameValidator.UTF_VALIDATOR, "_Hello_world", true),
+        Arguments.of(Schema.NameValidator.STRICT_VALIDATOR, "_Hello_world", 
true));
+  }
+
+}
diff --git 
a/lang/java/avro/src/test/java/org/apache/avro/TestDataFileReader.java 
b/lang/java/avro/src/test/java/org/apache/avro/TestDataFileReader.java
index fb42d639e..a85b96640 100644
--- a/lang/java/avro/src/test/java/org/apache/avro/TestDataFileReader.java
+++ b/lang/java/avro/src/test/java/org/apache/avro/TestDataFileReader.java
@@ -87,7 +87,7 @@ public class TestDataFileReader {
     // magic header check. This happens with throttled input stream,
     // where we read into buffer less bytes than requested.
 
-    Schema legacySchema = new 
Schema.Parser().setValidate(false).setValidateDefaults(false)
+    Schema legacySchema = new 
Schema.Parser(Schema.NameValidator.NO_VALIDATION).setValidateDefaults(false)
         .parse("{\"type\": \"record\", \"name\": \"TestSchema\", \"fields\": "
             + "[ {\"name\": \"id\", \"type\": [\"long\", \"null\"], 
\"default\": null}]}");
     File f = Files.createTempFile("testThrottledInputStream", 
".avro").toFile();
@@ -146,7 +146,7 @@ public class TestDataFileReader {
       // AVRO-2944 describes hanging/failure in reading Avro file with 
performing
       // magic header check. This potentially happens with a defective input 
stream
       // where a -1 value is unexpectedly returned from a read.
-      Schema legacySchema = new 
Schema.Parser().setValidate(false).setValidateDefaults(false)
+      Schema legacySchema = new 
Schema.Parser(Schema.NameValidator.NO_VALIDATION).setValidateDefaults(false)
           .parse("{\"type\": \"record\", \"name\": \"TestSchema\", \"fields\": 
"
               + "[ {\"name\": \"id\", \"type\": [\"long\", \"null\"], 
\"default\": null}]}");
       File f = Files.createTempFile("testInputStreamEOF", ".avro").toFile();
@@ -195,7 +195,7 @@ public class TestDataFileReader {
     // This schema has an accent in the name and the default for the field 
doesn't
     // match the first type in the union. A Java SDK in the past could create 
a file
     // containing this schema.
-    Schema legacySchema = new 
Schema.Parser().setValidate(false).setValidateDefaults(false)
+    Schema legacySchema = new 
Schema.Parser(Schema.NameValidator.NO_VALIDATION).setValidateDefaults(false)
         .parse("{\"type\": \"record\", \"name\": 
\"InvalidAccëntWithInvalidNull\", \"fields\": "
             + "[ {\"name\": \"id\", \"type\": [\"long\", \"null\"], 
\"default\": null}]}");
 
diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestSchema.java 
b/lang/java/avro/src/test/java/org/apache/avro/TestSchema.java
index b4c259e92..9900fd635 100644
--- a/lang/java/avro/src/test/java/org/apache/avro/TestSchema.java
+++ b/lang/java/avro/src/test/java/org/apache/avro/TestSchema.java
@@ -500,17 +500,15 @@ public class TestSchema {
   }
 
   @Test
-  void testContentAfterAvsc() throws Exception {
-    Schema.Parser parser = new Schema.Parser();
-    parser.setValidate(true);
+  void testContentAfterAvsc() {
+    Schema.Parser parser = new 
Schema.Parser(Schema.NameValidator.UTF_VALIDATOR);
     parser.setValidateDefaults(true);
     assertThrows(SchemaParseException.class, () -> parser.parse("{\"type\": 
\"string\"}; DROP TABLE STUDENTS"));
   }
 
   @Test
   void testContentAfterAvscInInputStream() throws Exception {
-    Schema.Parser parser = new Schema.Parser();
-    parser.setValidate(true);
+    Schema.Parser parser = new 
Schema.Parser(Schema.NameValidator.UTF_VALIDATOR);
     parser.setValidateDefaults(true);
     String avsc = "{\"type\": \"string\"}; DROP TABLE STUDENTS";
     ByteArrayInputStream is = new 
ByteArrayInputStream(avsc.getBytes(StandardCharsets.UTF_8));
@@ -526,8 +524,7 @@ public class TestSchema {
       writer.flush();
     }
 
-    Schema.Parser parser = new Schema.Parser();
-    parser.setValidate(true);
+    Schema.Parser parser = new 
Schema.Parser(Schema.NameValidator.UTF_VALIDATOR);
     parser.setValidateDefaults(true);
     assertThrows(SchemaParseException.class, () -> parser.parse(avscFile));
   }
diff --git 
a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaBuilder.java 
b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaBuilder.java
index 784bfba02..fcbaae655 100644
--- a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaBuilder.java
+++ b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaBuilder.java
@@ -38,6 +38,8 @@ import org.apache.avro.generic.GenericData;
 import org.apache.avro.generic.GenericDatumReader;
 import org.apache.avro.generic.GenericDatumWriter;
 import org.apache.avro.generic.GenericRecordBuilder;
+
+import org.junit.jupiter.api.Assertions;
 import org.junit.jupiter.api.Test;
 import org.junit.jupiter.api.io.TempDir;
 
@@ -874,4 +876,25 @@ public class TestSchemaBuilder {
 
     assertEquals(a2, a1);
   }
+
+  @Test
+  void namesAcceptAll() throws InterruptedException {
+    // Ensure that Schema.setNameValidator won't interfere with others unit 
tests.
+    Runnable r = () -> {
+      Schema.setNameValidator(Schema.NameValidator.NO_VALIDATION);
+      final Schema schema = 
SchemaBuilder.record("7name").fields().name("123").type(Schema.create(Schema.Type.INT))
+          .noDefault().endRecord();
+      Assertions.assertNotNull(schema);
+      Assertions.assertEquals("7name", schema.getName());
+      final Schema.Field field = schema.getField("123");
+      Assertions.assertEquals("123", field.name());
+    };
+
+    final Throwable[] exception = new Throwable[] { null };
+    Thread t = new Thread(r);
+    t.setUncaughtExceptionHandler((Thread th, Throwable e) -> exception[0] = 
e);
+    t.start();
+    t.join();
+    Assertions.assertNull(exception[0], () -> exception[0].getMessage());
+  }
 }
diff --git 
a/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflect.java 
b/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflect.java
index 470010e6f..e8fadeea7 100644
--- a/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflect.java
+++ b/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflect.java
@@ -1232,7 +1232,7 @@ public class TestReflect {
   @Test
   void dollarTerminatedNamespaceCompatibility() {
     ReflectData data = ReflectData.get();
-    Schema s = new Schema.Parser().setValidate(false).parse(
+    Schema s = new Schema.Parser(Schema.NameValidator.NO_VALIDATION).parse(
         
"{\"type\":\"record\",\"name\":\"Z\",\"namespace\":\"org.apache.avro.reflect.TestReflect$\",\"fields\":[]}");
     assertEquals(data.getSchema(data.getClass(s)).toString(),
         
"{\"type\":\"record\",\"name\":\"Z\",\"namespace\":\"org.apache.avro.reflect.TestReflect\",\"fields\":[]}");
@@ -1242,7 +1242,7 @@ public class TestReflect {
   void dollarTerminatedNestedStaticClassNamespaceCompatibility() {
     ReflectData data = ReflectData.get();
     // Older versions of Avro generated this namespace on nested records.
-    Schema s = new Schema.Parser().setValidate(false).parse(
+    Schema s = new Schema.Parser(Schema.NameValidator.NO_VALIDATION).parse(
         
"{\"type\":\"record\",\"name\":\"AnotherSampleRecord\",\"namespace\":\"org.apache.avro.reflect.TestReflect$SampleRecord\",\"fields\":[]}");
     assertThat(data.getSchema(data.getClass(s)).getFullName(),
         
is("org.apache.avro.reflect.TestReflect.SampleRecord.AnotherSampleRecord"));
diff --git a/lang/java/ipc/src/test/java/org/apache/avro/TestSchema.java 
b/lang/java/ipc/src/test/java/org/apache/avro/TestSchema.java
index 9bb3e281a..eb2d5c918 100644
--- a/lang/java/ipc/src/test/java/org/apache/avro/TestSchema.java
+++ b/lang/java/ipc/src/test/java/org/apache/avro/TestSchema.java
@@ -205,9 +205,9 @@ public class TestSchema {
 
   @Test
   void invalidNameTolerance() {
-    new 
Schema.Parser().setValidate(false).parse("{\"type\":\"record\",\"name\":\"1X\",\"fields\":[]}");
-    new 
Schema.Parser().setValidate(false).parse("{\"type\":\"record\",\"name\":\"X-\",\"fields\":[]}");
-    new 
Schema.Parser().setValidate(false).parse("{\"type\":\"record\",\"name\":\"X$\",\"fields\":[]}");
+    new 
Schema.Parser(Schema.NameValidator.NO_VALIDATION).parse("{\"type\":\"record\",\"name\":\"1X\",\"fields\":[]}");
+    new 
Schema.Parser(Schema.NameValidator.NO_VALIDATION).parse("{\"type\":\"record\",\"name\":\"X-\",\"fields\":[]}");
+    new 
Schema.Parser(Schema.NameValidator.NO_VALIDATION).parse("{\"type\":\"record\",\"name\":\"X$\",\"fields\":[]}");
   }
 
   @Test

Reply via email to