This is an automated email from the ASF dual-hosted git repository.
clesaec pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/avro.git
The following commit(s) were added to refs/heads/master by this push:
new 0c862cab2 AVRO-3704: name validator interface (#2053)
0c862cab2 is described below
commit 0c862cab22e8e7164e0ac42d1b5d3585db3f7869
Author: Christophe Le Saec <[email protected]>
AuthorDate: Tue Sep 12 08:37:21 2023 +0200
AVRO-3704: name validator interface (#2053)
* AVRO-3704: name validator interface
---
.../avro/src/main/java/org/apache/avro/Schema.java | 138 ++++++++++++++++-----
.../java/org/apache/avro/file/DataFileStream.java | 2 +-
.../org/apache/avro/SchemaNameValidatorTest.java | 59 +++++++++
.../java/org/apache/avro/TestDataFileReader.java | 6 +-
.../src/test/java/org/apache/avro/TestSchema.java | 11 +-
.../java/org/apache/avro/TestSchemaBuilder.java | 23 ++++
.../java/org/apache/avro/reflect/TestReflect.java | 4 +-
.../src/test/java/org/apache/avro/TestSchema.java | 6 +-
8 files changed, 199 insertions(+), 50 deletions(-)
diff --git a/lang/java/avro/src/main/java/org/apache/avro/Schema.java
b/lang/java/avro/src/main/java/org/apache/avro/Schema.java
index cec128ba6..8933f20f0 100644
--- a/lang/java/avro/src/main/java/org/apache/avro/Schema.java
+++ b/lang/java/avro/src/main/java/org/apache/avro/Schema.java
@@ -990,8 +990,8 @@ public abstract class Schema extends JsonProperties
implements Serializable {
}
}
- private static final ThreadLocal<Set> SEEN_EQUALS =
ThreadLocalWithInitial.of(HashSet::new);
- private static final ThreadLocal<Map> SEEN_HASHCODE =
ThreadLocalWithInitial.of(IdentityHashMap::new);
+ private static final ThreadLocal<Set<SeenPair>> SEEN_EQUALS =
ThreadLocalWithInitial.of(HashSet::new);
+ private static final ThreadLocal<Map<Schema, Schema>> SEEN_HASHCODE =
ThreadLocalWithInitial.of(IdentityHashMap::new);
@SuppressWarnings(value = "unchecked")
private static class RecordSchema extends NamedSchema {
@@ -1087,7 +1087,7 @@ public abstract class Schema extends JsonProperties
implements Serializable {
@Override
int computeHash() {
- Map seen = SEEN_HASHCODE.get();
+ Map<Schema, Schema> seen = SEEN_HASHCODE.get();
if (seen.containsKey(this))
return 0; // prevent stack overflow
boolean first = seen.isEmpty();
@@ -1109,8 +1109,8 @@ public abstract class Schema extends JsonProperties
implements Serializable {
gen.writeStringField("type", isError ? "error" : "record");
writeName(names, gen);
names.space = name.space; // set default namespace
- if (getDoc() != null)
- gen.writeStringField("doc", getDoc());
+ if (this.getDoc() != null)
+ gen.writeStringField("doc", this.getDoc());
if (fields != null) {
gen.writeFieldName("fields");
@@ -1480,9 +1480,17 @@ public abstract class Schema extends JsonProperties
implements Serializable {
*/
public static class Parser {
private Names names = new Names();
- private boolean validate = true;
+ private final Schema.NameValidator validate;
private boolean validateDefaults = true;
+ public Parser() {
+ this(NameValidator.UTF_VALIDATOR);
+ }
+
+ public Parser(final NameValidator validate) {
+ this.validate = validate;
+ }
+
/**
* Adds the provided types to the set of defined, named types known to this
* parser. deprecated: use addTypes(Iterable<Schema> types)
@@ -1510,17 +1518,6 @@ public abstract class Schema extends JsonProperties
implements Serializable {
return result;
}
- /** Enable or disable name validation. */
- public Parser setValidate(boolean validate) {
- this.validate = validate;
- return this;
- }
-
- /** True iff names are validated. True by default. */
- public boolean getValidate() {
- return this.validate;
- }
-
/** Enable or disable default value validation. */
public Parser setValidateDefaults(boolean validateDefaults) {
this.validateDefaults = validateDefaults;
@@ -1587,7 +1584,7 @@ public abstract class Schema extends JsonProperties
implements Serializable {
}
private Schema runParser(JsonParser parser, ParseFunction f) throws
IOException {
- boolean saved = validateNames.get();
+ NameValidator saved = validateNames.get();
boolean savedValidateDefaults = VALIDATE_DEFAULTS.get();
try {
validateNames.set(validate);
@@ -1683,7 +1680,8 @@ public abstract class Schema extends JsonProperties
implements Serializable {
*/
@Deprecated
public static Schema parse(String jsonSchema, boolean validate) {
- return new Parser().setValidate(validate).parse(jsonSchema);
+ final NameValidator validator = validate ? NameValidator.UTF_VALIDATOR :
NameValidator.NO_VALIDATION;
+ return new Parser(validator).parse(jsonSchema);
}
static final Map<String, Type> PRIMITIVES = new HashMap<>();
@@ -1752,27 +1750,21 @@ public abstract class Schema extends JsonProperties
implements Serializable {
}
}
- private static ThreadLocal<Boolean> validateNames =
ThreadLocalWithInitial.of(() -> true);
+ private static ThreadLocal<Schema.NameValidator> validateNames =
ThreadLocalWithInitial
+ .of(() -> NameValidator.UTF_VALIDATOR);
private static String validateName(String name) {
- if (!validateNames.get())
- return name; // not validating names
- if (name == null)
- throw new SchemaParseException("Null name");
- int length = name.length();
- if (length == 0)
- throw new SchemaParseException("Empty name");
- char first = name.charAt(0);
- if (!(Character.isLetter(first) || first == '_'))
- throw new SchemaParseException("Illegal initial character: " + name);
- for (int i = 1; i < length; i++) {
- char c = name.charAt(i);
- if (!(Character.isLetterOrDigit(c) || c == '_'))
- throw new SchemaParseException("Illegal character in: " + name);
+ NameValidator.Result result = validateNames.get().validate(name);
+ if (!result.isOK()) {
+ throw new SchemaParseException(result.errors);
}
return name;
}
+ public static void setNameValidator(final Schema.NameValidator validator) {
+ Schema.validateNames.set(validator);
+ }
+
private static final ThreadLocal<Boolean> VALIDATE_DEFAULTS =
ThreadLocalWithInitial.of(() -> true);
private static JsonNode validateDefault(String fieldName, Schema schema,
JsonNode defaultValue) {
@@ -2299,6 +2291,84 @@ public abstract class Schema extends JsonProperties
implements Serializable {
return alias;
}
+ public interface NameValidator {
+
+ class Result {
+ private final String errors;
+
+ public Result(final String errors) {
+ this.errors = errors;
+ }
+
+ public boolean isOK() {
+ return this == NameValidator.OK;
+ }
+
+ public String getErrors() {
+ return errors;
+ }
+ }
+
+ Result OK = new Result(null);
+
+ default Result validate(String name) {
+ return OK;
+ }
+
+ NameValidator NO_VALIDATION = new NameValidator() {
+ };
+
+ NameValidator UTF_VALIDATOR = new NameValidator() {
+ @Override
+ public Result validate(final String name) {
+ if (name == null)
+ return new Result("Null name");
+ int length = name.length();
+ if (length == 0)
+ return new Result("Empty name");
+ char first = name.charAt(0);
+ if (!(Character.isLetter(first) || first == '_'))
+ return new Result("Illegal initial character: " + name);
+ for (int i = 1; i < length; i++) {
+ char c = name.charAt(i);
+ if (!(Character.isLetterOrDigit(c) || c == '_'))
+ return new Result("Illegal character in: " + name);
+ }
+ return OK;
+ }
+ };
+
+ NameValidator STRICT_VALIDATOR = new NameValidator() {
+ @Override
+ public Result validate(final String name) {
+ if (name == null)
+ return new Result("Null name");
+ int length = name.length();
+ if (length == 0)
+ return new Result("Empty name");
+ char first = name.charAt(0);
+ if (!(isLetter(first) || first == '_'))
+ return new Result("Illegal initial character: " + name);
+ for (int i = 1; i < length; i++) {
+ char c = name.charAt(i);
+ if (!(isLetter(c) || isDigit(c) || c == '_'))
+ return new Result("Illegal character in: " + name);
+ }
+ return OK;
+ }
+
+ private boolean isLetter(char c) {
+ return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
+ }
+
+ private boolean isDigit(char c) {
+ return c >= '0' && c <= '9';
+ }
+
+ };
+
+ }
+
/**
* No change is permitted on LockableArrayList once lock() has been called on
* it.
diff --git
a/lang/java/avro/src/main/java/org/apache/avro/file/DataFileStream.java
b/lang/java/avro/src/main/java/org/apache/avro/file/DataFileStream.java
index a2b517225..150d2ace9 100644
--- a/lang/java/avro/src/main/java/org/apache/avro/file/DataFileStream.java
+++ b/lang/java/avro/src/main/java/org/apache/avro/file/DataFileStream.java
@@ -139,7 +139,7 @@ public class DataFileStream<D> implements Iterator<D>,
Iterable<D>, Closeable {
// finalize the header
header.metaKeyList = Collections.unmodifiableList(header.metaKeyList);
- header.schema = new
Schema.Parser().setValidate(false).setValidateDefaults(false)
+ header.schema = new
Schema.Parser(Schema.NameValidator.NO_VALIDATION).setValidateDefaults(false)
.parse(getMetaString(DataFileConstants.SCHEMA));
this.codec = resolveCodec();
reader.setSchema(header.schema);
diff --git
a/lang/java/avro/src/test/java/org/apache/avro/SchemaNameValidatorTest.java
b/lang/java/avro/src/test/java/org/apache/avro/SchemaNameValidatorTest.java
new file mode 100644
index 000000000..6846c4434
--- /dev/null
+++ b/lang/java/avro/src/test/java/org/apache/avro/SchemaNameValidatorTest.java
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.avro;
+
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.Arguments;
+import org.junit.jupiter.params.provider.MethodSource;
+
+import java.util.stream.Stream;
+
+class SchemaNameValidatorTest {
+
+ @ParameterizedTest
+ @MethodSource("data")
+ void validator(Schema.NameValidator validator, String input, boolean
expectedResult) {
+ Schema.NameValidator.Result result = validator.validate(input);
+ Assertions.assertEquals(expectedResult, result.isOK(), result.getErrors());
+ }
+
+ static Stream<Arguments> data() {
+ return Stream.of(Arguments.of(Schema.NameValidator.UTF_VALIDATOR, null,
false), // null not accepted
+ Arguments.of(Schema.NameValidator.STRICT_VALIDATOR, null, false), //
null not accepted
+ Arguments.of(Schema.NameValidator.UTF_VALIDATOR, "", false), // empty
not accepted
+ Arguments.of(Schema.NameValidator.STRICT_VALIDATOR, "", false), //
empty not accepted
+ Arguments.of(Schema.NameValidator.UTF_VALIDATOR, "Hello world",
false), // space not accepted
+ Arguments.of(Schema.NameValidator.STRICT_VALIDATOR, "Hello world",
false), // space not accepted
+ Arguments.of(Schema.NameValidator.UTF_VALIDATOR, "H&", false), // non
letter or digit not accepted
+ Arguments.of(Schema.NameValidator.STRICT_VALIDATOR, "H&", false), //
non letter or digit not accepted
+ Arguments.of(Schema.NameValidator.UTF_VALIDATOR, "H=", false), // non
letter or digit not accepted
+ Arguments.of(Schema.NameValidator.STRICT_VALIDATOR, "H=", false), //
non letter or digit not accepted
+ Arguments.of(Schema.NameValidator.UTF_VALIDATOR, "H]", false), // non
letter or digit not accepted
+ Arguments.of(Schema.NameValidator.STRICT_VALIDATOR, "H]", false), //
non letter or digit not accepted
+ Arguments.of(Schema.NameValidator.UTF_VALIDATOR, "Hello_world", true),
+ Arguments.of(Schema.NameValidator.STRICT_VALIDATOR, "Hello_world",
true),
+ Arguments.of(Schema.NameValidator.UTF_VALIDATOR, "éàçô", true), //
Accept accent
+ Arguments.of(Schema.NameValidator.STRICT_VALIDATOR, "éàçô", false), //
Not Accept accent
+ Arguments.of(Schema.NameValidator.UTF_VALIDATOR, "5éàçô", false), //
can't start with number
+ Arguments.of(Schema.NameValidator.STRICT_VALIDATOR, "5éàçô", false),
// can't start with number
+ Arguments.of(Schema.NameValidator.UTF_VALIDATOR, "_Hello_world", true),
+ Arguments.of(Schema.NameValidator.STRICT_VALIDATOR, "_Hello_world",
true));
+ }
+
+}
diff --git
a/lang/java/avro/src/test/java/org/apache/avro/TestDataFileReader.java
b/lang/java/avro/src/test/java/org/apache/avro/TestDataFileReader.java
index fb42d639e..a85b96640 100644
--- a/lang/java/avro/src/test/java/org/apache/avro/TestDataFileReader.java
+++ b/lang/java/avro/src/test/java/org/apache/avro/TestDataFileReader.java
@@ -87,7 +87,7 @@ public class TestDataFileReader {
// magic header check. This happens with throttled input stream,
// where we read into buffer less bytes than requested.
- Schema legacySchema = new
Schema.Parser().setValidate(false).setValidateDefaults(false)
+ Schema legacySchema = new
Schema.Parser(Schema.NameValidator.NO_VALIDATION).setValidateDefaults(false)
.parse("{\"type\": \"record\", \"name\": \"TestSchema\", \"fields\": "
+ "[ {\"name\": \"id\", \"type\": [\"long\", \"null\"],
\"default\": null}]}");
File f = Files.createTempFile("testThrottledInputStream",
".avro").toFile();
@@ -146,7 +146,7 @@ public class TestDataFileReader {
// AVRO-2944 describes hanging/failure in reading Avro file with
performing
// magic header check. This potentially happens with a defective input
stream
// where a -1 value is unexpectedly returned from a read.
- Schema legacySchema = new
Schema.Parser().setValidate(false).setValidateDefaults(false)
+ Schema legacySchema = new
Schema.Parser(Schema.NameValidator.NO_VALIDATION).setValidateDefaults(false)
.parse("{\"type\": \"record\", \"name\": \"TestSchema\", \"fields\":
"
+ "[ {\"name\": \"id\", \"type\": [\"long\", \"null\"],
\"default\": null}]}");
File f = Files.createTempFile("testInputStreamEOF", ".avro").toFile();
@@ -195,7 +195,7 @@ public class TestDataFileReader {
// This schema has an accent in the name and the default for the field
doesn't
// match the first type in the union. A Java SDK in the past could create
a file
// containing this schema.
- Schema legacySchema = new
Schema.Parser().setValidate(false).setValidateDefaults(false)
+ Schema legacySchema = new
Schema.Parser(Schema.NameValidator.NO_VALIDATION).setValidateDefaults(false)
.parse("{\"type\": \"record\", \"name\":
\"InvalidAccëntWithInvalidNull\", \"fields\": "
+ "[ {\"name\": \"id\", \"type\": [\"long\", \"null\"],
\"default\": null}]}");
diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestSchema.java
b/lang/java/avro/src/test/java/org/apache/avro/TestSchema.java
index b4c259e92..9900fd635 100644
--- a/lang/java/avro/src/test/java/org/apache/avro/TestSchema.java
+++ b/lang/java/avro/src/test/java/org/apache/avro/TestSchema.java
@@ -500,17 +500,15 @@ public class TestSchema {
}
@Test
- void testContentAfterAvsc() throws Exception {
- Schema.Parser parser = new Schema.Parser();
- parser.setValidate(true);
+ void testContentAfterAvsc() {
+ Schema.Parser parser = new
Schema.Parser(Schema.NameValidator.UTF_VALIDATOR);
parser.setValidateDefaults(true);
assertThrows(SchemaParseException.class, () -> parser.parse("{\"type\":
\"string\"}; DROP TABLE STUDENTS"));
}
@Test
void testContentAfterAvscInInputStream() throws Exception {
- Schema.Parser parser = new Schema.Parser();
- parser.setValidate(true);
+ Schema.Parser parser = new
Schema.Parser(Schema.NameValidator.UTF_VALIDATOR);
parser.setValidateDefaults(true);
String avsc = "{\"type\": \"string\"}; DROP TABLE STUDENTS";
ByteArrayInputStream is = new
ByteArrayInputStream(avsc.getBytes(StandardCharsets.UTF_8));
@@ -526,8 +524,7 @@ public class TestSchema {
writer.flush();
}
- Schema.Parser parser = new Schema.Parser();
- parser.setValidate(true);
+ Schema.Parser parser = new
Schema.Parser(Schema.NameValidator.UTF_VALIDATOR);
parser.setValidateDefaults(true);
assertThrows(SchemaParseException.class, () -> parser.parse(avscFile));
}
diff --git
a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaBuilder.java
b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaBuilder.java
index 784bfba02..fcbaae655 100644
--- a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaBuilder.java
+++ b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaBuilder.java
@@ -38,6 +38,8 @@ import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecordBuilder;
+
+import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;
@@ -874,4 +876,25 @@ public class TestSchemaBuilder {
assertEquals(a2, a1);
}
+
+ @Test
+ void namesAcceptAll() throws InterruptedException {
+ // Ensure that Schema.setNameValidator won't interfere with others unit
tests.
+ Runnable r = () -> {
+ Schema.setNameValidator(Schema.NameValidator.NO_VALIDATION);
+ final Schema schema =
SchemaBuilder.record("7name").fields().name("123").type(Schema.create(Schema.Type.INT))
+ .noDefault().endRecord();
+ Assertions.assertNotNull(schema);
+ Assertions.assertEquals("7name", schema.getName());
+ final Schema.Field field = schema.getField("123");
+ Assertions.assertEquals("123", field.name());
+ };
+
+ final Throwable[] exception = new Throwable[] { null };
+ Thread t = new Thread(r);
+ t.setUncaughtExceptionHandler((Thread th, Throwable e) -> exception[0] =
e);
+ t.start();
+ t.join();
+ Assertions.assertNull(exception[0], () -> exception[0].getMessage());
+ }
}
diff --git
a/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflect.java
b/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflect.java
index 470010e6f..e8fadeea7 100644
--- a/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflect.java
+++ b/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflect.java
@@ -1232,7 +1232,7 @@ public class TestReflect {
@Test
void dollarTerminatedNamespaceCompatibility() {
ReflectData data = ReflectData.get();
- Schema s = new Schema.Parser().setValidate(false).parse(
+ Schema s = new Schema.Parser(Schema.NameValidator.NO_VALIDATION).parse(
"{\"type\":\"record\",\"name\":\"Z\",\"namespace\":\"org.apache.avro.reflect.TestReflect$\",\"fields\":[]}");
assertEquals(data.getSchema(data.getClass(s)).toString(),
"{\"type\":\"record\",\"name\":\"Z\",\"namespace\":\"org.apache.avro.reflect.TestReflect\",\"fields\":[]}");
@@ -1242,7 +1242,7 @@ public class TestReflect {
void dollarTerminatedNestedStaticClassNamespaceCompatibility() {
ReflectData data = ReflectData.get();
// Older versions of Avro generated this namespace on nested records.
- Schema s = new Schema.Parser().setValidate(false).parse(
+ Schema s = new Schema.Parser(Schema.NameValidator.NO_VALIDATION).parse(
"{\"type\":\"record\",\"name\":\"AnotherSampleRecord\",\"namespace\":\"org.apache.avro.reflect.TestReflect$SampleRecord\",\"fields\":[]}");
assertThat(data.getSchema(data.getClass(s)).getFullName(),
is("org.apache.avro.reflect.TestReflect.SampleRecord.AnotherSampleRecord"));
diff --git a/lang/java/ipc/src/test/java/org/apache/avro/TestSchema.java
b/lang/java/ipc/src/test/java/org/apache/avro/TestSchema.java
index 9bb3e281a..eb2d5c918 100644
--- a/lang/java/ipc/src/test/java/org/apache/avro/TestSchema.java
+++ b/lang/java/ipc/src/test/java/org/apache/avro/TestSchema.java
@@ -205,9 +205,9 @@ public class TestSchema {
@Test
void invalidNameTolerance() {
- new
Schema.Parser().setValidate(false).parse("{\"type\":\"record\",\"name\":\"1X\",\"fields\":[]}");
- new
Schema.Parser().setValidate(false).parse("{\"type\":\"record\",\"name\":\"X-\",\"fields\":[]}");
- new
Schema.Parser().setValidate(false).parse("{\"type\":\"record\",\"name\":\"X$\",\"fields\":[]}");
+ new
Schema.Parser(Schema.NameValidator.NO_VALIDATION).parse("{\"type\":\"record\",\"name\":\"1X\",\"fields\":[]}");
+ new
Schema.Parser(Schema.NameValidator.NO_VALIDATION).parse("{\"type\":\"record\",\"name\":\"X-\",\"fields\":[]}");
+ new
Schema.Parser(Schema.NameValidator.NO_VALIDATION).parse("{\"type\":\"record\",\"name\":\"X$\",\"fields\":[]}");
}
@Test