This is an automated email from the ASF dual-hosted git repository. alsuliman pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/asterixdb.git
commit 831b981dc5b0a003283b3936ab0af27c372a614c Author: Michael Blow <[email protected]> AuthorDate: Thu Mar 25 20:05:31 2021 -0400 [NO ISSUE][*DB] Update Dataverse canonical form Update canonical dataverse name to use / as a part separator *NOTE* this breaks metadata compatibility with existing multi-part dataverse names, as the canonical format is stored in metadata Change-Id: Ifc7d7fe5d7ce9a922371c1a9c6685d7a5dc64c33 Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/10704 Integration-Tests: Jenkins <[email protected]> Tested-by: Jenkins <[email protected]> Reviewed-by: Michael Blow <[email protected]> Reviewed-by: Murtadha Hubail <[email protected]> --- .../special_chars_2/special_chars_2.2.adm | 6 +- .../asterix/common/metadata/DataverseName.java | 74 ++++------------------ .../asterix/common/metadata/DataverseNameTest.java | 25 ++++---- 3 files changed, 28 insertions(+), 77 deletions(-) diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/multipart-dataverse/special_chars_2/special_chars_2.2.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/multipart-dataverse/special_chars_2/special_chars_2.2.adm index 9abda4f..7edd3eb 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/results/multipart-dataverse/special_chars_2/special_chars_2.2.adm +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/multipart-dataverse/special_chars_2/special_chars_2.2.adm @@ -1,6 +1,6 @@ { "CanonicalName": "A", "DisplayName": "A", "NameParts": [ "A" ] } -{ "CanonicalName": "B.C", "DisplayName": "B.C", "NameParts": [ "B", "C" ] } -{ "CanonicalName": "C@.D@.E", "DisplayName": "`C.D.E`", "NameParts": [ "C.D.E" ] } +{ "CanonicalName": "B/C", "DisplayName": "B.C", "NameParts": [ "B", "C" ] } +{ "CanonicalName": "C.D.E", "DisplayName": "`C.D.E`", "NameParts": [ "C.D.E" ] } { "CanonicalName": "Default", "DisplayName": "Default", "NameParts": [ "Default" ] } { "CanonicalName": "Metadata", "DisplayName": 
"Metadata", "NameParts": [ "Metadata" ] } -{ "CanonicalName": "a-A.b_B.c$C.z@.Z", "DisplayName": "`a-A`.b_B.c$C.`z.Z`", "NameParts": [ "a-A", "b_B", "c$C", "z.Z" ] } \ No newline at end of file +{ "CanonicalName": "a-A/b_B/c$C/z.Z", "DisplayName": "`a-A`.b_B.c$C.`z.Z`", "NameParts": [ "a-A", "b_B", "c$C", "z.Z" ] } \ No newline at end of file diff --git a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/metadata/DataverseName.java b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/metadata/DataverseName.java index b8124a2..f943dea 100644 --- a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/metadata/DataverseName.java +++ b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/metadata/DataverseName.java @@ -36,11 +36,10 @@ import org.apache.commons.lang3.StringUtils; * <p> * Each dataverse name can be encoded into a single string (called a canonical form) by * {@link #getCanonicalForm()} and decoded back from it with {@link #createFromCanonicalForm(String)}. - * The canonical form encoding concatenates name parts together with {@link #CANONICAL_FORM_SEPARATOR_CHAR '.'} - * character. The {@link #CANONICAL_FORM_ESCAPE_CHAR '@'} character is used to escape - * {@link #CANONICAL_FORM_SEPARATOR_CHAR '.'} and itself in each name part prior to concatenation. + * The canonical form encoding concatenates name parts together with {@link #CANONICAL_FORM_SEPARATOR_CHAR '/'} + * character. * <p> - * E.g. the canonical form for a dataverse name {@code ["a", "b", "c"]} is {@code "a.b.c"} + * E.g. 
the canonical form for a dataverse name {@code ["a", "b", "c"]} is {@code "a/b/c"} * <p> * {@link #toString()} returns a display form which is suitable for error messages, * and is a valid SQL++ multi-part identifier parsable by {@code IParser#parseMultipartIdentifier()} @@ -59,11 +58,9 @@ import org.apache.commons.lang3.StringUtils; */ public final class DataverseName implements Serializable, Comparable<DataverseName> { - private static final long serialVersionUID = 1L; + private static final long serialVersionUID = 2L; - public static final char CANONICAL_FORM_SEPARATOR_CHAR = '.'; - - private static final char CANONICAL_FORM_ESCAPE_CHAR = '@'; + public static final char CANONICAL_FORM_SEPARATOR_CHAR = '/'; public static final char DISPLAY_FORM_SEPARATOR_CHAR = '.'; @@ -72,7 +69,7 @@ public final class DataverseName implements Serializable, Comparable<DataverseNa private static final char DISPLAY_FORM_ESCAPE_CHAR = '\\'; private static final char[] CANONICAL_FORM_SEPARATOR_AND_ESCAPE_CHARS = - new char[] { CANONICAL_FORM_SEPARATOR_CHAR, CANONICAL_FORM_ESCAPE_CHAR }; + new char[] { CANONICAL_FORM_SEPARATOR_CHAR }; private final boolean isMultiPart; @@ -282,13 +279,7 @@ public final class DataverseName implements Serializable, Comparable<DataverseNa } private static void encodePartIntoCanonicalForm(String part, StringBuilder out) { - for (int i = 0, ln = part.length(); i < ln; i++) { - char c = part.charAt(i); - if (c == CANONICAL_FORM_SEPARATOR_CHAR || c == CANONICAL_FORM_ESCAPE_CHAR) { - out.append(CANONICAL_FORM_ESCAPE_CHAR); - } - out.append(c); - } + out.append(part); } private static <T> void decodeCanonicalForm(String canonicalForm, BiConsumer<CharSequence, T> partConsumer, @@ -297,18 +288,11 @@ public final class DataverseName implements Serializable, Comparable<DataverseNa StringBuilder sb = new StringBuilder(ln); for (int i = 0; i < ln; i++) { char c = canonicalForm.charAt(i); - switch (c) { - case CANONICAL_FORM_SEPARATOR_CHAR: - partConsumer.accept(sb, 
partConsumerArg); - sb.setLength(0); - break; - case CANONICAL_FORM_ESCAPE_CHAR: - i++; - c = canonicalForm.charAt(i); - // fall through to 'default' - default: - sb.append(c); - break; + if (c == CANONICAL_FORM_SEPARATOR_CHAR) { + partConsumer.accept(sb, partConsumerArg); + sb.setLength(0); + } else { + sb.append(c); } } if (sb.length() > 0) { @@ -318,41 +302,11 @@ public final class DataverseName implements Serializable, Comparable<DataverseNa // optimization for a single part name private static String decodeSinglePartNameFromCanonicalForm(String canonicalForm) { - if (canonicalForm.indexOf(CANONICAL_FORM_ESCAPE_CHAR) < 0) { - // no escaping was done - return canonicalForm; - } - - StringBuilder singlePart = new StringBuilder(canonicalForm.length()); - for (int i = 0, ln = canonicalForm.length(); i < ln; i++) { - char c = canonicalForm.charAt(i); - switch (c) { - case CANONICAL_FORM_SEPARATOR_CHAR: - throw new IllegalStateException(canonicalForm); // should never happen - case CANONICAL_FORM_ESCAPE_CHAR: - i++; - c = canonicalForm.charAt(i); - // fall through to 'default' - default: - singlePart.append(c); - break; - } - } - return singlePart.toString(); + return canonicalForm; } private static boolean isMultiPartCanonicalForm(String canonicalForm) { - for (int i = 0, ln = canonicalForm.length(); i < ln; i++) { - char c = canonicalForm.charAt(i); - switch (c) { - case CANONICAL_FORM_SEPARATOR_CHAR: - return true; - case CANONICAL_FORM_ESCAPE_CHAR: - i++; - break; - } - } - return false; + return canonicalForm.indexOf(CANONICAL_FORM_SEPARATOR_CHAR) != -1; } private static void addPartToCollection(CharSequence part, Collection<? 
super String> out) { diff --git a/asterixdb/asterix-common/src/test/java/org/apache/asterix/common/metadata/DataverseNameTest.java b/asterixdb/asterix-common/src/test/java/org/apache/asterix/common/metadata/DataverseNameTest.java index 2f0dff5..75b3989 100644 --- a/asterixdb/asterix-common/src/test/java/org/apache/asterix/common/metadata/DataverseNameTest.java +++ b/asterixdb/asterix-common/src/test/java/org/apache/asterix/common/metadata/DataverseNameTest.java @@ -51,9 +51,7 @@ public class DataverseNameTest { private static final List<String> TEST_BUILTIN_DATAVERSE_INVALID_NAME_PARAMS = Arrays.asList( // separator character is not allowed - "a.b", - // escape character is not allowed - "c@d"); + "a/b"); private static final List<Triple<String, String, String>> TEST_SINGLE_PART_NAME_PARAMS = Arrays.asList( // <1-part-name, canonical-form, display-form> @@ -63,30 +61,30 @@ public class DataverseNameTest { // letters and digits new Triple<>("aA09", "aA09", "aA09"), // with canonical form escape character - new Triple<>("a@b", "a@@b", "`a@b`"), + new Triple<>("a@b", "a@b", "`a@b`"), // with canonical form separator character - new Triple<>("a.b", "a@.b", "`a.b`"), + new Triple<>("a.b", "a.b", "`a.b`"), // with canonical form escape and separator characters - new Triple<>("a@.b", "a@@@.b", "`a@.b`"), + new Triple<>("a@.b", "a@.b", "`a@.b`"), // with display form escape character new Triple<>("a\\b", "a\\b", "`a\\\\b`")); private static final List<Triple<List<String>, String, String>> TEST_MULTI_PART_NAME_PARAMS = Arrays.asList( // <multi-part-name, canonical-form, display-form> - new Triple<>(Arrays.asList("aa", "bb", "cc"), "aa.bb.cc", "aa.bb.cc"), + new Triple<>(Arrays.asList("aa", "bb", "cc"), "aa/bb/cc", "aa.bb.cc"), // mixed case letters, digits - new Triple<>(Arrays.asList("az", "AZ", "a09Z"), "az.AZ.a09Z", "az.AZ.a09Z"), + new Triple<>(Arrays.asList("az", "AZ", "a09Z"), "az/AZ/a09Z", 
"az.AZ.a09Z"), // with canonical form escape character - new Triple<>(Arrays.asList("a@a@", "@b@b", "@c@c"), "a@@a@@.@@b@@b.@@c@@c", "`a@a@`.`@b@b`.`@c@c`"), + new Triple<>(Arrays.asList("a@a@", "@b@b", "@c@c"), "a@a@/@b@b/@c@c", "`a@a@`.`@b@b`.`@c@c`"), // with canonical form separator character - new Triple<>(Arrays.asList("a.a.", ".b.b.", ".c.c"), "[email protected]@[email protected]@.b@[email protected]@.c", "`a.a.`.`.b.b.`.`.c.c`"), + new Triple<>(Arrays.asList("a.a.", ".b.b.", ".c.c"), "a.a./.b.b./.c.c", "`a.a.`.`.b.b.`.`.c.c`"), // with canonical form escape and separator characters - new Triple<>(Arrays.asList("a@a.", "@b.b@", ".c@c"), "a@@a@..@@[email protected]@@[email protected]@@c", "`a@a.`.`@b.b@`.`.c@c`"), + new Triple<>(Arrays.asList("a@a.", "@b.b@", ".c@c"), "a@a./@b.b@/.c@c", "`a@a.`.`@b.b@`.`.c@c`"), // with canonical form escape and separator characters repeated - new Triple<>(Arrays.asList("a@@a..", "@@b..b@@", "..c@@c"), "a@@@@a@.@..@@@@b@[email protected]@@@@.@[email protected]@@@@c", + new Triple<>(Arrays.asList("a@@a..", "@@b..b@@", "..c@@c"), "a@@a../@@b..b@@/..c@@c", "`a@@a..`.`@@b..b@@`.`..c@@c`"), // with display form escape character - new Triple<>(Arrays.asList("a\\b", "c\\d"), "a\\b.c\\d", "`a\\\\b`.`c\\\\d`")); + new Triple<>(Arrays.asList("a\\b", "c\\d"), "a\\b/c\\d", "`a\\\\b`.`c\\\\d`")); @Test public void testBuiltinDataverseName() throws Exception { @@ -220,7 +218,6 @@ public class DataverseNameTest { testRuntimeException(() -> DataverseName.createBuiltinDataverseName(null), NullPointerException.class); testRuntimeException(() -> DataverseName.createFromCanonicalForm(null), NullPointerException.class); testRuntimeException(() -> DataverseName.create(Collections.singletonList(null)), NullPointerException.class); - testRuntimeException(() -> DataverseName.create(Arrays.asList(null, null)), NullPointerException.class); // 3. 
IndexOutOfBoundsException testRuntimeException(() -> DataverseName.create(Collections.emptyList(), 0, 1), IndexOutOfBoundsException.class);
