Repository: hbase Updated Branches: refs/heads/master 553d5db35 -> 709f5a198
HBASE-18075 Support non-latin table names and namespaces Project: http://git-wip-us.apache.org/repos/asf/hbase/repo Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/709f5a19 Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/709f5a19 Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/709f5a19 Branch: refs/heads/master Commit: 709f5a1980abe65eb85c638dacaaea8502ee0034 Parents: f1544c3 Author: Josh Elser <els...@apache.org> Authored: Thu May 18 18:38:25 2017 -0400 Committer: Josh Elser <els...@apache.org> Committed: Sun May 21 22:24:12 2017 -0400 ---------------------------------------------------------------------- .../hadoop/hbase/TestHTableDescriptor.java | 20 ++++- .../java/org/apache/hadoop/hbase/TableName.java | 84 ++++++++++++-------- .../org/apache/hadoop/hbase/io/HFileLink.java | 3 +- 3 files changed, 73 insertions(+), 34 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hbase/blob/709f5a19/hbase-client/src/test/java/org/apache/hadoop/hbase/TestHTableDescriptor.java ---------------------------------------------------------------------- diff --git a/hbase-client/src/test/java/org/apache/hadoop/hbase/TestHTableDescriptor.java b/hbase-client/src/test/java/org/apache/hadoop/hbase/TestHTableDescriptor.java index 9a6d3e3..bcff565 100644 --- a/hbase-client/src/test/java/org/apache/hadoop/hbase/TestHTableDescriptor.java +++ b/hbase-client/src/test/java/org/apache/hadoop/hbase/TestHTableDescriptor.java @@ -23,6 +23,7 @@ import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; import java.io.IOException; +import java.util.Arrays; import java.util.regex.Pattern; import org.apache.commons.logging.Log; @@ -181,10 +182,13 @@ public class TestHTableDescriptor { String legalTableNames[] = { "foo", "with-dash_under.dot", "_under_start_ok", "with-dash.with_underscore", "02-01-2012.my_table_01-02", "xyz._mytable_", "9_9_0.table_02" , "dot1.dot2.table", 
"new.-mytable", "with-dash.with.dot", "legal..t2", "legal..legal.t2", - "trailingdots..", "trailing.dots...", "ns:mytable", "ns:_mytable_", "ns:my_table_01-02"}; + "trailingdots..", "trailing.dots...", "ns:mytable", "ns:_mytable_", "ns:my_table_01-02", + "汉", "汉:字", "_字_", "foo:字", "foo.字", "字.foo"}; + // Avoiding "zookeeper" in here as it's tough to encode in regex String illegalTableNames[] = { ".dot_start_illegal", "-dash_start_illegal", "spaces not ok", "-dash-.start_illegal", "new.table with space", "01 .table", "ns:-illegaldash", - "new:.illegaldot", "new:illegalcolon1:", "new:illegalcolon1:2"}; + "new:.illegaldot", "new:illegalcolon1:", "new:illegalcolon1:2", String.valueOf((char)130), + String.valueOf((char)5), String.valueOf((char)65530)}; @Test public void testLegalHTableNames() { @@ -206,6 +210,18 @@ public class TestHTableDescriptor { } @Test + public void testIllegalZooKeeperName() { + for (String name : Arrays.asList("zookeeper", "ns:zookeeper", "zookeeper:table")) { + try { + TableName.isLegalFullyQualifiedTableName(Bytes.toBytes(name)); + fail("invalid tablename " + name + " should have failed"); + } catch (Exception e) { + // expected + } + } + } + + @Test public void testLegalHTableNamesRegex() { for (String tn : legalTableNames) { TableName tName = TableName.valueOf(tn); http://git-wip-us.apache.org/repos/asf/hbase/blob/709f5a19/hbase-common/src/main/java/org/apache/hadoop/hbase/TableName.java ---------------------------------------------------------------------- diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/TableName.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/TableName.java index c4c15d0..a9b2527 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/TableName.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/TableName.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hbase; import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.Set; 
import java.util.concurrent.CopyOnWriteArraySet; @@ -66,10 +67,10 @@ public final class TableName implements Comparable<TableName> { // in default namespace //Allows only letters, digits and '_' public static final String VALID_NAMESPACE_REGEX = - "(?:[a-zA-Z_0-9]+)"; + "(?:[_\\p{Digit}\\p{IsAlphabetic}]+)"; //Allows only letters, digits, '_', '-' and '.' public static final String VALID_TABLE_QUALIFIER_REGEX = - "(?:[a-zA-Z_0-9][a-zA-Z_0-9-.]*)"; + "(?:[_\\p{Digit}\\p{IsAlphabetic}][-_.\\p{Digit}\\p{IsAlphabetic}]*)"; //Concatenation of NAMESPACE_REGEX and TABLE_QUALIFIER_REGEX, //with NAMESPACE_DELIM as delimiter public static final String VALID_USER_TABLE_REGEX = @@ -87,6 +88,9 @@ public final class TableName implements Comparable<TableName> { public static final String OLD_META_STR = ".META."; public static final String OLD_ROOT_STR = "-ROOT-"; + /** One globally disallowed name */ + public static final String DISALLOWED_TABLE_NAME = "zookeeper"; + /** * @return True if <code>tn</code> is the hbase:meta table name. */ @@ -118,14 +122,14 @@ public final class TableName implements Comparable<TableName> { * @return Returns passed <code>tableName</code> param * @throws IllegalArgumentException if passed a tableName is null or * is made of other than 'word' characters or underscores: i.e. - * <code>[a-zA-Z_0-9.-:]</code>. The ':' is used to delimit the namespace + * <code>[\p{IsAlphabetic}\p{Digit}.-:]</code>. The ':' is used to delimit the namespace * from the table name and can be used for nothing else. * * Namespace names can only contain 'word' characters - * <code>[a-zA-Z_0-9]</code> or '_' + * <code>[\p{IsAlphabetic}\p{Digit}]</code> or '_' * * Qualifier names can only contain 'word' characters - * <code>[a-zA-Z_0-9]</code> or '_', '.' or '-'. + * <code>[\p{IsAlphabetic}\p{Digit}]</code> or '_', '.' or '-'. * The name may not start with '.' or '-'. 
* * Valid fully qualified table names: @@ -161,7 +165,7 @@ public final class TableName implements Comparable<TableName> { /** * Qualifier names can only contain 'word' characters - * <code>[a-zA-Z_0-9]</code> or '_', '.' or '-'. + * <code>[\p{IsAlphabetic}\p{Digit}]</code> or '_', '.' or '-'. * The name may not start with '.' or '-'. * * @param qualifierName byte array containing the qualifier name @@ -181,29 +185,37 @@ public final class TableName implements Comparable<TableName> { if(end - start < 1) { throw new IllegalArgumentException(isSnapshot ? "Snapshot" : "Table" + " qualifier must not be empty"); } - if (qualifierName[start] == '.' || qualifierName[start] == '-') { throw new IllegalArgumentException("Illegal first character <" + qualifierName[start] + "> at 0. " + (isSnapshot ? "Snapshot" : "User-space table") + " qualifiers can only start with 'alphanumeric " + - "characters': i.e. [a-zA-Z_0-9]: " + + "characters' from any language: " + Bytes.toString(qualifierName, start, end)); } - for (int i = start; i < end; i++) { - if (Character.isLetterOrDigit(qualifierName[i]) || - qualifierName[i] == '_' || - qualifierName[i] == '-' || - qualifierName[i] == '.') { + // Treat the bytes as UTF-8 + String qualifierString = new String( + qualifierName, start, (end - start), StandardCharsets.UTF_8); + if (qualifierString.equals(DISALLOWED_TABLE_NAME)) { + // Per https://zookeeper.apache.org/doc/r3.4.10/zookeeperProgrammers.html#ch_zkDataModel + // A znode named "zookeeper" is disallowed by zookeeper. + throw new IllegalArgumentException("Tables may not be named '" + DISALLOWED_TABLE_NAME + "'"); + } + for (int i = 0; i < qualifierString.length(); i++) { + // Treat the string as a char-array as some characters may be multi-byte + char c = qualifierString.charAt(i); + // Check for letter, digit, underscore, hyphen, or period, and allowed by ZK. 
+ // ZooKeeper also has limitations, but Character.isAlphabetic omits those all + // See https://zookeeper.apache.org/doc/r3.4.10/zookeeperProgrammers.html#ch_zkDataModel + if (Character.isAlphabetic(c) || Character.isDigit(c) || c == '_' || c == '-' || c == '.') { continue; } - throw new IllegalArgumentException("Illegal character code:" + qualifierName[i] + - ", <" + (char) qualifierName[i] + "> at " + i + - ". " + (isSnapshot ? "Snapshot" : "User-space table") + - " qualifiers can only contain " + - "'alphanumeric characters': i.e. [a-zA-Z_0-9-.]: " + - Bytes.toString(qualifierName, start, end)); + throw new IllegalArgumentException("Illegal character code:" + (int) c + ", <" + c + "> at " + + i + ". " + (isSnapshot ? "Snapshot" : "User-space table") + + " qualifiers may only contain 'alphanumeric characters' and digits: " + + qualifierString); } } + public static void isLegalNamespaceName(byte[] namespaceName) { isLegalNamespaceName(namespaceName, 0, namespaceName.length); } @@ -217,14 +229,23 @@ public final class TableName implements Comparable<TableName> { if(end - start < 1) { throw new IllegalArgumentException("Namespace name must not be empty"); } - for (int i = start; i < end; i++) { - if (Character.isLetterOrDigit(namespaceName[i])|| namespaceName[i] == '_') { + String nsString = new String(namespaceName, start, (end - start), StandardCharsets.UTF_8); + if (nsString.equals(DISALLOWED_TABLE_NAME)) { + // Per https://zookeeper.apache.org/doc/r3.4.10/zookeeperProgrammers.html#ch_zkDataModel + // A znode named "zookeeper" is disallowed by zookeeper. 
+ throw new IllegalArgumentException("Tables may not be named '" + DISALLOWED_TABLE_NAME + "'"); + } + for (int i = 0; i < nsString.length(); i++) { + // Treat the string as a char-array as some characters may be multi-byte + char c = nsString.charAt(i); + // ZooKeeper also has limitations, but Character.isAlphabetic omits those all + // See https://zookeeper.apache.org/doc/r3.4.10/zookeeperProgrammers.html#ch_zkDataModel + if (Character.isAlphabetic(c) || Character.isDigit(c)|| c == '_') { continue; } - throw new IllegalArgumentException("Illegal character <" + namespaceName[i] + - "> at " + i + ". Namespaces can only contain " + - "'alphanumeric characters': i.e. [a-zA-Z_0-9]: " + Bytes.toString(namespaceName, - start, end)); + throw new IllegalArgumentException("Illegal character <" + c + + "> at " + i + ". Namespaces may only contain " + + "'alphanumeric characters' from any language and digits: " + nsString); } } @@ -441,18 +462,19 @@ public final class TableName implements Comparable<TableName> { } } - int namespaceDelimIndex = name.indexOf(NAMESPACE_DELIM); - byte[] nameB = Bytes.toBytes(name); + final int namespaceDelimIndex = name.indexOf(NAMESPACE_DELIM); if (namespaceDelimIndex < 0) { return createTableNameIfNecessary( ByteBuffer.wrap(NamespaceDescriptor.DEFAULT_NAMESPACE_NAME), - ByteBuffer.wrap(nameB)); + ByteBuffer.wrap(Bytes.toBytes(name))); } else { + // indexOf is by character, not byte (consider multi-byte characters) + String ns = name.substring(0, namespaceDelimIndex); + String qualifier = name.substring(namespaceDelimIndex + 1); return createTableNameIfNecessary( - ByteBuffer.wrap(nameB, 0, namespaceDelimIndex), - ByteBuffer.wrap(nameB, namespaceDelimIndex + 1, - nameB.length - (namespaceDelimIndex + 1))); + ByteBuffer.wrap(Bytes.toBytes(ns)), + ByteBuffer.wrap(Bytes.toBytes(qualifier))); } } http://git-wip-us.apache.org/repos/asf/hbase/blob/709f5a19/hbase-server/src/main/java/org/apache/hadoop/hbase/io/HFileLink.java 
---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/HFileLink.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/HFileLink.java index cdc5be1..96ad7de 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/HFileLink.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/HFileLink.java @@ -64,7 +64,8 @@ public class HFileLink extends FileLink { * The HFileLink describe a link to an hfile in a different table/region * and the name is in the form: table=region-hfile. * <p> - * Table name is ([a-zA-Z_0-9][a-zA-Z_0-9.-]*), so '=' is an invalid character for the table name. + * Table name is ([\p{IsAlphabetic}\p{Digit}][\p{IsAlphabetic}\p{Digit}.-]*), so '=' is an invalid + * character for the table name. * Region name is ([a-f0-9]+), so '-' is an invalid character for the region name. * HFile is ([0-9a-f]+(?:_SeqId_[0-9]+_)?) covering the plain hfiles (uuid) * and the bulk loaded (_SeqId_[0-9]+_) hfiles.