Repository: hbase
Updated Branches:
  refs/heads/master 553d5db35 -> 709f5a198


HBASE-18075 Support non-latin table names and namespaces


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/709f5a19
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/709f5a19
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/709f5a19

Branch: refs/heads/master
Commit: 709f5a1980abe65eb85c638dacaaea8502ee0034
Parents: f1544c3
Author: Josh Elser <els...@apache.org>
Authored: Thu May 18 18:38:25 2017 -0400
Committer: Josh Elser <els...@apache.org>
Committed: Sun May 21 22:24:12 2017 -0400

----------------------------------------------------------------------
 .../hadoop/hbase/TestHTableDescriptor.java      | 20 ++++-
 .../java/org/apache/hadoop/hbase/TableName.java | 84 ++++++++++++--------
 .../org/apache/hadoop/hbase/io/HFileLink.java   |  3 +-
 3 files changed, 73 insertions(+), 34 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/709f5a19/hbase-client/src/test/java/org/apache/hadoop/hbase/TestHTableDescriptor.java
----------------------------------------------------------------------
diff --git 
a/hbase-client/src/test/java/org/apache/hadoop/hbase/TestHTableDescriptor.java 
b/hbase-client/src/test/java/org/apache/hadoop/hbase/TestHTableDescriptor.java
index 9a6d3e3..bcff565 100644
--- 
a/hbase-client/src/test/java/org/apache/hadoop/hbase/TestHTableDescriptor.java
+++ 
b/hbase-client/src/test/java/org/apache/hadoop/hbase/TestHTableDescriptor.java
@@ -23,6 +23,7 @@ import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
 
 import java.io.IOException;
+import java.util.Arrays;
 import java.util.regex.Pattern;
 
 import org.apache.commons.logging.Log;
@@ -181,10 +182,13 @@ public class TestHTableDescriptor {
   String legalTableNames[] = { "foo", "with-dash_under.dot", "_under_start_ok",
       "with-dash.with_underscore", "02-01-2012.my_table_01-02", 
"xyz._mytable_", "9_9_0.table_02"
       , "dot1.dot2.table", "new.-mytable", "with-dash.with.dot", "legal..t2", 
"legal..legal.t2",
-      "trailingdots..", "trailing.dots...", "ns:mytable", "ns:_mytable_", 
"ns:my_table_01-02"};
+      "trailingdots..", "trailing.dots...", "ns:mytable", "ns:_mytable_", 
"ns:my_table_01-02",
+      "汉", "汉:字", "_字_", "foo:字", "foo.字", "字.foo"};
+  // Avoiding "zookeeper" in here as it's tough to encode in regex
   String illegalTableNames[] = { ".dot_start_illegal", "-dash_start_illegal", 
"spaces not ok",
       "-dash-.start_illegal", "new.table with space", "01 .table", 
"ns:-illegaldash",
-      "new:.illegaldot", "new:illegalcolon1:", "new:illegalcolon1:2"};
+      "new:.illegaldot", "new:illegalcolon1:", "new:illegalcolon1:2", 
String.valueOf((char)130),
+      String.valueOf((char)5), String.valueOf((char)65530)};
 
   @Test
   public void testLegalHTableNames() {
@@ -206,6 +210,18 @@ public class TestHTableDescriptor {
   }
 
   @Test
+  public void testIllegalZooKeeperName() {
+    for (String name : Arrays.asList("zookeeper", "ns:zookeeper", 
"zookeeper:table")) {
+      try {
+        TableName.isLegalFullyQualifiedTableName(Bytes.toBytes(name));
+        fail("invalid tablename " + name + " should have failed");
+      } catch (Exception e) {
+        // expected
+      }
+    }
+  }
+
+  @Test
   public void testLegalHTableNamesRegex() {
     for (String tn : legalTableNames) {
       TableName tName = TableName.valueOf(tn);

http://git-wip-us.apache.org/repos/asf/hbase/blob/709f5a19/hbase-common/src/main/java/org/apache/hadoop/hbase/TableName.java
----------------------------------------------------------------------
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/TableName.java 
b/hbase-common/src/main/java/org/apache/hadoop/hbase/TableName.java
index c4c15d0..a9b2527 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/TableName.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/TableName.java
@@ -19,6 +19,7 @@
 package org.apache.hadoop.hbase;
 
 import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
 import java.util.Arrays;
 import java.util.Set;
 import java.util.concurrent.CopyOnWriteArraySet;
@@ -66,10 +67,10 @@ public final class TableName implements 
Comparable<TableName> {
   // in default namespace
   //Allows only letters, digits and '_'
   public static final String VALID_NAMESPACE_REGEX =
-      "(?:[a-zA-Z_0-9]+)";
+      "(?:[_\\p{Digit}\\p{IsAlphabetic}]+)";
   //Allows only letters, digits, '_', '-' and '.'
   public static final String VALID_TABLE_QUALIFIER_REGEX =
-      "(?:[a-zA-Z_0-9][a-zA-Z_0-9-.]*)";
+      "(?:[_\\p{Digit}\\p{IsAlphabetic}][-_.\\p{Digit}\\p{IsAlphabetic}]*)";
   //Concatenation of NAMESPACE_REGEX and TABLE_QUALIFIER_REGEX,
   //with NAMESPACE_DELIM as delimiter
   public static final String VALID_USER_TABLE_REGEX =
@@ -87,6 +88,9 @@ public final class TableName implements Comparable<TableName> 
{
   public static final String OLD_META_STR = ".META.";
   public static final String OLD_ROOT_STR = "-ROOT-";
 
+  /** One globally disallowed name */
+  public static final String DISALLOWED_TABLE_NAME = "zookeeper";
+
   /**
    * @return True if <code>tn</code> is the hbase:meta table name.
    */
@@ -118,14 +122,14 @@ public final class TableName implements 
Comparable<TableName> {
    * @return Returns passed <code>tableName</code> param
    * @throws IllegalArgumentException if passed a tableName is null or
    * is made of other than 'word' characters or underscores: i.e.
-   * <code>[a-zA-Z_0-9.-:]</code>. The ':' is used to delimit the namespace
+   * <code>[_\p{IsAlphabetic}\p{Digit}.-:]</code>. The ':' is used to delimit the namespace
    * from the table name and can be used for nothing else.
    *
    * Namespace names can only contain 'word' characters
-   * <code>[a-zA-Z_0-9]</code> or '_'
+   * <code>[\p{IsAlphabetic}\p{Digit}]</code> or '_'
    *
    * Qualifier names can only contain 'word' characters
-   * <code>[a-zA-Z_0-9]</code> or '_', '.' or '-'.
+   * <code>[\p{IsAlphabetic}\p{Digit}]</code> or '_', '.' or '-'.
    * The name may not start with '.' or '-'.
    *
    * Valid fully qualified table names:
@@ -161,7 +165,7 @@ public final class TableName implements 
Comparable<TableName> {
 
   /**
    * Qualifier names can only contain 'word' characters
-   * <code>[a-zA-Z_0-9]</code> or '_', '.' or '-'.
+   * <code>[\p{IsAlphabetic}\p{Digit}]</code> or '_', '.' or '-'.
    * The name may not start with '.' or '-'.
    *
    * @param qualifierName byte array containing the qualifier name
@@ -181,29 +185,37 @@ public final class TableName implements 
Comparable<TableName> {
     if(end - start < 1) {
       throw new IllegalArgumentException(isSnapshot ? "Snapshot" : "Table" + " 
qualifier must not be empty");
     }
-
     if (qualifierName[start] == '.' || qualifierName[start] == '-') {
       throw new IllegalArgumentException("Illegal first character <" + 
qualifierName[start] +
                                          "> at 0. " + (isSnapshot ? "Snapshot" 
: "User-space table") +
                                          " qualifiers can only start with 
'alphanumeric " +
-                                         "characters': i.e. [a-zA-Z_0-9]: " +
+                                         "characters' from any language: " +
                                          Bytes.toString(qualifierName, start, 
end));
     }
-    for (int i = start; i < end; i++) {
-      if (Character.isLetterOrDigit(qualifierName[i]) ||
-          qualifierName[i] == '_' ||
-          qualifierName[i] == '-' ||
-          qualifierName[i] == '.') {
+    // Treat the bytes as UTF-8
+    String qualifierString = new String(
+        qualifierName, start, (end - start), StandardCharsets.UTF_8);
+    if (qualifierString.equals(DISALLOWED_TABLE_NAME)) {
+      // Per 
https://zookeeper.apache.org/doc/r3.4.10/zookeeperProgrammers.html#ch_zkDataModel
+      // A znode named "zookeeper" is disallowed by zookeeper.
+      throw new IllegalArgumentException("Tables may not be named '" + 
DISALLOWED_TABLE_NAME + "'");
+    }
+    for (int i = 0; i < qualifierString.length(); i++) {
+      // Treat the string as a char-array as some characters may be multi-byte
+      char c = qualifierString.charAt(i);
+      // Check for a letter, digit, underscore, hyphen, or period that is also allowed by ZK.
+      // ZooKeeper has its own character limits, but Character.isAlphabetic excludes all of those.
+      //   See https://zookeeper.apache.org/doc/r3.4.10/zookeeperProgrammers.html#ch_zkDataModel
+      if (Character.isAlphabetic(c) || Character.isDigit(c) || c == '_' || c 
== '-' || c == '.') {
         continue;
       }
-      throw new IllegalArgumentException("Illegal character code:" + 
qualifierName[i] +
-                                         ", <" + (char) qualifierName[i] + "> 
at " + i +
-                                         ". " + (isSnapshot ? "Snapshot" : 
"User-space table") +
-                                         " qualifiers can only contain " +
-                                         "'alphanumeric characters': i.e. 
[a-zA-Z_0-9-.]: " +
-                                         Bytes.toString(qualifierName, start, 
end));
+      throw new IllegalArgumentException("Illegal character code:" + (int) c + 
", <" + c + "> at " +
+          i + ". " + (isSnapshot ? "Snapshot" : "User-space table") +
+          " qualifiers may only contain 'alphanumeric characters' and digits: 
" +
+          qualifierString);
     }
   }
+
   public static void isLegalNamespaceName(byte[] namespaceName) {
     isLegalNamespaceName(namespaceName, 0, namespaceName.length);
   }
@@ -217,14 +229,23 @@ public final class TableName implements 
Comparable<TableName> {
     if(end - start < 1) {
       throw new IllegalArgumentException("Namespace name must not be empty");
     }
-    for (int i = start; i < end; i++) {
-      if (Character.isLetterOrDigit(namespaceName[i])|| namespaceName[i] == 
'_') {
+    String nsString = new String(namespaceName, start, (end - start), 
StandardCharsets.UTF_8);
+    if (nsString.equals(DISALLOWED_TABLE_NAME)) {
+      // Per 
https://zookeeper.apache.org/doc/r3.4.10/zookeeperProgrammers.html#ch_zkDataModel
+      // A znode named "zookeeper" is disallowed by zookeeper.
+      throw new IllegalArgumentException("Tables may not be named '" + 
DISALLOWED_TABLE_NAME + "'");
+    }
+    for (int i = 0; i < nsString.length(); i++) {
+      // Treat the string as a char-array as some characters may be multi-byte
+      char c = nsString.charAt(i);
+      // ZooKeeper has its own character limits, but Character.isAlphabetic excludes all of those.
+      //   See https://zookeeper.apache.org/doc/r3.4.10/zookeeperProgrammers.html#ch_zkDataModel
+      if (Character.isAlphabetic(c) || Character.isDigit(c)|| c == '_') {
         continue;
       }
-      throw new IllegalArgumentException("Illegal character <" + 
namespaceName[i] +
-        "> at " + i + ". Namespaces can only contain " +
-        "'alphanumeric characters': i.e. [a-zA-Z_0-9]: " + 
Bytes.toString(namespaceName,
-          start, end));
+      throw new IllegalArgumentException("Illegal character <" + c +
+        "> at " + i + ". Namespaces may only contain " +
+        "'alphanumeric characters' from any language and digits: " + nsString);
     }
   }
 
@@ -441,18 +462,19 @@ public final class TableName implements 
Comparable<TableName> {
       }
     }
 
-    int namespaceDelimIndex = name.indexOf(NAMESPACE_DELIM);
-    byte[] nameB = Bytes.toBytes(name);
+    final int namespaceDelimIndex = name.indexOf(NAMESPACE_DELIM);
 
     if (namespaceDelimIndex < 0) {
       return createTableNameIfNecessary(
           ByteBuffer.wrap(NamespaceDescriptor.DEFAULT_NAMESPACE_NAME),
-          ByteBuffer.wrap(nameB));
+          ByteBuffer.wrap(Bytes.toBytes(name)));
     } else {
+      // indexOf is by character, not byte (consider multi-byte characters)
+      String ns = name.substring(0, namespaceDelimIndex);
+      String qualifier = name.substring(namespaceDelimIndex + 1);
       return createTableNameIfNecessary(
-          ByteBuffer.wrap(nameB, 0, namespaceDelimIndex),
-          ByteBuffer.wrap(nameB, namespaceDelimIndex + 1,
-              nameB.length - (namespaceDelimIndex + 1)));
+          ByteBuffer.wrap(Bytes.toBytes(ns)),
+          ByteBuffer.wrap(Bytes.toBytes(qualifier)));
     }
   }
 

http://git-wip-us.apache.org/repos/asf/hbase/blob/709f5a19/hbase-server/src/main/java/org/apache/hadoop/hbase/io/HFileLink.java
----------------------------------------------------------------------
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/HFileLink.java 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/HFileLink.java
index cdc5be1..96ad7de 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/HFileLink.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/HFileLink.java
@@ -64,7 +64,8 @@ public class HFileLink extends FileLink {
    * The HFileLink describe a link to an hfile in a different table/region
    * and the name is in the form: table=region-hfile.
    * <p>
-   * Table name is ([a-zA-Z_0-9][a-zA-Z_0-9.-]*), so '=' is an invalid 
character for the table name.
+   * Table name is ([_\p{IsAlphabetic}\p{Digit}][-_.\p{IsAlphabetic}\p{Digit}]*), so '=' is an
+   * invalid character for the table name.
    * Region name is ([a-f0-9]+), so '-' is an invalid character for the region 
name.
    * HFile is ([0-9a-f]+(?:_SeqId_[0-9]+_)?) covering the plain hfiles (uuid)
    * and the bulk loaded (_SeqId_[0-9]+_) hfiles.

Reply via email to