Repository: kudu
Updated Branches:
  refs/heads/master f165ef7d6 -> 06569f218


KUDU-981 (part 1): validate identifiers as UTF8 with no null bytes

Ensures that table names and identifiers are valid UTF8 with no embedded
null bytes which could cause problems.

KUDU-981 proposes more stringent restrictions such as limiting to only
ASCII. We should collect more info from users before making such a
limitation since it may affect Chinese users, etc. This more lenient
validation is meant more at catching bugs and identifiers that may cause
downstream software to crash.

Change-Id: Ic3b05e2882c9c2ce9b47c16450d9d54a04d3e38b
Reviewed-on: http://gerrit.cloudera.org:8080/5296
Tested-by: Kudu Jenkins
Reviewed-by: Todd Lipcon <t...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/b28c6dd6
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/b28c6dd6
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/b28c6dd6

Branch: refs/heads/master
Commit: b28c6dd683bfeddde0876ddb6b228395e0763798
Parents: f165ef7
Author: Todd Lipcon <t...@apache.org>
Authored: Wed Nov 30 17:10:54 2016 -0800
Committer: Todd Lipcon <t...@apache.org>
Committed: Thu Dec 1 22:48:56 2016 +0000

----------------------------------------------------------------------
 src/kudu/client/client-test.cc     | 41 +++++++++++++++++++++------------
 src/kudu/master/catalog_manager.cc | 17 ++++++++++++++
 2 files changed, 43 insertions(+), 15 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/b28c6dd6/src/kudu/client/client-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/client/client-test.cc b/src/kudu/client/client-test.cc
index caadd24..7827912 100644
--- a/src/kudu/client/client-test.cc
+++ b/src/kudu/client/client-test.cc
@@ -3856,21 +3856,32 @@ TEST_F(ClientTest, TestCreateTableWithTooManyColumns) {
                       "permitted maximum 300");
 }
 
-TEST_F(ClientTest, TestCreateTableWithTooLongTableName) {
-  const string kLongName(1000, 'x');
-  unique_ptr<KuduTableCreator> table_creator(client_->NewTableCreator());
-  KuduSchema schema;
-  KuduSchemaBuilder schema_builder;
-  
schema_builder.AddColumn("key")->Type(KuduColumnSchema::INT32)->NotNull()->PrimaryKey();
-  ASSERT_OK(schema_builder.Build(&schema));
-  Status s = table_creator->table_name(kLongName)
-      .schema(&schema)
-      .set_range_partition_columns({ "key" })
-      .Create();
-  ASSERT_TRUE(s.IsInvalidArgument()) << s.ToString();
-  ASSERT_STR_MATCHES(s.ToString(),
-                     "invalid table name: identifier 'xxx*' "
-                     "longer than maximum permitted length 256");
+TEST_F(ClientTest, TestCreateTable_TableNames) {
+  const vector<pair<string, string>> kCases = {
+    {string(1000, 'x'), "longer than maximum permitted length"},
+    {string("foo\0bar", 7), "invalid table name: identifier must not contain 
null bytes"},
+    // From 
http://stackoverflow.com/questions/1301402/example-invalid-utf8-string
+    {string("foo\xf0\x28\x8c\xbc", 7), "invalid table name: invalid UTF8 
sequence"},
+    // Should pass validation but fail due to lack of tablet servers running.
+    {"你好", "Not enough live tablet servers"}
+  };
+
+  for (const auto& test_case : kCases) {
+    const auto& bad_name = test_case.first;
+    const auto& substr = test_case.second;
+    SCOPED_TRACE(bad_name);
+
+    unique_ptr<KuduTableCreator> table_creator(client_->NewTableCreator());
+    KuduSchema schema;
+    KuduSchemaBuilder schema_builder;
+    
schema_builder.AddColumn("key")->Type(KuduColumnSchema::INT32)->NotNull()->PrimaryKey();
+    ASSERT_OK(schema_builder.Build(&schema));
+    Status s = table_creator->table_name(bad_name)
+        .schema(&schema)
+        .set_range_partition_columns({ "key" })
+        .Create();
+    ASSERT_STR_CONTAINS(s.ToString(), substr);
+  }
 }
 
 TEST_F(ClientTest, TestCreateTableWithTooLongColumnName) {

http://git-wip-us.apache.org/repos/asf/kudu/blob/b28c6dd6/src/kudu/master/catalog_manager.cc
----------------------------------------------------------------------
diff --git a/src/kudu/master/catalog_manager.cc 
b/src/kudu/master/catalog_manager.cc
index 33fcc92..0f3dff3 100644
--- a/src/kudu/master/catalog_manager.cc
+++ b/src/kudu/master/catalog_manager.cc
@@ -70,6 +70,7 @@
 #include "kudu/gutil/strings/join.h"
 #include "kudu/gutil/strings/substitute.h"
 #include "kudu/gutil/sysinfo.h"
+#include "kudu/gutil/utf/utf.h"
 #include "kudu/gutil/walltime.h"
 #include "kudu/master/master.h"
 #include "kudu/master/master.pb.h"
@@ -835,6 +836,22 @@ Status ValidateIdentifier(const string& id) {
         id, FLAGS_max_identifier_length));
   }
 
+  // Identifiers should be valid UTF8.
+  const char* p = id.data();
+  int rem = id.size();
+  while (rem > 0) {
+    Rune rune = Runeerror;
+    int rune_len = charntorune(&rune, p, rem);
+    if (rune == Runeerror) {
+      return Status::InvalidArgument("invalid UTF8 sequence");
+    }
+    if (rune == 0) {
+      return Status::InvalidArgument("identifier must not contain null bytes");
+    }
+    rem -= rune_len;
+    p += rune_len;
+  }
+
   return Status::OK();
 }
 

Reply via email to