Repository: kudu Updated Branches: refs/heads/master b0209c16d -> b30d4fc15
KUDU-1775 (part 2). Enforce a max number of columns and reasonable identifier lengths * Disallows unreasonable identifiers such as empty strings or strings longer than a configurable maximum size. I set the default max size to 256. MySQL[1] and PostgreSQL[2] seem to limit identifiers to 64 characters, and Oracle[3] seems to limit to 30. So 256 seemed like it should be plenty. The flag is marked as unsafe but if anyone has a compatibility issue on upgrade, the limit can be raised. * Enforces a maximum number of columns, set here to 300 as a default. Really wide tables are an anti-pattern, and although some initial testing shows that we won't completely fail until a few thousand columns, we probably want to put a guard rail up well before the failure point. Again, if users are depending on a higher number, this can be worked around by raising the default through an unsafe flag. [1] http://dev.mysql.com/doc/refman/5.7/en/identifiers.html [2] https://www.postgresql.org/docs/9.1/static/sql-syntax-lexical.html [3] http://stackoverflow.com/questions/18248318/change-table-column-index-names-size-in-oracle-11g-or-12c Change-Id: I9cab30dd1ea78a160403c79442b08739ddc92f12 Reviewed-on: http://gerrit.cloudera.org:8080/5290 Reviewed-by: Todd Lipcon <t...@apache.org> Reviewed-by: Dan Burkert <danburk...@apache.org> Tested-by: Kudu Jenkins Project: http://git-wip-us.apache.org/repos/asf/kudu/repo Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/b30d4fc1 Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/b30d4fc1 Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/b30d4fc1 Branch: refs/heads/master Commit: b30d4fc154151aaa5583dbbd8ef83ac044236ee9 Parents: b0209c1 Author: Todd Lipcon <t...@apache.org> Authored: Wed Nov 30 15:09:01 2016 -0800 Committer: Todd Lipcon <t...@apache.org> Committed: Thu Dec 1 18:50:14 2016 +0000 ---------------------------------------------------------------------- java/kudu-client/pom.xml | 10 +- .../org/apache/kudu/client/TestKuduClient.java | 27 ++++ java/pom.xml | 3 +- src/kudu/client/client-test.cc | 89 +++++++++++++ src/kudu/master/catalog_manager.cc | 132 +++++++++++++------ src/kudu/master/master-test.cc | 23 +--- 6 files changed, 225 insertions(+), 59 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kudu/blob/b30d4fc1/java/kudu-client/pom.xml ---------------------------------------------------------------------- diff --git a/java/kudu-client/pom.xml b/java/kudu-client/pom.xml index cb5b55f..81866d9 100644 --- a/java/kudu-client/pom.xml +++ b/java/kudu-client/pom.xml @@ -64,8 +64,14 @@ </dependency> <dependency> <groupId>org.mockito</groupId> - <artifactId>mockito-all</artifactId> - <version>${mockito-all.version}</version> + <artifactId>mockito-core</artifactId> + <version>${mockito-core.version}</version> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.hamcrest</groupId> + <artifactId>hamcrest-core</artifactId> + <version>${hamcrest-core.version}</version> <scope>test</scope> </dependency> <dependency> http://git-wip-us.apache.org/repos/asf/kudu/blob/b30d4fc1/java/kudu-client/src/test/java/org/apache/kudu/client/TestKuduClient.java ---------------------------------------------------------------------- diff --git a/java/kudu-client/src/test/java/org/apache/kudu/client/TestKuduClient.java b/java/kudu-client/src/test/java/org/apache/kudu/client/TestKuduClient.java index aeba4bb..64951b2 100644 --- a/java/kudu-client/src/test/java/org/apache/kudu/client/TestKuduClient.java +++ b/java/kudu-client/src/test/java/org/apache/kudu/client/TestKuduClient.java @@ -24,8 +24,10 @@ import static org.apache.kudu.client.RowResult.timestampToString; import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertThat; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; +import static org.junit.matchers.JUnitMatchers.containsString; import java.util.ArrayList; import java.util.Collections; @@ -116,6 +118,31 @@ public class TestKuduClient extends BaseKuduTest { newSchema.getColumn("column3_s").getCompressionAlgorithm()); } + + /** + * Test creating a table with various invalid schema cases. + */ + @Test(timeout = 100000) + public void testCreateTableTooManyColumns() throws Exception { + List<ColumnSchema> cols = new ArrayList<>(); + cols.add(new ColumnSchema.ColumnSchemaBuilder("key", Type.STRING) + .key(true) + .build()); + for (int i = 0; i < 1000; i++) { + // not null with default + cols.add(new ColumnSchema.ColumnSchemaBuilder("c" + i, Type.STRING) + .build()); + } + Schema schema = new Schema(cols); + try { + syncClient.createTable(tableName, schema, getBasicCreateTableOptions()); + } catch (NonRecoverableException nre) { + assertThat(nre.toString(), containsString( + "number of columns 1001 is greater than the permitted maximum")); + } + } + + /** * Test creating a table with columns with different combinations of NOT NULL and * default values, inserting rows, and checking the results are as expected. http://git-wip-us.apache.org/repos/asf/kudu/blob/b30d4fc1/java/pom.xml ---------------------------------------------------------------------- diff --git a/java/pom.xml b/java/pom.xml index c9fb510..6575ef6 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -62,9 +62,10 @@ <guava.version>18.0</guava.version> <hadoop.version>2.7.2</hadoop.version> <jsr305.version>3.0.1</jsr305.version> + <hamcrest-core.version>1.3</hamcrest-core.version> <junit.version>4.11</junit.version> <log4j.version>1.2.17</log4j.version> - <mockito-all.version>1.9.0</mockito-all.version> + <mockito-core.version>1.9.0</mockito-core.version> <murmur.version>1.0.0</murmur.version> <netty.version>3.10.5.Final</netty.version> <protobuf.version>2.6.1</protobuf.version> http://git-wip-us.apache.org/repos/asf/kudu/blob/b30d4fc1/src/kudu/client/client-test.cc ---------------------------------------------------------------------- diff --git a/src/kudu/client/client-test.cc b/src/kudu/client/client-test.cc index 77aa1a1..3c4f962 100644 --- a/src/kudu/client/client-test.cc +++ b/src/kudu/client/client-test.cc @@ -3109,6 +3109,8 @@ TEST_F(ClientTest, TestWriteWithBadSchema) { } TEST_F(ClientTest, TestBasicAlterOperations) { + const vector<string> kBadNames = {"", string(1000, 'x')}; + // test that having no steps throws an error { gscoped_ptr<KuduTableAlterer> table_alterer(client_->NewTableAlterer(kTableName)); @@ -3154,6 +3156,38 @@ TEST_F(ClientTest, TestBasicAlterOperations) { ASSERT_STR_CONTAINS(s.ToString(), "The column already exists: string_val"); } + // Test that renaming a column to an invalid name throws an error. + for (const string& bad_name : kBadNames) { + gscoped_ptr<KuduTableAlterer> table_alterer(client_->NewTableAlterer(kTableName)); + table_alterer->AlterColumn("int_val")->RenameTo(bad_name); + Status s = table_alterer->Alter(); + ASSERT_TRUE(s.IsInvalidArgument()); + ASSERT_STR_CONTAINS(s.ToString(), "invalid column name"); + } + + // Test that renaming a table to an invalid name throws an error. + for (const string& bad_name : kBadNames) { + gscoped_ptr<KuduTableAlterer> table_alterer(client_->NewTableAlterer(kTableName)); + table_alterer->RenameTo(bad_name); + Status s = table_alterer->Alter(); + ASSERT_TRUE(s.IsInvalidArgument()); + ASSERT_STR_CONTAINS(s.ToString(), "invalid table name"); + } + + // Test trying to add columns to a table such that it exceeds the permitted + // maximum. + { + gscoped_ptr<KuduTableAlterer> table_alterer(client_->NewTableAlterer(kTableName)); + for (int i = 0; i < 1000; i++) { + table_alterer->AddColumn(Substitute("c$0", i))->Type(KuduColumnSchema::INT32); + } + Status s = table_alterer->Alter(); + ASSERT_TRUE(s.IsInvalidArgument()) << s.ToString(); + ASSERT_STR_CONTAINS(s.ToString(), + "number of columns 1004 is greater than the " + "permitted maximum 300"); + } + // Need a tablet peer for the next set of tests. string tablet_id = GetFirstTabletId(client_table_.get()); scoped_refptr<TabletPeer> tablet_peer; @@ -3800,6 +3834,61 @@ TEST_F(ClientTest, TestCreateTableWithInvalidEncodings) { "DICT_ENCODING not supported for type INT32"); } +TEST_F(ClientTest, TestCreateTableWithTooManyColumns) { + unique_ptr<KuduTableCreator> table_creator(client_->NewTableCreator()); + KuduSchema schema; + KuduSchemaBuilder schema_builder; + schema_builder.AddColumn("key")->Type(KuduColumnSchema::INT32)->NotNull()->PrimaryKey(); + for (int i = 0; i < 1000; i++) { + schema_builder.AddColumn(Substitute("c$0", i)) + ->Type(KuduColumnSchema::INT32)->NotNull(); + } + ASSERT_OK(schema_builder.Build(&schema)); + Status s = table_creator->table_name("foobar") + .schema(&schema) + .set_range_partition_columns({ "key" }) + .Create(); + ASSERT_TRUE(s.IsInvalidArgument()) << s.ToString(); + ASSERT_STR_CONTAINS(s.ToString(), + "number of columns 1001 is greater than the " + "permitted maximum 300"); +} + +TEST_F(ClientTest, TestCreateTableWithTooLongTableName) { + const string kLongName(1000, 'x'); + unique_ptr<KuduTableCreator> table_creator(client_->NewTableCreator()); + KuduSchema schema; + KuduSchemaBuilder schema_builder; + schema_builder.AddColumn("key")->Type(KuduColumnSchema::INT32)->NotNull()->PrimaryKey(); + ASSERT_OK(schema_builder.Build(&schema)); + Status s = table_creator->table_name(kLongName) + .schema(&schema) + .set_range_partition_columns({ "key" }) + .Create(); + ASSERT_TRUE(s.IsInvalidArgument()) << s.ToString(); + ASSERT_STR_MATCHES(s.ToString(), + "invalid table name: identifier 'xxx*' " + "longer than maximum permitted length 256"); +} + +TEST_F(ClientTest, TestCreateTableWithTooLongColumnName) { + const string kLongName(1000, 'x'); + unique_ptr<KuduTableCreator> table_creator(client_->NewTableCreator()); + KuduSchema schema; + KuduSchemaBuilder schema_builder; + schema_builder.AddColumn(kLongName)->Type(KuduColumnSchema::INT32)->NotNull()->PrimaryKey(); + ASSERT_OK(schema_builder.Build(&schema)); + Status s = table_creator->table_name("foobar") + .schema(&schema) + .set_range_partition_columns({ kLongName }) + .Create(); + ASSERT_TRUE(s.IsInvalidArgument()) << s.ToString(); + ASSERT_STR_MATCHES(s.ToString(), + "invalid column name: identifier 'xxx*' " + "longer than maximum permitted length 256"); +} + + // Check the behavior of the latest observed timestamp when performing // write and read operations. TEST_F(ClientTest, TestLatestObservedTimestamp) { http://git-wip-us.apache.org/repos/asf/kudu/blob/b30d4fc1/src/kudu/master/catalog_manager.cc ---------------------------------------------------------------------- diff --git a/src/kudu/master/catalog_manager.cc b/src/kudu/master/catalog_manager.cc index d5af69c..33fcc92 100644 --- a/src/kudu/master/catalog_manager.cc +++ b/src/kudu/master/catalog_manager.cc @@ -121,6 +121,20 @@ DEFINE_int32(max_num_replicas, 7, // Tag as unsafe since we have done very limited testing of higher than 5 replicas. TAG_FLAG(max_num_replicas, unsafe); +DEFINE_int32(max_num_columns, 300, + "Maximum number of columns that may be in a table."); +// Tag as unsafe since we have done very limited testing of higher than 300 columns. +TAG_FLAG(max_num_columns, unsafe); + + +DEFINE_int32(max_identifier_length, 256, + "Maximum length of the name of a column or table."); +// Tag as unsafe because we end up writing schemas in every WAL entry, etc, +// and having very long column names would enter untested territory and affect +// performance. +TAG_FLAG(max_identifier_length, unsafe); + + DEFINE_bool(allow_unsafe_replication_factor, false, "Allow creating tables with even replication factor."); TAG_FLAG(allow_unsafe_replication_factor, unsafe); @@ -807,6 +821,65 @@ Status CatalogManager::CheckOnline() const { return Status::OK(); } +namespace { + +// Validate a table or column name to ensure that it is a valid identifier. +Status ValidateIdentifier(const string& id) { + if (id.empty()) { + return Status::InvalidArgument("empty string not a valid identifier"); + } + + if (id.length() > FLAGS_max_identifier_length) { + return Status::InvalidArgument(Substitute( + "identifier '$0' longer than maximum permitted length $1", + id, FLAGS_max_identifier_length)); + } + + return Status::OK(); +} + +// Validate the client-provided schema and name. +Status ValidateClientSchema(const boost::optional<string>& name, + const Schema& schema) { + if (name != boost::none) { + RETURN_NOT_OK_PREPEND(ValidateIdentifier(name.get()), "invalid table name"); + } + for (int i = 0; i < schema.num_columns(); i++) { + RETURN_NOT_OK_PREPEND(ValidateIdentifier(schema.column(i).name()), + "invalid column name"); + } + if (schema.num_key_columns() <= 0) { + return Status::InvalidArgument("must specify at least one key column"); + } + if (schema.num_columns() > FLAGS_max_num_columns) { + return Status::InvalidArgument(Substitute( + "number of columns $0 is greater than the permitted maximum $1", + schema.num_columns(), FLAGS_max_num_columns)); + } + for (int i = 0; i < schema.num_key_columns(); i++) { + if (!IsTypeAllowableInKey(schema.column(i).type_info())) { + return Status::InvalidArgument( + "key column may not have type of BOOL, FLOAT, or DOUBLE"); + } + } + + // Check that the encodings are valid for the specified types. + for (int i = 0; i < schema.num_columns(); i++) { + const auto& col = schema.column(i); + const TypeEncodingInfo *dummy; + Status s = TypeEncodingInfo::Get(col.type_info(), + col.attributes().encoding, + &dummy); + if (!s.ok()) { + return s.CloneAndPrepend( + Substitute("invalid encoding for column '$0'", col.name())); + } + } + return Status::OK(); +} + +} // anonymous namespace + // Create a new table. // See README file in this directory for a description of the design. Status CatalogManager::CreateTable(const CreateTableRequestPB* orig_req, @@ -842,35 +915,13 @@ Status CatalogManager::CreateTable(const CreateTableRequestPB* orig_req, // a. Validate the user request. Schema client_schema; RETURN_NOT_OK(SchemaFromPB(req.schema(), &client_schema)); - if (client_schema.has_column_ids()) { - return SetError(MasterErrorPB::INVALID_SCHEMA, - Status::InvalidArgument("User requests should not have Column IDs")); + s = ValidateClientSchema(req.name(), client_schema); + if (s.ok() && client_schema.has_column_ids()) { + s = Status::InvalidArgument("User requests should not have Column IDs"); } - if (PREDICT_FALSE(client_schema.num_key_columns() <= 0)) { - return SetError(MasterErrorPB::INVALID_SCHEMA, - Status::InvalidArgument("Must specify at least one key column")); - } - for (int i = 0; i < client_schema.num_key_columns(); i++) { - if (!IsTypeAllowableInKey(client_schema.column(i).type_info())) { - return SetError(MasterErrorPB::INVALID_SCHEMA, - Status::InvalidArgument( - "Key column may not have type of BOOL, FLOAT, or DOUBLE")); - } - } - // Check that the encodings are valid for the specified types. - for (int i = 0; i < client_schema.num_columns(); i++) { - const auto& col = client_schema.column(i); - const TypeEncodingInfo *dummy; - Status s = TypeEncodingInfo::Get(col.type_info(), - col.attributes().encoding, - &dummy); - if (!s.ok()) { - return SetError(MasterErrorPB::INVALID_SCHEMA, - s.CloneAndPrepend( - Substitute("invalid encoding for column '$0'", col.name()))); - } + if (!s.ok()) { + return SetError(MasterErrorPB::INVALID_SCHEMA, s); } - Schema schema = client_schema.CopyWithColumnIds(); // If the client did not set a partition schema in the create table request, @@ -1262,15 +1313,8 @@ Status CatalogManager::ApplyAlterSchemaSteps(const SysTablesEntryPB& current_pb, } RETURN_NOT_OK(ProcessColumnPBDefaults(&new_col_pb)); - // Verify that encoding is appropriate for the new column's - // type + // Can't accept a NOT NULL column without a default. ColumnSchema new_col = ColumnSchemaFromPB(new_col_pb); - const TypeEncodingInfo *dummy; - RETURN_NOT_OK(TypeEncodingInfo::Get(new_col.type_info(), - new_col.attributes().encoding, - &dummy)); - - // can't accept a NOT NULL column without a default if (!new_col.is_nullable() && !new_col.has_read_default()) { return Status::InvalidArgument( Substitute("column `$0`: NOT NULL columns must have a default", new_col.name())); @@ -1548,7 +1592,7 @@ Status CatalogManager::AlterTable(const AlterTableRequestPB* req, string table_name = l.data().name(); *resp->mutable_table_id() = table->id(); - // 4. Calculate new schema for the on-disk state, not persisted yet. + // 4. Calculate and validate new schema for the on-disk state, not persisted yet. Schema new_schema; ColumnId next_col_id = ColumnId(l.data().pb.next_column_id()); if (!alter_schema_steps.empty()) { @@ -1561,10 +1605,24 @@ Status CatalogManager::AlterTable(const AlterTableRequestPB* req, DCHECK_NE(next_col_id, 0); DCHECK_EQ(new_schema.find_column_by_id(next_col_id), static_cast<int>(Schema::kColumnNotFound)); + + // Just validate the schema, not the name (validated below). + s = ValidateClientSchema(boost::none, new_schema); + if (!s.ok()) { + SetupError(resp->mutable_error(), MasterErrorPB::INVALID_SCHEMA, s); + return s; + } } - // 5. Try to acquire the new table name. + // 5. Validate and try to acquire the new table name. if (req->has_new_table_name()) { + Status s = ValidateIdentifier(req->new_table_name()); + if (!s.ok()) { + SetupError(resp->mutable_error(), MasterErrorPB::INVALID_SCHEMA, + s.CloneAndPrepend("invalid table name")); + return s; + } + std::lock_guard<LockType> catalog_lock(lock_); TRACE("Acquired catalog manager lock"); http://git-wip-us.apache.org/repos/asf/kudu/blob/b30d4fc1/src/kudu/master/master-test.cc ---------------------------------------------------------------------- diff --git a/src/kudu/master/master-test.cc b/src/kudu/master/master-test.cc index d454ed1..80b4ecb 100644 --- a/src/kudu/master/master-test.cc +++ b/src/kudu/master/master-test.cc @@ -581,28 +581,13 @@ TEST_F(MasterTest, TestCreateTableCheckRangeInvariants) { TEST_F(MasterTest, TestCreateTableInvalidKeyType) { const char *kTableName = "testtb"; - { - const Schema kTableSchema({ ColumnSchema("key", BOOL) }, 1); - Status s = CreateTable(kTableName, kTableSchema, vector<KuduPartialRow>(), {}); - ASSERT_TRUE(s.IsInvalidArgument()) << s.ToString(); - ASSERT_STR_CONTAINS(s.ToString(), - "Key column may not have type of BOOL, FLOAT, or DOUBLE"); - } - - { - const Schema kTableSchema({ ColumnSchema("key", FLOAT) }, 1); - Status s = CreateTable(kTableName, kTableSchema, vector<KuduPartialRow>(), {}); - ASSERT_TRUE(s.IsInvalidArgument()) << s.ToString(); - ASSERT_STR_CONTAINS(s.ToString(), - "Key column may not have type of BOOL, FLOAT, or DOUBLE"); - } - - { - const Schema kTableSchema({ ColumnSchema("key", DOUBLE) }, 1); + const DataType types[] = { BOOL, FLOAT, DOUBLE }; + for (DataType type : types) { + const Schema kTableSchema({ ColumnSchema("key", type) }, 1); Status s = CreateTable(kTableName, kTableSchema, vector<KuduPartialRow>(), {}); ASSERT_TRUE(s.IsInvalidArgument()) << s.ToString(); ASSERT_STR_CONTAINS(s.ToString(), - "Key column may not have type of BOOL, FLOAT, or DOUBLE"); + "key column may not have type of BOOL, FLOAT, or DOUBLE"); } }