KUDU-180. Fix handling of defaults when creating tables This fixes an issue that was experienced when creating tables from the Java client when those tables had a column with a default value. Previously, the client would send the default value as the 'read_default' protobuf field, and the server would just write that default into the table schema. This would result in the write_default being left unset, and thus insertions which did not specify the column would insert NULL rather than inserting the column's default.
This didn't affect tables created from the C++ API, because the C++ client set both the read and write default fields when creating a table. Because the separation of "read" and "write" defaults is an internal concept, we shouldn't really have exposed it in the client-facing protobuf in the first place. Unfortunately, that ship sailed long ago and the same protobuf is used internally as is used in the client-facing wire protocol, and changing it now would be a huge amount of work to do without breaking compatibility. Thus, the fix here is to document that the server will only look at the "read_default" field sent by the client when adding columns. The server then propagates this field as the initial write_default for any newly-added columns. A new test is added in Java which reproduced the issue but now passes. Change-Id: Ia76c7cb599292fac4d7a8edf163d59a1574e897b Reviewed-on: http://gerrit.cloudera.org:8080/5031 Reviewed-by: Jean-Daniel Cryans <[email protected]> Reviewed-by: Dan Burkert <[email protected]> Tested-by: Kudu Jenkins Project: http://git-wip-us.apache.org/repos/asf/kudu/repo Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/1db453f5 Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/1db453f5 Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/1db453f5 Branch: refs/heads/master Commit: 1db453f5485ad0743f826a8cf21c7913e4c61751 Parents: 63fdc0c Author: Todd Lipcon <[email protected]> Authored: Wed Nov 9 21:53:35 2016 -0800 Committer: Todd Lipcon <[email protected]> Committed: Fri Nov 11 01:49:23 2016 +0000 ---------------------------------------------------------------------- .../org/apache/kudu/client/TestKuduClient.java | 83 ++++++++++++++++++ src/kudu/client/client.cc | 3 +- src/kudu/client/table_alterer-internal.cc | 6 +- src/kudu/common/common.proto | 12 +++ src/kudu/common/wire_protocol.cc | 9 +- src/kudu/common/wire_protocol.h | 8 +- src/kudu/master/catalog_manager.cc | 89 ++++++++++++++------ src/kudu/master/master-test.cc | 29 +++++++ 8 files changed, 199 insertions(+), 40 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kudu/blob/1db453f5/java/kudu-client/src/test/java/org/apache/kudu/client/TestKuduClient.java ---------------------------------------------------------------------- diff --git a/java/kudu-client/src/test/java/org/apache/kudu/client/TestKuduClient.java b/java/kudu-client/src/test/java/org/apache/kudu/client/TestKuduClient.java index d78a433..17606c8 100644 --- a/java/kudu-client/src/test/java/org/apache/kudu/client/TestKuduClient.java +++ b/java/kudu-client/src/test/java/org/apache/kudu/client/TestKuduClient.java @@ -16,6 +16,7 @@ // under the License. package org.apache.kudu.client; +import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; @@ -30,6 +31,7 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.Iterators; import java.util.ArrayList; +import java.util.Collections; import java.util.List; import java.util.concurrent.Executors; import java.util.concurrent.atomic.AtomicInteger; @@ -115,6 +117,87 @@ public class TestKuduClient extends BaseKuduTest { } /** + * Test creating a table with columns with different combinations of NOT NULL and + * default values, inserting rows, and checking the results are as expected. + * Regression test for KUDU-180. + */ + @Test(timeout = 100000) + public void testTableWithDefaults() throws Exception { + List<ColumnSchema> cols = new ArrayList<>(); + cols.add(new ColumnSchema.ColumnSchemaBuilder("key", Type.STRING) + .key(true) + .build()); + // nullable with no default + cols.add(new ColumnSchema.ColumnSchemaBuilder("c1", Type.STRING) + .nullable(true) + .build()); + // nullable with default + cols.add(new ColumnSchema.ColumnSchemaBuilder("c2", Type.STRING) + .nullable(true) + .defaultValue("def") + .build()); + // not null with no default + cols.add(new ColumnSchema.ColumnSchemaBuilder("c3", Type.STRING) + .nullable(false) + .build()); + // not null with default + cols.add(new ColumnSchema.ColumnSchemaBuilder("c4", Type.STRING) + .nullable(false) + .defaultValue("def") + .build()); + Schema schema = new Schema(cols); + syncClient.createTable(tableName, schema, getBasicCreateTableOptions()); + KuduSession session = syncClient.newSession(); + KuduTable table = syncClient.openTable(tableName); + + // Insert various rows. '-' indicates leaving the row unset in the insert. + List<String> rows = ImmutableList.of( + // Specify all columns + "r1,a,b,c,d", + // Specify all, set nullable ones to NULL. + "r2,NULL,NULL,c,d", + // Don't specify any columns except for the one that is NOT NULL + // with no default. + "r3,-,-,c,-", + // Two rows which should not succeed. + "fail_1,a,b,c,NULL", + "fail_2,a,b,NULL,d"); + List<String> expectedStrings = ImmutableList.of( + "STRING key=r1, STRING c1=a, STRING c2=b, STRING c3=c, STRING c4=d", + "STRING key=r2, STRING c1=NULL, STRING c2=NULL, STRING c3=c, STRING c4=d", + "STRING key=r3, STRING c1=NULL, STRING c2=def, STRING c3=c, STRING c4=def"); + for (String row : rows) { + try { + String[] fields = row.split(","); + Insert insert = table.newInsert(); + for (int i = 0; i < fields.length; i++) { + if (fields[i].equals("-")) { // leave unset + continue; + } + if (fields[i].equals("NULL")) { + insert.getRow().setNull(i); + } else { + insert.getRow().addString(i, fields[i]); + } + } + session.apply(insert); + } catch (IllegalArgumentException e) { + // We expect two of the inserts to fail when we try to set NULL values for + // nullable columns. + assertTrue(e.getMessage(), + e.getMessage().matches("c[34] cannot be set to null")); + } + } + session.flush(); + + // Check that we got the results we expected. + List<String> rowStrings = scanTableToStrings(table); + Collections.sort(rowStrings); + assertArrayEquals(rowStrings.toArray(new String[0]), + expectedStrings.toArray(new String[0])); + } + + /** * Test inserting and retrieving string columns. */ @Test(timeout = 100000) http://git-wip-us.apache.org/repos/asf/kudu/blob/1db453f5/src/kudu/client/client.cc ---------------------------------------------------------------------- diff --git a/src/kudu/client/client.cc b/src/kudu/client/client.cc index 5c9f8a6..fb63cb2 100644 --- a/src/kudu/client/client.cc +++ b/src/kudu/client/client.cc @@ -586,7 +586,8 @@ Status KuduTableCreator::Create() { if (data_->num_replicas_ >= 1) { req.set_num_replicas(data_->num_replicas_); } - RETURN_NOT_OK_PREPEND(SchemaToPB(*data_->schema_->schema_, req.mutable_schema()), + RETURN_NOT_OK_PREPEND(SchemaToPB(*data_->schema_->schema_, req.mutable_schema(), + SCHEMA_PB_WITHOUT_WRITE_DEFAULT), "Invalid schema"); RowOperationsPBEncoder encoder(req.mutable_split_rows_range_bounds()); http://git-wip-us.apache.org/repos/asf/kudu/blob/1db453f5/src/kudu/client/table_alterer-internal.cc ---------------------------------------------------------------------- diff --git a/src/kudu/client/table_alterer-internal.cc b/src/kudu/client/table_alterer-internal.cc index e229cae..3ea2a16 100644 --- a/src/kudu/client/table_alterer-internal.cc +++ b/src/kudu/client/table_alterer-internal.cc @@ -62,7 +62,8 @@ Status KuduTableAlterer::Data::ToRequest(AlterTableRequestPB* req) { } if (schema_ != nullptr) { - RETURN_NOT_OK(SchemaToPBWithoutIds(*schema_, req->mutable_schema())); + RETURN_NOT_OK(SchemaToPB(*schema_, req->mutable_schema(), + SCHEMA_PB_WITHOUT_IDS | SCHEMA_PB_WITHOUT_WRITE_DEFAULT)); } for (const Step& s : steps_) { @@ -75,7 +76,8 @@ Status KuduTableAlterer::Data::ToRequest(AlterTableRequestPB* req) { KuduColumnSchema col; RETURN_NOT_OK(s.spec->ToColumnSchema(&col)); ColumnSchemaToPB(*col.col_, - pb_step->mutable_add_column()->mutable_schema()); + pb_step->mutable_add_column()->mutable_schema(), + SCHEMA_PB_WITHOUT_WRITE_DEFAULT); break; } case AlterTableRequestPB::DROP_COLUMN: http://git-wip-us.apache.org/repos/asf/kudu/blob/1db453f5/src/kudu/common/common.proto ---------------------------------------------------------------------- diff --git a/src/kudu/common/common.proto b/src/kudu/common/common.proto index 8e0d4aa..6a512dc 100644 --- a/src/kudu/common/common.proto +++ b/src/kudu/common/common.proto @@ -78,6 +78,18 @@ message ColumnSchemaPB { required DataType type = 3; optional bool is_key = 4 [default = false]; optional bool is_nullable = 5 [default = false]; + + // Default values. + // NOTE: as far as clients are concerned, there is only one + // "default value" of a column. The read/write defaults are used + // internally and should not be exposed by any public client APIs. + // + // When passing schemas to the master for create/alter table, + // specify the default in 'read_default_value'. + // + // Contrary to this, when the client opens a table, it will receive + // both the read and write defaults, but the *write* default is + // what should be exposed as the "current" default. optional bytes read_default_value = 6; optional bytes write_default_value = 7; http://git-wip-us.apache.org/repos/asf/kudu/blob/1db453f5/src/kudu/common/wire_protocol.cc ---------------------------------------------------------------------- diff --git a/src/kudu/common/wire_protocol.cc b/src/kudu/common/wire_protocol.cc index 6b09956..0c3d23b 100644 --- a/src/kudu/common/wire_protocol.cc +++ b/src/kudu/common/wire_protocol.cc @@ -183,11 +183,6 @@ Status SchemaToPB(const Schema& schema, SchemaPB *pb, int flags) { return SchemaToColumnPBs(schema, pb->mutable_columns(), flags); } -Status SchemaToPBWithoutIds(const Schema& schema, SchemaPB *pb) { - pb->Clear(); - return SchemaToColumnPBs(schema, pb->mutable_columns(), SCHEMA_PB_WITHOUT_IDS); -} - Status SchemaFromPB(const SchemaPB& pb, Schema *schema) { return ColumnPBsToSchema(pb.columns(), schema); } @@ -211,7 +206,7 @@ void ColumnSchemaToPB(const ColumnSchema& col_schema, ColumnSchemaPB *pb, int fl pb->set_read_default_value(read_value, col_schema.type_info()->size()); } } - if (col_schema.has_write_default()) { + if (col_schema.has_write_default() && !(flags & SCHEMA_PB_WITHOUT_WRITE_DEFAULT)) { if (col_schema.type_info()->physical_type() == BINARY) { const Slice *write_slice = static_cast<const Slice *>(col_schema.write_default_value()); pb->set_write_default_value(write_slice->data(), write_slice->size()); @@ -299,7 +294,7 @@ Status SchemaToColumnPBs(const Schema& schema, int idx = 0; for (const ColumnSchema& col : schema.columns()) { ColumnSchemaPB* col_pb = cols->Add(); - ColumnSchemaToPB(col, col_pb); + ColumnSchemaToPB(col, col_pb, flags); col_pb->set_is_key(idx < schema.num_key_columns()); if (schema.has_column_ids() && !(flags & SCHEMA_PB_WITHOUT_IDS)) { http://git-wip-us.apache.org/repos/asf/kudu/blob/1db453f5/src/kudu/common/wire_protocol.h ---------------------------------------------------------------------- diff --git a/src/kudu/common/wire_protocol.h b/src/kudu/common/wire_protocol.h index 9c1bda5..6448282 100644 --- a/src/kudu/common/wire_protocol.h +++ b/src/kudu/common/wire_protocol.h @@ -63,15 +63,17 @@ Status AddHostPortPBs(const std::vector<Sockaddr>& addrs, enum SchemaPBConversionFlags { SCHEMA_PB_WITHOUT_IDS = 1 << 0, SCHEMA_PB_WITHOUT_STORAGE_ATTRIBUTES = 1 << 1, + + // When serializing, only write the 'read_default' value into the + // protobuf. Used when sending schemas from the client to the master + // for create/alter table. + SCHEMA_PB_WITHOUT_WRITE_DEFAULT = 1 << 2, }; // Convert the specified schema to protobuf. // 'flags' is a bitfield of SchemaPBConversionFlags values. Status SchemaToPB(const Schema& schema, SchemaPB* pb, int flags = 0); -// Convert the specified schema to protobuf without column IDs. -Status SchemaToPBWithoutIds(const Schema& schema, SchemaPB *pb); - // Returns the Schema created from the specified protobuf. // If the schema is invalid, return a non-OK status. Status SchemaFromPB(const SchemaPB& pb, Schema *schema); http://git-wip-us.apache.org/repos/asf/kudu/blob/1db453f5/src/kudu/master/catalog_manager.cc ---------------------------------------------------------------------- diff --git a/src/kudu/master/catalog_manager.cc b/src/kudu/master/catalog_manager.cc index bdbf767..e617136 100644 --- a/src/kudu/master/catalog_manager.cc +++ b/src/kudu/master/catalog_manager.cc @@ -545,6 +545,29 @@ Status CheckIfTableDeletedOrNotRunning(TableMetadataLock* lock, RespClass* resp) return Status::OK(); } +// Propagate the 'read_default' to the 'write_default' in 'col', +// and check that the client didn't specify an invalid combination of the two fields. +Status ProcessColumnPBDefaults(ColumnSchemaPB* col) { + if (col->has_read_default_value() && !col->has_write_default_value()) { + // We expect clients to send just the 'read_default_value' field. + col->set_write_default_value(col->read_default_value()); + } else if (col->has_read_default_value() && col->has_write_default_value()) { + // C++ client 1.0 and earlier sends the default in both PB fields. + // Check that the defaults match (we never provided an API that would + // let them be set to different values) + if (col->read_default_value() != col->write_default_value()) { + return Status::InvalidArgument(Substitute( + "column '$0' has mismatched read/write defaults", col->name())); + } + } else if (!col->has_read_default_value() && col->has_write_default_value()) { + // We don't expect any client to send us this, but better cover our + // bases. + return Status::InvalidArgument(Substitute( + "column '$0' has write_default field set but no read_default", col->name())); + } + return Status::OK(); +} + } // anonymous namespace CatalogManager::CatalogManager(Master *master) @@ -780,6 +803,11 @@ Status CatalogManager::CheckOnline() const { Status CatalogManager::CreateTable(const CreateTableRequestPB* orig_req, CreateTableResponsePB* resp, rpc::RpcContext* rpc) { + auto SetError = [&](MasterErrorPB::Code code, const Status& s) { + SetupError(resp->mutable_error(), code, s); + return s; + }; + leader_lock_.AssertAcquiredForReading(); RETURN_NOT_OK(CheckOnline()); Status s; @@ -789,25 +817,35 @@ Status CatalogManager::CreateTable(const CreateTableRequestPB* orig_req, LOG(INFO) << "CreateTable from " << RequestorString(rpc) << ":\n" << req.DebugString(); + // Do some fix-up of any defaults specified on columns. + // Clients are only expected to pass the default value in the 'read_default' + // field, but we need to write the schema to disk including the default + // as both the 'read' and 'write' default. It's easier to do this fix-up + // on the protobuf here. + for (int i = 0; i < req.schema().columns_size(); i++) { + auto* col = req.mutable_schema()->mutable_columns(i); + Status s = ProcessColumnPBDefaults(col); + if (!s.ok()) { + return SetError(MasterErrorPB::INVALID_SCHEMA, s); + } + } + // a. Validate the user request. Schema client_schema; RETURN_NOT_OK(SchemaFromPB(req.schema(), &client_schema)); if (client_schema.has_column_ids()) { - s = Status::InvalidArgument("User requests should not have Column IDs"); - SetupError(resp->mutable_error(), MasterErrorPB::INVALID_SCHEMA, s); - return s; + return SetError(MasterErrorPB::INVALID_SCHEMA, + Status::InvalidArgument("User requests should not have Column IDs")); } if (PREDICT_FALSE(client_schema.num_key_columns() <= 0)) { - s = Status::InvalidArgument("Must specify at least one key column"); - SetupError(resp->mutable_error(), MasterErrorPB::INVALID_SCHEMA, s); - return s; + return SetError(MasterErrorPB::INVALID_SCHEMA, + Status::InvalidArgument("Must specify at least one key column")); } for (int i = 0; i < client_schema.num_key_columns(); i++) { if (!IsTypeAllowableInKey(client_schema.column(i).type_info())) { - Status s = Status::InvalidArgument( - "Key column may not have type of BOOL, FLOAT, or DOUBLE"); - SetupError(resp->mutable_error(), MasterErrorPB::INVALID_SCHEMA, s); - return s; + return SetError(MasterErrorPB::INVALID_SCHEMA, + Status::InvalidArgument( + "Key column may not have type of BOOL, FLOAT, or DOUBLE")); } } // Check that the encodings are valid for the specified types. @@ -818,11 +856,12 @@ Status CatalogManager::CreateTable(const CreateTableRequestPB* orig_req, col.attributes().encoding, &dummy); if (!s.ok()) { - s = s.CloneAndPrepend(Substitute("invalid encoding for column '$0'", col.name())); - SetupError(resp->mutable_error(), MasterErrorPB::INVALID_SCHEMA, s); - return s; + return SetError(MasterErrorPB::INVALID_SCHEMA, + s.CloneAndPrepend( + Substitute("invalid encoding for column '$0'", col.name()))); } } + Schema schema = client_schema.CopyWithColumnIds(); // If the client did not set a partition schema in the create table request, @@ -831,8 +870,7 @@ Status CatalogManager::CreateTable(const CreateTableRequestPB* orig_req, PartitionSchema partition_schema; s = PartitionSchema::FromPB(req.partition_schema(), schema, &partition_schema); if (!s.ok()) { - SetupError(resp->mutable_error(), MasterErrorPB::INVALID_SCHEMA, s); - return s; + return SetError(MasterErrorPB::INVALID_SCHEMA, s); } // Decode split rows. @@ -857,9 +895,9 @@ Status CatalogManager::CreateTable(const CreateTableRequestPB* orig_req, if (i >= ops.size() || (ops[i].type != RowOperationsPB::RANGE_UPPER_BOUND && ops[i].type != RowOperationsPB::INCLUSIVE_RANGE_UPPER_BOUND)) { - Status s = Status::InvalidArgument("Missing upper range bound in create table request"); - SetupError(resp->mutable_error(), MasterErrorPB::UNKNOWN_ERROR, s); - return s; + return SetError(MasterErrorPB::UNKNOWN_ERROR, + Status::InvalidArgument( + "Missing upper range bound in create table request")); } if (op.type == RowOperationsPB::EXCLUSIVE_RANGE_LOWER_BOUND) { @@ -897,8 +935,7 @@ Status CatalogManager::CreateTable(const CreateTableRequestPB* orig_req, if (req.num_replicas() > 1 && max_tablets > 0 && partitions.size() > max_tablets) { s = Status::InvalidArgument(Substitute("The requested number of tablets is over the " "permitted maximum ($0)", max_tablets)); - SetupError(resp->mutable_error(), MasterErrorPB::TOO_MANY_TABLETS, s); - return s; + return SetError(MasterErrorPB::TOO_MANY_TABLETS, s); } // Verify that the number of replicas isn't larger than the number of live tablet @@ -908,8 +945,7 @@ Status CatalogManager::CreateTable(const CreateTableRequestPB* orig_req, s = Status::InvalidArgument(Substitute( "Not enough live tablet servers to create a table with the requested replication " "factor $0. $1 tablet servers are alive.", req.num_replicas(), num_live_tservers)); - SetupError(resp->mutable_error(), MasterErrorPB::REPLICATION_FACTOR_TOO_HIGH, s); - return s; + return SetError(MasterErrorPB::REPLICATION_FACTOR_TOO_HIGH, s); } scoped_refptr<TableInfo> table; @@ -922,16 +958,14 @@ Status CatalogManager::CreateTable(const CreateTableRequestPB* orig_req, if (table != nullptr) { s = Status::AlreadyPresent(Substitute("Table $0 already exists with id $1", req.name(), table->id())); - SetupError(resp->mutable_error(), MasterErrorPB::TABLE_ALREADY_PRESENT, s); - return s; + return SetError(MasterErrorPB::TABLE_ALREADY_PRESENT, s); } // c. Reserve the table name if possible. if (!InsertIfNotPresent(&reserved_table_names_, req.name())) { s = Status::ServiceUnavailable(Substitute( "New table name $0 is already reserved", req.name())); - SetupError(resp->mutable_error(), MasterErrorPB::TABLE_NOT_FOUND, s); - return s; + return SetError(MasterErrorPB::TABLE_NOT_FOUND, s); } } @@ -1193,6 +1227,7 @@ Status CatalogManager::ApplyAlterSchemaSteps(const SysTablesEntryPB& current_pb, return Status::InvalidArgument("column $0: client should not specify column ID", new_col_pb.ShortDebugString()); } + RETURN_NOT_OK(ProcessColumnPBDefaults(&new_col_pb)); // Verify that encoding is appropriate for the new column's // type @@ -1202,7 +1237,7 @@ Status CatalogManager::ApplyAlterSchemaSteps(const SysTablesEntryPB& current_pb, new_col.attributes().encoding, &dummy)); - // can't accept a NOT NULL column without read default + // can't accept a NOT NULL column without a default if (!new_col.is_nullable() && !new_col.has_read_default()) { return Status::InvalidArgument( Substitute("column `$0`: NOT NULL columns must have a default", new_col.name())); http://git-wip-us.apache.org/repos/asf/kudu/blob/1db453f5/src/kudu/master/master-test.cc ---------------------------------------------------------------------- diff --git a/src/kudu/master/master-test.cc b/src/kudu/master/master-test.cc index 8979401..d454ed1 100644 --- a/src/kudu/master/master-test.cc +++ b/src/kudu/master/master-test.cc @@ -628,6 +628,35 @@ TEST_F(MasterTest, TestCreateTableInvalidSchema) { resp.error().status().ShortDebugString()); } +// Test that, if the client specifies mismatched read and write defaults, +// we return an error. +TEST_F(MasterTest, TestCreateTableMismatchedDefaults) { + CreateTableRequestPB req; + CreateTableResponsePB resp; + RpcController controller; + + req.set_name("table"); + + ColumnSchemaPB* col = req.mutable_schema()->add_columns(); + col->set_name("key"); + col->set_type(INT32); + col->set_is_key(true); + + col = req.mutable_schema()->add_columns(); + col->set_name("col"); + col->set_type(BINARY); + col->set_is_nullable(true); + req.mutable_schema()->mutable_columns(1)->set_read_default_value("hello"); + req.mutable_schema()->mutable_columns(1)->set_write_default_value("bye"); + + ASSERT_OK(proxy_->CreateTable(req, &resp, &controller)); + SCOPED_TRACE(resp.DebugString()); + ASSERT_TRUE(resp.has_error()); + ASSERT_EQ("code: INVALID_ARGUMENT message: \"column \\'col\\' has " + "mismatched read/write defaults\"", + resp.error().status().ShortDebugString()); +} + // Regression test for KUDU-253/KUDU-592: crash if the GetTableLocations RPC call is // invalid. TEST_F(MasterTest, TestInvalidGetTableLocations) {
