This is an automated email from the ASF dual-hosted git repository. granthenke pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/kudu.git
commit 889086d44d72e6157e08e7089af6e946af5e6955 Author: Grant Henke <[email protected]> AuthorDate: Fri Apr 26 11:59:26 2019 -0500 [backup] Add more metadata fields This patch adds more metadata fields to the metadata file for each backup. The following were added: - table_id: Can be used in the future to handle dropped or renamed tables gracefully. - comment: Used to back up and restore column comments. - column_ids: Can be used in the future to handle dropped or renamed columns gracefully. Change-Id: I42458f598a523596acb9f18558e6f518719a969b Reviewed-on: http://gerrit.cloudera.org:8080/13130 Tested-by: Grant Henke <[email protected]> Reviewed-by: Adar Dembo <[email protected]> Reviewed-by: Mike Percy <[email protected]> --- java/kudu-backup/src/main/protobuf/backup.proto | 14 ++++++++++---- .../org/apache/kudu/backup/TableMetadata.scala | 8 ++++++++ .../org/apache/kudu/backup/TestKuduBackup.scala | 1 + .../src/main/java/org/apache/kudu/Schema.java | 21 +++++++++++++++++++++ .../java/org/apache/kudu/util/SchemaGenerator.java | 3 ++- 5 files changed, 42 insertions(+), 5 deletions(-) diff --git a/java/kudu-backup/src/main/protobuf/backup.proto b/java/kudu-backup/src/main/protobuf/backup.proto index da2c794..5971e02 100644 --- a/java/kudu-backup/src/main/protobuf/backup.proto +++ b/java/kudu-backup/src/main/protobuf/backup.proto @@ -35,7 +35,6 @@ message ColumnTypeAttributesMetadataPB { // Maps to the ColumnSchema class. // The fields are effectively 1 to 1 mappings of those in ColumnSchema. -// TODO (KUDU-2788): How do we handle column additions? message ColumnMetadataPB { string name = 1; string type = 2; @@ -48,6 +47,7 @@ message ColumnMetadataPB { string encoding = 7; string compression = 8; int32 block_size = 9; + string comment = 10; } // A human readable string representation of a column value for use @@ -110,10 +110,16 @@ message TableMetadataPB { string data_format = 4; // The name of the table. string table_name = 5; + // The internal id of the table. 
+ // This is useful for detecting dropped and added tables. + string table_id = 6; // The replication factor of this table. - int32 num_replicas = 6; + int32 num_replicas = 7; // The metadata for the table's columns. - repeated ColumnMetadataPB columns = 7; + repeated ColumnMetadataPB columns = 8; + // A map of column name to internal column id. + // This is useful for detecting dropped and added columns. + map<string, int32> column_ids = 9; // The metadata for the table's partitions. - PartitionMetadataPB partitions = 8; + PartitionMetadataPB partitions = 10; } \ No newline at end of file diff --git a/java/kudu-backup/src/main/scala/org/apache/kudu/backup/TableMetadata.scala b/java/kudu-backup/src/main/scala/org/apache/kudu/backup/TableMetadata.scala index 0d2c315..f09e3d2 100644 --- a/java/kudu-backup/src/main/scala/org/apache/kudu/backup/TableMetadata.scala +++ b/java/kudu-backup/src/main/scala/org/apache/kudu/backup/TableMetadata.scala @@ -17,6 +17,7 @@ package org.apache.kudu.backup import java.math.BigDecimal +import java.util import com.google.protobuf.StringValue import org.apache.commons.net.util.Base64 @@ -35,6 +36,7 @@ import org.apache.yetus.audience.InterfaceAudience import org.apache.yetus.audience.InterfaceStability import scala.collection.JavaConverters._ +import scala.collection.mutable @InterfaceAudience.Private @InterfaceStability.Unstable @@ -44,7 +46,9 @@ object TableMetadata { val MetadataVersion = 1 def getTableMetadata(table: KuduTable, options: BackupOptions): TableMetadataPB = { + val columnIds = new util.HashMap[String, Integer]() val columns = table.getSchema.getColumns.asScala.map { col => + columnIds.put(col.getName, table.getSchema.getColumnId(col.getName)) val builder = ColumnMetadataPB .newBuilder() .setName(col.getName) @@ -54,6 +58,7 @@ object TableMetadata { .setEncoding(col.getEncoding.toString) .setCompression(col.getCompressionAlgorithm.toString) .setBlockSize(col.getDesiredBlockSize) + .setComment(col.getComment) if 
(col.getTypeAttributes != null) { builder.setTypeAttributes(getTypeAttributesMetadata(col)) } @@ -70,7 +75,9 @@ object TableMetadata { .setToMs(options.toMs) .setDataFormat(options.format) .setTableName(table.getName) + .setTableId(table.getTableId) .addAllColumns(columns.asJava) + .putAllColumnIds(columnIds) .setNumReplicas(table.getNumReplicas) .setPartitions(getPartitionMetadata(table)) .build() @@ -174,6 +181,7 @@ object TableMetadata { .encoding(Encoding.valueOf(col.getEncoding)) .compressionAlgorithm(CompressionAlgorithm.valueOf(col.getCompression)) .desiredBlockSize(col.getBlockSize) + .comment(col.getComment) if (col.hasDefaultValue) { val value = valueFromString(col.getDefaultValue.getValue, colType) diff --git a/java/kudu-backup/src/test/scala/org/apache/kudu/backup/TestKuduBackup.scala b/java/kudu-backup/src/test/scala/org/apache/kudu/backup/TestKuduBackup.scala index 239194f..7533251 100644 --- a/java/kudu-backup/src/test/scala/org/apache/kudu/backup/TestKuduBackup.scala +++ b/java/kudu-backup/src/test/scala/org/apache/kudu/backup/TestKuduBackup.scala @@ -425,6 +425,7 @@ class TestKuduBackup extends KuduTestSuite { Objects .equal(before.getCompressionAlgorithm, after.getCompressionAlgorithm) && Objects.equal(before.getTypeAttributes, after.getTypeAttributes) + Objects.equal(before.getComment, after.getComment) } // Special handling because default values can be a byte array which is not diff --git a/java/kudu-client/src/main/java/org/apache/kudu/Schema.java b/java/kudu-client/src/main/java/org/apache/kudu/Schema.java index 0be52aa..630a68d 100644 --- a/java/kudu-client/src/main/java/org/apache/kudu/Schema.java +++ b/java/kudu-client/src/main/java/org/apache/kudu/Schema.java @@ -60,6 +60,11 @@ public class Schema { private final Map<Integer, Integer> columnsById; /** + * Mapping of column name to column ID, or null if the schema does not have assigned column IDs. 
+ */ + private final Map<String, Integer> columnIdByName; + + /** * Mapping of column index to backing byte array offset. */ private final int[] columnOffsets; @@ -105,6 +110,7 @@ public class Schema { this.columnOffsets = new int[columns.size()]; this.columnsByName = new HashMap<>(columns.size()); this.columnsById = hasColumnIds ? new HashMap<Integer, Integer>(columnIds.size()) : null; + this.columnIdByName = hasColumnIds ? new HashMap<String, Integer>(columnIds.size()) : null; int offset = 0; boolean hasNulls = false; int isDeletedIndex = NO_IS_DELETED_INDEX; @@ -131,6 +137,10 @@ public class Schema { throw new IllegalArgumentException( String.format("Column IDs must be unique: %s", columnIds)); } + if (this.columnIdByName.put(column.getName(), columnIds.get(index)) != null) { + throw new IllegalArgumentException( + String.format("Column names must be unique: %s", columnIds)); + } } // If this is the IS_DELETED virtual column, set `hasIsDeleted` and `isDeletedIndex`. @@ -308,6 +318,17 @@ public class Schema { } /** + * Get the internal column ID for a column name. + * @param columnName column's name + * @return the column ID + */ + @InterfaceAudience.Private + @InterfaceStability.Unstable + public int getColumnId(String columnName) { + return columnIdByName.get(columnName); + } + + /** * Creates a new partial row for the schema. 
* @return a new partial row */ diff --git a/java/kudu-client/src/main/java/org/apache/kudu/util/SchemaGenerator.java b/java/kudu-client/src/main/java/org/apache/kudu/util/SchemaGenerator.java index 6ac21ec..bb5c13d 100644 --- a/java/kudu-client/src/main/java/org/apache/kudu/util/SchemaGenerator.java +++ b/java/kudu-client/src/main/java/org/apache/kudu/util/SchemaGenerator.java @@ -113,7 +113,8 @@ public class SchemaGenerator { .nullable(random.nextBoolean() && !key) .compressionAlgorithm(randomCompression()) .desiredBlockSize(randomBlockSize()) - .encoding(randomEncoding(type)); + .encoding(randomEncoding(type)) + .comment("A " + type.getName() + " column for " + name); ColumnTypeAttributes typeAttributes = null; if (type == Type.DECIMAL) {
