This is an automated email from the ASF dual-hosted git repository. alexey pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/kudu.git
commit 8776b742d8c2cbabaaab280b8654ecf3020b14a9 Author: Will Berkeley <[email protected]> AuthorDate: Wed May 15 16:32:59 2019 -0700 [backup] Add a basic CLI tool This adds a CLI tool. For a set of tables, it can - Print out the latest backup - Print out the sequence of backups that would be used by a restore job - Print out all backups, ordered by end time An example invocation looks like $ java -cp <elided> org.apache.kudu.backup.KuduBackupCLI \ --rootPath=hdfs:///kudu-backups \ list_restore_sequence \ default.loadgen_auto_69c4838542ea481d81e03e8f66f7731e foo table name | table id | end time | start timestamp | end timestamp | type -------------------------------------------------------+----------------------------------+------------------------------+-----------------+---------------+------------- default.loadgen_auto_69c4838542ea481d81e03e8f66f7731e | b46191d75c8d46bab6dc76ba0bd345ba | 2019-05-13T09:38:54.642-0700 | 0 | 1557765534642 | full default.loadgen_auto_69c4838542ea481d81e03e8f66f7731e | b46191d75c8d46bab6dc76ba0bd345ba | 2019-05-13T10:33:23.669-0700 | 1557765534642 | 1557768803669 | incremental default.loadgen_auto_69c4838542ea481d81e03e8f66f7731e | b46191d75c8d46bab6dc76ba0bd345ba | 2019-05-13T11:07:25.358-0700 | 1557768803669 | 1557770845358 | incremental table name | table id | end time | start timestamp | end timestamp | type -------------------------------------------------------+----------------------------------+------------------------------+-----------------+---------------+------ default.loadgen_auto_69c4838542ea481d81e03e8f66f7731e | 1e0ab43cb925473fbc5acf86db12eb56 | 2019-05-13T14:09:01.518-0700 | 0 | 1557781741518 | full No backups were found for 1 table(s): foo Notice how, due to table renames, more than one table can match a table name, and, of course, there may be no table for a name. Another example: $ java -cp <elided> org.apache.kudu.backup.KuduBackupCLI \ --rootPath=hdfs:///kudu-backup-tests-again \ list_latest \ default.loadgen_auto_69c4838542ea481d81e03e8f66f7731e table name | table id | end time | start timestamp | end timestamp | type -------------------------------------------------------+----------------------------------+------------------------------+-----------------+---------------+------------- default.loadgen_auto_69c4838542ea481d81e03e8f66f7731e | b46191d75c8d46bab6dc76ba0bd345ba | 2019-05-13T11:07:25.358-0700 | 1557768803669 | 1557770845358 | incremental default.loadgen_auto_69c4838542ea481d81e03e8f66f7731e | 1e0ab43cb925473fbc5acf86db12eb56 | 2019-05-13T14:09:01.518-0700 | 0 | 1557781741518 | full The tool also supports TSV and CSV output, and listing out information about all backups by omitting an explicit list of tables. Change-Id: Ib5a4d3b44c77a06fa1b5d2b09506ba5a6e1b52c1 Reviewed-on: http://gerrit.cloudera.org:8080/13356 Reviewed-by: Mike Percy <[email protected]> Tested-by: Will Berkeley <[email protected]> --- .../scala/org/apache/kudu/backup/BackupGraph.scala | 7 + .../scala/org/apache/kudu/backup/BackupIO.scala | 15 +- .../org/apache/kudu/backup/KuduBackupCLI.scala | 193 ++++++++++++++++++ .../org/apache/kudu/backup/TestKuduBackupCLI.scala | 219 +++++++++++++++++++++ 4 files changed, 430 insertions(+), 4 deletions(-) diff --git a/java/kudu-backup-tools/src/main/scala/org/apache/kudu/backup/BackupGraph.scala b/java/kudu-backup-tools/src/main/scala/org/apache/kudu/backup/BackupGraph.scala index 13bb9e3..be6207f 100644 --- a/java/kudu-backup-tools/src/main/scala/org/apache/kudu/backup/BackupGraph.scala +++ b/java/kudu-backup-tools/src/main/scala/org/apache/kudu/backup/BackupGraph.scala @@ -64,6 +64,13 @@ class BackupGraph(val tableId: String) { } /** + * @return all the backups in the graph. + */ + def allBackups: Seq[BackupNode] = { + adjacencyList.values.flatten.toSeq + } + + /** * @return the most recent full backup. * @throws IllegalStateException if no full backup exists. */ diff --git a/java/kudu-backup-tools/src/main/scala/org/apache/kudu/backup/BackupIO.scala b/java/kudu-backup-tools/src/main/scala/org/apache/kudu/backup/BackupIO.scala index 43a359d..1cf3140 100644 --- a/java/kudu-backup-tools/src/main/scala/org/apache/kudu/backup/BackupIO.scala +++ b/java/kudu-backup-tools/src/main/scala/org/apache/kudu/backup/BackupIO.scala @@ -68,9 +68,9 @@ class BackupIO(val conf: Configuration, rootPathStr: String) { /** * Return the path to the table directory. */ - def tablePath(table: KuduTable): Path = { - val tableName = URLEncoder.encode(table.getName, "UTF-8") - val dirName = s"${table.getTableId}-$tableName" + def tablePath(tableId: String, tableName: String): Path = { + val encodedTableName = URLEncoder.encode(tableName, "UTF-8") + val dirName = s"$tableId-$encodedTableName" new Path(rootPath, dirName) } @@ -78,7 +78,7 @@ class BackupIO(val conf: Configuration, rootPathStr: String) { * Return the backup path for a table and time. */ def backupPath(table: KuduTable, timestampMs: Long): Path = { - new Path(tablePath(table), timestampMs.toString) + new Path(tablePath(table.getTableId, table.getName), timestampMs.toString) } /** @@ -101,6 +101,13 @@ class BackupIO(val conf: Configuration, rootPathStr: String) { } /** + * Reads all of the backup graphs. + */ + def readAllBackupGraphs(): Seq[BackupGraph] = { + buildBackupGraphs(listAllTableDirs(), System.currentTimeMillis()) + } + + /** * Reads all of the backup graphs for a given list of table names and a time filter. */ def readBackupGraphsByTableName( diff --git a/java/kudu-backup-tools/src/main/scala/org/apache/kudu/backup/KuduBackupCLI.scala b/java/kudu-backup-tools/src/main/scala/org/apache/kudu/backup/KuduBackupCLI.scala new file mode 100644 index 0000000..c77abc3 --- /dev/null +++ b/java/kudu-backup-tools/src/main/scala/org/apache/kudu/backup/KuduBackupCLI.scala @@ -0,0 +1,193 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +package org.apache.kudu.backup + +import java.text.SimpleDateFormat + +import org.apache.hadoop.conf.Configuration +import scopt.OptionParser + +// The possible backup CLI tool actions. +object Action extends Enumeration { + val LIST_LATEST, LIST_RESTORE_SEQUENCE, LIST_ALL = Value +} + +// The possible backup CLI print formats. +object Format extends Enumeration { + val PRETTY, TSV, CSV = Value +} + +case class BackupCLIOptions( + action: Action.Value, + format: Format.Value, + tables: Seq[String], + rootPath: String) + +object BackupCLIOptions { + + val ProgramName: String = + KuduBackupCLI.getClass.getCanonicalName.dropRight(1) // Remove trailing `$` + + val parser: OptionParser[BackupCLIOptions] = + new OptionParser[BackupCLIOptions](ProgramName) { + opt[String]("rootPath") + .action((v, o) => o.copy(rootPath = v)) + .text("The root path to search for backups. Accepts any Hadoop compatible path.") + .required() + + arg[String]("format") + .validate(validateEnumeratedOption("format", Format.values.map(_.toString.toLowerCase))) + .action((v, o) => o.copy(format = Format.withName(v.toUpperCase))) + .text("The output format. One of 'pretty', 'tsv', 'csv'.") + .optional() + + arg[String]("<action>") + .validate(validateEnumeratedOption("action", Action.values.map(_.toString.toLowerCase))) + .action((v, o) => o.copy(action = Action.withName(v.toUpperCase))) + .text("The action to perform. One of 'list_latest', 'list_restore_sequence', 'list_all'.") + + arg[String]("<table>...") + .unbounded() + .action((v, o) => o.copy(tables = o.tables :+ v)) + .text("A list of tables about which to print backup information. Specifying no tables includes all tables.") + .optional() + + help("help").text("Prints this usage text") + } + + def validateEnumeratedOption( + name: String, + optionStrings: Iterable[String]): String => Either[String, Unit] = + (v: String) => { + if (optionStrings.exists(_.equalsIgnoreCase(v))) { + Right(()) + } else { + Left(s"$name must be one of ${optionStrings.mkString(", ")}: $v") + } + } + + def parse(args: Seq[String]): Option[BackupCLIOptions] = { + parser.parse(args, BackupCLIOptions(null, Format.PRETTY, Seq(), null)) + } +} + +object KuduBackupCLI { + + // The header for all tables printed by the tool. + val HEADER: Seq[String] = + Seq("table name", "table id", "end time", "start timestamp", "end timestamp", "type") + + // Run the backup CLI tool with the given options. Like a command, returns 0 if successful, or + // a nonzero error code. + def run(options: BackupCLIOptions): Int = { + // Sort by table name for a consistent ordering (at least if there's no duplicate names). + val sortedTables = options.tables.sorted + + val io: BackupIO = new BackupIO(new Configuration(), options.rootPath) + val backupGraphs = + if (sortedTables.isEmpty) + io.readAllBackupGraphs() + else + io.readBackupGraphsByTableName(sortedTables) + + options.action match { + case Action.LIST_LATEST => { + val rows = backupGraphs.map(graph => rowForBackupNode(graph.restorePath.lastBackup)) + printTable(options.format, rows) + } + case Action.LIST_RESTORE_SEQUENCE => { + val tablesOfBackups = + backupGraphs.map(_.restorePath.backups.map(node => rowForBackupNode(node))) + tablesOfBackups.foreach(table => printTable(options.format, table)) + } + case Action.LIST_ALL => { + val tablesOfBackups = backupGraphs.map( + _.allBackups.sortBy(node => node.metadata.getToMs).map(node => rowForBackupNode(node))) + tablesOfBackups.foreach(table => printTable(options.format, table)) + } + } + // Because of renames, one table name might map to multiple backup directories, so it's not + // sufficient to check the size of 'options.tables' against the size of 'backupGraphs'. + val foundTables = backupGraphs.map(graph => graph.backupBase.metadata.getTableName).toSet + val notFoundTables = options.tables.filter(table => !foundTables.contains(table)) + if (notFoundTables.nonEmpty) { + Console.err.println(s"No backups were found for ${notFoundTables.size} table(s):") + notFoundTables.foreach(Console.err.println) + return 1 + } + 0 + } + + private def rowForBackupNode(backup: BackupNode): Seq[String] = { + val metadata = backup.metadata + val tableName = metadata.getTableName + val tableId = metadata.getTableId + val fromMs = metadata.getFromMs + val toMs = metadata.getToMs + val toDatetime = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSZ").format(toMs) + val backupType = if (fromMs == 0) "full" else "incremental" + Seq(tableName, tableId, toDatetime, s"$fromMs", s"$toMs", backupType) + } + + private def formatDsv(delimiter: String, table: Seq[Seq[String]]): String = { + table.map(_.mkString(delimiter)).mkString("\n") + } + + private def formatPrettyTable(table: Seq[Seq[String]]): String = { + if (table.isEmpty) { + return "" + } + // The width of a column is the width of largest cell, plus a padding of 2. + val colWidths = table.transpose.map(_.map(_.length).max + 2) + val rows = table.map { row => + (row, colWidths).zipped + .map { + // 1 space on left, then pad to (padding - 1) spaces. + case (cell, width) => s" %-${width - 1}s".format(cell) + } + .mkString("|") + } + val separatorRow = colWidths.map("-" * _).mkString("+") + (rows.head +: separatorRow +: rows.tail).mkString("\n") + } + + private def printTable(format: Format.Value, rows: Seq[Seq[String]]): Unit = { + if (rows.isEmpty) { + return + } + val table = HEADER +: rows + format match { + case Format.PRETTY => { + println(formatPrettyTable(table)) + } + case Format.TSV => { + println(formatDsv("\t", table)) + } + case Format.CSV => { + println(formatDsv(",", table)) + } + } + println() // Spacing after the table. + } + + def main(args: Array[String]): Unit = { + val options = BackupCLIOptions + .parse(args) + .getOrElse(throw new IllegalArgumentException("could not parse the arguments")) + System.exit(run(options)) + } +} diff --git a/java/kudu-backup-tools/src/test/scala/org/apache/kudu/backup/TestKuduBackupCLI.scala b/java/kudu-backup-tools/src/test/scala/org/apache/kudu/backup/TestKuduBackupCLI.scala new file mode 100644 index 0000000..8302c8c --- /dev/null +++ b/java/kudu-backup-tools/src/test/scala/org/apache/kudu/backup/TestKuduBackupCLI.scala @@ -0,0 +1,219 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +package org.apache.kudu.backup + +import java.io.ByteArrayOutputStream +import java.io.PrintStream +import java.nio.file.Files +import java.nio.file.Path +import java.text.SimpleDateFormat + +import org.apache.commons.io.FileUtils +import org.apache.hadoop.fs.{Path => HPath} +import org.apache.hadoop.conf.Configuration +import org.junit.After +import org.junit.Assert._ +import org.junit.Before +import org.junit.Test +import org.slf4j.Logger +import org.slf4j.LoggerFactory + +import org.apache.kudu.backup.Backup.TableMetadataPB +import org.apache.kudu.backup.TableMetadata.MetadataVersion + +class TestKuduBackupCLI { + val log: Logger = LoggerFactory.getLogger(getClass) + + var rootDir: Path = _ + + @Before + def setUp(): Unit = { + rootDir = Files.createTempDirectory("backupcli") + } + + @After + def tearDown(): Unit = { + FileUtils.deleteDirectory(rootDir.toFile) + } + + // Create dummy table metadata and write it to the test directory. + private def createTableMetadata( + io: BackupIO, + tableName: String, + fromMs: Long, + toMs: Long): Unit = { + // Create dummy table metadata with just enough information to be used to create a BackupGraph. + val tableId = s"id_$tableName" + val metadata = TableMetadataPB + .newBuilder() + .setVersion(MetadataVersion) + .setFromMs(fromMs) + .setToMs(toMs) + .setTableName(tableName) + .setTableId(tableId) + .build() + val backupPath = new HPath(io.tablePath(tableId, tableName), s"$toMs") + val metadataPath = io.backupMetadataPath(backupPath) + io.writeTableMetadata(metadata, metadataPath) + } + + // Helper to write a standard collection of backup metadata useful for a few tests. + private def createStandardTableMetadata(io: BackupIO): Unit = { + Seq( + // Two fulls and one incremental for 'taco' table. + ("taco", 0, 100), + ("taco", 0, 1000), + ("taco", 100, 2000), + // One full and two incrementals for 'pizza' table. + ("pizza", 0, 200), + ("pizza", 200, 400), + ("pizza", 400, 600) + ).foreach { + case (tableName: String, fromMs: Int, toMs: Int) => + createTableMetadata(io, tableName, fromMs, toMs) + } + } + + // Helper to format the end time column, since its value depends on the timezone of the machine + // where the tool is run. + private def endTime(toMs: Long): String = { + new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSZ").format(toMs) + } + + @Test + def testListAllBackups(): Unit = { + val io = new BackupIO(new Configuration(), rootDir.toUri.toString) + createStandardTableMetadata(io) + + val options = + BackupCLIOptions(Action.LIST_ALL, Format.CSV, Seq(), rootDir.toUri.toString) + val stdout = new ByteArrayOutputStream + Console.withOut(new PrintStream(stdout)) { + assertEquals(0, KuduBackupCLI.run(options)) + } + + val headerString = KuduBackupCLI.HEADER.mkString(",") + val expected = Seq( + headerString, + s"pizza,id_pizza,${endTime(200)},0,200,full", + s"pizza,id_pizza,${endTime(400)},200,400,incremental", + s"pizza,id_pizza,${endTime(600)},400,600,incremental", + "", + headerString, + s"taco,id_taco,${endTime(100)},0,100,full", + s"taco,id_taco,${endTime(1000)},0,1000,full", + s"taco,id_taco,${endTime(2000)},100,2000,incremental" + ).mkString("\n") + assertEquals(expected, stdout.toString.trim) + } + + @Test + def testListLatestBackups(): Unit = { + val io = new BackupIO(new Configuration(), rootDir.toUri.toString) + createStandardTableMetadata(io) + + val options = + BackupCLIOptions(Action.LIST_LATEST, Format.CSV, Seq(), rootDir.toUri.toString) + val stdout = new ByteArrayOutputStream + Console.withOut(new PrintStream(stdout)) { + assertEquals(0, KuduBackupCLI.run(options)) + } + + val headerString = KuduBackupCLI.HEADER.mkString(",") + val expected = Seq( + headerString, + s"pizza,id_pizza,${endTime(600)},400,600,incremental", + s"taco,id_taco,${endTime(2000)},100,2000,incremental" + ).mkString("\n") + assertEquals(expected, stdout.toString.trim) + } + + @Test + def testListRestorePath(): Unit = { + val io = new BackupIO(new Configuration(), rootDir.toUri.toString) + createStandardTableMetadata(io) + + val options = + BackupCLIOptions(Action.LIST_RESTORE_SEQUENCE, Format.CSV, Seq(), rootDir.toUri.toString) + val stdout = new ByteArrayOutputStream + Console.withOut(new PrintStream(stdout)) { + assertEquals(0, KuduBackupCLI.run(options)) + } + + val headerString = KuduBackupCLI.HEADER.mkString(",") + val expected = Seq( + headerString, + s"pizza,id_pizza,${endTime(200)},0,200,full", + s"pizza,id_pizza,${endTime(400)},200,400,incremental", + s"pizza,id_pizza,${endTime(600)},400,600,incremental", + "", + headerString, + s"taco,id_taco,${endTime(100)},0,100,full", + s"taco,id_taco,${endTime(2000)},100,2000,incremental" + ).mkString("\n") + assertEquals(expected, stdout.toString.trim) + } + + @Test + def testTableFilter(): Unit = { + val io = new BackupIO(new Configuration(), rootDir.toUri.toString) + createStandardTableMetadata(io) + + val options = + BackupCLIOptions(Action.LIST_ALL, Format.CSV, Seq("taco"), rootDir.toUri.toString) + val stdout = new ByteArrayOutputStream + Console.withOut(new PrintStream(stdout)) { + assertEquals(0, KuduBackupCLI.run(options)) + } + + val headerString = KuduBackupCLI.HEADER.mkString(",") + val expected = Seq( + headerString, + s"taco,id_taco,${endTime(100)},0,100,full", + s"taco,id_taco,${endTime(1000)},0,1000,full", + s"taco,id_taco,${endTime(2000)},100,2000,incremental" + ).mkString("\n") + assertEquals(expected, stdout.toString.trim) + } + + @Test + def testMissingTable(): Unit = { + val io = new BackupIO(new Configuration(), rootDir.toUri.toString) + createStandardTableMetadata(io) + + val options = + BackupCLIOptions(Action.LIST_ALL, Format.CSV, Seq("pizza", "nope"), rootDir.toUri.toString) + val stdout = new ByteArrayOutputStream + val stderr = new ByteArrayOutputStream + Console.withOut(new PrintStream(stdout)) { + Console.withErr(new PrintStream(stderr)) { + assertEquals(1, KuduBackupCLI.run(options)) + } + } + + val headerString = KuduBackupCLI.HEADER.mkString(",") + val expected = Seq( + headerString, + s"pizza,id_pizza,${endTime(200)},0,200,full", + s"pizza,id_pizza,${endTime(400)},200,400,incremental", + s"pizza,id_pizza,${endTime(600)},400,600,incremental" + ).mkString("\n") + assertEquals(expected, stdout.toString.trim) + + assertEquals("No backups were found for 1 table(s):\nnope", stderr.toString.trim) + } +}
