This is an automated email from the ASF dual-hosted git repository. granthenke pushed a commit to branch branch-1.10.x in repository https://gitbox.apache.org/repos/asf/kudu.git
commit 0c9189be0ede1cfe413c8a9898ea3017f5f76722 Author: Grant Henke <[email protected]> AuthorDate: Thu Jun 20 14:25:59 2019 -0500 [backup] Fix backup tools packaging This patch adjusts how we package the kudu-backup-tools module. Instead of building a fat jar that contains all of the hadoop classes, the hadoop classes are now marked as provided and therfore not included. This change was made with two concerns in mind. Primarily because the defacto way to load the environments hadoop configuration is from the classpath via `hadoop classpath`. A secondary benefit is that loading the classpath this way avoids any potential hadoop compatibility issues. An example execution of the jar would now be: `java -cp $(hadoop classpath):kudu-backup-tools-1.10.0.jar org.apache.kudu.backup.KuduBackupCLI` Additionally this patch adds integration tests for the `hdfs` uri in the tests using a hadoop-minicluster. Change-Id: I334a38bb5d7252b5e6d2722c4f1fc1d3b5622db7 Reviewed-on: http://gerrit.cloudera.org:8080/13691 Tested-by: Kudu Jenkins Reviewed-by: Mike Percy <[email protected]> (cherry picked from commit ef921b2c35589e75de4c3fcb2673e38123d9fb94) Reviewed-on: http://gerrit.cloudera.org:8080/13705 Tested-by: Grant Henke <[email protected]> --- java/gradle/dependencies.gradle | 1 + java/gradle/shadow.gradle | 18 ------ java/kudu-backup-common/build.gradle | 5 +- java/kudu-backup-tools/build.gradle | 21 +------ .../org/apache/kudu/backup/KuduBackupLister.scala | 2 +- .../apache/kudu/backup/TestKuduBackupCleaner.scala | 59 ++++++++++++----- .../apache/kudu/backup/TestKuduBackupLister.scala | 73 ++++++++++++++-------- 7 files changed, 99 insertions(+), 80 deletions(-) diff --git a/java/gradle/dependencies.gradle b/java/gradle/dependencies.gradle index c80763e..221e055 100755 --- a/java/gradle/dependencies.gradle +++ b/java/gradle/dependencies.gradle @@ -85,6 +85,7 @@ libs += [ guava : "com.google.guava:guava:$versions.guava", hadoopClient : "org.apache.hadoop:hadoop-client:$versions.hadoop", hadoopCommon : "org.apache.hadoop:hadoop-common:$versions.hadoop", + hadoopMiniCluster : "org.apache.hadoop:hadoop-minicluster:$versions.hadoop", hadoopMRClientCommon : "org.apache.hadoop:hadoop-mapreduce-client-common:$versions.hadoop", hadoopMRClientCore : "org.apache.hadoop:hadoop-mapreduce-client-core:$versions.hadoop", hamcrest : "org.hamcrest:hamcrest:$versions.hamcrest", diff --git a/java/gradle/shadow.gradle b/java/gradle/shadow.gradle index a46e678..bb414e5 100644 --- a/java/gradle/shadow.gradle +++ b/java/gradle/shadow.gradle @@ -76,27 +76,9 @@ configurations.create("compileUnshaded") configurations.shadow.extendsFrom(configurations.compileUnshaded) configurations.compile.extendsFrom(configurations.compileUnshaded) -// Define an overridable property to indicate tool jars that should -// include all of their dependencies. -// We use this below to ensure slf4j is included. -shadow.ext { - isToolJar = false -} - // We use afterEvaluate to add additional configuration once all the definitions // in the projects build script have been applied afterEvaluate { - if (!shadow.isToolJar) { - shadowJar { - dependencies { - // Our shaded library jars always try to pull in the slf4j api from - // kudu-client, though we never want it included. Excluding it - // here prevents the need to always list it. - exclude dependency(libs.slf4jApi) - } - } - } - // Ensure compileUnshaded dependencies are included in the pom. [install, uploadArchives].each { task -> task.repositories.each { diff --git a/java/kudu-backup-common/build.gradle b/java/kudu-backup-common/build.gradle index 82c4248..65983ec 100644 --- a/java/kudu-backup-common/build.gradle +++ b/java/kudu-backup-common/build.gradle @@ -25,10 +25,11 @@ dependencies { // Make sure wrong Guava version is not pulled in. exclude group: "com.google.guava", module: "guava" } - compile libs.hadoopCommon - compile libs.scalaLibrary compile libs.slf4jApi + provided libs.hadoopCommon + provided libs.scalaLibrary + optional libs.yetusAnnotations testCompile project(path: ":kudu-test-utils", configuration: "shadow") diff --git a/java/kudu-backup-tools/build.gradle b/java/kudu-backup-tools/build.gradle index e08fb4e..c1eb612 100644 --- a/java/kudu-backup-tools/build.gradle +++ b/java/kudu-backup-tools/build.gradle @@ -18,37 +18,22 @@ apply plugin: "scala" apply from: "$rootDir/gradle/shadow.gradle" -// Mark this as a tool jar so shadow doesn't exclude any dependencies when shading. -shadow { - isToolJar = true -} - - -// Add the main class to the manifest so the jar can be run via `java -jar` -jar { - manifest { - attributes 'Main-Class': 'org.apache.kudu.backup.KuduBackupCLI' - } -} - dependencies { compile project(path: ":kudu-backup-common") compile project(path: ":kudu-client", configuration: "shadow") - compile libs.hadoopCommon compile (libs.scopt) { // Make sure wrong Scala version is not pulled in. exclude group: "org.scala-lang", module: "scala-library" } compile libs.scalaLibrary - - // Include the logging implementation given this is a standalone command line tool. compile libs.slf4jApi - compile libs.log4j - compile libs.log4jSlf4jImpl + + provided libs.hadoopClient optional libs.yetusAnnotations testCompile project(path: ":kudu-test-utils", configuration: "shadow") + testCompile libs.hadoopMiniCluster testCompile libs.junit testCompile libs.scalatest } diff --git a/java/kudu-backup-tools/src/main/scala/org/apache/kudu/backup/KuduBackupLister.scala b/java/kudu-backup-tools/src/main/scala/org/apache/kudu/backup/KuduBackupLister.scala index e2e4f34..94ac3f7 100644 --- a/java/kudu-backup-tools/src/main/scala/org/apache/kudu/backup/KuduBackupLister.scala +++ b/java/kudu-backup-tools/src/main/scala/org/apache/kudu/backup/KuduBackupLister.scala @@ -34,7 +34,7 @@ object KuduBackupLister { // Run the backup CLI tool with the given options. Like a command, returns 0 if successful, or // a nonzero error code. def run(options: BackupCLIOptions): Int = { - Preconditions.checkArgument(options.mode == Mode.LIST); + Preconditions.checkArgument(options.mode == Mode.LIST) // Sort by table name for a consistent ordering (at least if there's no duplicate names). val sortedTables = options.tables.sorted diff --git a/java/kudu-backup-tools/src/test/scala/org/apache/kudu/backup/TestKuduBackupCleaner.scala b/java/kudu-backup-tools/src/test/scala/org/apache/kudu/backup/TestKuduBackupCleaner.scala index 879d269..1362532 100644 --- a/java/kudu-backup-tools/src/test/scala/org/apache/kudu/backup/TestKuduBackupCleaner.scala +++ b/java/kudu-backup-tools/src/test/scala/org/apache/kudu/backup/TestKuduBackupCleaner.scala @@ -16,15 +16,17 @@ // under the License. package org.apache.kudu.backup +import java.io.File import java.nio.file.Files -import java.nio.file.Path import java.time.temporal.ChronoUnit import java.time.Duration import java.time.Instant import org.apache.commons.io.FileUtils import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.FileUtil import org.apache.hadoop.fs.{Path => HPath} +import org.apache.hadoop.hdfs.MiniDFSCluster import org.junit.After import org.junit.Assert._ import org.junit.Before @@ -32,20 +34,10 @@ import org.junit.Test import org.slf4j.Logger import org.slf4j.LoggerFactory -class TestKuduBackupCleaner { +abstract class BaseTestKuduBackupCleaner { val log: Logger = LoggerFactory.getLogger(getClass) - var rootPath: Path = _ - - @Before - def setUp(): Unit = { - rootPath = Files.createTempDirectory("backupcli") - } - - @After - def tearDown(): Unit = { - FileUtils.deleteDirectory(rootPath.toFile) - } + var rootPath: String = _ // Return the epoch time in milliseconds that is 'secsBefore' seconds before 'current'. private def epochMillisBefore(current: Instant, secsBefore: Long): Long = { @@ -54,7 +46,7 @@ class TestKuduBackupCleaner { @Test def testBackupCleaner(): Unit = { - val io = new BackupIO(new Configuration(), rootPath.toUri.toString) + val io = new BackupIO(new Configuration(), rootPath) val expirationAge = Duration.of(15, ChronoUnit.SECONDS) val now = Instant.now val tableName = "taco" @@ -123,13 +115,13 @@ class TestKuduBackupCleaner { } def createOptions( - rootPath: Path, + rootPath: String, expirationAge: Duration, tables: Seq[String] = Seq(), dryRun: Boolean = false, verbose: Boolean = false): BackupCLIOptions = { new BackupCLIOptions( - rootPath.toUri.toString, + rootPath, Mode.CLEAN, tables = tables, expirationAge = expirationAge, @@ -137,3 +129,38 @@ class TestKuduBackupCleaner { verbose = verbose) } } + +class LocalTestKuduBackupCleaner extends BaseTestKuduBackupCleaner { + + @Before + def setUp(): Unit = { + rootPath = Files.createTempDirectory("backupcli").toAbsolutePath.toString + } + + @After + def tearDown(): Unit = { + FileUtils.deleteDirectory(new File(rootPath)) + } +} + +class HDFSTestKuduBackupCleaner extends BaseTestKuduBackupCleaner { + var baseDir: File = _ + + @Before + def setUp(): Unit = { + baseDir = Files.createTempDirectory("hdfs-test").toFile.getAbsoluteFile + + // Create an HDFS mini-cluster. + val conf = new Configuration() + conf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, baseDir.getAbsolutePath) + val hdfsCluster = new MiniDFSCluster.Builder(conf).build() + + // Set the root path to use the HDFS URI. + rootPath = "hdfs://localhost:" + hdfsCluster.getNameNodePort + "/" + } + + @After + def tearDown(): Unit = { + FileUtil.fullyDelete(baseDir) + } +} diff --git a/java/kudu-backup-tools/src/test/scala/org/apache/kudu/backup/TestKuduBackupLister.scala b/java/kudu-backup-tools/src/test/scala/org/apache/kudu/backup/TestKuduBackupLister.scala index cf75ba1..e6dd8a6 100644 --- a/java/kudu-backup-tools/src/test/scala/org/apache/kudu/backup/TestKuduBackupLister.scala +++ b/java/kudu-backup-tools/src/test/scala/org/apache/kudu/backup/TestKuduBackupLister.scala @@ -17,13 +17,15 @@ package org.apache.kudu.backup import java.io.ByteArrayOutputStream +import java.io.File import java.io.PrintStream import java.nio.file.Files -import java.nio.file.Path import java.text.SimpleDateFormat import org.apache.commons.io.FileUtils import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.FileUtil +import org.apache.hadoop.hdfs.MiniDFSCluster import org.junit.Assert._ import org.junit.After import org.junit.Before @@ -31,20 +33,10 @@ import org.junit.Test import org.slf4j.Logger import org.slf4j.LoggerFactory -class TestKuduBackupLister { +abstract class BaseTestKuduBackupLister { val log: Logger = LoggerFactory.getLogger(getClass) - var rootPath: Path = _ - - @Before - def setUp(): Unit = { - rootPath = Files.createTempDirectory("backupcli") - } - - @After - def tearDown(): Unit = { - FileUtils.deleteDirectory(rootPath.toFile) - } + var rootPath: String = _ // Helper to write a standard collection of backup metadata useful for a few tests. private def createStandardTableMetadata(io: BackupIO): Unit = { @@ -71,7 +63,7 @@ class TestKuduBackupLister { @Test def testListAllBackups(): Unit = { - val io = new BackupIO(new Configuration(), rootPath.toUri.toString) + val io = new BackupIO(new Configuration(), rootPath) createStandardTableMetadata(io) val options = createOptions(rootPath, ListType.ALL) @@ -97,7 +89,7 @@ class TestKuduBackupLister { @Test def testListLatestBackups(): Unit = { - val io = new BackupIO(new Configuration(), rootPath.toUri.toString) + val io = new BackupIO(new Configuration(), rootPath) createStandardTableMetadata(io) val options = createOptions(rootPath, ListType.LATEST) @@ -117,7 +109,7 @@ class TestKuduBackupLister { @Test def testListRestorePath(): Unit = { - val io = new BackupIO(new Configuration(), rootPath.toUri.toString) + val io = new BackupIO(new Configuration(), rootPath) createStandardTableMetadata(io) val options = createOptions(rootPath, ListType.RESTORE_SEQUENCE) @@ -142,7 +134,7 @@ class TestKuduBackupLister { @Test def testTableFilter(): Unit = { - val io = new BackupIO(new Configuration(), rootPath.toUri.toString) + val io = new BackupIO(new Configuration(), rootPath) createStandardTableMetadata(io) val options = createOptions(rootPath, ListType.ALL, Seq("taco")) @@ -163,7 +155,7 @@ class TestKuduBackupLister { @Test def testMissingTable(): Unit = { - val io = new BackupIO(new Configuration(), rootPath.toUri.toString) + val io = new BackupIO(new Configuration(), rootPath) createStandardTableMetadata(io) val options = createOptions(rootPath, ListType.ALL, Seq("pizza", "nope")) @@ -188,15 +180,46 @@ class TestKuduBackupLister { } def createOptions( - rootPath: Path, + rootPath: String, listType: ListType.Value, tables: Seq[String] = Seq(), format: Format.Value = Format.CSV): BackupCLIOptions = { - new BackupCLIOptions( - rootPath.toUri.toString, - Mode.LIST, - tables = tables, - listType = listType, - format = format) + new BackupCLIOptions(rootPath, Mode.LIST, tables = tables, listType = listType, format = format) + } +} + +class LocalTestKuduBackupLister extends BaseTestKuduBackupLister { + + @Before + def setUp(): Unit = { + rootPath = Files.createTempDirectory("local-test").toAbsolutePath.toString + } + + @After + def tearDown(): Unit = { + FileUtils.deleteDirectory(new File(rootPath)) + } +} + +class HDFSTestKuduBackupLister extends BaseTestKuduBackupLister { + + var baseDir: File = _ + + @Before + def setUp(): Unit = { + baseDir = Files.createTempDirectory("hdfs-test").toFile.getAbsoluteFile + + // Create an HDFS mini-cluster. + val conf = new Configuration() + conf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, baseDir.getAbsolutePath) + val hdfsCluster = new MiniDFSCluster.Builder(conf).build() + + // Set the root path to use the HDFS URI. + rootPath = "hdfs://localhost:" + hdfsCluster.getNameNodePort + "/" + } + + @After + def tearDown(): Unit = { + FileUtil.fullyDelete(baseDir) } }
