Repository: hive Updated Branches: refs/heads/branch-3 813ac0b86 -> 86063301c
HIVE-19485: dump directory for non native tables should not be created (Anishek Agarwal, reviewed by Mahesh Kumar Behera,Sankar Hariappan) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/86063301 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/86063301 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/86063301 Branch: refs/heads/branch-3 Commit: 86063301ceea98352464242f0fe05b0130567c14 Parents: 813ac0b Author: Anishek Agarwal <anis...@gmail.com> Authored: Thu May 31 16:04:41 2018 +0530 Committer: Anishek Agarwal <anis...@gmail.com> Committed: Thu May 31 16:04:41 2018 +0530 ---------------------------------------------------------------------- ...TestReplicationScenariosAcrossInstances.java | 25 +++++++++ .../apache/hadoop/hive/ql/exec/ExportTask.java | 2 +- .../hive/ql/parse/repl/dump/TableExport.java | 53 ++++++++++++-------- 3 files changed, 58 insertions(+), 22 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/86063301/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcrossInstances.java ---------------------------------------------------------------------- diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcrossInstances.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcrossInstances.java index f5dd0a9..ebcb0d6 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcrossInstances.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcrossInstances.java @@ -21,6 +21,7 @@ import org.apache.commons.lang3.RandomStringUtils; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hive.conf.HiveConf; @@ -57,6 +58,7 @@ import static org.hamcrest.CoreMatchers.equalTo; import static org.hamcrest.CoreMatchers.is; import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.containsInAnyOrder; +import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; public class TestReplicationScenariosAcrossInstances { @@ -779,4 +781,27 @@ public class TestReplicationScenariosAcrossInstances { Partition uk = replica.getPartition(replicatedDbName, "t2", Collections.singletonList("uk")); verifyIfCkptSet(uk.getParameters(), tuple.dumpLocation); } + + @Test + public void shouldNotCreateDirectoryForNonNativeTableInDumpDirectory() throws Throwable { + String createTableQuery = + "CREATE TABLE custom_serdes( serde_id bigint COMMENT 'from deserializer', name string " + + "COMMENT 'from deserializer', slib string COMMENT 'from deserializer') " + + "ROW FORMAT SERDE 'org.apache.hive.storage.jdbc.JdbcSerDe' " + + "STORED BY 'org.apache.hive.storage.jdbc.JdbcStorageHandler' " + + "WITH SERDEPROPERTIES ('serialization.format'='1') " + + "TBLPROPERTIES ( " + + "'hive.sql.database.type'='METASTORE', " + + "'hive.sql.query'='SELECT \"SERDE_ID\", \"NAME\", \"SLIB\" FROM \"SERDES\"')"; + + WarehouseInstance.Tuple bootstrapTuple = primary + .run("use " + primaryDbName) + .run(createTableQuery).dump(primaryDbName, null); + Path cSerdesTableDumpLocation = new Path( + new Path(bootstrapTuple.dumpLocation, primaryDbName), + "custom_serdes"); + FileSystem fs = cSerdesTableDumpLocation.getFileSystem(primary.hiveConf); + assertFalse(fs.exists(cSerdesTableDumpLocation)); + } + } http://git-wip-us.apache.org/repos/asf/hive/blob/86063301/ql/src/java/org/apache/hadoop/hive/ql/exec/ExportTask.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExportTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExportTask.java index aba6591..e3af4f9 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExportTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExportTask.java @@ -51,7 +51,7 @@ public class ExportTask extends Task<ExportWork> implements Serializable { new TableExport.Paths(work.getAstRepresentationForErrorMsg(), work.getExportRootDir(), conf, false); Hive db = getHive(); - LOG.debug("Exporting data to: {}", exportPaths.getExportRootDir()); + LOG.debug("Exporting data to: {}", exportPaths.exportRootDir()); work.acidPostProcess(db); TableExport tableExport = new TableExport( exportPaths, work.getTableSpec(), work.getReplicationSpec(), db, null, conf http://git-wip-us.apache.org/repos/asf/hive/blob/86063301/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/TableExport.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/TableExport.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/TableExport.java index d0aeee5..e801a64 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/TableExport.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/TableExport.java @@ -172,9 +172,10 @@ public class TableExport { public static class Paths { private final String astRepresentationForErrorMsg; private final HiveConf conf; - private final Path exportRootDir; + //variable access should not be done and use exportRootDir() instead. + private final Path _exportRootDir; private final FileSystem exportFileSystem; - private boolean writeData; + private boolean writeData, exportRootDirCreated = false; public Paths(String astRepresentationForErrorMsg, Path dbRoot, String tblName, HiveConf conf, boolean shouldWriteData) throws SemanticException { @@ -184,12 +185,9 @@ public class TableExport { Path tableRoot = new Path(dbRoot, tblName); URI exportRootDir = EximUtil.getValidatedURI(conf, tableRoot.toUri().toString()); validateTargetDir(exportRootDir); - this.exportRootDir = new Path(exportRootDir); + this._exportRootDir = new Path(exportRootDir); try { - this.exportFileSystem = this.exportRootDir.getFileSystem(conf); - if (!exportFileSystem.exists(this.exportRootDir) && writeData) { - exportFileSystem.mkdirs(this.exportRootDir); - } + this.exportFileSystem = this._exportRootDir.getFileSystem(conf); } catch (IOException e) { throw new SemanticException(e); } @@ -199,20 +197,37 @@ public class TableExport { boolean shouldWriteData) throws SemanticException { this.astRepresentationForErrorMsg = astRepresentationForErrorMsg; this.conf = conf; - this.exportRootDir = new Path(EximUtil.getValidatedURI(conf, path)); + this._exportRootDir = new Path(EximUtil.getValidatedURI(conf, path)); this.writeData = shouldWriteData; try { - this.exportFileSystem = exportRootDir.getFileSystem(conf); - if (!exportFileSystem.exists(this.exportRootDir) && writeData) { - exportFileSystem.mkdirs(this.exportRootDir); - } + this.exportFileSystem = _exportRootDir.getFileSystem(conf); } catch (IOException e) { throw new SemanticException(e); } } Path partitionExportDir(String partitionName) throws SemanticException { - return exportDir(new Path(exportRootDir, partitionName)); + return exportDir(new Path(exportRootDir(), partitionName)); + } + + /** + * Access to the {@link #_exportRootDir} should only be done via this method + * since the creation of the directory is delayed until we figure out if we want + * to write something or not. This is specifically important to prevent empty non-native + * directories being created in repl dump. + */ + public Path exportRootDir() throws SemanticException { + if (!exportRootDirCreated) { + try { + if (!exportFileSystem.exists(this._exportRootDir) && writeData) { + exportFileSystem.mkdirs(this._exportRootDir); + } + exportRootDirCreated = true; + } catch (IOException e) { + throw new SemanticException(e); + } + } + return _exportRootDir; } private Path exportDir(Path exportDir) throws SemanticException { @@ -227,8 +242,8 @@ public class TableExport { } } - private Path metaDataExportFile() { - return new Path(exportRootDir, EximUtil.METADATA_NAME); + private Path metaDataExportFile() throws SemanticException { + return new Path(exportRootDir(), EximUtil.METADATA_NAME); } /** @@ -236,7 +251,7 @@ public class TableExport { * Partition's data export directory is created within the export semantics of partition. */ private Path dataExportDir() throws SemanticException { - return exportDir(new Path(getExportRootDir(), EximUtil.DATA_PATH_NAME)); + return exportDir(new Path(exportRootDir(), EximUtil.DATA_PATH_NAME)); } /** @@ -269,10 +284,6 @@ public class TableExport { throw new SemanticException(astRepresentationForErrorMsg, e); } } - - public Path getExportRootDir() { - return exportRootDir; - } } public static class AuthEntities { @@ -306,7 +317,7 @@ public class TableExport { authEntities.inputs.add(new ReadEntity(tableSpec.tableHandle)); } } - authEntities.outputs.add(toWriteEntity(paths.getExportRootDir(), conf)); + authEntities.outputs.add(toWriteEntity(paths.exportRootDir(), conf)); } catch (Exception e) { throw new SemanticException(e); }