Repository: hive Updated Branches: refs/heads/master b9e46bc4a -> 1bcc88f15
HIVE-18536 : IOW + DP is broken for insert-only ACID (Sergey Shelukhin, reviewed by Eugene Koifman) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/1bcc88f1 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/1bcc88f1 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/1bcc88f1 Branch: refs/heads/master Commit: 1bcc88f15b20e2a808528d664d35c0d21d6c9388 Parents: b9e46bc Author: sergey <[email protected]> Authored: Mon Feb 5 16:16:54 2018 -0800 Committer: sergey <[email protected]> Committed: Mon Feb 5 16:16:54 2018 -0800 ---------------------------------------------------------------------- .../apache/hadoop/hive/common/JavaUtils.java | 35 ++++++++------------ .../apache/hadoop/hive/ql/exec/Utilities.java | 33 ++++++++++-------- .../apache/hadoop/hive/ql/metadata/Hive.java | 2 ++ 3 files changed, 35 insertions(+), 35 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/1bcc88f1/common/src/java/org/apache/hadoop/hive/common/JavaUtils.java ---------------------------------------------------------------------- diff --git a/common/src/java/org/apache/hadoop/hive/common/JavaUtils.java b/common/src/java/org/apache/hadoop/hive/common/JavaUtils.java index 9f64b3d..57afbf8 100644 --- a/common/src/java/org/apache/hadoop/hive/common/JavaUtils.java +++ b/common/src/java/org/apache/hadoop/hive/common/JavaUtils.java @@ -186,32 +186,24 @@ public final class JavaUtils { } public static class IdPathFilter implements PathFilter { - private String mmDirName; - private final boolean isMatch, isIgnoreTemp, isPrefix; + private String baseDirName, deltaDirName; + private final boolean isMatch, isIgnoreTemp, isDeltaPrefix; public IdPathFilter(long writeId, int stmtId, boolean isMatch) { - this(writeId, stmtId, isMatch, false, false); + this(writeId, stmtId, isMatch, false); } + public IdPathFilter(long writeId, int stmtId, boolean isMatch, boolean isIgnoreTemp) { - this(writeId, stmtId, isMatch, isIgnoreTemp, false); - } - public IdPathFilter(long writeId, int stmtId, boolean isMatch, boolean isIgnoreTemp, boolean isBaseDir) { - String mmDirName = null; - if (!isBaseDir) { - mmDirName = DELTA_PREFIX + "_" + String.format(DELTA_DIGITS, writeId) + "_" + - String.format(DELTA_DIGITS, writeId) + "_"; - if (stmtId >= 0) { - mmDirName += String.format(STATEMENT_DIGITS, stmtId); - isPrefix = false; - } else { - isPrefix = true; - } - } else { - mmDirName = BASE_PREFIX + "_" + String.format(DELTA_DIGITS, writeId); - isPrefix = false; + String deltaDirName = null; + deltaDirName = DELTA_PREFIX + "_" + String.format(DELTA_DIGITS, writeId) + "_" + + String.format(DELTA_DIGITS, writeId) + "_"; + isDeltaPrefix = (stmtId < 0); + if (!isDeltaPrefix) { + deltaDirName += String.format(STATEMENT_DIGITS, stmtId); } - this.mmDirName = mmDirName; + this.baseDirName = BASE_PREFIX + "_" + String.format(DELTA_DIGITS, writeId); + this.deltaDirName = deltaDirName; this.isMatch = isMatch; this.isIgnoreTemp = isIgnoreTemp; } @@ -219,7 +211,8 @@ public final class JavaUtils { @Override public boolean accept(Path path) { String name = path.getName(); - if ((isPrefix && name.startsWith(mmDirName)) || (!isPrefix && name.equals(mmDirName))) { + if (name.equals(baseDirName) || (isDeltaPrefix && name.startsWith(deltaDirName)) + || (!isDeltaPrefix && name.equals(deltaDirName))) { return isMatch; } if (isIgnoreTemp && name.length() > 0) { http://git-wip-us.apache.org/repos/asf/hive/blob/1bcc88f1/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java index cf108e3..941dd58 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java @@ -4071,18 +4071,20 @@ public final class Utilities { } public static Path[] getMmDirectoryCandidates(FileSystem fs, Path path, int dpLevels, - int lbLevels, PathFilter filter, long txnId, int stmtId, Configuration conf, boolean isBaseDir) - throws IOException { + int lbLevels, PathFilter filter, long txnId, int stmtId, Configuration conf, + Boolean isBaseDir) throws IOException { int skipLevels = dpLevels + lbLevels; if (filter == null) { - filter = new JavaUtils.IdPathFilter(txnId, stmtId, true, false, isBaseDir); + filter = new JavaUtils.IdPathFilter(txnId, stmtId, true, false); } if (skipLevels == 0) { return statusToPath(fs.listStatus(path, filter)); } // TODO: for some reason, globStatus doesn't work for masks like "...blah/*/delta_0000007_0000007*" // the last star throws it off. So, for now, if stmtId is missing use recursion. - if (stmtId < 0 + // For the same reason, we cannot use it if we don't know isBaseDir. Currently, we don't + // /want/ to know isBaseDir because that is error prone; so, it ends up never being used. + if (stmtId < 0 || isBaseDir == null || (HiveConf.getBoolVar(conf, ConfVars.HIVE_MM_AVOID_GLOBSTATUS_ON_S3) && isS3(fs))) { return getMmDirectoryCandidatesRecursive(fs, path, skipLevels, filter); } @@ -4162,8 +4164,8 @@ public final class Utilities { return results.toArray(new Path[results.size()]); } - private static Path[] getMmDirectoryCandidatesGlobStatus(FileSystem fs, - Path path, int skipLevels, PathFilter filter, long txnId, int stmtId, boolean isBaseDir) throws IOException { + private static Path[] getMmDirectoryCandidatesGlobStatus(FileSystem fs, Path path, int skipLevels, + PathFilter filter, long txnId, int stmtId, boolean isBaseDir) throws IOException { StringBuilder sb = new StringBuilder(path.toUri().getPath()); for (int i = 0; i < skipLevels; i++) { sb.append(Path.SEPARATOR).append('*'); @@ -4180,10 +4182,10 @@ public final class Utilities { } private static void tryDeleteAllMmFiles(FileSystem fs, Path specPath, Path manifestDir, - int dpLevels, int lbLevels, JavaUtils.IdPathFilter filter, - long txnId, int stmtId, Configuration conf, boolean isBaseDir) throws IOException { + int dpLevels, int lbLevels, JavaUtils.IdPathFilter filter, long txnId, int stmtId, + Configuration conf) throws IOException { Path[] files = getMmDirectoryCandidates( - fs, specPath, dpLevels, lbLevels, filter, txnId, stmtId, conf, isBaseDir); + fs, specPath, dpLevels, lbLevels, filter, txnId, stmtId, conf, null); if (files != null) { for (Path path : files) { Utilities.FILE_OP_LOGGER.info("Deleting {} on failure", path); @@ -4220,8 +4222,10 @@ public final class Utilities { } } - private static Path getManifestDir(Path specPath, long txnId, int stmtId, String unionSuffix, boolean isInsertOverwrite) { - Path manifestPath = new Path(specPath, "_tmp." + + // TODO: we should get rid of isInsertOverwrite here too. + private static Path getManifestDir( + Path specPath, long txnId, int stmtId, String unionSuffix, boolean isInsertOverwrite) { + Path manifestPath = new Path(specPath, "_tmp." + AcidUtils.baseOrDeltaSubdir(isInsertOverwrite, txnId, txnId, stmtId)); return (unionSuffix == null) ? manifestPath : new Path(manifestPath, unionSuffix); @@ -4240,13 +4244,14 @@ public final class Utilities { public static void handleMmTableFinalPath(Path specPath, String unionSuffix, Configuration hconf, boolean success, int dpLevels, int lbLevels, MissingBucketsContext mbc, long txnId, int stmtId, - Reporter reporter, boolean isMmTable, boolean isMmCtas, boolean isInsertOverwrite) throws IOException, HiveException { + Reporter reporter, boolean isMmTable, boolean isMmCtas, boolean isInsertOverwrite) + throws IOException, HiveException { FileSystem fs = specPath.getFileSystem(hconf); Path manifestDir = getManifestDir(specPath, txnId, stmtId, unionSuffix, isInsertOverwrite); if (!success) { JavaUtils.IdPathFilter filter = new JavaUtils.IdPathFilter(txnId, stmtId, true); tryDeleteAllMmFiles(fs, specPath, manifestDir, dpLevels, lbLevels, - filter, txnId, stmtId, hconf, isInsertOverwrite); + filter, txnId, stmtId, hconf); return; } @@ -4269,7 +4274,7 @@ public final class Utilities { } Utilities.FILE_OP_LOGGER.debug("Looking for files in: {}", specPath); - JavaUtils.IdPathFilter filter = new JavaUtils.IdPathFilter(txnId, stmtId, true, false, isInsertOverwrite); + JavaUtils.IdPathFilter filter = new JavaUtils.IdPathFilter(txnId, stmtId, true, false); if (isMmCtas && !fs.exists(specPath)) { Utilities.FILE_OP_LOGGER.info("Creating table directory for CTAS with no output at {}", specPath); FileUtils.mkdir(fs, specPath, hconf); http://git-wip-us.apache.org/repos/asf/hive/blob/1bcc88f1/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java index b1e05df..07999e2 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java @@ -2190,6 +2190,8 @@ private void constructOneLBLocationMap(FileStatus fSta, // Note: we ignore the statement ID here, because it's currently irrelevant for MoveTask // where this is used; we always want to load everything; also the only case where // we have multiple statements anyway is union. + Utilities.FILE_OP_LOGGER.trace( + "Looking for dynamic partitions in {} ({} levels)", loadPath, numDP); Path[] leafStatus = Utilities.getMmDirectoryCandidates( fs, loadPath, numDP, numLB, null, txnId, -1, conf, isInsertOverwrite); for (Path p : leafStatus) {
