Repository: hive Updated Branches: refs/heads/branch-3 f53833292 -> 559ebb3e9
HIVE-17824 : msck repair table should drop the missing partitions from metastore (Janaki Lahorani, reviewed by Peter Vary, Alexander Kolbasov and Vihang Karajgaonkar) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/559ebb3e Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/559ebb3e Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/559ebb3e Branch: refs/heads/branch-3 Commit: 559ebb3e9a9a7151fa81632a023726186a141cab Parents: f538332 Author: Janaki Lahorani <[email protected]> Authored: Mon Apr 16 11:16:50 2018 -0700 Committer: Vihang Karajgaonkar <[email protected]> Committed: Mon Apr 16 11:52:38 2018 -0700 ---------------------------------------------------------------------- .../apache/hadoop/hive/ql/QOutProcessor.java | 3 +- .../org/apache/hadoop/hive/ql/exec/DDLTask.java | 185 +++++++--- .../apache/hadoop/hive/ql/metadata/Hive.java | 80 +++++ .../hive/ql/parse/DDLSemanticAnalyzer.java | 54 ++- .../org/apache/hadoop/hive/ql/parse/HiveLexer.g | 1 + .../apache/hadoop/hive/ql/parse/HiveParser.g | 8 +- .../apache/hadoop/hive/ql/plan/MsckDesc.java | 50 ++- .../exec/TestMsckDropPartitionsInBatches.java | 342 +++++++++++++++++++ .../queries/clientpositive/msck_repair_drop.q | 180 ++++++++++ .../clientpositive/msck_repair_drop.q.out | 293 ++++++++++++++++ 10 files changed, 1134 insertions(+), 62 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/559ebb3e/itests/util/src/main/java/org/apache/hadoop/hive/ql/QOutProcessor.java ---------------------------------------------------------------------- diff --git a/itests/util/src/main/java/org/apache/hadoop/hive/ql/QOutProcessor.java b/itests/util/src/main/java/org/apache/hadoop/hive/ql/QOutProcessor.java index 024fa1b..52d9668 100644 --- a/itests/util/src/main/java/org/apache/hadoop/hive/ql/QOutProcessor.java +++ b/itests/util/src/main/java/org/apache/hadoop/hive/ql/QOutProcessor.java @@ -110,7 +110,8 @@ public class QOutProcessor { ".*at com\\.jolbox.*", ".*at com\\.zaxxer.*", "org\\.apache\\.hadoop\\.hive\\.metastore\\.model\\.MConstraint@([0-9]|[a-z])*", - "^Repair: Added partition to metastore.*" + "^Repair: Added partition to metastore.*", + "^Repair: Dropped partition from metastore.*" }); public QOutProcessor(FsType fsType) { http://git-wip-us.apache.org/repos/asf/hive/blob/559ebb3e/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java index b9b1830..5b26b84 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java @@ -2132,58 +2132,92 @@ public class DDLTask extends Task<DDLWork> implements Serializable { try { HiveMetaStoreChecker checker = new HiveMetaStoreChecker(db); String[] names = Utilities.getDbTableName(msckDesc.getTableName()); + + // The checkMetastore call fills in result with partitions that are present in the filesystem + // but missing from the metastore - accessed through getPartitionsNotInMs - + // and with partitions whose metadata exists in the metastore but which are missing from + // the filesystem - accessed through getPartitionsNotOnFs checker.checkMetastore(names[0], names[1], msckDesc.getPartSpecs(), result); Set<CheckResult.PartitionResult> partsNotInMs = result.getPartitionsNotInMs(); - if (msckDesc.isRepairPartitions() &&
!partsNotInMs.isEmpty()) { - AbstractList<String> vals = null; - String settingStr = HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_MSCK_PATH_VALIDATION); - boolean doValidate = !("ignore".equals(settingStr)); - boolean doSkip = doValidate && "skip".equals(settingStr); - // The default setting is "throw"; assume doValidate && !doSkip means throw. - if (doValidate) { - // Validate that we can add partition without escaping. Escaping was originally intended - // to avoid creating invalid HDFS paths; however, if we escape the HDFS path (that we - // deem invalid but HDFS actually supports - it is possible to create HDFS paths with - // unprintable characters like ASCII 7), metastore will create another directory instead - // of the one we are trying to "repair" here. - Iterator<CheckResult.PartitionResult> iter = partsNotInMs.iterator(); - while (iter.hasNext()) { - CheckResult.PartitionResult part = iter.next(); - try { - vals = Warehouse.makeValsFromName(part.getPartitionName(), vals); - } catch (MetaException ex) { - throw new HiveException(ex); - } - for (String val : vals) { - String escapedPath = FileUtils.escapePathName(val); - assert escapedPath != null; - if (escapedPath.equals(val)) { - continue; + Set<CheckResult.PartitionResult> partsNotInFs = result.getPartitionsNotOnFs(); + + if (msckDesc.isRepairPartitions()) { + // Repair metadata in HMS + + Table table = db.getTable(msckDesc.getTableName()); + int maxRetries = conf.getIntVar(ConfVars.HIVE_MSCK_REPAIR_BATCH_MAX_RETRIES); + int decayingFactor = 2; + + if (msckDesc.isAddPartitions() && !partsNotInMs.isEmpty()) { + // MSCK called to add missing partitions into metastore and there are + // missing partitions. + + int batchSize = conf.getIntVar(ConfVars.HIVE_MSCK_REPAIR_BATCH_SIZE); + if (batchSize == 0) { + //batching is not enabled. Try to add all the partitions in one call + batchSize = partsNotInMs.size(); + } + + AbstractList<String> vals = null; + String settingStr = HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_MSCK_PATH_VALIDATION); + boolean doValidate = !("ignore".equals(settingStr)); + boolean doSkip = doValidate && "skip".equals(settingStr); + // The default setting is "throw"; assume doValidate && !doSkip means throw. + if (doValidate) { + // Validate that we can add partition without escaping. Escaping was originally intended + // to avoid creating invalid HDFS paths; however, if we escape the HDFS path (that we + // deem invalid but HDFS actually supports - it is possible to create HDFS paths with + // unprintable characters like ASCII 7), metastore will create another directory instead + // of the one we are trying to "repair" here.
+ Iterator<CheckResult.PartitionResult> iter = partsNotInMs.iterator(); + while (iter.hasNext()) { + CheckResult.PartitionResult part = iter.next(); + try { + vals = Warehouse.makeValsFromName(part.getPartitionName(), vals); + } catch (MetaException ex) { + throw new HiveException(ex); } - String errorMsg = "Repair: Cannot add partition " + msckDesc.getTableName() - + ':' + part.getPartitionName() + " due to invalid characters in the name"; - if (doSkip) { - repairOutput.add(errorMsg); - iter.remove(); - } else { - throw new HiveException(errorMsg); + for (String val : vals) { + String escapedPath = FileUtils.escapePathName(val); + assert escapedPath != null; + if (escapedPath.equals(val)) { + continue; + } + String errorMsg = "Repair: Cannot add partition " + msckDesc.getTableName() + ':' + + part.getPartitionName() + " due to invalid characters in the name"; + if (doSkip) { + repairOutput.add(errorMsg); + iter.remove(); + } else { + throw new HiveException(errorMsg); + } } } } + try { + createPartitionsInBatches(db, repairOutput, partsNotInMs, table, batchSize, + decayingFactor, maxRetries); + } catch (Exception e) { + throw new HiveException(e); + } } - Table table = db.getTable(msckDesc.getTableName()); - int batchSize = conf.getIntVar(ConfVars.HIVE_MSCK_REPAIR_BATCH_SIZE); - int maxRetries = conf.getIntVar(ConfVars.HIVE_MSCK_REPAIR_BATCH_MAX_RETRIES); - int decayingFactor = 2; - if (batchSize == 0) { - //batching is not enabled. Try to add all the partitions in one call - batchSize = partsNotInMs.size(); - } - try { - createPartitionsInBatches(db, repairOutput, partsNotInMs, table, batchSize, - decayingFactor, maxRetries); - } catch (Exception e) { - throw new HiveException(e) + + if (msckDesc.isDropPartitions() && !partsNotInFs.isEmpty()) { + // MSCK called to drop stale partitions from metastore and there are + // stale partitions. + + int batchSize = conf.getIntVar(ConfVars.HIVE_MSCK_REPAIR_BATCH_SIZE); + if (batchSize == 0) { + //batching is not enabled. Try to drop all the partitions in one call + batchSize = partsNotInFs.size(); + } + + try { + dropPartitionsInBatches(db, repairOutput, partsNotInFs, table, batchSize, + decayingFactor, maxRetries); + } catch (Exception e) { + throw new HiveException(e); + } } } } catch (HiveException e) { @@ -2273,6 +2307,67 @@ public class DDLTask extends Task<DDLWork> implements Serializable { }.run(); } + // Drops partitions in batches. partsNotInFs is split into batches based on batchSize + // and dropped. The dropping will be through RetryUtilities which will retry when there is a + // failure after reducing the batchSize by decayingFactor. Retrying will cease when the maxRetries + // limit is reached or batchSize reduces to 0, whichever comes earlier.
+ @VisibleForTesting + void dropPartitionsInBatches(Hive db, List<String> repairOutput, + Set<CheckResult.PartitionResult> partsNotInFs, Table table, int batchSize, int decayingFactor, + int maxRetries) throws Exception { + String dropMsgFormat = + "Repair: Dropped partition from metastore " + table.getFullyQualifiedName() + ":%s"; + // Copy of partitions that will be split into batches + Set<CheckResult.PartitionResult> batchWork = new HashSet<>(partsNotInFs); + + new RetryUtilities.ExponentiallyDecayingBatchWork<Void>(batchSize, decayingFactor, maxRetries) { + @Override + public Void execute(int size) throws Exception { + while (!batchWork.isEmpty()) { + int currentBatchSize = size; + + // to store the partitions that are currently being processed + List<CheckResult.PartitionResult> lastBatch = new ArrayList<>(currentBatchSize); + + // drop messages for the dropped partitions + List<String> dropMsgs = new ArrayList<>(currentBatchSize); + + // Partitions to be dropped + List<String> dropParts = new ArrayList<>(currentBatchSize); + + for (CheckResult.PartitionResult part : batchWork) { + // This batch is full: break out of for loop to execute + if (currentBatchSize == 0) { + break; + } + + dropParts.add(part.getPartitionName()); + + // Add the part to lastBatch to track the partition being dropped + lastBatch.add(part); + + // Update messages + dropMsgs.add(String.format(dropMsgFormat, part.getPartitionName())); + + // Decrement batch size. When this gets to 0, the batch will be executed + currentBatchSize--; + } + + // this call is deleting partitions that are already missing from the filesystem, + // so the 3rd parameter (deleteData) is set to false + // msck is doing a cleanup of the HMS; if for some reason the partition is already + // deleted, that is fine, so the last parameter (ifExists) is set to true + db.dropPartitions(table, dropParts, false, true); + + // if the last batch was successful, remove it from batchWork + batchWork.removeAll(lastBatch); + repairOutput.addAll(dropMsgs); + } + return null; + } + }.run(); + } + /** * Write the result of msck to a writer. *
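Both createPartitionsInBatches and dropPartitionsInBatches lean on RetryUtilities.ExponentiallyDecayingBatchWork for the retry policy described in the comment above. As a rough illustration of that behaviour, here is a minimal, self-contained sketch; names such as DecayingBatchSketch, runInBatches and BatchCallback are illustrative only, not Hive APIs. Work is attempted in batches of the current size, every failure divides the size by the decaying factor, and the loop gives up once the size decays to 0 or, when maxRetries is positive, after maxRetries failures:

import java.util.ArrayList;
import java.util.List;

public class DecayingBatchSketch {
  interface BatchCallback {
    void processBatch(List<String> batch) throws Exception;
  }

  static class RetriesExhaustedException extends Exception {
    RetriesExhaustedException(String msg, Throwable cause) { super(msg, cause); }
  }

  // Sketch of the decaying-batch retry idea: batch size decays by decayingFactor on
  // each failure; processing stops when all work is done, and fails permanently when
  // the size decays to 0 or maxRetries (if positive) failures have been seen.
  static void runInBatches(List<String> work, int batchSize, int decayingFactor,
      int maxRetries, BatchCallback callback) throws Exception {
    List<String> remaining = new ArrayList<>(work);
    int size = batchSize;
    int failures = 0;
    while (!remaining.isEmpty()) {
      List<String> batch =
          new ArrayList<>(remaining.subList(0, Math.min(size, remaining.size())));
      try {
        callback.processBatch(batch);
        remaining.removeAll(batch); // success: keep going at the current size
      } catch (Exception e) {
        size = size / decayingFactor; // failure: decay the batch size and retry
        failures++;
        if (size == 0 || (maxRetries > 0 && failures >= maxRetries)) {
          throw new RetriesExhaustedException(
              "Giving up after " + failures + " failed attempts", e);
        }
      }
    }
  }

  public static void main(String[] args) throws Exception {
    List<String> parts = new ArrayList<>();
    for (int i = 0; i < 14; i++) {
      parts.add("p=" + i);
    }
    // With batchSize 5 and no failures, 14 items go through in batches of 5, 5 and 4,
    // matching the expectations exercised by the unit test added in this commit.
    runInBatches(parts, 5, 2, 0, batch -> System.out.println("dropping " + batch));
  }
}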
http://git-wip-us.apache.org/repos/asf/hive/blob/559ebb3e/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java index 2dd1d35..90b6836 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java @@ -28,6 +28,7 @@ import static org.apache.hadoop.hive.conf.Constants.MATERIALIZED_VIEW_REWRITING_ import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE; import static org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.getDefaultCatalog; +import static org.apache.hadoop.hive.ql.parse.DDLSemanticAnalyzer.makeBinaryPredicate; import static org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_FORMAT; import static org.apache.hadoop.hive.serde.serdeConstants.STRING_TYPE_NAME; @@ -182,6 +183,8 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.HiveAugmentMateri import org.apache.hadoop.hive.ql.optimizer.listbucketingpruner.ListBucketingPrunerUtils; import org.apache.hadoop.hive.ql.plan.AddPartitionDesc; import org.apache.hadoop.hive.ql.plan.DropTableDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; import org.apache.hadoop.hive.ql.plan.LoadTableDesc.LoadFileType; import org.apache.hadoop.hive.ql.session.CreateTableAutomaticGrant; @@ -189,6 +192,8 @@ import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.serde2.Deserializer; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.hive.shims.HadoopShims; import org.apache.hadoop.hive.shims.ShimLoader; import org.apache.hadoop.mapred.InputFormat; @@ -2735,6 +2740,81 @@ private void constructOneLBLocationMap(FileStatus fSta, } } + /** + * Drop the partitions specified as directory names associated with the table. + * + * @param table object for which partition is needed + * @param partDirNames partition directories that need to be dropped + * @param deleteData whether data should be deleted from file system + * @param ifExists check for existence before attempting delete + * + * @return list of partition objects that were deleted + * + * @throws HiveException + */ + public List<Partition> dropPartitions(Table table, List<String> partDirNames, + boolean deleteData, boolean ifExists) throws HiveException { + // partitions to be dropped in this batch + List<DropTableDesc.PartSpec> partSpecs = new ArrayList<>(partDirNames.size()); + + // parts of the partition + String[] parts = null; + + // Expression splits of each part of the partition + String[] partExprParts = null; + + // Column Types of all partitioned columns.
Used for generating the partition specification + Map<String, String> colTypes = new HashMap<String, String>(); + for (FieldSchema fs : table.getPartitionKeys()) { + colTypes.put(fs.getName(), fs.getType()); + } + + // Key to be used to save the partition to be dropped in partSpecs + int partSpecKey = 0; + + for (String partDir : partDirNames) { + // The expression to identify the partition to be dropped + ExprNodeGenericFuncDesc expr = null; + + // Split by "/" to identify partition parts + parts = partDir.split("/"); + + // Loop through the partitions and form the expression + for (String part : parts) { + // Split the partition predicate to identify column and value + partExprParts = part.split("="); + + // Only two elements expected in partExprParts: partition column name and partition value + assert partExprParts.length == 2; + + // Partition Column + String partCol = partExprParts[0]; + + // Column Type + PrimitiveTypeInfo pti = TypeInfoFactory.getPrimitiveTypeInfo(colTypes.get(partCol)); + + // Form the expression node corresponding to column + ExprNodeColumnDesc column = new ExprNodeColumnDesc(pti, partCol, null, true); + + // Build the expression based on the partition predicate + ExprNodeGenericFuncDesc op = + makeBinaryPredicate("=", column, new ExprNodeConstantDesc(pti, partExprParts[1])); + + // the multiple parts of the partition predicate are joined using "and" + expr = (expr == null) ? op : makeBinaryPredicate("and", expr, op); + } + + // Add the expression to partition specification + partSpecs.add(new DropTableDesc.PartSpec(expr, partSpecKey)); + + // Increment partSpecKey to get a new key for the next partition spec + ++partSpecKey; + } + + String[] names = Utilities.getDbTableName(table.getFullyQualifiedName()); + return dropPartitions(names[0], names[1], partSpecs, deleteData, ifExists); + } + public List<Partition> dropPartitions(String tblName, List<DropTableDesc.PartSpec> partSpecs, boolean deleteData, boolean ifExists) throws HiveException { String[] names = Utilities.getDbTableName(tblName);
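To make the loop above concrete: a partition directory name such as p1=2/p2=21 is split on "/" and "=" and rebuilt as per-column equality predicates joined with "and". The sketch below is illustrative only; the class name is hypothetical, and a plain string stands in for the ExprNodeGenericFuncDesc tree that makeBinaryPredicate actually builds:

import java.util.ArrayList;
import java.util.List;

// Illustrative only: string-level view of the decomposition that
// Hive.dropPartitions performs before building expression predicates.
public class PartDirToPredicateSketch {
  public static void main(String[] args) {
    String partDir = "p1=2/p2=21";            // a partition directory name
    List<String> predicates = new ArrayList<>();
    for (String part : partDir.split("/")) {  // ["p1=2", "p2=21"]
      String[] kv = part.split("=");          // [column name, value]
      predicates.add("(" + kv[0] + " = '" + kv[1] + "')");
    }
    // Parts are joined with "and", mirroring makeBinaryPredicate("and", ...):
    System.out.println(String.join(" and ", predicates));
    // prints: (p1 = '2') and (p2 = '21')
  }
}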
http://git-wip-us.apache.org/repos/asf/hive/blob/559ebb3e/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java index f38b0bc..f0daa18 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java @@ -3652,6 +3652,44 @@ public class DDLSemanticAnalyzer extends BaseSemanticAnalyzer { } /** + * Check if MSCK is called to add partitions. + * + * @param keyWord + * could be ADD, DROP or SYNC. ADD or SYNC will indicate that add partition is on. + * + * @return true if add is on; false otherwise + */ + private static boolean isMsckAddPartition(int keyWord) { + switch (keyWord) { + case HiveParser.KW_DROP: + return false; + case HiveParser.KW_SYNC: + case HiveParser.KW_ADD: + default: + return true; + } + } + + /** + * Check if MSCK is called to drop partitions. + * + * @param keyWord + * could be ADD, DROP or SYNC. DROP or SYNC will indicate that drop partition is on. + * + * @return true if drop is on; false otherwise + */ + private static boolean isMsckDropPartition(int keyWord) { + switch (keyWord) { + case HiveParser.KW_DROP: + case HiveParser.KW_SYNC: + return true; + case HiveParser.KW_ADD: + default: + return false; + } + } + + /** * Verify that the information in the metastore matches up with the data on * the fs. * @@ -3661,20 +3699,34 @@ public class DDLSemanticAnalyzer extends BaseSemanticAnalyzer { */ private void analyzeMetastoreCheck(CommonTree ast) throws SemanticException { String tableName = null; + + boolean addPartitions = true; + boolean dropPartitions = false; + boolean repair = false; if (ast.getChildCount() > 0) { repair = ast.getChild(0).getType() == HiveParser.KW_REPAIR; if (!repair) { tableName = getUnescapedName((ASTNode) ast.getChild(0)); + + if (ast.getChildCount() > 1) { + addPartitions = isMsckAddPartition(ast.getChild(1).getType()); + dropPartitions = isMsckDropPartition(ast.getChild(1).getType()); + } } else if (ast.getChildCount() > 1) { tableName = getUnescapedName((ASTNode) ast.getChild(1)); + + if (ast.getChildCount() > 2) { + addPartitions = isMsckAddPartition(ast.getChild(2).getType()); + dropPartitions = isMsckDropPartition(ast.getChild(2).getType()); + } } } Table tab = getTable(tableName); List<Map<String, String>> specs = getPartitionSpecs(tab, ast); outputs.add(new WriteEntity(tab, WriteEntity.WriteType.DDL_SHARED)); MsckDesc checkDesc = new MsckDesc(tableName, specs, ctx.getResFile(), - repair); + repair, addPartitions, dropPartitions); rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), checkDesc))); } http://git-wip-us.apache.org/repos/asf/hive/blob/559ebb3e/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g index 3ff6a5e..09a4368 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g @@ -373,6 +373,7 @@ KW_MANAGEMENT: 'MANAGEMENT'; KW_ACTIVE: 'ACTIVE'; KW_UNMANAGED: 'UNMANAGED'; KW_APPLICATION: 'APPLICATION'; +KW_SYNC: 'SYNC'; // Operators // NOTE: if you add a new function/operator, add it to sysFuncNames so that describe function _FUNC_ will work. http://git-wip-us.apache.org/repos/asf/hive/blob/559ebb3e/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g index 936657b..925783b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g @@ -490,6 +490,7 @@ import org.apache.hadoop.hive.conf.HiveConf; xlateMap.put("KW_CLUSTER", "CLUSTER"); xlateMap.put("KW_DISTRIBUTE", "DISTRIBUTE"); xlateMap.put("KW_SORT", "SORT"); + xlateMap.put("KW_SYNC", "SYNC"); xlateMap.put("KW_UNION", "UNION"); xlateMap.put("KW_INTERSECT", "INTERSECT"); xlateMap.put("KW_EXCEPT", "EXCEPT"); @@ -1807,8 +1808,11 @@ withAdminOption metastoreCheck @init { pushMsg("metastore check statement", state); } @after { popMsg(state); } - : KW_MSCK (repair=KW_REPAIR)? (KW_TABLE tableName partitionSpec? (COMMA partitionSpec)*)? - -> ^(TOK_MSCK $repair? (tableName partitionSpec*)?) + : KW_MSCK (repair=KW_REPAIR)?
+ (KW_TABLE tableName + ((add=KW_ADD | drop=KW_DROP | sync=KW_SYNC) (parts=KW_PARTITIONS))? | + (partitionSpec)?) + -> ^(TOK_MSCK $repair? tableName? $add? $drop? $sync? (partitionSpec*)?) ; resourceList http://git-wip-us.apache.org/repos/asf/hive/blob/559ebb3e/ql/src/java/org/apache/hadoop/hive/ql/plan/MsckDesc.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/MsckDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/MsckDesc.java index 7e6def4..5d2307c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/MsckDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/MsckDesc.java @@ -35,6 +35,8 @@ public class MsckDesc extends DDLWork implements Serializable { private ArrayList<LinkedHashMap<String, String>> partSpecs; private String resFile; private boolean repairPartitions; + private boolean addPartitions; + private boolean dropPartitions; /** * For serialization use only. @@ -42,20 +44,26 @@ public class MsckDesc extends DDLWork implements Serializable { public MsckDesc() { } - /** - * Description of a msck command. - * - * @param tableName - * Table to check, can be null. - * @param partSpecs - * Partition specification, can be null. - * @param resFile - * Where to save the output of the command - * @param repairPartitions - * remove stale / add new partitions found during the check - */ + /** + * Description of a msck command. + * + * @param tableName + * Table to check, can be null. + * @param partSpecs + * Partition specification, can be null. + * @param resFile + * Where to save the output of the command + * @param repairPartitions + * remove stale / add new partitions found during the check + * @param addPartitions + * find partitions that are missing from metastore, and add them when repairPartitions + * is set to true + * @param dropPartitions + * find stale partitions in metastore, and drop them when repairPartitions + * is set to true + */ public MsckDesc(String tableName, List<? 
extends Map<String, String>> partSpecs, - Path resFile, boolean repairPartitions) { + Path resFile, boolean repairPartitions, boolean addPartitions, boolean dropPartitions) { super(); this.tableName = tableName; this.partSpecs = new ArrayList<LinkedHashMap<String, String>>(partSpecs.size()); @@ -64,6 +72,8 @@ public class MsckDesc extends DDLWork implements Serializable { } this.resFile = resFile.toString(); this.repairPartitions = repairPartitions; + this.addPartitions = addPartitions; + this.dropPartitions = dropPartitions; } /** @@ -119,6 +129,20 @@ public class MsckDesc extends DDLWork implements Serializable { } /** + * @return true if missing partitions are to be found, and added with the repair option + */ + public boolean isAddPartitions() { + return addPartitions; + } + + /** + * @return true if stale partitions are to be found, and removed with the repair option + */ + public boolean isDropPartitions() { + return dropPartitions; + } + + /** * @param repairPartitions * stale / add new partitions found during the check */ http://git-wip-us.apache.org/repos/asf/hive/blob/559ebb3e/ql/src/test/org/apache/hadoop/hive/ql/exec/TestMsckDropPartitionsInBatches.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestMsckDropPartitionsInBatches.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestMsckDropPartitionsInBatches.java new file mode 100644 index 0000000..7e768da --- /dev/null +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestMsckDropPartitionsInBatches.java @@ -0,0 +1,342 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.exec; + +import static org.junit.Assert.fail; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.conf.HiveConf.ConfVars; +import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat; +import org.apache.hadoop.hive.ql.metadata.CheckResult.PartitionResult; +import org.apache.hadoop.hive.ql.metadata.Hive; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.ql.session.SessionState; +import org.apache.hadoop.hive.ql.stats.StatsUtils; +import org.apache.hadoop.mapred.TextInputFormat; +import org.apache.hadoop.util.StringUtils; +import org.apache.hive.common.util.RetryUtilities.RetryException; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; +import org.mockito.ArgumentCaptor; +import org.mockito.Mockito; + +/** + * Unit test for function dropPartitionsInBatches in DDLTask.
+ * + **/ +public class TestMsckDropPartitionsInBatches { + private static HiveConf hiveConf; + private static DDLTask ddlTask; + private final String tableName = "test_msck_batch"; + private static Hive db; + private List<String> repairOutput; + private Table table; + + @BeforeClass + public static void setupClass() throws HiveException { + hiveConf = new HiveConf(TestMsckCreatePartitionsInBatches.class); + hiveConf.setIntVar(ConfVars.HIVE_MSCK_REPAIR_BATCH_SIZE, 5); + hiveConf.setVar(HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER, + "org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory"); + SessionState.start(hiveConf); + db = Hive.get(hiveConf); + ddlTask = new DDLTask(); + } + + @Before + public void before() throws Exception { + createPartitionedTable("default", tableName); + table = db.getTable(tableName); + repairOutput = new ArrayList<String>(); + } + + @After + public void after() throws Exception { + cleanUpTableQuietly("default", tableName); + } + + private Table createPartitionedTable(String dbName, String tableName) throws Exception { + try { + db.dropTable(dbName, tableName); + db.createTable(tableName, Arrays.asList("key", "value"), // Data columns. + Arrays.asList("city"), // Partition columns. + TextInputFormat.class, HiveIgnoreKeyTextOutputFormat.class); + return db.getTable(dbName, tableName); + } catch (Exception exception) { + fail("Unable to drop and create table " + StatsUtils + .getFullyQualifiedTableName(dbName, tableName) + " because " + StringUtils + .stringifyException(exception)); + throw exception; + } + } + + private void cleanUpTableQuietly(String dbName, String tableName) { + try { + db.dropTable(dbName, tableName, true, true, true); + } catch (Exception exception) { + fail("Unexpected exception: " + StringUtils.stringifyException(exception)); + } + } + + private Set<PartitionResult> dropPartsNotInFs(int numOfParts) { + Set<PartitionResult> partsNotInFs = new HashSet<>(); + for (int i = 0; i < numOfParts; i++) { + PartitionResult result = new PartitionResult(); + result.setPartitionName("city=dummyCity_" + String.valueOf(i)); + partsNotInFs.add(result); + } + return partsNotInFs; + } + + // Find most significant bit with starting index as 0 + private int findMSB(int n) { + int msbIndex = 0; + + Assert.assertTrue("Only positive values expected", n > 0); + + while (n > 1) { + n = (n >> 1); + msbIndex++; + } + + return msbIndex; + } + + // runDropPartitions is the main function that gets called with different options + // partCount: total number of partitions that will be deleted + // batchSize: maximum number of partitions that can be deleted in a batch + // based on the above the test will check that the batch sizes are as expected + // exceptionStatus can take 3 values + // noException: no exception is expected. + // oneException: first call throws exception. Since dropPartitionsInBatches will retry, this + // will succeed after the first failure + // allException: failure case where everything fails.
Will test that the call fails after + // retrying based on maxRetries when specified, or based on a decaying factor + // of 2 + private final int noException = 1; + private final int oneException = 2; + private final int allException = 3; + private void runDropPartitions(int partCount, int batchSize, int maxRetries, int exceptionStatus) + throws Exception { + Hive spyDb = Mockito.spy(db); + + // create partCount dummy partitions + Set<PartitionResult> partsNotInFs = dropPartsNotInFs(partCount); + + // Expected number of dropPartitions calls + int expectedCallCount = maxRetries; + + // Expected number of partitions dropped in each of those calls + int[] expectedBatchSizes; + + // Last batch can sometimes have fewer elements + int lastBatchSize = batchSize; + + // Actual Batch size that will be used + int actualBatchSize = batchSize; + + if (exceptionStatus == oneException) { + // After one exception everything is expected to run + actualBatchSize = batchSize/2; + } + + if (exceptionStatus != allException) { + expectedCallCount = partCount/actualBatchSize; + + if (expectedCallCount*actualBatchSize < partCount) { + // partCount not equally divided into batches. last batch size will be less than batch size + lastBatchSize = partCount - (expectedCallCount * actualBatchSize); + + // Add 1 to counter the rounding down of integer division + expectedCallCount++; + } + + if (exceptionStatus == oneException) { + // first one will fail - count it in + expectedCallCount++; + + // only first call throws exception + Mockito.doThrow(HiveException.class).doCallRealMethod().doCallRealMethod().when(spyDb) + .dropPartitions(Mockito.eq(table), Mockito.any(List.class), Mockito.eq(false), + Mockito.eq(true)); + } + + expectedBatchSizes = new int[expectedCallCount]; + + // first batch is always based on batch size + expectedBatchSizes[0] = Integer.min(partCount, batchSize); + + // second batch to last but one batch will be actualBatchSize + // actualBatchSize is same as batchSize when no exceptions are expected + // actualBatchSize is half of batchSize when 1 exception is expected + for (int i = 1; i < expectedCallCount-1; i++) { + expectedBatchSizes[i] = Integer.min(partCount, actualBatchSize); + } + + expectedBatchSizes[expectedCallCount-1] = lastBatchSize; + + // batch size from input and decaying factor of 2 + ddlTask.dropPartitionsInBatches(spyDb, repairOutput, partsNotInFs, table, batchSize, 2, + maxRetries); + } else { + if (maxRetries == 0) { + // Retries will be done till the batch size reduces to 0. The decaying factor is 2.
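+ // For example, a batchSize of 30 decays through 30 -> 15 -> 7 -> 3 -> 1 before the + // next halving reaches 0, i.e. five failing dropPartitions calls in total.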
+ // So, log to base 2 of batchSize plus 1, i.e. the most significant bit index + // of batchSize plus 1, will give the number of expected calls + expectedCallCount = findMSB(batchSize) + 1; + } + expectedBatchSizes = new int[expectedCallCount]; + + // decaying where the batchSize keeps reducing by half + for (int i = 0; i < expectedCallCount; i++) { + actualBatchSize = batchSize / (int) Math.pow(2, i); + expectedBatchSizes[i] = Integer.min(partCount, actualBatchSize); + } + // all calls fail + Mockito.doThrow(HiveException.class).when(spyDb) + .dropPartitions(Mockito.eq(table), Mockito.any(List.class), Mockito.eq(false), + Mockito.eq(true)); + + Exception ex = null; + try { + ddlTask.dropPartitionsInBatches(spyDb, repairOutput, partsNotInFs, table, batchSize, 2, + maxRetries); + } catch (Exception retryEx) { + ex = retryEx; + } + Assert.assertFalse("Exception was expected but was not thrown", ex == null); + Assert.assertTrue("Unexpected class of exception thrown", ex instanceof RetryException); + } + + // there should be expectedCallCount calls to drop partitions with each batch size of + // actualBatchSize + ArgumentCaptor<List> argument = ArgumentCaptor.forClass(List.class); + Mockito.verify(spyDb, Mockito.times(expectedCallCount)) + .dropPartitions(Mockito.eq(table), argument.capture(), Mockito.eq(false), Mockito.eq(true)); + + // confirm the batch sizes were as expected + List<List> droppedParts = argument.getAllValues(); + + for (int i = 0; i < expectedCallCount; i++) { + Assert.assertEquals( + String.format("Unexpected batch size in attempt %d. Expected: %d. Found: %d", i + 1, + expectedBatchSizes[i], droppedParts.get(i).size()), + expectedBatchSizes[i], droppedParts.get(i).size()); + } + } + + private void runDropPartitions(int partCount, int batchSize) throws Exception { + runDropPartitions(partCount, batchSize, 0, noException); + } + + /** + * Tests the number of times Hive.dropPartitions calls are executed when the total number of + * partitions to be dropped is equally divisible by the batch size. + * + * @throws Exception + */ + @Test + public void testNumberOfDropPartitionsCalls() throws Exception { + runDropPartitions(10, 5); + } + + /** + * Tests the number of times Hive.dropPartitions calls are executed when the total number of + * partitions to be dropped is not exactly divisible by the batch size. + * + * @throws Exception + */ + @Test + public void testUnevenNumberOfCreatePartitionCalls() throws Exception { + runDropPartitions(14, 5); + } + + /** + * Tests the number of times Hive.dropPartitions calls are executed when the total number of + * partitions is exactly equal to the batch size. + * + * @throws Exception + */ + @Test + public void testEqualNumberOfPartitions() throws Exception { + runDropPartitions(13, 13); + } + + /** + * Tests the number of times Hive.dropPartitions calls are executed when the total number of + * partitions is less than the batch size. + * + * @throws Exception + */ + @Test + public void testSmallNumberOfPartitions() throws Exception { + runDropPartitions(10, 20); + } + + /** + * Tests the number of calls to dropPartitions and the respective batch sizes when the first call to + * dropPartitions throws HiveException. The batch size should be reduced once by the + * decayingFactor 2, i.e. the batch size is halved. + * + * @throws Exception + */ + @Test + public void testBatchingWhenException() throws Exception { + runDropPartitions(23, 30, 0, oneException); + } + + /** + * Tests the retries exhausted case when the Hive.dropPartitions method call always keeps throwing + * HiveException.
The batch sizes should decrease exponentially based on the decaying factor and + * the call should ultimately give up when the batch size reaches 0. + * + * @throws Exception + */ + @Test + public void testRetriesExhaustedBatchSize() throws Exception { + runDropPartitions(17, 30, 0, allException); + } + + /** + * Tests when the maximum number of retries is set to 2. + * @throws Exception + */ + @Test + public void testMaxRetriesReached() throws Exception { + runDropPartitions(17, 30, 2, allException); + } + + /** + * Tests when the maximum number of retries is set to 1. + * @throws Exception + */ + @Test + public void testOneMaxRetries() throws Exception { + runDropPartitions(17, 30, 1, allException); + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/559ebb3e/ql/src/test/queries/clientpositive/msck_repair_drop.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/msck_repair_drop.q b/ql/src/test/queries/clientpositive/msck_repair_drop.q new file mode 100644 index 0000000..bc14d98 --- /dev/null +++ b/ql/src/test/queries/clientpositive/msck_repair_drop.q @@ -0,0 +1,180 @@ +set hive.mv.files.thread=0; +DROP TABLE IF EXISTS repairtable; + +CREATE TABLE repairtable(col STRING) PARTITIONED BY (p1 STRING, p2 STRING); + +-- repairtable will have partitions created with part keys p1=1, p1=2, p1=3, p1=4 and p1=5 +-- p1=2 will be used to test drop in 3 tests +-- 1. each partition is dropped individually: set hive.msck.repair.batch.size=1; +-- 2. partitions are dropped in groups of 3: set hive.msck.repair.batch.size=3; +-- 3. all partitions are dropped in 1 shot: set hive.msck.repair.batch.size=0; +-- the same set of 10 partitions will be recreated between drop attempts +-- p1=3, p1=4 and p1=5 will be used to test keywords add, drop and sync + +dfs ${system:test.dfs.mkdir} ${system:test.warehouse.dir}/repairtable/p1=1/p2=11/p3=111; +dfs -touchz ${system:test.warehouse.dir}/repairtable/p1=1/p2=11/p3=111/datafile; +dfs ${system:test.dfs.mkdir} ${system:test.warehouse.dir}/repairtable/p1=1/p2=12/p3=121; +dfs -touchz ${system:test.warehouse.dir}/repairtable/p1=1/p2=12/p3=121/datafile; + +dfs ${system:test.dfs.mkdir} ${system:test.warehouse.dir}/repairtable/p1=2/p2=21/p3=211; +dfs -touchz ${system:test.warehouse.dir}/repairtable/p1=2/p2=21/p3=211/datafile; +dfs ${system:test.dfs.mkdir} ${system:test.warehouse.dir}/repairtable/p1=2/p2=22/p3=221; +dfs -touchz ${system:test.warehouse.dir}/repairtable/p1=2/p2=22/p3=221/datafile; +dfs ${system:test.dfs.mkdir} ${system:test.warehouse.dir}/repairtable/p1=2/p2=23/p3=231; +dfs -touchz ${system:test.warehouse.dir}/repairtable/p1=2/p2=23/p3=231/datafile; +dfs ${system:test.dfs.mkdir} ${system:test.warehouse.dir}/repairtable/p1=2/p2=24/p3=241; +dfs -touchz ${system:test.warehouse.dir}/repairtable/p1=2/p2=24/p3=241/datafile; +dfs ${system:test.dfs.mkdir} ${system:test.warehouse.dir}/repairtable/p1=2/p2=25/p3=251; +dfs -touchz ${system:test.warehouse.dir}/repairtable/p1=2/p2=25/p3=251/datafile; +dfs ${system:test.dfs.mkdir} ${system:test.warehouse.dir}/repairtable/p1=2/p2=26/p3=261; +dfs -touchz ${system:test.warehouse.dir}/repairtable/p1=2/p2=26/p3=261/datafile; +dfs ${system:test.dfs.mkdir} ${system:test.warehouse.dir}/repairtable/p1=2/p2=27/p3=271; +dfs -touchz ${system:test.warehouse.dir}/repairtable/p1=2/p2=27/p3=271/datafile; +dfs ${system:test.dfs.mkdir} ${system:test.warehouse.dir}/repairtable/p1=2/p2=28/p3=281; +dfs -touchz ${system:test.warehouse.dir}/repairtable/p1=2/p2=28/p3=281/datafile; +dfs ${system:test.dfs.mkdir}
${system:test.warehouse.dir}/repairtable/p1=2/p2=29/p3=291; +dfs -touchz ${system:test.warehouse.dir}/repairtable/p1=2/p2=29/p3=291/datafile; +dfs ${system:test.dfs.mkdir} ${system:test.warehouse.dir}/repairtable/p1=2/p2=210/p3=2101; +dfs -touchz ${system:test.warehouse.dir}/repairtable/p1=2/p2=210/p3=2101/datafile; + +MSCK TABLE default.repairtable; +MSCK REPAIR TABLE default.repairtable; + +-- Now all 12 partitions are in +show partitions default.repairtable; + +-- Remove all p1=2 partitions from file system +dfs -rmr ${system:test.warehouse.dir}/repairtable/p1=2; + +-- test 1: each partition is dropped individually +set hive.msck.repair.batch.size=1; +MSCK TABLE default.repairtable DROP PARTITIONS; +MSCK REPAIR TABLE default.repairtable DROP PARTITIONS; +show partitions default.repairtable; + +-- Recreate p1=2 partitions +dfs ${system:test.dfs.mkdir} ${system:test.warehouse.dir}/repairtable/p1=2/p2=21/p3=211; +dfs -touchz ${system:test.warehouse.dir}/repairtable/p1=2/p2=21/p3=211/datafile; +dfs ${system:test.dfs.mkdir} ${system:test.warehouse.dir}/repairtable/p1=2/p2=22/p3=221; +dfs -touchz ${system:test.warehouse.dir}/repairtable/p1=2/p2=22/p3=221/datafile; +dfs ${system:test.dfs.mkdir} ${system:test.warehouse.dir}/repairtable/p1=2/p2=23/p3=231; +dfs -touchz ${system:test.warehouse.dir}/repairtable/p1=2/p2=23/p3=231/datafile; +dfs ${system:test.dfs.mkdir} ${system:test.warehouse.dir}/repairtable/p1=2/p2=24/p3=241; +dfs -touchz ${system:test.warehouse.dir}/repairtable/p1=2/p2=24/p3=241/datafile; +dfs ${system:test.dfs.mkdir} ${system:test.warehouse.dir}/repairtable/p1=2/p2=25/p3=251; +dfs -touchz ${system:test.warehouse.dir}/repairtable/p1=2/p2=25/p3=251/datafile; +dfs ${system:test.dfs.mkdir} ${system:test.warehouse.dir}/repairtable/p1=2/p2=26/p3=261; +dfs -touchz ${system:test.warehouse.dir}/repairtable/p1=2/p2=26/p3=261/datafile; +dfs ${system:test.dfs.mkdir} ${system:test.warehouse.dir}/repairtable/p1=2/p2=27/p3=271; +dfs -touchz ${system:test.warehouse.dir}/repairtable/p1=2/p2=27/p3=271/datafile; +dfs ${system:test.dfs.mkdir} ${system:test.warehouse.dir}/repairtable/p1=2/p2=28/p3=281; +dfs -touchz ${system:test.warehouse.dir}/repairtable/p1=2/p2=28/p3=281/datafile; +dfs ${system:test.dfs.mkdir} ${system:test.warehouse.dir}/repairtable/p1=2/p2=29/p3=291; +dfs -touchz ${system:test.warehouse.dir}/repairtable/p1=2/p2=29/p3=291/datafile; +dfs ${system:test.dfs.mkdir} ${system:test.warehouse.dir}/repairtable/p1=2/p2=210/p3=2101; +dfs -touchz ${system:test.warehouse.dir}/repairtable/p1=2/p2=210/p3=2101/datafile; + +MSCK TABLE default.repairtable; +MSCK REPAIR TABLE default.repairtable; + +-- Now all 12 partitions are in +show partitions default.repairtable; + +-- Remove all p1=2 partitions from file system +dfs -rmr ${system:test.warehouse.dir}/repairtable/p1=2; + +-- test 2: partitions are dropped in groups of 3 +set hive.msck.repair.batch.size=3; +MSCK TABLE default.repairtable DROP PARTITIONS; +MSCK REPAIR TABLE default.repairtable DROP PARTITIONS; +show partitions default.repairtable; + +-- Recreate p1=2 partitions +dfs ${system:test.dfs.mkdir} ${system:test.warehouse.dir}/repairtable/p1=2/p2=21/p3=211; +dfs -touchz ${system:test.warehouse.dir}/repairtable/p1=2/p2=21/p3=211/datafile; +dfs ${system:test.dfs.mkdir} ${system:test.warehouse.dir}/repairtable/p1=2/p2=22/p3=221; +dfs -touchz ${system:test.warehouse.dir}/repairtable/p1=2/p2=22/p3=221/datafile; +dfs ${system:test.dfs.mkdir} ${system:test.warehouse.dir}/repairtable/p1=2/p2=23/p3=231; +dfs -touchz
${system:test.warehouse.dir}/repairtable/p1=2/p2=23/p3=231/datafile; +dfs ${system:test.dfs.mkdir} ${system:test.warehouse.dir}/repairtable/p1=2/p2=24/p3=241; +dfs -touchz ${system:test.warehouse.dir}/repairtable/p1=2/p2=24/p3=241/datafile; +dfs ${system:test.dfs.mkdir} ${system:test.warehouse.dir}/repairtable/p1=2/p2=25/p3=251; +dfs -touchz ${system:test.warehouse.dir}/repairtable/p1=2/p2=25/p3=251/datafile; +dfs ${system:test.dfs.mkdir} ${system:test.warehouse.dir}/repairtable/p1=2/p2=26/p3=261; +dfs -touchz ${system:test.warehouse.dir}/repairtable/p1=2/p2=26/p3=261/datafile; +dfs ${system:test.dfs.mkdir} ${system:test.warehouse.dir}/repairtable/p1=2/p2=27/p3=271; +dfs -touchz ${system:test.warehouse.dir}/repairtable/p1=2/p2=27/p3=271/datafile; +dfs ${system:test.dfs.mkdir} ${system:test.warehouse.dir}/repairtable/p1=2/p2=28/p3=281; +dfs -touchz ${system:test.warehouse.dir}/repairtable/p1=2/p2=28/p3=281/datafile; +dfs ${system:test.dfs.mkdir} ${system:test.warehouse.dir}/repairtable/p1=2/p2=29/p3=291; +dfs -touchz ${system:test.warehouse.dir}/repairtable/p1=2/p2=29/p3=291/datafile; +dfs ${system:test.dfs.mkdir} ${system:test.warehouse.dir}/repairtable/p1=2/p2=210/p3=2101; +dfs -touchz ${system:test.warehouse.dir}/repairtable/p1=2/p2=210/p3=2101/datafile; + +MSCK TABLE default.repairtable; +MSCK REPAIR TABLE default.repairtable; + +-- Now all 12 partitions are in +show partitions default.repairtable; + +-- Remove all p1=2 partitions from file system +dfs -rmr ${system:test.warehouse.dir}/repairtable/p1=2; + +-- test 3. all partitions are dropped in 1 shot +set hive.msck.repair.batch.size=0; +MSCK TABLE default.repairtable DROP PARTITIONS; +MSCK REPAIR TABLE default.repairtable DROP PARTITIONS; +show partitions default.repairtable; + +-- test add partition keyword: begin +dfs ${system:test.dfs.mkdir} ${system:test.warehouse.dir}/repairtable/p1=3/p2=31/p3=311; +dfs -touchz ${system:test.warehouse.dir}/repairtable/p1=3/p2=31/p3=311/datafile; +dfs ${system:test.dfs.mkdir} ${system:test.warehouse.dir}/repairtable/p1=3/p2=32/p3=321; +dfs -touchz ${system:test.warehouse.dir}/repairtable/p1=3/p2=32/p3=321/datafile; + +MSCK TABLE default.repairtable; +MSCK REPAIR TABLE default.repairtable; +show partitions default.repairtable; + +-- Create p1=4 in filesystem +dfs ${system:test.dfs.mkdir} ${system:test.warehouse.dir}/repairtable/p1=4/p2=41/p3=411; +dfs -touchz ${system:test.warehouse.dir}/repairtable/p1=4/p2=41/p3=411/datafile; +dfs ${system:test.dfs.mkdir} ${system:test.warehouse.dir}/repairtable/p1=4/p2=42/p3=421; +dfs -touchz ${system:test.warehouse.dir}/repairtable/p1=4/p2=42/p3=421/datafile; + +-- Remove p1=3 from filesystem +dfs -rmr ${system:test.warehouse.dir}/repairtable/p1=3; + +-- Status: p1=3 dropped from filesystem, but exists in metastore +-- p1=4 exists in filesystem but not in metastore +-- test add partition: only brings in p1=4 and doesn't remove p1=3 +MSCK TABLE default.repairtable ADD PARTITIONS; +MSCK REPAIR TABLE default.repairtable ADD PARTITIONS; +show partitions default.repairtable; +-- test add partition keyword: end + +-- test drop partition keyword: begin +dfs ${system:test.dfs.mkdir} ${system:test.warehouse.dir}/repairtable/p1=5/p2=51/p3=511; +dfs -touchz ${system:test.warehouse.dir}/repairtable/p1=5/p2=51/p3=511/datafile; +dfs ${system:test.dfs.mkdir} ${system:test.warehouse.dir}/repairtable/p1=5/p2=52/p3=521; +dfs -touchz ${system:test.warehouse.dir}/repairtable/p1=5/p2=52/p3=521/datafile; + +-- Status: p1=3 removed from filesystem, but exists in metastore (as part of
add test) +-- p1=5 exists in filesystem but not in metastore +-- test drop partition: only removes p1=3 from metastore but doesn't update metadata for p1=5 +MSCK TABLE default.repairtable DROP PARTITIONS; +MSCK REPAIR TABLE default.repairtable DROP PARTITIONS; +show partitions default.repairtable; +-- test drop partition keyword: end + +-- test sync partition keyword: begin +-- Remove p1=4 from filesystem +dfs -rmr ${system:test.warehouse.dir}/repairtable/p1=4; + +-- Status: p1=4 dropped from filesystem, but exists in metastore +-- p1=5 exists in filesystem but not in metastore (as part of drop test) +-- test sync partition: removes p1=4 from metastore and updates metadata for p1=5 +MSCK TABLE default.repairtable SYNC PARTITIONS; +MSCK REPAIR TABLE default.repairtable SYNC PARTITIONS; +show partitions default.repairtable; +-- test sync partition keyword: end http://git-wip-us.apache.org/repos/asf/hive/blob/559ebb3e/ql/src/test/results/clientpositive/msck_repair_drop.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/msck_repair_drop.q.out b/ql/src/test/results/clientpositive/msck_repair_drop.q.out new file mode 100644 index 0000000..6d89d8f --- /dev/null +++ b/ql/src/test/results/clientpositive/msck_repair_drop.q.out @@ -0,0 +1,293 @@ +PREHOOK: query: DROP TABLE IF EXISTS repairtable +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS repairtable +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE repairtable(col STRING) PARTITIONED BY (p1 STRING, p2 STRING) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@repairtable +POSTHOOK: query: CREATE TABLE repairtable(col STRING) PARTITIONED BY (p1 STRING, p2 STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@repairtable +PREHOOK: query: MSCK TABLE default.repairtable +PREHOOK: type: MSCK +PREHOOK: Output: default@repairtable +POSTHOOK: query: MSCK TABLE default.repairtable +POSTHOOK: type: MSCK +POSTHOOK: Output: default@repairtable +Partitions not in metastore: repairtable:p1=1/p2=11 repairtable:p1=1/p2=12 repairtable:p1=2/p2=21 repairtable:p1=2/p2=210 repairtable:p1=2/p2=22 repairtable:p1=2/p2=23 repairtable:p1=2/p2=24 repairtable:p1=2/p2=25 repairtable:p1=2/p2=26 repairtable:p1=2/p2=27 repairtable:p1=2/p2=28 repairtable:p1=2/p2=29 +PREHOOK: query: MSCK REPAIR TABLE default.repairtable +PREHOOK: type: MSCK +PREHOOK: Output: default@repairtable +POSTHOOK: query: MSCK REPAIR TABLE default.repairtable +POSTHOOK: type: MSCK +POSTHOOK: Output: default@repairtable +Partitions not in metastore: repairtable:p1=1/p2=11 repairtable:p1=1/p2=12 repairtable:p1=2/p2=21 repairtable:p1=2/p2=210 repairtable:p1=2/p2=22 repairtable:p1=2/p2=23 repairtable:p1=2/p2=24 repairtable:p1=2/p2=25 repairtable:p1=2/p2=26 repairtable:p1=2/p2=27 repairtable:p1=2/p2=28 repairtable:p1=2/p2=29 +#### A masked pattern was here #### +PREHOOK: query: show partitions default.repairtable +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@repairtable +POSTHOOK: query: show partitions default.repairtable +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@repairtable +p1=1/p2=11 +p1=1/p2=12 +p1=2/p2=21 +p1=2/p2=210 +p1=2/p2=22 +p1=2/p2=23 +p1=2/p2=24 +p1=2/p2=25 +p1=2/p2=26 +p1=2/p2=27 +p1=2/p2=28 +p1=2/p2=29 +#### A masked pattern was here #### +PREHOOK: query: MSCK TABLE default.repairtable DROP PARTITIONS +PREHOOK: type: MSCK +PREHOOK: Output: default@repairtable +POSTHOOK: query: MSCK TABLE 
default.repairtable DROP PARTITIONS +POSTHOOK: type: MSCK +POSTHOOK: Output: default@repairtable +Partitions missing from filesystem: repairtable:p1=2/p2=21 repairtable:p1=2/p2=210 repairtable:p1=2/p2=22 repairtable:p1=2/p2=23 repairtable:p1=2/p2=24 repairtable:p1=2/p2=25 repairtable:p1=2/p2=26 repairtable:p1=2/p2=27 repairtable:p1=2/p2=28 repairtable:p1=2/p2=29 +PREHOOK: query: MSCK REPAIR TABLE default.repairtable DROP PARTITIONS +PREHOOK: type: MSCK +PREHOOK: Output: default@repairtable +POSTHOOK: query: MSCK REPAIR TABLE default.repairtable DROP PARTITIONS +POSTHOOK: type: MSCK +POSTHOOK: Output: default@repairtable +Partitions missing from filesystem: repairtable:p1=2/p2=21 repairtable:p1=2/p2=210 repairtable:p1=2/p2=22 repairtable:p1=2/p2=23 repairtable:p1=2/p2=24 repairtable:p1=2/p2=25 repairtable:p1=2/p2=26 repairtable:p1=2/p2=27 repairtable:p1=2/p2=28 repairtable:p1=2/p2=29 +#### A masked pattern was here #### +PREHOOK: query: show partitions default.repairtable +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@repairtable +POSTHOOK: query: show partitions default.repairtable +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@repairtable +p1=1/p2=11 +p1=1/p2=12 +PREHOOK: query: MSCK TABLE default.repairtable +PREHOOK: type: MSCK +PREHOOK: Output: default@repairtable +POSTHOOK: query: MSCK TABLE default.repairtable +POSTHOOK: type: MSCK +POSTHOOK: Output: default@repairtable +Partitions not in metastore: repairtable:p1=2/p2=21 repairtable:p1=2/p2=210 repairtable:p1=2/p2=22 repairtable:p1=2/p2=23 repairtable:p1=2/p2=24 repairtable:p1=2/p2=25 repairtable:p1=2/p2=26 repairtable:p1=2/p2=27 repairtable:p1=2/p2=28 repairtable:p1=2/p2=29 +PREHOOK: query: MSCK REPAIR TABLE default.repairtable +PREHOOK: type: MSCK +PREHOOK: Output: default@repairtable +POSTHOOK: query: MSCK REPAIR TABLE default.repairtable +POSTHOOK: type: MSCK +POSTHOOK: Output: default@repairtable +Partitions not in metastore: repairtable:p1=2/p2=21 repairtable:p1=2/p2=210 repairtable:p1=2/p2=22 repairtable:p1=2/p2=23 repairtable:p1=2/p2=24 repairtable:p1=2/p2=25 repairtable:p1=2/p2=26 repairtable:p1=2/p2=27 repairtable:p1=2/p2=28 repairtable:p1=2/p2=29 +#### A masked pattern was here #### +PREHOOK: query: show partitions default.repairtable +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@repairtable +POSTHOOK: query: show partitions default.repairtable +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@repairtable +p1=1/p2=11 +p1=1/p2=12 +p1=2/p2=21 +p1=2/p2=210 +p1=2/p2=22 +p1=2/p2=23 +p1=2/p2=24 +p1=2/p2=25 +p1=2/p2=26 +p1=2/p2=27 +p1=2/p2=28 +p1=2/p2=29 +#### A masked pattern was here #### +PREHOOK: query: MSCK TABLE default.repairtable DROP PARTITIONS +PREHOOK: type: MSCK +PREHOOK: Output: default@repairtable +POSTHOOK: query: MSCK TABLE default.repairtable DROP PARTITIONS +POSTHOOK: type: MSCK +POSTHOOK: Output: default@repairtable +Partitions missing from filesystem: repairtable:p1=2/p2=21 repairtable:p1=2/p2=210 repairtable:p1=2/p2=22 repairtable:p1=2/p2=23 repairtable:p1=2/p2=24 repairtable:p1=2/p2=25 repairtable:p1=2/p2=26 repairtable:p1=2/p2=27 repairtable:p1=2/p2=28 repairtable:p1=2/p2=29 +PREHOOK: query: MSCK REPAIR TABLE default.repairtable DROP PARTITIONS +PREHOOK: type: MSCK +PREHOOK: Output: default@repairtable +POSTHOOK: query: MSCK REPAIR TABLE default.repairtable DROP PARTITIONS +POSTHOOK: type: MSCK +POSTHOOK: Output: default@repairtable +Partitions missing from filesystem: repairtable:p1=2/p2=21 repairtable:p1=2/p2=210 repairtable:p1=2/p2=22 repairtable:p1=2/p2=23 
repairtable:p1=2/p2=24 repairtable:p1=2/p2=25 repairtable:p1=2/p2=26 repairtable:p1=2/p2=27 repairtable:p1=2/p2=28 repairtable:p1=2/p2=29 +#### A masked pattern was here #### +PREHOOK: query: show partitions default.repairtable +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@repairtable +POSTHOOK: query: show partitions default.repairtable +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@repairtable +p1=1/p2=11 +p1=1/p2=12 +PREHOOK: query: MSCK TABLE default.repairtable +PREHOOK: type: MSCK +PREHOOK: Output: default@repairtable +POSTHOOK: query: MSCK TABLE default.repairtable +POSTHOOK: type: MSCK +POSTHOOK: Output: default@repairtable +Partitions not in metastore: repairtable:p1=2/p2=21 repairtable:p1=2/p2=210 repairtable:p1=2/p2=22 repairtable:p1=2/p2=23 repairtable:p1=2/p2=24 repairtable:p1=2/p2=25 repairtable:p1=2/p2=26 repairtable:p1=2/p2=27 repairtable:p1=2/p2=28 repairtable:p1=2/p2=29 +PREHOOK: query: MSCK REPAIR TABLE default.repairtable +PREHOOK: type: MSCK +PREHOOK: Output: default@repairtable +POSTHOOK: query: MSCK REPAIR TABLE default.repairtable +POSTHOOK: type: MSCK +POSTHOOK: Output: default@repairtable +Partitions not in metastore: repairtable:p1=2/p2=21 repairtable:p1=2/p2=210 repairtable:p1=2/p2=22 repairtable:p1=2/p2=23 repairtable:p1=2/p2=24 repairtable:p1=2/p2=25 repairtable:p1=2/p2=26 repairtable:p1=2/p2=27 repairtable:p1=2/p2=28 repairtable:p1=2/p2=29 +#### A masked pattern was here #### +PREHOOK: query: show partitions default.repairtable +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@repairtable +POSTHOOK: query: show partitions default.repairtable +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@repairtable +p1=1/p2=11 +p1=1/p2=12 +p1=2/p2=21 +p1=2/p2=210 +p1=2/p2=22 +p1=2/p2=23 +p1=2/p2=24 +p1=2/p2=25 +p1=2/p2=26 +p1=2/p2=27 +p1=2/p2=28 +p1=2/p2=29 +#### A masked pattern was here #### +PREHOOK: query: MSCK TABLE default.repairtable DROP PARTITIONS +PREHOOK: type: MSCK +PREHOOK: Output: default@repairtable +POSTHOOK: query: MSCK TABLE default.repairtable DROP PARTITIONS +POSTHOOK: type: MSCK +POSTHOOK: Output: default@repairtable +Partitions missing from filesystem: repairtable:p1=2/p2=21 repairtable:p1=2/p2=210 repairtable:p1=2/p2=22 repairtable:p1=2/p2=23 repairtable:p1=2/p2=24 repairtable:p1=2/p2=25 repairtable:p1=2/p2=26 repairtable:p1=2/p2=27 repairtable:p1=2/p2=28 repairtable:p1=2/p2=29 +PREHOOK: query: MSCK REPAIR TABLE default.repairtable DROP PARTITIONS +PREHOOK: type: MSCK +PREHOOK: Output: default@repairtable +POSTHOOK: query: MSCK REPAIR TABLE default.repairtable DROP PARTITIONS +POSTHOOK: type: MSCK +POSTHOOK: Output: default@repairtable +Partitions missing from filesystem: repairtable:p1=2/p2=21 repairtable:p1=2/p2=210 repairtable:p1=2/p2=22 repairtable:p1=2/p2=23 repairtable:p1=2/p2=24 repairtable:p1=2/p2=25 repairtable:p1=2/p2=26 repairtable:p1=2/p2=27 repairtable:p1=2/p2=28 repairtable:p1=2/p2=29 +#### A masked pattern was here #### +PREHOOK: query: show partitions default.repairtable +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@repairtable +POSTHOOK: query: show partitions default.repairtable +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@repairtable +p1=1/p2=11 +p1=1/p2=12 +PREHOOK: query: MSCK TABLE default.repairtable +PREHOOK: type: MSCK +PREHOOK: Output: default@repairtable +POSTHOOK: query: MSCK TABLE default.repairtable +POSTHOOK: type: MSCK +POSTHOOK: Output: default@repairtable +Partitions not in metastore: repairtable:p1=3/p2=31 repairtable:p1=3/p2=32 +PREHOOK: query: MSCK REPAIR 
TABLE default.repairtable +PREHOOK: type: MSCK +PREHOOK: Output: default@repairtable +POSTHOOK: query: MSCK REPAIR TABLE default.repairtable +POSTHOOK: type: MSCK +POSTHOOK: Output: default@repairtable +Partitions not in metastore: repairtable:p1=3/p2=31 repairtable:p1=3/p2=32 +#### A masked pattern was here #### +PREHOOK: query: show partitions default.repairtable +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@repairtable +POSTHOOK: query: show partitions default.repairtable +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@repairtable +p1=1/p2=11 +p1=1/p2=12 +p1=3/p2=31 +p1=3/p2=32 +#### A masked pattern was here #### +PREHOOK: query: MSCK TABLE default.repairtable ADD PARTITIONS +PREHOOK: type: MSCK +PREHOOK: Output: default@repairtable +POSTHOOK: query: MSCK TABLE default.repairtable ADD PARTITIONS +POSTHOOK: type: MSCK +POSTHOOK: Output: default@repairtable +Partitions not in metastore: repairtable:p1=4/p2=41 repairtable:p1=4/p2=42 +Partitions missing from filesystem: repairtable:p1=3/p2=31 repairtable:p1=3/p2=32 +PREHOOK: query: MSCK REPAIR TABLE default.repairtable ADD PARTITIONS +PREHOOK: type: MSCK +PREHOOK: Output: default@repairtable +POSTHOOK: query: MSCK REPAIR TABLE default.repairtable ADD PARTITIONS +POSTHOOK: type: MSCK +POSTHOOK: Output: default@repairtable +Partitions not in metastore: repairtable:p1=4/p2=41 repairtable:p1=4/p2=42 +Partitions missing from filesystem: repairtable:p1=3/p2=31 repairtable:p1=3/p2=32 +#### A masked pattern was here #### +PREHOOK: query: show partitions default.repairtable +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@repairtable +POSTHOOK: query: show partitions default.repairtable +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@repairtable +p1=1/p2=11 +p1=1/p2=12 +p1=3/p2=31 +p1=3/p2=32 +p1=4/p2=41 +p1=4/p2=42 +PREHOOK: query: MSCK TABLE default.repairtable DROP PARTITIONS +PREHOOK: type: MSCK +PREHOOK: Output: default@repairtable +POSTHOOK: query: MSCK TABLE default.repairtable DROP PARTITIONS +POSTHOOK: type: MSCK +POSTHOOK: Output: default@repairtable +Partitions not in metastore: repairtable:p1=5/p2=51 repairtable:p1=5/p2=52 +Partitions missing from filesystem: repairtable:p1=3/p2=31 repairtable:p1=3/p2=32 +PREHOOK: query: MSCK REPAIR TABLE default.repairtable DROP PARTITIONS +PREHOOK: type: MSCK +PREHOOK: Output: default@repairtable +POSTHOOK: query: MSCK REPAIR TABLE default.repairtable DROP PARTITIONS +POSTHOOK: type: MSCK +POSTHOOK: Output: default@repairtable +Partitions not in metastore: repairtable:p1=5/p2=51 repairtable:p1=5/p2=52 +Partitions missing from filesystem: repairtable:p1=3/p2=31 repairtable:p1=3/p2=32 +#### A masked pattern was here #### +PREHOOK: query: show partitions default.repairtable +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@repairtable +POSTHOOK: query: show partitions default.repairtable +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@repairtable +p1=1/p2=11 +p1=1/p2=12 +p1=4/p2=41 +p1=4/p2=42 +#### A masked pattern was here #### +PREHOOK: query: MSCK TABLE default.repairtable SYNC PARTITIONS +PREHOOK: type: MSCK +PREHOOK: Output: default@repairtable +POSTHOOK: query: MSCK TABLE default.repairtable SYNC PARTITIONS +POSTHOOK: type: MSCK +POSTHOOK: Output: default@repairtable +Partitions not in metastore: repairtable:p1=5/p2=51 repairtable:p1=5/p2=52 +Partitions missing from filesystem: repairtable:p1=4/p2=41 repairtable:p1=4/p2=42 +PREHOOK: query: MSCK REPAIR TABLE default.repairtable SYNC PARTITIONS +PREHOOK: type: MSCK +PREHOOK: Output: 
default@repairtable +POSTHOOK: query: MSCK REPAIR TABLE default.repairtable SYNC PARTITIONS +POSTHOOK: type: MSCK +POSTHOOK: Output: default@repairtable +Partitions not in metastore: repairtable:p1=5/p2=51 repairtable:p1=5/p2=52 +Partitions missing from filesystem: repairtable:p1=4/p2=41 repairtable:p1=4/p2=42 +#### A masked pattern was here #### +PREHOOK: query: show partitions default.repairtable +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@repairtable +POSTHOOK: query: show partitions default.repairtable +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@repairtable +p1=1/p2=11 +p1=1/p2=12 +p1=5/p2=51 +p1=5/p2=52
