Repository: hive Updated Branches: refs/heads/master 8efe6f7f7 -> d20921100
HIVE-12077 : MSCK Repair table should fix partitions in batches (Chinna Rao L , via Chinna Rao L) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/d2092110 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/d2092110 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/d2092110 Branch: refs/heads/master Commit: d20921100efb4411a82cb716e3e0471cbc118c71 Parents: 8efe6f7 Author: Chinna Rao L <[email protected]> Authored: Mon Aug 1 20:16:59 2016 +0530 Committer: Chinna Rao L <[email protected]> Committed: Mon Aug 1 20:16:59 2016 +0530 ---------------------------------------------------------------------- .../org/apache/hadoop/hive/conf/HiveConf.java | 5 +++ .../org/apache/hadoop/hive/ql/exec/DDLTask.java | 26 ++++++++++--- .../clientpositive/msck_repair_batchsize.q | 19 ++++++++++ .../clientpositive/msck_repair_batchsize.q.out | 40 ++++++++++++++++++++ 4 files changed, 85 insertions(+), 5 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/d2092110/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java ---------------------------------------------------------------------- diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index aa7647b..5f8daa3 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -3043,6 +3043,11 @@ public class HiveConf extends Configuration { "directories that are partition-like but contain unsupported characters. 'throw' (an " + "exception) is the default; 'skip' will skip the invalid directories and still repair the" + " others; 'ignore' will skip the validation (legacy behavior, causes bugs in many cases)"), + HIVE_MSCK_REPAIR_BATCH_SIZE( + "hive.msck.repair.batch.size", 0, + "Batch size for the msck repair command. If the value is greater than zero, " + + "it will execute batch wise with the configured batch size. " + + "The default value is zero. Zero means it will execute directly (Not batch wise)"), HIVE_SERVER2_LLAP_CONCURRENT_QUERIES("hive.server2.llap.concurrent.queries", -1, "The number of queries allowed in parallel via llap. Negative number implies 'infinite'."), HIVE_TEZ_ENABLE_MEMORY_MANAGER("hive.tez.enable.memory.manager", true, http://git-wip-us.apache.org/repos/asf/hive/blob/d2092110/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java index 39ac7d6..a59b781 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java @@ -1865,12 +1865,28 @@ public class DDLTask extends Task<DDLWork> implements Serializable { AddPartitionDesc apd = new AddPartitionDesc( table.getDbName(), table.getTableName(), false); try { - for (CheckResult.PartitionResult part : partsNotInMs) { - apd.addPartition(Warehouse.makeSpecFromName(part.getPartitionName()), null); - repairOutput.add("Repair: Added partition to metastore " - + msckDesc.getTableName() + ':' + part.getPartitionName()); + int batch_size = conf.getIntVar(ConfVars.HIVE_MSCK_REPAIR_BATCH_SIZE); + if (batch_size > 0 && partsNotInMs.size() > batch_size) { + int counter = 0; + for (CheckResult.PartitionResult part : partsNotInMs) { + counter++; + apd.addPartition(Warehouse.makeSpecFromName(part.getPartitionName()), null); + repairOutput.add("Repair: Added partition to metastore " + msckDesc.getTableName() + + ':' + part.getPartitionName()); + if (counter == batch_size) { + db.createPartitions(apd); + apd = new AddPartitionDesc(table.getDbName(), table.getTableName(), false); + counter = 0; + } + } + } else { + for (CheckResult.PartitionResult part : partsNotInMs) { + apd.addPartition(Warehouse.makeSpecFromName(part.getPartitionName()), null); + repairOutput.add("Repair: Added partition to metastore " + msckDesc.getTableName() + + ':' + part.getPartitionName()); + } + db.createPartitions(apd); } - db.createPartitions(apd); } catch (Exception e) { LOG.info("Could not bulk-add partitions to metastore; trying one by one", e); repairOutput.clear(); http://git-wip-us.apache.org/repos/asf/hive/blob/d2092110/ql/src/test/queries/clientpositive/msck_repair_batchsize.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/msck_repair_batchsize.q b/ql/src/test/queries/clientpositive/msck_repair_batchsize.q new file mode 100644 index 0000000..60970e2 --- /dev/null +++ b/ql/src/test/queries/clientpositive/msck_repair_batchsize.q @@ -0,0 +1,19 @@ +set hive.msck.repair.batch.size=1; + +DROP TABLE IF EXISTS repairtable; + +CREATE TABLE repairtable(col STRING) PARTITIONED BY (p1 STRING, p2 STRING); + +MSCK TABLE repairtable; + +dfs ${system:test.dfs.mkdir} ${system:test.warehouse.dir}/repairtable/p1=a/p2=a; +dfs ${system:test.dfs.mkdir} ${system:test.warehouse.dir}/repairtable/p1=b/p2=a; +dfs -touchz ${system:test.warehouse.dir}/repairtable/p1=b/p2=a/datafile; + +MSCK TABLE default.repairtable; + +MSCK REPAIR TABLE default.repairtable; + +MSCK TABLE repairtable; + +DROP TABLE default.repairtable; http://git-wip-us.apache.org/repos/asf/hive/blob/d2092110/ql/src/test/results/clientpositive/msck_repair_batchsize.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/msck_repair_batchsize.q.out b/ql/src/test/results/clientpositive/msck_repair_batchsize.q.out new file mode 100644 index 0000000..86ac031 --- /dev/null +++ b/ql/src/test/results/clientpositive/msck_repair_batchsize.q.out @@ -0,0 +1,40 @@ +PREHOOK: query: DROP TABLE IF EXISTS repairtable +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS repairtable +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE repairtable(col STRING) PARTITIONED BY (p1 STRING, p2 STRING) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@repairtable +POSTHOOK: query: CREATE TABLE repairtable(col STRING) PARTITIONED BY (p1 STRING, p2 STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@repairtable +PREHOOK: query: MSCK TABLE repairtable +PREHOOK: type: MSCK +POSTHOOK: query: MSCK TABLE repairtable +POSTHOOK: type: MSCK +PREHOOK: query: MSCK TABLE default.repairtable +PREHOOK: type: MSCK +POSTHOOK: query: MSCK TABLE default.repairtable +POSTHOOK: type: MSCK +Partitions not in metastore: repairtable:p1=a/p2=a repairtable:p1=b/p2=a +PREHOOK: query: MSCK REPAIR TABLE default.repairtable +PREHOOK: type: MSCK +POSTHOOK: query: MSCK REPAIR TABLE default.repairtable +POSTHOOK: type: MSCK +Partitions not in metastore: repairtable:p1=a/p2=a repairtable:p1=b/p2=a +Repair: Added partition to metastore default.repairtable:p1=a/p2=a +Repair: Added partition to metastore default.repairtable:p1=b/p2=a +PREHOOK: query: MSCK TABLE repairtable +PREHOOK: type: MSCK +POSTHOOK: query: MSCK TABLE repairtable +POSTHOOK: type: MSCK +PREHOOK: query: DROP TABLE default.repairtable +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@repairtable +PREHOOK: Output: default@repairtable +POSTHOOK: query: DROP TABLE default.repairtable +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@repairtable +POSTHOOK: Output: default@repairtable
