Repository: hive
Updated Branches:
  refs/heads/master 8efe6f7f7 -> d20921100


HIVE-12077 : MSCK Repair table should fix partitions in batches (Chinna Rao L, via Chinna Rao L)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/d2092110
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/d2092110
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/d2092110

Branch: refs/heads/master
Commit: d20921100efb4411a82cb716e3e0471cbc118c71
Parents: 8efe6f7
Author: Chinna Rao L <[email protected]>
Authored: Mon Aug 1 20:16:59 2016 +0530
Committer: Chinna Rao L <[email protected]>
Committed: Mon Aug 1 20:16:59 2016 +0530

----------------------------------------------------------------------
 .../org/apache/hadoop/hive/conf/HiveConf.java   |  5 +++
 .../org/apache/hadoop/hive/ql/exec/DDLTask.java | 26 ++++++++++---
 .../clientpositive/msck_repair_batchsize.q      | 19 ++++++++++
 .../clientpositive/msck_repair_batchsize.q.out  | 40 ++++++++++++++++++++
 4 files changed, 85 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/d2092110/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index aa7647b..5f8daa3 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -3043,6 +3043,11 @@ public class HiveConf extends Configuration {
        "directories that are partition-like but contain unsupported 
characters. 'throw' (an " +
        "exception) is the default; 'skip' will skip the invalid directories 
and still repair the" +
        " others; 'ignore' will skip the validation (legacy behavior, causes 
bugs in many cases)"),
+    HIVE_MSCK_REPAIR_BATCH_SIZE(
+        "hive.msck.repair.batch.size", 0,
+        "Batch size for the msck repair command. If the value is greater than 
zero, "
+            + "it will execute batch wise with the configured batch size. "
+            + "The default value is zero. Zero means it will execute directly 
(Not batch wise)"),
     
HIVE_SERVER2_LLAP_CONCURRENT_QUERIES("hive.server2.llap.concurrent.queries", -1,
         "The number of queries allowed in parallel via llap. Negative number 
implies 'infinite'."),
     HIVE_TEZ_ENABLE_MEMORY_MANAGER("hive.tez.enable.memory.manager", true,

http://git-wip-us.apache.org/repos/asf/hive/blob/d2092110/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
index 39ac7d6..a59b781 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
@@ -1865,12 +1865,28 @@ public class DDLTask extends Task<DDLWork> implements Serializable {
         AddPartitionDesc apd = new AddPartitionDesc(
             table.getDbName(), table.getTableName(), false);
         try {
-          for (CheckResult.PartitionResult part : partsNotInMs) {
-            
apd.addPartition(Warehouse.makeSpecFromName(part.getPartitionName()), null);
-            repairOutput.add("Repair: Added partition to metastore "
-                + msckDesc.getTableName() + ':' + part.getPartitionName());
+          int batch_size = 
conf.getIntVar(ConfVars.HIVE_MSCK_REPAIR_BATCH_SIZE);
+          if (batch_size > 0 && partsNotInMs.size() > batch_size) {
+            int counter = 0;
+            for (CheckResult.PartitionResult part : partsNotInMs) {
+              counter++;
+              
apd.addPartition(Warehouse.makeSpecFromName(part.getPartitionName()), null);
+              repairOutput.add("Repair: Added partition to metastore " + 
msckDesc.getTableName()
+                  + ':' + part.getPartitionName());
+              if (counter == batch_size) {
+                db.createPartitions(apd);
+                apd = new AddPartitionDesc(table.getDbName(), 
table.getTableName(), false);
+                counter = 0;
+              }
+            }
+          } else {
+            for (CheckResult.PartitionResult part : partsNotInMs) {
+              
apd.addPartition(Warehouse.makeSpecFromName(part.getPartitionName()), null);
+              repairOutput.add("Repair: Added partition to metastore " + 
msckDesc.getTableName()
+                  + ':' + part.getPartitionName());
+            }
+            db.createPartitions(apd);
           }
-          db.createPartitions(apd);
         } catch (Exception e) {
           LOG.info("Could not bulk-add partitions to metastore; trying one by 
one", e);
           repairOutput.clear();

http://git-wip-us.apache.org/repos/asf/hive/blob/d2092110/ql/src/test/queries/clientpositive/msck_repair_batchsize.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/msck_repair_batchsize.q b/ql/src/test/queries/clientpositive/msck_repair_batchsize.q
new file mode 100644
index 0000000..60970e2
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/msck_repair_batchsize.q
@@ -0,0 +1,19 @@
+set hive.msck.repair.batch.size=1;
+
+DROP TABLE IF EXISTS repairtable;
+
+CREATE TABLE repairtable(col STRING) PARTITIONED BY (p1 STRING, p2 STRING);
+
+MSCK TABLE repairtable;
+
+dfs ${system:test.dfs.mkdir} 
${system:test.warehouse.dir}/repairtable/p1=a/p2=a;
+dfs ${system:test.dfs.mkdir} 
${system:test.warehouse.dir}/repairtable/p1=b/p2=a;
+dfs -touchz ${system:test.warehouse.dir}/repairtable/p1=b/p2=a/datafile;
+
+MSCK TABLE default.repairtable;
+
+MSCK REPAIR TABLE default.repairtable;
+
+MSCK TABLE repairtable;
+
+DROP TABLE default.repairtable;

http://git-wip-us.apache.org/repos/asf/hive/blob/d2092110/ql/src/test/results/clientpositive/msck_repair_batchsize.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/msck_repair_batchsize.q.out b/ql/src/test/results/clientpositive/msck_repair_batchsize.q.out
new file mode 100644
index 0000000..86ac031
--- /dev/null
+++ b/ql/src/test/results/clientpositive/msck_repair_batchsize.q.out
@@ -0,0 +1,40 @@
+PREHOOK: query: DROP TABLE IF EXISTS repairtable
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE IF EXISTS repairtable
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE repairtable(col STRING) PARTITIONED BY (p1 
STRING, p2 STRING)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@repairtable
+POSTHOOK: query: CREATE TABLE repairtable(col STRING) PARTITIONED BY (p1 
STRING, p2 STRING)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@repairtable
+PREHOOK: query: MSCK TABLE repairtable
+PREHOOK: type: MSCK
+POSTHOOK: query: MSCK TABLE repairtable
+POSTHOOK: type: MSCK
+PREHOOK: query: MSCK TABLE default.repairtable
+PREHOOK: type: MSCK
+POSTHOOK: query: MSCK TABLE default.repairtable
+POSTHOOK: type: MSCK
+Partitions not in metastore:   repairtable:p1=a/p2=a   repairtable:p1=b/p2=a
+PREHOOK: query: MSCK REPAIR TABLE default.repairtable
+PREHOOK: type: MSCK
+POSTHOOK: query: MSCK REPAIR TABLE default.repairtable
+POSTHOOK: type: MSCK
+Partitions not in metastore:   repairtable:p1=a/p2=a   repairtable:p1=b/p2=a
+Repair: Added partition to metastore default.repairtable:p1=a/p2=a
+Repair: Added partition to metastore default.repairtable:p1=b/p2=a
+PREHOOK: query: MSCK TABLE repairtable
+PREHOOK: type: MSCK
+POSTHOOK: query: MSCK TABLE repairtable
+POSTHOOK: type: MSCK
+PREHOOK: query: DROP TABLE default.repairtable
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@repairtable
+PREHOOK: Output: default@repairtable
+POSTHOOK: query: DROP TABLE default.repairtable
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@repairtable
+POSTHOOK: Output: default@repairtable

Reply via email to