Repository: hbase
Updated Branches:
refs/heads/branch-1 ec3d7189a -> 2b56169ad
HBASE-13576 HBCK enhancement: Failure in checking one region should not fail
the entire HBCK operation. (Stephen Yuan Jiang)
Conflicts:
hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java
hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java
Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/2b56169a
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/2b56169a
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/2b56169a
Branch: refs/heads/branch-1
Commit: 2b56169ad60c89015466957f7c6bdd6fe5b8d703
Parents: ec3d718
Author: Enis Soztutar <[email protected]>
Authored: Wed May 6 12:08:36 2015 -0700
Committer: Enis Soztutar <[email protected]>
Committed: Wed May 6 12:47:59 2015 -0700
----------------------------------------------------------------------
.../org/apache/hadoop/hbase/util/HBaseFsck.java | 66 +++++++++++++++++---
.../hadoop/hbase/util/HBaseFsckRepair.java | 6 +-
.../apache/hadoop/hbase/util/TestHBaseFsck.java | 1 +
3 files changed, 61 insertions(+), 12 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hbase/blob/2b56169a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java
----------------------------------------------------------------------
diff --git
a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java
b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java
index 2971643..b4548f6 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java
@@ -215,7 +215,7 @@ public class HBaseFsck extends Configured implements
Closeable {
private Table meta;
// threads to do ||izable tasks: retrieve data from regionservers, handle
overlapping regions
protected ExecutorService executor;
- private long startMillis = System.currentTimeMillis();
+ private long startMillis = EnvironmentEdgeManager.currentTime();
private HFileCorruptionChecker hfcc;
private int retcode = 0;
private Path HBCK_LOCK_PATH;
@@ -297,6 +297,7 @@ public class HBaseFsck extends Configured implements
Closeable {
private Map<TableName, Set<String>> orphanTableDirs =
new HashMap<TableName, Set<String>>();
+ private Map<TableName, Set<String>> skippedRegions = new HashMap<TableName,
Set<String>>();
/**
* List of orphaned table ZNodes
@@ -566,6 +567,7 @@ public class HBaseFsck extends Configured implements
Closeable {
errors.clear();
tablesInfo.clear();
orphanHdfsDirs.clear();
+ skippedRegions.clear();
}
/**
@@ -1717,7 +1719,7 @@ public class HBaseFsck extends Configured implements
Closeable {
return false;
}
ServerName sn = metaLocation.getServerName();
- MetaEntry m = new MetaEntry(metaLocation.getRegionInfo(), sn,
System.currentTimeMillis());
+ MetaEntry m = new MetaEntry(metaLocation.getRegionInfo(), sn,
EnvironmentEdgeManager.currentTime());
HbckInfo hbckInfo =
regionInfoMap.get(metaLocation.getRegionInfo().getEncodedName());
if (hbckInfo == null) {
regionInfoMap.put(metaLocation.getRegionInfo().getEncodedName(), new
HbckInfo(m));
@@ -1815,6 +1817,17 @@ public class HBaseFsck extends Configured implements
Closeable {
}
checkRegionConsistencyConcurrently(replicaWorkItems);
setCheckHdfs(prevHdfsCheck);
+
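+ // If some regions were skipped during the checkRegionConsistencyConcurrently() phase,
+ // we might not get an accurate state of HBase if we continue. The config here allows
+ // users to tune the tolerance for the number of skipped regions.
+ // NOTE(review): skippedRegions is keyed by table, so skippedRegions.size() below counts
+ // tables that have skipped regions rather than the skipped regions themselves.
+ // TODO: evaluate the consequences of continuing the hbck operation without this config.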
+ int terminateThreshold =
getConf().getInt("hbase.hbck.skipped.regions.limit", 0);
+ int numOfSkippedRegions = skippedRegions.size();
+ if (numOfSkippedRegions > 0 && numOfSkippedRegions > terminateThreshold) {
+ throw new IOException(numOfSkippedRegions
+ + " region(s) could not be checked or repaired. See logs for
detail.");
+ }
}
/**
@@ -1857,11 +1870,32 @@ public class HBaseFsck extends Configured implements
Closeable {
@Override
public synchronized Void call() throws Exception {
- checkRegionConsistency(key, hbi);
+ // Check (or repair) a single region. A failure on a non-META region is logged and the
+ // region is recorded as skipped so that the remaining regions are still processed; a
+ // failure on the META region is rethrown because we should not continue without it.
+ try {
+   checkRegionConsistency(key, hbi);
+ } catch (Exception e) {
+   LOG.warn("Unable to complete check or repair the region '" + hbi.getRegionNameAsString()
+       + "'.", e);
+   // Null-check the HDFS region info so that a NullPointerException raised here cannot
+   // replace the original failure 'e' and bypass the skip handling below.
+   // NOTE(review): assumes getHdfsHRI() may return null when the region has no HDFS
+   // entry -- confirm against HbckInfo.
+   if (hbi.getHdfsHRI() != null && hbi.getHdfsHRI().isMetaRegion()) {
+     throw e;
+   }
+   LOG.warn("Skip region '" + hbi.getRegionNameAsString() + "'");
+   addSkippedRegion(hbi);
+ }
return null;
}
}
-
+
+ /**
+  * Records a region whose check or repair could not be completed, grouped by its table.
+  * <p>
+  * This is called from the region work items' call(), which only locks the individual
+  * work item, so several work items may presumably get here at the same time. The update
+  * of the shared (non-thread-safe) map is therefore guarded by the map's own monitor.
+  *
+  * @param hbi the region that was skipped; its table name and region name are recorded
+  */
+ private void addSkippedRegion(final HbckInfo hbi) {
+   synchronized (skippedRegions) {
+     Set<String> skippedRegionNames = skippedRegions.get(hbi.getTableName());
+     if (skippedRegionNames == null) {
+       // First skipped region for this table: register the set once.
+       skippedRegionNames = new HashSet<String>();
+       skippedRegions.put(hbi.getTableName(), skippedRegionNames);
+     }
+     skippedRegionNames.add(hbi.getRegionNameAsString());
+   }
+ }
+
private void preCheckPermission() throws IOException, AccessDeniedException {
if (shouldIgnorePreCheckPermission()) {
return;
@@ -2106,7 +2140,7 @@ public class HBaseFsck extends Configured implements
Closeable {
(hbi.metaEntry == null)? false: hbi.metaEntry.isSplit() &&
hbi.metaEntry.isOffline();
boolean shouldBeDeployed = inMeta && !isTableDisabled(hbi.metaEntry);
boolean recentlyModified = inHdfs &&
- hbi.getModTime() + timelag > System.currentTimeMillis();
+ hbi.getModTime() + timelag > EnvironmentEdgeManager.currentTime();
// ========== First the healthy cases =============
if (hbi.containsOnlyHdfsEdits()) {
@@ -3113,7 +3147,7 @@ public class HBaseFsck extends Configured implements
Closeable {
*/
HTableDescriptor[] getTables(AtomicInteger numSkipped) {
List<TableName> tableNames = new ArrayList<TableName>();
- long now = System.currentTimeMillis();
+ long now = EnvironmentEdgeManager.currentTime();
for (HbckInfo hbi : regionInfoMap.values()) {
MetaEntry info = hbi.metaEntry;
@@ -3716,14 +3750,30 @@ public class HBaseFsck extends Configured implements
Closeable {
*/
private void printTableSummary(SortedMap<TableName, TableInfo> tablesInfo) {
StringBuilder sb = new StringBuilder();
+ int numOfSkippedRegions;
errors.print("Summary:");
for (TableInfo tInfo : tablesInfo.values()) {
+ numOfSkippedRegions = (skippedRegions.containsKey(tInfo.getName())) ?
+ skippedRegions.get(tInfo.getName()).size() : 0;
+
if (errors.tableHasErrors(tInfo)) {
errors.print("Table " + tInfo.getName() + " is inconsistent.");
- } else {
- errors.print(" " + tInfo.getName() + " is okay.");
+ } else if (numOfSkippedRegions > 0){
+ errors.print("Table " + tInfo.getName() + " is okay (with "
+ + numOfSkippedRegions + " skipped regions).");
+ }
+ else {
+ errors.print("Table " + tInfo.getName() + " is okay.");
}
errors.print(" Number of regions: " + tInfo.getNumRegions());
+ if (numOfSkippedRegions > 0) {
+ Set<String> skippedRegionStrings = skippedRegions.get(tInfo.getName());
+ System.out.println(" Number of skipped regions: " +
numOfSkippedRegions);
+ System.out.println(" List of skipped regions:");
+ for(String sr : skippedRegionStrings) {
+ System.out.println(" " + sr);
+ }
+ }
sb.setLength(0); // clear out existing buffer, if any.
sb.append(" Deployed on: ");
for (ServerName server : tInfo.deployedOn) {
http://git-wip-us.apache.org/repos/asf/hbase/blob/2b56169a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsckRepair.java
----------------------------------------------------------------------
diff --git
a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsckRepair.java
b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsckRepair.java
index d21bda1..7d8f67f 100644
---
a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsckRepair.java
+++
b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsckRepair.java
@@ -31,7 +31,6 @@ import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.MetaTableAccessor;
-import org.apache.hadoop.hbase.NotServingRegionException;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.ZooKeeperConnectionException;
@@ -41,7 +40,6 @@ import org.apache.hadoop.hbase.client.ClusterConnection;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.HConnection;
-import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.master.RegionState;
@@ -129,8 +127,8 @@ public class HBaseFsckRepair {
public static void waitUntilAssigned(Admin admin,
HRegionInfo region) throws IOException, InterruptedException {
long timeout =
admin.getConfiguration().getLong("hbase.hbck.assign.timeout", 120000);
- long expiration = timeout + System.currentTimeMillis();
- while (System.currentTimeMillis() < expiration) {
+ long expiration = timeout + EnvironmentEdgeManager.currentTime();
+ while (EnvironmentEdgeManager.currentTime() < expiration) {
try {
Map<String, RegionState> rits=
admin.getClusterStatus().getRegionsInTransition();
http://git-wip-us.apache.org/repos/asf/hbase/blob/2b56169a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java
----------------------------------------------------------------------
diff --git
a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java
b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java
index b1f7427..f8ea4ab 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java
@@ -111,6 +111,7 @@ import
org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
import org.apache.hadoop.hbase.regionserver.HRegionServer;
import org.apache.hadoop.hbase.regionserver.SplitTransactionImpl;
import org.apache.hadoop.hbase.regionserver.TestEndToEndSplitTransaction;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter;
import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE;
import org.apache.hadoop.hbase.util.HBaseFsck.HbckInfo;