HBASE-13806 Check the mob files when there are mob-enabled columns in HFileCorruptionChecker. (Jingcheng)
Project: http://git-wip-us.apache.org/repos/asf/hbase/repo Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/13fe542b Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/13fe542b Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/13fe542b Branch: refs/heads/master Commit: 13fe542bccea2ac70f052017ac71b99b10dbcda1 Parents: efbef29 Author: anoopsjohn <[email protected]> Authored: Mon Jun 8 11:17:43 2015 +0530 Committer: anoopsjohn <[email protected]> Committed: Mon Jun 8 11:17:43 2015 +0530 ---------------------------------------------------------------------- .../hbase/util/hbck/HFileCorruptionChecker.java | 197 +++++++++++++++++++ .../apache/hadoop/hbase/util/TestHBaseFsck.java | 110 +++++++++++ 2 files changed, 307 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hbase/blob/13fe542b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/hbck/HFileCorruptionChecker.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/hbck/HFileCorruptionChecker.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/hbck/HFileCorruptionChecker.java index 23dc570..a1b7c2c 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/hbck/HFileCorruptionChecker.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/hbck/HFileCorruptionChecker.java @@ -39,9 +39,11 @@ import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.io.hfile.CacheConfig; import org.apache.hadoop.hbase.io.hfile.CorruptHFileException; import org.apache.hadoop.hbase.io.hfile.HFile; +import org.apache.hadoop.hbase.mob.MobUtils; import org.apache.hadoop.hbase.util.FSUtils; import org.apache.hadoop.hbase.util.FSUtils.FamilyDirFilter; 
import org.apache.hadoop.hbase.util.FSUtils.HFileFilter; @@ -68,8 +70,13 @@ public class HFileCorruptionChecker { final Set<Path> failures = new ConcurrentSkipListSet<Path>(); final Set<Path> quarantined = new ConcurrentSkipListSet<Path>(); final Set<Path> missing = new ConcurrentSkipListSet<Path>(); + final Set<Path> corruptedMobFiles = new ConcurrentSkipListSet<Path>(); + final Set<Path> failureMobFiles = new ConcurrentSkipListSet<Path>(); + final Set<Path> missedMobFiles = new ConcurrentSkipListSet<Path>(); + final Set<Path> quarantinedMobFiles = new ConcurrentSkipListSet<Path>(); final boolean inQuarantineMode; final AtomicInteger hfilesChecked = new AtomicInteger(); + final AtomicInteger mobFilesChecked = new AtomicInteger(); public HFileCorruptionChecker(Configuration conf, ExecutorService executor, boolean quarantine) throws IOException { @@ -177,6 +184,109 @@ public class HFileCorruptionChecker { } /** + * Check all files in a mob column family dir. + * + * @param cfDir + * mob column family directory + * @throws IOException + */ + protected void checkMobColFamDir(Path cfDir) throws IOException { + FileStatus[] hfs = null; + try { + hfs = fs.listStatus(cfDir, new HFileFilter(fs)); // use same filter as scanner. + } catch (FileNotFoundException fnfe) { + // Hadoop 0.23+ listStatus semantics throws an exception if the path does not exist. + LOG.warn("Mob colfam Directory " + cfDir + + " does not exist. Likely the table is deleted. Skipping."); + missedMobFiles.add(cfDir); + return; + } + + // Hadoop 1.0 listStatus does not throw an exception if the path does not exist. + if (hfs.length == 0 && !fs.exists(cfDir)) { + LOG.warn("Mob colfam Directory " + cfDir + + " does not exist. Likely the table is deleted. Skipping."); + missedMobFiles.add(cfDir); + return; + } + for (FileStatus hfFs : hfs) { + Path hf = hfFs.getPath(); + checkMobFile(hf); + } + } + + /** + * Checks a path to see if it is a valid mob file. + * + * @param p + * full Path to a mob file. 
+ * @throws IOException + * This is a connectivity related exception + */ + protected void checkMobFile(Path p) throws IOException { + HFile.Reader r = null; + try { + r = HFile.createReader(fs, p, cacheConf, conf); + } catch (CorruptHFileException che) { + LOG.warn("Found corrupt mob file " + p, che); + corruptedMobFiles.add(p); + if (inQuarantineMode) { + Path dest = createQuarantinePath(p); + LOG.warn("Quarantining corrupt mob file " + p + " into " + dest); + boolean success = fs.mkdirs(dest.getParent()); + success = success ? fs.rename(p, dest): false; + if (!success) { + failureMobFiles.add(p); + } else { + quarantinedMobFiles.add(dest); + } + } + return; + } catch (FileNotFoundException fnfe) { + LOG.warn("Mob file " + p + " was missing. Likely removed due to compaction?"); + missedMobFiles.add(p); + } finally { + mobFilesChecked.addAndGet(1); + if (r != null) { + r.close(true); + } + } + } + + /** + * Checks all the mob files of a table. + * @param regionDir The mob region directory + * @throws IOException + */ + private void checkMobRegionDir(Path regionDir) throws IOException { + if (!fs.exists(regionDir)) { + return; + } + FileStatus[] hfs = null; + try { + hfs = fs.listStatus(regionDir, new FamilyDirFilter(fs)); + } catch (FileNotFoundException fnfe) { + // Hadoop 0.23+ listStatus semantics throws an exception if the path does not exist. + LOG.warn("Mob directory " + regionDir + + " does not exist. Likely the table is deleted. Skipping."); + missedMobFiles.add(regionDir); + return; + } + + // Hadoop 1.0 listStatus does not throw an exception if the path does not exist. + if (hfs.length == 0 && !fs.exists(regionDir)) { + LOG.warn("Mob directory " + regionDir + + " does not exist. Likely the table is deleted. Skipping."); + missedMobFiles.add(regionDir); + return; + } + for (FileStatus hfFs : hfs) { + Path hf = hfFs.getPath(); + checkMobColFamDir(hf); + } + } + + /** * Check all column families in a region dir. 
* * @param regionDir @@ -236,6 +346,8 @@ public class HFileCorruptionChecker { rdcs.add(work); } + // add mob region + rdcs.add(createMobRegionDirChecker(tableDir)); // Submit and wait for completion try { rdFutures = executor.invokeAll(rdcs); @@ -293,6 +405,34 @@ public class HFileCorruptionChecker { } /** + * An individual work item for parallelized mob dir processing. This is + * intentionally an inner class so it can use the shared error sets and fs. + */ + private class MobRegionDirChecker extends RegionDirChecker { + + MobRegionDirChecker(Path regionDir) { + super(regionDir); + } + + @Override + public Void call() throws IOException { + checkMobRegionDir(regionDir); + return null; + } + } + + /** + * Creates an instance of MobRegionDirChecker. + * @param tableDir The current table directory. + * @return An instance of MobRegionDirChecker. + */ + private MobRegionDirChecker createMobRegionDirChecker(Path tableDir) { + TableName tableName = FSUtils.getTableName(tableDir); + Path mobDir = MobUtils.getMobRegionPath(conf, tableName); + return new MobRegionDirChecker(mobDir); + } + + /** * Check the specified table dirs for bad hfiles. */ public void checkTables(Collection<Path> tables) throws IOException { @@ -338,6 +478,42 @@ public class HFileCorruptionChecker { } /** + * @return the set of check failure mob file paths after checkTables is called. + */ + public Collection<Path> getFailureMobFiles() { + return new HashSet<Path>(failureMobFiles); + } + + /** + * @return the set of corrupted mob file paths after checkTables is called. + */ + public Collection<Path> getCorruptedMobFiles() { + return new HashSet<Path>(corruptedMobFiles); + } + + /** + * @return number of mob files checked in the last HFileCorruptionChecker run + */ + public int getMobFilesChecked() { + return mobFilesChecked.get(); + } + + /** + * @return the set of successfully quarantined paths after checkTables is called. 
+ */ + public Collection<Path> getQuarantinedMobFiles() { + return new HashSet<Path>(quarantinedMobFiles); + } + + /** + * @return the set of paths that were missing. Likely due to table deletion or + * deletion/moves from compaction. + */ + public Collection<Path> getMissedMobFiles() { + return new HashSet<Path>(missedMobFiles); + } + + /** * Print a human readable summary of hfile quarantining operations. * @param out */ @@ -363,10 +539,31 @@ public class HFileCorruptionChecker { String fixedState = (corrupted.size() == quarantined.size()) ? "OK" : "CORRUPTED"; + // print mob-related report + if (inQuarantineMode) { + out.print(" Mob files successfully quarantined: " + quarantinedMobFiles.size()); + for (Path sq : quarantinedMobFiles) { + out.print(" " + sq); + } + out.print(" Mob files failed quarantine: " + failureMobFiles.size()); + for (Path fq : failureMobFiles) { + out.print(" " + fq); + } + } + out.print(" Mob files moved while checking: " + missedMobFiles.size()); + for (Path mq : missedMobFiles) { + out.print(" " + mq); + } + String initialMobState = (corruptedMobFiles.size() == 0) ? "OK" : "CORRUPTED"; + String fixedMobState = (corruptedMobFiles.size() == quarantinedMobFiles.size()) ? 
"OK" + : "CORRUPTED"; + if (inQuarantineMode) { out.print("Summary: " + initialState + " => " + fixedState); + out.print("Mob summary: " + initialMobState + " => " + fixedMobState); } else { out.print("Summary: " + initialState); + out.print("Mob summary: " + initialMobState); } } } http://git-wip-us.apache.org/repos/asf/hbase/blob/13fe542b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java index 28b80ff..9938df7 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java @@ -39,6 +39,7 @@ import java.util.List; import java.util.Map; import java.util.NavigableMap; import java.util.Set; +import java.util.UUID; import java.util.concurrent.Callable; import java.util.concurrent.CountDownLatch; import java.util.concurrent.ExecutorService; @@ -95,6 +96,8 @@ import org.apache.hadoop.hbase.master.RegionState; import org.apache.hadoop.hbase.master.RegionStates; import org.apache.hadoop.hbase.master.TableLockManager; import org.apache.hadoop.hbase.master.TableLockManager.TableLock; +import org.apache.hadoop.hbase.mob.MobFileName; +import org.apache.hadoop.hbase.mob.MobUtils; import org.apache.hadoop.hbase.protobuf.ProtobufUtil; import org.apache.hadoop.hbase.protobuf.generated.AdminProtos; import org.apache.hadoop.hbase.regionserver.HRegion; @@ -438,6 +441,31 @@ public class TestHBaseFsck { } /** + * Setup a clean table with a mob-enabled column. + * + * @param tableName The name of a table to be created. 
+ * @throws Exception + */ + void setupMobTable(TableName tablename) throws Exception { + HTableDescriptor desc = new HTableDescriptor(tablename); + HColumnDescriptor hcd = new HColumnDescriptor(Bytes.toString(FAM)); + hcd.setMobEnabled(true); + hcd.setMobThreshold(0); + desc.addFamily(hcd); // If a table has no CF's it doesn't get checked + createTable(TEST_UTIL, desc, SPLITS); + + tbl = (HTable) connection.getTable(tablename, tableExecutorService); + List<Put> puts = new ArrayList<Put>(); + for (byte[] row : ROWKEYS) { + Put p = new Put(row); + p.add(FAM, Bytes.toBytes("val"), row); + puts.add(p); + } + tbl.put(puts); + tbl.flushCommits(); + } + + /** * Counts the number of row to verify data loss or non-dataloss. */ int countRows() throws IOException { @@ -2121,6 +2149,44 @@ public class TestHBaseFsck { } /** + * Gets a flushed mob file. + * @param fs The current file system. + * @param table The current table name. + * @return Path of a flushed hfile. + * @throws IOException + */ + Path getFlushedMobFile(FileSystem fs, TableName table) throws IOException { + Path regionDir = MobUtils.getMobRegionPath(conf, table); + Path famDir = new Path(regionDir, FAM_STR); + + // keep doing this until we get a legit hfile + while (true) { + FileStatus[] hfFss = fs.listStatus(famDir); + if (hfFss.length == 0) { + continue; + } + for (FileStatus hfs : hfFss) { + if (!hfs.isDirectory()) { + return hfs.getPath(); + } + } + } + } + + /** + * Creates a new mob file name from the old one. + * @param oldFileName The old mob file name. + * @return The new mob file name. + */ + String createMobFileName(String oldFileName) { + MobFileName mobFileName = MobFileName.create(oldFileName); + String startKey = mobFileName.getStartKey(); + String date = mobFileName.getDate(); + return MobFileName.create(startKey, date, UUID.randomUUID().toString().replaceAll("-", "")) + .getFileName(); + } + + /** * This creates a table and then corrupts an hfile. Hbck should quarantine the file. 
*/ @Test(timeout=180000) @@ -2161,6 +2227,50 @@ public class TestHBaseFsck { } /** + * This creates a table and then corrupts a mob file. Hbck should quarantine the file. + */ + @Test(timeout=180000) + public void testQuarantineCorruptMobFile() throws Exception { + TableName table = TableName.valueOf(name.getMethodName()); + try { + setupMobTable(table); + assertEquals(ROWKEYS.length, countRows()); + admin.flush(table); + + FileSystem fs = FileSystem.get(conf); + Path mobFile = getFlushedMobFile(fs, table); + admin.disableTable(table); + // create new corrupt mob file. + String corruptMobFile = createMobFileName(mobFile.getName()); + Path corrupt = new Path(mobFile.getParent(), corruptMobFile); + TestHFile.truncateFile(fs, mobFile, corrupt); + LOG.info("Created corrupted mob file " + corrupt); + HBaseFsck.debugLsr(conf, FSUtils.getRootDir(conf)); + HBaseFsck.debugLsr(conf, MobUtils.getMobHome(conf)); + + // A corrupt mob file doesn't abort the start of regions, so we can enable the table. + admin.enableTable(table); + HBaseFsck res = HbckTestingUtil.doHFileQuarantine(conf, table); + assertEquals(res.getRetCode(), 0); + HFileCorruptionChecker hfcc = res.getHFilecorruptionChecker(); + assertEquals(hfcc.getHFilesChecked(), 4); + assertEquals(hfcc.getCorrupted().size(), 0); + assertEquals(hfcc.getFailures().size(), 0); + assertEquals(hfcc.getQuarantined().size(), 0); + assertEquals(hfcc.getMissing().size(), 0); + assertEquals(hfcc.getMobFilesChecked(), 5); + assertEquals(hfcc.getCorruptedMobFiles().size(), 1); + assertEquals(hfcc.getFailureMobFiles().size(), 0); + assertEquals(hfcc.getQuarantinedMobFiles().size(), 1); + assertEquals(hfcc.getMissedMobFiles().size(), 0); + String quarantinedMobFile = hfcc.getQuarantinedMobFiles().iterator().next().getName(); + assertEquals(corruptMobFile, quarantinedMobFile); + } finally { + cleanupTable(table); + } + } + + /** * Test that use this should have a timeout, because this method could potentially wait forever. 
*/ private void doQuarantineTest(TableName table, HBaseFsck hbck, int check,
