HBASE-13932 - Add mob integrity check in HFilePrettyPrinter (Jingcheng Du)
Project: http://git-wip-us.apache.org/repos/asf/hbase/repo Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/ba4ba32b Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/ba4ba32b Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/ba4ba32b Branch: refs/heads/master Commit: ba4ba32b0dd5166b1cc2862e55e5c1c6eacfdf43 Parents: c4437e2 Author: ramkrishna <[email protected]> Authored: Wed Jun 24 09:22:44 2015 +0530 Committer: ramkrishna <[email protected]> Committed: Wed Jun 24 09:22:44 2015 +0530 ---------------------------------------------------------------------- .../hbase/io/hfile/HFilePrettyPrinter.java | 100 ++++++++++++++++++- 1 file changed, 99 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hbase/blob/ba4ba32b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePrettyPrinter.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePrettyPrinter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePrettyPrinter.java index 7cc31d0..dc12762 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePrettyPrinter.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePrettyPrinter.java @@ -24,9 +24,13 @@ import java.io.DataInput; import java.io.IOException; import java.io.PrintStream; import java.util.ArrayList; +import java.util.HashMap; +import java.util.Iterator; +import java.util.LinkedHashSet; import java.util.List; import java.util.Locale; import java.util.Map; +import java.util.Set; import java.util.SortedMap; import org.apache.commons.cli.CommandLine; @@ -57,12 +61,14 @@ import org.apache.hadoop.hbase.KeyValueUtil; import org.apache.hadoop.hbase.Tag; import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper; import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo; +import 
org.apache.hadoop.hbase.mob.MobUtils; import org.apache.hadoop.hbase.regionserver.TimeRangeTracker; import org.apache.hadoop.hbase.util.BloomFilter; import org.apache.hadoop.hbase.util.BloomFilterUtil; import org.apache.hadoop.hbase.util.BloomFilterFactory; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.FSUtils; +import org.apache.hadoop.hbase.util.HFileArchiveUtil; import org.apache.hadoop.hbase.util.Writables; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; @@ -95,6 +101,10 @@ public class HFilePrettyPrinter extends Configured implements Tool { private boolean checkRow; private boolean checkFamily; private boolean isSeekToRow = false; + private boolean checkMobIntegrity = false; + private Map<String, List<Path>> mobFileLocations; + private static final int FOUND_MOB_FILES_CACHE_CAPACITY = 50; + private static final int MISSING_MOB_FILES_CACHE_CAPACITY = 20; /** * The row which the user wants to specify and print all the KeyValues for. 
@@ -130,6 +140,8 @@ public class HFilePrettyPrinter extends Configured implements Tool { options.addOption("w", "seekToRow", true, "Seek to this row and print all the kvs for this row only"); options.addOption("s", "stats", false, "Print statistics"); + options.addOption("i", "checkMobIntegrity", false, + "Print all cells whose mob files are missing"); OptionGroup files = new OptionGroup(); files.addOption(new Option("f", "file", true, @@ -158,6 +170,7 @@ public class HFilePrettyPrinter extends Configured implements Tool { printStats = cmd.hasOption("s"); checkRow = cmd.hasOption("k"); checkFamily = cmd.hasOption("a"); + checkMobIntegrity = cmd.hasOption("i"); if (cmd.hasOption("f")) { files.add(new Path(cmd.getOptionValue("f"))); @@ -199,6 +212,12 @@ public class HFilePrettyPrinter extends Configured implements Tool { files.addAll(regionFiles); } + if(checkMobIntegrity) { + if (verbose) { + System.out.println("checkMobIntegrity is enabled"); + } + mobFileLocations = new HashMap<String, List<Path>>(); + } return true; } @@ -255,7 +274,7 @@ public class HFilePrettyPrinter extends Configured implements Tool { KeyValueStatsCollector fileStats = null; - if (verbose || printKey || checkRow || checkFamily || printStats) { + if (verbose || printKey || checkRow || checkFamily || printStats || checkMobIntegrity) { // scan over file and read key/value's and check if requested HFileScanner scanner = reader.getScanner(false, false, false); fileStats = new KeyValueStatsCollector(); @@ -313,6 +332,9 @@ public class HFilePrettyPrinter extends Configured implements Tool { private void scanKeysValues(Path file, KeyValueStatsCollector fileStats, HFileScanner scanner, byte[] row) throws IOException { Cell pCell = null; + FileSystem fs = FileSystem.get(getConf()); + Set<String> foundMobFiles = new LinkedHashSet<String>(FOUND_MOB_FILES_CACHE_CAPACITY); + Set<String> missingMobFiles = new LinkedHashSet<String>(MISSING_MOB_FILES_CACHE_CAPACITY); do { Cell cell = scanner.getKeyValue(); if 
(row != null && row.length != 0) { @@ -369,12 +391,88 @@ public class HFilePrettyPrinter extends Configured implements Tool { + "\n\tcurrent -> " + CellUtil.getCellKeyAsString(cell)); } } + // check if mob files are missing. + if (checkMobIntegrity && MobUtils.isMobReferenceCell(cell)) { + Tag tnTag = MobUtils.getTableNameTag(cell); + if (tnTag == null) { + System.err.println("ERROR, wrong tag format in mob reference cell " + + CellUtil.getCellKeyAsString(cell)); + } else if (!MobUtils.hasValidMobRefCellValue(cell)) { + System.err.println("ERROR, wrong value format in mob reference cell " + + CellUtil.getCellKeyAsString(cell)); + } else { + TableName tn = TableName.valueOf(tnTag.getValue()); + String mobFileName = MobUtils.getMobFileName(cell); + boolean exist = mobFileExists(fs, tn, mobFileName, + Bytes.toString(CellUtil.cloneFamily(cell)), foundMobFiles, missingMobFiles); + if (!exist) { + // report error + System.err.println("ERROR, the mob file [" + mobFileName + + "] is missing referenced by cell " + CellUtil.getCellKeyAsString(cell)); + } + } + } pCell = cell; ++count; } while (scanner.next()); } /** + * Checks whether the referenced mob file exists. 
+ */ + private boolean mobFileExists(FileSystem fs, TableName tn, String mobFileName, String family, + Set<String> foundMobFiles, Set<String> missingMobFiles) throws IOException { + if (foundMobFiles.contains(mobFileName)) { + return true; + } + if (missingMobFiles.contains(mobFileName)) { + return false; + } + String tableName = tn.getNameAsString(); + List<Path> locations = mobFileLocations.get(tableName); + if (locations == null) { + locations = new ArrayList<Path>(2); + locations.add(MobUtils.getMobFamilyPath(getConf(), tn, family)); + locations.add(HFileArchiveUtil.getStoreArchivePath(getConf(), tn, + MobUtils.getMobRegionInfo(tn).getEncodedName(), family)); + mobFileLocations.put(tn.getNameAsString(), locations); + } + boolean exist = false; + for (Path location : locations) { + Path mobFilePath = new Path(location, mobFileName); + if (fs.exists(mobFilePath)) { + exist = true; + break; + } + } + if (exist) { + evictMobFilesIfNecessary(foundMobFiles, FOUND_MOB_FILES_CACHE_CAPACITY); + foundMobFiles.add(mobFileName); + } else { + evictMobFilesIfNecessary(missingMobFiles, MISSING_MOB_FILES_CACHE_CAPACITY); + missingMobFiles.add(mobFileName); + } + return exist; + } + + /** + * Evicts the cached mob files if the set is larger than the limit. + */ + private void evictMobFilesIfNecessary(Set<String> mobFileNames, int limit) { + if (mobFileNames.size() < limit) { + return; + } + int index = 0; + int evict = limit / 2; + Iterator<String> fileNamesItr = mobFileNames.iterator(); + while (index < evict && fileNamesItr.hasNext()) { + fileNamesItr.next(); + fileNamesItr.remove(); + index++; + } + } + + /** * Format a string of the form "k1=v1, k2=v2, ..." into separate lines * with a four-space indentation. */
