HBASE-12773 Add warning message when user is trying to bulkload a large HFile (Srikanth Srungarapu)
Project: http://git-wip-us.apache.org/repos/asf/hbase/repo Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/358e6002 Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/358e6002 Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/358e6002 Branch: refs/heads/0.98 Commit: 358e60027502f7534a707091d7e53081cbfeb824 Parents: 5e36e70 Author: Matteo Bertozzi <[email protected]> Authored: Fri Jan 16 10:24:33 2015 +0000 Committer: Matteo Bertozzi <[email protected]> Committed: Fri Jan 16 10:24:33 2015 +0000 ---------------------------------------------------------------------- .../hbase/mapreduce/LoadIncrementalHFiles.java | 22 ++++++++++++-------- .../hadoop/hbase/regionserver/HStore.java | 9 ++++++-- 2 files changed, 20 insertions(+), 11 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hbase/blob/358e6002/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/LoadIncrementalHFiles.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/LoadIncrementalHFiles.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/LoadIncrementalHFiles.java index 1d8282d..cf7a92c 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/LoadIncrementalHFiles.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/LoadIncrementalHFiles.java @@ -36,6 +36,7 @@ import java.util.Map; import java.util.Map.Entry; import java.util.Set; import java.util.TreeMap; +import java.util.UUID; import java.util.concurrent.Callable; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; @@ -43,20 +44,17 @@ import java.util.concurrent.Future; import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicLong; import org.apache.commons.lang.mutable.MutableInt; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hbase.classification.InterfaceAudience; -import org.apache.hadoop.hbase.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; -import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.HColumnDescriptor; import org.apache.hadoop.hbase.HConstants; @@ -64,6 +62,8 @@ import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.TableNotFoundException; +import org.apache.hadoop.hbase.classification.InterfaceAudience; +import org.apache.hadoop.hbase.classification.InterfaceStability; import org.apache.hadoop.hbase.client.HBaseAdmin; import org.apache.hadoop.hbase.client.HConnection; import org.apache.hadoop.hbase.client.HTable; @@ -71,7 +71,6 @@ import org.apache.hadoop.hbase.client.RegionServerCallable; import org.apache.hadoop.hbase.client.RpcRetryingCallerFactory; import org.apache.hadoop.hbase.client.coprocessor.SecureBulkLoadClient; import org.apache.hadoop.hbase.coprocessor.CoprocessorHost; -import org.apache.hadoop.hbase.io.HFileLink; import org.apache.hadoop.hbase.io.HalfStoreFileReader; import org.apache.hadoop.hbase.io.Reference; import org.apache.hadoop.hbase.io.compress.Compression.Algorithm; @@ -97,8 +96,6 @@ import com.google.common.collect.Multimap; import com.google.common.collect.Multimaps; import com.google.common.util.concurrent.ThreadFactoryBuilder; -import java.util.UUID; - /** * Tool to load the output of HFileOutputFormat into an existing table. * @see #usage() @@ -207,9 +204,16 @@ public class LoadIncrementalHFiles extends Configured implements Tool { continue; } byte[] family = familyDir.getName().getBytes(); - Path[] hfiles = FileUtil.stat2Paths(fs.listStatus(familyDir)); - for (Path hfile : hfiles) { + FileStatus[] hfileStatuses = fs.listStatus(familyDir); + for (FileStatus hfileStatus : hfileStatuses) { + long length = hfileStatus.getLen(); + Path hfile = hfileStatus.getPath(); if (hfile.getName().startsWith("_")) continue; + if(length > getConf().getLong(HConstants.HREGION_MAX_FILESIZE, + HConstants.DEFAULT_MAX_FILE_SIZE)) { + LOG.warn("Trying to bulk load hfile " + hfofDir.toString() + " with size: " + + length + " bytes can be problematic as it may lead to oversplitting."); + } ret.add(new LoadQueueItem(family, hfile)); } } http://git-wip-us.apache.org/repos/asf/hbase/blob/358e6002/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java index 79942d4..f6492aa 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java @@ -19,7 +19,6 @@ package org.apache.hadoop.hbase.regionserver; import java.io.IOException; -import java.io.FileNotFoundException; import java.io.InterruptedIOException; import java.net.InetSocketAddress; import java.security.Key; @@ -45,7 +44,6 @@ import java.util.concurrent.locks.ReentrantReadWriteLock; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -60,6 +58,7 @@ import org.apache.hadoop.hbase.RemoteExceptionHandler; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.Tag; import org.apache.hadoop.hbase.TagType; +import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.io.compress.Compression; import org.apache.hadoop.hbase.io.crypto.Cipher; @@ -645,6 +644,12 @@ public class HStore implements Store { + this.getRegionInfo().getRegionNameAsString()); } + if(reader.length() > conf.getLong(HConstants.HREGION_MAX_FILESIZE, + HConstants.DEFAULT_MAX_FILE_SIZE)) { + LOG.warn("Trying to bulk load hfile " + srcPath.toString() + " with size: " + + reader.length() + " bytes can be problematic as it may lead to oversplitting."); + } + if (verifyBulkLoads) { long verificationStartTime = EnvironmentEdgeManager.currentTimeMillis(); LOG.info("Full verification started for bulk load hfile: " + srcPath.toString());
