HBASE-18038 Rename StoreFile to HStoreFile and add a StoreFile interface for CP


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/ee0f148c
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/ee0f148c
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/ee0f148c

Branch: refs/heads/master
Commit: ee0f148c730e0ae1cb616406166487fba78a2298
Parents: 8bfa8aa
Author: zhangduo <zhang...@apache.org>
Authored: Tue Jun 6 16:35:19 2017 +0800
Committer: zhangduo <zhang...@apache.org>
Committed: Tue Jun 6 20:36:38 2017 +0800

----------------------------------------------------------------------
 .../assignment/MergeTableRegionsProcedure.java  |    9 +-
 .../assignment/SplitTableRegionProcedure.java   |   13 +-
 .../master/balancer/StochasticLoadBalancer.java |    3 +-
 .../apache/hadoop/hbase/mob/CachedMobFile.java  |    3 +-
 .../org/apache/hadoop/hbase/mob/MobFile.java    |    3 +-
 .../org/apache/hadoop/hbase/mob/MobUtils.java   |    5 +-
 .../compactions/PartitionedMobCompactor.java    |   10 +-
 .../hbase/regionserver/CompactionTool.java      |   38 +-
 .../regionserver/DateTieredStoreEngine.java     |    2 +-
 .../hbase/regionserver/DefaultStoreEngine.java  |    2 +-
 .../regionserver/DefaultStoreFileManager.java   |    8 +-
 .../hadoop/hbase/regionserver/HMobStore.java    |    2 +-
 .../hadoop/hbase/regionserver/HRegion.java      |    6 +-
 .../hadoop/hbase/regionserver/HStore.java       |   30 +-
 .../hadoop/hbase/regionserver/HStoreFile.java   |  560 +++++++++
 .../hadoop/hbase/regionserver/StoreFile.java    |  736 ++----------
 .../regionserver/StoreFileComparators.java      |   96 ++
 .../hbase/regionserver/StoreFileInfo.java       |    1 -
 .../hbase/regionserver/StoreFileScanner.java    |   20 +-
 .../hbase/regionserver/StoreFileWriter.java     |   21 +-
 .../hadoop/hbase/regionserver/StoreScanner.java |    4 +-
 .../hadoop/hbase/regionserver/StoreUtils.java   |   94 +-
 .../regionserver/StripeStoreFileManager.java    |   20 +-
 .../compactions/CompactionRequest.java          |   45 +-
 .../regionserver/compactions/Compactor.java     |    4 +-
 .../compactions/DateTieredCompactionPolicy.java |   48 +-
 .../compactions/DateTieredCompactor.java        |    4 +-
 .../compactions/RatioBasedCompactionPolicy.java |    7 +-
 .../compactions/SortedCompactionPolicy.java     |   54 +-
 .../hadoop/hbase/snapshot/SnapshotManifest.java |   13 +-
 .../hadoop/hbase/util/BloomFilterFactory.java   |    2 +-
 .../hbase/coprocessor/SimpleRegionObserver.java |    2 +-
 .../hbase/mapreduce/TestHFileOutputFormat2.java |    5 +-
 .../apache/hadoop/hbase/mob/TestMobFile.java    |    6 +-
 .../hbase/mob/compactions/TestMobCompactor.java |    3 +-
 .../TestPartitionedMobCompactor.java            |   40 +-
 .../hbase/namespace/TestNamespaceAuditor.java   |    2 +
 .../AbstractTestDateTieredCompactionPolicy.java |    9 +-
 .../regionserver/DataBlockEncodingTool.java     |    2 +-
 .../EncodedSeekPerformanceTest.java             |    4 +-
 .../hbase/regionserver/MockStoreFile.java       |   59 +-
 .../regionserver/TestCacheOnWriteInSchema.java  |    2 +-
 .../TestCompactionArchiveConcurrentClose.java   |   16 +-
 .../TestCompactionArchiveIOException.java       |   11 +-
 .../regionserver/TestCompoundBloomFilter.java   |   10 +-
 .../regionserver/TestEncryptionKeyRotation.java |    8 +-
 .../TestEncryptionRandomKeying.java             |    8 +-
 .../hbase/regionserver/TestFSErrorsExposed.java |    8 +-
 .../hbase/regionserver/TestHMobStore.java       |   29 +-
 .../hadoop/hbase/regionserver/TestHRegion.java  |    1 -
 .../regionserver/TestHRegionReplayEvents.java   |   14 +-
 .../regionserver/TestHRegionServerBulkLoad.java |    8 +-
 .../hbase/regionserver/TestHStoreFile.java      | 1106 ++++++++++++++++++
 .../regionserver/TestMobStoreCompaction.java    |   15 +-
 .../hbase/regionserver/TestRegionReplicas.java  |    2 +-
 .../regionserver/TestReversibleScanners.java    |   14 +-
 .../TestSplitTransactionOnCluster.java          |    3 +-
 .../hadoop/hbase/regionserver/TestStore.java    |    5 +-
 .../hbase/regionserver/TestStoreFile.java       | 1106 ------------------
 .../regionserver/TestStripeStoreEngine.java     |    2 +
 .../TestStripeStoreFileManager.java             |   13 +-
 .../compactions/MockStoreFileGenerator.java     |    9 +-
 .../compactions/PerfTestCompactionPolicies.java |   21 +-
 .../TestCompactedHFilesDischarger.java          |   13 +-
 .../compactions/TestDateTieredCompactor.java    |    3 +-
 .../compactions/TestStripeCompactionPolicy.java |    8 +-
 .../visibility/TestVisibilityLabels.java        |    4 +-
 .../apache/hadoop/hbase/util/HFileTestUtil.java |   13 +-
 68 files changed, 2331 insertions(+), 2116 deletions(-)
----------------------------------------------------------------------
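
In short: the old concrete StoreFile class is split into a coprocessor-facing
interface (StoreFile, now annotated LimitedPrivate(COPROC) and Evolving) and an
internal implementation (HStoreFile), and construction sites now state
explicitly whether a file belongs to a primary replica. For illustration only
(not part of the commit), the resulting pattern looks roughly like this,
assuming fs, path, conf and cacheConf are already in scope:

    // Internal code constructs the implementation; the trailing boolean is the
    // new primaryReplica flag (for now it only affects the block cache key).
    StoreFile sf = new HStoreFile(fs, path, conf, cacheConf, BloomType.NONE, true);
    sf.initReader();                        // open the underlying HFile reader
    long maxSeqId = sf.getMaxSequenceId();  // metadata loaded by initReader()
    sf.closeReader(true);                   // true = evict cached blocks

    // Coprocessor code types only against the StoreFile interface, so the
    // implementation behind it can keep evolving.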


http://git-wip-us.apache.org/repos/asf/hbase/blob/ee0f148c/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java
index 2792ea2..83d5506 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java
@@ -56,7 +56,7 @@ import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
 import org.apache.hadoop.hbase.procedure2.ProcedureYieldException;
 import org.apache.hadoop.hbase.procedure2.ProcedureMetrics;
 import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
-import org.apache.hadoop.hbase.regionserver.StoreFile;
+import org.apache.hadoop.hbase.regionserver.HStoreFile;
 import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetRegionInfoResponse;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos;
@@ -605,11 +605,8 @@ public class MergeTableRegionsProcedure
         final CacheConfig cacheConf = new CacheConfig(conf, hcd);
         for (StoreFileInfo storeFileInfo: storeFiles) {
           // Create reference file(s) of the region in mergedDir
-          regionFs.mergeStoreFile(
-            mergedRegion,
-            family,
-            new StoreFile(
-              mfs.getFileSystem(), storeFileInfo, conf, cacheConf, hcd.getBloomFilterType()),
+          regionFs.mergeStoreFile(mergedRegion, family, new HStoreFile(mfs.getFileSystem(),
+              storeFileInfo, conf, cacheConf, hcd.getBloomFilterType(), true),
             mergedDir);
         }
       }

http://git-wip-us.apache.org/repos/asf/hbase/blob/ee0f148c/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java
index a663608..2e2aa5d 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java
@@ -18,6 +18,8 @@
 
 package org.apache.hadoop.hbase.master.assignment;
 
+import com.google.common.annotations.VisibleForTesting;
+
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.InterruptedIOException;
@@ -62,6 +64,7 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.SplitTableRegionState;
 import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
 import org.apache.hadoop.hbase.regionserver.HStore;
+import org.apache.hadoop.hbase.regionserver.HStoreFile;
 import org.apache.hadoop.hbase.regionserver.StoreFile;
 import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetRegionInfoResponse;
@@ -71,8 +74,6 @@ import org.apache.hadoop.hbase.util.FSUtils;
 import org.apache.hadoop.hbase.util.Pair;
 import org.apache.hadoop.hbase.util.Threads;
 
-import com.google.common.annotations.VisibleForTesting;
-
 /**
  * The procedure to split a region in a table.
  * Takes lock on the parent region.
@@ -525,11 +526,9 @@ public class SplitTableRegionProcedure
       if (storeFiles != null && storeFiles.size() > 0) {
         final CacheConfig cacheConf = new CacheConfig(conf, hcd);
         for (StoreFileInfo storeFileInfo: storeFiles) {
-          StoreFileSplitter sfs = new StoreFileSplitter(
-            regionFs,
-            family.getBytes(),
-            new StoreFile(
-              mfs.getFileSystem(), storeFileInfo, conf, cacheConf, hcd.getBloomFilterType()));
+          StoreFileSplitter sfs =
+              new StoreFileSplitter(regionFs, family.getBytes(), new HStoreFile(mfs.getFileSystem(),
+                  storeFileInfo, conf, cacheConf, hcd.getBloomFilterType(), true));
           futures.add(threadPool.submit(sfs));
         }
       }

http://git-wip-us.apache.org/repos/asf/hbase/blob/ee0f148c/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java
index 4b96bc6..fd4b091 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java
@@ -28,7 +28,6 @@ import java.util.Map;
 import java.util.Map.Entry;
 import java.util.Random;
 
-import com.google.common.collect.Lists;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
@@ -51,6 +50,8 @@ import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster.SwapRegi
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
 
+import com.google.common.collect.Lists;
+
 /**
  * <p>This is a best effort load balancer. Given a Cost function F(C) =&gt; x It will
  * randomly try and mutate the cluster to Cprime. If F(Cprime) &lt; F(C) then the

http://git-wip-us.apache.org/repos/asf/hbase/blob/ee0f148c/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/CachedMobFile.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/CachedMobFile.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/CachedMobFile.java
index 90d1f2d..b7ebee3 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/CachedMobFile.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/CachedMobFile.java
@@ -27,6 +27,7 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.classification.InterfaceAudience;
 import org.apache.hadoop.hbase.io.hfile.CacheConfig;
 import org.apache.hadoop.hbase.regionserver.BloomType;
+import org.apache.hadoop.hbase.regionserver.HStoreFile;
 import org.apache.hadoop.hbase.regionserver.StoreFile;
 
 /**
@@ -46,7 +47,7 @@ public class CachedMobFile extends MobFile implements Comparable<CachedMobFile>
       CacheConfig cacheConf) throws IOException {
     // XXX: primaryReplica is only used for constructing the key of block cache so it is not a
     // critical problem if we pass the wrong value, so here we always pass true. Need to fix later.
-    StoreFile sf = new StoreFile(fs, path, conf, cacheConf, BloomType.NONE, true);
+    StoreFile sf = new HStoreFile(fs, path, conf, cacheConf, BloomType.NONE, true);
     return new CachedMobFile(sf);
   }
 

http://git-wip-us.apache.org/repos/asf/hbase/blob/ee0f148c/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/MobFile.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/MobFile.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/MobFile.java
index 73355e8..7e3e36f 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/MobFile.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/MobFile.java
@@ -29,6 +29,7 @@ import org.apache.hadoop.hbase.Cell;
 import org.apache.hadoop.hbase.classification.InterfaceAudience;
 import org.apache.hadoop.hbase.io.hfile.CacheConfig;
 import org.apache.hadoop.hbase.regionserver.BloomType;
+import org.apache.hadoop.hbase.regionserver.HStoreFile;
 import org.apache.hadoop.hbase.regionserver.StoreFile;
 import org.apache.hadoop.hbase.regionserver.StoreFileScanner;
 
@@ -146,7 +147,7 @@ public class MobFile {
       throws IOException {
     // XXX: primaryReplica is only used for constructing the key of block cache so it is not a
     // critical problem if we pass the wrong value, so here we always pass true. Need to fix later.
-    StoreFile sf = new StoreFile(fs, path, conf, cacheConf, BloomType.NONE, true);
+    StoreFile sf = new HStoreFile(fs, path, conf, cacheConf, BloomType.NONE, true);
     return new MobFile(sf);
   }
 }

http://git-wip-us.apache.org/repos/asf/hbase/blob/ee0f148c/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/MobUtils.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/MobUtils.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/MobUtils.java
index 06c5001..a869b7a 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/MobUtils.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/MobUtils.java
@@ -69,6 +69,7 @@ import org.apache.hadoop.hbase.mob.compactions.PartitionedMobCompactionRequest.C
 import org.apache.hadoop.hbase.mob.compactions.PartitionedMobCompactor;
 import org.apache.hadoop.hbase.regionserver.BloomType;
 import org.apache.hadoop.hbase.regionserver.HStore;
+import org.apache.hadoop.hbase.regionserver.HStoreFile;
 import org.apache.hadoop.hbase.regionserver.StoreFile;
 import org.apache.hadoop.hbase.regionserver.StoreFileWriter;
 import org.apache.hadoop.hbase.util.Bytes;
@@ -334,7 +335,7 @@ public final class MobUtils {
             LOG.debug(fileName + " is an expired file");
           }
           filesToClean
-              .add(new StoreFile(fs, file.getPath(), conf, cacheConfig, BloomType.NONE, true));
+              .add(new HStoreFile(fs, file.getPath(), conf, cacheConfig, BloomType.NONE, true));
         }
       } catch (Exception e) {
         LOG.error("Cannot parse the fileName " + fileName, e);
@@ -722,7 +723,7 @@ public final class MobUtils {
       CacheConfig cacheConfig, boolean primaryReplica) throws IOException {
     StoreFile storeFile = null;
     try {
-      storeFile = new StoreFile(fs, path, conf, cacheConfig, BloomType.NONE, primaryReplica);
+      storeFile = new HStoreFile(fs, path, conf, cacheConfig, BloomType.NONE, primaryReplica);
       storeFile.initReader();
     } catch (IOException e) {
       LOG.error("Failed to open mob file[" + path + "], keep it in temp 
directory.", e);

http://git-wip-us.apache.org/repos/asf/hbase/blob/ee0f148c/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/compactions/PartitionedMobCompactor.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/compactions/PartitionedMobCompactor.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/compactions/PartitionedMobCompactor.java
index 05c7076..5fe0002 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/compactions/PartitionedMobCompactor.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/compactions/PartitionedMobCompactor.java
@@ -36,7 +36,6 @@ import java.util.concurrent.Callable;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Future;
 
-import com.google.common.annotations.VisibleForTesting;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
@@ -74,6 +73,7 @@ import org.apache.hadoop.hbase.mob.compactions.PartitionedMobCompactionRequest.C
 import org.apache.hadoop.hbase.mob.compactions.PartitionedMobCompactionRequest.CompactionPartitionId;
 import org.apache.hadoop.hbase.regionserver.BloomType;
 import org.apache.hadoop.hbase.regionserver.HStore;
+import org.apache.hadoop.hbase.regionserver.HStoreFile;
 import org.apache.hadoop.hbase.regionserver.ScanInfo;
 import org.apache.hadoop.hbase.regionserver.ScanType;
 import org.apache.hadoop.hbase.regionserver.ScannerContext;
@@ -87,6 +87,8 @@ import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
 import org.apache.hadoop.hbase.util.Pair;
 
+import com.google.common.annotations.VisibleForTesting;
+
 /**
  * An implementation of {@link MobCompactor} that compacts the mob files in partitions.
  */
@@ -335,7 +337,7 @@ public class PartitionedMobCompactor extends MobCompactor {
       for (CompactionDelPartition delPartition : request.getDelPartitions()) {
         for (Path newDelPath : delPartition.listDelFiles()) {
           StoreFile sf =
-              new StoreFile(fs, newDelPath, conf, compactionCacheConfig, BloomType.NONE, true);
+              new HStoreFile(fs, newDelPath, conf, compactionCacheConfig, BloomType.NONE, true);
           // pre-create reader of a del file to avoid race condition when opening the reader in each
           // partition.
           sf.initReader();
@@ -551,7 +553,7 @@ public class PartitionedMobCompactor extends MobCompactor {
       // add the selected mob files and del files into filesToCompact
       List<StoreFile> filesToCompact = new ArrayList<>();
       for (int i = offset; i < batch + offset; i++) {
-        StoreFile sf = new StoreFile(fs, files.get(i).getPath(), conf, compactionCacheConfig,
+        StoreFile sf = new HStoreFile(fs, files.get(i).getPath(), conf, compactionCacheConfig,
             BloomType.NONE, true);
         filesToCompact.add(sf);
       }
@@ -733,7 +735,7 @@ public class PartitionedMobCompactor extends MobCompactor {
         continue;
       }
       for (int i = offset; i < batch + offset; i++) {
-        batchedDelFiles.add(new StoreFile(fs, delFilePaths.get(i), conf, compactionCacheConfig,
+        batchedDelFiles.add(new HStoreFile(fs, delFilePaths.get(i), conf, compactionCacheConfig,
           BloomType.NONE, true));
       }
       // compact the del files in a batch.

http://git-wip-us.apache.org/repos/asf/hbase/blob/ee0f148c/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CompactionTool.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CompactionTool.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CompactionTool.java
index bea3e7f..e1d2ea1 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CompactionTool.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CompactionTool.java
@@ -27,41 +27,39 @@ import java.util.Set;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.util.LineReader;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.JobContext;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mapreduce.lib.input.FileSplit;
-import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
-import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
 import org.apache.hadoop.hbase.HBaseConfiguration;
 import org.apache.hadoop.hbase.HBaseInterfaceAudience;
 import org.apache.hadoop.hbase.HDFSBlocksDistribution;
-import org.apache.hadoop.hbase.HTableDescriptor;
 import org.apache.hadoop.hbase.HRegionInfo;
-import org.apache.hadoop.hbase.regionserver.HRegion;
-import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
-import org.apache.hadoop.hbase.regionserver.compactions.CompactionContext;
-import org.apache.hadoop.hbase.regionserver.throttle.NoLimitThroughputController;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
 import org.apache.hadoop.hbase.mapreduce.JobUtil;
 import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
+import org.apache.hadoop.hbase.regionserver.compactions.CompactionContext;
+import org.apache.hadoop.hbase.regionserver.throttle.NoLimitThroughputController;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
 import org.apache.hadoop.hbase.util.FSTableDescriptors;
 import org.apache.hadoop.hbase.util.FSUtils;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.lib.input.FileSplit;
+import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
+import org.apache.hadoop.util.LineReader;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
 
 /*
  * The CompactionTool allows to execute a compaction specifying a:

http://git-wip-us.apache.org/repos/asf/hbase/blob/ee0f148c/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DateTieredStoreEngine.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DateTieredStoreEngine.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DateTieredStoreEngine.java
index 2d86e39..7fe4c22 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DateTieredStoreEngine.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DateTieredStoreEngine.java
@@ -58,7 +58,7 @@ public class DateTieredStoreEngine extends StoreEngine<DefaultStoreFlusher,
       throws IOException {
     this.compactionPolicy = new DateTieredCompactionPolicy(conf, store);
     this.storeFileManager =
-        new DefaultStoreFileManager(kvComparator, StoreFile.Comparators.SEQ_ID_MAX_TIMESTAMP, conf,
+        new DefaultStoreFileManager(kvComparator, StoreFileComparators.SEQ_ID_MAX_TIMESTAMP, conf,
             compactionPolicy.getConf());
     this.storeFlusher = new DefaultStoreFlusher(conf, store);
     this.compactor = new DateTieredCompactor(conf, store);

http://git-wip-us.apache.org/repos/asf/hbase/blob/ee0f148c/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreEngine.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreEngine.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreEngine.java
index 8e94e2f..5c7b817 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreEngine.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreEngine.java
@@ -69,7 +69,7 @@ public class DefaultStoreEngine extends StoreEngine<
     createCompactionPolicy(conf, store);
     createStoreFlusher(conf, store);
     storeFileManager =
-        new DefaultStoreFileManager(kvComparator, StoreFile.Comparators.SEQ_ID, conf,
+        new DefaultStoreFileManager(kvComparator, StoreFileComparators.SEQ_ID, conf,
             compactionPolicy.getConf());
   }
 

http://git-wip-us.apache.org/repos/asf/hbase/blob/ee0f148c/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreFileManager.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreFileManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreFileManager.java
index da25df5..f4f9aa6 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreFileManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreFileManager.java
@@ -25,6 +25,7 @@ import java.util.Collections;
 import java.util.Comparator;
 import java.util.Iterator;
 import java.util.List;
+import java.util.Optional;
 
 import com.google.common.collect.ImmutableCollection;
 import com.google.common.collect.ImmutableList;
@@ -172,10 +173,13 @@ class DefaultStoreFileManager implements StoreFileManager {
 
   @Override
   public final byte[] getSplitPoint() throws IOException {
-    if (this.storefiles.isEmpty()) {
+    List<StoreFile> storefiles = this.storefiles;
+    if (storefiles.isEmpty()) {
       return null;
     }
-    return StoreUtils.getLargestFile(this.storefiles).getFileSplitPoint(this.kvComparator);
+    Optional<StoreFile> largestFile = StoreUtils.getLargestFile(storefiles);
+    return largestFile.isPresent()
+        ? StoreUtils.getFileSplitPoint(largestFile.get(), kvComparator).orElse(null) : null;
   }
 
   @Override
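
As an aside (illustration only, not part of the commit), getSplitPoint() above
now goes through Optional-returning StoreUtils helpers; spelled out, with the
signatures this hunk implies:

    // Optional<StoreFile> StoreUtils.getLargestFile(Collection<StoreFile>)
    // Optional<byte[]> StoreUtils.getFileSplitPoint(StoreFile, CellComparator)
    Optional<StoreFile> largest = StoreUtils.getLargestFile(storefiles);
    if (!largest.isPresent()) {
      return null;  // empty store, nothing to split on
    }
    // orElse(null) preserves the old "null means no split point" contract.
    return StoreUtils.getFileSplitPoint(largest.get(), kvComparator).orElse(null);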

http://git-wip-us.apache.org/repos/asf/hbase/blob/ee0f148c/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HMobStore.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HMobStore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HMobStore.java
index 73c8a1f..c240df3 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HMobStore.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HMobStore.java
@@ -293,7 +293,7 @@ public class HMobStore extends HStore {
   private void validateMobFile(Path path) throws IOException {
     StoreFile storeFile = null;
     try {
-      storeFile = new StoreFile(region.getFilesystem(), path, conf, this.mobCacheConfig,
+      storeFile = new HStoreFile(region.getFilesystem(), path, conf, this.mobCacheConfig,
           BloomType.NONE, isPrimaryReplicaStore());
       storeFile.initReader();
     } catch (IOException e) {

http://git-wip-us.apache.org/repos/asf/hbase/blob/ee0f148c/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
index a620a25..7f9c766 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
@@ -1456,7 +1456,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi
    * time-sensitive thread.
    *
    * @return Vector of all the storage files that the HRegion's component
-   * HStores make use of.  It's a list of all HStoreFile objects. Returns empty
+   * HStores make use of.  It's a list of all StoreFile objects. Returns empty
    * vector if already closed and null if judged that it should not close.
    *
    * @throws IOException e
@@ -1497,7 +1497,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi
    *
    * @param abort true if server is aborting (only during testing)
    * @return Vector of all the storage files that the HRegion's component
-   * HStores make use of.  It's a list of HStoreFile objects.  Can be null if
+   * HStores make use of.  It's a list of StoreFile objects.  Can be null if
    * we are not to close at this time or we are already closed.
    *
    * @throws IOException e
@@ -4204,7 +4204,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi
       Set<StoreFile> fakeStoreFiles = new HashSet<>(files.size());
       for (Path file: files) {
         fakeStoreFiles.add(
-          new StoreFile(getRegionFileSystem().getFileSystem(), file, this.conf, null, null, true));
+          new HStoreFile(getRegionFileSystem().getFileSystem(), file, this.conf, null, null, true));
       }
       getRegionFileSystem().removeStoreFiles(fakeFamilyName, fakeStoreFiles);
     } else {

http://git-wip-us.apache.org/repos/asf/hbase/blob/ee0f148c/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java
index 051471e..17e255a 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java
@@ -18,13 +18,6 @@
  */
 package org.apache.hadoop.hbase.regionserver;
 
-import com.google.common.annotations.VisibleForTesting;
-import com.google.common.base.Preconditions;
-import com.google.common.collect.ImmutableCollection;
-import com.google.common.collect.ImmutableList;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Sets;
-
 import java.io.IOException;
 import java.io.InterruptedIOException;
 import java.net.InetSocketAddress;
@@ -53,7 +46,15 @@ import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.*;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellComparator;
+import org.apache.hadoop.hbase.CellUtil;
+import org.apache.hadoop.hbase.CompoundConfiguration;
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.MemoryCompactionPolicy;
+import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.backup.FailedArchiveException;
 import org.apache.hadoop.hbase.classification.InterfaceAudience;
 import org.apache.hadoop.hbase.client.Scan;
@@ -90,6 +91,13 @@ import org.apache.hadoop.hbase.util.ReflectionUtils;
 import org.apache.hadoop.util.StringUtils;
 import org.apache.hadoop.util.StringUtils.TraditionalBinaryPrefix;
 
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableCollection;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Sets;
+
 /**
  * A Store holds a column family in a Region.  It's a memstore and a set of zero
  * or more StoreFiles, which stretch backwards over time.
@@ -455,12 +463,12 @@ public class HStore implements Store {
    */
   @Override
   public long getMaxSequenceId() {
-    return StoreFile.getMaxSequenceIdInList(this.getStorefiles());
+    return StoreUtils.getMaxSequenceIdInList(this.getStorefiles());
   }
 
   @Override
   public long getMaxMemstoreTS() {
-    return StoreFile.getMaxMemstoreTSInList(this.getStorefiles());
+    return StoreUtils.getMaxMemstoreTSInList(this.getStorefiles());
   }
 
   /**
@@ -655,7 +663,7 @@ public class HStore implements Store {
 
   private StoreFile createStoreFileAndReader(final StoreFileInfo info) throws IOException {
     info.setRegionCoprocessorHost(this.region.getCoprocessorHost());
-    StoreFile storeFile = new StoreFile(this.getFileSystem(), info, this.conf, this.cacheConf,
+    StoreFile storeFile = new HStoreFile(this.getFileSystem(), info, this.conf, this.cacheConf,
         this.family.getBloomFilterType(), isPrimaryReplicaStore());
     storeFile.initReader();
     return storeFile;
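
A related relocation visible in the hunks above: static helpers that used to
live on the StoreFile class (getMaxSequenceIdInList, getMaxMemstoreTSInList)
move to StoreUtils as part of turning StoreFile into an interface. Illustration
only, assuming a Store reference in scope:

    // before: StoreFile.getMaxSequenceIdInList(store.getStorefiles())
    long maxSeqId = StoreUtils.getMaxSequenceIdInList(store.getStorefiles());
    long maxMemstoreTS = StoreUtils.getMaxMemstoreTSInList(store.getStorefiles());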

http://git-wip-us.apache.org/repos/asf/hbase/blob/ee0f148c/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStoreFile.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStoreFile.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStoreFile.java
new file mode 100644
index 0000000..2df15f9
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStoreFile.java
@@ -0,0 +1,560 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.regionserver;
+
+import java.io.IOException;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.Map;
+import java.util.OptionalLong;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.HDFSBlocksDistribution;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.io.TimeRange;
+import org.apache.hadoop.hbase.io.hfile.BlockType;
+import org.apache.hadoop.hbase.io.hfile.CacheConfig;
+import org.apache.hadoop.hbase.io.hfile.HFile;
+import org.apache.hadoop.hbase.util.BloomFilterFactory;
+import org.apache.hadoop.hbase.util.Bytes;
+
+import com.google.common.annotations.VisibleForTesting;
+
+/**
+ * A Store data file.  Stores usually have one or more of these files.  They
+ * are produced by flushing the memstore to disk.  To
+ * create, instantiate a writer using {@link StoreFileWriter.Builder}
+ * and append data. Be sure to add any metadata before calling close on the
+ * Writer (Use the appendMetadata convenience methods). On close, a StoreFile
+ * is sitting in the Filesystem.  To refer to it, create a StoreFile instance
+ * passing filesystem and path.  To read, call {@link #initReader()}
+ * <p>StoreFiles may also reference store files in another Store.
+ *
+ * The reason for this weird pattern where you use a different instance for the
+ * writer and a reader is that we write once but read a lot more.
+ */
+@InterfaceAudience.Private
+public class HStoreFile implements StoreFile {
+
+  private static final Log LOG = LogFactory.getLog(HStoreFile.class.getName());
+
+  private static final boolean DEFAULT_STORE_FILE_READER_NO_READAHEAD = false;
+
+  private final StoreFileInfo fileInfo;
+  private final FileSystem fs;
+
+  // Block cache configuration and reference.
+  private final CacheConfig cacheConf;
+
+  // Counter that is incremented every time a scanner is created on the
+  // store file. It is decremented when the scan on the store file is
+  // done.
+  private final AtomicInteger refCount = new AtomicInteger(0);
+
+  private final boolean noReadahead;
+
+  private final boolean primaryReplica;
+
+  // Indicates if the file got compacted
+  private volatile boolean compactedAway = false;
+
+  // Keys for metadata stored in backing HFile.
+  // Set when we obtain a Reader.
+  private long sequenceid = -1;
+
+  // max of the MemstoreTS in the KV's in this store
+  // Set when we obtain a Reader.
+  private long maxMemstoreTS = -1;
+
+  // firstKey, lastKey and cellComparator will be set when the reader is opened.
+  private Cell firstKey;
+
+  private Cell lastKey;
+
+  private Comparator<Cell> comparator;
+
+  @Override
+  public CacheConfig getCacheConf() {
+    return cacheConf;
+  }
+
+  @Override
+  public Cell getFirstKey() {
+    return firstKey;
+  }
+
+  @Override
+  public Cell getLastKey() {
+    return lastKey;
+  }
+
+  @Override
+  public Comparator<Cell> getComparator() {
+    return comparator;
+  }
+
+  @Override
+  public long getMaxMemstoreTS() {
+    return maxMemstoreTS;
+  }
+
+  // If true, this file was the product of a major compaction.  It's then set
+  // whenever you get a Reader.
+  private AtomicBoolean majorCompaction = null;
+
+  // If true, this file should not be included in minor compactions.
+  // It's set whenever you get a Reader.
+  private boolean excludeFromMinorCompaction = false;
+
+  /**
+   * Map of the metadata entries in the corresponding HFile. Populated when Reader is opened
+   * after which it is not modified again.
+   */
+  private Map<byte[], byte[]> metadataMap;
+
+  // StoreFile.Reader
+  private volatile StoreFileReader reader;
+
+  /**
+   * Bloom filter type specified in column family configuration. Does not
+   * necessarily correspond to the Bloom filter type present in the HFile.
+   */
+  private final BloomType cfBloomType;
+
+  /**
+   * Constructor, loads a reader and its indices, etc. May allocate a substantial amount of ram
+   * depending on the underlying files (10-20MB?).
+   * @param fs The current file system to use.
+   * @param p The path of the file.
+   * @param conf The current configuration.
+   * @param cacheConf The cache configuration and block cache reference.
+   * @param cfBloomType The bloom type to use for this store file as specified by column family
+   *          configuration. This may or may not be the same as the Bloom filter type actually
+   *          present in the HFile, because column family configuration might change. If this is
+   *          {@link BloomType#NONE}, the existing Bloom filter is ignored.
+   * @deprecated Now we specify whether the StoreFile is for a primary replica when
+   *             constructing, so please use {@link #HStoreFile(FileSystem, Path, Configuration,
+   *             CacheConfig, BloomType, boolean)} directly.
+   */
+  @Deprecated
+  public HStoreFile(final FileSystem fs, final Path p, final Configuration conf,
+      final CacheConfig cacheConf, final BloomType cfBloomType) throws IOException {
+    this(fs, new StoreFileInfo(conf, fs, p), conf, cacheConf, cfBloomType);
+  }
+
+  /**
+   * Constructor, loads a reader and its indices, etc. May allocate a substantial amount of ram
+   * depending on the underlying files (10-20MB?).
+   * @param fs The current file system to use.
+   * @param p The path of the file.
+   * @param conf The current configuration.
+   * @param cacheConf The cache configuration and block cache reference.
+   * @param cfBloomType The bloom type to use for this store file as specified by column family
+   *          configuration. This may or may not be the same as the Bloom filter type actually
+   *          present in the HFile, because column family configuration might change. If this is
+   *          {@link BloomType#NONE}, the existing Bloom filter is ignored.
+   * @param primaryReplica true if this is a store file for primary replica, otherwise false.
+   * @throws IOException
+   */
+  public HStoreFile(FileSystem fs, Path p, Configuration conf, CacheConfig cacheConf,
+      BloomType cfBloomType, boolean primaryReplica) throws IOException {
+    this(fs, new StoreFileInfo(conf, fs, p), conf, cacheConf, cfBloomType, primaryReplica);
+  }
+
+  /**
+   * Constructor, loads a reader and its indices, etc. May allocate a substantial amount of ram
+   * depending on the underlying files (10-20MB?).
+   * @param fs The current file system to use.
+   * @param fileInfo The store file information.
+   * @param conf The current configuration.
+   * @param cacheConf The cache configuration and block cache reference.
+   * @param cfBloomType The bloom type to use for this store file as specified by column family
+   *          configuration. This may or may not be the same as the Bloom filter type actually
+   *          present in the HFile, because column family configuration might change. If this is
+   *          {@link BloomType#NONE}, the existing Bloom filter is ignored.
+   * @deprecated Now we specify whether the StoreFile is for a primary replica when
+   *             constructing, so please use {@link #HStoreFile(FileSystem, StoreFileInfo,
+   *             Configuration, CacheConfig, BloomType, boolean)} directly.
+   */
+  @Deprecated
+  public HStoreFile(final FileSystem fs, final StoreFileInfo fileInfo, final Configuration conf,
+      final CacheConfig cacheConf, final BloomType cfBloomType) throws IOException {
+    this(fs, fileInfo, conf, cacheConf, cfBloomType, true);
+  }
+
+  /**
+   * Constructor, loads a reader and its indices, etc. May allocate a substantial amount of ram
+   * depending on the underlying files (10-20MB?).
+   * @param fs fs The current file system to use.
+   * @param fileInfo The store file information.
+   * @param conf The current configuration.
+   * @param cacheConf The cache configuration and block cache reference.
+   * @param cfBloomType The bloom type to use for this store file as specified by column
+   *          family configuration. This may or may not be the same as the Bloom filter type
+   *          actually present in the HFile, because column family configuration might change. If
+   *          this is {@link BloomType#NONE}, the existing Bloom filter is ignored.
+   * @param primaryReplica true if this is a store file for primary replica, otherwise false.
+   */
+  public HStoreFile(FileSystem fs, StoreFileInfo fileInfo, Configuration conf, CacheConfig cacheConf,
+      BloomType cfBloomType, boolean primaryReplica) {
+    this.fs = fs;
+    this.fileInfo = fileInfo;
+    this.cacheConf = cacheConf;
+    this.noReadahead =
+        conf.getBoolean(STORE_FILE_READER_NO_READAHEAD, DEFAULT_STORE_FILE_READER_NO_READAHEAD);
+    if (BloomFilterFactory.isGeneralBloomEnabled(conf)) {
+      this.cfBloomType = cfBloomType;
+    } else {
+      LOG.info("Ignoring bloom filter check for file " + this.getPath() + ": " 
+ "cfBloomType=" +
+          cfBloomType + " (disabled in config)");
+      this.cfBloomType = BloomType.NONE;
+    }
+    this.primaryReplica = primaryReplica;
+  }
+
+  @Override
+  public StoreFileInfo getFileInfo() {
+    return this.fileInfo;
+  }
+
+  @Override
+  public Path getPath() {
+    return this.fileInfo.getPath();
+  }
+
+  @Override
+  public Path getQualifiedPath() {
+    return this.fileInfo.getPath().makeQualified(fs.getUri(), fs.getWorkingDirectory());
+  }
+
+  @Override
+  public boolean isReference() {
+    return this.fileInfo.isReference();
+  }
+
+  @Override
+  public boolean isHFile() {
+    return StoreFileInfo.isHFile(this.fileInfo.getPath());
+  }
+
+  @Override
+  public boolean isMajorCompactionResult() {
+    if (this.majorCompaction == null) {
+      throw new NullPointerException("This has not been set yet");
+    }
+    return this.majorCompaction.get();
+  }
+
+  @Override
+  public boolean excludeFromMinorCompaction() {
+    return this.excludeFromMinorCompaction;
+  }
+
+  @Override
+  public long getMaxSequenceId() {
+    return this.sequenceid;
+  }
+
+  @Override
+  public long getModificationTimeStamp() throws IOException {
+    return fileInfo.getModificationTime();
+  }
+
+  @Override
+  public byte[] getMetadataValue(byte[] key) {
+    return metadataMap.get(key);
+  }
+
+  @Override
+  public boolean isBulkLoadResult() {
+    boolean bulkLoadedHFile = false;
+    String fileName = this.getPath().getName();
+    int startPos = fileName.indexOf("SeqId_");
+    if (startPos != -1) {
+      bulkLoadedHFile = true;
+    }
+    return bulkLoadedHFile || (metadataMap != null && metadataMap.containsKey(BULKLOAD_TIME_KEY));
+  }
+
+  @Override
+  public boolean isCompactedAway() {
+    return compactedAway;
+  }
+
+  @VisibleForTesting
+  public int getRefCount() {
+    return refCount.get();
+  }
+
+  @Override
+  public boolean isReferencedInReads() {
+    int rc = refCount.get();
+    assert rc >= 0; // we should not go negative.
+    return rc > 0;
+  }
+
+  @Override
+  public OptionalLong getBulkLoadTimestamp() {
+    byte[] bulkLoadTimestamp = metadataMap.get(BULKLOAD_TIME_KEY);
+    return bulkLoadTimestamp == null ? OptionalLong.empty()
+        : OptionalLong.of(Bytes.toLong(bulkLoadTimestamp));
+  }
+
+  @Override
+  public HDFSBlocksDistribution getHDFSBlockDistribution() {
+    return this.fileInfo.getHDFSBlockDistribution();
+  }
+
+  /**
+   * Opens reader on this store file. Called by {@link #initReader()}.
+   * @throws IOException
+   * @see #closeReader(boolean)
+   */
+  private void open() throws IOException {
+    if (this.reader != null) {
+      throw new IllegalAccessError("Already open");
+    }
+
+    // Open the StoreFile.Reader
+    this.reader = fileInfo.open(this.fs, this.cacheConf, false, noReadahead ? 0L : -1L,
+      primaryReplica, refCount, true);
+
+    // Load up indices and fileinfo. This also loads Bloom filter type.
+    metadataMap = Collections.unmodifiableMap(this.reader.loadFileInfo());
+
+    // Read in our metadata.
+    byte [] b = metadataMap.get(MAX_SEQ_ID_KEY);
+    if (b != null) {
+      // By convention, if halfhfile, top half has a sequence number > bottom
+      // half. That's why we add one in below. It's done in case the two halves
+      // are ever merged back together --rare.  Without it, on open of store,
+      // since store files are distinguished by sequence id, the one half would
+      // subsume the other.
+      this.sequenceid = Bytes.toLong(b);
+      if (fileInfo.isTopReference()) {
+        this.sequenceid += 1;
+      }
+    }
+
+    if (isBulkLoadResult()){
+      // generate the sequenceId from the fileName
+      // fileName is of the form <randomName>_SeqId_<id-when-loaded>_
+      String fileName = this.getPath().getName();
+      // Use lastIndexOf() to get the last, most recent bulk load seqId.
+      int startPos = fileName.lastIndexOf("SeqId_");
+      if (startPos != -1) {
+        this.sequenceid = Long.parseLong(fileName.substring(startPos + 6,
+            fileName.indexOf('_', startPos + 6)));
+        // Handle reference files as done above.
+        if (fileInfo.isTopReference()) {
+          this.sequenceid += 1;
+        }
+      }
+      // SKIP_RESET_SEQ_ID only works in bulk loaded file.
+      // In mob compaction, the hfile where the cells contain the path of a new mob file is bulk
+      // loaded to hbase; these cells have the same seqIds as the old ones. We do not want
+      // to reset new seqIds for them since this might make a mess of the visibility of cells that
+      // have the same row key but different seqIds.
+      boolean skipResetSeqId = isSkipResetSeqId(metadataMap.get(SKIP_RESET_SEQ_ID));
+      if (skipResetSeqId) {
+        // increase the seqId when it is a bulk loaded file from mob compaction.
+        this.sequenceid += 1;
+      }
+      this.reader.setSkipResetSeqId(skipResetSeqId);
+      this.reader.setBulkLoaded(true);
+    }
+    this.reader.setSequenceID(this.sequenceid);
+
+    b = metadataMap.get(HFile.Writer.MAX_MEMSTORE_TS_KEY);
+    if (b != null) {
+      this.maxMemstoreTS = Bytes.toLong(b);
+    }
+
+    b = metadataMap.get(MAJOR_COMPACTION_KEY);
+    if (b != null) {
+      boolean mc = Bytes.toBoolean(b);
+      if (this.majorCompaction == null) {
+        this.majorCompaction = new AtomicBoolean(mc);
+      } else {
+        this.majorCompaction.set(mc);
+      }
+    } else {
+      // Presume it is not major compacted if it doesn't explicitly say so
+      // HFileOutputFormat explicitly sets the major compacted key.
+      this.majorCompaction = new AtomicBoolean(false);
+    }
+
+    b = metadataMap.get(EXCLUDE_FROM_MINOR_COMPACTION_KEY);
+    this.excludeFromMinorCompaction = (b != null && Bytes.toBoolean(b));
+
+    BloomType hfileBloomType = reader.getBloomFilterType();
+    if (cfBloomType != BloomType.NONE) {
+      reader.loadBloomfilter(BlockType.GENERAL_BLOOM_META);
+      if (hfileBloomType != cfBloomType) {
+        LOG.info("HFile Bloom filter type for "
+            + reader.getHFileReader().getName() + ": " + hfileBloomType
+            + ", but " + cfBloomType + " specified in column family "
+            + "configuration");
+      }
+    } else if (hfileBloomType != BloomType.NONE) {
+      LOG.info("Bloom filter turned off by CF config for "
+          + reader.getHFileReader().getName());
+    }
+
+    // load delete family bloom filter
+    reader.loadBloomfilter(BlockType.DELETE_FAMILY_BLOOM_META);
+
+    try {
+      this.reader.timeRange = TimeRangeTracker.getTimeRange(metadataMap.get(TIMERANGE_KEY));
+    } catch (IllegalArgumentException e) {
+      LOG.error("Error reading timestamp range data from meta -- " +
+          "proceeding without", e);
+      this.reader.timeRange = null;
+    }
+    // initialize so we can reuse them after reader closed.
+    firstKey = reader.getFirstKey();
+    lastKey = reader.getLastKey();
+    comparator = reader.getComparator();
+  }
+
+  @Override
+  public void initReader() throws IOException {
+    if (reader == null) {
+      try {
+        open();
+      } catch (Exception e) {
+        try {
+          boolean evictOnClose = cacheConf != null ? cacheConf.shouldEvictOnClose() : true;
+          this.closeReader(evictOnClose);
+        } catch (IOException ee) {
+          LOG.warn("failed to close reader", ee);
+        }
+        throw e;
+      }
+    }
+  }
+
+  private StoreFileReader createStreamReader(boolean canUseDropBehind) throws IOException {
+    initReader();
+    StoreFileReader reader = fileInfo.open(this.fs, this.cacheConf, canUseDropBehind, -1L,
+      primaryReplica, refCount, false);
+    reader.copyFields(this.reader);
+    return reader;
+  }
+
+  @Override
+  public StoreFileScanner getPreadScanner(boolean cacheBlocks, long readPt, long scannerOrder,
+      boolean canOptimizeForNonNullColumn) {
+    return getReader().getStoreFileScanner(cacheBlocks, true, false, readPt, scannerOrder,
+      canOptimizeForNonNullColumn);
+  }
+
+  @Override
+  public StoreFileScanner getStreamScanner(boolean canUseDropBehind, boolean cacheBlocks,
+      boolean isCompaction, long readPt, long scannerOrder, boolean canOptimizeForNonNullColumn)
+      throws IOException {
+    return createStreamReader(canUseDropBehind).getStoreFileScanner(cacheBlocks, false,
+      isCompaction, readPt, scannerOrder, canOptimizeForNonNullColumn);
+  }
+
+  @Override
+  public StoreFileReader getReader() {
+    return this.reader;
+  }
+
+  @Override
+  public synchronized void closeReader(boolean evictOnClose)
+      throws IOException {
+    if (this.reader != null) {
+      this.reader.close(evictOnClose);
+      this.reader = null;
+    }
+  }
+
+  @Override
+  public void markCompactedAway() {
+    this.compactedAway = true;
+  }
+
+  @Override
+  public void deleteReader() throws IOException {
+    boolean evictOnClose =
+        cacheConf != null? cacheConf.shouldEvictOnClose(): true;
+    closeReader(evictOnClose);
+    this.fs.delete(getPath(), true);
+  }
+
+  @Override
+  public String toString() {
+    return this.fileInfo.toString();
+  }
+
+  @Override
+  public String toStringDetailed() {
+    StringBuilder sb = new StringBuilder();
+    sb.append(this.getPath().toString());
+    sb.append(", isReference=").append(isReference());
+    sb.append(", isBulkLoadResult=").append(isBulkLoadResult());
+    if (isBulkLoadResult()) {
+      sb.append(", bulkLoadTS=");
+      OptionalLong bulkLoadTS = getBulkLoadTimestamp();
+      if (bulkLoadTS.isPresent()) {
+        sb.append(bulkLoadTS.getAsLong());
+      } else {
+        sb.append("NotPresent");
+      }
+    } else {
+      sb.append(", seqid=").append(getMaxSequenceId());
+    }
+    sb.append(", majorCompaction=").append(isMajorCompactionResult());
+
+    return sb.toString();
+  }
+
+  /**
+   * Gets whether to skip resetting the sequence id for cells.
+   * @param skipResetSeqId The byte array of boolean.
+   * @return Whether to skip resetting the sequence id.
+   */
+  private boolean isSkipResetSeqId(byte[] skipResetSeqId) {
+    if (skipResetSeqId != null && skipResetSeqId.length == 1) {
+      return Bytes.toBoolean(skipResetSeqId);
+    }
+    return false;
+  }
+
+  @Override
+  public OptionalLong getMinimumTimestamp() {
+    TimeRange tr = getReader().timeRange;
+    return tr != null ? OptionalLong.of(tr.getMin()) : OptionalLong.empty();
+  }
+
+  @Override
+  public OptionalLong getMaximumTimestamp() {
+    TimeRange tr = getReader().timeRange;
+    return tr != null ? OptionalLong.of(tr.getMax()) : OptionalLong.empty();
+  }
+}
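
To make the read path of the new class concrete, a usage sketch based on the
javadoc above (illustration only; fs, path, conf and cacheConf are assumed to
be in scope, inside a method that may throw IOException):

    // Write once via StoreFileWriter.Builder (append metadata before close),
    // then read back through an HStoreFile instance:
    HStoreFile sf = new HStoreFile(fs, path, conf, cacheConf, BloomType.NONE, true);
    sf.initReader();                      // loads fileinfo, blooms, timerange
    try {
      long seqId = sf.getMaxSequenceId(); // bumped by one for top references
      OptionalLong minTs = sf.getMinimumTimestamp();
      StoreFileScanner scanner = sf.getPreadScanner(true, Long.MAX_VALUE, 0, false);
      // ... scan, then close the scanner ...
    } finally {
      sf.closeReader(true);               // true = evict cached blocks
    }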

http://git-wip-us.apache.org/repos/asf/hbase/blob/ee0f148c/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java
index 91ff97a..a5efbed 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java
@@ -1,5 +1,4 @@
 /**
- *
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -18,789 +17,200 @@
  */
 package org.apache.hadoop.hbase.regionserver;
 
-import com.google.common.annotations.VisibleForTesting;
-import com.google.common.base.Function;
-import com.google.common.collect.ImmutableList;
-import com.google.common.collect.Ordering;
-
 import java.io.IOException;
-import java.util.Collection;
-import java.util.Collections;
 import java.util.Comparator;
-import java.util.Map;
-import java.util.UUID;
-import java.util.concurrent.atomic.AtomicBoolean;
-import java.util.concurrent.atomic.AtomicInteger;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
+import java.util.OptionalLong;
+
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.Cell;
-import org.apache.hadoop.hbase.CellComparator;
-import org.apache.hadoop.hbase.CellUtil;
+import org.apache.hadoop.hbase.HBaseInterfaceAudience;
 import org.apache.hadoop.hbase.HDFSBlocksDistribution;
 import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.io.hfile.BlockType;
+import org.apache.hadoop.hbase.classification.InterfaceStability;
 import org.apache.hadoop.hbase.io.hfile.CacheConfig;
-import org.apache.hadoop.hbase.io.hfile.HFile;
-import org.apache.hadoop.hbase.util.BloomFilterFactory;
 import org.apache.hadoop.hbase.util.Bytes;
 
 /**
- * A Store data file.  Stores usually have one or more of these files.  They
- * are produced by flushing the memstore to disk.  To
- * create, instantiate a writer using {@link StoreFileWriter.Builder}
- * and append data. Be sure to add any metadata before calling close on the
- * Writer (Use the appendMetadata convenience methods). On close, a StoreFile
- * is sitting in the Filesystem.  To refer to it, create a StoreFile instance
- * passing filesystem and path.  To read, call {@link #initReader()}
- * <p>StoreFiles may also reference store files in another Store.
- *
- * The reason for this weird pattern where you use a different instance for the
- * writer and a reader is that we write once but read a lot more.
+ * An interface to describe a store data file.
  */
-@InterfaceAudience.LimitedPrivate("Coprocessor")
-public class StoreFile {
-  private static final Log LOG = LogFactory.getLog(StoreFile.class.getName());
+@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.COPROC)
+@InterfaceStability.Evolving
+public interface StoreFile {
 
-  public static final String STORE_FILE_READER_NO_READAHEAD = "hbase.store.reader.no-readahead";
-
-  private static final boolean DEFAULT_STORE_FILE_READER_NO_READAHEAD = false;
+  static final String STORE_FILE_READER_NO_READAHEAD = "hbase.store.reader.no-readahead";
 
   // Keys for fileinfo values in HFile
 
   /** Max Sequence ID in FileInfo */
-  public static final byte [] MAX_SEQ_ID_KEY = Bytes.toBytes("MAX_SEQ_ID_KEY");
+  static final byte[] MAX_SEQ_ID_KEY = Bytes.toBytes("MAX_SEQ_ID_KEY");
 
   /** Major compaction flag in FileInfo */
-  public static final byte[] MAJOR_COMPACTION_KEY =
-      Bytes.toBytes("MAJOR_COMPACTION_KEY");
+  static final byte[] MAJOR_COMPACTION_KEY = Bytes.toBytes("MAJOR_COMPACTION_KEY");
 
   /** Minor compaction flag in FileInfo */
-  public static final byte[] EXCLUDE_FROM_MINOR_COMPACTION_KEY =
+  static final byte[] EXCLUDE_FROM_MINOR_COMPACTION_KEY =
       Bytes.toBytes("EXCLUDE_FROM_MINOR_COMPACTION");
 
   /** Bloom filter Type in FileInfo */
-  public static final byte[] BLOOM_FILTER_TYPE_KEY =
-      Bytes.toBytes("BLOOM_FILTER_TYPE");
+  static final byte[] BLOOM_FILTER_TYPE_KEY = Bytes.toBytes("BLOOM_FILTER_TYPE");
 
   /** Delete Family Count in FileInfo */
-  public static final byte[] DELETE_FAMILY_COUNT =
-      Bytes.toBytes("DELETE_FAMILY_COUNT");
+  static final byte[] DELETE_FAMILY_COUNT = Bytes.toBytes("DELETE_FAMILY_COUNT");
 
   /** Last Bloom filter key in FileInfo */
-  public static final byte[] LAST_BLOOM_KEY = Bytes.toBytes("LAST_BLOOM_KEY");
-
-  /** Key for Timerange information in metadata*/
-  public static final byte[] TIMERANGE_KEY = Bytes.toBytes("TIMERANGE");
-
-  /** Key for timestamp of earliest-put in metadata*/
-  public static final byte[] EARLIEST_PUT_TS = Bytes.toBytes("EARLIEST_PUT_TS");
-
-  /** Key for the number of mob cells in metadata*/
-  public static final byte[] MOB_CELLS_COUNT = Bytes.toBytes("MOB_CELLS_COUNT");
-
-  private final StoreFileInfo fileInfo;
-  private final FileSystem fs;
-
-  // Block cache configuration and reference.
-  private final CacheConfig cacheConf;
-
-  // Counter that is incremented every time a scanner is created on the
-  // store file. It is decremented when the scan on the store file is
-  // done.
-  private final AtomicInteger refCount = new AtomicInteger(0);
-
-  private final boolean noReadahead;
-
-  private final boolean primaryReplica;
-
-  // Indicates if the file got compacted
-  private volatile boolean compactedAway = false;
-
-  // Keys for metadata stored in backing HFile.
-  // Set when we obtain a Reader.
-  private long sequenceid = -1;
-
-  // max of the MemstoreTS in the KV's in this store
-  // Set when we obtain a Reader.
-  private long maxMemstoreTS = -1;
+  static final byte[] LAST_BLOOM_KEY = Bytes.toBytes("LAST_BLOOM_KEY");
 
-  // firstKey, lastkey and cellComparator will be set when openReader.
-  private Cell firstKey;
+  /** Key for Timerange information in metadata */
+  static final byte[] TIMERANGE_KEY = Bytes.toBytes("TIMERANGE");
 
-  private Cell lastKey;
+  /** Key for timestamp of earliest-put in metadata */
+  static final byte[] EARLIEST_PUT_TS = Bytes.toBytes("EARLIEST_PUT_TS");
 
-  private Comparator<Cell> comparator;
-
-  CacheConfig getCacheConf() {
-    return cacheConf;
-  }
-
-  public Cell getFirstKey() {
-    return firstKey;
-  }
-
-  public Cell getLastKey() {
-    return lastKey;
-  }
-
-  public Comparator<Cell> getComparator() {
-    return comparator;
-  }
-
-  public long getMaxMemstoreTS() {
-    return maxMemstoreTS;
-  }
-
-  public void setMaxMemstoreTS(long maxMemstoreTS) {
-    this.maxMemstoreTS = maxMemstoreTS;
-  }
-
-  // If true, this file was product of a major compaction.  Its then set
-  // whenever you get a Reader.
-  private AtomicBoolean majorCompaction = null;
-
-  // If true, this file should not be included in minor compactions.
-  // It's set whenever you get a Reader.
-  private boolean excludeFromMinorCompaction = false;
+  /** Key for the number of mob cells in metadata */
+  static final byte[] MOB_CELLS_COUNT = Bytes.toBytes("MOB_CELLS_COUNT");
 
   /** Meta key set when store file is a result of a bulk load */
-  public static final byte[] BULKLOAD_TASK_KEY =
-    Bytes.toBytes("BULKLOAD_SOURCE_TASK");
-  public static final byte[] BULKLOAD_TIME_KEY =
-    Bytes.toBytes("BULKLOAD_TIMESTAMP");
+  static final byte[] BULKLOAD_TASK_KEY = Bytes.toBytes("BULKLOAD_SOURCE_TASK");
+  static final byte[] BULKLOAD_TIME_KEY = Bytes.toBytes("BULKLOAD_TIMESTAMP");
 
   /**
-   * Map of the metadata entries in the corresponding HFile. Populated when Reader is opened
-   * after which it is not modified again.
+   * Key for skipping resetting sequence id in metadata. For bulk loaded hfiles, the scanner resets
+   * the cell seqId with the latest one, if this metadata is set as true, the reset is skipped.
    */
-  private Map<byte[], byte[]> metadataMap;
+  static final byte[] SKIP_RESET_SEQ_ID = Bytes.toBytes("SKIP_RESET_SEQ_ID");
 
-  // StoreFile.Reader
-  private volatile StoreFileReader reader;
+  CacheConfig getCacheConf();
 
-  /**
-   * Bloom filter type specified in column family configuration. Does not
-   * necessarily correspond to the Bloom filter type present in the HFile.
-   */
-  private final BloomType cfBloomType;
+  Cell getFirstKey();
 
-  /**
-   * Key for skipping resetting sequence id in metadata.
-   * For bulk loaded hfiles, the scanner resets the cell seqId with the latest one,
-   * if this metadata is set as true, the reset is skipped.
-   */
-  public static final byte[] SKIP_RESET_SEQ_ID = Bytes.toBytes("SKIP_RESET_SEQ_ID");
+  Cell getLastKey();
 
-  /**
-   * Constructor, loads a reader and its indices, etc. May allocate a substantial amount of ram
-   * depending on the underlying files (10-20MB?).
-   * @param fs The current file system to use.
-   * @param p The path of the file.
-   * @param conf The current configuration.
-   * @param cacheConf The cache configuration and block cache reference.
-   * @param cfBloomType The bloom type to use for this store file as specified by column family
-   *          configuration. This may or may not be the same as the Bloom filter type actually
-   *          present in the HFile, because column family configuration might change. If this is
-   *          {@link BloomType#NONE}, the existing Bloom filter is ignored.
-   * @deprecated Now we will specify whether the StoreFile is for primary replica when
-   *             constructing, so please use
-   *             {@link #StoreFile(FileSystem, Path, Configuration, CacheConfig, BloomType, boolean)}
-   *             directly.
-   */
-  @Deprecated
-  public StoreFile(final FileSystem fs, final Path p, final Configuration conf,
-      final CacheConfig cacheConf, final BloomType cfBloomType) throws IOException {
-    this(fs, new StoreFileInfo(conf, fs, p), conf, cacheConf, cfBloomType);
-  }
+  Comparator<Cell> getComparator();
 
-  /**
-   * Constructor, loads a reader and its indices, etc. May allocate a substantial amount of ram
-   * depending on the underlying files (10-20MB?).
-   * @param fs The current file system to use.
-   * @param p The path of the file.
-   * @param conf The current configuration.
-   * @param cacheConf The cache configuration and block cache reference.
-   * @param cfBloomType The bloom type to use for this store file as specified by column family
-   *          configuration. This may or may not be the same as the Bloom filter type actually
-   *          present in the HFile, because column family configuration might change. If this is
-   *          {@link BloomType#NONE}, the existing Bloom filter is ignored.
-   * @param primaryReplica true if this is a store file for primary replica, otherwise false.
-   * @throws IOException
-   */
-  public StoreFile(FileSystem fs, Path p, Configuration conf, CacheConfig cacheConf,
-      BloomType cfBloomType, boolean primaryReplica) throws IOException {
-    this(fs, new StoreFileInfo(conf, fs, p), conf, cacheConf, cfBloomType, primaryReplica);
-  }
-
-  /**
-   * Constructor, loads a reader and its indices, etc. May allocate a substantial amount of ram
-   * depending on the underlying files (10-20MB?).
-   * @param fs The current file system to use.
-   * @param fileInfo The store file information.
-   * @param conf The current configuration.
-   * @param cacheConf The cache configuration and block cache reference.
-   * @param cfBloomType The bloom type to use for this store file as specified by column family
-   *          configuration. This may or may not be the same as the Bloom filter type actually
-   *          present in the HFile, because column family configuration might change. If this is
-   *          {@link BloomType#NONE}, the existing Bloom filter is ignored.
-   * @deprecated Now we will specify whether the StoreFile is for primary replica when
-   *             constructing, so please use
-   *             {@link #StoreFile(FileSystem, StoreFileInfo, Configuration, CacheConfig, BloomType, boolean)}
-   *             directly.
-   */
-  @Deprecated
-  public StoreFile(final FileSystem fs, final StoreFileInfo fileInfo, final Configuration conf,
-      final CacheConfig cacheConf, final BloomType cfBloomType) throws IOException {
-    this(fs, fileInfo, conf, cacheConf, cfBloomType, true);
-  }
+  long getMaxMemstoreTS();
 
   /**
-   * Constructor, loads a reader and its indices, etc. May allocate a substantial amount of ram
-   * depending on the underlying files (10-20MB?).
-   * @param fs The current file system to use.
-   * @param fileInfo The store file information.
-   * @param conf The current configuration.
-   * @param cacheConf The cache configuration and block cache reference.
-   * @param cfBloomType The bloom type to use for this store file as specified by column
-   *          family configuration. This may or may not be the same as the Bloom filter type
-   *          actually present in the HFile, because column family configuration might change. If
-   *          this is {@link BloomType#NONE}, the existing Bloom filter is ignored.
-   * @param primaryReplica true if this is a store file for primary replica, otherwise false.
+   * @return the StoreFileInfo object associated to this StoreFile. null if the StoreFile is not a
+   *         reference.
    */
-  public StoreFile(FileSystem fs, StoreFileInfo fileInfo, Configuration conf, CacheConfig cacheConf,
-      BloomType cfBloomType, boolean primaryReplica) {
-    this.fs = fs;
-    this.fileInfo = fileInfo;
-    this.cacheConf = cacheConf;
-    this.noReadahead =
-        conf.getBoolean(STORE_FILE_READER_NO_READAHEAD, DEFAULT_STORE_FILE_READER_NO_READAHEAD);
-    if (BloomFilterFactory.isGeneralBloomEnabled(conf)) {
-      this.cfBloomType = cfBloomType;
-    } else {
-      LOG.info("Ignoring bloom filter check for file " + this.getPath() + ": " + "cfBloomType=" +
-          cfBloomType + " (disabled in config)");
-      this.cfBloomType = BloomType.NONE;
-    }
-    this.primaryReplica = primaryReplica;
-  }
-
-  /**
-   * @return the StoreFile object associated to this StoreFile.
-   *         null if the StoreFile is not a reference.
-   */
-  public StoreFileInfo getFileInfo() {
-    return this.fileInfo;
-  }
+  StoreFileInfo getFileInfo();
 
   /**
    * @return Path or null if this StoreFile was made with a Stream.
    */
-  public Path getPath() {
-    return this.fileInfo.getPath();
-  }
+  Path getPath();
 
   /**
    * @return Returns the qualified path of this StoreFile
    */
-  public Path getQualifiedPath() {
-    return this.fileInfo.getPath().makeQualified(fs.getUri(), fs.getWorkingDirectory());
-  }
+  Path getQualifiedPath();
 
   /**
-   * @return True if this is a StoreFile Reference; call
-   * after {@link #open()} else may get wrong answer.
+   * @return True if this is a StoreFile Reference.
    */
-  public boolean isReference() {
-    return this.fileInfo.isReference();
-  }
+  boolean isReference();
 
   /**
    * @return True if this is HFile.
    */
-  public boolean isHFile() {
-    return StoreFileInfo.isHFile(this.fileInfo.getPath());
-  }
+  boolean isHFile();
 
   /**
    * @return True if this file was made by a major compaction.
    */
-  public boolean isMajorCompaction() {
-    if (this.majorCompaction == null) {
-      throw new NullPointerException("This has not been set yet");
-    }
-    return this.majorCompaction.get();
-  }
+  boolean isMajorCompactionResult();
 
   /**
    * @return True if this file should not be part of a minor compaction.
    */
-  public boolean excludeFromMinorCompaction() {
-    return this.excludeFromMinorCompaction;
-  }
+  boolean excludeFromMinorCompaction();
 
   /**
   * @return This file's maximum edit sequence id.
    */
-  public long getMaxSequenceId() {
-    return this.sequenceid;
-  }
+  long getMaxSequenceId();
 
-  public long getModificationTimeStamp() throws IOException {
-    return (fileInfo == null) ? 0 : fileInfo.getModificationTime();
-  }
+  long getModificationTimeStamp() throws IOException;
 
   /**
    * Only used by the Striped Compaction Policy
    * @param key
    * @return value associated with the metadata key
    */
-  public byte[] getMetadataValue(byte[] key) {
-    return metadataMap.get(key);
-  }
-
-  /**
-   * Return the largest memstoreTS found across all storefiles in
-   * the given list. Store files that were created by a mapreduce
-   * bulk load are ignored, as they do not correspond to any specific
-   * put operation, and thus do not have a memstoreTS associated with them.
-   * @return 0 if no non-bulk-load files are provided or, this is Store that
-   * does not yet have any store files.
-   */
-  public static long getMaxMemstoreTSInList(Collection<StoreFile> sfs) {
-    long max = 0;
-    for (StoreFile sf : sfs) {
-      if (!sf.isBulkLoadResult()) {
-        max = Math.max(max, sf.getMaxMemstoreTS());
-      }
-    }
-    return max;
-  }
-
-  /**
-   * Return the highest sequence ID found across all storefiles in
-   * the given list.
-   * @param sfs
-   * @return 0 if no non-bulk-load files are provided or, this is Store that
-   * does not yet have any store files.
-   */
-  public static long getMaxSequenceIdInList(Collection<StoreFile> sfs) {
-    long max = 0;
-    for (StoreFile sf : sfs) {
-      max = Math.max(max, sf.getMaxSequenceId());
-    }
-    return max;
-  }
+  byte[] getMetadataValue(byte[] key);
 
   /**
-   * Check if this storefile was created by bulk load.
-   * When a hfile is bulk loaded into HBase, we append
-   * {@code '_SeqId_<id-when-loaded>'} to the hfile name, unless
-   * "hbase.mapreduce.bulkload.assign.sequenceNumbers" is
-   * explicitly turned off.
-   * If "hbase.mapreduce.bulkload.assign.sequenceNumbers"
-   * is turned off, fall back to BULKLOAD_TIME_KEY.
+   * Check if this storefile was created by bulk load. When a hfile is bulk loaded into HBase, we
+   * append {@code '_SeqId_<id-when-loaded>'} to the hfile name, unless
+   * "hbase.mapreduce.bulkload.assign.sequenceNumbers" is explicitly turned off. If
+   * "hbase.mapreduce.bulkload.assign.sequenceNumbers" is turned off, fall back to
+   * BULKLOAD_TIME_KEY.
    * @return true if this storefile was created by bulk load.
    */
-  public boolean isBulkLoadResult() {
-    boolean bulkLoadedHFile = false;
-    String fileName = this.getPath().getName();
-    int startPos = fileName.indexOf("SeqId_");
-    if (startPos != -1) {
-      bulkLoadedHFile = true;
-    }
-    return bulkLoadedHFile || (metadataMap != null && metadataMap.containsKey(BULKLOAD_TIME_KEY));
-  }
-
-  @VisibleForTesting
-  public boolean isCompactedAway() {
-    return compactedAway;
-  }
-
-  @VisibleForTesting
-  public int getRefCount() {
-    return refCount.get();
-  }
+  boolean isBulkLoadResult();
+
+  boolean isCompactedAway();
 
   /**
    * @return true if the file is still used in reads
    */
-  public boolean isReferencedInReads() {
-    int rc = refCount.get();
-    assert rc >= 0; // we should not go negative.
-    return rc > 0;
-  }
+  boolean isReferencedInReads();
 
   /**
    * Return the timestamp at which this bulk load file was generated.
    */
-  public long getBulkLoadTimestamp() {
-    byte[] bulkLoadTimestamp = metadataMap.get(BULKLOAD_TIME_KEY);
-    return (bulkLoadTimestamp == null) ? 0 : Bytes.toLong(bulkLoadTimestamp);
-  }
+  OptionalLong getBulkLoadTimestamp();
 
   /**
-   * @return the cached value of HDFS blocks distribution. The cached value is
-   * calculated when store file is opened.
+   * @return the cached value of HDFS blocks distribution. The cached value is calculated when store
+   *         file is opened.
    */
-  public HDFSBlocksDistribution getHDFSBlockDistribution() {
-    return this.fileInfo.getHDFSBlockDistribution();
-  }
+  HDFSBlocksDistribution getHDFSBlockDistribution();
 
   /**
-   * Opens reader on this store file. Called by Constructor.
-   * @throws IOException
-   * @see #closeReader(boolean)
+   * Initialize the reader used for pread.
    */
-  private void open() throws IOException {
-    if (this.reader != null) {
-      throw new IllegalAccessError("Already open");
-    }
-
-    // Open the StoreFile.Reader
-    this.reader = fileInfo.open(this.fs, this.cacheConf, false, noReadahead ? 0L : -1L,
-      primaryReplica, refCount, true);
-
-    // Load up indices and fileinfo. This also loads Bloom filter type.
-    metadataMap = Collections.unmodifiableMap(this.reader.loadFileInfo());
-
-    // Read in our metadata.
-    byte [] b = metadataMap.get(MAX_SEQ_ID_KEY);
-    if (b != null) {
-      // By convention, if halfhfile, top half has a sequence number > bottom
-      // half. That's why we add one in below. It's done for the case the two halves
-      // are ever merged back together --rare.  Without it, on open of store,
-      // since store files are distinguished by sequence id, the one half would
-      // subsume the other.
-      this.sequenceid = Bytes.toLong(b);
-      if (fileInfo.isTopReference()) {
-        this.sequenceid += 1;
-      }
-    }
-
-    if (isBulkLoadResult()){
-      // generate the sequenceId from the fileName
-      // fileName is of the form <randomName>_SeqId_<id-when-loaded>_
-      String fileName = this.getPath().getName();
-      // Use lastIndexOf() to get the last, most recent bulk load seqId.
-      int startPos = fileName.lastIndexOf("SeqId_");
-      if (startPos != -1) {
-        this.sequenceid = Long.parseLong(fileName.substring(startPos + 6,
-            fileName.indexOf('_', startPos + 6)));
-        // Handle reference files as done above.
-        if (fileInfo.isTopReference()) {
-          this.sequenceid += 1;
-        }
-      }
-      // SKIP_RESET_SEQ_ID only works in bulk loaded file.
-      // In mob compaction, the hfile where the cells contain the path of a new mob file is bulk
-      // loaded to hbase, these cells have the same seqIds with the old ones. We do not want
-      // to reset new seqIds for them since this might make a mess of the visibility of cells that
-      // have the same row key but different seqIds.
-      boolean skipResetSeqId = isSkipResetSeqId(metadataMap.get(SKIP_RESET_SEQ_ID));
-      if (skipResetSeqId) {
-        // increase the seqId when it is a bulk loaded file from mob compaction.
-        this.sequenceid += 1;
-      }
-      this.reader.setSkipResetSeqId(skipResetSeqId);
-      this.reader.setBulkLoaded(true);
-    }
-    this.reader.setSequenceID(this.sequenceid);
-
-    b = metadataMap.get(HFile.Writer.MAX_MEMSTORE_TS_KEY);
-    if (b != null) {
-      this.maxMemstoreTS = Bytes.toLong(b);
-    }
-
-    b = metadataMap.get(MAJOR_COMPACTION_KEY);
-    if (b != null) {
-      boolean mc = Bytes.toBoolean(b);
-      if (this.majorCompaction == null) {
-        this.majorCompaction = new AtomicBoolean(mc);
-      } else {
-        this.majorCompaction.set(mc);
-      }
-    } else {
-      // Presume it is not major compacted if it doesn't explicitly say so
-      // HFileOutputFormat explicitly sets the major compacted key.
-      this.majorCompaction = new AtomicBoolean(false);
-    }
-
-    b = metadataMap.get(EXCLUDE_FROM_MINOR_COMPACTION_KEY);
-    this.excludeFromMinorCompaction = (b != null && Bytes.toBoolean(b));
-
-    BloomType hfileBloomType = reader.getBloomFilterType();
-    if (cfBloomType != BloomType.NONE) {
-      reader.loadBloomfilter(BlockType.GENERAL_BLOOM_META);
-      if (hfileBloomType != cfBloomType) {
-        LOG.info("HFile Bloom filter type for "
-            + reader.getHFileReader().getName() + ": " + hfileBloomType
-            + ", but " + cfBloomType + " specified in column family "
-            + "configuration");
-      }
-    } else if (hfileBloomType != BloomType.NONE) {
-      LOG.info("Bloom filter turned off by CF config for "
-          + reader.getHFileReader().getName());
-    }
-
-    // load delete family bloom filter
-    reader.loadBloomfilter(BlockType.DELETE_FAMILY_BLOOM_META);
-
-    try {
-      this.reader.timeRange = TimeRangeTracker.getTimeRange(metadataMap.get(TIMERANGE_KEY));
-    } catch (IllegalArgumentException e) {
-      LOG.error("Error reading timestamp range data from meta -- " +
-          "proceeding without", e);
-      this.reader.timeRange = null;
-    }
-    // initialize so we can reuse them after reader closed.
-    firstKey = reader.getFirstKey();
-    lastKey = reader.getLastKey();
-    comparator = reader.getComparator();
-  }
+  void initReader() throws IOException;
 
   /**
-   * Initialize the reader used for pread.
+   * Must be called after initReader.
    */
-  public void initReader() throws IOException {
-    if (reader == null) {
-      try {
-        open();
-      } catch (Exception e) {
-        try {
-          boolean evictOnClose = cacheConf != null ? cacheConf.shouldEvictOnClose() : true;
-          this.closeReader(evictOnClose);
-        } catch (IOException ee) {
-          LOG.warn("failed to close reader", ee);
-        }
-        throw e;
-      }
-    }
-  }
-
-  private StoreFileReader createStreamReader(boolean canUseDropBehind) throws IOException {
-    initReader();
-    StoreFileReader reader = fileInfo.open(this.fs, this.cacheConf, canUseDropBehind, -1L,
-      primaryReplica, refCount, false);
-    reader.copyFields(this.reader);
-    return reader;
-  }
-
-  public StoreFileScanner getPreadScanner(boolean cacheBlocks, long readPt, long scannerOrder,
-      boolean canOptimizeForNonNullColumn) {
-    return getReader().getStoreFileScanner(cacheBlocks, true, false, readPt, scannerOrder,
-      canOptimizeForNonNullColumn);
-  }
-
-  public StoreFileScanner getStreamScanner(boolean canUseDropBehind, boolean cacheBlocks,
+  StoreFileScanner getPreadScanner(boolean cacheBlocks, long readPt, long scannerOrder,
+      boolean canOptimizeForNonNullColumn);
+
+  StoreFileScanner getStreamScanner(boolean canUseDropBehind, boolean cacheBlocks,
       boolean isCompaction, long readPt, long scannerOrder, boolean canOptimizeForNonNullColumn)
-      throws IOException {
-    return createStreamReader(canUseDropBehind).getStoreFileScanner(cacheBlocks, false,
-      isCompaction, readPt, scannerOrder, canOptimizeForNonNullColumn);
-  }
+      throws IOException;
 
   /**
-   * @return Current reader.  Must call initReader first else returns null.
+   * @return Current reader. Must call initReader first else returns null.
    * @see #initReader()
    */
-  public StoreFileReader getReader() {
-    return this.reader;
-  }
+  StoreFileReader getReader();
 
   /**
    * @param evictOnClose whether to evict blocks belonging to this file
    * @throws IOException
    */
-  public synchronized void closeReader(boolean evictOnClose)
-      throws IOException {
-    if (this.reader != null) {
-      this.reader.close(evictOnClose);
-      this.reader = null;
-    }
-  }
+  void closeReader(boolean evictOnClose) throws IOException;
 
   /**
    * Marks the status of the file as compactedAway.
    */
-  public void markCompactedAway() {
-    this.compactedAway = true;
-  }
+  void markCompactedAway();
 
   /**
    * Delete this file
    * @throws IOException
    */
-  public void deleteReader() throws IOException {
-    boolean evictOnClose =
-        cacheConf != null? cacheConf.shouldEvictOnClose(): true;
-    closeReader(evictOnClose);
-    this.fs.delete(getPath(), true);
-  }
-
-  @Override
-  public String toString() {
-    return this.fileInfo.toString();
-  }
+  void deleteReader() throws IOException;
 
   /**
   * @return a lengthy description of this StoreFile, suitable for debug output
    */
-  public String toStringDetailed() {
-    StringBuilder sb = new StringBuilder();
-    sb.append(this.getPath().toString());
-    sb.append(", isReference=").append(isReference());
-    sb.append(", isBulkLoadResult=").append(isBulkLoadResult());
-    if (isBulkLoadResult()) {
-      sb.append(", bulkLoadTS=").append(getBulkLoadTimestamp());
-    } else {
-      sb.append(", seqid=").append(getMaxSequenceId());
-    }
-    sb.append(", majorCompaction=").append(isMajorCompaction());
-
-    return sb.toString();
-  }
-
-  /**
-   * Gets whether to skip resetting the sequence id for cells.
-   * @param skipResetSeqId The byte array of boolean.
-   * @return Whether to skip resetting the sequence id.
-   */
-  private boolean isSkipResetSeqId(byte[] skipResetSeqId) {
-    if (skipResetSeqId != null && skipResetSeqId.length == 1) {
-      return Bytes.toBoolean(skipResetSeqId);
-    }
-    return false;
-  }
-
-  /**
-   * @param fs
-   * @param dir Directory to create file in.
-   * @return random filename inside passed <code>dir</code>
-   */
-  public static Path getUniqueFile(final FileSystem fs, final Path dir)
-      throws IOException {
-    if (!fs.getFileStatus(dir).isDirectory()) {
-      throw new IOException("Expecting " + dir.toString() +
-        " to be a directory");
-    }
-    return new Path(dir, UUID.randomUUID().toString().replaceAll("-", ""));
-  }
+  String toStringDetailed();
 
-  public Long getMinimumTimestamp() {
-    return getReader().timeRange == null? null: getReader().timeRange.getMin();
-  }
+  OptionalLong getMinimumTimestamp();
 
-  public Long getMaximumTimestamp() {
-    return getReader().timeRange == null? null: getReader().timeRange.getMax();
-  }
-
-
-  /**
-   * Gets the approximate mid-point of this file that is optimal for use in splitting it.
-   * @param comparator Comparator used to compare KVs.
-   * @return The split point row, or null if splitting is not possible, or reader is null.
-   */
-  byte[] getFileSplitPoint(CellComparator comparator) throws IOException {
-    if (this.reader == null) {
-      LOG.warn("Storefile " + this + " Reader is null; cannot get split point");
-      return null;
-    }
-    // Get first, last, and mid keys.  Midkey is the key that starts block
-    // in middle of hfile.  Has column and timestamp.  Need to return just
-    // the row we want to split on as midkey.
-    Cell midkey = this.reader.midkey();
-    if (midkey != null) {
-      Cell firstKey = this.reader.getFirstKey();
-      Cell lastKey = this.reader.getLastKey();
-      // if the midkey is the same as the first or last keys, we cannot (ever) split this region.
-      if (comparator.compareRows(midkey, firstKey) == 0
-          || comparator.compareRows(midkey, lastKey) == 0) {
-        if (LOG.isDebugEnabled()) {
-          LOG.debug("cannot split because midkey is the same as first or last row");
-        }
-        return null;
-      }
-      return CellUtil.cloneRow(midkey);
-    }
-    return null;
-  }
-
-  /**
-   * Useful comparators for comparing StoreFiles.
-   */
-  public abstract static class Comparators {
-    /**
-     * Comparator that compares based on the Sequence Ids of
-     * the StoreFiles. Bulk loads that did not request a seq ID
-     * are given a seq id of -1; thus, they are placed before all non-
-     * bulk loads, and bulk loads with sequence Id. Among these files,
-     * the size is used to determine the ordering, then bulkLoadTime.
-     * If there are ties, the path name is used as a tie-breaker.
-     */
-    public static final Comparator<StoreFile> SEQ_ID =
-      Ordering.compound(ImmutableList.of(
-          Ordering.natural().onResultOf(new GetSeqId()),
-          Ordering.natural().onResultOf(new GetFileSize()).reverse(),
-          Ordering.natural().onResultOf(new GetBulkTime()),
-          Ordering.natural().onResultOf(new GetPathName())
-      ));
-
-    /**
-     * Comparator for time-aware compaction. SeqId is still the first
-     *   ordering criterion to maintain MVCC.
-     */
-    public static final Comparator<StoreFile> SEQ_ID_MAX_TIMESTAMP =
-      Ordering.compound(ImmutableList.of(
-        Ordering.natural().onResultOf(new GetSeqId()),
-        Ordering.natural().onResultOf(new GetMaxTimestamp()),
-        Ordering.natural().onResultOf(new GetFileSize()).reverse(),
-        Ordering.natural().onResultOf(new GetBulkTime()),
-        Ordering.natural().onResultOf(new GetPathName())
-      ));
-
-    private static class GetSeqId implements Function<StoreFile, Long> {
-      @Override
-      public Long apply(StoreFile sf) {
-        return sf.getMaxSequenceId();
-      }
-    }
-
-    private static class GetFileSize implements Function<StoreFile, Long> {
-      @Override
-      public Long apply(StoreFile sf) {
-        if (sf.getReader() != null) {
-          return sf.getReader().length();
-        } else {
-          // the reader may be null for the compacted files and if the archiving
-          // had failed.
-          return -1L;
-        }
-      }
-    }
-
-    private static class GetBulkTime implements Function<StoreFile, Long> {
-      @Override
-      public Long apply(StoreFile sf) {
-        if (!sf.isBulkLoadResult()) return Long.MAX_VALUE;
-        return sf.getBulkLoadTimestamp();
-      }
-    }
-
-    private static class GetPathName implements Function<StoreFile, String> {
-      @Override
-      public String apply(StoreFile sf) {
-        return sf.getPath().getName();
-      }
-    }
-
-    private static class GetMaxTimestamp implements Function<StoreFile, Long> {
-      @Override
-      public Long apply(StoreFile sf) {
-        return sf.getMaximumTimestamp() == null? (Long)Long.MAX_VALUE : sf.getMaximumTimestamp();
-      }
-    }
-  }
+  OptionalLong getMaximumTimestamp();
 }
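
With StoreFile reduced to a coprocessor-facing interface, the static aggregate
helpers deleted above (getMaxMemstoreTSInList and getMaxSequenceIdInList) no
longer live in this class. Callers that still need the same aggregates can
express them against the interface; below is a sketch under that assumption,
with the StoreFileStats class name purely hypothetical:

    import java.util.Collection;

    final class StoreFileStats {
      // Highest sequence id across the given files; 0 for an empty collection.
      static long maxSequenceId(Collection<? extends StoreFile> files) {
        return files.stream().mapToLong(StoreFile::getMaxSequenceId).max().orElse(0L);
      }

      // Largest memstoreTS across the files, skipping bulk load results, which
      // do not correspond to any specific put and so carry no memstoreTS.
      static long maxMemstoreTS(Collection<? extends StoreFile> files) {
        return files.stream()
            .filter(sf -> !sf.isBulkLoadResult())
            .mapToLong(StoreFile::getMaxMemstoreTS)
            .max().orElse(0L);
      }
    }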
