joshelser commented on a change in pull request #3488:
URL: https://github.com/apache/hbase/pull/3488#discussion_r674299072



##########
File path: 
hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java
##########
@@ -107,6 +113,21 @@ public MergeTableRegionsProcedure(final MasterProcedureEnv 
env,
     // Preflight depends on mergedRegion being set (at least).
     preflightChecks(env, true);
     this.force = force;
+    createMergeStrategy(env.getMasterConfiguration());
+  }
+
+  private void createMergeStrategy(Configuration conf) {
+    String className = conf.get(MERGE_REGION_STRATEGY, 
DefaultMergeStrategy.class.getName());
+    createMergeStrategy(className);
+  }
+
+  private void createMergeStrategy(String className) {

Review comment:
       Can replace this with `o.a.hadoop.util.ReflectionUtils#newInstance()`. 
We also have a `ReflectionUtils` in HBase that lets you do the same with a 
custom constructor (if we need one for some reason)

##########
File path: 
hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeRegionsStrategy.java
##########
@@ -0,0 +1,99 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master.assignment;
+
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.RegionInfo;
+import org.apache.hadoop.hbase.master.MasterFileSystem;
+import org.apache.hadoop.hbase.master.RegionState;
+import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
+import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
+import org.apache.hadoop.hbase.util.CommonFSUtils;
+import org.apache.yetus.audience.InterfaceAudience;
+
+import java.io.IOException;
+
+/**
+ * Region merge directory creation strategy to decouple create dir logic from
+ * <code></code>MergeTableRegionsProcedure</code> and allow for plugable 
behaviour.
+ */
[email protected]
+public abstract class MergeRegionsStrategy {
+
+  /**
+   * Creates the resulting merging region dir and files in the file system, 
then updates
+   * meta table information for the given region. Specific logic on where in 
the files system to
+   * create the region structure is delegated to 
<method>innerMergeRegions</method> and the
+   * actual <code>HRegionFileSystemWriteStrategy</code> implementation.
+   * @param env the MasterProcedureEnv wrapping several meta information 
required.
+   * @param regionsToMerge array of RegionInfo representing the regions being 
merged.
+   * @param mergedRegion the resulting merging region.
+   * @throws IOException if any error occurs while creating the region dir.
+   */
+  public void createMergedRegion(MasterProcedureEnv env, RegionInfo[] 
regionsToMerge,
+    RegionInfo mergedRegion) throws IOException {
+    final MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem();
+    final Path tabledir = CommonFSUtils.getTableDir(mfs.getRootDir(), 
regionsToMerge[0].getTable());
+    final FileSystem fs = mfs.getFileSystem();
+    HRegionFileSystem mergeRegionFs = innerMergeRegions(env, fs, 
regionsToMerge,
+      tabledir, mergedRegion);
+    assert mergeRegionFs != null;
+    mergeRegionFs.commitMergedRegion(mergedRegion);
+    // Prepare to create merged regions
+    env.getAssignmentManager().getRegionStates().
+      
getOrCreateRegionStateNode(mergedRegion).setState(RegionState.State.MERGING_NEW);
+  }
+
+  /**
+   * Should define specific logic about where in the file system the region 
structure should be
+   * created.
+   * @param env the MasterProcedureEnv wrapping several meta information 
required.
+   * @param fs the FileSystem instance to write the region directory.
+   * @param regionsToMerge array of RegionInfo representing the regions being 
merged.
+   * @param tableDir Path instance for the table dir.
+   * @param mergedRegion the resulting merging region.
+   * @return HRegionFileSystem for the resulting merging region.
+   * @throws IOException if any error occurs while creating the region dir.
+   */
+  abstract protected HRegionFileSystem innerMergeRegions(MasterProcedureEnv 
env, FileSystem fs,
+    RegionInfo[] regionsToMerge, Path tableDir, RegionInfo mergedRegion) 
throws IOException;

Review comment:
       Should try to come up with a better name for this.

##########
File path: 
hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java
##########
@@ -107,6 +113,21 @@ public MergeTableRegionsProcedure(final MasterProcedureEnv 
env,
     // Preflight depends on mergedRegion being set (at least).
     preflightChecks(env, true);
     this.force = force;
+    createMergeStrategy(env.getMasterConfiguration());
+  }
+
+  private void createMergeStrategy(Configuration conf) {
+    String className = conf.get(MERGE_REGION_STRATEGY, 
DefaultMergeStrategy.class.getName());
+    createMergeStrategy(className);
+  }
+
+  private void createMergeStrategy(String className) {
+    try {
+      LOG.info("instantiating write strategy {}", className);
+      mergeStrategy = (MergeRegionsStrategy) 
Class.forName(className).newInstance();
+    } catch (Exception e) {
+      LOG.error("Unable to create write strategy: {}", className, e);

Review comment:
       need to propagate exception as Runtime

##########
File path: 
hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DirectStoreFSWriteStrategy.java
##########
@@ -0,0 +1,258 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.regionserver;
+
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.RegionInfo;
+import org.apache.yetus.audience.InterfaceAudience;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+
+/**
+ * <code>HRegionFileSystemWriteStrategy</code> implementation to be used in 
combination with
+ * <code>PersistedStoreEngine</code> to avoid renames when splitting and 
merging regions.
+ *
+ * To use it, define the following properties under master configuration:
+ * 1) <property>
+ *      <name>hbase.hregion.file.write.strategy</name>
+ *      
<value>org.apache.hadoop.hbase.regionserver.DirectStoreFSWriteStrategy</value>
+ *    </property>
+ * 2) <property>
+ *      <name>hbase.hregion.merge.strategy</name>
+ *      
<value>org.apache.hadoop.hbase.master.assignment.DirectStoreMergeRegionsStrategy</value>
+ *    </property>
+ *
+ * This will create the resulting merging and splitting regions directory 
straight under
+ * the table dir, instead of creating it under the temporary ".tmp" or 
".merges" dirs,
+ * as done by the default implementation.
+ */
[email protected]
+public class DirectStoreFSWriteStrategy extends HRegionFileSystemWriteStrategy 
{
+  private StoreFilePathAccessor accessor;
+  private Map<String, Map<String,List<Path>>> regionSplitReferences = new 
ConcurrentHashMap<>();
+  private Map<String, List<Path>> mergeReferences = new HashMap();
+
+  public DirectStoreFSWriteStrategy(HRegionFileSystem fileSystem) throws 
IOException {
+    super(fileSystem);
+    this.accessor =  
StoreFileTrackingUtils.createStoreFilePathAccessor(fileSystem.conf,
+      ConnectionFactory.createConnection(fileSystem.conf));
+  }
+
+  /**
+   * The parent directory where to create the splits dirs is
+   * the table directory itself, in this case.
+   * @return Path representing the table directory.
+   */
+  @Override
+  public Path getParentSplitsDir() {
+    return fileSystem.getTableDir();
+  }
+
+  /**
+   * The parent directory where to create the merge dir is
+   * the table directory itself, in this case.
+   * @return Path representing the table directory.
+   */
+  @Override
+  public Path getParentMergesDir() {
+    return fileSystem.getTableDir();
+  }
+
+  /**
+   * Creates the directories for the respective split daughters directly under 
the
+   * table directory, instead of default behaviour of doing it under temp 
dirs, initially.
+   * @param daughterA the first half of the split region
+   * @param daughterB the second half of the split region
+   *
+   * @throws IOException if directories creation fails.
+   */
+  @Override
+  public void createSplitsDir(RegionInfo daughterA, RegionInfo daughterB)
+    throws IOException {
+    Path splitdir = getParentSplitsDir();
+    // splitDir doesn't exists now. No need to do an exists() call for it.
+    if (!fileSystem.getFileSystem().exists(splitdir)) {
+      throw new IOException("Table dir for splitting region not found:  " + 
splitdir);
+    }
+    Path daughterADir = getSplitsDir(daughterA);
+    if (!fileSystem.createDir(daughterADir)) {
+      throw new IOException("Failed create of " + daughterADir);
+    }
+    Path daughterBDir = getSplitsDir(daughterB);
+    if (!fileSystem.createDir(daughterBDir)) {
+      throw new IOException("Failed create of " + daughterBDir);
+    }
+  }
+
+  /**
+   * Just validates that merges parent, the actual table dir in this case, 
exists.
+   * @throws IOException if table dir doesn't exist.
+   */
+  @Override
+  public void createMergesDir() throws IOException {
+    //When writing directly, avoiding renames, merges parent is the table dir 
itself, so it
+    // should exist already, so just validate it exist then do nothing
+    Path mergesdir = getParentMergesDir();
+    if (!fileSystem.fs.exists(mergesdir)) {
+      throw new IOException("Table dir for merging region not found: " + 
mergesdir);
+    }
+  }
+
+  /**
+   * Wraps <code>super.splitStoreFile</code>, so that it can map the resulting 
split reference to
+   * the related split region and column family, in order to add this to 
'storefile' system table
+   * for the tracking logic of PersisedStoreFileManager later on 
<code>commitDaughterRegion</code>
+   * method.
+   * @param parentRegionInfo {@link RegionInfo} of the parent split region.
+   * @param daughterRegionInfo {@link RegionInfo} of the resulting split 
region.
+   * @param familyName Column Family Name
+   * @param f File to split.
+   * @param splitRow Split Row
+   * @param top True if we are referring to the top half of the hfile.
+   * @param splitPolicy A split policy instance; be careful! May not be full 
populated; e.g. if
+   *                    this method is invoked on the Master side, then the 
RegionSplitPolicy will
+   *                    NOT have a reference to a Region.
+   * @param fs FileSystem instance for creating the actual reference file.
+   * @return
+   * @throws IOException
+   */
+  @Override
+  public Path splitStoreFile(RegionInfo parentRegionInfo, RegionInfo 
daughterRegionInfo,
+    String familyName, HStoreFile f, byte[] splitRow,
+    boolean top, RegionSplitPolicy splitPolicy, FileSystem fs) throws 
IOException {
+    Path path = super.splitStoreFile(parentRegionInfo, daughterRegionInfo,
+      familyName, f, splitRow, top, splitPolicy, fs);
+    Map<String,List<Path>> splitReferences = regionSplitReferences.
+      get(daughterRegionInfo.getEncodedName());
+    if(splitReferences==null){
+      splitReferences = new HashMap<>();
+      regionSplitReferences.put(daughterRegionInfo.getEncodedName(), 
splitReferences);
+    }
+    List<Path> references = splitReferences.get(familyName);
+    if(references==null){
+      references = new ArrayList<>();
+      splitReferences.put(familyName,references);
+    }
+    references.add(path);
+    return path;
+  }
+
+  /**
+   * Wraps <code>super.mergeStoreFile</code>, so that it can map the resulting 
merge reference to
+   * the related merged region and column family, in order to add this to 
'storefile' system table
+   * for the tracking logic of PersisedStoreFileManager later in 
<code>commitMergedRegion</code>.
+   * @param regionToMerge {@link RegionInfo} for one of the regions being 
merged.
+   * @param mergedRegion {@link RegionInfo} of the merged region
+   * @param familyName Column Family Name
+   * @param f File to create reference.
+   * @param mergedDir
+   * @param fs FileSystem instance for creating the actual reference file.
+   * @return
+   * @throws IOException
+   */
+  @Override
+  public Path mergeStoreFile(RegionInfo regionToMerge, RegionInfo 
mergedRegion, String familyName,
+      HStoreFile f, Path mergedDir, FileSystem fs) throws IOException {
+//    Path path = super.mergeStoreFile(regionToMerge, mergedRegion, 
familyName, f, mergedDir, fs);
+
+    Path path = (this.fileSystem.regionInfoForFs.equals(regionToMerge)) ?
+      super.mergeStoreFile(mergedRegion, regionToMerge, familyName, f, 
mergedDir, fs)
+      : super.mergeStoreFile(regionToMerge, mergedRegion, familyName, f, 
mergedDir, fs);
+
+    List<Path> referenceFiles = mergeReferences.get(familyName);
+    if(referenceFiles==null){
+      referenceFiles = new ArrayList<>();
+      mergeReferences.put(familyName, referenceFiles);
+    }
+    referenceFiles.add(path);
+    return path;
+  }
+
+  /**
+   * Do nothing. Here the split dir is the table dir itself, we cannot delete 
it.
+   * @throws IOException
+   */
+  @Override
+  public void cleanupSplitsDir() throws IOException {
+    //do nothing, the split dir is the store dir itself, so we cannot delete 
it.
+  }
+
+  /**
+   * Do nothing. Here the merge dir is the table dir itself, we cannot delete 
it.
+   * @throws IOException
+   */
+  @Override
+  public void cleanupMergesDir() throws IOException {
+    //do nothing, the merges dir is the store dir itself, so we cannot delete 
it.
+  }

Review comment:
       What happens if a split or merge operation fundamentally fails (e.g. we 
fail to write the reference files in a split daughter region)? Does something 
in the SplitTableRegionProcedure remove the daughter region in that case?

##########
File path: 
hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DirectStoreFSWriteStrategy.java
##########
@@ -0,0 +1,258 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.regionserver;
+
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.RegionInfo;
+import org.apache.yetus.audience.InterfaceAudience;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+
+/**
+ * <code>HRegionFileSystemWriteStrategy</code> implementation to be used in 
combination with
+ * <code>PersistedStoreEngine</code> to avoid renames when splitting and 
merging regions.
+ *
+ * To use it, define the following properties under master configuration:
+ * 1) <property>
+ *      <name>hbase.hregion.file.write.strategy</name>
+ *      
<value>org.apache.hadoop.hbase.regionserver.DirectStoreFSWriteStrategy</value>
+ *    </property>
+ * 2) <property>
+ *      <name>hbase.hregion.merge.strategy</name>
+ *      
<value>org.apache.hadoop.hbase.master.assignment.DirectStoreMergeRegionsStrategy</value>
+ *    </property>
+ *
+ * This will create the resulting merging and splitting regions directory 
straight under
+ * the table dir, instead of creating it under the temporary ".tmp" or 
".merges" dirs,
+ * as done by the default implementation.
+ */
[email protected]
+public class DirectStoreFSWriteStrategy extends HRegionFileSystemWriteStrategy 
{
+  private StoreFilePathAccessor accessor;
+  private Map<String, Map<String,List<Path>>> regionSplitReferences = new 
ConcurrentHashMap<>();

Review comment:
       Is the `ConcurrentHashMap` necessary? Multiple concurrent splits 
accessing this?
   
   Also, who cleans up the entries in this map?

##########
File path: 
hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DirectStoreFSWriteStrategy.java
##########
@@ -0,0 +1,258 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.regionserver;
+
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.RegionInfo;
+import org.apache.yetus.audience.InterfaceAudience;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+
+/**
+ * <code>HRegionFileSystemWriteStrategy</code> implementation to be used in 
combination with
+ * <code>PersistedStoreEngine</code> to avoid renames when splitting and 
merging regions.
+ *
+ * To use it, define the following properties under master configuration:
+ * 1) <property>
+ *      <name>hbase.hregion.file.write.strategy</name>
+ *      
<value>org.apache.hadoop.hbase.regionserver.DirectStoreFSWriteStrategy</value>
+ *    </property>
+ * 2) <property>
+ *      <name>hbase.hregion.merge.strategy</name>
+ *      
<value>org.apache.hadoop.hbase.master.assignment.DirectStoreMergeRegionsStrategy</value>
+ *    </property>
+ *
+ * This will create the resulting merging and splitting regions directory 
straight under
+ * the table dir, instead of creating it under the temporary ".tmp" or 
".merges" dirs,
+ * as done by the default implementation.
+ */
[email protected]
+public class DirectStoreFSWriteStrategy extends HRegionFileSystemWriteStrategy 
{
+  private StoreFilePathAccessor accessor;
+  private Map<String, Map<String,List<Path>>> regionSplitReferences = new 
ConcurrentHashMap<>();
+  private Map<String, List<Path>> mergeReferences = new HashMap();
+
+  public DirectStoreFSWriteStrategy(HRegionFileSystem fileSystem) throws 
IOException {
+    super(fileSystem);
+    this.accessor =  
StoreFileTrackingUtils.createStoreFilePathAccessor(fileSystem.conf,
+      ConnectionFactory.createConnection(fileSystem.conf));
+  }
+
+  /**
+   * The parent directory where to create the splits dirs is
+   * the table directory itself, in this case.
+   * @return Path representing the table directory.
+   */
+  @Override
+  public Path getParentSplitsDir() {
+    return fileSystem.getTableDir();
+  }
+
+  /**
+   * The parent directory where to create the merge dir is
+   * the table directory itself, in this case.
+   * @return Path representing the table directory.
+   */
+  @Override
+  public Path getParentMergesDir() {
+    return fileSystem.getTableDir();
+  }
+
+  /**
+   * Creates the directories for the respective split daughters directly under 
the
+   * table directory, instead of default behaviour of doing it under temp 
dirs, initially.
+   * @param daughterA the first half of the split region
+   * @param daughterB the second half of the split region
+   *
+   * @throws IOException if directories creation fails.
+   */
+  @Override
+  public void createSplitsDir(RegionInfo daughterA, RegionInfo daughterB)
+    throws IOException {
+    Path splitdir = getParentSplitsDir();
+    // splitDir doesn't exists now. No need to do an exists() call for it.
+    if (!fileSystem.getFileSystem().exists(splitdir)) {
+      throw new IOException("Table dir for splitting region not found:  " + 
splitdir);
+    }
+    Path daughterADir = getSplitsDir(daughterA);
+    if (!fileSystem.createDir(daughterADir)) {
+      throw new IOException("Failed create of " + daughterADir);
+    }
+    Path daughterBDir = getSplitsDir(daughterB);
+    if (!fileSystem.createDir(daughterBDir)) {
+      throw new IOException("Failed create of " + daughterBDir);
+    }
+  }
+
+  /**
+   * Just validates that merges parent, the actual table dir in this case, 
exists.
+   * @throws IOException if table dir doesn't exist.
+   */
+  @Override
+  public void createMergesDir() throws IOException {
+    //When writing directly, avoiding renames, merges parent is the table dir 
itself, so it
+    // should exist already, so just validate it exist then do nothing
+    Path mergesdir = getParentMergesDir();
+    if (!fileSystem.fs.exists(mergesdir)) {
+      throw new IOException("Table dir for merging region not found: " + 
mergesdir);
+    }
+  }
+
+  /**
+   * Wraps <code>super.splitStoreFile</code>, so that it can map the resulting 
split reference to
+   * the related split region and column family, in order to add this to 
'storefile' system table
+   * for the tracking logic of PersisedStoreFileManager later on 
<code>commitDaughterRegion</code>
+   * method.
+   * @param parentRegionInfo {@link RegionInfo} of the parent split region.
+   * @param daughterRegionInfo {@link RegionInfo} of the resulting split 
region.
+   * @param familyName Column Family Name
+   * @param f File to split.
+   * @param splitRow Split Row
+   * @param top True if we are referring to the top half of the hfile.
+   * @param splitPolicy A split policy instance; be careful! May not be full 
populated; e.g. if
+   *                    this method is invoked on the Master side, then the 
RegionSplitPolicy will
+   *                    NOT have a reference to a Region.
+   * @param fs FileSystem instance for creating the actual reference file.
+   * @return
+   * @throws IOException
+   */
+  @Override
+  public Path splitStoreFile(RegionInfo parentRegionInfo, RegionInfo 
daughterRegionInfo,
+    String familyName, HStoreFile f, byte[] splitRow,
+    boolean top, RegionSplitPolicy splitPolicy, FileSystem fs) throws 
IOException {
+    Path path = super.splitStoreFile(parentRegionInfo, daughterRegionInfo,
+      familyName, f, splitRow, top, splitPolicy, fs);
+    Map<String,List<Path>> splitReferences = regionSplitReferences.
+      get(daughterRegionInfo.getEncodedName());
+    if(splitReferences==null){
+      splitReferences = new HashMap<>();
+      regionSplitReferences.put(daughterRegionInfo.getEncodedName(), 
splitReferences);
+    }
+    List<Path> references = splitReferences.get(familyName);
+    if(references==null){
+      references = new ArrayList<>();
+      splitReferences.put(familyName,references);
+    }
+    references.add(path);
+    return path;
+  }
+
+  /**
+   * Wraps <code>super.mergeStoreFile</code>, so that it can map the resulting 
merge reference to
+   * the related merged region and column family, in order to add this to 
'storefile' system table
+   * for the tracking logic of PersisedStoreFileManager later in 
<code>commitMergedRegion</code>.
+   * @param regionToMerge {@link RegionInfo} for one of the regions being 
merged.
+   * @param mergedRegion {@link RegionInfo} of the merged region
+   * @param familyName Column Family Name
+   * @param f File to create reference.
+   * @param mergedDir
+   * @param fs FileSystem instance for creating the actual reference file.
+   * @return
+   * @throws IOException
+   */
+  @Override
+  public Path mergeStoreFile(RegionInfo regionToMerge, RegionInfo 
mergedRegion, String familyName,
+      HStoreFile f, Path mergedDir, FileSystem fs) throws IOException {
+//    Path path = super.mergeStoreFile(regionToMerge, mergedRegion, 
familyName, f, mergedDir, fs);
+
+    Path path = (this.fileSystem.regionInfoForFs.equals(regionToMerge)) ?
+      super.mergeStoreFile(mergedRegion, regionToMerge, familyName, f, 
mergedDir, fs)
+      : super.mergeStoreFile(regionToMerge, mergedRegion, familyName, f, 
mergedDir, fs);

Review comment:
       Couldn't we require the regions to be positional (regionA, regionB) so 
we didn't have to do this check and flip-flop the argument order?
   
   Also, is n-way region merges handled at a higher level?

##########
File path: 
hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DirectStoreFSWriteStrategy.java
##########
@@ -0,0 +1,258 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.regionserver;
+
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.RegionInfo;
+import org.apache.yetus.audience.InterfaceAudience;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+
+/**
+ * <code>HRegionFileSystemWriteStrategy</code> implementation to be used in 
combination with
+ * <code>PersistedStoreEngine</code> to avoid renames when splitting and 
merging regions.

Review comment:
       This makes me wonder if we should just have the 
WriteStrategy/MergeStrategy implementations defined in the StoreEngine 
implementation itself. Is there any benefit to having them separate?
   
   I can't think of a reason (besides working around bugs) why we would want 
no-renames for flushes and compactions but not splits/merges.

##########
File path: 
hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DirectStoreFSWriteStrategy.java
##########
@@ -0,0 +1,258 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.regionserver;
+
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.RegionInfo;
+import org.apache.yetus.audience.InterfaceAudience;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+
+/**
+ * <code>HRegionFileSystemWriteStrategy</code> implementation to be used in 
combination with
+ * <code>PersistedStoreEngine</code> to avoid renames when splitting and 
merging regions.
+ *
+ * To use it, define the following properties under master configuration:
+ * 1) <property>
+ *      <name>hbase.hregion.file.write.strategy</name>
+ *      
<value>org.apache.hadoop.hbase.regionserver.DirectStoreFSWriteStrategy</value>
+ *    </property>
+ * 2) <property>
+ *      <name>hbase.hregion.merge.strategy</name>
+ *      
<value>org.apache.hadoop.hbase.master.assignment.DirectStoreMergeRegionsStrategy</value>
+ *    </property>
+ *
+ * This will create the resulting merging and splitting regions directory 
straight under
+ * the table dir, instead of creating it under the temporary ".tmp" or 
".merges" dirs,
+ * as done by the default implementation.
+ */
[email protected]
+public class DirectStoreFSWriteStrategy extends HRegionFileSystemWriteStrategy 
{
+  private StoreFilePathAccessor accessor;
+  private Map<String, Map<String,List<Path>>> regionSplitReferences = new 
ConcurrentHashMap<>();
+  private Map<String, List<Path>> mergeReferences = new HashMap();
+
+  public DirectStoreFSWriteStrategy(HRegionFileSystem fileSystem) throws 
IOException {
+    super(fileSystem);
+    this.accessor =  
StoreFileTrackingUtils.createStoreFilePathAccessor(fileSystem.conf,
+      ConnectionFactory.createConnection(fileSystem.conf));

Review comment:
       Better, I think, to accept a Connection from the caller, rather than 
create another we have to manage.

##########
File path: 
hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java
##########
@@ -381,6 +403,7 @@ protected void 
deserializeStateData(ProcedureStateSerializer serializer)
     for (int i = 0; i < regionsToMerge.length; i++) {
       regionsToMerge[i] = 
ProtobufUtil.toRegionInfo(mergeTableRegionsMsg.getRegionInfo(i));
     }
+    createMergeStrategy(mergeTableRegionsMsg.getMergeStrategy());

Review comment:
       Need to guard against the old `MergeTableRegionsStateData` message not 
having this newly-added attribute, and default to a specific implementation 
(either from explicit Configuration or default value)

##########
File path: 
hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DirectStoreFSWriteStrategy.java
##########
@@ -0,0 +1,258 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.regionserver;
+
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.RegionInfo;
+import org.apache.yetus.audience.InterfaceAudience;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+
+/**
+ * <code>HRegionFileSystemWriteStrategy</code> implementation to be used in 
combination with
+ * <code>PersistedStoreEngine</code> to avoid renames when splitting and 
merging regions.
+ *
+ * To use it, define the following properties under master configuration:
+ * 1) <property>
+ *      <name>hbase.hregion.file.write.strategy</name>
+ *      
<value>org.apache.hadoop.hbase.regionserver.DirectStoreFSWriteStrategy</value>
+ *    </property>
+ * 2) <property>
+ *      <name>hbase.hregion.merge.strategy</name>
+ *      
<value>org.apache.hadoop.hbase.master.assignment.DirectStoreMergeRegionsStrategy</value>
+ *    </property>
+ *
+ * This will create the resulting merging and splitting regions directory 
straight under
+ * the table dir, instead of creating it under the temporary ".tmp" or 
".merges" dirs,
+ * as done by the default implementation.
+ */
[email protected]
+public class DirectStoreFSWriteStrategy extends HRegionFileSystemWriteStrategy 
{
+  private StoreFilePathAccessor accessor;
+  private Map<String, Map<String,List<Path>>> regionSplitReferences = new 
ConcurrentHashMap<>();
+  private Map<String, List<Path>> mergeReferences = new HashMap();
+
+  public DirectStoreFSWriteStrategy(HRegionFileSystem fileSystem) throws 
IOException {
+    super(fileSystem);
+    this.accessor =  
StoreFileTrackingUtils.createStoreFilePathAccessor(fileSystem.conf,
+      ConnectionFactory.createConnection(fileSystem.conf));
+  }
+
+  /**
+   * The parent directory where to create the splits dirs is
+   * the table directory itself, in this case.
+   * @return Path representing the table directory.
+   */
+  @Override
+  public Path getParentSplitsDir() {
+    return fileSystem.getTableDir();
+  }
+
+  /**
+   * The parent directory where to create the merge dir is
+   * the table directory itself, in this case.
+   * @return Path representing the table directory.
+   */
+  @Override
+  public Path getParentMergesDir() {
+    return fileSystem.getTableDir();
+  }
+
+  /**
+   * Creates the directories for the respective split daughters directly under 
the
+   * table directory, instead of default behaviour of doing it under temp 
dirs, initially.
+   * @param daughterA the first half of the split region
+   * @param daughterB the second half of the split region
+   *
+   * @throws IOException if directories creation fails.
+   */
+  @Override
+  public void createSplitsDir(RegionInfo daughterA, RegionInfo daughterB)
+    throws IOException {
+    Path splitdir = getParentSplitsDir();
+    // splitDir doesn't exists now. No need to do an exists() call for it.
+    if (!fileSystem.getFileSystem().exists(splitdir)) {
+      throw new IOException("Table dir for splitting region not found:  " + 
splitdir);
+    }
+    Path daughterADir = getSplitsDir(daughterA);
+    if (!fileSystem.createDir(daughterADir)) {
+      throw new IOException("Failed create of " + daughterADir);
+    }
+    Path daughterBDir = getSplitsDir(daughterB);
+    if (!fileSystem.createDir(daughterBDir)) {
+      throw new IOException("Failed create of " + daughterBDir);
+    }
+  }
+
+  /**
+   * Just validates that merges parent, the actual table dir in this case, 
exists.
+   * @throws IOException if table dir doesn't exist.
+   */
+  @Override
+  public void createMergesDir() throws IOException {
+    //When writing directly, avoiding renames, merges parent is the table dir 
itself, so it
+    // should exist already, so just validate it exist then do nothing
+    Path mergesdir = getParentMergesDir();
+    if (!fileSystem.fs.exists(mergesdir)) {
+      throw new IOException("Table dir for merging region not found: " + 
mergesdir);
+    }

Review comment:
       Shouldn't we create the merged region directory in this method?

##########
File path: 
hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionFileSystem.java
##########
@@ -1247,4 +1149,186 @@ private static void sleepBeforeRetry(String msg, int 
sleepMultiplier, int baseSl
     }
     Thread.sleep((long)baseSleepBeforeRetries * sleepMultiplier);
   }
+
+  private void createWriteStrategy(Configuration conf) {
+    String className = conf.get(REGION_WRITE_STRATEGY, 
DefaultWriteStrategy.class.getName());
+    try {
+      LOG.info("instantiating write strategy {}", className);
+      writeStrategy = ReflectionUtils.instantiateWithCustomCtor(className,
+        new Class[] { HRegionFileSystem.class }, new Object[] { this });
+    } catch (Exception e) {
+      LOG.error("Unable to create write strategy: {}", className, e);
+    }
+  }
+
+  public static class DefaultWriteStrategy extends 
HRegionFileSystemWriteStrategy {
+
+    public DefaultWriteStrategy(HRegionFileSystem fileSystem){
+      super(fileSystem);
+    }
+
+    /**
+     * Constructs a Path for the split dir as follows:
+     *  "/hbase/data/NS/TABLE/PARENT_REGION_DIR/.splits/"
+     * @return the temporary parent path for the split dir
+     */
+    @Override
+    public Path getParentSplitsDir() {
+      return new Path(fileSystem.getRegionDir(), REGION_SPLITS_DIR);
+    }
+
+    /**
+     * Constructs a Path for the merged dir as follows:
+     *  "/hbase/data/NS/TABLE/PARENT_REGION_DIR/.merges/"
+     * @return the temporary parent path for the merges dir.
+     */
+    @Override
+    public Path getParentMergesDir() {
+      return new Path(fileSystem.getRegionDir(), REGION_MERGES_DIR);
+    }
+
+    /**
+     * Creates the region splits directory. Assumes getSplitsDir 
implementation returns a tmp dir,
+     * therefore, it deletes any existing directory returned by getSplitsDir.
+     *
+     * @param daughterA the first half of the split region
+     * @param daughterB the second half of the split region
+     *
+     * @throws IOException if splits dir creation fails.
+     */
+    @Override
+    public void createSplitsDir(RegionInfo daughterA, RegionInfo daughterB) 
throws IOException {
+      Path splitdir = getParentSplitsDir();
+      if (this.fileSystem.fs.exists(splitdir)) {
+        LOG.info("The " + splitdir + " directory exists.  Hence deleting it to 
recreate it");

Review comment:
       nit, move over to the slf4j marker message

##########
File path: 
hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionFileSystem.java
##########
@@ -1247,4 +1149,186 @@ private static void sleepBeforeRetry(String msg, int 
sleepMultiplier, int baseSl
     }
     Thread.sleep((long)baseSleepBeforeRetries * sleepMultiplier);
   }
+
+  private void createWriteStrategy(Configuration conf) {
+    String className = conf.get(REGION_WRITE_STRATEGY, 
DefaultWriteStrategy.class.getName());
+    try {
+      LOG.info("instantiating write strategy {}", className);
+      writeStrategy = ReflectionUtils.instantiateWithCustomCtor(className,
+        new Class[] { HRegionFileSystem.class }, new Object[] { this });
+    } catch (Exception e) {
+      LOG.error("Unable to create write strategy: {}", className, e);

Review comment:
       We should bail hard if we can't load the given class -- throw it as 
RuntimeException

##########
File path: 
hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionFileSystemWriteStrategy.java
##########
@@ -0,0 +1,220 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.regionserver;
+
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.PrivateCellUtil;
+import org.apache.hadoop.hbase.client.RegionInfo;
+import org.apache.hadoop.hbase.io.Reference;
+import org.apache.yetus.audience.InterfaceAudience;
+
+import java.io.IOException;
+import java.util.Optional;
+
+/**
+ * HRegionFileSystem write strategy to decouple splits/merge create dir and 
commit logic
+ * from <code>HRegionFileSystem</code>, allowing for a plugable behaviour.
+ */
[email protected]
+public abstract class HRegionFileSystemWriteStrategy {
+
+  protected HRegionFileSystem fileSystem;
+
+  public HRegionFileSystemWriteStrategy(HRegionFileSystem fileSystem){
+    this.fileSystem = fileSystem;
+  }
+
+  /**
+   * Returns the directory Path for the region split resulting daughter.
+   * @param hri for the split resulting daughter region.
+   * @return a path under tmp dir for the resulting daughter region.
+   */
+  public Path getSplitsDir(final RegionInfo hri) {
+    return new Path(getParentSplitsDir(), hri.getEncodedName());
+  }
+
+  /**
+   * Defines the parent dir for the split dir.
+   * @return
+   */
+  public abstract Path getParentSplitsDir();
+
+  /**
+   * Defines the parent dir for the merges dir.
+   * @return
+   */
+  public abstract Path getParentMergesDir();

Review comment:
       I think it might be cleaner to push the `RegionInfo` into this call. Let 
the implementation decide where to put the "region dir" it's creating.
   
   The "getParent..." terminology is a little bit confusing to me. I think I 
really just want to know if I'm splitting or merging a region, where does the 
new region(s) get created?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to