Apache9 commented on a change in pull request #941: HBASE-23326 Implement a 
ProcedureStore which stores procedures in a H…
URL: https://github.com/apache/hbase/pull/941#discussion_r361034742
 
 

 ##########
 File path: 
hbase-server/src/main/java/org/apache/hadoop/hbase/procedure2/store/region/RegionProcedureStore.java
 ##########
 @@ -0,0 +1,576 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.procedure2.store.region;
+
+import static org.apache.hadoop.hbase.HConstants.EMPTY_BYTE_ARRAY;
+import static org.apache.hadoop.hbase.HConstants.HREGION_LOGDIR_NAME;
+import static org.apache.hadoop.hbase.HConstants.NO_NONCE;
+
+import java.io.IOException;
+import java.io.UncheckedIOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import org.apache.commons.lang3.mutable.MutableLong;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.HBaseIOException;
+import org.apache.hadoop.hbase.Server;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
+import org.apache.hadoop.hbase.client.Delete;
+import org.apache.hadoop.hbase.client.Mutation;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.RegionInfo;
+import org.apache.hadoop.hbase.client.RegionInfoBuilder;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.TableDescriptor;
+import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
+import org.apache.hadoop.hbase.log.HBaseMarkers;
+import org.apache.hadoop.hbase.procedure2.Procedure;
+import org.apache.hadoop.hbase.procedure2.ProcedureUtil;
+import org.apache.hadoop.hbase.procedure2.store.LeaseRecovery;
+import org.apache.hadoop.hbase.procedure2.store.ProcedureStoreBase;
+import org.apache.hadoop.hbase.procedure2.store.ProcedureTree;
+import org.apache.hadoop.hbase.procedure2.store.wal.WALProcedureStore;
+import org.apache.hadoop.hbase.regionserver.HRegion;
+import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
+import org.apache.hadoop.hbase.regionserver.RegionScanner;
+import org.apache.hadoop.hbase.regionserver.wal.AbstractFSWAL;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.CommonFSUtils;
+import org.apache.hadoop.hbase.wal.AbstractFSWALProvider;
+import org.apache.hadoop.hbase.wal.WAL;
+import org.apache.hadoop.hbase.wal.WALFactory;
+import org.apache.yetus.audience.InterfaceAudience;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import 
org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
+
+import org.apache.hadoop.hbase.shaded.protobuf.generated.ProcedureProtos;
+
+/**
+ * A procedure store which uses a region to store all the procedures.
+ * <p/>
+ * FileSystem layout:
+ *
+ * <pre>
+ * hbase
+ *   |
+ *   --MasterProcs
+ *       |
+ *       --data
+ *       |  |
+ *       |  --/master/procedure/&lt;encoded-region-name&gt; <---- The region 
data
+ *       |      |
+ *       |      --replay <---- The edits to replay
+ *       |
+ *       --WALs
+ *          |
+ *          --&lt;master-server-name&gt; <---- The WAL dir for active master
+ *          |
+ *          --&lt;master-server-name&gt;-dead <---- The WAL dir dead master
+ * </pre>
+ */
+@InterfaceAudience.Private
+public class RegionProcedureStore extends ProcedureStoreBase {
+
+  private static final Logger LOG = 
LoggerFactory.getLogger(RegionProcedureStore.class);
+
+  static final String MAX_WALS_KEY = "hbase.procedure.store.region.maxwals";
+
+  private static final int DEFAULT_MAX_WALS = 10;
+
+  static final String USE_HSYNC_KEY = "hbase.procedure.store.region.wal.hsync";
+
+  static final String MASTER_PROCEDURE_DIR = "MasterProcs";
+
+  static final String LOGCLEANER_PLUGINS = 
"hbase.procedure.store.region.logcleaner.plugins";
+
+  private static final String DATA_DIR = "data";
+
+  private static final String REPLAY_EDITS_DIR = "replay";
+
+  private static final String DEAD_WAL_DIR_SUFFIX = "-dead";
+
+  private static final TableName TABLE_NAME = 
TableName.valueOf("master:procedure");
+
+  private static final byte[] FAMILY = Bytes.toBytes("info");
+
+  private static final byte[] PROC_QUALIFIER = Bytes.toBytes("proc");
+
+  private static final TableDescriptor TABLE_DESC = 
TableDescriptorBuilder.newBuilder(TABLE_NAME)
+    .setColumnFamily(ColumnFamilyDescriptorBuilder.of(FAMILY)).build();
+
+  private final Server server;
+
+  private final LeaseRecovery leaseRecovery;
+
+  private WALFactory walFactory;
+
+  @VisibleForTesting
+  HRegion region;
+
+  private RegionFlusherAndCompactor flusherAndCompactor;
+
+  @VisibleForTesting
+  RegionProcedureStoreWALRoller walRoller;
+
+  private int numThreads;
+
+  public RegionProcedureStore(Server server, LeaseRecovery leaseRecovery) {
+    this.server = server;
+    this.leaseRecovery = leaseRecovery;
+  }
+
+  @Override
+  public void start(int numThreads) throws IOException {
+    if (!setRunning(true)) {
+      return;
+    }
+    LOG.info("Starting the Region Procedure Store...");
+    this.numThreads = numThreads;
+  }
+
+  private void shutdownWAL() {
+    if (walFactory != null) {
+      try {
+        walFactory.shutdown();
+      } catch (IOException e) {
+        LOG.warn("Failed to shutdown WAL", e);
+      }
+    }
+  }
+
+  private void closeRegion(boolean abort) {
+    if (region != null) {
+      try {
+        region.close(abort);
+      } catch (IOException e) {
+        LOG.warn("Failed to close region", e);
+      }
+    }
+
+  }
+
+  @Override
+  public void stop(boolean abort) {
+    if (!setRunning(false)) {
+      return;
+    }
+    LOG.info("Stopping the Region Procedure Store, isAbort={}", abort);
+    if (flusherAndCompactor != null) {
+      flusherAndCompactor.close();
+    }
+    // if abort, we shutdown wal first to fail the ongoing updates to the 
region, and then close the
+    // region, otherwise there will be dead lock.
+    if (abort) {
+      shutdownWAL();
+      closeRegion(true);
+    } else {
+      closeRegion(false);
+      shutdownWAL();
+    }
+
+    if (walRoller != null) {
+      walRoller.close();
+    }
+  }
+
+  @Override
+  public int getNumThreads() {
+    return numThreads;
+  }
+
+  @Override
+  public int setRunningProcedureCount(int count) {
+    // useless for region based storage.
+    return count;
+  }
+
+  private WAL createWAL(FileSystem fs, Path rootDir, RegionInfo regionInfo) 
throws IOException {
+    String logName = 
AbstractFSWALProvider.getWALDirectoryName(server.getServerName().toString());
+    Path walDir = new Path(rootDir, logName);
+    LOG.debug("WALDir={}", walDir);
+    if (fs.exists(walDir)) {
+      throw new HBaseIOException(
+        "Master procedure store has already created directory at " + walDir);
+    }
+    if (!fs.mkdirs(walDir)) {
+      throw new IOException("Can not create master procedure wal directory " + 
walDir);
+    }
+    WAL wal = walFactory.getWAL(regionInfo);
+    walRoller.addWAL(wal);
+    return wal;
+  }
+
+  private HRegion bootstrap(Configuration conf, FileSystem fs, Path rootDir, 
Path dataDir)
+    throws IOException {
+    RegionInfo regionInfo = RegionInfoBuilder.newBuilder(TABLE_NAME).build();
+    Path tmpDataDir = new Path(dataDir.getParent(), dataDir.getName() + 
"-tmp");
+    if (fs.exists(tmpDataDir) && !fs.delete(tmpDataDir, true)) {
+      throw new IOException("Can not delete partial created proc region " + 
tmpDataDir);
+    }
+    Path tableDir = CommonFSUtils.getTableDir(tmpDataDir, TABLE_NAME);
+    HRegion.createHRegion(conf, regionInfo, fs, tableDir, TABLE_DESC).close();
+    if (!fs.rename(tmpDataDir, dataDir)) {
+      throw new IOException("Can not rename " + tmpDataDir + " to " + dataDir);
+    }
+    WAL wal = createWAL(fs, rootDir, regionInfo);
+    return HRegion.openHRegionFromTableDir(conf, fs, tableDir, regionInfo, 
TABLE_DESC, wal, null,
+      null);
+  }
+
+  private HRegion open(Configuration conf, FileSystem fs, Path rootDir, Path 
dataDir)
+    throws IOException {
+    String factoryId = server.getServerName().toString();
+    Path tableDir = CommonFSUtils.getTableDir(dataDir, TABLE_NAME);
+    Path regionDir =
+      fs.listStatus(tableDir, p -> 
RegionInfo.isEncodedRegionName(Bytes.toBytes(p.getName())))[0]
+        .getPath();
+    Path replayEditsDir = new Path(regionDir, REPLAY_EDITS_DIR);
+    if (!fs.exists(replayEditsDir) && !fs.mkdirs(replayEditsDir)) {
+      throw new IOException("Failed to create replay directory: " + 
replayEditsDir);
+    }
+    Path walsDir = new Path(rootDir, HREGION_LOGDIR_NAME);
+    for (FileStatus walDir : fs.listStatus(walsDir)) {
+      if (!walDir.isDirectory()) {
+        continue;
+      }
+      if (walDir.getPath().getName().startsWith(factoryId)) {
+        LOG.warn("This should not happen in real production as we have not 
created our WAL " +
+          "directory yet, ignore if you are running a procedure related UT");
+      }
+      Path deadWALDir;
+      if (!walDir.getPath().getName().endsWith(DEAD_WAL_DIR_SUFFIX)) {
+        deadWALDir =
+          new Path(walDir.getPath().getParent(), walDir.getPath().getName() + 
DEAD_WAL_DIR_SUFFIX);
+        if (!fs.rename(walDir.getPath(), deadWALDir)) {
+          throw new IOException("Can not rename " + walDir + " to " + 
deadWALDir +
+            " when recovering lease of proc store");
+        }
+        LOG.info("Renamed {} to {} as it is dead", walDir.getPath(), 
deadWALDir);
+      } else {
+        deadWALDir = walDir.getPath();
+        LOG.info("{} is already marked as dead", deadWALDir);
+      }
+      for (FileStatus walFile : fs.listStatus(deadWALDir)) {
+        Path replayEditsFile = new Path(replayEditsDir, 
walFile.getPath().getName());
+        leaseRecovery.recoverFileLease(fs, walFile.getPath());
+        if (!fs.rename(walFile.getPath(), replayEditsFile)) {
+          throw new IOException("Can not rename " + walFile.getPath() + " to " 
+ replayEditsFile +
+            " when recovering lease of proc store");
+        }
+        LOG.info("Renamed {} to {}", walFile.getPath(), replayEditsFile);
+      }
+      LOG.info("Delete empty proc wal dir {}", deadWALDir);
+      fs.delete(deadWALDir, true);
+    }
+    RegionInfo regionInfo = HRegionFileSystem.loadRegionInfoFileContent(fs, 
regionDir);
+    WAL wal = createWAL(fs, rootDir, regionInfo);
+    conf.set(HRegion.RECOVERED_EDITS_DIR,
+      replayEditsDir.makeQualified(fs.getUri(), 
fs.getWorkingDirectory()).toString());
+    return HRegion.openHRegionFromTableDir(conf, fs, tableDir, regionInfo, 
TABLE_DESC, wal, null,
+      null);
+  }
+
+  @SuppressWarnings("deprecation")
+  private void tryMigrate(FileSystem fs) throws IOException {
+    Configuration conf = server.getConfiguration();
+    Path procWALDir =
+      new Path(CommonFSUtils.getWALRootDir(conf), 
WALProcedureStore.MASTER_PROCEDURE_LOGDIR);
+    if (!fs.exists(procWALDir)) {
+      return;
+    }
+    LOG.info("The old procedure wal directory {} exists, start migrating", 
procWALDir);
+    WALProcedureStore store = new WALProcedureStore(conf, leaseRecovery);
+    store.start(numThreads);
+    store.recoverLease();
+    MutableLong maxProcIdSet = new MutableLong(-1);
+    MutableLong maxProcIdFromProcs = new MutableLong(-1);
+    store.load(new ProcedureLoader() {
 
 Review comment:
   It could. If this happens, the master will crash, i.e., we fail startup. We 
first need to fix the original WAL procedure store, and then try to upgrade.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

Reply via email to