This is an automated email from the ASF dual-hosted git repository. andor pushed a commit to branch HBASE-29081_rebased in repository https://gitbox.apache.org/repos/asf/hbase.git
commit 233a258f10cc77ec55073ba2eaa26f91e8c32e29 Author: Anuj Sharma <[email protected]> AuthorDate: Thu Mar 12 00:53:35 2026 +0530 HBASE-29959 Cluster started in read-only mode mistakenly deletes suffix file during startup (#7881) * HBASE-29959 Cluster started in read-only mode mistakenly deletes suffix file during startup * Move log message to if block. * Close file input stream * Change the getter which does not mutate the suffix data --- .../hadoop/hbase/master/MasterFileSystem.java | 5 +- .../access/AbstractReadOnlyController.java | 26 +++- .../security/access/TestReadOnlyController.java | 27 ---- .../TestReadOnlyManageActiveClusterFile.java | 153 +++++++++++++++++++++ 4 files changed, 174 insertions(+), 37 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java index 034faa05802..1734f03a273 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java @@ -415,7 +415,7 @@ public class MasterFileSystem { acs, getActiveClusterSuffix()); } catch (FileNotFoundException fnfe) { // this is the active cluster, create active cluster suffix file if it does not exist - FSUtils.setActiveClusterSuffix(fs, rootdir, getSuffixFileDataToWrite(), wait); + FSUtils.setActiveClusterSuffix(fs, rootdir, computeAndSetSuffixFileDataToWrite(), wait); } } else { // this is a replica cluster @@ -447,8 +447,7 @@ public class MasterFileSystem { return str.getBytes(StandardCharsets.UTF_8); } - // - public byte[] getSuffixFileDataToWrite() { + public byte[] computeAndSetSuffixFileDataToWrite() { String str = getClusterId().toString() + ":" + getActiveClusterSuffixFromConfig(conf); this.activeClusterSuffix = new ActiveClusterSuffix(str); return str.getBytes(StandardCharsets.UTF_8); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/security/access/AbstractReadOnlyController.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/security/access/AbstractReadOnlyController.java index d5039f84348..c8c1837b21b 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/security/access/AbstractReadOnlyController.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/security/access/AbstractReadOnlyController.java @@ -17,7 +17,11 @@ */ package org.apache.hadoop.hbase.security.access; +import java.io.FileNotFoundException; import java.io.IOException; +import java.nio.charset.StandardCharsets; +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.Coprocessor; @@ -59,25 +63,33 @@ public abstract class AbstractReadOnlyController implements Coprocessor { // ENABLING READ-ONLY (false -> true), delete the active cluster file. LOG.debug("Global read-only mode is being ENABLED. Deleting active cluster file: {}", activeClusterFile); - try { - if (fs.exists(activeClusterFile)) { + try (FSDataInputStream in = fs.open(activeClusterFile)) { + String actualClusterFileData = IOUtils.toString(in, StandardCharsets.UTF_8); + String expectedClusterFileData = mfs.getSuffixFromConfig(); + if (actualClusterFileData.equals(expectedClusterFileData)) { fs.delete(activeClusterFile, false); LOG.info("Successfully deleted active cluster file: {}", activeClusterFile); } else { - LOG.debug("Active cluster file does not exist at: {}. No need to delete.", - activeClusterFile); + LOG.debug( + "Active cluster file data does not match expected data. " + + "Not deleting the file to avoid potential inconsistency. " + + "Actual data: {}, Expected data: {}", + new String(actualClusterFileData), new String(expectedClusterFileData)); } - } catch (IOException e) { + } catch (FileNotFoundException e) { + LOG.debug("Active cluster file does not exist at: {}. No need to delete.", + activeClusterFile); + } catch (IOException e) { LOG.error( "Failed to delete active cluster file: {}. " + "Read-only flag will be updated, but file system state is inconsistent.", - activeClusterFile); + activeClusterFile, e); } } else { // DISABLING READ-ONLY (true -> false), create the active cluster file id file int wait = mfs.getConfiguration().getInt(HConstants.THREAD_WAKE_FREQUENCY, 10 * 1000); if (!fs.exists(activeClusterFile)) { - FSUtils.setActiveClusterSuffix(fs, rootDir, mfs.getSuffixFileDataToWrite(), wait); + FSUtils.setActiveClusterSuffix(fs, rootDir, mfs.computeAndSetSuffixFileDataToWrite(), wait); } else { LOG.debug("Active cluster file already exists at: {}. No need to create it again.", activeClusterFile); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/security/access/TestReadOnlyController.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/security/access/TestReadOnlyController.java index 15ee2fdd45b..31e5f8193ba 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/security/access/TestReadOnlyController.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/security/access/TestReadOnlyController.java @@ -18,15 +18,11 @@ package org.apache.hadoop.hbase.security.access; import static org.apache.hadoop.hbase.HConstants.HBASE_CLIENT_RETRIES_NUMBER; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; import java.io.IOException; import java.util.ArrayList; import java.util.List; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.HBaseClassTestRule; import org.apache.hadoop.hbase.HBaseTestingUtil; import org.apache.hadoop.hbase.HConstants; @@ -39,7 +35,6 @@ import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.client.Row; import org.apache.hadoop.hbase.client.Table; import org.apache.hadoop.hbase.master.HMaster; -import org.apache.hadoop.hbase.master.MasterFileSystem; import org.apache.hadoop.hbase.regionserver.HRegionServer; import org.apache.hadoop.hbase.testclassification.LargeTests; import org.apache.hadoop.hbase.testclassification.SecurityTests; @@ -152,14 +147,6 @@ public class TestReadOnlyController { hRegionServer.getConfigurationManager().notifyAllObservers(conf); } - private static boolean isActiveClusterIdFileExists() throws IOException { - MasterFileSystem mfs = hMaster.getMasterFileSystem(); - Path rootDir = mfs.getRootDir(); - FileSystem fs = mfs.getFileSystem(); - Path activeClusterFile = new Path(rootDir, HConstants.ACTIVE_CLUSTER_SUFFIX_FILE_NAME); - return fs.exists(activeClusterFile); - } - // The test case for successfully creating a table with Read-Only mode disabled happens when // setting up the test class, so we only need a test function for a failed table creation. @Test @@ -227,18 +214,4 @@ public class TestReadOnlyController { // This should throw the IOException testTable.batch(actions, null); } - - @Test - public void testActiveClusterIdFileCreationWhenReadOnlyDisabled() - throws IOException, InterruptedException { - disableReadOnlyMode(); - assertTrue(isActiveClusterIdFileExists()); - } - - @Test - public void testActiveClusterIdFileDeletionWhenReadOnlyEnabled() - throws IOException, InterruptedException { - enableReadOnlyMode(); - assertFalse(isActiveClusterIdFileExists()); - } } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/security/access/TestReadOnlyManageActiveClusterFile.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/security/access/TestReadOnlyManageActiveClusterFile.java new file mode 100644 index 00000000000..eaeda593ea4 --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/security/access/TestReadOnlyManageActiveClusterFile.java @@ -0,0 +1,153 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.security.access; + +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import java.io.IOException; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.HBaseTestingUtil; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.master.HMaster; +import org.apache.hadoop.hbase.master.MasterFileSystem; +import org.apache.hadoop.hbase.regionserver.HRegionServer; +import org.apache.hadoop.hbase.testclassification.MediumTests; +import org.apache.hadoop.hbase.testclassification.SecurityTests; +import org.junit.After; +import org.junit.Before; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +@Category({ SecurityTests.class, MediumTests.class }) +public class TestReadOnlyManageActiveClusterFile { + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestReadOnlyManageActiveClusterFile.class); + + private static final Logger LOG = + LoggerFactory.getLogger(TestReadOnlyManageActiveClusterFile.class); + private final HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil(); + private static final int NUM_RS = 1; + private static Configuration conf; + HMaster master; + HRegionServer regionServer; + + MasterFileSystem mfs; + Path rootDir; + FileSystem fs; + Path activeClusterFile; + + @Before + public void setup() throws Exception { + conf = TEST_UTIL.getConfiguration(); + + // Set up test class with Read-Only mode disabled so a table can be created + conf.setBoolean(HConstants.HBASE_GLOBAL_READONLY_ENABLED_KEY, false); + // Start the test cluster + TEST_UTIL.startMiniCluster(NUM_RS); + master = TEST_UTIL.getMiniHBaseCluster().getMaster(); + regionServer = TEST_UTIL.getMiniHBaseCluster().getRegionServer(0); + + mfs = master.getMasterFileSystem(); + rootDir = mfs.getRootDir(); + fs = mfs.getFileSystem(); + activeClusterFile = new Path(rootDir, HConstants.ACTIVE_CLUSTER_SUFFIX_FILE_NAME); + } + + @After + public void tearDown() throws Exception { + TEST_UTIL.shutdownMiniCluster(); + } + + private void setReadOnlyMode(boolean enabled) { + conf.setBoolean(HConstants.HBASE_GLOBAL_READONLY_ENABLED_KEY, enabled); + master.getConfigurationManager().notifyAllObservers(conf); + regionServer.getConfigurationManager().notifyAllObservers(conf); + } + + private void restartCluster() throws IOException, InterruptedException { + TEST_UTIL.getMiniHBaseCluster().shutdown(); + TEST_UTIL.restartHBaseCluster(NUM_RS); + TEST_UTIL.waitUntilNoRegionsInTransition(); + + master = TEST_UTIL.getMiniHBaseCluster().getMaster(); + regionServer = TEST_UTIL.getMiniHBaseCluster().getRegionServer(0); + + MasterFileSystem mfs = master.getMasterFileSystem(); + fs = mfs.getFileSystem(); + activeClusterFile = new Path(mfs.getRootDir(), HConstants.ACTIVE_CLUSTER_SUFFIX_FILE_NAME); + } + + private void overwriteExistingFile() throws IOException { + try (FSDataOutputStream out = fs.create(activeClusterFile, true)) { + out.writeBytes("newClusterId"); + } + } + + private boolean activeClusterIdFileExists() throws IOException { + return fs.exists(activeClusterFile); + } + + @Test + public void testActiveClusterIdFileCreationWhenReadOnlyDisabled() + throws IOException, InterruptedException { + setReadOnlyMode(false); + assertTrue(activeClusterIdFileExists()); + } + + @Test + public void testActiveClusterIdFileDeletionWhenReadOnlyEnabled() + throws IOException, InterruptedException { + setReadOnlyMode(true); + assertFalse(activeClusterIdFileExists()); + } + + @Test + public void testDeleteActiveClusterIdFileWhenSwitchingToReadOnlyIfOwnedByCluster() + throws IOException, InterruptedException { + // At the start cluster is in active mode hence set readonly mode and restart + conf.setBoolean(HConstants.HBASE_GLOBAL_READONLY_ENABLED_KEY, true); + // Restart the cluster to trigger the deletion of the active cluster ID file + restartCluster(); + // Should delete the active cluster ID file since it is owned by the cluster + assertFalse(activeClusterIdFileExists()); + } + + @Test + public void testDoNotDeleteActiveClusterIdFileWhenSwitchingToReadOnlyIfNotOwnedByCluster() + throws IOException, InterruptedException { + // Change the content of Active Cluster file to simulate the scenario where the file is not + // owned by the cluster and then set readonly mode and restart + overwriteExistingFile(); + // At the start cluster is in active mode hence set readonly mode and restart + conf.setBoolean(HConstants.HBASE_GLOBAL_READONLY_ENABLED_KEY, true); + // Restart the cluster to trigger the deletion of the active cluster ID file + restartCluster(); + // As Active cluster file is not owned by the cluster, it should not be deleted even when + // switching to readonly mode + assertTrue(activeClusterIdFileExists()); + } +}
