This is an automated email from the ASF dual-hosted git repository.
sergeychugunov pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ignite.git
The following commit(s) were added to refs/heads/master by this push:
new c849eb5c963 IGNITE-18195 Message about restart in the middle of checkpoint with disabled WAL (#10386). - Fixes #10386.
c849eb5c963 is described below
commit c849eb5c9631ea8977b7c88ef3a8864dd5abc480
Author: Sergey Chugunov <[email protected]>
AuthorDate: Wed Nov 23 13:49:19 2022 +0300
IGNITE-18195 Message about restart in the middle of checkpoint with disabled WAL (#10386). - Fixes #10386.
Signed-off-by: Aleksandr Polovtsev <[email protected]>
---
.../persistence/file/FilePageStoreManager.java | 46 ++++++++++++----------
.../wal/WalEnableDisableWithNodeShutdownTest.java | 2 +-
.../wal/WalEnableDisableWithRestartsTest.java | 2 +-
3 files changed, 27 insertions(+), 23 deletions(-)
diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/file/FilePageStoreManager.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/file/FilePageStoreManager.java
index cf6f23515ca..b4fbc232033 100755
--- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/file/FilePageStoreManager.java
+++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/file/FilePageStoreManager.java
@@ -350,22 +350,29 @@ public class FilePageStoreManager extends GridCacheSharedManagerAdapter implemen
/** {@inheritDoc} */
@Override public void beginRecover() {
- List<String> groupsWithWalDisabled = checkCachesWithDisabledWal();
+ List<CacheConfiguration> cacheCfgs = findCacheGroupsWithDisabledWal();
- if (!groupsWithWalDisabled.isEmpty()) {
- String errorMsg = "Cache groups with potentially corrupted
partition files found. " +
- "To cleanup them maintenance is needed, node will enter
maintenance mode on next restart. " +
- "Cleanup cache group folders manually or trigger maintenance
action to do that and restart the node. " +
- "Corrupted files are located in subdirectories " +
groupsWithWalDisabled +
- " in a work dir " + storeWorkDir;
+ if (!cacheCfgs.isEmpty()) {
+ List<String> cacheGroupNames = cacheCfgs.stream()
+ .map(ccfg -> ccfg.getGroupName() != null ? ccfg.getGroupName()
: ccfg.getName())
+ .collect(Collectors.toList());
+
+ String errorMsg = "Ignite node with disabled WAL was stopped in
the middle of a checkpoint, " +
+ "data files may be corrupted. Node will stop and enter the
Maintenance Mode on next start. " +
+ "In the Maintenance Mode, use the Control Utility
*persistence* command " +
+ "to clean and optionally back up corrupted files. When
cleaning is done, restart the node manually. " +
+ "Possible corruption affects the following cache groups: " +
cacheGroupNames;
log.warning(errorMsg);
try {
-
cctx.kernalContext().maintenanceRegistry().registerMaintenanceTask(
- new MaintenanceTask(CORRUPTED_DATA_FILES_MNTC_TASK_NAME,
- "Corrupted cache groups found",
-
groupsWithWalDisabled.stream().collect(Collectors.joining(File.separator)))
+ cctx.kernalContext().maintenanceRegistry()
+ .registerMaintenanceTask(
+ new
MaintenanceTask(CORRUPTED_DATA_FILES_MNTC_TASK_NAME,
+ "Corrupted cache groups found",
+ cacheCfgs.stream()
+ .map(ccfg -> cacheWorkDir(ccfg).getName())
+ .collect(Collectors.joining(File.separator)))
);
}
catch (IgniteCheckedException e) {
@@ -384,12 +391,12 @@ public class FilePageStoreManager extends GridCacheSharedManagerAdapter implemen
}
/**
- * Checks cache groups' settings and returns groups names with disabled
WAL.
+ * Checks cache groups' settings and returns configurations of cache
groups with disabled WAL.
*
- * @return List of cache groups names that had WAL disabled before node
stop.
+ * @return List of cache groups' configurations that had WAL disabled
before node stop.
*/
- private List<String> checkCachesWithDisabledWal() {
- List<String> corruptedCachesDirs = new ArrayList<>();
+ private List<CacheConfiguration> findCacheGroupsWithDisabledWal() {
+ List<CacheConfiguration> corruptedCacheGroups = new ArrayList<>();
for (Integer grpDescId : idxCacheStores.keySet()) {
CacheGroupDescriptor desc =
cctx.cache().cacheGroupDescriptor(grpDescId);
@@ -402,17 +409,14 @@ public class FilePageStoreManager extends GridCacheSharedManagerAdapter implemen
File dir = cacheWorkDir(desc.config());
if (Arrays.stream(
- dir.listFiles())
- .filter(f -> !f.getName().equals(CACHE_DATA_FILENAME))
- .count() > 0
- ) {
- corruptedCachesDirs.add(cacheDirName(desc.config()));
+ dir.listFiles()).anyMatch(f ->
!f.getName().equals(CACHE_DATA_FILENAME))) {
+ corruptedCacheGroups.add(desc.config());
}
}
}
}
- return corruptedCachesDirs;
+ return corruptedCacheGroups;
}
/** {@inheritDoc} */
diff --git a/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/persistence/wal/WalEnableDisableWithNodeShutdownTest.java b/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/persistence/wal/WalEnableDisableWithNodeShutdownTest.java
index cc36db7c357..37194a32f44 100644
--- a/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/persistence/wal/WalEnableDisableWithNodeShutdownTest.java
+++ b/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/persistence/wal/WalEnableDisableWithNodeShutdownTest.java
@@ -245,7 +245,7 @@ public class WalEnableDisableWithNodeShutdownTest extends GridCommonAbstractTest
node = Ignition.start(igniteCfg(false, consistentId));
}
catch (Exception ex) {
- assertTrue(X.hasCause(ex, "Cache groups with potentially corrupted
partition files", IgniteException.class));
+ assertTrue(X.hasCause(ex, "Ignite node with disabled WAL was
stopped in the middle of a checkpoint", IgniteException.class));
node = Ignition.start(igniteCfg(false, consistentId));
diff --git a/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/persistence/wal/WalEnableDisableWithRestartsTest.java b/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/persistence/wal/WalEnableDisableWithRestartsTest.java
index 08f33e09007..782c5bb5add 100644
--- a/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/persistence/wal/WalEnableDisableWithRestartsTest.java
+++ b/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/persistence/wal/WalEnableDisableWithRestartsTest.java
@@ -142,7 +142,7 @@ public class WalEnableDisableWithRestartsTest extends GridCommonAbstractTest {
node = Ignition.start(igniteCfg(false, consistentId));
}
catch (Exception ex) {
- if (!X.hasCause(ex, "Cache groups with potentially corrupted
partition files", IgniteException.class))
+ if (!X.hasCause(ex, "Ignite node with disabled WAL was stopped in
the middle of a checkpoint", IgniteException.class))
throw ex;
node = Ignition.start(igniteCfg(false, consistentId));