This is an automated email from the ASF dual-hosted git repository.

sergeychugunov pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ignite.git


The following commit(s) were added to refs/heads/master by this push:
     new c849eb5c963 IGNITE-18195 Message about restart in the middle of checkpoint with disabled WAL (#10386). - Fixes #10386.
c849eb5c963 is described below

commit c849eb5c9631ea8977b7c88ef3a8864dd5abc480
Author: Sergey Chugunov <[email protected]>
AuthorDate: Wed Nov 23 13:49:19 2022 +0300

    IGNITE-18195 Message about restart in the middle of checkpoint with disabled WAL (#10386). - Fixes #10386.
    
    Signed-off-by: Aleksandr Polovtsev <[email protected]>
---
 .../persistence/file/FilePageStoreManager.java     | 46 ++++++++++++----------
 .../wal/WalEnableDisableWithNodeShutdownTest.java  |  2 +-
 .../wal/WalEnableDisableWithRestartsTest.java      |  2 +-
 3 files changed, 27 insertions(+), 23 deletions(-)

diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/file/FilePageStoreManager.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/file/FilePageStoreManager.java
index cf6f23515ca..b4fbc232033 100755
--- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/file/FilePageStoreManager.java
+++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/file/FilePageStoreManager.java
@@ -350,22 +350,29 @@ public class FilePageStoreManager extends GridCacheSharedManagerAdapter implemen
 
     /** {@inheritDoc} */
     @Override public void beginRecover() {
-        List<String> groupsWithWalDisabled = checkCachesWithDisabledWal();
+        List<CacheConfiguration> cacheCfgs = findCacheGroupsWithDisabledWal();
 
-        if (!groupsWithWalDisabled.isEmpty()) {
-            String errorMsg = "Cache groups with potentially corrupted partition files found. " +
-                "To cleanup them maintenance is needed, node will enter maintenance mode on next restart. " +
-                "Cleanup cache group folders manually or trigger maintenance action to do that and restart the node. " +
-                "Corrupted files are located in subdirectories " + groupsWithWalDisabled +
-                " in a work dir " + storeWorkDir;
+        if (!cacheCfgs.isEmpty()) {
+            List<String> cacheGroupNames = cacheCfgs.stream()
+                .map(ccfg -> ccfg.getGroupName() != null ? ccfg.getGroupName() : ccfg.getName())
+                .collect(Collectors.toList());
+
+            String errorMsg = "Ignite node with disabled WAL was stopped in the middle of a checkpoint, " +
+                "data files may be corrupted. Node will stop and enter the Maintenance Mode on next start. " +
+                "In the Maintenance Mode, use the Control Utility *persistence* command " +
+                "to clean and optionally back up corrupted files. When cleaning is done, restart the node manually. " +
+                "Possible corruption affects the following cache groups: " + cacheGroupNames;
 
             log.warning(errorMsg);
 
             try {
-                cctx.kernalContext().maintenanceRegistry().registerMaintenanceTask(
-                    new MaintenanceTask(CORRUPTED_DATA_FILES_MNTC_TASK_NAME,
-                        "Corrupted cache groups found",
-                        groupsWithWalDisabled.stream().collect(Collectors.joining(File.separator)))
+                cctx.kernalContext().maintenanceRegistry()
+                    .registerMaintenanceTask(
+                        new MaintenanceTask(CORRUPTED_DATA_FILES_MNTC_TASK_NAME,
+                            "Corrupted cache groups found",
+                            cacheCfgs.stream()
+                                .map(ccfg -> cacheWorkDir(ccfg).getName())
+                                .collect(Collectors.joining(File.separator)))
                 );
             }
             catch (IgniteCheckedException e) {
@@ -384,12 +391,12 @@ public class FilePageStoreManager extends GridCacheSharedManagerAdapter implemen
     }
 
     /**
-     * Checks cache groups' settings and returns groups names with disabled WAL.
+     * Checks cache groups' settings and returns configurations of cache groups with disabled WAL.
      *
-     * @return List of cache groups names that had WAL disabled before node stop.
+     * @return List of cache groups' configurations that had WAL disabled before node stop.
      */
-    private List<String> checkCachesWithDisabledWal() {
-        List<String> corruptedCachesDirs = new ArrayList<>();
+    private List<CacheConfiguration> findCacheGroupsWithDisabledWal() {
+        List<CacheConfiguration> corruptedCacheGroups = new ArrayList<>();
 
         for (Integer grpDescId : idxCacheStores.keySet()) {
             CacheGroupDescriptor desc = cctx.cache().cacheGroupDescriptor(grpDescId);
@@ -402,17 +409,14 @@ public class FilePageStoreManager extends GridCacheSharedManagerAdapter implemen
                     File dir = cacheWorkDir(desc.config());
 
                     if (Arrays.stream(
-                        dir.listFiles())
-                        .filter(f -> !f.getName().equals(CACHE_DATA_FILENAME))
-                        .count() > 0
-                    ) {
-                        corruptedCachesDirs.add(cacheDirName(desc.config()));
+                        dir.listFiles()).anyMatch(f -> !f.getName().equals(CACHE_DATA_FILENAME))) {
+                        corruptedCacheGroups.add(desc.config());
                     }
                 }
             }
         }
 
-        return corruptedCachesDirs;
+        return corruptedCacheGroups;
     }
 
     /** {@inheritDoc} */
diff --git a/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/persistence/wal/WalEnableDisableWithNodeShutdownTest.java b/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/persistence/wal/WalEnableDisableWithNodeShutdownTest.java
index cc36db7c357..37194a32f44 100644
--- a/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/persistence/wal/WalEnableDisableWithNodeShutdownTest.java
+++ b/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/persistence/wal/WalEnableDisableWithNodeShutdownTest.java
@@ -245,7 +245,7 @@ public class WalEnableDisableWithNodeShutdownTest extends GridCommonAbstractTest
             node = Ignition.start(igniteCfg(false, consistentId));
         }
         catch (Exception ex) {
-            assertTrue(X.hasCause(ex, "Cache groups with potentially corrupted partition files", IgniteException.class));
+            assertTrue(X.hasCause(ex, "Ignite node with disabled WAL was stopped in the middle of a checkpoint", IgniteException.class));
 
             node = Ignition.start(igniteCfg(false, consistentId));
 
diff --git a/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/persistence/wal/WalEnableDisableWithRestartsTest.java b/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/persistence/wal/WalEnableDisableWithRestartsTest.java
index 08f33e09007..782c5bb5add 100644
--- a/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/persistence/wal/WalEnableDisableWithRestartsTest.java
+++ b/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/persistence/wal/WalEnableDisableWithRestartsTest.java
@@ -142,7 +142,7 @@ public class WalEnableDisableWithRestartsTest extends GridCommonAbstractTest {
             node = Ignition.start(igniteCfg(false, consistentId));
         }
         catch (Exception ex) {
-            if (!X.hasCause(ex, "Cache groups with potentially corrupted partition files", IgniteException.class))
+            if (!X.hasCause(ex, "Ignite node with disabled WAL was stopped in the middle of a checkpoint", IgniteException.class))
                 throw ex;
 
             node = Ignition.start(igniteCfg(false, consistentId));

Reply via email to