[
https://issues.apache.org/jira/browse/IGNITE-10720?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16744790#comment-16744790
]
Ivan Artukhov commented on IGNITE-10720:
----------------------------------------
Got the following assertion error when starting a server node that was
previously stopped (killed) during a checkpoint:
{code:java}
[2019-01-16 15:57:48,646][INFO ][main] Starting binary memory restore for:
[-1282194281, -1282194277, -1282194278, -1282194279, -1282194280, -1282194273,
-1282194274, -623411239, -1282194275, -1282194276, -1282194303, -1282194304,
-1282194249, -1282194250, -1282194245, -1282194246, -1282194247, -1282194248,
-1282194241, -1282194242, -1282194243, -1282194244, -1282194272, -2100569601,
-1282194217, -1282194218, -1282194219, -1282194213, -1282194214, -1282194215,
-1282194216, -1282194210, -1282194211, -1282194212, -1093321804, -1282194185,
-1282194186, -1282194187, -1282194188, -1282194181, -1282194182, -1282194183,
-1282194184, -1282194433, -1309826943, -1282194434, -1282194179, -1282194435,
-1282194436, -1282194180, 692518463, 692455107, -1282194405, -623474595,
-1282194401, -1282194402, -1282194403, -1282194404, -1282194429, -1282194430,
-1282194431, -1282194432, -1282194427, -1282194428, -2131765427, -1282194373,
-1282194374, -1282194369, -1282194370, -1282194371, -1282194372, -1282194397,
-1282194398, -1282194399, -1282194400, -1282194396, -1282194341, -1282194342,
-1282194343, -1282194337, -1282194338, -1282194339, -1282194340, -1282194365,
-1282194366, -1282194367, -1282194368, -1974223438, -1282194309, -1282194310,
-1282194311, -1282194312, -1282194305, -1282194306, -1282194307, -1282194308,
-1282194334, -1282194335, 374280892, -1941895502, -1282194336, 374280891,
374280890, 374280889, 374280888, 374280887, 374280886, 374280885, 374280884]
[2019-01-16 15:57:52,885][INFO ][main] Read checkpoint status
[startMarker=/storage/ssd/prtagent/poc/work/db/poc_tester_server_172_25_1_22_id_0/cp/1547643332579-8f0f1aca-36de-4cfd-a5ea-08daae99c80c-START.bin,
endMarker=/storage/ssd/prtagent/poc/work/db/poc_tester_server_172_25_1_22_id_0/cp/1547643312802-93a29247-8743-492c-951e-035a2126ddde-END.bin]
[2019-01-16 15:57:52,885][INFO ][main] Checking memory state
[lastValidPos=FileWALPointer [idx=3302, fileOff=1319234, len=2055203],
lastMarked=FileWALPointer [idx=3338, fileOff=59, len=2055203],
lastCheckpointId=8f0f1aca-36de-4cfd-a5ea-08daae99c80c]
[2019-01-16 15:57:52,886][WARN ][main] Ignite node stopped in the middle of
checkpoint. Will restore memory state and finish checkpoint on node start.
[2019-01-16 15:58:01,586][ERROR][main] Exception during start processors,
node will be stopped and close connections
java.lang.AssertionError: 0002ffff00000000
at
org.apache.ignite.internal.processors.cache.persistence.pagemem.PageMemoryImpl.writeUnlockPage(PageMemoryImpl.java:1551)
~[ignite-core-2.8.0-SNAPSHOT.jar:2.8.0-SNAPSHOT]
at
org.apache.ignite.internal.processors.cache.persistence.pagemem.PageMemoryImpl.writeUnlock(PageMemoryImpl.java:470)
~[ignite-core-2.8.0-SNAPSHOT.jar:2.8.0-SNAPSHOT]
at
org.apache.ignite.internal.processors.cache.persistence.GridCacheDatabaseSharedManager.performBinaryMemoryRestore(GridCacheDatabaseSharedManager.java:2228)
~[ignite-core-2.8.0-SNAPSHOT.jar:2.8.0-SNAPSHOT]
at
org.apache.ignite.internal.processors.cache.persistence.GridCacheDatabaseSharedManager.restoreBinaryMemory(GridCacheDatabaseSharedManager.java:942)
~[ignite-core-2.8.0-SNAPSHOT.jar:2.8.0-SNAPSHOT]
at
org.apache.ignite.internal.processors.cache.persistence.GridCacheDatabaseSharedManager.startMemoryRestore(GridCacheDatabaseSharedManager.java:1946)
~[ignite-core-2.8.0-SNAPSHOT.jar:2.8.0-SNAPSHOT]
at
org.apache.ignite.internal.IgniteKernal.start(IgniteKernal.java:1054)
[ignite-core-2.8.0-SNAPSHOT.jar:2.8.0-SNAPSHOT]
at
org.apache.ignite.internal.IgnitionEx$IgniteNamedInstance.start0(IgnitionEx.java:2041)
[ignite-core-2.8.0-SNAPSHOT.jar:2.8.0-SNAPSHOT]
at
org.apache.ignite.internal.IgnitionEx$IgniteNamedInstance.start(IgnitionEx.java:1732)
[ignite-core-2.8.0-SNAPSHOT.jar:2.8.0-SNAPSHOT]
at org.apache.ignite.internal.IgnitionEx.start0(IgnitionEx.java:1158)
[ignite-core-2.8.0-SNAPSHOT.jar:2.8.0-SNAPSHOT]
at org.apache.ignite.internal.IgnitionEx.start(IgnitionEx.java:656)
[ignite-core-2.8.0-SNAPSHOT.jar:2.8.0-SNAPSHOT]
at org.apache.ignite.IgniteSpring.start(IgniteSpring.java:66)
[ignite-spring-2.8.0-SNAPSHOT.jar:2.8.0-SNAPSHOT]
at
org.apache.ignite.scenario.internal.utils.IgniteNode.start(IgniteNode.java:242)
[poc-tester-0.1.0-SNAPSHOT.jar:?]
at
org.apache.ignite.scenario.internal.utils.IgniteNode.main(IgniteNode.java:82)
[poc-tester-0.1.0-SNAPSHOT.jar:?]
[2019-01-16 15:58:01,594][ERROR][main] Got exception while starting (will
rollback startup routine).
java.lang.AssertionError: 0002ffff00000000
at
org.apache.ignite.internal.processors.cache.persistence.pagemem.PageMemoryImpl.writeUnlockPage(PageMemoryImpl.java:1551)
~[ignite-core-2.8.0-SNAPSHOT.jar:2.8.0-SNAPSHOT]
at
org.apache.ignite.internal.processors.cache.persistence.pagemem.PageMemoryImpl.writeUnlock(PageMemoryImpl.java:470)
~[ignite-core-2.8.0-SNAPSHOT.jar:2.8.0-SNAPSHOT]
at
org.apache.ignite.internal.processors.cache.persistence.GridCacheDatabaseSharedManager.performBinaryMemoryRestore(GridCacheDatabaseSharedManager.java:2228)
~[ignite-core-2.8.0-SNAPSHOT.jar:2.8.0-SNAPSHOT]
at
org.apache.ignite.internal.processors.cache.persistence.GridCacheDatabaseSharedManager.restoreBinaryMemory(GridCacheDatabaseSharedManager.java:942)
~[ignite-core-2.8.0-SNAPSHOT.jar:2.8.0-SNAPSHOT]
at
org.apache.ignite.internal.processors.cache.persistence.GridCacheDatabaseSharedManager.startMemoryRestore(GridCacheDatabaseSharedManager.java:1946)
~[ignite-core-2.8.0-SNAPSHOT.jar:2.8.0-SNAPSHOT]
at
org.apache.ignite.internal.IgniteKernal.start(IgniteKernal.java:1054)
[ignite-core-2.8.0-SNAPSHOT.jar:2.8.0-SNAPSHOT]
at
org.apache.ignite.internal.IgnitionEx$IgniteNamedInstance.start0(IgnitionEx.java:2041)
[ignite-core-2.8.0-SNAPSHOT.jar:2.8.0-SNAPSHOT]
at
org.apache.ignite.internal.IgnitionEx$IgniteNamedInstance.start(IgnitionEx.java:1732)
[ignite-core-2.8.0-SNAPSHOT.jar:2.8.0-SNAPSHOT]
at org.apache.ignite.internal.IgnitionEx.start0(IgnitionEx.java:1158)
[ignite-core-2.8.0-SNAPSHOT.jar:2.8.0-SNAPSHOT]
at org.apache.ignite.internal.IgnitionEx.start(IgnitionEx.java:656)
[ignite-core-2.8.0-SNAPSHOT.jar:2.8.0-SNAPSHOT]
at org.apache.ignite.IgniteSpring.start(IgniteSpring.java:66)
[ignite-spring-2.8.0-SNAPSHOT.jar:2.8.0-SNAPSHOT]
at
org.apache.ignite.scenario.internal.utils.IgniteNode.start(IgniteNode.java:242)
[poc-tester-0.1.0-SNAPSHOT.jar:?]
at
org.apache.ignite.scenario.internal.utils.IgniteNode.main(IgniteNode.java:82)
[poc-tester-0.1.0-SNAPSHOT.jar:?]
{code}
> Decrease time to save metadata during checkpoint
> ------------------------------------------------
>
> Key: IGNITE-10720
> URL: https://issues.apache.org/jira/browse/IGNITE-10720
> Project: Ignite
> Issue Type: Improvement
> Reporter: Anton Kalashnikov
> Assignee: Anton Kalashnikov
> Priority: Major
> Time Spent: 10m
> Remaining Estimate: 0h
>
> Looks like it's not necessary to save all metadata (like the free list) under
> the checkpoint write lock, because sometimes it takes too long.
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)