Our Ignite cluster is stuck in an inactive state and cannot be restored from a
checkpoint.

When the cluster is activated, the following exception occurs:

[17:40:54,750][INFO][exchange-worker-#122][GridCacheDatabaseSharedManager]
Read checkpoint status
[startMarker=/naver/ignite_storage/20180330/storage/node00-698bff11-10c4-4fa9-87bf-07f22714951e/cp/1525070153790-cd46119a-51cd-49af-9ffa-0dccca84fb20-START.bin,
endMarker=/naver/ignite_storage/20180330/storage/node00-698bff11-10c4-4fa9-87bf-07f22714951e/cp/1525070153790-cd46119a-51cd-49af-9ffa-0dccca84fb20-END.bin]
[17:40:54,750][INFO][exchange-worker-#122][GridCacheDatabaseSharedManager]
Applying lost cache updates since last checkpoint record
[lastMarked=FileWALPointer [idx=106922, fileOffset=3457606, len=299101,
forceFlush=false], lastCheckpointId=cd46119a-51cd-49af-9ffa-0dccca84fb20]
[17:40:54,818][SEVERE][exchange-worker-#122][GridDhtPartitionsExchangeFuture]
Failed to reinitialize local partitions (preloading will be stopped):
GridDhtPartitionExchangeId [topVer=AffinityTopologyVersion [topVer=12,
minorTopVer=1], discoEvt=DiscoveryCustomEvent
[customMsg=ChangeGlobalStateMessage
[id=9a375b51361-acca12ae-d9fb-4e21-a282-3bc7af575257,
reqId=b3985722-b063-4e5a-831e-9f84d656df96,
initiatingNodeId=c6e1394e-bf7a-4fe4-a1bf-f64193bd44f4, activate=true],
affTopVer=AffinityTopologyVersion [topVer=12, minorTopVer=1],
super=DiscoveryEvent [evtNode=TcpDiscoveryNode
[id=c6e1394e-bf7a-4fe4-a1bf-f64193bd44f4, addrs=[10.116.24.222, 10.244.5.0,
127.0.0.1, 172.17.0.1, 192.168.193.192], sockAddrs=[/10.244.5.0:47500,
/172.17.0.1:47500, /192.168.193.192:47500, /127.0.0.1:47500,
/10.116.24.222:47500], discPort=47500, order=3, intOrder=3,
lastExchangeTime=1525077608394, loc=false, ver=2.3.0#20171220-sha1:8431829c,
isClient=false], topVer=12, nodeId8=e8f4c909, msg=null,
type=DISCOVERY_CUSTOM_EVT, tstamp=1525077647980]], nodeId=c6e1394e,
evt=DISCOVERY_CUSTOM_EVT]
java.lang.IndexOutOfBoundsException: index 890
        at
java.util.concurrent.atomic.AtomicReferenceArray.checkedByteOffset(AtomicReferenceArray.java:78)
        at
java.util.concurrent.atomic.AtomicReferenceArray.get(AtomicReferenceArray.java:125)
        at
org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtPartitionTopologyImpl.forceCreatePartition(GridDhtPartitionTopologyImpl.java:767)
        at
org.apache.ignite.internal.processors.cache.persistence.GridCacheDatabaseSharedManager.applyUpdate(GridCacheDatabaseSharedManager.java:1777)
        at
org.apache.ignite.internal.processors.cache.persistence.GridCacheDatabaseSharedManager.applyLastUpdates(GridCacheDatabaseSharedManager.java:1637)
        at
org.apache.ignite.internal.processors.cache.persistence.GridCacheDatabaseSharedManager.restoreState(GridCacheDatabaseSharedManager.java:1072)
        at
org.apache.ignite.internal.processors.cache.persistence.GridCacheDatabaseSharedManager.beforeExchange(GridCacheDatabaseSharedManager.java:863)
        at
org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionsExchangeFuture.distributedExchange(GridDhtPartitionsExchangeFuture.java:1019)
        at
org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionsExchangeFuture.init(GridDhtPartitionsExchangeFuture.java:651)
        at
org.apache.ignite.internal.processors.cache.GridCachePartitionExchangeManager$ExchangeWorker.body(GridCachePartitionExchangeManager.java:2279)
        at
org.apache.ignite.internal.util.worker.GridWorker.run(GridWorker.java:110)
        at java.lang.Thread.run(Thread.java:745)
[17:40:54,818][INFO][exchange-worker-#122][GridDhtPartitionsExchangeFuture]
Finish exchange future [startVer=AffinityTopologyVersion [topVer=12,
minorTopVer=1], resVer=null, err=java.lang.IndexOutOfBoundsException: index
890]
[17:40:54,830][SEVERE][exchange-worker-#122][GridCachePartitionExchangeManager]
Failed to wait for completion of partition map exchange (preloading will not
start): GridDhtPartitionsExchangeFuture [firstDiscoEvt=DiscoveryCustomEvent
[customMsg=null, affTopVer=AffinityTopologyVersion [topVer=12,
minorTopVer=1], super=DiscoveryEvent [evtNode=TcpDiscoveryNode
[id=c6e1394e-bf7a-4fe4-a1bf-f64193bd44f4, addrs=[10.116.24.222, 10.244.5.0,
127.0.0.1, 172.17.0.1, 192.168.193.192], sockAddrs=[/10.244.5.0:47500,
/172.17.0.1:47500, /192.168.193.192:47500, /127.0.0.1:47500,
/10.116.24.222:47500], discPort=47500, order=3, intOrder=3,
lastExchangeTime=1525077608394, loc=false, ver=2.3.0#20171220-sha1:8431829c,
isClient=false], topVer=12, nodeId8=e8f4c909, msg=null,
type=DISCOVERY_CUSTOM_EVT, tstamp=1525077647980]], crd=TcpDiscoveryNode
[id=8e65440a-df65-4770-9a7b-26672bd574a3, addrs=[10.116.25.32, 10.244.6.0,
127.0.0.1, 172.17.0.1, 192.168.82.128], sockAddrs=[/10.244.6.0:47500,
/10.116.25.32:47500, /172.17.0.1:47500, /192.168.82.128:47500,
/127.0.0.1:47500], discPort=47500, order=1, intOrder=1,
lastExchangeTime=1525077608394, loc=false, ver=2.3.0#20171220-sha1:8431829c,
isClient=false], exchId=GridDhtPartitionExchangeId
[topVer=AffinityTopologyVersion [topVer=12, minorTopVer=1],
discoEvt=DiscoveryCustomEvent [customMsg=null,
affTopVer=AffinityTopologyVersion [topVer=12, minorTopVer=1],
super=DiscoveryEvent [evtNode=TcpDiscoveryNode
[id=c6e1394e-bf7a-4fe4-a1bf-f64193bd44f4, addrs=[10.116.24.222, 10.244.5.0,
127.0.0.1, 172.17.0.1, 192.168.193.192], sockAddrs=[/10.244.5.0:47500,
/172.17.0.1:47500, /192.168.193.192:47500, /127.0.0.1:47500,
/10.116.24.222:47500], discPort=47500, order=3, intOrder=3,
lastExchangeTime=1525077608394, loc=false, ver=2.3.0#20171220-sha1:8431829c,
isClient=false], topVer=12, nodeId8=e8f4c909, msg=null,
type=DISCOVERY_CUSTOM_EVT, tstamp=1525077647980]], nodeId=c6e1394e,
evt=DISCOVERY_CUSTOM_EVT], added=true, initFut=GridFutureAdapter
[ignoreInterrupts=false, state=DONE, res=false, hash=989374705], init=false,
lastVer=null, partReleaseFut=PartitionReleaseFuture
[topVer=AffinityTopologyVersion [topVer=12, minorTopVer=1],
futures=[ExplicitLockReleaseFuture [topVer=AffinityTopologyVersion
[topVer=12, minorTopVer=1], futures=[]], TxReleaseFuture
[topVer=AffinityTopologyVersion [topVer=12, minorTopVer=1], futures=[]],
AtomicUpdateReleaseFuture [topVer=AffinityTopologyVersion [topVer=12,
minorTopVer=1], futures=[]], DataStreamerReleaseFuture
[topVer=AffinityTopologyVersion [topVer=12, minorTopVer=1], futures=[]]]],
exchActions=null, affChangeMsg=null, initTs=1525077647990,
centralizedAff=false, changeGlobalStateE=null, done=true, state=SRV,
evtLatch=0, remaining=[8e65440a-df65-4770-9a7b-26672bd574a3,
18184b4a-0fe0-4fff-a917-a3b03f16a509, c6e1394e-bf7a-4fe4-a1bf-f64193bd44f4,
491c9af5-e855-42d8-b617-e72bf3099a46, 4aae4b1e-6ef4-43ac-b156-f5445adb40c6,
91d8036d-c74f-48d7-b389-82ebba96adf2, 95e77e2f-ba25-4c12-b9b0-d1b21386eb36,
c12d30c3-bf9e-4c58-9468-8ef878ec2679, 1edbfd89-a03f-4fea-93b9-d058eb93f66b],
super=GridFutureAdapter [ignoreInterrupts=false, state=DONE,
res=java.lang.IndexOutOfBoundsException: index 890, hash=328088520]]
class org.apache.ignite.IgniteCheckedException: index 890
        at
org.apache.ignite.internal.util.IgniteUtils.cast(IgniteUtils.java:7252)
        at
org.apache.ignite.internal.util.future.GridFutureAdapter.resolve(GridFutureAdapter.java:259)
        at
org.apache.ignite.internal.util.future.GridFutureAdapter.get0(GridFutureAdapter.java:207)
        at
org.apache.ignite.internal.util.future.GridFutureAdapter.get(GridFutureAdapter.java:159)
        at
org.apache.ignite.internal.processors.cache.GridCachePartitionExchangeManager$ExchangeWorker.body(GridCachePartitionExchangeManager.java:2289)
        at
org.apache.ignite.internal.util.worker.GridWorker.run(GridWorker.java:110)
        at java.lang.Thread.run(Thread.java:745)
Caused by: java.lang.IndexOutOfBoundsException: index 890
        at
java.util.concurrent.atomic.AtomicReferenceArray.checkedByteOffset(AtomicReferenceArray.java:78)
        at
java.util.concurrent.atomic.AtomicReferenceArray.get(AtomicReferenceArray.java:125)
        at
org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtPartitionTopologyImpl.forceCreatePartition(GridDhtPartitionTopologyImpl.java:767)
        at
org.apache.ignite.internal.processors.cache.persistence.GridCacheDatabaseSharedManager.applyUpdate(GridCacheDatabaseSharedManager.java:1777)
        at
org.apache.ignite.internal.processors.cache.persistence.GridCacheDatabaseSharedManager.applyLastUpdates(GridCacheDatabaseSharedManager.java:1637)
        at
org.apache.ignite.internal.processors.cache.persistence.GridCacheDatabaseSharedManager.restoreState(GridCacheDatabaseSharedManager.java:1072)
        at
org.apache.ignite.internal.processors.cache.persistence.GridCacheDatabaseSharedManager.beforeExchange(GridCacheDatabaseSharedManager.java:863)
        at
org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionsExchangeFuture.distributedExchange(GridDhtPartitionsExchangeFuture.java:1019)
        at
org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionsExchangeFuture.init(GridDhtPartitionsExchangeFuture.java:651)
        at
org.apache.ignite.internal.processors.cache.GridCachePartitionExchangeManager$ExchangeWorker.body(GridCachePartitionExchangeManager.java:2279)
        ... 2 more

As a result, I cannot perform any operations.

This symptom first appeared when I cancelled (Ctrl+C) a service deployment.
At that time, another job was writing to a cache. I had only changed the sticky
parameter of the service deployment (from false to true), but the deployment
was too slow, so I cancelled it. I then restarted the cluster, and the
problem began.

Is there any solution or workaround for this error, such as skipping the
checkpoint restore process? It is OK for me to lose some recent
cache updates.

The Ignite version is 2.3.0, and the configuration is as follows.

<?xml version="1.0" encoding="UTF-8"?>



<beans xmlns="http://www.springframework.org/schema/beans"
       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
       xsi:schemaLocation="
       http://www.springframework.org/schema/beans
       http://www.springframework.org/schema/beans/spring-beans.xsd">
    <bean id="grid.cfg"
class="org.apache.ignite.configuration.IgniteConfiguration">

        
        <property name="serviceThreadPoolSize" value="80"/>

        <property name="failureDetectionTimeout" value="3600000"/>

        
        <property name="cacheConfiguration">
            <list>
                
                <bean
class="org.apache.ignite.configuration.CacheConfiguration">
                    <property name="name"
value="valid_dup_ratio_cache_name"/> 
                    <property name="atomicityMode" value="ATOMIC"/>
                    <property name="cacheMode" value="REPLICATED"/>
                    <property name="indexedTypes">
                        <list>
                            <value>java.lang.String</value>
                            <value>java.util.LinkedList</value>
                        </list>
                    </property>
                </bean>

                
                <bean
class="org.apache.ignite.configuration.CacheConfiguration">
                    <property name="name"
value="dup_ratio_hbase_read_through"/>
                    <property name="atomicityMode" value="ATOMIC"/>
                    <property name="cacheMode" value="PARTITIONED"/>
                    <property name="onheapCacheEnabled" value="true"/>
                    <property name="evictionPolicy">
                        <bean
class="org.apache.ignite.cache.eviction.lru.LruEvictionPolicy">
                            <property name="batchSize" value="5"/>
                            
                        </bean>
                    </property>

                    <property name="expiryPolicyFactory">
                        <bean id="expiryPolicy"
class="javax.cache.expiry.CreatedExpiryPolicy" factory-method="factoryOf">
                            <constructor-arg>
                                <bean class="javax.cache.expiry.Duration">
                                    <constructor-arg value="HOURS"/>
                                    <constructor-arg value="24"/>
                                </bean>
                            </constructor-arg>
                        </bean>
                    </property>

                    <property name="cacheStoreFactory">
                        <bean
class="javax.cache.configuration.FactoryBuilder" factory-method="factoryOf">
                            <constructor-arg
value="com.naver.kweb.serp.title.ignite.read_through.HBaseDupRatioAdapter"/>
                        </bean>
                    </property>
                    <property name="readThrough" value="true"/>
                    <property name="writeThrough" value="false"/>
                </bean>
            </list>
        </property>

        
        <property name="discoverySpi">
            <bean
class="org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi">
                <property name="clientReconnectDisabled" value="false"/>
                <property name="networkTimeout" value="120000"/>
                <property name="ipFinder">
                    
                    <bean
class="org.apache.ignite.spi.discovery.tcp.ipfinder.vm.TcpDiscoveryVmIpFinder">
                        <property name="addresses">
                            <list>
                                
                               
<value>csb7x0876.nfra.io:47500..47509</value>
                               
<value>csb7x0877.nfra.io:47500..47509</value>
                               
<value>csb7x0878.nfra.io:47500..47509</value>
                               
<value>csb7x0879.nfra.io:47500..47509</value>
                               
<value>csb7x0880.nfra.io:47500..47509</value>
                               
<value>csb7x0881.nfra.io:47500..47509</value>
                               
<value>csb7x0882.nfra.io:47500..47509</value>
                               
<value>csb7x0883.nfra.io:47500..47509</value>
                               
<value>csb7x0884.nfra.io:47500..47509</value>
                               
<value>csb7x0885.nfra.io:47500..47509</value>
                            </list>
                        </property>
                    </bean>
                </property>
            </bean>
        </property>

        
        <property name="dataStorageConfiguration">
            <bean
class="org.apache.ignite.configuration.DataStorageConfiguration">
                 

                <property name="writeThrottlingEnabled" value="true"/> 

                <property name="defaultDataRegionConfiguration">
                    <bean
class="org.apache.ignite.configuration.DataRegionConfiguration">
                        <property name="persistenceEnabled" value="true"/> 
                        <property name="name" value="Default_Region"/>
                        <property name="maxSize" value="#{75L * 1024 * 1024
* 1024}"/> 
                        <property name="checkpointPageBufferSize"
value="#{1024L * 1024 * 1024}"/> 
                        <property name="metricsEnabled" value="true"/>
                    </bean>
                </property>
                <property name="storagePath"
value="/naver/ignite_storage/20180330/storage"/>
                <property name="walPath"
value="/naver/ignite_storage/20180330/wal"/>
                <property name="walArchivePath"
value="/naver/ignite_storage/20180330/walArchive"/>
                <property name="metricsEnabled" value="true"/>
            </bean>
        </property>

        
        <property name="binaryConfiguration">
            <bean
class="org.apache.ignite.configuration.BinaryConfiguration">
                <property name="typeConfigurations">
                    <list>
                        <bean
class="org.apache.ignite.binary.BinaryTypeConfiguration">
                            <property name="typeName"
value="com.naver.kweb.serp.title.ignite.service.TitleMakerServiceImpl"/>
                        </bean>
                    </list>
                </property>
            </bean>
        </property>
    </bean>
</beans>

Thanks.



--
Sent from: http://apache-ignite-users.70518.x6.nabble.com/

Reply via email to