[ 
https://issues.apache.org/jira/browse/IGNITE-13093?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17119289#comment-17119289
 ] 

Stanilovsky Evgeny commented on IGNITE-13093:
---------------------------------------------

[[email protected]] Hi, first of all, please format this ticket correctly: 
you have posted a large amount of unnecessary information in the Environment field.
Can you attach all the logs (ignite.log by default)? Also, please run with the flag: 
-DIGNITE_QUIET=false


> Unidentified Apache Ignite worker blocked when inserting large amount of 
> records to the persistent storage
> ----------------------------------------------------------------------------------------------------------
>
>                 Key: IGNITE-13093
>                 URL: https://issues.apache.org/jira/browse/IGNITE-13093
>             Project: Ignite
>          Issue Type: Bug
>          Components: cache
>    Affects Versions: 2.8.1
>         Environment: Java 1.8.0_231
> Apache Ignite 2.8.1
> Windows 10, 64G memory
> <?xml version="1.0" encoding="UTF-8"?>
> <beans xmlns="http://www.springframework.org/schema/beans"
>        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
>        xsi:schemaLocation="
>         http://www.springframework.org/schema/beans
>         http://www.springframework.org/schema/beans/spring-beans.xsd">
>     <bean class="org.apache.ignite.configuration.IgniteConfiguration">
>         <property name="gridLogger">
>             <bean class="org.apache.ignite.logger.log4j2.Log4J2Logger">
>                 <constructor-arg type="java.lang.String" value="log4j2.xml"/>
>             </bean>
>         </property>
>         <property name="communicationSpi">
>             <bean 
> class="org.apache.ignite.spi.communication.tcp.TcpCommunicationSpi">
>               <!-- Override local port. -->
>               <property name="localPort" value="47400"/>
>             </bean>
>         </property>
>         <!-- Configure internal thread pool. 64-->
>         <property name="publicThreadPoolSize" value="16"/>
>         <!-- Configure system thread pool. 32-->
>         <property name="systemThreadPoolSize" value="8"/>
>         <property name="systemWorkerBlockedTimeout" value="#{5 * 60 * 1000}"/>
>         <property name="failureHandler">
>             <bean class="org.apache.ignite.failure.StopNodeFailureHandler">
>                 <!-- uncomment to enable this handler to 
>                 process critical workers' hung-ups -->
>                 <property name="ignoredFailureTypes">
>                     <list>
>                     </list>
>                 </property>
>             </bean>
>         </property>
>         <!-- Set to true to enable distributed class loading for examples, 
> default is false. -->
>         <property name="peerClassLoadingEnabled" value="false"/>
>         <property name="dataStorageConfiguration">
>             <bean 
> class="org.apache.ignite.configuration.DataStorageConfiguration">
>                 <!--
>                      Sets a path to the root directory where data and indexes 
> are
>                      to be persisted. It's assumed the directory is on a 
> separated SSD.
>                 -->
>                 <property name="storagePath" value="persistence"/>
>                 <!--
>                      Sets a path to the directory where WAL is stored.
>                      It's assumed the directory is on a separated HDD.
>                 -->
>                 <property name="walPath" value="wal"/>
>                 <!--
>                     Sets a path to the directory where WAL archive is stored.
>                     The directory is on the same HDD as the WAL.
>                 -->
>                 <property name="walArchivePath" value="wal/archive"/>
>                 <!-- Changing WAL Mode. --> 
>                 <property name="walMode" value="NONE"/>
>                 <!-- Set the page size to 4 KB, default -->
>                 <!-- limit 1 KB - 16 KB -->
>                 <property name="pageSize" value="#{4 * 1024}"/>
>                 <!-- Enable write throttling. -->
>                 <property name="writeThrottlingEnabled" value="false"/>
>                 <property name="checkpointFrequency" value="500"/>
>                 <property name="lockWaitTime" value="2000"/>
>                 <property name="checkpointThreads" value="1"/>
>                 <property name="checkpointWriteOrder" value="RANDOM"/>
>                 <!--
>                     Default memory region that grows endlessly. A cache is 
> bound to this memory region
>                     unless it sets another one in its CacheConfiguration.
>                 -->
>                 <property name="defaultDataRegionConfiguration">
>                     <bean 
> class="org.apache.ignite.configuration.DataRegionConfiguration">
>                         <!--property name="name" value="Default_Region"/-->
>                         <property name="name" value="default"/>
>                         <!-- 100 MB memory region with disabled eviction -->
>                         <property name="initialSize" value="#{100L * 1024 * 
> 1024}"/>
>                         <!-- maxSize 20 MB is too little -->
>                         <!-- sum of all maxSize values has to be less than 
> total memory of the system -->
>                         <!-- limits size in memory, not on disk -->
>                         <!-- default value 1.2GB -->
>                         <property name="maxSize"     value="#{2L * 1024 * 
> 1024 * 1024}"/>
>                         <property name="persistenceEnabled" value="true"/>
>                         <!-- Increasing the buffer size to 1 GB. -->
>                         <property name="checkpointPageBufferSize" value="#{1L 
> * 1024 * 1024 * 1024}"/>
>                     </bean>
>                 </property>
>                 <!-- Defining several data regions for different memory 
> regions -->
>                 <property name="dataRegionConfigurations">
>                     <list>
>                     </list>
>                 </property>
>             </bean>
>         </property>
>         <property name="cacheConfiguration">
>             <list>
>                 <!--bean 
> class="org.apache.ignite.configuration.CacheConfiguration">
>                     <property name="dataRegionName" value="default"/>
>                     <property name="name" value=".ShardDetectorStorage"/>
>                     <property name="onheapCacheEnabled" value="true"/>
>                 </bean>
>                 <bean 
> class="org.apache.ignite.configuration.CacheConfiguration">
>                     <property name="dataRegionName" value="default"/>
>                     <property name="name" value=".ChildrenStore"/>
>                     <property name="onheapCacheEnabled" value="true"/>
>                 </bean>
>                 <bean 
> class="org.apache.ignite.configuration.CacheConfiguration">
>                     <property name="dataRegionName" value="default"/>
>                     <property name="name" value=".ChildrenStore.listsize"/>
>                     <property name="onheapCacheEnabled" value="true"/>
>                 </bean>
>                 <bean 
> class="org.apache.ignite.configuration.CacheConfiguration">
>                     <property name="dataRegionName" value="default"/>
>                     <property name="name" value=".RootStorage"/>
>                     <property name="onheapCacheEnabled" value="true"/>
>                 </bean>
>                 <bean 
> class="org.apache.ignite.configuration.CacheConfiguration">
>                     <property name="dataRegionName" value="default"/>
>                     <property name="name" value=".QualifierStorage"/>
>                     <property name="onheapCacheEnabled" value="true"/>
>                 </bean-->
>             </list>
>         </property>
>     </bean>
> </beans>
> Java settings
> -Xms1024m -Xmx50g -Xss1024m
> -Xverify:none
> -server
> -DIGNITE_QUIET=true
> -XX:+UseG1GC
> -XX:+DisableExplicitGC
> -Djava.net.preferIPv4Stack=true
> -XX:+AlwaysPreTouch
> -XX:+ScavengeBeforeFullGC
> -XX:+AggressiveOpts
> partial thread dump during slowdown
> "db-checkpoint-thread-#54" #99 prio=5 os_prio=0 tid=0x0000000070344800 
> nid=0x2d54 runnable [0x0000001c5df3e000]
>    java.lang.Thread.State: RUNNABLE
>         at 
> org.apache.ignite.internal.processors.cache.persistence.pagemem.PageMemoryImpl$Segment.removePageForReplacement(PageMemoryImpl.java:2398)
>         at 
> org.apache.ignite.internal.processors.cache.persistence.pagemem.PageMemoryImpl$Segment.access$900(PageMemoryImpl.java:2093)
>         at 
> org.apache.ignite.internal.processors.cache.persistence.pagemem.PageMemoryImpl.acquirePage(PageMemoryImpl.java:773)
>         at 
> org.apache.ignite.internal.processors.cache.persistence.pagemem.PageMemoryImpl.acquirePage(PageMemoryImpl.java:701)
>         at 
> org.apache.ignite.internal.processors.cache.persistence.GridCacheOffheapManager.saveStoreMetadata(GridCacheOffheapManager.java:342)
>         at 
> org.apache.ignite.internal.processors.cache.persistence.GridCacheOffheapManager.syncMetadata(GridCacheOffheapManager.java:268)
>         at 
> org.apache.ignite.internal.processors.cache.persistence.GridCacheOffheapManager.syncMetadata(GridCacheOffheapManager.java:254)
>         at 
> org.apache.ignite.internal.processors.cache.persistence.GridCacheOffheapManager.beforeCheckpointBegin(GridCacheOffheapManager.java:226)
>         at 
> org.apache.ignite.internal.processors.cache.persistence.GridCacheDatabaseSharedManager$Checkpointer.markCheckpointBegin(GridCacheDatabaseSharedManager.java:4125)
>         at 
> org.apache.ignite.internal.processors.cache.persistence.GridCacheDatabaseSharedManager$Checkpointer.doCheckpoint(GridCacheDatabaseSharedManager.java:3738)
>         at 
> org.apache.ignite.internal.processors.cache.persistence.GridCacheDatabaseSharedManager$Checkpointer.body(GridCacheDatabaseSharedManager.java:3623)
>         at 
> org.apache.ignite.internal.util.worker.GridWorker.run(GridWorker.java:120)
>         at java.lang.Thread.run(Thread.java:748)
>    Locked ownable synchronizers:
>         - <0x000000009b71a250> (a 
> java.util.concurrent.locks.ReentrantReadWriteLock$NonfairSync)
>            Reporter: Tomasz Grygo
>            Priority: Blocker
>
> I'm looking at Apache Ignite to use as a fast database. Performance is very 
> important, I need to build it as fast as possible with resources available. 
> First I copy all (450M) records from my original test database to Ignite 
> caches through IgniteDataStreams using PK as a key. Database does not fit in 
> memory so I have disk persistence enabled and eviction disabled. Data is 
> inserted in parallel using 8 threads. I have only one but fairly powerful 
> Windows PC doing all the work, no separate Ignite cluster. I'm not interested 
> in cache recovery so WAL is disabled. Everything goes well until I hit around 
> 310 million entries (2 hours of work). At this point Ignite starts to choke, 
> inserts slow down and then stop with exceptions. Exception is triggered by 
> systemWorkerBlockedTimeout setting set to 5 minutes. Extending this time does 
> not help at all. Based on heap dump I tried adding 
> -DIGNITE_PAGES_LIST_DISABLE_ONHEAP_CACHING=true and it failed slightly later 
> but still could not finish the job. I read the performance guides and I tried 
> tweaking other Ignite settings too but didn't see any impact. How can I find 
> which worker is being blocked and why?
> 2020-05-27 21:54:26,176 [Storage2 ] [ERROR] - DTR_0030 worker Storage2 had 
> error: FATAL ERROR java.lang.IllegalStateException: Data streamer has been 
> closed.
> java.lang.IllegalStateException: Data streamer has been closed.
>     at 
> org.apache.ignite.internal.processors.datastreamer.DataStreamerImpl.closedException(DataStreamerImpl.java:1095)
>     at 
> org.apache.ignite.internal.processors.datastreamer.DataStreamerImpl.lock(DataStreamerImpl.java:446)
>     at 
> org.apache.ignite.internal.processors.datastreamer.DataStreamerImpl.addDataInternal(DataStreamerImpl.java:646)
>     at 
> org.apache.ignite.internal.processors.datastreamer.DataStreamerImpl.addDataInternal(DataStreamerImpl.java:631)
>     at 
> org.apache.ignite.internal.processors.datastreamer.DataStreamerImpl.addData(DataStreamerImpl.java:753)
>     at 
> com.sc.extr.cache.PureIgniteDynamicRowStorage.putIfAbsent(PureIgniteDynamicRowStorage.java:83)
>     at 
> com.sc.extr.cache.PureIgniteDynamicRowStorage.addRowOnKey(PureIgniteDynamicRowStorage.java:160)
>     at 
> com.sc.extr.tree.MultiCacheTreeBuilder.addRootRowToCache(MultiCacheTreeBuilder.java:409)
>     at 
> com.sc.extr.tree.MultiCacheTreeBuilder.parentRev1to1(MultiCacheTreeBuilder.java:237)
>     at 
> com.sc.extr.tree.MultiCacheTreeBuilder.addRowToCache(MultiCacheTreeBuilder.java:333)
>     at 
> com.sc.extr.tree.MultiCacheTreeBuilder.parentRev(MultiCacheTreeBuilder.java:274)
>     at 
> com.sc.extr.tree.MultiCacheTreeBuilder.addRow(MultiCacheTreeBuilder.java:379)
>     at 
> com.sc.extr.tree.MultiCacheTreeBuilder.process(MultiCacheTreeBuilder.java:206)
>     at com.sc.bi.workflow.WorkTransformer.processOne(WorkTransformer.java:84)
>     at com.sc.bi.workflow.WorkTransformer.doWork(WorkTransformer.java:145)
>     at 
> com.sc.bi.workflow.WorkTransformer.processQueue(WorkTransformer.java:210)
>     at com.sc.bi.workflow.WorkTransformer.run(WorkTransformer.java:169)
> Caused by: class org.apache.ignite.IgniteCheckedException: Data streamer has 
> been cancelled: DataStreamerImpl [bufLdrSzPerThread=4096, 
> rcvr=org.apache.ignite.internal.processors.datastreamer.DataStreamerImpl$IsolatedUpdater@381b03ed,
>  ioPlcRslvr=null, cacheName=PERSON.PTINTN, bufSize=512, parallelOps=0, 
> timeout=-1, autoFlushFreq=0, bufMappings=ConcurrentHashMap 
> {03e74462-12ec-4140-b9fb-a975572ac3bb=Buffer [node=TcpDiscoveryNode 
> [id=03e74462-12ec-4140-b9fb-a975572ac3bb, 
> consistentId=b01eb38b-7728-4e43-a697-0bc52f872e44, addrs=ArrayList 
> [127.0.0.1, 172.27.179.112], sockAddrs=HashSet 
> [SOFTBI-DEV.sc.com/172.27.179.112:47500, /127.0.0.1:47500], discPort=47500, 
> order=1, intOrder=1, lastExchangeTime=1590614830815, loc=true, 
> ver=2.8.1#20200521-sha1:86422096, isClient=false], isLocNode=true, idGen=0, 
> sem=java.util.concurrent.Semaphore@2a869d9[Permits = 64], 
> perNodeParallelOps=64, entriesCnt=2048, locFutsSize=0, reqsSize=0]}, 
> cacheObjProc=GridProcessorAdapter [], 
> cacheObjCtx=org.apache.ignite.internal.processors.cache.CacheObjectContext@2a5313b0,
>  cancelled=true, cancellationReason=null, failCntr=0, 
> activeFuts=GridConcurrentHashSet [GridFutureAdapter [ignoreInterrupts=false, 
> state=INIT, res=null, hash=2102798044], GridFutureAdapter 
> [ignoreInterrupts=false, state=INIT, res=null, hash=1195632760], 
> GridFutureAdapter [ignoreInterrupts=false, state=INIT, res=null, 
> hash=370791970], GridFutureAdapter [ignoreInterrupts=false, state=INIT, 
> res=null, hash=420732031], GridFutureAdapter [ignoreInterrupts=false, 
> state=INIT, res=null, hash=1453517070]], jobPda=null, depCls=null, 
> fut=DataStreamerFuture [super=GridFutureAdapter [ignoreInterrupts=false, 
> state=INIT, res=null, hash=1165180540]], publicFut=IgniteFuture 
> [orig=DataStreamerFuture [super=GridFutureAdapter [ignoreInterrupts=false, 
> state=INIT, res=null, hash=1165180540]]], disconnectErr=null, closed=true, 
> lastFlushTime=1590629894701, skipStore=false, keepBinary=false, 
> maxRemapCnt=32, remapSem=java.util.concurrent.Semaphore@6e6f060b[Permits = 
> 2147483647], remapOwning=false]
>     at 
> org.apache.ignite.internal.processors.datastreamer.DataStreamerImpl.closeEx(DataStreamerImpl.java:1347)
>     at 
> org.apache.ignite.internal.processors.datastreamer.DataStreamerImpl.closeEx(DataStreamerImpl.java:1318)
>     at 
> org.apache.ignite.internal.processors.datastreamer.DataStreamProcessor.onKernalStop(DataStreamProcessor.java:155)
>     at org.apache.ignite.internal.IgniteKernal.stop0(IgniteKernal.java:2551)
>     at org.apache.ignite.internal.IgniteKernal.stop(IgniteKernal.java:2499)
>     at 
> org.apache.ignite.internal.IgnitionEx$IgniteNamedInstance.stop0(IgnitionEx.java:2650)
>     at 
> org.apache.ignite.internal.IgnitionEx$IgniteNamedInstance.stop(IgnitionEx.java:2613)
>     at org.apache.ignite.internal.IgnitionEx.stop(IgnitionEx.java:339)
>     at 
> org.apache.ignite.failure.StopNodeFailureHandler$1.run(StopNodeFailureHandler.java:36)
>     at java.lang.Thread.run(Thread.java:748)



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

Reply via email to