I've been struggling to find a configuration that works for IGFS with Hadoop filesystem caching. Any time I attempt to load more data than will fit into memory on my Ignite node, the Ignite process crashes.
The behavior I am looking for is that old cache entries will be evicted when I try to write new data to IGFS that exceeds the available memory on the server. I can see that my data is being persisted into HDFS, but I seem to be limited to the amount of physical memory on my Ignite server at the moment. I am using the teragen example to generate the files on Hadoop for the purposes of this test like so: time hadoop-ig jar /usr/hdp/current/hadoop-mapreduce-client/hadoop-mapreduce-examples.jar teragen 100000000 igfs://igfs@myserver.com/tmp/output1 If I have systemRegionMaxSize set to a value less than the physical memory on my Ignite server, then the error message is something like this: /class org.apache.ignite.internal.mem.IgniteOutOfMemoryException: Out of memory in data region [name=sysMemPlc, initSize=1.0 GiB, maxSize=14.0 GiB, persistenceEnabled=false] Try the following: ^-- Increase maximum off-heap memory size (DataRegionConfiguration.maxSize) ^-- Enable Ignite persistence (DataRegionConfiguration.persistenceEnabled) ^-- Enable eviction or expiration policies / If I increase the systemRegionMaxSize to a value greater than the physical memory on my Ignite server, the error message is something like this: /[2018-08-09 12:16:08,174][ERROR][igfs-#171][GridNearTxLocal] Heuristic transaction failure. 
class org.apache.ignite.internal.transactions.IgniteTxHeuristicCheckedException: Failed to locally write to cache (all transaction entries will be invalidated, however there was a window when entries for this transaction were visible to others): GridNearTxLocal [mappings=IgniteTxMappingsImpl [], nearLocallyMapped=false, colocatedLocallyMapped=true, needCheckBackup=null, hasRemoteLocks=false, trackTimeout=false, lb=null, thread=igfs-#171, mappings=IgniteTxMappingsImpl [], super=GridDhtTxLocalAdapter [nearOnOriginatingNode=false, nearNodes=[], dhtNodes=[], explicitLock=false, super=IgniteTxLocalAdapter [completedBase=null, sndTransformedVals=false, depEnabled=false, txState=IgniteTxStateImpl [activeCacheIds=[-313790114], recovery=false, txMap=[IgniteTxEntry [key=KeyCacheObjectImpl [part=504, val=IgfsBlockKey [fileId=c976b6f1561-689b0ba5-6920-4b52-a614-c2360d0acff4, blockId=52879, affKey=null, evictExclude=true], hasValBytes=true], cacheId=-313790114, txKey=IgniteTxKey [key=KeyCacheObjectImpl [part=504, val=IgfsBlockKey [fileId=c976b6f1561-689b0ba5-6920-4b52-a614-c2360d0acff4, blockId=52879, affKey=null, evictExclude=true], hasValBytes=true], cacheId=-313790114], val=[op=CREATE, val=CacheObjectByteArrayImpl [arrLen=65536]], prevVal=[op=NOOP, val=null], oldVal=[op=NOOP, val=null], entryProcessorsCol=null, ttl=-1, conflictExpireTime=-1, conflictVer=null, explicitVer=null, dhtVer=null, filters=[], filtersPassed=false, filtersSet=true, entry=GridDhtCacheEntry [rdrs=[], part=504, super=GridDistributedCacheEntry [super=GridCacheMapEntry [key=KeyCacheObjectImpl [part=504, val=IgfsBlockKey [fileId=c976b6f1561-689b0ba5-6920-4b52-a614-c2360d0acff4, blockId=52879, affKey=null, evictExclude=true], hasValBytes=true], val=null, startVer=1533830728270, ver=GridCacheVersion [topVer=145310277, order=1533830728270, nodeOrder=1], hash=-915370253, extras=GridCacheMvccEntryExtras [mvcc=GridCacheMvcc [locs=[GridCacheMvccCandidate [nodeId=6ed33eb9-2103-402c-afab-a415c8f08f2f, 
ver=GridCacheVersion [topVer=145310277, order=1533830728268, nodeOrder=1], threadId=224, id=258264, topVer=AffinityTopologyVersion [topVer=1, minorTopVer=0], reentry=null, otherNodeId=6ed33eb9-2103-402c-afab-a415c8f08f2f, otherVer=GridCacheVersion [topVer=145310277, order=1533830728268, nodeOrder=1], mappedDhtNodes=null, mappedNearNodes=null, ownerVer=null, serOrder=null, key=KeyCacheObjectImpl [part=504, val=IgfsBlockKey [fileId=c976b6f1561-689b0ba5-6920-4b52-a614-c2360d0acff4, blockId=52879, affKey=null, evictExclude=true], hasValBytes=true], masks=local=1|owner=1|ready=1|reentry=0|used=0|tx=1|single_implicit=0|dht_local=1|near_local=0|removed=0|read=0, prevVer=GridCacheVersion [topVer=145310277, order=1533830728268, nodeOrder=1], nextVer=GridCacheVersion [topVer=145310277, order=1533830728268, nodeOrder=1]]], rmts=null]], flags=2]]], prepared=1, locked=false, nodeId=6ed33eb9-2103-402c-afab-a415c8f08f2f, locMapped=false, expiryPlc=null, transferExpiryPlc=false, flags=0, partUpdateCntr=0, serReadVer=null, xidVer=GridCacheVersion [topVer=145310277, order=1533830728268, nodeOrder=1]], IgniteTxEntry [key=KeyCacheObjectImpl [part=504, val=IgfsBlockKey [fileId=c976b6f1561-689b0ba5-6920-4b52-a614-c2360d0acff4, blockId=52880, affKey=null, evictExclude=true], hasValBytes=true], cacheId=-313790114, txKey=IgniteTxKey [key=KeyCacheObjectImpl [part=504, val=IgfsBlockKey [fileId=c976b6f1561-689b0ba5-6920-4b52-a614-c2360d0acff4, blockId=52880, affKey=null, evictExclude=true], hasValBytes=true], cacheId=-313790114], val=[op=CREATE, val=CacheObjectByteArrayImpl [arrLen=65536]], prevVal=[op=NOOP, val=null], oldVal=[op=NOOP, val=null], entryProcessorsCol=null, ttl=-1, conflictExpireTime=-1, conflictVer=null, explicitVer=null, dhtVer=null, filters=[], filtersPassed=false, filtersSet=true, entry=GridDhtCacheEntry [rdrs=[], part=504, super=GridDistributedCacheEntry [super=GridCacheMapEntry [key=KeyCacheObjectImpl [part=504, val=IgfsBlockKey 
[fileId=c976b6f1561-689b0ba5-6920-4b52-a614-c2360d0acff4, blockId=52880, affKey=null, evictExclude=true], hasValBytes=true], val=null, startVer=1533830728271, ver=GridCacheVersion [topVer=145310277, order=1533830728271, nodeOrder=1], hash=-915370252, extras=GridCacheMvccEntryExtras [mvcc=GridCacheMvcc [locs=[GridCacheMvccCandidate [nodeId=6ed33eb9-2103-402c-afab-a415c8f08f2f, ver=GridCacheVersion [topVer=145310277, order=1533830728268, nodeOrder=1], threadId=224, id=258265, topVer=AffinityTopologyVersion [topVer=1, minorTopVer=0], reentry=null, otherNodeId=6ed33eb9-2103-402c-afab-a415c8f08f2f, otherVer=GridCacheVersion [topVer=145310277, order=1533830728268, nodeOrder=1], mappedDhtNodes=null, mappedNearNodes=null, ownerVer=null, serOrder=null, key=KeyCacheObjectImpl [part=504, val=IgfsBlockKey [fileId=c976b6f1561-689b0ba5-6920-4b52-a614-c2360d0acff4, blockId=52880, affKey=null, evictExclude=true], hasValBytes=true], masks=local=1|owner=1|ready=1|reentry=0|used=0|tx=1|single_implicit=0|dht_local=1|near_local=0|removed=0|read=0, prevVer=GridCacheVersion [topVer=145310277, order=1533830728268, nodeOrder=1], nextVer=GridCacheVersion [topVer=145310277, order=1533830728268, nodeOrder=1]]], rmts=null]], flags=2]]], prepared=1, locked=false, nodeId=6ed33eb9-2103-402c-afab-a415c8f08f2f, locMapped=false, expiryPlc=null, transferExpiryPlc=false, flags=0, partUpdateCntr=0, serReadVer=null, xidVer=GridCacheVersion [topVer=145310277, order=1533830728268, nodeOrder=1]], IgniteTxEntry [key=KeyCacheObjectImpl [part=504, val=IgfsBlockKey [fileId=c976b6f1561-689b0ba5-6920-4b52-a614-c2360d0acff4, blockId=52878, affKey=null, evictExclude=true], hasValBytes=true], cacheId=-313790114, txKey=IgniteTxKey [key=KeyCacheObjectImpl [part=504, val=IgfsBlockKey [fileId=c976b6f1561-689b0ba5-6920-4b52-a614-c2360d0acff4, blockId=52878, affKey=null, evictExclude=true], hasValBytes=true], cacheId=-313790114], val=[op=CREATE, val=CacheObjectByteArrayImpl [arrLen=65536]], prevVal=[op=NOOP, val=null], 
oldVal=[op=NOOP, val=null], entryProcessorsCol=null, ttl=-1, conflictExpireTime=-1, conflictVer=null, explicitVer=null, dhtVer=null, filters=[], filtersPassed=false, filtersSet=true, entry=GridDhtCacheEntry [rdrs=[], part=504, super=GridDistributedCacheEntry [super=GridCacheMapEntry [key=KeyCacheObjectImpl [part=504, val=IgfsBlockKey [fileId=c976b6f1561-689b0ba5-6920-4b52-a614-c2360d0acff4, blockId=52878, affKey=null, evictExclude=true], hasValBytes=true], val=null, startVer=1533830728269, ver=GridCacheVersion [topVer=145310277, order=1533830728269, nodeOrder=1], hash=-915370254, extras=GridCacheMvccEntryExtras [mvcc=GridCacheMvcc [locs=[GridCacheMvccCandidate [nodeId=6ed33eb9-2103-402c-afab-a415c8f08f2f, ver=GridCacheVersion [topVer=145310277, order=1533830728268, nodeOrder=1], threadId=224, id=258263, topVer=AffinityTopologyVersion [topVer=1, minorTopVer=0], reentry=null, otherNodeId=6ed33eb9-2103-402c-afab-a415c8f08f2f, otherVer=GridCacheVersion [topVer=145310277, order=1533830728268, nodeOrder=1], mappedDhtNodes=null, mappedNearNodes=null, ownerVer=null, serOrder=null, key=KeyCacheObjectImpl [part=504, val=IgfsBlockKey [fileId=c976b6f1561-689b0ba5-6920-4b52-a614-c2360d0acff4, blockId=52878, affKey=null, evictExclude=true], hasValBytes=true], masks=local=1|owner=1|ready=1|reentry=0|used=0|tx=1|single_implicit=0|dht_local=1|near_local=0|removed=0|read=0, prevVer=null, nextVer=GridCacheVersion [topVer=145310277, order=1533830728268, nodeOrder=1]]], rmts=null]], flags=2]]], prepared=1, locked=false, nodeId=6ed33eb9-2103-402c-afab-a415c8f08f2f, locMapped=false, expiryPlc=null, transferExpiryPlc=false, flags=0, partUpdateCntr=0, serReadVer=null, xidVer=GridCacheVersion [topVer=145310277, order=1533830728268, nodeOrder=1]], IgniteTxEntry [key=KeyCacheObjectImpl [part=504, val=IgfsBlockKey [fileId=c976b6f1561-689b0ba5-6920-4b52-a614-c2360d0acff4, blockId=52881, affKey=null, evictExclude=true], hasValBytes=true], cacheId=-313790114, txKey=IgniteTxKey 
[key=KeyCacheObjectImpl [part=504, val=IgfsBlockKey [fileId=c976b6f1561-689b0ba5-6920-4b52-a614-c2360d0acff4, blockId=52881, affKey=null, evictExclude=true], hasValBytes=true], cacheId=-313790114], val=[op=CREATE, val=CacheObjectByteArrayImpl [arrLen=65536]], prevVal=[op=NOOP, val=null], oldVal=[op=NOOP, val=null], entryProcess... and 2261 skipped ...272, ver=GridCacheVersion [topVer=145310277, order=1533830728272, nodeOrder=1], hash=-915370251, extras=GridCacheMvccEntryExtras [mvcc=GridCacheMvcc [locs=[GridCacheMvccCandidate [nodeId=6ed33eb9-2103-402c-afab-a415c8f08f2f, ver=GridCacheVersion [topVer=145310277, order=1533830728268, nodeOrder=1], threadId=224, id=258266, topVer=AffinityTopologyVersion [topVer=1, minorTopVer=0], reentry=null, otherNodeId=6ed33eb9-2103-402c-afab-a415c8f08f2f, otherVer=GridCacheVersion [topVer=145310277, order=1533830728268, nodeOrder=1], mappedDhtNodes=null, mappedNearNodes=null, ownerVer=null, serOrder=null, key=KeyCacheObjectImpl [part=504, val=IgfsBlockKey [fileId=c976b6f1561-689b0ba5-6920-4b52-a614-c2360d0acff4, blockId=52881, affKey=null, evictExclude=true], hasValBytes=true], masks=local=1|owner=1|ready=1|reentry=0|used=0|tx=1|single_implicit=0|dht_local=1|near_local=0|removed=0|read=0, prevVer=GridCacheVersion [topVer=145310277, order=1533830728268, nodeOrder=1], nextVer=null]], rmts=null]], flags=2]]], prepared=1, locked=false, nodeId=6ed33eb9-2103-402c-afab-a415c8f08f2f, locMapped=false, expiryPlc=null, transferExpiryPlc=false, flags=0, partUpdateCntr=0, serReadVer=null, xidVer=GridCacheVersion [topVer=145310277, order=1533830728268, nodeOrder=1]]]], super=IgniteTxAdapter [xidVer=GridCacheVersion [topVer=145310277, order=1533830728268, nodeOrder=1], writeVer=GridCacheVersion [topVer=145310277, order=1533830728273, nodeOrder=1], implicit=true, loc=true, threadId=224, startTime=1533831368025, nodeId=6ed33eb9-2103-402c-afab-a415c8f08f2f, startVer=GridCacheVersion [topVer=145310277, order=1533830728268, nodeOrder=1], endVer=null, 
isolation=READ_COMMITTED, concurrency=OPTIMISTIC, timeout=0, sysInvalidate=false, sys=true, plc=2, commitVer=GridCacheVersion [topVer=145310277, order=1533830728268, nodeOrder=1], finalizing=NONE, invalidParts=null, state=COMMITTING, timedOut=false, topVer=AffinityTopologyVersion [topVer=1, minorTopVer=0], duration=53ms, onePhaseCommit=true], size=4]]] at org.apache.ignite.internal.processors.cache.transactions.IgniteTxLocalAdapter.userCommit(IgniteTxLocalAdapter.java:851) at org.apache.ignite.internal.processors.cache.distributed.near.GridNearTxLocal.localFinish(GridNearTxLocal.java:3185) at org.apache.ignite.internal.processors.cache.distributed.near.GridNearTxFinishFuture.doFinish(GridNearTxFinishFuture.java:467) at org.apache.ignite.internal.processors.cache.distributed.near.GridNearTxFinishFuture.finish(GridNearTxFinishFuture.java:417) at org.apache.ignite.internal.processors.cache.distributed.near.GridNearTxLocal$19.apply(GridNearTxLocal.java:3341) at org.apache.ignite.internal.processors.cache.distributed.near.GridNearTxLocal$19.apply(GridNearTxLocal.java:3335) at org.apache.ignite.internal.util.future.GridFutureAdapter.notifyListener(GridFutureAdapter.java:383) at org.apache.ignite.internal.util.future.GridFutureAdapter.listen(GridFutureAdapter.java:353) at org.apache.ignite.internal.processors.cache.distributed.near.GridNearTxLocal.commitNearTxLocalAsync(GridNearTxLocal.java:3335) at org.apache.ignite.internal.processors.cache.distributed.near.GridNearTxLocal.optimisticPutFuture(GridNearTxLocal.java:2486) at org.apache.ignite.internal.processors.cache.distributed.near.GridNearTxLocal.putAllAsync0(GridNearTxLocal.java:836) at org.apache.ignite.internal.processors.cache.distributed.near.GridNearTxLocal.putAllAsync(GridNearTxLocal.java:391) at org.apache.ignite.internal.processors.cache.GridCacheAdapter$36.op(GridCacheAdapter.java:2852) at org.apache.ignite.internal.processors.cache.GridCacheAdapter$AsyncOp.op(GridCacheAdapter.java:5132) at 
org.apache.ignite.internal.processors.cache.GridCacheAdapter.asyncOp(GridCacheAdapter.java:4289) at org.apache.ignite.internal.processors.cache.GridCacheAdapter$AsyncOpRetryFuture.execute(GridCacheAdapter.java:4853) at org.apache.ignite.internal.processors.cache.GridCacheAdapter.asyncOp(GridCacheAdapter.java:4194) at org.apache.ignite.internal.processors.cache.GridCacheAdapter.putAllAsync0(GridCacheAdapter.java:2850) at org.apache.ignite.internal.processors.cache.GridCacheAdapter.putAllAsync(GridCacheAdapter.java:2842) at org.apache.ignite.internal.processors.cache.GridCacheProxyImpl.putAllAsync(GridCacheProxyImpl.java:815) at org.apache.ignite.internal.processors.igfs.IgfsDataManager.storeBlocksAsync(IgfsDataManager.java:1086) at org.apache.ignite.internal.processors.igfs.IgfsDataManager.access$1500(IgfsDataManager.java:96) at org.apache.ignite.internal.processors.igfs.IgfsDataManager$5.run(IgfsDataManager.java:1002) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) Caused by: java.lang.OutOfMemoryError at sun.misc.Unsafe.allocateMemory(Native Method) at org.apache.ignite.internal.util.GridUnsafe.allocateMemory(GridUnsafe.java:1090) at org.apache.ignite.internal.mem.unsafe.UnsafeMemoryProvider.nextRegion(UnsafeMemoryProvider.java:82) at org.apache.ignite.internal.processors.cache.persistence.IgniteCacheDatabaseSharedManager$2.nextRegion(IgniteCacheDatabaseSharedManager.java:996) at org.apache.ignite.internal.pagemem.impl.PageMemoryNoStoreImpl.addSegment(PageMemoryNoStoreImpl.java:627) at org.apache.ignite.internal.pagemem.impl.PageMemoryNoStoreImpl.allocatePage(PageMemoryNoStoreImpl.java:293) at org.apache.ignite.internal.processors.cache.persistence.freelist.AbstractFreeList.allocateDataPage(AbstractFreeList.java:463) at 
org.apache.ignite.internal.processors.cache.persistence.freelist.AbstractFreeList.insertDataRow(AbstractFreeList.java:501) at org.apache.ignite.internal.processors.cache.persistence.RowStore.addRow(RowStore.java:97) at org.apache.ignite.internal.processors.cache.IgniteCacheOffheapManagerImpl$CacheDataStoreImpl.createRow(IgniteCacheOffheapManagerImpl.java:1302) at org.apache.ignite.internal.processors.cache.GridCacheMapEntry$UpdateClosure.call(GridCacheMapEntry.java:4426) at org.apache.ignite.internal.processors.cache.GridCacheMapEntry$UpdateClosure.call(GridCacheMapEntry.java:4371) at org.apache.ignite.internal.processors.cache.persistence.tree.BPlusTree$Invoke.invokeClosure(BPlusTree.java:3083) at org.apache.ignite.internal.processors.cache.persistence.tree.BPlusTree$Invoke.access$6200(BPlusTree.java:2977) at org.apache.ignite.internal.processors.cache.persistence.tree.BPlusTree.invokeDown(BPlusTree.java:1726) at org.apache.ignite.internal.processors.cache.persistence.tree.BPlusTree.invokeDown(BPlusTree.java:1703) at org.apache.ignite.internal.processors.cache.persistence.tree.BPlusTree.invoke(BPlusTree.java:1610) at org.apache.ignite.internal.processors.cache.IgniteCacheOffheapManagerImpl$CacheDataStoreImpl.invoke(IgniteCacheOffheapManagerImpl.java:1249) at org.apache.ignite.internal.processors.cache.IgniteCacheOffheapManagerImpl.invoke(IgniteCacheOffheapManagerImpl.java:352) at org.apache.ignite.internal.processors.cache.GridCacheMapEntry.storeValue(GridCacheMapEntry.java:3602) at org.apache.ignite.internal.processors.cache.GridCacheMapEntry.storeValue(GridCacheMapEntry.java:3578) at org.apache.ignite.internal.processors.cache.GridCacheMapEntry.innerSet(GridCacheMapEntry.java:1040) at org.apache.ignite.internal.processors.cache.transactions.IgniteTxLocalAdapter.userCommit(IgniteTxLocalAdapter.java:652) ... 25 more/ My server has 16GB of memory and I have about 1.5TB available in HDFS. 
I fully intend to scale this out to multiple Ignite servers, but ultimately I will run into the same problem when I reach the limits of Ignite's memory and I want to make sure I understand how to handle that. I have my Xms and Xmx set to 8g currently and the rest of my Ignite config is below: /<beans xmlns="http://www.springframework.org/schema/beans" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:util="http://www.springframework.org/schema/util" xsi:schemaLocation="http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans.xsd http://www.springframework.org/schema/util http://www.springframework.org/schema/util/spring-util.xsd"> <bean id="propertyConfigurer" class="org.springframework.beans.factory.config.PropertyPlaceholderConfigurer"> <property name="systemPropertiesModeName" value="SYSTEM_PROPERTIES_MODE_FALLBACK"/> <property name="searchSystemEnvironment" value="true"/> </bean> <bean id="grid.cfg" class="org.apache.ignite.configuration.IgniteConfiguration"> <property name="connectorConfiguration"> <bean class="org.apache.ignite.configuration.ConnectorConfiguration"> <property name="port" value="11211"/> </bean> </property> <property name="fileSystemConfiguration"> <list> <bean class="org.apache.ignite.configuration.FileSystemConfiguration"> <property name="name" value="igfs"/> <property name="ipcEndpointConfiguration"> <bean class="org.apache.ignite.igfs.IgfsIpcEndpointConfiguration"> <property name="type" value="TCP" /> <property name="host" value="myserver.com" /> <property name="port" value="10500" /> </bean> </property> <property name="secondaryFileSystem"> <bean class="org.apache.ignite.hadoop.fs.IgniteHadoopIgfsSecondaryFileSystem"> <property name="fileSystemFactory"> <bean class="org.apache.ignite.hadoop.fs.CachingHadoopFileSystemFactory"> <property name="uri" value="hdfs://myhadoop.com:8020/"/> </bean> </property> </bean> </property> </bean> </list> </property> <property name="discoverySpi"> <bean 
class="org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi"> <property name="ipFinder"> <bean class="org.apache.ignite.spi.discovery.tcp.ipfinder.vm.TcpDiscoveryVmIpFinder"> <property name="addresses"> <list> <value>127.0.0.1:47500..47509</value> </list> </property> </bean> </property> </bean> </property> <property name="dataStorageConfiguration"> <bean class="org.apache.ignite.configuration.DataStorageConfiguration"> <property name="defaultDataRegionConfiguration"> <bean class="org.apache.ignite.configuration.DataRegionConfiguration"> <property name="name" value="Default_Region"/> <property name="pageEvictionMode" value="RANDOM_2_LRU"/> </bean> </property> <property name="systemRegionInitialSize" value="#{1L * 1024 * 1024 * 1024}"/> <property name="systemRegionMaxSize" value="#{20L * 1024 * 1024 * 1024}"/> </bean> </property> </bean> </beans> / -- Sent from: http://apache-ignite-users.70518.x6.nabble.com/
