Hi,

I'm trying out the ignite-spark support. I have a dataframe that was
created from reading a csv file sized around 800MB.

It seems that when I store the RDD from this dataframe in Ignite using the
saveValues API in IgniteContext, it takes around 2GB of RAM.

Naturally once we add more dataframes, joins and computations we get OOM
errors even though we have more than enough RAM.

Any ideas why the memory usage is so inflated?

Attached is my config.

Yair
<?xml version="1.0" encoding="UTF-8"?>

<!--
    Ignite Spring configuration file to startup Ignite cache.

    This file demonstrates how to configure cache using Spring. Provided cache
    will be created on node startup.

    Use this configuration file when running HTTP REST examples (see 'examples/rest' folder).

    When starting a standalone node, you need to execute the following command:
    {IGNITE_HOME}/bin/ignite.{bat|sh} examples/config/example-cache.xml

    When starting Ignite from Java IDE, pass path to this file to Ignition:
    Ignition.start("examples/config/example-cache.xml");
-->
<beans xmlns="http://www.springframework.org/schema/beans"
       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
       xsi:schemaLocation="
        http://www.springframework.org/schema/beans
        http://www.springframework.org/schema/beans/spring-beans.xsd">
    <!-- Main Ignite node configuration. -->
    <bean id="ignite.cfg" class="org.apache.ignite.configuration.IgniteConfiguration">
        <!-- SpEL expression; evaluates to 120000 (presumably milliseconds, i.e. 2 minutes; confirm). -->
        <property name="metricsLogFrequency" value="#{2 * 60 * 1000}"/>
        <!--
            This node starts in client mode.
            NOTE(review): a client node presumably does not hold cache data itself, so the
            memory settings below may only take effect when applied on the server nodes.
            Confirm against the server-side configuration.
        -->
        <property name="clientMode" value="true"/>

        <!-- Memory settings: one memory policy named "Default_Region", used as the default. -->
        <property name="memoryConfiguration">
            <bean class="org.apache.ignite.configuration.MemoryConfiguration">
                <property name="concurrencyLevel" value="16"/>
                <!-- Must match the "name" of one of the memory policies listed below. -->
                <property name="defaultMemoryPolicyName" value="Default_Region"/>
                <!-- 16384 = 16 * 1024 (presumably bytes; confirm). -->
                <property name="pageSize" value="16384"/>

                <property name="memoryPolicies">
                    <list>
                        <bean class="org.apache.ignite.configuration.MemoryPolicyConfiguration">
                            <property name="name" value="Default_Region"/>
                            <!-- 2 * 1024^3; the L suffix keeps the SpEL arithmetic in long range. -->
                            <property name="initialSize" value="#{2L * 1024 * 1024 * 1024}"/>
                            <!-- 10 * 1024^3. -->
                            <property name="maxSize" value="#{10L * 1024 * 1024 * 1024}"/>
                            <property name="metricsEnabled" value="false"/>
                            <!-- Page eviction is switched off for this policy. -->
                            <property name="pageEvictionMode" value="DISABLED"/>
                        </bean>
                    </list>
                </property>
            </bean>
        </property>

        <property name="cacheConfiguration">

            <list>
                <!--
                    Partitioned, atomic cache configuration with no backups, registered under
                    the name "*". Inherits the remaining settings from the abstract
                    "cache-template" bean defined at the bottom of this file.
                -->
                <bean parent="cache-template">
                    <property name="name" value="*"/>

                    <property name="cacheMode" value="PARTITIONED"/>
                    <property name="atomicityMode" value="ATOMIC"/>
                    <property name="backups" value="0"/>
                    <!--<property name="nearConfiguration">-->
                    <!--<bean class="org.apache.ignite.configuration.NearCacheConfiguration">-->
                    <!--<property name="nearEvictionPolicy">-->
                    <!--<bean class="org.apache.ignite.cache.eviction.lru.LruEvictionPolicy"/>-->
                    <!--</property>-->
                    <!--</bean>-->
                    <!--</property>-->
                </bean>
            </list>
        </property>
        <!-- Explicitly configure TCP discovery SPI to provide list of initial nodes. -->
        <property name="discoverySpi">
            <bean class="org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi">
                <property name="ipFinder">
                    <!--
                        Ignite provides several options for automatic discovery that can be used
                        instead of static IP based discovery. For information on all options refer
                        to our documentation: http://apacheignite.readme.io/docs/cluster-config
                    -->
                    <!-- Uncomment static IP finder to enable static-based discovery of initial nodes. -->
                    <!--<bean class="org.apache.ignite.spi.discovery.tcp.ipfinder.vm.TcpDiscoveryVmIpFinder">-->
                    <!-- Active finder: multicast-based, additionally given the static host list below. -->
                    <bean class="org.apache.ignite.spi.discovery.tcp.ipfinder.multicast.TcpDiscoveryMulticastIpFinder">
                        <property name="addresses">
                            <list>
                                <!-- In distributed environment, replace with actual host IP address. -->
                                <value>my-host-05</value>
                                <value>my-host-06</value>
                                <value>my-host-07</value>
                            </list>
                        </property>
                    </bean>
                </property>
            </bean>
        </property>
        <property name="communicationSpi">
            <bean class="org.apache.ignite.spi.communication.tcp.TcpCommunicationSpi">
                <!-- Presumably milliseconds (20 seconds); confirm. -->
                <property name="socketWriteTimeout" value="20000"/>
                <property name="messageQueueLimit" value="10000"/>
            </bean>
        </property>
    </bean>

    <!-- Abstract parent bean: shared cache settings inherited via parent="cache-template". -->
    <bean id="cache-template" abstract="true" class="org.apache.ignite.configuration.CacheConfiguration">
        <!-- Asynchronous rebalancing. -->
        <property name="rebalanceMode" value="ASYNC"/>
        <!-- Write synchronization mode is PRIMARY_SYNC (not FULL_SYNC). -->
        <property name="writeSynchronizationMode" value="PRIMARY_SYNC"/>
        <property name="copyOnRead" value="false"/>
    </bean>
</beans>

Reply via email to