I have a hacked together class that somebody else wrote based on some 
wikimedia puppet scripts. I've been asked to set the JAVA_HEAP_MAX if the 
host is a resource manager.

I'm trying to set the variable yarn_env_java_heap_max if the host is an RM. 
Is it possible to set the parameter as below? Do I have to create a 
separate role just to set this parameter? Or am I better off checking whether 
the host is an RM in the yarn-site.sh.erb template itself? 

        if ($::hostname in $rm_hosts){
        yarn_env_java_heap_max => '-Xmx4000M', 
}

I am accessing the variable in the yarn-site.sh.erb template as follows:

<%# Emit JAVA_HEAP_MAX only when the Puppet class variable yarn_env_java_heap_max is set. -%>
<%# The value is read through @ (instance-variable access) and written without spaces around '=' so the result is a valid shell assignment. -%>
<% if @yarn_env_java_heap_max -%>
JAVA_HEAP_MAX=<%= @yarn_env_java_heap_max %>
<% end -%>




the role class


# role/wh/hadoop.pp
# borrowed from https://git.wikimedia.org/tree/operations%2Fpuppet.git

# Role classes for Hadoop nodes.
#
# Usage:
#
# To install only hadoop client packages and configs:
#   include role::wh::hadoop::client
#
# To install a Hadoop Master (NameNode + ResourceManager, etc.):
#   include role::wh::hadoop::master
#
# To install a Hadoop Worker (DataNode + NodeManager + etc.):
#   include role::wh::hadoop::worker
#

# Base role: includes the environment-specific Hadoop configuration class
# chosen by $common::data::env and orders it between a pair of anchors.
class role::wh::hadoop::client {
  # Translate the environment name into the suffix shared by the config
  # class name and its anchor titles. Any other value aborts compilation.
  case $common::data::env {
    'dev':   { $env_suffix = 'dev' }
    'qa':    { $env_suffix = 'qa' }
    'prod':  { $env_suffix = 'production' }
    default: { fail("Unrecognized environment type for hadoop") }
  }

  include "role::wh::hadoop::${env_suffix}"

  anchor { "cdh5_hadoop_${env_suffix}_first": }
  -> Class["role::wh::hadoop::${env_suffix}"]
  -> anchor { "cdh5_hadoop_${env_suffix}_last": }
}

# JournalNode role: registers a MOTD entry and declares
# cdh5::hadoop::journalnode on top of the client role.
class role::wh::hadoop::journalnode inherits role::wh::hadoop::client {
    motd::register { 'Hadoop Journal Node': }

    class { 'cdh5::hadoop::journalnode': }

    # Declare both anchors up front, then order the class between them.
    anchor { [
        'cdh5_hadoop_journalnode_first',
        'cdh5_hadoop_journalnode_last',
    ]: }

    Anchor['cdh5_hadoop_journalnode_first']
    -> Class['cdh5::hadoop::journalnode']
    -> Anchor['cdh5_hadoop_journalnode_last']
}

# Master role: registers a MOTD entry, creates the NameNode metadata and
# journal-edits directories, and declares cdh5::hadoop::master on top of the
# client role.
class role::wh::hadoop::master inherits role::wh::hadoop::client {
    # Title kept on a single line: a line break inside the quotes would
    # become part of the resource title.
    motd::register { 'Hadoop Master (NameNode, ResourceManager & HistoryServer)': }

    system::mkdirs { '/var/lib/hadoop-hdfs/cache/hdfs/dfs/name': }
    system::mkdirs { '/var/lib/hadoop-hdfs/journalEdits': }

    class { 'cdh5::hadoop::master': }

    # Order the master class between a pair of anchors.
    anchor { 'cdh5_hadoop_master_first': }
    -> Class['cdh5::hadoop::master']
    -> anchor { 'cdh5_hadoop_master_last': }
}

# Worker role: registers a MOTD entry and declares cdh5::hadoop::worker on
# top of the client role.
class role::wh::hadoop::worker inherits role::wh::hadoop::client {
    motd::register { 'Hadoop Worker (DataNode & NodeManager)': }

    class { 'cdh5::hadoop::worker': }

    # Declare both anchors up front, then order the class between them.
    anchor { [
        'cdh5_hadoop_worker_first',
        'cdh5_hadoop_worker_last',
    ]: }

    Anchor['cdh5_hadoop_worker_first']
    -> Class['cdh5::hadoop::worker']
    -> Anchor['cdh5_hadoop_worker_last']
}

# Standby role: registers a MOTD entry, creates the NameNode metadata and
# journal-edits directories, and declares the namenode and resourcemanager
# classes on top of the client role.
class role::wh::hadoop::standby inherits role::wh::hadoop::client {
    motd::register { 'Hadoop Standby NameNode': }

    system::mkdirs { '/var/lib/hadoop-hdfs/cache/hdfs/dfs/name': }
    system::mkdirs { '/var/lib/hadoop-hdfs/journalEdits': }

    class { 'cdh5::hadoop::namenode': }
    class { 'cdh5::hadoop::resourcemanager': }

    # Ordering: first anchor, namenode, resourcemanager, last anchor.
    anchor { [
        'cdh5_wh_hadoop_standby_first',
        'cdh5_wh_hadoop_standby_last',
    ]: }

    Anchor['cdh5_wh_hadoop_standby_first']
    -> Class['cdh5::hadoop::namenode']
    -> Class['cdh5::hadoop::resourcemanager']
    -> Anchor['cdh5_wh_hadoop_standby_last']
}

# Declares cdh5::hadoop::namenode::primarypostinstall on top of the client
# role and orders it between a pair of anchors.
class role::wh::hadoop::primary::postinstall inherits role::wh::hadoop::client {
    class { 'cdh5::hadoop::namenode::primarypostinstall': }

    anchor { [
        'cdh5_wh_hadoop_primarypostinstall_first',
        'cdh5_wh_hadoop_primarypostinstall_last',
    ]: }

    Anchor['cdh5_wh_hadoop_primarypostinstall_first']
    -> Class['cdh5::hadoop::namenode::primarypostinstall']
    -> Anchor['cdh5_wh_hadoop_primarypostinstall_last']
}

# Declares cdh5::hadoop::namenode::standbypostinstall on top of the client
# role and orders it between a pair of anchors.
class role::wh::hadoop::standby::postinstall inherits role::wh::hadoop::client {
    class { 'cdh5::hadoop::namenode::standbypostinstall': }

    anchor { [
        'cdh5_wh_hadoop_standbypostinstall_first',
        'cdh5_wh_hadoop_standbypostinstall_last',
    ]: }

    Anchor['cdh5_wh_hadoop_standbypostinstall_first']
    -> Class['cdh5::hadoop::namenode::standbypostinstall']
    -> Anchor['cdh5_wh_hadoop_standbypostinstall_last']
}


### The following classes should not be included directly.
### You should either include role::wh::hadoop::client,
### or role::wh::hadoop::worker or
### role::wh::hadoop::master.

# Production Hadoop configuration: declares cdh5::hadoop with the production
# cluster settings, orders it between a pair of anchors, and installs the
# production capacity-scheduler.xml into the Hadoop config directory.
class role::wh::hadoop::production {

    # ResourceManager hosts. Kept in a variable so the same list can be
    # passed to cdh5::hadoop and tested against the current node.
    $rm_hosts = [
        'us3sm2nn010r07.comp.prod.local',
        'us3sm2nn011r08.comp.prod.local',
    ]

    # Conditionals are not allowed inside a resource attribute list, so the
    # value is decided here and passed in as an ordinary attribute below.
    # The list holds fully qualified names, so the comparison uses $::fqdn
    # ($::hostname is the short name and would never match).
    if $::fqdn in $rm_hosts {
        $yarn_env_java_heap_max = '-Xmx4000M'
    } else {
        # An undef attribute value is treated as "not specified", so
        # cdh5::hadoop falls back to its own default on non-RM hosts.
        # NOTE(review): confirm on the Puppet version in use.
        $yarn_env_java_heap_max = undef
    }

    class { 'cdh5::hadoop':
        namenode_hosts                           => [
            'us3sm2nn010r07.comp.prod.local',
            'us3sm2nn011r08.comp.prod.local',
        ],
        rm_hosts                                 => $rm_hosts,
        dfs_name_dir                             => [['/var/lib/hadoop-hdfs/cache/hdfs/dfs/name', '/nfs/namedir']],
        config_directory                         => '/etc/hadoop/conf',
        # This is the logical name of the Hadoop cluster.
        nameservice_id                           => 'whprod',
        journalnode_hosts                        => [
            'us3sm2zk010r07.comp.prod.local',
            'us3sm2zk011r08.comp.prod.local',
            'us3sm2zk012r09.comp.prod.local',
        ],
        dfs_journalnode_edits_dir                => '/var/lib/hadoop-hdfs/journalEdits',
        datanode_mounts                          => [
            '/data1',
            '/data2',
            '/data3',
            '/data4',
            '/data5',
            '/data6',
            '/data7',
            '/data8',
            '/data9',
            '/data10',
            '/data11',
        ],
        dfs_data_path                            => 'dfs',
        dfs_block_size                           => 268435456,  # 256 MB

        # Turn on Snappy compression by default for maps and final outputs
        mapreduce_intermediate_compression       => true,
        mapreduce_intermediate_compression_codec => 'org.apache.hadoop.io.compress.SnappyCodec',
        mapreduce_output_compression             => true,
        mapreduce_output_compression_codec       => 'org.apache.hadoop.io.compress.SnappyCodec',
        mapreduce_output_compression_type        => 'BLOCK',
        #mapreduce_map_tasks_maximum              => ($::processorcount - 2) / 2,
        #mapreduce_reduce_tasks_maximum           => ($::processorcount - 2) / 2,
        #mapreduce_job_reuse_jvm_num_tasks        => 1,
        #mapreduce_map_memory_mb                  => 1536,
        #mapreduce_reduce_memory_mb               => 3072,
        #mapreduce_map_java_opts                  => '-Xmx1024M',
        #mapreduce_reduce_java_opts               => '-Xmx2560M',
        #mapreduce_reduce_shuffle_parallelcopies  => 10,
        #mapreduce_task_io_sort_mb                => 200,
        #mapreduce_task_io_sort_factor            => 10,
        yarn_env_java_heap_max                   => $yarn_env_java_heap_max,
        yarn_nodemanager_resource_memory_mb      => 40960,
        yarn_resourcemanager_scheduler_class     => 'org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler',
        net_topology_script_template             => 'hadoop/getRackID.py-prod',
    }

    # Order cdh5::hadoop between a pair of anchors.
    anchor { 'cdh5_hadoop_first': }
    -> Class['cdh5::hadoop']
    -> anchor { 'cdh5_hadoop_last': }

    file { "${::cdh5::hadoop::config_directory}/capacity-scheduler.xml":
        content => template('hadoop/capacity-scheduler.xml-prod'),
        require => Class['cdh5::hadoop'],
    }

}

# QA Hadoop configuration: declares cdh5::hadoop with the QA cluster
# settings, orders it between a pair of anchors, and installs the QA
# capacity-scheduler.xml into the Hadoop config directory.
class role::wh::hadoop::qa {

    class { 'cdh5::hadoop':
        namenode_hosts                                  => [
            'us3sm2hbqa03r09.comp.prod.local',
            'us3sm2hbqa04r07.comp.prod.local',
        ],
        rm_hosts                                        => [
            'us3sm2hbqa03r09.comp.prod.local',
            'us3sm2hbqa04r07.comp.prod.local',
        ],
        dfs_name_dir                                    => [['/var/lib/hadoop-hdfs/cache/hdfs/dfs/name']],
        config_directory                                => '/etc/hadoop/conf',
        # This is the logical name of the Hadoop cluster.
        nameservice_id                                  => 'whqa',
        journalnode_hosts                               => [
            'us3sm2hbqa03r09.comp.prod.local',
            'us3sm2hbqa04r07.comp.prod.local',
            'us3sm2hbqa05r08.comp.prod.local',
        ],
        dfs_journalnode_edits_dir                       => '/var/lib/hadoop-hdfs/journalEdits',
        datanode_mounts                                 => [
            '/data1',
            '/data2',
        ],
        dfs_data_path                                   => 'dfs',
        dfs_block_size                                  => 268435456,  # 256 MB

        # Turn on Snappy compression by default for maps and final outputs
        mapreduce_intermediate_compression              => true,
        mapreduce_intermediate_compression_codec        => 'org.apache.hadoop.io.compress.SnappyCodec',
        mapreduce_output_compression                    => true,
        mapreduce_output_compression_codec              => 'org.apache.hadoop.io.compress.SnappyCodec',
        mapreduce_output_compression_type               => 'BLOCK',
        yarn_nodemanager_resource_memory_mb             => 24576,
        yarn_resourcemanager_max_completed_applications => 500,
        yarn_resourcemanager_scheduler_class            => 'org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler',
        net_topology_script_template                    => 'hadoop/getRackID.py-qa',
    }

    # Order cdh5::hadoop between a pair of anchors.
    anchor { 'cdh5_wh_hadoop_qa_first': }
    -> Class['cdh5::hadoop']
    -> anchor { 'cdh5_wh_hadoop_qa_last': }

    file { "${::cdh5::hadoop::config_directory}/capacity-scheduler.xml":
        content => template('hadoop/capacity-scheduler.xml-qa'),
        require => Class['cdh5::hadoop'],
    }
}

# Dev Hadoop configuration: declares cdh5::hadoop with the current node
# ($::fqdn) as its only NameNode, ResourceManager and JournalNode, orders the
# class between a pair of anchors, and installs a capacity-scheduler.xml into
# the Hadoop config directory.
class role::wh::hadoop::dev {

    class { 'cdh5::hadoop':
        namenode_hosts                           => [$::fqdn],
        rm_hosts                                 => [$::fqdn],
        dfs_name_dir                             => [['/var/lib/hadoop-hdfs/cache/hdfs/dfs/name']],
        config_directory                         => '/etc/hadoop/conf',
        # nameservice_id                           => 'whdev',
        journalnode_hosts                        => [$::fqdn],
        dfs_journalnode_edits_dir                => '/var/lib/hadoop-hdfs/journalEdits',
        datanode_mounts                          => [
            '/data1',
            '/data2',
        ],
        dfs_data_path                            => 'dfs',
        # 67108864 bytes is 64 MB (64 * 1024 * 1024), not 256 MB.
        # NOTE(review): if 256 MB was intended here, the value should be
        # 268435456 as in the other environments.
        dfs_block_size                           => 67108864,  # 64 MB

        # Turn on Snappy compression by default for maps and final outputs
        mapreduce_intermediate_compression       => true,
        mapreduce_intermediate_compression_codec => 'org.apache.hadoop.io.compress.SnappyCodec',
        mapreduce_output_compression             => true,
        mapreduce_output_compression_codec       => 'org.apache.hadoop.io.compress.SnappyCodec',
        mapreduce_output_compression_type        => 'BLOCK',
        mapreduce_map_tasks_maximum              => 2,
        mapreduce_reduce_tasks_maximum           => 2,
        yarn_nodemanager_resource_memory_mb      => 4096,
        yarn_resourcemanager_scheduler_class     => 'org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler',
    }

    # Order cdh5::hadoop between a pair of anchors.
    anchor { 'cdh5_hadoop_first': }
    -> Class['cdh5::hadoop']
    -> anchor { 'cdh5_hadoop_last': }

    # NOTE(review): dev renders the "-qa" capacity-scheduler template;
    # confirm this reuse is intentional.
    file { "${::cdh5::hadoop::config_directory}/capacity-scheduler.xml":
        content => template('hadoop/capacity-scheduler.xml-qa'),
        require => Class['cdh5::hadoop'],
    }
}


Note that the main hadoop class has a yarn_env_java_heap_max parameter that 
I added, which also is given a default value.

-- 
You received this message because you are subscribed to the Google Groups 
"Puppet Users" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to puppet-users+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/puppet-users/9d134b61-191b-407c-aabf-9b02c512de37%40googlegroups.com.
For more options, visit https://groups.google.com/d/optout.

Reply via email to