I'm going to try your suggestion. Thanks for the response! I'm curious,
however, whether $rm_hosts is available where you suggest, e.g.:

class role::wh::hadoop::production {
    $is_rm = $::hostname in $rm_hosts

Here are the relevant lines from the hadoop.pp pasted above:

class role::wh::hadoop::production {

    class { 'cdh5::hadoop':
        namenode_hosts                           => [
            'us3sm2nn010r07.comp.prod.local',
            'us3sm2nn011r08.comp.prod.local',
        ],
        rm_hosts                                 => [
            'us3sm2nn010r07.comp.prod.local',
            'us3sm2nn011r08.comp.prod.local',
        ],
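
If it turns out $rm_hosts is not in scope there, one rework might be to bind the list to a role-scope variable before declaring cdh5::hadoop, and compute the heap setting there. A minimal sketch (untested; it assumes the remaining cdh5::hadoop parameters stay exactly as in the original class, and it compares against $::fqdn because the list holds fully qualified names, which the short $::hostname would never match):

    class role::wh::hadoop::production {
        # Keep the list in a local variable so it is in scope here;
        # the class parameter by itself is not visible in this scope.
        $rm_hosts = [
            'us3sm2nn010r07.comp.prod.local',
            'us3sm2nn011r08.comp.prod.local',
        ]

        # Compute the parameter before the class declaration; 'in' can
        # only be used in conditionals on Puppet 3, hence the if/else.
        if $::fqdn in $rm_hosts {
            $yarn_env_java_heap_max = '-Xmx4000M'
        } else {
            $yarn_env_java_heap_max = undef
        }

        class { 'cdh5::hadoop':
            rm_hosts               => $rm_hosts,
            yarn_env_java_heap_max => $yarn_env_java_heap_max,
            # ... the other parameters unchanged from the original ...
        }
    }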

On Friday, September 5, 2014 4:27:31 AM UTC, Colin Kincaid Williams wrote:
>
> I have a hacked-together class that somebody else wrote, based on some
> wikimedia puppet scripts. I've been asked to set JAVA_HEAP_MAX if the
> host is a resource manager.
>
> I'm trying to set the variable yarn_env_java_heap_max if the host is an
> RM. Is it possible to set the parameter as below? Do I have to create a
> separate role just to set this parameter? Or am I better off checking
> whether the host is an RM in the yarn-site.sh.erb template itself?
>
>     if ($::hostname in $rm_hosts) {
>         yarn_env_java_heap_max => '-Xmx4000M',
>     }
>
> I am accessing the variable in the yarn-site.sh.erb template as follows:
>
> <% if @yarn_env_java_heap_max -%>
> JAVA_HEAP_MAX=<%= @yarn_env_java_heap_max %>
> <% end -%>
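
For the template-side check asked about above, a minimal sketch (untested; it assumes the template is evaluated inside cdh5::hadoop so the rm_hosts parameter is visible as @rm_hosts, and it compares @fqdn because the list holds fully qualified names):

    <% if @rm_hosts.include?(@fqdn) -%>
    JAVA_HEAP_MAX=-Xmx4000M
    <% end -%>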
>
> The role class:
>
> # role/wh/hadoop.pp
> # borrowed from https://git.wikimedia.org/tree/operations%2Fpuppet.git
>
> # Role classes for Hadoop nodes.
> #
> # Usage:
> #
> # To install only hadoop client packages and configs:
> #   include role::wh::hadoop
> #
> # To install a Hadoop Master (NameNode + ResourceManager, etc.):
> #   include role::wh::hadoop::master
> #
> # To install a Hadoop Worker (DataNode + NodeManager + etc.):
> #   include role::wh::hadoop::worker
> #
>
> class role::wh::hadoop::client {
>   # include common labs or production hadoop configs
>   case $common::data::env {
>     'dev': {
>       include role::wh::hadoop::dev
>       anchor { 'cdh5_hadoop_dev_first': } ->
>         Class['role::wh::hadoop::dev'] ->
>       anchor { 'cdh5_hadoop_dev_last': }
>     }
>     'qa': {
>       include role::wh::hadoop::qa
>       anchor { 'cdh5_hadoop_qa_first': } ->
>         Class['role::wh::hadoop::qa'] ->
>       anchor { 'cdh5_hadoop_qa_last': }
>     }
>     'prod': {
>       include role::wh::hadoop::production
>       anchor { 'cdh5_hadoop_production_first': } ->
>         Class['role::wh::hadoop::production'] ->
>       anchor { 'cdh5_hadoop_production_last': }
>     }
>     default: { fail('Unrecognized environment type for hadoop') }
>   }
> }
>
> class role::wh::hadoop::journalnode inherits role::wh::hadoop::client {
>     motd::register { 'Hadoop Journal Node': }
>
>     class { 'cdh5::hadoop::journalnode': }
>
>     anchor { 'cdh5_hadoop_journalnode_first': } ->
>       Class['cdh5::hadoop::journalnode'] ->
>     anchor { 'cdh5_hadoop_journalnode_last': }
> }
>
> class role::wh::hadoop::master inherits role::wh::hadoop::client {
>     motd::register { 'Hadoop Master (NameNode, ResourceManager & HistoryServer)': }
>
>     system::mkdirs { '/var/lib/hadoop-hdfs/cache/hdfs/dfs/name': }
>     system::mkdirs { '/var/lib/hadoop-hdfs/journalEdits': }
>
>     class { 'cdh5::hadoop::master': }
>
>     anchor { 'cdh5_hadoop_master_first': } ->
>       Class['cdh5::hadoop::master'] ->
>     anchor { 'cdh5_hadoop_master_last': }
> }
>
> class role::wh::hadoop::worker inherits role::wh::hadoop::client {
>     motd::register { 'Hadoop Worker (DataNode & NodeManager)': }
>
>     class { 'cdh5::hadoop::worker': }
>
>     anchor { 'cdh5_hadoop_worker_first': } ->
>       Class['cdh5::hadoop::worker'] ->
>     anchor { 'cdh5_hadoop_worker_last': }
> }
>
> class role::wh::hadoop::standby inherits role::wh::hadoop::client {
>     motd::register { 'Hadoop Standby NameNode': }
>
>     system::mkdirs { '/var/lib/hadoop-hdfs/cache/hdfs/dfs/name': }
>     system::mkdirs { '/var/lib/hadoop-hdfs/journalEdits': }
>
>     class { 'cdh5::hadoop::namenode': }
>     class { 'cdh5::hadoop::resourcemanager': }
>
>     anchor { 'cdh5_wh_hadoop_standby_first': } ->
>       Class['cdh5::hadoop::namenode'] ->
>       Class['cdh5::hadoop::resourcemanager'] ->
>     anchor { 'cdh5_wh_hadoop_standby_last': }
> }
>
> class role::wh::hadoop::primary::postinstall inherits role::wh::hadoop::client {
>     class { 'cdh5::hadoop::namenode::primarypostinstall': }
>
>     anchor { 'cdh5_wh_hadoop_primarypostinstall_first': } ->
>       Class['cdh5::hadoop::namenode::primarypostinstall'] ->
>     anchor { 'cdh5_wh_hadoop_primarypostinstall_last': }
> }
>
> class role::wh::hadoop::standby::postinstall inherits role::wh::hadoop::client {
>     class { 'cdh5::hadoop::namenode::standbypostinstall': }
>
>     anchor { 'cdh5_wh_hadoop_standbypostinstall_first': } ->
>       Class['cdh5::hadoop::namenode::standbypostinstall'] ->
>     anchor { 'cdh5_wh_hadoop_standbypostinstall_last': }
> }
>
>
> ### The following classes should not be included directly.
> ### You should either include role::wh::hadoop::client,
> ### or role::wh::hadoop::worker or
> ### role::wh::hadoop::master.
>
> class role::wh::hadoop::production {
>
>     class { 'cdh5::hadoop':
>         namenode_hosts                           => [
>             'us3sm2nn010r07.comp.prod.local',
>             'us3sm2nn011r08.comp.prod.local',
>         ],
>         rm_hosts                                 => [
>             'us3sm2nn010r07.comp.prod.local',
>             'us3sm2nn011r08.comp.prod.local',
>         ],
>         dfs_name_dir                             => [['/var/lib/hadoop-hdfs/cache/hdfs/dfs/name', '/nfs/namedir']],
>         config_directory                         => '/etc/hadoop/conf',
>         nameservice_id                           => 'whprod',  # the logical name of the Hadoop cluster
>         journalnode_hosts                        => [
>             'us3sm2zk010r07.comp.prod.local',
>             'us3sm2zk011r08.comp.prod.local',
>             'us3sm2zk012r09.comp.prod.local',
>         ],
>         dfs_journalnode_edits_dir                => '/var/lib/hadoop-hdfs/journalEdits',
>         datanode_mounts                          => [
>             '/data1',
>             '/data2',
>             '/data3',
>             '/data4',
>             '/data5',
>             '/data6',
>             '/data7',
>             '/data8',
>             '/data9',
>             '/data10',
>             '/data11',
>         ],
>         dfs_data_path                            => 'dfs',
>         dfs_block_size                           => 268435456,  # 256 MB
>
>         # Turn on Snappy compression by default for maps and final outputs
>         mapreduce_intermediate_compression       => true,
>         mapreduce_intermediate_compression_codec => 'org.apache.hadoop.io.compress.SnappyCodec',
>         mapreduce_output_compression             => true,
>         mapreduce_output_compression_codec       => 'org.apache.hadoop.io.compress.SnappyCodec',
>         mapreduce_output_compression_type        => 'BLOCK',
>         #mapreduce_map_tasks_maximum              => ($::processorcount - 2) / 2,
>         #mapreduce_reduce_tasks_maximum           => ($::processorcount - 2) / 2,
>         #mapreduce_job_reuse_jvm_num_tasks        => 1,
>         #mapreduce_map_memory_mb                  => 1536,
>         #mapreduce_reduce_memory_mb               => 3072,
>         #mapreduce_map_java_opts                  => '-Xmx1024M',
>         #mapreduce_reduce_java_opts               => '-Xmx2560M',
>         #mapreduce_reduce_shuffle_parallelcopies  => 10,
>         #mapreduce_task_io_sort_mb                => 200,
>         #mapreduce_task_io_sort_factor            => 10,
>         if ($::hostname in $rm_hosts) {
>             yarn_env_java_heap_max => '-Xmx4000M',
>         }
>         yarn_nodemanager_resource_memory_mb      => 40960,
>         yarn_resourcemanager_scheduler_class     => 'org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler',
>         net_topology_script_template             => 'hadoop/getRackID.py-prod',
>     }
>
>     anchor { 'cdh5_hadoop_first': } -> Class['cdh5::hadoop'] -> anchor { 'cdh5_hadoop_last': }
>
>     file { "${::cdh5::hadoop::config_directory}/capacity-scheduler.xml":
>         content => template('hadoop/capacity-scheduler.xml-prod'),
>         require => Class['cdh5::hadoop'],
>     }
> }
>
> class role::wh::hadoop::qa {
>
>     class { 'cdh5::hadoop':
>         namenode_hosts                           => [
>             'us3sm2hbqa03r09.comp.prod.local',
>             'us3sm2hbqa04r07.comp.prod.local',
>         ],
>         rm_hosts                                 => [
>             'us3sm2hbqa03r09.comp.prod.local',
>             'us3sm2hbqa04r07.comp.prod.local',
>         ],
>         dfs_name_dir                             => [['/var/lib/hadoop-hdfs/cache/hdfs/dfs/name']],
>         config_directory                         => '/etc/hadoop/conf',
>         nameservice_id                           => 'whqa',  # the logical name of the Hadoop cluster
>         journalnode_hosts                        => [
>             'us3sm2hbqa03r09.comp.prod.local',
>             'us3sm2hbqa04r07.comp.prod.local',
>             'us3sm2hbqa05r08.comp.prod.local',
>         ],
>         dfs_journalnode_edits_dir                => '/var/lib/hadoop-hdfs/journalEdits',
>         datanode_mounts                          => [
>             '/data1',
>             '/data2',
>         ],
>         dfs_data_path                            => 'dfs',
>         dfs_block_size                           => 268435456,  # 256 MB
>
>         # Turn on Snappy compression by default for maps and final outputs
>         mapreduce_intermediate_compression       => true,
>         mapreduce_intermediate_compression_codec => 'org.apache.hadoop.io.compress.SnappyCodec',
>         mapreduce_output_compression             => true,
>         mapreduce_output_compression_codec       => 'org.apache.hadoop.io.compress.SnappyCodec',
>         mapreduce_output_compression_type        => 'BLOCK',
>         yarn_nodemanager_resource_memory_mb      => 24576,
>         yarn_resourcemanager_max_completed_applications => 500,
>         yarn_resourcemanager_scheduler_class     => 'org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler',
>         net_topology_script_template             => 'hadoop/getRackID.py-qa',
>     }
>
>     anchor { 'cdh5_wh_hadoop_qa_first': } -> Class['cdh5::hadoop'] -> anchor { 'cdh5_wh_hadoop_qa_last': }
>
>     file { "${::cdh5::hadoop::config_directory}/capacity-scheduler.xml":
>         content => template('hadoop/capacity-scheduler.xml-qa'),
>         require => Class['cdh5::hadoop'],
>     }
> }
>
> class role::wh::hadoop::dev {
>
>     class { 'cdh5::hadoop':
>         namenode_hosts                           => [$::fqdn],
>         rm_hosts                                 => [$::fqdn],
>         dfs_name_dir                             => [['/var/lib/hadoop-hdfs/cache/hdfs/dfs/name']],
>         config_directory                         => '/etc/hadoop/conf',
>         # nameservice_id                           => 'whdev',
>         journalnode_hosts                        => [$::fqdn],
>         dfs_journalnode_edits_dir                => '/var/lib/hadoop-hdfs/journalEdits',
>         datanode_mounts                          => [
>             '/data1',
>             '/data2',
>         ],
>         dfs_data_path                            => 'dfs',
>         dfs_block_size                           => 67108864,  # 64 MB
>
>         # Turn on Snappy compression by default for maps and final outputs
>         mapreduce_intermediate_compression       => true,
>         mapreduce_intermediate_compression_codec => 'org.apache.hadoop.io.compress.SnappyCodec',
>         mapreduce_output_compression             => true,
>         mapreduce_output_compression_codec       => 'org.apache.hadoop.io.compress.SnappyCodec',
>         mapreduce_output_compression_type        => 'BLOCK',
>         mapreduce_map_tasks_maximum              => 2,
>         mapreduce_reduce_tasks_maximum           => 2,
>         yarn_nodemanager_resource_memory_mb      => 4096,
>         yarn_resourcemanager_scheduler_class     => 'org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler',
>     }
>
>     anchor { 'cdh5_hadoop_first': } -> Class['cdh5::hadoop'] -> anchor { 'cdh5_hadoop_last': }
>
>     file { "${::cdh5::hadoop::config_directory}/capacity-scheduler.xml":
>         content => template('hadoop/capacity-scheduler.xml-qa'),
>         require => Class['cdh5::hadoop'],
>     }
> }
>
>
> Note that the main hadoop class has a yarn_env_java_heap_max parameter
> that I added, which is also given a default value.
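
For reference, a class parameter with a default might be declared along these lines (a sketch only; the real cdh5::hadoop signature and the default shown are assumptions):

    class cdh5::hadoop (
        $rm_hosts,                               # list of ResourceManager hosts
        $yarn_env_java_heap_max = '-Xmx1000M',   # assumed placeholder default
        # ... the module's many other parameters ...
    ) {
        # class body renders yarn-site.sh.erb, which can read the parameter
    }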
>
