Repository: bigtop Updated Branches: refs/heads/master 3dd00010e -> b2225cfdb
BIGTOP-1686: Update and clean mapred template and namespace Update the mapred-site.xml template to current names for settings. Remove outdated settings. Make more settings adjustable and move their defaults into the Puppet class. Change variable names to consistently resemble the setting names. Signed-off-by: Olaf Flebbe <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/bigtop/repo Commit: http://git-wip-us.apache.org/repos/asf/bigtop/commit/b2225cfd Tree: http://git-wip-us.apache.org/repos/asf/bigtop/tree/b2225cfd Diff: http://git-wip-us.apache.org/repos/asf/bigtop/diff/b2225cfd Branch: refs/heads/master Commit: b2225cfdb218d1920da46dd18edbfcbafb7e4c36 Parents: 3dd0001 Author: Michael Weiser <[email protected]> Authored: Thu Feb 19 17:34:22 2015 +0100 Committer: Olaf Flebbe <[email protected]> Committed: Mon Mar 2 22:09:27 2015 +0100 ---------------------------------------------------------------------- .../puppet/hieradata/bigtop/cluster.yaml | 4 +- .../puppet/modules/hadoop/manifests/init.pp | 44 ++-- .../modules/hadoop/templates/mapred-site.xml | 224 ++++++------------- 3 files changed, 91 insertions(+), 181 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/bigtop/blob/b2225cfd/bigtop-deploy/puppet/hieradata/bigtop/cluster.yaml ---------------------------------------------------------------------- diff --git a/bigtop-deploy/puppet/hieradata/bigtop/cluster.yaml b/bigtop-deploy/puppet/hieradata/bigtop/cluster.yaml index 28c9449..2751d33 100644 --- a/bigtop-deploy/puppet/hieradata/bigtop/cluster.yaml +++ b/bigtop-deploy/puppet/hieradata/bigtop/cluster.yaml @@ -61,8 +61,8 @@ hadoop::common_yarn::hadoop_rm_host: "%{hiera('bigtop::hadoop_head_node')}" # actually default but needed for hue::server::rm_port here hadoop::common_yarn::hadoop_rm_port: "8032" -hadoop::common_mapred_app::hadoop_hs_host: "%{hiera('bigtop::hadoop_head_node')}" -hadoop::common_mapred_app::hadoop_jobtracker_host: "%{hiera('bigtop::hadoop_head_node')}" +hadoop::common_mapred_app::jobtracker_host: "%{hiera('bigtop::hadoop_head_node')}" +hadoop::common_mapred_app::mapreduce_jobhistory_host: "%{hiera('bigtop::hadoop_head_node')}" # actually default but needed for hue::server::webhdfs_url here hadoop::httpfs::hadoop_httpfs_port: "14000" http://git-wip-us.apache.org/repos/asf/bigtop/blob/b2225cfd/bigtop-deploy/puppet/modules/hadoop/manifests/init.pp ---------------------------------------------------------------------- diff --git a/bigtop-deploy/puppet/modules/hadoop/manifests/init.pp b/bigtop-deploy/puppet/modules/hadoop/manifests/init.pp index eaca730..a3c94db 100644 --- a/bigtop-deploy/puppet/modules/hadoop/manifests/init.pp +++ b/bigtop-deploy/puppet/modules/hadoop/manifests/init.pp @@ -227,28 +227,30 @@ class hadoop ($hadoop_security_authentication = "simple", } class common_mapred_app ( - $hadoop_config_io_sort_factor = undef, - $hadoop_config_io_sort_mb = undef, - $hadoop_config_mapred_child_ulimit = undef, - $hadoop_config_mapred_fairscheduler_assignmultiple = undef, - $hadoop_config_mapred_fairscheduler_sizebasedweight = undef, - $hadoop_config_mapred_job_tracker_handler_count = undef, - $hadoop_config_mapred_reduce_parallel_copies = undef, - $hadoop_config_mapred_reduce_slowstart_completed_maps = undef, - $hadoop_config_mapred_reduce_tasks_speculative_execution = undef, - $hadoop_config_tasktracker_http_threads = undef, - $hadoop_config_use_compression = undef, - $hadoop_hs_host = undef, - $hadoop_hs_port = "10020", - $hadoop_hs_webapp_port = "19888", - $hadoop_jobtracker_fairscheduler_weightadjuster = undef, - $hadoop_jobtracker_host, - $hadoop_jobtracker_port = "8021", - $hadoop_jobtracker_taskscheduler = undef, - $hadoop_mapred_jobtracker_plugins = "", - $hadoop_mapred_tasktracker_plugins = "", - $mapred_acls_enabled = undef, + $mapreduce_cluster_acls_enabled = undef, + $mapreduce_jobtracker_taskscheduler = undef, + $mapreduce_jobhistory_host = undef, + $mapreduce_jobhistory_port = "10020", + $mapreduce_jobhistory_webapp_port = "19888", + $mapreduce_framework_name = "yarn", + $jobtracker_host, + $jobtracker_port = "8021", $mapred_data_dirs = suffix($hadoop::hadoop_storage_dirs, "/mapred"), + $mapreduce_cluster_temp_dir = "/mapred/system", + $mapreduce_jobtracker_system_dir = "/mapred/system", + $mapreduce_jobtracker_staging_root_dir = "/user", + $yarn_app_mapreduce_am_staging_dir = "/user", + $mapreduce_task_io_sort_factor = 64, # 10 default + $mapreduce_task_io_sort_mb = 256, # 100 default + $mapreduce_reduce_shuffle_parallelcopies = undef, # 5 is default + # processorcount == facter fact + $mapreduce_tasktracker_map_tasks_maximum = inline_template("<%= [1, @processorcount.to_i * 0.20].max.round %>"), + $mapreduce_tasktracker_reduce_tasks_maximum = inline_template("<%= [1, @processorcount.to_i * 0.20].max.round %>"), + $mapreduce_tasktracker_http_threads = 60, # 40 default + $mapreduce_output_fileoutputformat_compress_type = "BLOCK", # "RECORD" default + $mapreduce_map_output_compress = undef, + $mapreduce_job_reduce_slowstart_completedmaps = undef, + $mapred_jobtracker_plugins = "", $hadoop_security_authentication = $hadoop::hadoop_security_authentication, $kerberos_realm = $hadoop::kerberos_realm, ) inherits hadoop { http://git-wip-us.apache.org/repos/asf/bigtop/blob/b2225cfd/bigtop-deploy/puppet/modules/hadoop/templates/mapred-site.xml ---------------------------------------------------------------------- diff --git a/bigtop-deploy/puppet/modules/hadoop/templates/mapred-site.xml b/bigtop-deploy/puppet/modules/hadoop/templates/mapred-site.xml index 5bf9777..d9e842f 100644 --- a/bigtop-deploy/puppet/modules/hadoop/templates/mapred-site.xml +++ b/bigtop-deploy/puppet/modules/hadoop/templates/mapred-site.xml @@ -42,7 +42,7 @@ <name>mapreduce.jobtracker.keytab.file</name> <value>/etc/mapred.keytab</value> <!-- path to the MapReduce keytab --> </property> - + <!-- TaskTracker security configs --> <property> <name>mapreduce.tasktracker.kerberos.principal</name> @@ -56,7 +56,7 @@ <name>mapreduce.tasktracker.keytab.file</name> <value>/etc/mapred.keytab</value> <!-- path to the MapReduce keytab --> </property> - + <!-- TaskController settings --> <property> <name>mapreduce.tasktracker.taskcontroller</name> @@ -66,63 +66,42 @@ <name>mapreduce.tasktracker.group</name> <value>mapred</value> </property> -<% end %> -<% if @mapred_acls_enabled %> +<% end %> +<% if @mapreduce_cluster_acls_enabled %> <property> <name>mapreduce.cluster.acls.enabled</name> - <value><%= @mapred_acls_enabled %></value> + <value><%= @mapreduce_cluster_acls_enabled %></value> </property> -<% end %> +<% end %> <!-- specify JobTracker TaskScheduler --> -<% if @hadoop_jobtracker_taskscheduler %> +<% if @mapreduce_jobtracker_taskscheduler %> <property> <name>mapreduce.jobtracker.taskscheduler</name> <value><%= @hadoop_jobtracker_taskscheduler %></value> </property> -<% end %> - -<% if @hadoop_config_mapred_fairscheduler_assignmultiple %> - <property> - <name>mapred.fairscheduler.assignmultiple</name> - <value><%= @hadoop_config_mapred_fairscheduler_assignmultiple %></value> - </property> -<% end %> -<% if @hadoop_config_mapred_fairscheduler_sizebasedweight %> - <property> - <name>mapred.fairscheduler.sizebasedweight</name> - <value><%= @hadoop_config_mapred_fairscheduler_assignmultiple %></value> - </property> <% end %> - -<% if @hadoop_jobtracker_fairscheduler_weightadjuster %> - <property> - <name>mapred.fairscheduler.weightadjuster</name> - <value><%= @hadoop_jobtracker_fairscheduler_weightadjuster %></value> - </property> -<% end %> - -<% if @hadoop_hs_host %> +<% if @mapreduce_jobhistory_host %> <property> <name>mapreduce.jobhistory.address</name> - <value><%= @hadoop_hs_host %>:<%= @hadoop_hs_port %></value> + <value><%= @mapreduce_jobhistory_host %>:<%= @mapreduce_jobhistory_port %></value> </property> <property> <name>mapreduce.jobhistory.webapp.address</name> - <value><%= @hadoop_hs_host %>:<%= @hadoop_hs_webapp_port %></value> + <value><%= @mapreduce_jobhistory_host %>:<%= @mapreduce_jobhistory_webapp_port %></value> </property> -<% end %> +<% end %> <property> <name>mapreduce.framework.name</name> - <value>yarn</value> + <value><%= @mapreduce_framework_name %></value> </property> <property> <name>mapreduce.jobtracker.address</name> - <value><%= @hadoop_jobtracker_host %>:<%= @hadoop_jobtracker_port%></value> + <value><%= @jobtracker_host %>:<%= @jobtracker_port%></value> </property> <property> @@ -131,189 +110,118 @@ <final>true</final> </property> - <!-- property> +<% if @mapreduce_cluster_temp_dir -%> + <property> <name>mapreduce.cluster.temp.dir</name> - <value>/mapred/system</value> - </property --> + <value><%= @mapreduce_cluster_temp_dir %></value> + </property> +<% end -%> +<% if @mapreduce_jobtracker_system_dir -%> <property> <name>mapreduce.jobtracker.system.dir</name> - <value>/mapred/system</value> + <value><%= @mapreduce_cluster_temp_dir %></value> </property> +<% end -%> +<% if @mapreduce_jobtracker_staging_root_dir -%> <property> <name>mapreduce.jobtracker.staging.root.dir</name> - <value>/user</value> + <value><%= @mapreduce_jobtracker_staging_root_dir %></value> </property> +<% end -%> +<% if @yarn_app_mapreduce_am_staging_dir -%> <property> <name>yarn.app.mapreduce.am.staging-dir</name> - <value>/user</value> + <value><%= @yarn_app_mapreduce_am_staging_dir %></value> </property> +<% end -%> <property> <name>mapred.child.java.opts</name> <value>-Xmx1024m</value> </property> -<% if @hadoop_config_mapred_child_ulimit %> - <property> - <!-- set this to ~1.5x the heap size in mapred.child.java.opts --> - <name>mapred.child.ulimit</name> - <value><%= @hadoop_config_mapred_child_ulimit %></value> - </property> -<% else %> - <property> - <!-- set this to ~1.5x the heap size in mapred.child.java.opts --> - <name>mapred.child.ulimit</name> - <value>unlimited</value> - </property> -<% end %> - -<% if @hadoop_config_io_sort_mb %> - <property> - <name>io.sort.mb</name> - <value><%= @hadoop_config_io_sort_mb %></value> - </property> -<% else %> - <property> - <name>io.sort.mb</name> - <value>256</value> - </property> -<% end %> - -<% if @hadoop_config_io_sort_factor %> - <property> - <name>io.sort.factor</name> - <value><%= @hadoop_config_io_sort_factor %></value> - </property> -<% else %> +<% if @mapreduce_task_io_sort_mb -%> <property> - <name>io.sort.factor</name> - <value>64</value> + <name>mapreduce.task.io.sort.mb</name> + <value><%= @mapreduce_task_io_sort_mb %></value> </property> -<% end %> -<% if @hadoop_config_mapred_job_tracker_handler_count %> +<% end -%> +<% if @mapreduce_task_io_sort_factor -%> <property> - <name>mapred.job.tracker.handler.count</name> - <value><%= @hadoop_config_mapred_job_tracker_handler_count %></value> - <final>true</final> + <name>mapreduce.task.io.sort.factor</name> + <value><%= @mapreduce_task_io_sort_factor %></value> </property> -<% else %> - <property> - <name>mapred.job.tracker.handler.count</name> - <value>10</value> - <final>true</final> - </property> -<% end %> +<% end -%> +<% if @mapreduce_reduce_shuffle_parallelcopies -%> <property> - <name>mapred.map.tasks.speculative.execution</name> - <value>true</value> - </property> - -<% if @hadoop_config_mapred_reduce_parallel_copies %> - <property> - <name>mapred.reduce.parallel.copies</name> + <name>mapreduce.reduce.shuffle.parallelcopies</name> <!-- set this to somewhere between sqrt(nodes) and nodes/2. for <20 nodes, set == |nodes| --> - <value><%= @hadoop_config_mapred_reduce_parallel_copies %></value> + <value><%= @mapreduce_reduce_shuffle_parallelcopies %></value> </property> -<% else %> - <property> - <name>mapred.reduce.parallel.copies</name> - <!-- set this to somewhere between sqrt(nodes) and nodes/2. - for <20 nodes, set == |nodes| --> - <value>5</value> - </property> -<% end %> +<% end -%> +<% if @mapreduce_tasktracker_map_tasks_maximum -%> <property> - <name>mapred.reduce.tasks</name> - <!-- set to numnodes * mapred.tasktracker.reduce.tasks.maximum --> - <value>30</value> - </property> - -<% if @hadoop_config_mapred_reduce_tasks_speculative_execution %> - <property> - <name>mapred.reduce.tasks.speculative.execution</name> - <value><%= @hadoop_config_mapred_reduce_tasks_speculative_execution %></value> - </property> -<% else %> - <property> - <name>mapred.reduce.tasks.speculative.execution</name> - <value>false</value> - </property> -<% end %> - - <property> - <name>mapred.tasktracker.map.tasks.maximum</name> + <name>mapreduce.tasktracker.map.tasks.maximum</name> <!-- see other kb entry about this one. --> - <value><%= [1, @processorcount.to_i * 0.80].max.round %></value> + <value><%= @mapreduce_tasktracker_map_tasks_maximum %></value> <final>true</final> </property> +<% end -%> +<% if @mapreduce_tasktracker_reduce_tasks_maximum -%> <property> - <name>mapred.tasktracker.reduce.tasks.maximum</name> + <name>mapreduce.tasktracker.reduce.tasks.maximum</name> <!-- see other kb entry about this one. --> - <value><%= [1, @processorcount.to_i * 0.20].max.round %></value> + <value><%= @mapreduce_tasktracker_reduce_tasks_maximum %></value> <final>true</final> </property> -<% if @hadoop_config_tasktracker_http_threads %> +<% end -%> +<% if @mapreduce_tasktracker_http_threads -%> <property> - <name>tasktracker.http.threads</name> - <value><%= @hadoop_config_tasktracker_http_threads %></value> + <name>mapreduce.tasktracker.http.threads</name> + <value><%= @mapreduce_tasktracker_http_threads %></value> <final>true</final> </property> -<% else %> - <property> - <name>tasktracker.http.threads</name> - <value>60</value> - <final>true</final> - </property> -<% end %> +<% end -%> +<% if @mapreduce_output_fileoutputformat_compress_type -%> <property> - <name>mapred.output.compression.type</name> - <value>BLOCK</value> + <name>mapreduce.output.fileoutputformat.compress.type</name> + <value><%= @mapreduce_output_fileoutputformat_compress_type %></value> <description>If the job outputs are to compressed as SequenceFiles, how should they be compressed? Should be one of NONE, RECORD or BLOCK.</description> </property> -<% if @hadoop_config_use_compression %> +<% end -%> +<% if @mapreduce_map_output_compress -%> <property> - <name>mapred.compress.map.output</name> - <value><%= @hadoop_config_use_compression %></value> + <name>mapreduce.map.output.compress</name> + <value><%= @mapreduce_map_output_compress %></value> </property> -<% else %> - <property> - <name>mapred.compress.map.output</name> - <value>false</value> - </property> -<% end %> -<% if @hadoop_config_mapred_reduce_slowstart_completed_maps %> +<% end -%> +<% if @mapreduce_job_reduce_slowstart_completedmaps -%> <property> - <name>mapred.reduce.slowstart.completed.maps</name> - <value><%= @hadoop_config_mapred_reduce_slowstart_completed_maps %></value> + <name>mapreduce.job.reduce.slowstart.completedmaps</name> + <value><%= @mapreduce_job_reduce_slowstart_completedmaps %></value> </property> -<% end %> -<% if @hadoop_mapred_jobtracker_plugins %> +<% end -%> +<% if @mapred_jobtracker_plugins -%> <property> <name>mapred.jobtracker.plugins</name> - <value><%= @hadoop_mapred_jobtracker_plugins %></value> + <value><%= @mapred_jobtracker_plugins %></value> <description>Comma-separated list of jobtracker plug-ins to be activated.</description> </property> -<% end %> -<% if @hadoop_mapred_tasktracker_plugins %> - <property> - <name>mapred.tasktracker.instrumentation</name> - <value><%= @hadoop_mapred_tasktracker_plugins %></value> - </property> -<% end %> +<% end -%> </configuration>
