Repository: bigtop Updated Branches: refs/heads/master f506701c8 -> 459e663bc
BIGTOP-1502. Improve puppet deployment for new gridgain component Project: http://git-wip-us.apache.org/repos/asf/bigtop/repo Commit: http://git-wip-us.apache.org/repos/asf/bigtop/commit/459e663b Tree: http://git-wip-us.apache.org/repos/asf/bigtop/tree/459e663b Diff: http://git-wip-us.apache.org/repos/asf/bigtop/diff/459e663b Branch: refs/heads/master Commit: 459e663bcd403ff5f3b185e5abbc3d67dcc4df85 Parents: 098f67f Author: Konstantin Boudnik <[email protected]> Authored: Mon Feb 9 21:33:47 2015 -0800 Committer: Konstantin Boudnik <[email protected]> Committed: Tue Feb 10 13:56:05 2015 -0800 ---------------------------------------------------------------------- bigtop-deploy/puppet/README.md | 4 ++ bigtop-deploy/puppet/manifests/cluster.pp | 4 ++ .../modules/gridgain-hadoop/manifests/init.pp | 28 +++++++++ .../gridgain-hadoop/templates/core-site.xml | 65 ++++++++++++++++++++ .../gridgain-hadoop/templates/mapred-site.xml | 62 +++++++++++++++++++ 5 files changed, 163 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/bigtop/blob/459e663b/bigtop-deploy/puppet/README.md ---------------------------------------------------------------------- diff --git a/bigtop-deploy/puppet/README.md b/bigtop-deploy/puppet/README.md index edab477..d29cefb 100644 --- a/bigtop-deploy/puppet/README.md +++ b/bigtop-deploy/puppet/README.md @@ -124,3 +124,7 @@ And run the following on those nodes: # EOF # puppet apply -d --modulepath="bigtop-deploy/puppet/modules:/etc/puppet/modules" bigtop-deploy/puppet/manifests/site.pp </pre> + +When gridgain-hadoop accelerator is deployed the client configs are placed under +/etc/hadoop/gridgain.client.conf. All one needs to do to run Mapreduce jobs on gridgain-hadoop grid +is to set HADOOP_CONF_DIR=/etc/hadoop/gridgain.client.conf in the client session. http://git-wip-us.apache.org/repos/asf/bigtop/blob/459e663b/bigtop-deploy/puppet/manifests/cluster.pp ---------------------------------------------------------------------- diff --git a/bigtop-deploy/puppet/manifests/cluster.pp b/bigtop-deploy/puppet/manifests/cluster.pp index d4bae8a..c862948 100644 --- a/bigtop-deploy/puppet/manifests/cluster.pp +++ b/bigtop-deploy/puppet/manifests/cluster.pp @@ -64,6 +64,10 @@ class hadoop_worker_node ( include hadoop-hbase::server } + if ($all or "gridgain-hadoop" in $components) { + gridgain-hadoop::server { "gridgain-hadoop-node": } + } + ### If mapred is not installed, yarn can fail. ### So, when we install yarn, we also need mapred for now. ### This dependency should be cleaned up eventually. http://git-wip-us.apache.org/repos/asf/bigtop/blob/459e663b/bigtop-deploy/puppet/modules/gridgain-hadoop/manifests/init.pp ---------------------------------------------------------------------- diff --git a/bigtop-deploy/puppet/modules/gridgain-hadoop/manifests/init.pp b/bigtop-deploy/puppet/modules/gridgain-hadoop/manifests/init.pp index 7088eca..c130832 100644 --- a/bigtop-deploy/puppet/modules/gridgain-hadoop/manifests/init.pp +++ b/bigtop-deploy/puppet/modules/gridgain-hadoop/manifests/init.pp @@ -28,6 +28,34 @@ class gridgain-hadoop { require => Package["gridgain-hadoop"], } + file { "/etc/hadoop/gridgain.client.conf": + ensure => directory, + owner => 'root', + group => 'root', + mode => '0755', + require => Package["gridgain-hadoop-service"], + } + file { "/etc/hadoop/gridgain.client.conf/core-site.xml": + content => template('gridgain-hadoop/core-site.xml'), + require => [File["/etc/hadoop/gridgain.client.conf"]], + } + file { + "/etc/hadoop/gridgain.client.conf/mapred-site.xml": + content => template('gridgain-hadoop/mapred-site.xml'), + require => [File["/etc/hadoop/gridgain.client.conf"]], + } +## let's make sure that gridgain-hadoop libs are linked properly + file {'/usr/lib/hadoop/lib/gridgain-core.jar': + ensure => link, + target => '/usr/lib/gridgain-hadoop/libs/gridgain-core.jar', + require => [Package["gridgain-hadoop-service"]], + } + file {'/usr/lib/hadoop/lib/gridgain-hadoop.jar': + ensure => link, + target => '/usr/lib/gridgain-hadoop/libs/gridgain-hadoop/gridgain-hadoop.jar', + require => [Package["gridgain-hadoop-service"]], + } + service { "gridgain-hadoop": ensure => running, require => [ Package["gridgain-hadoop", "gridgain-hadoop-service"], File["/etc/default/gridgain-hadoop"] ], http://git-wip-us.apache.org/repos/asf/bigtop/blob/459e663b/bigtop-deploy/puppet/modules/gridgain-hadoop/templates/core-site.xml ---------------------------------------------------------------------- diff --git a/bigtop-deploy/puppet/modules/gridgain-hadoop/templates/core-site.xml b/bigtop-deploy/puppet/modules/gridgain-hadoop/templates/core-site.xml new file mode 100644 index 0000000..7e39015 --- /dev/null +++ b/bigtop-deploy/puppet/modules/gridgain-hadoop/templates/core-site.xml @@ -0,0 +1,65 @@ +<?xml version="1.0" encoding="UTF-8"?> +<?xml-stylesheet type="text/xsl" href="configuration.xsl"?> +<!-- + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. See accompanying LICENSE file. +--> + +<!-- + This template file contains settings needed to run Apache Hadoop jobs + with GridGain's distributed in-memory file system GGFS. + + You can replace '$HADOOP_HOME/etc/hadoop/core-site.xml' file with this one + to work with GridGain GGFS nodes running on localhost (these local nodes can be + a part of distributed cluster though). To work with file system on remote + hosts you need to change the host of file system URI to any host running + GridGain's GGFS node. + + Note that GridGain jars must be in Apache Hadoop client classpath to work + with this configuration. + + Run script '$GRIDGAIN_HOME/bin/setup-hadoop.{sh|bat}' for Apache Hadoop client setup. +--> + +<configuration> + <!-- + Set default file system to GGFS instance named "ggfs" configured in GridGain. + --> + <property> + <name>fs.defaultFS</name> + <value>ggfs://ggfs@localhost</value> + </property> + + <!-- + Set Hadoop 1.* file system implementation class for GGFS. + --> + <property> + <name>fs.ggfs.impl</name> + <value>org.gridgain.grid.ggfs.hadoop.v1.GridGgfsHadoopFileSystem</value> + </property> + + <!-- + Set Hadoop 2.* file system implementation class for GGFS. + --> + <property> + <name>fs.AbstractFileSystem.ggfs.impl</name> + <value>org.gridgain.grid.ggfs.hadoop.v2.GridGgfsHadoopFileSystem</value> + </property> + + <!-- + Disallow data node replacement since it does not make sense for GridGain's GGFS nodes. + --> + <property> + <name>dfs.client.block.write.replace-datanode-on-failure.policy</name> + <value>NEVER</value> + </property> +</configuration> http://git-wip-us.apache.org/repos/asf/bigtop/blob/459e663b/bigtop-deploy/puppet/modules/gridgain-hadoop/templates/mapred-site.xml ---------------------------------------------------------------------- diff --git a/bigtop-deploy/puppet/modules/gridgain-hadoop/templates/mapred-site.xml b/bigtop-deploy/puppet/modules/gridgain-hadoop/templates/mapred-site.xml new file mode 100644 index 0000000..d2be8eb --- /dev/null +++ b/bigtop-deploy/puppet/modules/gridgain-hadoop/templates/mapred-site.xml @@ -0,0 +1,62 @@ +<?xml version="1.0"?> +<?xml-stylesheet type="text/xsl" href="configuration.xsl"?> +<!-- + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. See accompanying LICENSE file. +--> + +<!-- + This template file contains settings needed to run Apache Hadoop jobs + with GridGain In-Memory Accelerator. + + You can replace '$HADOOP_HOME/etc/hadoop/mapred-site.xml' file with this one + to run jobs on localhost (local node can be a part of distributed cluster though). + To run jobs on remote host you have to change jobtracker address to the REST address + of any running GridGain node. + + Note that GridGain jars must be in Apache Hadoop client classpath to work + with this configuration. + + Run script '$GRIDGAIN_HOME/bin/setup-hadoop.{sh|bat}' for Apache Hadoop client setup. +--> + +<configuration> + <!-- + Framework name must be set to 'gridgain'. + --> + <property> + <name>mapreduce.framework.name</name> + <value>gridgain</value> + </property> + + <!-- + Job tracker address must be set to the REST address of any running GridGain node. + --> + <property> + <name>mapreduce.jobtracker.address</name> + <value>localhost:11211</value> + </property> + + <!-- Parameters for job tuning. --> + <!-- + <property> + <name>mapreduce.job.reduces</name> + <value>1</value> + </property> + + <property> + <name>mapreduce.job.maps</name> + <value>4</value> + </property> + --> + +</configuration>
