Author: abayer
Date: Mon Aug 13 19:49:06 2012
New Revision: 1372568

URL: http://svn.apache.org/viewvc?rev=1372568&view=rev
Log:
WHIRR-189. Hadoop on EC2 should use all available storage.
Added:
    whirr/trunk/services/hadoop/src/main/java/org/apache/whirr/service/hadoop/VolumeManager.java   (with props)
    whirr/trunk/services/hadoop/src/main/resources/functions/prepare_all_disks.sh   (with props)
    whirr/trunk/services/hadoop/src/main/resources/functions/prepare_disks.sh   (with props)
Modified:
    whirr/trunk/CHANGES.txt
    whirr/trunk/core/src/main/java/org/apache/whirr/compute/BootstrapTemplate.java
    whirr/trunk/core/src/test/java/org/apache/whirr/compute/BootstrapTemplateTest.java
    whirr/trunk/core/src/test/java/org/apache/whirr/service/TemplateBuilderStrategyTest.java
    whirr/trunk/pom.xml
    whirr/trunk/services/cdh/src/main/resources/functions/configure_cdh_hadoop.sh
    whirr/trunk/services/cdh/src/main/resources/functions/configure_cdh_hbase.sh
    whirr/trunk/services/cdh/src/main/resources/functions/install_cdh_hbase.sh
    whirr/trunk/services/hadoop/src/main/java/org/apache/whirr/service/hadoop/HadoopClusterActionHandler.java
    whirr/trunk/services/hadoop/src/main/java/org/apache/whirr/service/hadoop/HadoopConfigurationBuilder.java
    whirr/trunk/services/hadoop/src/main/resources/functions/configure_hadoop.sh
    whirr/trunk/services/hadoop/src/main/resources/whirr-hadoop-default.properties
    whirr/trunk/services/hadoop/src/test/java/org/apache/whirr/service/hadoop/HadoopConfigurationBuilderTest.java
    whirr/trunk/services/yarn/src/main/resources/functions/configure_hadoop_mr2.sh

Modified: whirr/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/whirr/trunk/CHANGES.txt?rev=1372568&r1=1372567&r2=1372568&view=diff
==============================================================================
--- whirr/trunk/CHANGES.txt (original)
+++ whirr/trunk/CHANGES.txt Mon Aug 13 19:49:06 2012
@@ -15,6 +15,8 @@ Trunk (unreleased changes)
 
   IMPROVEMENTS
 
+    WHIRR-189. Hadoop on EC2 should use all available storage. (abayer)
+
     WHIRR-63. Support EC2 Cluster Compute groups for Hadoop etc. (abayer)
 
     WHIRR-573. Allow configuring REPO_HOST for CDH repositories. (abayer)

Modified: whirr/trunk/core/src/main/java/org/apache/whirr/compute/BootstrapTemplate.java
URL: http://svn.apache.org/viewvc/whirr/trunk/core/src/main/java/org/apache/whirr/compute/BootstrapTemplate.java?rev=1372568&r1=1372567&r2=1372568&view=diff
==============================================================================
--- whirr/trunk/core/src/main/java/org/apache/whirr/compute/BootstrapTemplate.java (original)
+++ whirr/trunk/core/src/main/java/org/apache/whirr/compute/BootstrapTemplate.java Mon Aug 13 19:49:06 2012
@@ -19,6 +19,7 @@ package org.apache.whirr.compute;
 
 import static org.jclouds.compute.options.TemplateOptions.Builder.runScript;
+import static org.jclouds.ec2.domain.RootDeviceType.EBS;
 import static org.jclouds.scriptbuilder.domain.Statements.appendFile;
 import static org.jclouds.scriptbuilder.domain.Statements.createOrOverwriteFile;
 import static org.jclouds.scriptbuilder.domain.Statements.interpret;
@@ -31,6 +32,9 @@ import org.apache.whirr.service.jclouds.
 import org.apache.whirr.service.jclouds.TemplateBuilderStrategy;
 import org.jclouds.aws.ec2.AWSEC2ApiMetadata;
 import org.jclouds.aws.ec2.compute.AWSEC2TemplateOptions;
+import org.jclouds.ec2.EC2ApiMetadata;
+import org.jclouds.ec2.compute.options.EC2TemplateOptions;
+import org.jclouds.ec2.compute.predicates.EC2ImagePredicates;
 import org.jclouds.compute.ComputeService;
 import org.jclouds.compute.ComputeServiceContext;
 import org.jclouds.compute.domain.Template;
@@ -108,10 +112,25 @@ public class BootstrapTemplate {
       }
     }
 
-    return setPlacementGroup(context, spec, template, instanceTemplate);
+    return mapEphemeralIfImageIsEBSBacked(context, spec, template, instanceTemplate);
   }
 
   /**
+   * If this is an EBS-backed volume, map the ephemeral device.
+   */
+  private static Template mapEphemeralIfImageIsEBSBacked(ComputeServiceContext context,
+                                                         ClusterSpec spec,
+                                                         Template template,
+                                                         InstanceTemplate instanceTemplate) {
+    if (EC2ApiMetadata.CONTEXT_TOKEN.isAssignableFrom(context.getBackendType())) {
+      if (EC2ImagePredicates.rootDeviceType(EBS).apply(template.getImage())) {
+        template.getOptions().as(EC2TemplateOptions.class).mapEphemeralDeviceToDeviceName("/dev/sdc", "ephemeral1");
+      }
+    }
+    return setPlacementGroup(context, spec, template, instanceTemplate);
+  }
+
+  /**
    * Set the placement group, if desired - if it doesn't already exist, create it.
    */
   private static Template setPlacementGroup(ComputeServiceContext context, ClusterSpec spec,
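The new bootstrap step matters because EBS-backed AMIs launch with only their EBS root volume, whereas instance-store AMIs already come with ephemeral drives attached; the extra mapping asks EC2 to attach one at launch. A minimal sketch of the same check in isolation (the class name is illustrative; the jclouds calls are the ones used in the diff above):

    import static org.jclouds.ec2.domain.RootDeviceType.EBS;

    import org.jclouds.compute.domain.Template;
    import org.jclouds.ec2.compute.options.EC2TemplateOptions;
    import org.jclouds.ec2.compute.predicates.EC2ImagePredicates;

    class EphemeralMappingSketch {
      // EBS-backed images get no instance storage unless the launch request
      // maps it explicitly; attach the second ephemeral drive as /dev/sdc.
      static void requestEphemeral(Template template) {
        if (EC2ImagePredicates.rootDeviceType(EBS).apply(template.getImage())) {
          template.getOptions().as(EC2TemplateOptions.class)
              .mapEphemeralDeviceToDeviceName("/dev/sdc", "ephemeral1");
        }
      }
    }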
Modified: whirr/trunk/core/src/test/java/org/apache/whirr/compute/BootstrapTemplateTest.java
URL: http://svn.apache.org/viewvc/whirr/trunk/core/src/test/java/org/apache/whirr/compute/BootstrapTemplateTest.java?rev=1372568&r1=1372567&r2=1372568&view=diff
==============================================================================
--- whirr/trunk/core/src/test/java/org/apache/whirr/compute/BootstrapTemplateTest.java (original)
+++ whirr/trunk/core/src/test/java/org/apache/whirr/compute/BootstrapTemplateTest.java Mon Aug 13 19:49:06 2012
@@ -38,6 +38,7 @@ import org.jclouds.aws.ec2.compute.AWSEC
 import org.jclouds.aws.ec2.compute.AWSEC2TemplateOptions;
 import org.jclouds.compute.ComputeService;
 import org.jclouds.compute.ComputeServiceContext;
+import org.jclouds.compute.domain.Image;
 import org.jclouds.compute.domain.Template;
 import org.jclouds.compute.domain.TemplateBuilder;
 import org.jclouds.compute.options.TemplateOptions;
@@ -127,8 +128,10 @@ private void assertSpotPriceIs(
 
     Template template = mock(Template.class);
     TemplateOptions options = mock(TemplateOptions.class);
+    Image image = mock(Image.class);
     when(templateBuilder.build()).thenReturn(template);
     when(template.getOptions()).thenReturn(options);
+    when(template.getImage()).thenReturn(image);
 
     AWSEC2TemplateOptions awsEec2TemplateOptions = mock(AWSEC2TemplateOptions.class);
     when(options.as((Class<TemplateOptions>) any())).thenReturn(awsEec2TemplateOptions);

Modified: whirr/trunk/core/src/test/java/org/apache/whirr/service/TemplateBuilderStrategyTest.java
URL: http://svn.apache.org/viewvc/whirr/trunk/core/src/test/java/org/apache/whirr/service/TemplateBuilderStrategyTest.java?rev=1372568&r1=1372567&r2=1372568&view=diff
==============================================================================
--- whirr/trunk/core/src/test/java/org/apache/whirr/service/TemplateBuilderStrategyTest.java (original)
+++ whirr/trunk/core/src/test/java/org/apache/whirr/service/TemplateBuilderStrategyTest.java Mon Aug 13 19:49:06 2012
@@ -44,6 +44,7 @@ public class TemplateBuilderStrategyTest
   @Before
   public void setUp()
     throws ConfigurationException, JSchException, IOException {
     spec = ClusterSpec.withTemporaryKeys();
+    spec.setProvider("my-provider");
     instanceTemplate = mock(InstanceTemplate.class);
   }

Modified: whirr/trunk/pom.xml
URL: http://svn.apache.org/viewvc/whirr/trunk/pom.xml?rev=1372568&r1=1372567&r2=1372568&view=diff
==============================================================================
--- whirr/trunk/pom.xml (original)
+++ whirr/trunk/pom.xml Mon Aug 13 19:49:06 2012
@@ -348,23 +348,7 @@
           </archive>
         </configuration>
       </plugin>
-      <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-javadoc-plugin</artifactId>
-        <version>2.8.1</version>
-        <configuration>
-          <excludePackageNames>org.jclouds.*</excludePackageNames>
-        </configuration>
-        <executions>
-          <execution>
-            <id>aggregate</id>
-            <goals>
-              <goal>aggregate</goal>
-            </goals>
-            <phase>site</phase>
-          </execution>
-        </executions>
-      </plugin>
+
       <plugin>
         <groupId>org.apache.maven.plugins</groupId>
         <artifactId>maven-remote-resources-plugin</artifactId>
@@ -411,6 +395,7 @@
         <groupId>org.apache.rat</groupId>
         <artifactId>apache-rat-plugin</artifactId>
         <version>0.8</version>
+        <inherited>false</inherited>
        <executions>
          <execution>
            <phase>package</phase>
@@ -604,12 +589,32 @@
         <groupId>org.apache.maven.plugins</groupId>
         <artifactId>maven-site-plugin</artifactId>
         <version>3.0</version>
+        <inherited>false</inherited>
         <configuration>
-          <!-- Reports are generated at the site level -->
-          <generateReports>false</generateReports>
           <templateDirectory>${basedir}/src/site/resources</templateDirectory>
           <template>site.vm</template>
           <relativizeDecorationLinks>false</relativizeDecorationLinks>
+          <reportPlugins>
+            <plugin>
+              <groupId>org.apache.maven.plugins</groupId>
+              <artifactId>maven-javadoc-plugin</artifactId>
+              <version>2.8.1</version>
+              <configuration>
+                <encoding>${project.build.sourceEncoding}</encoding>
+                <quiet>true</quiet>
+                <maxmemory>256m</maxmemory>
+              </configuration>
+              <reportSets>
+                <reportSet>
+                  <id>default</id>
+                  <reports>
+                    <report>javadoc</report>
+                    <report>aggregate</report>
+                  </reports>
+                </reportSet>
+              </reportSets>
+            </plugin>
+          </reportPlugins>
         </configuration>
       </plugin>
     </plugins>
@@ -701,13 +706,6 @@
         </configuration>
         <executions>
           <execution>
-            <id>aggregate</id>
-            <goals>
-              <goal>aggregate</goal>
-            </goals>
-            <phase>site</phase>
-          </execution>
-          <execution>
             <id>attach-javadocs</id>
             <goals>
               <goal>jar</goal>

Modified: whirr/trunk/services/cdh/src/main/resources/functions/configure_cdh_hadoop.sh
URL: http://svn.apache.org/viewvc/whirr/trunk/services/cdh/src/main/resources/functions/configure_cdh_hadoop.sh?rev=1372568&r1=1372567&r2=1372568&view=diff
==============================================================================
--- whirr/trunk/services/cdh/src/main/resources/functions/configure_cdh_hadoop.sh (original)
+++ whirr/trunk/services/cdh/src/main/resources/functions/configure_cdh_hadoop.sh Mon Aug 13 19:49:06 2012
@@ -26,15 +26,6 @@ function configure_cdh_hadoop() {
   ROLES=$1
   shift
 
-  case $CLOUD_PROVIDER in
-    ec2 | aws-ec2 )
-      # Alias /mnt as /data
-      if [ ! -e /data ]; then ln -s /mnt /data; fi
-      ;;
-    *)
-      ;;
-  esac
-
   REPO=${REPO:-cdh4}
   CDH_MAJOR_VERSION=$(echo $REPO | sed -e 's/cdh\([0-9]\).*/\1/')
   if [ $CDH_MAJOR_VERSION = "4" ]; then
@@ -49,11 +40,7 @@ function configure_cdh_hadoop() {
     MAPREDUCE_PACKAGE_PREFIX=hadoop-${HADOOP_VERSION:-0.20}
   fi
 
-  mkdir -p /data/hadoop
-  chgrp hadoop /data/hadoop
-  chmod g+w /data/hadoop
-  mkdir /data/tmp
-  chmod a+rwxt /data/tmp
+  make_hadoop_dirs /data*
 
   # Copy generated configuration files in place
   cp /tmp/{core,hdfs,mapred}-site.xml $HADOOP_CONF_DIR
@@ -105,6 +92,19 @@ function configure_cdh_hadoop() {
 
 }
 
+function make_hadoop_dirs {
+  for mount in "$@"; do
+    if [ ! -e $mount/hadoop ]; then
+      mkdir -p $mount/hadoop
+      chown hadoop:hadoop $mount/hadoop
+    fi
+    if [ ! -e $mount/tmp ]; then
+      mkdir $mount/tmp
+      chmod a+rwxt $mount/tmp
+    fi
+  done
+}
+
 function start_namenode() {
   if which dpkg &> /dev/null; then
     retry_apt_get -y install $HDFS_PACKAGE_PREFIX-namenode

Modified: whirr/trunk/services/cdh/src/main/resources/functions/configure_cdh_hbase.sh
URL: http://svn.apache.org/viewvc/whirr/trunk/services/cdh/src/main/resources/functions/configure_cdh_hbase.sh?rev=1372568&r1=1372567&r2=1372568&view=diff
==============================================================================
--- whirr/trunk/services/cdh/src/main/resources/functions/configure_cdh_hbase.sh (original)
+++ whirr/trunk/services/cdh/src/main/resources/functions/configure_cdh_hbase.sh Mon Aug 13 19:49:06 2012
@@ -54,21 +54,7 @@ function configure_cdh_hbase() {
     HBASE_PREFIX=hadoop-
   fi
 
-  case $CLOUD_PROVIDER in
-    ec2 | aws-ec2 )
-      # Alias /mnt as /data
-      if [ ! -e /data ]; then ln -s /mnt /data; fi
-      ;;
-    *)
-      ;;
-  esac
-
-  mkdir -p /data/hbase
-  chown hbase:hbase /data/hbase
-  if [ ! -e /data/tmp ]; then
-    mkdir /data/tmp
-    chmod a+rwxt /data/tmp
-  fi
+  make_hbase_dirs /data*
 
   # Copy generated configuration files in place
   cp /tmp/hbase-site.xml $HBASE_CONF_DIR
@@ -147,3 +133,17 @@ function install_hbase_daemon() {
     retry_yum install -y $daemon
   fi
 }
+
+
+function make_hbase_dirs {
+  for mount in "$@"; do
+    if [ ! -e $mount/hbase ]; then
+      mkdir -p $mount/hbase
+      chown hbase:hbase $mount/hbase
+    fi
+    if [ ! -e $mount/tmp ]; then
+      mkdir $mount/tmp
+      chmod a+rwxt $mount/tmp
+    fi
+  done
+}

Modified: whirr/trunk/services/cdh/src/main/resources/functions/install_cdh_hbase.sh
URL: http://svn.apache.org/viewvc/whirr/trunk/services/cdh/src/main/resources/functions/install_cdh_hbase.sh?rev=1372568&r1=1372567&r2=1372568&view=diff
==============================================================================
--- whirr/trunk/services/cdh/src/main/resources/functions/install_cdh_hbase.sh (original)
+++ whirr/trunk/services/cdh/src/main/resources/functions/install_cdh_hbase.sh Mon Aug 13 19:49:06 2012
@@ -69,15 +69,6 @@ function install_cdh_hbase() {
     esac
   done
 
-  case $CLOUD_PROVIDER in
-    ec2 | aws-ec2 )
-      # Alias /mnt as /data
-      if [ ! -e /data ]; then ln -s /mnt /data; fi
-      ;;
-    *)
-      ;;
-  esac
-
   REPO=${REPO:-cdh4}
   REPO_HOST=${REPO_HOST:-archive.cloudera.com}
   HBASE_HOME=/usr/lib/hbase

Modified: whirr/trunk/services/hadoop/src/main/java/org/apache/whirr/service/hadoop/HadoopClusterActionHandler.java
URL: http://svn.apache.org/viewvc/whirr/trunk/services/hadoop/src/main/java/org/apache/whirr/service/hadoop/HadoopClusterActionHandler.java?rev=1372568&r1=1372567&r2=1372568&view=diff
==============================================================================
--- whirr/trunk/services/hadoop/src/main/java/org/apache/whirr/service/hadoop/HadoopClusterActionHandler.java (original)
+++ whirr/trunk/services/hadoop/src/main/java/org/apache/whirr/service/hadoop/HadoopClusterActionHandler.java Mon Aug 13 19:49:06 2012
@@ -25,7 +25,9 @@ import static org.apache.whirr.service.h
 import static org.jclouds.scriptbuilder.domain.Statements.call;
 
 import com.google.common.base.Joiner;
+import com.google.common.collect.Iterables;
 import java.io.IOException;
+import java.util.Map;
 import java.util.Set;
 import org.apache.commons.configuration.Configuration;
 import org.apache.commons.configuration.ConfigurationException;
@@ -80,6 +82,16 @@ public abstract class HadoopClusterActio
       "-u", tarball));
   }
 
+  protected Map<String, String> getDeviceMappings(ClusterActionEvent event) {
+    Set<Instance> instances = event.getCluster().getInstancesMatching(RolePredicates.role(getRole()));
+    Instance prototype = Iterables.getFirst(instances, null);
+    if (prototype == null) {
+      throw new IllegalStateException("No instances found in role " + getRole());
+    }
+    VolumeManager volumeManager = new VolumeManager();
+    return volumeManager.getDeviceMappings(event.getClusterSpec(), prototype);
+  }
+
   @Override
   protected void beforeConfigure(ClusterActionEvent event) throws
     IOException, InterruptedException {
@@ -102,11 +114,12 @@ public abstract class HadoopClusterActio
 
   private void createHadoopConfigFiles(ClusterActionEvent event,
       ClusterSpec clusterSpec, Cluster cluster) throws IOException {
+    Map<String, String> deviceMappings = getDeviceMappings(event);
 
     try {
      event.getStatementBuilder().addStatements(
        buildCommon("/tmp/core-site.xml", clusterSpec, cluster),
-        buildHdfs("/tmp/hdfs-site.xml", clusterSpec, cluster),
-        buildMapReduce("/tmp/mapred-site.xml", clusterSpec, cluster),
+        buildHdfs("/tmp/hdfs-site.xml", clusterSpec, cluster, deviceMappings.keySet()),
+        buildMapReduce("/tmp/mapred-site.xml", clusterSpec, cluster, deviceMappings.keySet()),
        buildHadoopEnv("/tmp/hadoop-env.sh", clusterSpec, cluster),
        TemplateUtils.createFileFromTemplate("/tmp/hadoop-metrics.properties", event.getTemplateEngine(), getMetricsTemplate(event, clusterSpec, cluster), clusterSpec, cluster)
      );
@@ -114,6 +127,8 @@ public abstract class HadoopClusterActio
     } catch (ConfigurationException e) {
       throw new IOException(e);
     }
+    String devMappings = VolumeManager.asString(deviceMappings);
+    addStatement(event, call("prepare_all_disks", "'" + devMappings + "'"));
   }
 
   private String getMetricsTemplate(ClusterActionEvent event, ClusterSpec clusterSpec, Cluster cluster) {
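To make the handler plumbing concrete: getDeviceMappings() returns an ordered mount-to-device map for one prototype instance of the role, and VolumeManager.asString() flattens it into the semicolon/comma string handed to the prepare_all_disks script. A worked example (the device names are hypothetical; real values come from the node's jclouds hardware profile at runtime):

    import java.util.LinkedHashMap;
    import java.util.Map;

    import org.apache.whirr.service.hadoop.VolumeManager;

    class DeviceMappingExample {
      public static void main(String[] args) {
        // Insertion order is preserved: the first non-boot volume becomes
        // /data0, the second /data1, and so on.
        Map<String, String> mappings = new LinkedHashMap<String, String>();
        mappings.put("/data0", "/dev/sdb");
        mappings.put("/data1", "/dev/sdc");

        // Prints: /data0,/dev/sdb;/data1,/dev/sdc
        System.out.println(VolumeManager.asString(mappings));

        // The handler then emits the equivalent of:
        //   prepare_all_disks '/data0,/dev/sdb;/data1,/dev/sdc'
      }
    }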
Modified: whirr/trunk/services/hadoop/src/main/java/org/apache/whirr/service/hadoop/HadoopConfigurationBuilder.java
URL: http://svn.apache.org/viewvc/whirr/trunk/services/hadoop/src/main/java/org/apache/whirr/service/hadoop/HadoopConfigurationBuilder.java?rev=1372568&r1=1372567&r2=1372568&view=diff
==============================================================================
--- whirr/trunk/services/hadoop/src/main/java/org/apache/whirr/service/hadoop/HadoopConfigurationBuilder.java (original)
+++ whirr/trunk/services/hadoop/src/main/java/org/apache/whirr/service/hadoop/HadoopConfigurationBuilder.java Mon Aug 13 19:49:06 2012
@@ -21,7 +21,10 @@ package org.apache.whirr.service.hadoop;
 import static org.apache.whirr.RolePredicates.role;
 
 import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Function;
+import com.google.common.base.Joiner;
 import com.google.common.collect.Iterables;
+import com.google.common.collect.Lists;
 
 import java.io.IOException;
 import java.util.Set;
@@ -50,6 +53,7 @@ public class HadoopConfigurationBuilder
       Configuration defaults, String prefix) throws ConfigurationException {
     CompositeConfiguration config = new CompositeConfiguration();
+    config.setDelimiterParsingDisabled(true);
     Configuration sub = clusterSpec.getConfigurationForKeysWithPrefix(prefix);
     config.addConfiguration(sub.subset(prefix)); // remove prefix
     config.addConfiguration(defaults.subset(prefix));
 
@@ -64,16 +68,16 @@ public class HadoopConfigurationBuilder
   }
 
   public static Statement buildHdfs(String path, ClusterSpec clusterSpec,
-      Cluster cluster) throws ConfigurationException, IOException {
+      Cluster cluster, Set<String> dataDirectories) throws ConfigurationException, IOException {
     Configuration config = buildHdfsConfiguration(clusterSpec, cluster,
-        new PropertiesConfiguration(WHIRR_HADOOP_DEFAULT_PROPERTIES));
+        new PropertiesConfiguration(WHIRR_HADOOP_DEFAULT_PROPERTIES), dataDirectories);
     return HadoopConfigurationConverter.asCreateXmlConfigurationFileStatement(path, config);
   }
 
   public static Statement buildMapReduce(String path, ClusterSpec clusterSpec,
-      Cluster cluster) throws ConfigurationException, IOException {
+      Cluster cluster, Set<String> dataDirectories) throws ConfigurationException, IOException {
     Configuration config = buildMapReduceConfiguration(clusterSpec, cluster,
-        new PropertiesConfiguration(WHIRR_HADOOP_DEFAULT_PROPERTIES));
+        new PropertiesConfiguration(WHIRR_HADOOP_DEFAULT_PROPERTIES), dataDirectories);
     return HadoopConfigurationConverter.asCreateXmlConfigurationFileStatement(path, config);
   }
 
@@ -100,16 +104,27 @@ public class HadoopConfigurationBuilder
 
   @VisibleForTesting
   static Configuration buildHdfsConfiguration(ClusterSpec clusterSpec,
-      Cluster cluster, Configuration defaults) throws ConfigurationException {
-    return build(clusterSpec, cluster, defaults, "hadoop-hdfs");
+      Cluster cluster, Configuration defaults, Set<String> dataDirectories) throws ConfigurationException {
+    Configuration config = build(clusterSpec, cluster, defaults, "hadoop-hdfs");
+
+    setIfAbsent(config, "dfs.data.dir",
+        appendToDataDirectories(dataDirectories, "/hadoop/hdfs/data"));
+    setIfAbsent(config, "dfs.name.dir",
+        appendToDataDirectories(dataDirectories, "/hadoop/hdfs/name"));
+    setIfAbsent(config, "fs.checkpoint.dir",
+        appendToDataDirectories(dataDirectories, "/hadoop/hdfs/secondary"));
+    return config;
   }
 
   @VisibleForTesting
   static Configuration buildMapReduceConfiguration(ClusterSpec clusterSpec,
-      Cluster cluster, Configuration defaults) throws ConfigurationException, IOException {
+      Cluster cluster, Configuration defaults, Set<String> dataDirectories) throws ConfigurationException, IOException {
     Configuration config = build(clusterSpec, cluster, defaults, "hadoop-mapreduce");
 
+    setIfAbsent(config, "mapred.local.dir",
+        appendToDataDirectories(dataDirectories, "/hadoop/mapred/local"));
+
     Set<Instance> taskTrackers = cluster
       .getInstancesMatching(role(HadoopTaskTrackerClusterActionHandler.ROLE));
@@ -158,5 +173,15 @@ public class HadoopConfigurationBuilder
       config.setProperty(property, value);
     }
   }
+
+  private static String appendToDataDirectories(Set<String> dataDirectories, final String suffix) {
+    return Joiner.on(',').join(Lists.transform(Lists.newArrayList(dataDirectories),
+        new Function<String, String>() {
+          @Override public String apply(String input) {
+            return input + suffix;
+          }
+        }
+    ));
+  }
 }
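The effect of the new appendToDataDirectories() helper is easiest to see with concrete inputs: every data directory gets the per-property suffix, and the results are comma-joined into the list form Hadoop expects for dfs.data.dir and friends. A standalone sketch mirroring the private helper (not calling it directly, since it is private; the expected output matches the unit test further below):

    import com.google.common.base.Function;
    import com.google.common.base.Joiner;
    import com.google.common.collect.Lists;
    import com.google.common.collect.Sets;

    import java.util.Set;

    class DataDirJoinExample {
      public static void main(String[] args) {
        Set<String> dataDirs =
            Sets.newLinkedHashSet(Lists.newArrayList("/data0", "/data1"));
        final String suffix = "/hadoop/hdfs/data";
        // Append the suffix to each data directory, then comma-join.
        String joined = Joiner.on(',').join(
            Lists.transform(Lists.newArrayList(dataDirs),
                new Function<String, String>() {
                  @Override public String apply(String input) {
                    return input + suffix;
                  }
                }));
        // Prints: /data0/hadoop/hdfs/data,/data1/hadoop/hdfs/data
        System.out.println(joined);
      }
    }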
Added: whirr/trunk/services/hadoop/src/main/java/org/apache/whirr/service/hadoop/VolumeManager.java
URL: http://svn.apache.org/viewvc/whirr/trunk/services/hadoop/src/main/java/org/apache/whirr/service/hadoop/VolumeManager.java?rev=1372568&view=auto
==============================================================================
--- whirr/trunk/services/hadoop/src/main/java/org/apache/whirr/service/hadoop/VolumeManager.java (added)
+++ whirr/trunk/services/hadoop/src/main/java/org/apache/whirr/service/hadoop/VolumeManager.java Mon Aug 13 19:49:06 2012
@@ -0,0 +1,65 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.whirr.service.hadoop;
+
+import com.google.common.collect.Maps;
+
+import java.util.List;
+import java.util.Map;
+
+import org.apache.whirr.Cluster.Instance;
+import org.apache.whirr.ClusterSpec;
+import org.jclouds.compute.domain.Hardware;
+import org.jclouds.compute.domain.Volume;
+
+public class VolumeManager {
+
+  public static final String MOUNT_PREFIX = "/data";
+
+  public Map<String, String> getDeviceMappings(ClusterSpec clusterSpec, Instance instance) {
+    Map<String, String> mappings = Maps.newLinkedHashMap();
+    int number = 0;
+    Hardware hardware = instance.getNodeMetadata().getHardware();
+
+    /* null when using the BYON jclouds compute provider */
+    if (hardware != null) {
+      List<? extends Volume> volumes =
+        instance.getNodeMetadata().getHardware().getVolumes();
+      for (Volume volume : volumes) {
+        if (volume.isBootDevice()) {
+          continue;
+        }
+
+        mappings.put(MOUNT_PREFIX + number++, volume.getDevice());
+      }
+    }
+    return mappings;
+  }
+
+  public static String asString(Map<String, String> deviceMappings) {
+    StringBuilder sb = new StringBuilder();
+    for (Map.Entry<String, String> mapping : deviceMappings.entrySet()) {
+      if (sb.length() > 0) {
+        sb.append(";");
+      }
+      sb.append(mapping.getKey()).append(",").append(mapping.getValue());
+    }
+    return sb.toString();
+  }
+}

Propchange: whirr/trunk/services/hadoop/src/main/java/org/apache/whirr/service/hadoop/VolumeManager.java
------------------------------------------------------------------------------
    svn:eol-style = native
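VolumeManager skips boot devices and numbers the remaining volumes under the /data prefix. A sketch of that behavior using Mockito mocks, in the same style as the project's tests (the device name is hypothetical, and the ClusterSpec argument is passed as null only because the code above does not use it):

    import static org.mockito.Mockito.doReturn;
    import static org.mockito.Mockito.mock;
    import static org.mockito.Mockito.when;

    import com.google.common.collect.ImmutableList;

    import java.util.Map;

    import org.apache.whirr.Cluster.Instance;
    import org.apache.whirr.service.hadoop.VolumeManager;
    import org.jclouds.compute.domain.Hardware;
    import org.jclouds.compute.domain.NodeMetadata;
    import org.jclouds.compute.domain.Volume;

    class VolumeManagerSketch {
      static Map<String, String> example() {
        // One boot volume and one ephemeral drive on the node's hardware.
        Volume boot = mock(Volume.class);
        when(boot.isBootDevice()).thenReturn(true);

        Volume ephemeral = mock(Volume.class);
        when(ephemeral.isBootDevice()).thenReturn(false);
        when(ephemeral.getDevice()).thenReturn("/dev/sdb"); // hypothetical

        Hardware hardware = mock(Hardware.class);
        doReturn(ImmutableList.of(boot, ephemeral)).when(hardware).getVolumes();

        NodeMetadata node = mock(NodeMetadata.class);
        when(node.getHardware()).thenReturn(hardware);
        Instance instance = mock(Instance.class);
        when(instance.getNodeMetadata()).thenReturn(node);

        // Boot volume skipped; result is {"/data0" -> "/dev/sdb"}
        return new VolumeManager().getDeviceMappings(null, instance);
      }
    }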
Modified: whirr/trunk/services/hadoop/src/main/resources/functions/configure_hadoop.sh
URL: http://svn.apache.org/viewvc/whirr/trunk/services/hadoop/src/main/resources/functions/configure_hadoop.sh?rev=1372568&r1=1372567&r2=1372568&view=diff
==============================================================================
--- whirr/trunk/services/hadoop/src/main/resources/functions/configure_hadoop.sh (original)
+++ whirr/trunk/services/hadoop/src/main/resources/functions/configure_hadoop.sh Mon Aug 13 19:49:06 2012
@@ -26,24 +26,10 @@ function configure_hadoop() {
   ROLES=$1
   shift
 
-  case $CLOUD_PROVIDER in
-    ec2 | aws-ec2 )
-      # Alias /mnt as /data
-      ln -s /mnt /data
-      ;;
-    *)
-      ;;
-  esac
-
   HADOOP_HOME=/usr/local/hadoop
   HADOOP_CONF_DIR=$HADOOP_HOME/conf
 
-  mkdir -p /data/hadoop
-  chown hadoop:hadoop /data/hadoop
-  if [ ! -e /data/tmp ]; then
-    mkdir /data/tmp
-    chmod a+rwxt /data/tmp
-  fi
+  make_hadoop_dirs /data*
 
   mkdir /etc/hadoop
   ln -s $HADOOP_CONF_DIR /etc/hadoop/conf
@@ -94,6 +80,19 @@ function configure_hadoop() {
 
 }
 
+function make_hadoop_dirs {
+  for mount in "$@"; do
+    if [ ! -e $mount/hadoop ]; then
+      mkdir -p $mount/hadoop
+      chown hadoop:hadoop $mount/hadoop
+    fi
+    if [ ! -e $mount/tmp ]; then
+      mkdir $mount/tmp
+      chmod a+rwxt $mount/tmp
+    fi
+  done
+}
+
 function start_namenode() {
   if which dpkg &> /dev/null; then
     AS_HADOOP="su -s /bin/bash - hadoop -c"

Added: whirr/trunk/services/hadoop/src/main/resources/functions/prepare_all_disks.sh
URL: http://svn.apache.org/viewvc/whirr/trunk/services/hadoop/src/main/resources/functions/prepare_all_disks.sh?rev=1372568&view=auto
==============================================================================
--- whirr/trunk/services/hadoop/src/main/resources/functions/prepare_all_disks.sh (added)
+++ whirr/trunk/services/hadoop/src/main/resources/functions/prepare_all_disks.sh Mon Aug 13 19:49:06 2012
@@ -0,0 +1,84 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+set -x
+
+# This function ensures that all the mount directories in the mapping string
+# are available to be used. This is achieved by formatting, mounting, and
+# symlinking (if the volume is already mounted as another directory).
+#
+# E.g. suppose the mapping string were /data0,/dev/sdb;/data1,/dev/sdc
+# and /dev/sdb were mounted on /mnt, and /dev/sdc was not mounted or formatted.
+# In this case a symlink would be created from /data0 to /mnt. /dev/sdc would
+# be formatted, then mounted on /data1.
+function prepare_all_disks() {
+  for mapping in $(echo "$1" | tr ";" "\n"); do
+    # Split on the comma (see "Parameter Expansion" in the bash man page)
+    mount=${mapping%,*}
+    device=${mapping#*,}
+    prep_disk $mount $device
+  done
+  # Make sure there's at least a /data0 and /data (on the root filesystem)
+  if [ ! -e /data0 ]; then
+    if [ -e /data ]; then
+      ln -s /data /data0
+    else
+      mkdir /data0
+      ln -s /data0 /data
+    fi
+  else
+    if [ ! -e /data ]; then
+      ln -s /data0 /data
+    fi
+  fi
+}
+
+function prep_disk() {
+  mount=$1
+  device=$2
+  automount=${3:-false}
+
+  # is device formatted?
+  if [ $(mountpoint -q -x $device) ]; then
+    echo "$device is formatted"
+  else
+    if which dpkg &> /dev/null; then
+      apt-get install -y xfsprogs
+    elif which rpm &> /dev/null; then
+      yum install -y xfsprogs
+    fi
+    echo "warning: ERASING CONTENTS OF $device"
+    mkfs.xfs -f $device
+  fi
+  # is device mounted?
+  mount | grep -q $device
+  if [ $? == 0 ]; then
+    echo "$device is mounted"
+    if [ ! -d $mount ]; then
+      echo "Symlinking to $mount"
+      ln -s $(grep $device /proc/mounts | awk '{print $2}') $mount
+    fi
+  else
+    echo "Mounting $device on $mount"
+    if [ ! -e $mount ]; then
+      mkdir $mount
+    fi
+    mount -o defaults,noatime $device $mount
+    if $automount ; then
+      echo "$device $mount xfs defaults,noatime 0 0" >> /etc/fstab
+    fi
+  fi
+}

Propchange: whirr/trunk/services/hadoop/src/main/resources/functions/prepare_all_disks.sh
------------------------------------------------------------------------------
    svn:eol-style = native

Added: whirr/trunk/services/hadoop/src/main/resources/functions/prepare_disks.sh
URL: http://svn.apache.org/viewvc/whirr/trunk/services/hadoop/src/main/resources/functions/prepare_disks.sh?rev=1372568&view=auto
==============================================================================
--- whirr/trunk/services/hadoop/src/main/resources/functions/prepare_disks.sh (added)
+++ whirr/trunk/services/hadoop/src/main/resources/functions/prepare_disks.sh Mon Aug 13 19:49:06 2012
@@ -0,0 +1,49 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+set -x
+
+# This function ensures that all the mount directories in the mapping string
+# whose devices are already mounted are available to be used. Symlinks are
+# created as necessary.
+#
+# E.g. suppose the mapping string were /data0,/dev/sdb;/data1,/dev/sdc
+# and /dev/sdb were mounted on /mnt, and /dev/sdc was not mounted (possibly
+# not even formatted).
+# In this case a symlink would be created from /data0 to /mnt. /data1 would
+# be created.
+function prepare_disks() {
+  for mapping in $(echo "$1" | tr ";" "\n"); do
+    # Split on the comma (see "Parameter Expansion" in the bash man page)
+    mount=${mapping%,*}
+    device=${mapping#*,}
+    prep_disk $mount $device
+  done
+}
+
+function prep_disk() {
+  mount=$1
+  device=$2
+  # is device mounted?
+  mount | grep -q $device
+  if [ $? == 0 ]; then
+    echo "$device is mounted"
+    if [ ! -d $mount ]; then
+      echo "Symlinking to $mount"
+      ln -s $(grep $device /proc/mounts | awk '{print $2}') $mount
+    fi
+  fi
+}

Propchange: whirr/trunk/services/hadoop/src/main/resources/functions/prepare_disks.sh
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: whirr/trunk/services/hadoop/src/main/resources/whirr-hadoop-default.properties
URL: http://svn.apache.org/viewvc/whirr/trunk/services/hadoop/src/main/resources/whirr-hadoop-default.properties?rev=1372568&r1=1372567&r2=1372568&view=diff
==============================================================================
--- whirr/trunk/services/hadoop/src/main/resources/whirr-hadoop-default.properties (original)
+++ whirr/trunk/services/hadoop/src/main/resources/whirr-hadoop-default.properties Mon Aug 13 19:49:06 2012
@@ -32,13 +32,9 @@ hadoop-common.fs.trash.interval=1440
 
 # HDFS
 hadoop-hdfs.dfs.block.size=134217728
-hadoop-hdfs.dfs.data.dir=/data/hadoop/hdfs/data
 hadoop-hdfs.dfs.datanode.du.reserved=1073741824
-hadoop-hdfs.dfs.name.dir=/data/hadoop/hdfs/name
-hadoop-hdfs.fs.checkpoint.dir=/data/hadoop/hdfs/secondary
 
 # MR
-hadoop-mapreduce.mapred.local.dir=/data/hadoop/mapred/local
 hadoop-mapreduce.mapred.map.tasks.speculative.execution=true
 hadoop-mapreduce.mapred.reduce.tasks.speculative.execution=false
 hadoop-mapreduce.mapred.system.dir=/hadoop/system/mapred

Modified: whirr/trunk/services/hadoop/src/test/java/org/apache/whirr/service/hadoop/HadoopConfigurationBuilderTest.java
URL: http://svn.apache.org/viewvc/whirr/trunk/services/hadoop/src/test/java/org/apache/whirr/service/hadoop/HadoopConfigurationBuilderTest.java?rev=1372568&r1=1372567&r2=1372568&view=diff
==============================================================================
--- whirr/trunk/services/hadoop/src/test/java/org/apache/whirr/service/hadoop/HadoopConfigurationBuilderTest.java (original)
+++ whirr/trunk/services/hadoop/src/test/java/org/apache/whirr/service/hadoop/HadoopConfigurationBuilderTest.java Mon Aug 13 19:49:06 2012
@@ -23,6 +23,8 @@ import com.google.common.collect.Immutab
 import com.google.common.collect.ImmutableSet;
 import com.google.common.collect.ImmutableSet.Builder;
 import com.google.common.collect.Iterators;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Sets;
 import org.apache.commons.configuration.Configuration;
 import org.apache.commons.configuration.PropertiesConfiguration;
 import org.apache.whirr.Cluster;
@@ -142,21 +144,27 @@ public class HadoopConfigurationBuilderT
   @Test
   public void testHdfs() throws Exception {
     Configuration conf = HadoopConfigurationBuilder.buildHdfsConfiguration(
-        clusterSpec, cluster, defaults);
-    assertThat(Iterators.size(conf.getKeys()), is(1));
+        clusterSpec, cluster, defaults,
+        Sets.newLinkedHashSet(Lists.newArrayList("/data0", "/data1")));
+    assertThat(Iterators.size(conf.getKeys()), is(4));
     assertThat(conf.getString("p1"), is("hdfs1"));
+    assertThat(conf.getString("dfs.data.dir"),
+        is("/data0/hadoop/hdfs/data,/data1/hadoop/hdfs/data"));
   }
 
   @Test
   public void testMapReduce() throws Exception {
     Cluster cluster = newCluster(5);
     Configuration conf = HadoopConfigurationBuilder
-      .buildMapReduceConfiguration(clusterSpec, cluster, defaults);
+      .buildMapReduceConfiguration(clusterSpec, cluster, defaults,
+        Sets.newLinkedHashSet(Lists.newArrayList("/data0", "/data1")));
     assertThat(conf.getString("p1"), is("mapred1"));
     assertThat(conf.getString("mapred.job.tracker"), matches(".+:8021"));
     assertThat(conf.getString("mapred.tasktracker.map.tasks.maximum"), is("4"));
     assertThat(conf.getString("mapred.tasktracker.reduce.tasks.maximum"), is("3"));
     assertThat(conf.getString("mapred.reduce.tasks"), is("15"));
+    assertThat(conf.getString("mapred.local.dir"),
+        is("/data0/hadoop/mapred/local,/data1/hadoop/mapred/local"));
   }
 
   @Test
@@ -165,7 +173,8 @@ public class HadoopConfigurationBuilderT
     overrides.addProperty("hadoop-mapreduce.mapred.tasktracker.map.tasks.maximum", "70");
     clusterSpec = ClusterSpec.withNoDefaults(overrides);
     Configuration conf = HadoopConfigurationBuilder.buildMapReduceConfiguration(
-        clusterSpec, cluster, defaults);
+        clusterSpec, cluster, defaults,
+        Sets.newLinkedHashSet(Lists.newArrayList("/data0", "/data1")));
     assertThat(conf.getString("mapred.tasktracker.map.tasks.maximum"), is("70"));
   }
 
@@ -175,7 +184,8 @@ public class HadoopConfigurationBuilderT
     overrides.addProperty("hadoop-mapreduce.mapred.reduce.tasks", "7");
     clusterSpec = ClusterSpec.withNoDefaults(overrides);
     Configuration conf = HadoopConfigurationBuilder.buildMapReduceConfiguration(
-        clusterSpec, cluster, defaults);
+        clusterSpec, cluster, defaults,
+        Sets.newLinkedHashSet(Lists.newArrayList("/data0", "/data1")));
     assertThat(conf.getString("mapred.reduce.tasks"), is("7"));
   }

Modified: whirr/trunk/services/yarn/src/main/resources/functions/configure_hadoop_mr2.sh
URL: http://svn.apache.org/viewvc/whirr/trunk/services/yarn/src/main/resources/functions/configure_hadoop_mr2.sh?rev=1372568&r1=1372567&r2=1372568&view=diff
==============================================================================
--- whirr/trunk/services/yarn/src/main/resources/functions/configure_hadoop_mr2.sh (original)
+++ whirr/trunk/services/yarn/src/main/resources/functions/configure_hadoop_mr2.sh Mon Aug 13 19:49:06 2012
@@ -28,26 +28,12 @@ function configure_hadoop_mr2() {
   ROLES=$1
   shift
 
-  case $CLOUD_PROVIDER in
-    ec2 | aws-ec2 )
-      # Alias /mnt as /data
-      ln -s /mnt /data
-      ;;
-    *)
-      ;;
-  esac
-
   HADOOP_COMMON_HOME=$HADOOP_HOME
   HADOOP_HDFS_HOME=$HADOOP_HOME
   HADOOP_HOME=/usr/local/hadoop
   HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
 
-  mkdir -p /data/hadoop
-  chown hadoop:hadoop /data/hadoop
-  if [ ! -e /data/tmp ]; then
-    mkdir /data/tmp
-    chmod a+rwxt /data/tmp
-  fi
+  make_hadoop_dirs /data*
 
   mkdir /etc/hadoop
   ln -s $HADOOP_CONF_DIR /etc/hadoop/conf
@@ -101,6 +87,19 @@ function configure_hadoop_mr2() {
 
 }
 
+function make_hadoop_dirs {
+  for mount in "$@"; do
+    if [ ! -e $mount/hadoop ]; then
+      mkdir -p $mount/hadoop
+      chown hadoop:hadoop $mount/hadoop
+    fi
+    if [ ! -e $mount/tmp ]; then
+      mkdir $mount/tmp
+      chmod a+rwxt $mount/tmp
+    fi
+  done
+}
+
 function start_namenode() {
   if which dpkg &> /dev/null; then
     AS_HADOOP="su -s /bin/bash - hadoop -c"