Author: abayer
Date: Mon Aug 13 19:49:06 2012
New Revision: 1372568

URL: http://svn.apache.org/viewvc?rev=1372568&view=rev
Log:
WHIRR-189. Hadoop on EC2 should use all available storage.

Added:
    whirr/trunk/services/hadoop/src/main/java/org/apache/whirr/service/hadoop/VolumeManager.java   (with props)
    whirr/trunk/services/hadoop/src/main/resources/functions/prepare_all_disks.sh   (with props)
    whirr/trunk/services/hadoop/src/main/resources/functions/prepare_disks.sh   (with props)
Modified:
    whirr/trunk/CHANGES.txt
    whirr/trunk/core/src/main/java/org/apache/whirr/compute/BootstrapTemplate.java
    whirr/trunk/core/src/test/java/org/apache/whirr/compute/BootstrapTemplateTest.java
    whirr/trunk/core/src/test/java/org/apache/whirr/service/TemplateBuilderStrategyTest.java
    whirr/trunk/pom.xml
    whirr/trunk/services/cdh/src/main/resources/functions/configure_cdh_hadoop.sh
    whirr/trunk/services/cdh/src/main/resources/functions/configure_cdh_hbase.sh
    whirr/trunk/services/cdh/src/main/resources/functions/install_cdh_hbase.sh
    whirr/trunk/services/hadoop/src/main/java/org/apache/whirr/service/hadoop/HadoopClusterActionHandler.java
    whirr/trunk/services/hadoop/src/main/java/org/apache/whirr/service/hadoop/HadoopConfigurationBuilder.java
    whirr/trunk/services/hadoop/src/main/resources/functions/configure_hadoop.sh
    whirr/trunk/services/hadoop/src/main/resources/whirr-hadoop-default.properties
    whirr/trunk/services/hadoop/src/test/java/org/apache/whirr/service/hadoop/HadoopConfigurationBuilderTest.java
    whirr/trunk/services/yarn/src/main/resources/functions/configure_hadoop_mr2.sh

Modified: whirr/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/whirr/trunk/CHANGES.txt?rev=1372568&r1=1372567&r2=1372568&view=diff
==============================================================================
--- whirr/trunk/CHANGES.txt (original)
+++ whirr/trunk/CHANGES.txt Mon Aug 13 19:49:06 2012
@@ -15,6 +15,8 @@ Trunk (unreleased changes)
 
   IMPROVEMENTS
 
+    WHIRR-189. Hadoop on EC2 should use all available storage. (abayer)
+
     WHIRR-63. Support EC2 Cluster Compute groups for Hadoop etc. (abayer)
 
     WHIRR-573. Allow configuring REPO_HOST for CDH repositories. (abayer)

Modified: whirr/trunk/core/src/main/java/org/apache/whirr/compute/BootstrapTemplate.java
URL: http://svn.apache.org/viewvc/whirr/trunk/core/src/main/java/org/apache/whirr/compute/BootstrapTemplate.java?rev=1372568&r1=1372567&r2=1372568&view=diff
==============================================================================
--- whirr/trunk/core/src/main/java/org/apache/whirr/compute/BootstrapTemplate.java (original)
+++ whirr/trunk/core/src/main/java/org/apache/whirr/compute/BootstrapTemplate.java Mon Aug 13 19:49:06 2012
@@ -19,6 +19,7 @@
 package org.apache.whirr.compute;
 
 import static org.jclouds.compute.options.TemplateOptions.Builder.runScript;
+import static org.jclouds.ec2.domain.RootDeviceType.EBS;
 import static org.jclouds.scriptbuilder.domain.Statements.appendFile;
 import static org.jclouds.scriptbuilder.domain.Statements.createOrOverwriteFile;
 import static org.jclouds.scriptbuilder.domain.Statements.interpret;
@@ -31,6 +32,9 @@ import org.apache.whirr.service.jclouds.
 import org.apache.whirr.service.jclouds.TemplateBuilderStrategy;
 import org.jclouds.aws.ec2.AWSEC2ApiMetadata;
 import org.jclouds.aws.ec2.compute.AWSEC2TemplateOptions;
+import org.jclouds.ec2.EC2ApiMetadata;
+import org.jclouds.ec2.compute.options.EC2TemplateOptions;
+import org.jclouds.ec2.compute.predicates.EC2ImagePredicates;
 import org.jclouds.compute.ComputeService;
 import org.jclouds.compute.ComputeServiceContext;
 import org.jclouds.compute.domain.Template;
@@ -108,10 +112,25 @@ public class BootstrapTemplate {
       }
     }
 
-    return setPlacementGroup(context, spec, template, instanceTemplate);
+    return mapEphemeralIfImageIsEBSBacked(context, spec, template, instanceTemplate);
   }
 
     /**
+     * If the image is EBS-backed, map the ephemeral device.
+     */
+    private static Template mapEphemeralIfImageIsEBSBacked(ComputeServiceContext context,
+                                                           ClusterSpec spec,
+                                                           Template template,
+                                                           InstanceTemplate instanceTemplate) {
+        if (EC2ApiMetadata.CONTEXT_TOKEN.isAssignableFrom(context.getBackendType())) {
+            if (EC2ImagePredicates.rootDeviceType(EBS).apply(template.getImage())) {
+                template.getOptions().as(EC2TemplateOptions.class).mapEphemeralDeviceToDeviceName("/dev/sdc", "ephemeral1");
+            }
+        }
+        return setPlacementGroup(context, spec, template, instanceTemplate);
+    }
+    
+    /**
     * Set the placement group, if desired - if it doesn't already exist, create it.
      */
    private static Template setPlacementGroup(ComputeServiceContext context, ClusterSpec spec,

Modified: whirr/trunk/core/src/test/java/org/apache/whirr/compute/BootstrapTemplateTest.java
URL: http://svn.apache.org/viewvc/whirr/trunk/core/src/test/java/org/apache/whirr/compute/BootstrapTemplateTest.java?rev=1372568&r1=1372567&r2=1372568&view=diff
==============================================================================
--- whirr/trunk/core/src/test/java/org/apache/whirr/compute/BootstrapTemplateTest.java (original)
+++ whirr/trunk/core/src/test/java/org/apache/whirr/compute/BootstrapTemplateTest.java Mon Aug 13 19:49:06 2012
@@ -38,6 +38,7 @@ import org.jclouds.aws.ec2.compute.AWSEC
 import org.jclouds.aws.ec2.compute.AWSEC2TemplateOptions;
 import org.jclouds.compute.ComputeService;
 import org.jclouds.compute.ComputeServiceContext;
+import org.jclouds.compute.domain.Image;
 import org.jclouds.compute.domain.Template;
 import org.jclouds.compute.domain.TemplateBuilder;
 import org.jclouds.compute.options.TemplateOptions;
@@ -127,8 +128,10 @@ private void assertSpotPriceIs(
 
     Template template = mock(Template.class);
     TemplateOptions options = mock(TemplateOptions.class);
+    Image image = mock(Image.class);
     when(templateBuilder.build()).thenReturn(template);
     when(template.getOptions()).thenReturn(options);
+    when(template.getImage()).thenReturn(image);
 
    AWSEC2TemplateOptions awsEec2TemplateOptions = mock(AWSEC2TemplateOptions.class);
    when(options.as((Class<TemplateOptions>) any())).thenReturn(awsEec2TemplateOptions);

Modified: whirr/trunk/core/src/test/java/org/apache/whirr/service/TemplateBuilderStrategyTest.java
URL: http://svn.apache.org/viewvc/whirr/trunk/core/src/test/java/org/apache/whirr/service/TemplateBuilderStrategyTest.java?rev=1372568&r1=1372567&r2=1372568&view=diff
==============================================================================
--- whirr/trunk/core/src/test/java/org/apache/whirr/service/TemplateBuilderStrategyTest.java (original)
+++ whirr/trunk/core/src/test/java/org/apache/whirr/service/TemplateBuilderStrategyTest.java Mon Aug 13 19:49:06 2012
@@ -44,6 +44,7 @@ public class TemplateBuilderStrategyTest
   @Before
  public void setUp() throws ConfigurationException, JSchException, IOException {
     spec = ClusterSpec.withTemporaryKeys();
+    spec.setProvider("my-provider");
     instanceTemplate = mock(InstanceTemplate.class);
   }
 

Modified: whirr/trunk/pom.xml
URL: http://svn.apache.org/viewvc/whirr/trunk/pom.xml?rev=1372568&r1=1372567&r2=1372568&view=diff
==============================================================================
--- whirr/trunk/pom.xml (original)
+++ whirr/trunk/pom.xml Mon Aug 13 19:49:06 2012
@@ -348,23 +348,7 @@
           </archive>
         </configuration>
       </plugin>
-      <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-javadoc-plugin</artifactId>
-        <version>2.8.1</version>
-        <configuration>
-          <excludePackageNames>org.jclouds.*</excludePackageNames>
-        </configuration>
-        <executions>
-          <execution>
-            <id>aggregate</id>
-            <goals>
-              <goal>aggregate</goal>
-            </goals>
-            <phase>site</phase>
-          </execution>
-        </executions>
-      </plugin>
+
       <plugin>
         <groupId>org.apache.maven.plugins</groupId>
         <artifactId>maven-remote-resources-plugin</artifactId>
@@ -411,6 +395,7 @@
         <groupId>org.apache.rat</groupId>
         <artifactId>apache-rat-plugin</artifactId>
         <version>0.8</version>
+        <inherited>false</inherited>
         <executions>
           <execution>
             <phase>package</phase>
@@ -604,12 +589,32 @@
           <groupId>org.apache.maven.plugins</groupId>
           <artifactId>maven-site-plugin</artifactId>
           <version>3.0</version>
+          <inherited>false</inherited>
           <configuration>
-            <!-- Reports are generated at the site level -->
-            <generateReports>false</generateReports>
             <templateDirectory>${basedir}/src/site/resources</templateDirectory>
             <template>site.vm</template>
             <relativizeDecorationLinks>false</relativizeDecorationLinks>
+            <reportPlugins>
+              <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-javadoc-plugin</artifactId>
+                <version>2.8.1</version>
+                <configuration>
+                  <encoding>${project.build.sourceEncoding}</encoding>
+                  <quiet>true</quiet>
+                  <maxmemory>256m</maxmemory>
+                </configuration>
+                <reportSets>
+                  <reportSet>
+                    <id>default</id>
+                    <reports>
+                      <report>javadoc</report>
+                      <report>aggregate</report>
+                    </reports>
+                  </reportSet>
+                </reportSets>
+              </plugin>
+            </reportPlugins>
           </configuration>
         </plugin>
       </plugins>
@@ -701,13 +706,6 @@
             </configuration>
             <executions>
               <execution>
-                <id>aggregate</id>
-                <goals>
-                  <goal>aggregate</goal>
-                </goals>
-                <phase>site</phase>
-              </execution>
-              <execution>
                 <id>attach-javadocs</id>
                 <goals>
                   <goal>jar</goal>

Modified: whirr/trunk/services/cdh/src/main/resources/functions/configure_cdh_hadoop.sh
URL: http://svn.apache.org/viewvc/whirr/trunk/services/cdh/src/main/resources/functions/configure_cdh_hadoop.sh?rev=1372568&r1=1372567&r2=1372568&view=diff
==============================================================================
--- whirr/trunk/services/cdh/src/main/resources/functions/configure_cdh_hadoop.sh (original)
+++ whirr/trunk/services/cdh/src/main/resources/functions/configure_cdh_hadoop.sh Mon Aug 13 19:49:06 2012
@@ -26,15 +26,6 @@ function configure_cdh_hadoop() {
   ROLES=$1
   shift
   
-  case $CLOUD_PROVIDER in
-    ec2 | aws-ec2 )
-      # Alias /mnt as /data
-      if [ ! -e /data ]; then ln -s /mnt /data; fi
-      ;;
-    *)
-      ;;
-  esac
-  
   REPO=${REPO:-cdh4}
   CDH_MAJOR_VERSION=$(echo $REPO | sed -e 's/cdh\([0-9]\).*/\1/')
   if [ $CDH_MAJOR_VERSION = "4" ]; then
@@ -49,11 +40,7 @@ function configure_cdh_hadoop() {
     MAPREDUCE_PACKAGE_PREFIX=hadoop-${HADOOP_VERSION:-0.20}  
   fi
   
-  mkdir -p /data/hadoop
-  chgrp hadoop /data/hadoop
-  chmod g+w /data/hadoop
-  mkdir /data/tmp
-  chmod a+rwxt /data/tmp
+  make_hadoop_dirs /data*
 
   # Copy generated configuration files in place
   cp /tmp/{core,hdfs,mapred}-site.xml $HADOOP_CONF_DIR
@@ -105,6 +92,19 @@ function configure_cdh_hadoop() {
   
 }
 
+function make_hadoop_dirs {
+  for mount in "$@"; do
+    if [ ! -e $mount/hadoop ]; then
+      mkdir -p $mount/hadoop
+      chown hadoop:hadoop $mount/hadoop
+    fi
+    if [ ! -e $mount/tmp ]; then
+      mkdir $mount/tmp
+      chmod a+rwxt $mount/tmp
+    fi
+  done
+}
+
 function start_namenode() {
   if which dpkg &> /dev/null; then
     retry_apt_get -y install $HDFS_PACKAGE_PREFIX-namenode

Modified: whirr/trunk/services/cdh/src/main/resources/functions/configure_cdh_hbase.sh
URL: http://svn.apache.org/viewvc/whirr/trunk/services/cdh/src/main/resources/functions/configure_cdh_hbase.sh?rev=1372568&r1=1372567&r2=1372568&view=diff
==============================================================================
--- whirr/trunk/services/cdh/src/main/resources/functions/configure_cdh_hbase.sh (original)
+++ whirr/trunk/services/cdh/src/main/resources/functions/configure_cdh_hbase.sh Mon Aug 13 19:49:06 2012
@@ -54,21 +54,7 @@ function configure_cdh_hbase() {
     HBASE_PREFIX=hadoop-
   fi
 
-  case $CLOUD_PROVIDER in
-    ec2 | aws-ec2 )
-      # Alias /mnt as /data
-      if [ ! -e /data ]; then ln -s /mnt /data; fi
-      ;;
-    *)
-      ;;
-  esac
-
-  mkdir -p /data/hbase
-  chown hbase:hbase /data/hbase
-  if [ ! -e /data/tmp ]; then
-    mkdir /data/tmp
-    chmod a+rwxt /data/tmp
-  fi
+  make_hbase_dirs /data*
 
   # Copy generated configuration files in place
   cp /tmp/hbase-site.xml $HBASE_CONF_DIR
@@ -147,3 +133,17 @@ function install_hbase_daemon() {
     retry_yum install -y $daemon
   fi
 }
+
+
+function make_hbase_dirs {
+  for mount in "$@"; do
+    if [ ! -e $mount/hbase ]; then
+      mkdir -p $mount/hbase
+      chown hbase:hbase $mount/hbase
+    fi
+    if [ ! -e $mount/tmp ]; then
+      mkdir $mount/tmp
+      chmod a+rwxt $mount/tmp
+    fi
+  done
+}

Modified: whirr/trunk/services/cdh/src/main/resources/functions/install_cdh_hbase.sh
URL: http://svn.apache.org/viewvc/whirr/trunk/services/cdh/src/main/resources/functions/install_cdh_hbase.sh?rev=1372568&r1=1372567&r2=1372568&view=diff
==============================================================================
--- whirr/trunk/services/cdh/src/main/resources/functions/install_cdh_hbase.sh (original)
+++ whirr/trunk/services/cdh/src/main/resources/functions/install_cdh_hbase.sh Mon Aug 13 19:49:06 2012
@@ -69,15 +69,6 @@ function install_cdh_hbase() {
     esac
   done
   
-  case $CLOUD_PROVIDER in
-    ec2 | aws-ec2 )
-      # Alias /mnt as /data
-      if [ ! -e /data ]; then ln -s /mnt /data; fi
-      ;;
-    *)
-      ;;
-  esac
-  
   REPO=${REPO:-cdh4}
   REPO_HOST=${REPO_HOST:-archive.cloudera.com}
   HBASE_HOME=/usr/lib/hbase

Modified: whirr/trunk/services/hadoop/src/main/java/org/apache/whirr/service/hadoop/HadoopClusterActionHandler.java
URL: http://svn.apache.org/viewvc/whirr/trunk/services/hadoop/src/main/java/org/apache/whirr/service/hadoop/HadoopClusterActionHandler.java?rev=1372568&r1=1372567&r2=1372568&view=diff
==============================================================================
--- whirr/trunk/services/hadoop/src/main/java/org/apache/whirr/service/hadoop/HadoopClusterActionHandler.java (original)
+++ whirr/trunk/services/hadoop/src/main/java/org/apache/whirr/service/hadoop/HadoopClusterActionHandler.java Mon Aug 13 19:49:06 2012
@@ -25,7 +25,9 @@ import static org.apache.whirr.service.h
 import static org.jclouds.scriptbuilder.domain.Statements.call;
 
 import com.google.common.base.Joiner;
+import com.google.common.collect.Iterables;
 import java.io.IOException;
+import java.util.Map;
 import java.util.Set;
 import org.apache.commons.configuration.Configuration;
 import org.apache.commons.configuration.ConfigurationException;
@@ -80,6 +82,16 @@ public abstract class HadoopClusterActio
         "-u", tarball));
   }
   
+  protected Map<String, String> getDeviceMappings(ClusterActionEvent event) {
+      Set<Instance> instances = event.getCluster().getInstancesMatching(RolePredicates.role(getRole()));
+      Instance prototype = Iterables.getFirst(instances, null);
+      if (prototype == null) {
+          throw new IllegalStateException("No instances found in role " + getRole());
+      }
+      VolumeManager volumeManager = new VolumeManager();
+      return volumeManager.getDeviceMappings(event.getClusterSpec(), prototype);
+  }
+    
   @Override
   protected void beforeConfigure(ClusterActionEvent event)
       throws IOException, InterruptedException {
@@ -102,11 +114,12 @@ public abstract class HadoopClusterActio
 
   private void createHadoopConfigFiles(ClusterActionEvent event,
       ClusterSpec clusterSpec, Cluster cluster) throws IOException {
+    Map<String, String> deviceMappings = getDeviceMappings(event);
     try {
       event.getStatementBuilder().addStatements(
         buildCommon("/tmp/core-site.xml", clusterSpec, cluster),
-        buildHdfs("/tmp/hdfs-site.xml", clusterSpec, cluster),
-        buildMapReduce("/tmp/mapred-site.xml", clusterSpec, cluster),
+        buildHdfs("/tmp/hdfs-site.xml", clusterSpec, cluster, deviceMappings.keySet()),
+        buildMapReduce("/tmp/mapred-site.xml", clusterSpec, cluster, deviceMappings.keySet()),
         buildHadoopEnv("/tmp/hadoop-env.sh", clusterSpec, cluster),
        TemplateUtils.createFileFromTemplate("/tmp/hadoop-metrics.properties", event.getTemplateEngine(), getMetricsTemplate(event, clusterSpec, cluster), clusterSpec, cluster)
       );
@@ -114,6 +127,8 @@ public abstract class HadoopClusterActio
     } catch (ConfigurationException e) {
       throw new IOException(e);
     }
+    String devMappings = VolumeManager.asString(deviceMappings);
+    addStatement(event, call("prepare_all_disks", "'" + devMappings + "'"));
   }
 
  private String getMetricsTemplate(ClusterActionEvent event, ClusterSpec clusterSpec, Cluster cluster) {
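
For illustration, a minimal sketch (not part of the commit) of the mapping string this handler builds; the device names are hypothetical, and the exact rendering of the call statement is left to jclouds:

    // Suppose getDeviceMappings() returned two non-boot volumes for the
    // prototype instance (hypothetical devices):
    //   {"/data0" -> "/dev/sdb", "/data1" -> "/dev/sdc"}
    String devMappings = VolumeManager.asString(deviceMappings);
    // devMappings == "/data0,/dev/sdb;/data1,/dev/sdc", so the added call
    // statement amounts to the shell invocation:
    //   prepare_all_disks '/data0,/dev/sdb;/data1,/dev/sdc'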

Modified: whirr/trunk/services/hadoop/src/main/java/org/apache/whirr/service/hadoop/HadoopConfigurationBuilder.java
URL: http://svn.apache.org/viewvc/whirr/trunk/services/hadoop/src/main/java/org/apache/whirr/service/hadoop/HadoopConfigurationBuilder.java?rev=1372568&r1=1372567&r2=1372568&view=diff
==============================================================================
--- whirr/trunk/services/hadoop/src/main/java/org/apache/whirr/service/hadoop/HadoopConfigurationBuilder.java (original)
+++ whirr/trunk/services/hadoop/src/main/java/org/apache/whirr/service/hadoop/HadoopConfigurationBuilder.java Mon Aug 13 19:49:06 2012
@@ -21,7 +21,10 @@ package org.apache.whirr.service.hadoop;
 import static org.apache.whirr.RolePredicates.role;
 
 import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Function;
+import com.google.common.base.Joiner;
 import com.google.common.collect.Iterables;
+import com.google.common.collect.Lists;
 
 import java.io.IOException;
 import java.util.Set;
@@ -50,6 +53,7 @@ public class HadoopConfigurationBuilder 
       Configuration defaults, String prefix)
       throws ConfigurationException {
     CompositeConfiguration config = new CompositeConfiguration();
+    config.setDelimiterParsingDisabled(true);
     Configuration sub = clusterSpec.getConfigurationForKeysWithPrefix(prefix);
     config.addConfiguration(sub.subset(prefix)); // remove prefix
     config.addConfiguration(defaults.subset(prefix));
@@ -64,16 +68,16 @@ public class HadoopConfigurationBuilder 
   }
   
   public static Statement buildHdfs(String path, ClusterSpec clusterSpec,
-      Cluster cluster) throws ConfigurationException, IOException {
+      Cluster cluster, Set<String> dataDirectories) throws ConfigurationException, IOException {
     Configuration config = buildHdfsConfiguration(clusterSpec, cluster,
-        new PropertiesConfiguration(WHIRR_HADOOP_DEFAULT_PROPERTIES));
+        new PropertiesConfiguration(WHIRR_HADOOP_DEFAULT_PROPERTIES), dataDirectories);
     return HadoopConfigurationConverter.asCreateXmlConfigurationFileStatement(path, config);
   }
   
   public static Statement buildMapReduce(String path, ClusterSpec clusterSpec,
-      Cluster cluster) throws ConfigurationException, IOException {
+      Cluster cluster, Set<String> dataDirectories) throws ConfigurationException, IOException {
     Configuration config = buildMapReduceConfiguration(clusterSpec, cluster,
-        new PropertiesConfiguration(WHIRR_HADOOP_DEFAULT_PROPERTIES));
+        new PropertiesConfiguration(WHIRR_HADOOP_DEFAULT_PROPERTIES), dataDirectories);
     return HadoopConfigurationConverter.asCreateXmlConfigurationFileStatement(path, config);
   }
   
@@ -100,16 +104,27 @@ public class HadoopConfigurationBuilder 
   
   @VisibleForTesting
   static Configuration buildHdfsConfiguration(ClusterSpec clusterSpec,
-      Cluster cluster, Configuration defaults) throws ConfigurationException {
-    return build(clusterSpec, cluster, defaults, "hadoop-hdfs");
+      Cluster cluster, Configuration defaults, Set<String> dataDirectories) throws ConfigurationException {
+    Configuration config = build(clusterSpec, cluster, defaults, "hadoop-hdfs");
+    
+    setIfAbsent(config, "dfs.data.dir",
+        appendToDataDirectories(dataDirectories, "/hadoop/hdfs/data"));
+    setIfAbsent(config, "dfs.name.dir",
+        appendToDataDirectories(dataDirectories, "/hadoop/hdfs/name"));
+    setIfAbsent(config, "fs.checkpoint.dir",
+        appendToDataDirectories(dataDirectories, "/hadoop/hdfs/secondary"));
+    return config;
   }
   
   @VisibleForTesting
   static Configuration buildMapReduceConfiguration(ClusterSpec clusterSpec,
-      Cluster cluster, Configuration defaults) throws ConfigurationException, IOException {
+      Cluster cluster, Configuration defaults, Set<String> dataDirectories) throws ConfigurationException, IOException {
     Configuration config = build(clusterSpec, cluster, defaults,
         "hadoop-mapreduce");
     
+    setIfAbsent(config, "mapred.local.dir",
+        appendToDataDirectories(dataDirectories, "/hadoop/mapred/local"));
+    
     Set<Instance> taskTrackers = cluster
       .getInstancesMatching(role(HadoopTaskTrackerClusterActionHandler.ROLE));
     
@@ -158,5 +173,15 @@ public class HadoopConfigurationBuilder 
       config.setProperty(property, value);
     }
   }
+  
+  private static String appendToDataDirectories(Set<String> dataDirectories, final String suffix) {
+    return Joiner.on(',').join(Lists.transform(Lists.newArrayList(dataDirectories),
+      new Function<String, String>() {
+        @Override public String apply(String input) {
+          return input + suffix;
+        }
+      }
+    ));
+  }
 
 }
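
To make the new defaults concrete, here is a minimal sketch of the value appendToDataDirectories() computes for dfs.data.dir, assuming mount points /data0 and /data1 (the private helper is inlined, since it is not visible outside the class):

    Set<String> dataDirectories =
        Sets.newLinkedHashSet(Lists.newArrayList("/data0", "/data1"));
    String dfsDataDir = Joiner.on(',').join(
        Lists.transform(Lists.newArrayList(dataDirectories),
            new Function<String, String>() {
              @Override public String apply(String input) {
                return input + "/hadoop/hdfs/data";
              }
            }));
    // dfsDataDir == "/data0/hadoop/hdfs/data,/data1/hadoop/hdfs/data",
    // which setIfAbsent() stores unless the user supplied an override.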

Added: whirr/trunk/services/hadoop/src/main/java/org/apache/whirr/service/hadoop/VolumeManager.java
URL: http://svn.apache.org/viewvc/whirr/trunk/services/hadoop/src/main/java/org/apache/whirr/service/hadoop/VolumeManager.java?rev=1372568&view=auto
==============================================================================
--- whirr/trunk/services/hadoop/src/main/java/org/apache/whirr/service/hadoop/VolumeManager.java (added)
+++ whirr/trunk/services/hadoop/src/main/java/org/apache/whirr/service/hadoop/VolumeManager.java Mon Aug 13 19:49:06 2012
@@ -0,0 +1,65 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.whirr.service.hadoop;
+
+import com.google.common.collect.Maps;
+
+import java.util.List;
+import java.util.Map;
+
+import org.apache.whirr.Cluster.Instance;
+import org.apache.whirr.ClusterSpec;
+import org.jclouds.compute.domain.Hardware;
+import org.jclouds.compute.domain.Volume;
+
+public class VolumeManager {
+  
+  public static final String MOUNT_PREFIX = "/data";
+  
+  public Map<String, String> getDeviceMappings(ClusterSpec clusterSpec, Instance instance) {
+    Map<String, String> mappings = Maps.newLinkedHashMap();
+    int number = 0;
+    Hardware hardware = instance.getNodeMetadata().getHardware();
+
+    /* null when using the BYON jclouds compute provider */
+    if (hardware != null) {
+        List<? extends Volume> volumes =
+            instance.getNodeMetadata().getHardware().getVolumes();
+        for (Volume volume : volumes) {
+            if (volume.isBootDevice()) {
+                continue;
+            }
+            
+            mappings.put(MOUNT_PREFIX + number++, volume.getDevice());
+        }
+    }
+    return mappings;
+  }
+  
+  public static String asString(Map<String, String> deviceMappings) {
+    StringBuilder sb = new StringBuilder();
+    for (Map.Entry<String, String> mapping : deviceMappings.entrySet()) {
+      if (sb.length() > 0) {
+        sb.append(";");
+      }
+      sb.append(mapping.getKey()).append(",").append(mapping.getValue());
+    }
+    return sb.toString();
+  }
+}
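
A short usage sketch for the new class, assuming a hypothetical instance whose Hardware reports a boot volume plus two ephemeral volumes at /dev/sdb and /dev/sdc:

    // The boot device is skipped; remaining volumes are numbered from
    // MOUNT_PREFIX, i.e. /data0, /data1, ...
    Map<String, String> mappings =
        new VolumeManager().getDeviceMappings(clusterSpec, instance);
    // mappings == {"/data0" -> "/dev/sdb", "/data1" -> "/dev/sdc"}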

Propchange: whirr/trunk/services/hadoop/src/main/java/org/apache/whirr/service/hadoop/VolumeManager.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: whirr/trunk/services/hadoop/src/main/resources/functions/configure_hadoop.sh
URL: http://svn.apache.org/viewvc/whirr/trunk/services/hadoop/src/main/resources/functions/configure_hadoop.sh?rev=1372568&r1=1372567&r2=1372568&view=diff
==============================================================================
--- whirr/trunk/services/hadoop/src/main/resources/functions/configure_hadoop.sh (original)
+++ whirr/trunk/services/hadoop/src/main/resources/functions/configure_hadoop.sh Mon Aug 13 19:49:06 2012
@@ -26,24 +26,10 @@ function configure_hadoop() {
   ROLES=$1
   shift
   
-  case $CLOUD_PROVIDER in
-    ec2 | aws-ec2 )
-      # Alias /mnt as /data
-      ln -s /mnt /data
-      ;;
-    *)
-      ;;
-  esac
-  
   HADOOP_HOME=/usr/local/hadoop
   HADOOP_CONF_DIR=$HADOOP_HOME/conf
 
-  mkdir -p /data/hadoop
-  chown hadoop:hadoop /data/hadoop
-  if [ ! -e /data/tmp ]; then
-    mkdir /data/tmp
-    chmod a+rwxt /data/tmp
-  fi
+  make_hadoop_dirs /data*
   mkdir /etc/hadoop
   ln -s $HADOOP_CONF_DIR /etc/hadoop/conf
 
@@ -94,6 +80,19 @@ function configure_hadoop() {
 
 }
 
+function make_hadoop_dirs {
+  for mount in "$@"; do
+    if [ ! -e $mount/hadoop ]; then
+      mkdir -p $mount/hadoop
+      chown hadoop:hadoop $mount/hadoop
+    fi
+    if [ ! -e $mount/tmp ]; then
+      mkdir $mount/tmp
+      chmod a+rwxt $mount/tmp
+    fi
+  done
+}
+
 function start_namenode() {
   if which dpkg &> /dev/null; then
     AS_HADOOP="su -s /bin/bash - hadoop -c"

Added: whirr/trunk/services/hadoop/src/main/resources/functions/prepare_all_disks.sh
URL: http://svn.apache.org/viewvc/whirr/trunk/services/hadoop/src/main/resources/functions/prepare_all_disks.sh?rev=1372568&view=auto
==============================================================================
--- whirr/trunk/services/hadoop/src/main/resources/functions/prepare_all_disks.sh (added)
+++ whirr/trunk/services/hadoop/src/main/resources/functions/prepare_all_disks.sh Mon Aug 13 19:49:06 2012
@@ -0,0 +1,84 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+set -x
+
+# This function ensures that all the mount directories in the mapping string
+# are available to be used. This is achieved by formatting, mounting, and 
+# symlinking (if the volume is already mounted as another directory).
+#
+# E.g. suppose the mapping string were /data0,/dev/sdb;/data1,/dev/sdc
+# and /dev/sdb were mounted on /mnt, and /dev/sdc was not mounted or formatted.
+# In this case a symlink would be created from /data0 to /mnt. /dev/sdc would
+# be formatted, then mounted on /data1.
+function prepare_all_disks() {
+  for mapping in $(echo "$1" | tr ";" "\n"); do
+    # Split on the comma (see "Parameter Expansion" in the bash man page)
+    mount=${mapping%,*}
+    device=${mapping#*,}
+    prep_disk $mount $device
+  done
+  # Make sure there's at least a /data0 and /data (on the root filesystem)
+  if [ ! -e /data0 ]; then
+    if [ -e /data ]; then
+      ln -s /data /data0
+    else
+      mkdir /data0
+      ln -s /data0 /data
+    fi
+  else
+    if [ ! -e /data ]; then
+      ln -s /data0 /data
+    fi
+  fi
+}
+
+function prep_disk() {
+  mount=$1
+  device=$2
+  automount=${3:-false}
+
+  # is device formatted?
+  if [ $(mountpoint -q -x $device) ]; then
+    echo "$device is formatted"
+  else
+    if which dpkg &> /dev/null; then
+      apt-get install -y xfsprogs
+    elif which rpm &> /dev/null; then
+      yum install -y xfsprogs
+    fi
+    echo "warning: ERASING CONTENTS OF $device"
+    mkfs.xfs -f $device
+  fi
+  # is device mounted?
+  mount | grep -q $device
+  if [ $? == 0 ]; then 
+    echo "$device is mounted"
+    if [ ! -d $mount ]; then
+      echo "Symlinking to $mount"
+      ln -s $(grep $device /proc/mounts | awk '{print $2}') $mount
+    fi
+  else
+    echo "Mounting $device on $mount"
+    if [ ! -e $mount ]; then
+      mkdir $mount
+    fi
+    mount -o defaults,noatime $device $mount
+    if $automount ; then
+      echo "$device $mount xfs defaults,noatime 0 0" >> /etc/fstab
+    fi
+  fi
+}

Propchange: whirr/trunk/services/hadoop/src/main/resources/functions/prepare_all_disks.sh
------------------------------------------------------------------------------
    svn:eol-style = native

Added: whirr/trunk/services/hadoop/src/main/resources/functions/prepare_disks.sh
URL: http://svn.apache.org/viewvc/whirr/trunk/services/hadoop/src/main/resources/functions/prepare_disks.sh?rev=1372568&view=auto
==============================================================================
--- whirr/trunk/services/hadoop/src/main/resources/functions/prepare_disks.sh (added)
+++ whirr/trunk/services/hadoop/src/main/resources/functions/prepare_disks.sh Mon Aug 13 19:49:06 2012
@@ -0,0 +1,49 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+set -x
+
+# This function ensures that all the mount directories in the mapping string
+# whose devices are already mounted are available to be used. Symlinks are
+# created as necessary.
+#
+# E.g. suppose the mapping string were /data0,/dev/sdb;/data1,/dev/sdc
+# and /dev/sdb were mounted on /mnt, and /dev/sdc was not mounted (possibly
+# not even formatted).
+# In this case a symlink would be created from /data0 to /mnt. /data1 would
+# be created.
+function prepare_disks() {
+  for mapping in $(echo "$1" | tr ";" "\n"); do
+    # Split on the comma (see "Parameter Expansion" in the bash man page)
+    mount=${mapping%,*}
+    device=${mapping#*,}
+    prep_disk $mount $device
+  done
+}
+
+function prep_disk() {
+  mount=$1
+  device=$2
+  # is device mounted?
+  mount | grep -q $device
+  if [ $? == 0 ]; then 
+    echo "$device is mounted"
+    if [ ! -d $mount ]; then
+      echo "Symlinking to $mount"
+      ln -s $(grep $device /proc/mounts | awk '{print $2}') $mount
+    fi
+  fi
+}

Propchange: whirr/trunk/services/hadoop/src/main/resources/functions/prepare_disks.sh
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: whirr/trunk/services/hadoop/src/main/resources/whirr-hadoop-default.properties
URL: http://svn.apache.org/viewvc/whirr/trunk/services/hadoop/src/main/resources/whirr-hadoop-default.properties?rev=1372568&r1=1372567&r2=1372568&view=diff
==============================================================================
--- whirr/trunk/services/hadoop/src/main/resources/whirr-hadoop-default.properties (original)
+++ whirr/trunk/services/hadoop/src/main/resources/whirr-hadoop-default.properties Mon Aug 13 19:49:06 2012
@@ -32,13 +32,9 @@ hadoop-common.fs.trash.interval=1440
 
 # HDFS
 hadoop-hdfs.dfs.block.size=134217728
-hadoop-hdfs.dfs.data.dir=/data/hadoop/hdfs/data
 hadoop-hdfs.dfs.datanode.du.reserved=1073741824
-hadoop-hdfs.dfs.name.dir=/data/hadoop/hdfs/name
-hadoop-hdfs.fs.checkpoint.dir=/data/hadoop/hdfs/secondary
 
 # MR
-hadoop-mapreduce.mapred.local.dir=/data/hadoop/mapred/local
 hadoop-mapreduce.mapred.map.tasks.speculative.execution=true
 hadoop-mapreduce.mapred.reduce.tasks.speculative.execution=false
 hadoop-mapreduce.mapred.system.dir=/hadoop/system/mapred

Modified: whirr/trunk/services/hadoop/src/test/java/org/apache/whirr/service/hadoop/HadoopConfigurationBuilderTest.java
URL: http://svn.apache.org/viewvc/whirr/trunk/services/hadoop/src/test/java/org/apache/whirr/service/hadoop/HadoopConfigurationBuilderTest.java?rev=1372568&r1=1372567&r2=1372568&view=diff
==============================================================================
--- whirr/trunk/services/hadoop/src/test/java/org/apache/whirr/service/hadoop/HadoopConfigurationBuilderTest.java (original)
+++ whirr/trunk/services/hadoop/src/test/java/org/apache/whirr/service/hadoop/HadoopConfigurationBuilderTest.java Mon Aug 13 19:49:06 2012
@@ -23,6 +23,8 @@ import com.google.common.collect.Immutab
 import com.google.common.collect.ImmutableSet;
 import com.google.common.collect.ImmutableSet.Builder;
 import com.google.common.collect.Iterators;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Sets;
 import org.apache.commons.configuration.Configuration;
 import org.apache.commons.configuration.PropertiesConfiguration;
 import org.apache.whirr.Cluster;
@@ -142,21 +144,27 @@ public class HadoopConfigurationBuilderT
   @Test
   public void testHdfs() throws Exception {
     Configuration conf = HadoopConfigurationBuilder.buildHdfsConfiguration(
-        clusterSpec, cluster, defaults);
-    assertThat(Iterators.size(conf.getKeys()), is(1));
+        clusterSpec, cluster, defaults,
+        Sets.newLinkedHashSet(Lists.newArrayList("/data0", "/data1")));
+    assertThat(Iterators.size(conf.getKeys()), is(4));
     assertThat(conf.getString("p1"), is("hdfs1"));
+    assertThat(conf.getString("dfs.data.dir"),
+        is("/data0/hadoop/hdfs/data,/data1/hadoop/hdfs/data"));
   }
 
   @Test
   public void testMapReduce() throws Exception {
     Cluster cluster = newCluster(5);
     Configuration conf = HadoopConfigurationBuilder
-      .buildMapReduceConfiguration(clusterSpec, cluster, defaults);
+      .buildMapReduceConfiguration(clusterSpec, cluster, defaults,
+          Sets.newLinkedHashSet(Lists.newArrayList("/data0", "/data1")));
     assertThat(conf.getString("p1"), is("mapred1"));
     assertThat(conf.getString("mapred.job.tracker"), matches(".+:8021"));
     assertThat(conf.getString("mapred.tasktracker.map.tasks.maximum"), is("4"));
     assertThat(conf.getString("mapred.tasktracker.reduce.tasks.maximum"), is("3"));
     assertThat(conf.getString("mapred.reduce.tasks"), is("15"));
+    assertThat(conf.getString("mapred.local.dir"),
+        is("/data0/hadoop/mapred/local,/data1/hadoop/mapred/local"));
   }
 
   @Test
@@ -165,7 +173,8 @@ public class HadoopConfigurationBuilderT
     overrides.addProperty("hadoop-mapreduce.mapred.tasktracker.map.tasks.maximum", "70");
     clusterSpec = ClusterSpec.withNoDefaults(overrides);
     Configuration conf = HadoopConfigurationBuilder.buildMapReduceConfiguration(
-        clusterSpec, cluster, defaults);
+        clusterSpec, cluster, defaults,
+        Sets.newLinkedHashSet(Lists.newArrayList("/data0", "/data1")));
     assertThat(conf.getString("mapred.tasktracker.map.tasks.maximum"), is("70"));
   }
   
@@ -175,7 +184,8 @@ public class HadoopConfigurationBuilderT
     overrides.addProperty("hadoop-mapreduce.mapred.reduce.tasks", "7");
     clusterSpec = ClusterSpec.withNoDefaults(overrides);
     Configuration conf = HadoopConfigurationBuilder.buildMapReduceConfiguration(
-        clusterSpec, cluster, defaults);
+        clusterSpec, cluster, defaults,
+        Sets.newLinkedHashSet(Lists.newArrayList("/data0", "/data1")));
     assertThat(conf.getString("mapred.reduce.tasks"), is("7"));
   }
 

Modified: whirr/trunk/services/yarn/src/main/resources/functions/configure_hadoop_mr2.sh
URL: http://svn.apache.org/viewvc/whirr/trunk/services/yarn/src/main/resources/functions/configure_hadoop_mr2.sh?rev=1372568&r1=1372567&r2=1372568&view=diff
==============================================================================
--- whirr/trunk/services/yarn/src/main/resources/functions/configure_hadoop_mr2.sh (original)
+++ whirr/trunk/services/yarn/src/main/resources/functions/configure_hadoop_mr2.sh Mon Aug 13 19:49:06 2012
@@ -28,26 +28,12 @@ function configure_hadoop_mr2() {
   ROLES=$1
   shift
 
-  case $CLOUD_PROVIDER in
-    ec2 | aws-ec2 )
-      # Alias /mnt as /data
-      ln -s /mnt /data
-      ;;
-    *)
-      ;;
-  esac
-  
   HADOOP_COMMON_HOME=$HADOOP_HOME
   HADOOP_HDFS_HOME=$HADOOP_HOME
   HADOOP_HOME=/usr/local/hadoop
   HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
 
-  mkdir -p /data/hadoop
-  chown hadoop:hadoop /data/hadoop
-  if [ ! -e /data/tmp ]; then
-    mkdir /data/tmp
-    chmod a+rwxt /data/tmp
-  fi
+  make_hadoop_dirs /data*
   mkdir /etc/hadoop
   ln -s $HADOOP_CONF_DIR /etc/hadoop/conf
 
@@ -101,6 +87,19 @@ function configure_hadoop_mr2() {
 
 }
 
+function make_hadoop_dirs {
+  for mount in "$@"; do
+    if [ ! -e $mount/hadoop ]; then
+      mkdir -p $mount/hadoop
+      chown hadoop:hadoop $mount/hadoop
+    fi
+    if [ ! -e $mount/tmp ]; then
+      mkdir $mount/tmp
+      chmod a+rwxt $mount/tmp
+    fi
+  done
+}
+
 function start_namenode() {
   if which dpkg &> /dev/null; then
     AS_HADOOP="su -s /bin/bash - hadoop -c"

