Repository: bigtop
Updated Branches:
  refs/heads/master 3becc79ae -> d7e9834d5


BIGTOP-1235. Speed-up init-hdfs.sh for complete HCFS compliant provisioning


Project: http://git-wip-us.apache.org/repos/asf/bigtop/repo
Commit: http://git-wip-us.apache.org/repos/asf/bigtop/commit/d7e9834d
Tree: http://git-wip-us.apache.org/repos/asf/bigtop/tree/d7e9834d
Diff: http://git-wip-us.apache.org/repos/asf/bigtop/diff/d7e9834d

Branch: refs/heads/master
Commit: d7e9834d54da0ceb90959de193436d7ccda1dc3f
Parents: 3becc79
Author: Konstantin Boudnik <[email protected]>
Authored: Sat Mar 14 22:48:55 2015 -0700
Committer: Konstantin Boudnik <[email protected]>
Committed: Sun Mar 15 14:27:23 2015 -0700

----------------------------------------------------------------------
 .../src/common/bigtop-utils/init-hcfs.groovy    | 299 ++++++++++++++++++
 .../src/common/bigtop-utils/provision.groovy    | 300 -------------------
 bigtop-packages/src/common/hadoop/init-hdfs.sh  | 102 ++-----
 .../src/common/hadoop/install_hadoop.sh         |   1 +
 .../src/deb/hadoop/hadoop-hdfs.install          |   1 +
 .../rpm/bigtop-utils/SPECS/bigtop-utils.spec    |   7 +-
 .../src/rpm/hadoop/SPECS/hadoop.spec            |   1 +
 .../src/main/resources/yum/bigtop-utils.xml     |   3 +-
 8 files changed, 330 insertions(+), 384 deletions(-)
----------------------------------------------------------------------
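
The speed-up comes from replacing the old script's several dozen
"su -s /bin/bash hdfs -c '/usr/bin/hadoop fs ...'" invocations, each of
which paid a full JVM startup cost, with a single Groovy process that
reuses one FileSystem client for every operation. A minimal sketch of the
idea (not part of this patch; the paths and mode below are illustrative):

    import org.apache.hadoop.conf.Configuration
    import org.apache.hadoop.fs.FileSystem
    import org.apache.hadoop.fs.Path
    import org.apache.hadoop.fs.permission.FsPermission

    // One client process, one connection: every call below is a cheap RPC
    // instead of a freshly forked "hadoop fs" JVM.
    def fs = FileSystem.get(new Configuration())
    ["/tmp", "/var", "/var/log"].each { dir ->
      fs.mkdirs(new Path(dir))
    }
    fs.setPermission(new Path("/tmp"), new FsPermission((short) 01777))
    fs.close()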


http://git-wip-us.apache.org/repos/asf/bigtop/blob/d7e9834d/bigtop-packages/src/common/bigtop-utils/init-hcfs.groovy
----------------------------------------------------------------------
diff --git a/bigtop-packages/src/common/bigtop-utils/init-hcfs.groovy b/bigtop-packages/src/common/bigtop-utils/init-hcfs.groovy
new file mode 100644
index 0000000..37ed7b9
--- /dev/null
+++ b/bigtop-packages/src/common/bigtop-utils/init-hcfs.groovy
@@ -0,0 +1,299 @@
+#!/usr/bin/env /usr/lib/bigtop-groovy/bin/groovy
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * <p/>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p/>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import groovy.json.JsonSlurper;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.permission.FsPermission;
+
+def final LOG = LogFactory.getLog(this.getClass());
+def final jsonParser = new JsonSlurper();
+
+def final USAGE = """\
+    *********************************************************************
+    USAGE:
+
+        This script provisions the skeleton of a hadoop file system.
+    It takes a single argument: the JSON schema (a list of lists of
+    4-element tuples).  For an example, see the bigtop init-hcfs.json
+    file.  The main elements of the JSON file are:
+
+    A copy of init-hcfs.json ships with bigtop distributions.
+
+    dir: list of dirs to create, with permissions.
+    user: list of users to set up home dirs for, with permissions.
+    root_user: the root owner of the distributed FS, used to run shell
+    commands.
+
+    To run this script, set up your environment using init-hcfs.json,
+    which defines the properties above, and then invoke this script.
+
+    Details below.
+
+    SETUP YOUR CLUSTER ENVIRONMENT
+
+    As mentioned above, the init-hcfs.json file guides which
+    directories and users get set up.
+    So first edit that file as needed.  Some common
+    modifications:
+
+
+    - Usually the "root_user" on HDFS is just hdfs.  For other file systems
+    the root user might be "root".
+    - The default hadoop users in the init-hcfs.json template ("tom",
+    "alice", etc.) aren't necessarily present on all clusters.
+
+    HOW TO INVOKE:
+
+    1) Simple groovy based method:  Just manually construct a hadoop classpath:
+
+    groovy -classpath /usr/lib/hadoop/hadoop-common-2.0.6-alpha.jar
+    :/usr/lib/hadoop/lib/guava-11.0.2.jar
+    :/etc/hadoop/conf/:/usr/lib/hadoop/hadoop-common-2.0.6-alpha.jar
+    :/usr/lib/hadoop/lib/commons-configuration-1.6.jar
+    :/usr/lib/hadoop/lib/commons-lang-2.5.jar:/usr/lib/hadoop/hadoop-auth.jar
+    :/usr/lib/hadoop/lib/slf4j-api-1.6.1.jar
+    :/usr/lib/hadoop-hdfs/hadoop-hdfs.jar
+    :/usr/lib/hadoop/lib/protobuf-java-2.4.0a.jar /vagrant/init-hcfs.groovy
+    /vagrant/init-hcfs.json
+
+    2) Another method: follow the instructions at groovy.codehaus.org/Running
+    for setting up the groovy runtime environment with CLASSPATH, and/or
+    append those libraries to the shebang command as necessary, then
+    simply do:
+
+    chmod +x init-hcfs.groovy
+    ./init-hcfs.groovy init-hcfs.json
+
+    *********************************************************************
+"""
+
+/**
+ * The HCFS generic provisioning process:
+ *
+ *   1) Create a file system skeleton.
+ *   2) Create users with home dirs in /user.
+ *   3) Copy jars and libs into the DFS for oozie.
+ *
+ *   In the future we may add more optional steps (e.g. adding libs to
+ *   the distributed cache, mounting FUSE over HDFS, etc.).
+ **/
+
+def errors = [
+    ("0: No init-hcfs.json input file provided !"): {
+      LOG.info("Checking argument length: " + args.length + " " + args);
+      return args.length == 1
+    },
+    ("1: init-hcfs json not found."): {
+      LOG.info("Checking for file : " + args[0]);
+      return new File(args[0]).exists()
+    }];
+
+errors.each { error_message, passed ->
+  if (!passed.call()) {
+    System.err.println("ERROR:" + error_message);
+    System.err.println(USAGE);
+    System.exit(1);
+  }
+}
+
+def final json = args[0];
+def final parsedData = jsonParser.parse(new FileReader(json));
+
+/**
+ * Groovy is smart enough to convert JSON
+ * fields to objects for us automagically.
+ * */
+def dirs = parsedData.dir as List;
+def users = parsedData.user as List;
+def hcfs_super_user = parsedData.root_user;
+
+def final Configuration conf = new Configuration();
+
+LOG.info("Provisioning file system for file system from Configuration: " +
+    conf.get("fs.defaultFS"));
+
+/**
+ * We create a single FileSystem instance to use for all the file system calls.
+ * This script makes anywhere from 20-100 file system operations, so it's
+ * important to create this only once and cache it.
+ * */
+def final FileSystem fs = FileSystem.get(conf);
+
+LOG.info("PROVISIONING WITH FILE SYSTEM : " + fs.getClass());
+
+/**
+ * Make a directory.  Note that if nulls are given for the optional
+ * parameters, the commands will still work, but behaviour varies
+ * depending on the HCFS implementation's ACLs, etc.
+ * @param fs The HCFS implementation to create the directory on.
+ * @param dname Required.
+ * @param mode can be null.
+ * @param user can be null.
+ * @param group can be null.
+ */
+def mkdir = { FileSystem fsys, Path dname, FsPermission mode, String user, String group ->
+  fsys.mkdirs(dname);
+  if (user != null) {
+    fsys.setOwner(dname, user, group);
+  }
+  if (mode != null) {
+    fsys.setPermission(dname, mode);
+    FsPermission result = fsys.getFileStatus(dname).getPermission();
+    /** Confirm that the permission took effect.  This check is
+     * important to do while we work on better
+     * docs for modifying and maintaining this new approach
+     * to HCFS provisioning. */
+    if (!fsys.getFileStatus(dname).getPermission().equals(mode)) {
+      throw new RuntimeException("Failed at setting permission to " + mode +
+          "... target directory permission is incorrect: " + result);
+    }
+  }
+}
+
+/**
+ * Create a perm from raw string representing an octal perm.
+ * @param mode The stringified octal mode (e.g. "1777")
+ * */
+private FsPermission readPerm(String mode) {
+  Short permValue = Short.decode("0" + mode);
+  //This constructor will decode the octal perm bits
+  //out of the short.
+  return new FsPermission(permValue);
+}
+
+int dirs_created = 0;
+/**
+ * Provisioning the directories on the file system.  This is the
+ * most important task of this script, as a basic directory skeleton
+ * is needed even for basic yarn/mapreduce apps before startup.
+ * */
+dirs.each() {
+  def (dname, mode, user, group) = it;
+
+  dname = new Path(dname);
+
+  //We encode permissions as strings, since they are octal.
+  //JSON doesn't support octal natively.
+  if (mode != null)
+    mode = readPerm(mode) as FsPermission;
+
+  if (user?.equals("HCFS_SUPER_USER"))
+    user = hcfs_super_user;
+
+  LOG.info("mkdirs " + dname + " " + user + " " + mode + " " + group);
+  mkdir(fs, dname, mode, user, group);
+
+  dirs_created++;
+}
+
+LOG.info("Succesfully created " + dirs_created + " directories in the DFS.");
+
+/**
+ * Now, for most clusters we will generally start out with at least one
+ * user.  You should modify your init-hcfs.json file accordingly if you
+ * have a set of users you want to setup for using hadoop.
+ *
+ * For each user we do initial setup, create a home directory, etc...
+ * You may also need to do special tasks if running LinuxTaskControllers,
+ * etc., which aren't (yet) handled by this provisioner.
+ * */
+users.each() {
+  def (user, permission, group) = it;
+  LOG.info("current user: " + user);
+  Path homedir = new Path("/user/" + user);
+
+  //perms come from the JSON; typically 755 (rwx owner, r-x group/other)
+  fs.mkdirs(homedir);
+  fs.setOwner(homedir, user, group);
+  FsPermission perm = readPerm(permission);
+  fs.setPermission(homedir, perm);
+}
+
+
+/**
+ * Copies jar files from a local directory into the distributed FS.
+ * Built specifically for the common task of getting jars into
+ * oozie's classpath so that oozie can run pig/hive/etc. based
+ * applications.
+ *
+ * @param fs An instance of an HCFS FileSystem.
+ *
+ * @param input The LOCAL DIRECTORY containing jar files.
+ *
+ * @param jarstr A jar file name filter used to accept/reject jar names.
+ * See the script below for an example of how it's used.  Jars matching
+ * this string will be copied into the specified "target" directory.
+ *
+ * @param target The path on the DISTRIBUTED FS where jars should be copied
+ * to.
+ *
+ * @return The total number of jars copied into the DFS.
+ */
+def copyJars = { FileSystem fsys, File input, String jarstr, Path target ->
+  int copied = 0;
+  input.listFiles(new FilenameFilter() {
+    public boolean accept(File f, String filename) {
+      return filename.contains(jarstr) && filename.endsWith("jar")
+    }
+  }).each({ jar_file ->
+    copied++;
+    fsys.copyFromLocalFile(new Path(jar_file.getAbsolutePath()), target)
+  });
+  return copied;
+}
+
+/**
+ *  Copy shared libraries into oozie.
+ *  Particular applications might want to modify this for example
+ *  if one wanted to add a custom file system or always available
+ *  custom library to be used in oozie workflows.
+ * */
+total_jars = 0;
+
+LOG.info("Now copying Jars into the DFS for oozie ");
+LOG.info("This might take a few seconds...");
+
+def final OOZIE_SHARE = "/user/oozie/share/lib/";
+def final MAPREDUCE = "/usr/lib/hadoop-mapreduce/";
+def final PIG_HOME = "/usr/lib/pig/";
+def final HIVE_HOME = "/usr/lib/hive/";
+
+total_jars += copyJars(fs,
+    new File(HIVE_HOME, "lib"), "",
+    new Path(OOZIE_SHARE, "hive/"))
+
+total_jars += copyJars(fs,
+    new File(MAPREDUCE), "hadoop-streaming/",
+    new Path(OOZIE_SHARE, "lib/mapreduce-streaming/"))
+
+total_jars += copyJars(fs,
+    new File(MAPREDUCE), "hadoop-distcp/",
+    new Path(OOZIE_SHARE, "distcp"))
+
+total_jars += copyJars(fs,
+    new File(PIG_HOME, "lib/"), "",
+    new Path(OOZIE_SHARE, "pig"))
+
+total_jars += copyJars(fs,
+    new File(PIG_HOME), "",
+    new Path(OOZIE_SHARE, "pig"))
+
+LOG.info("Total jars copied into the DFS : " + total_jars);

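For reference, init-hcfs.json is plain JSON that the script destructures
into tuples: "dir" entries are [name, mode, user, group], "user" entries
are [name, permission, group], and the literal "HCFS_SUPER_USER" is
replaced with root_user at runtime. The fragment below is a hypothetical
illustration only; the init-hcfs.json shipped with the packages is the
source of truth for the real directories and users:

    import groovy.json.JsonSlurper

    // Hypothetical fragment of an init-hcfs.json-style document.
    def sample = '''{
      "root_user": "hdfs",
      "dir": [
        ["/tmp",  "1777", "HCFS_SUPER_USER", null],
        ["/user", "755",  "HCFS_SUPER_USER", null]
      ],
      "user": [
        ["tom", "755", null]
      ]
    }'''
    def parsed = new JsonSlurper().parseText(sample)
    assert parsed.root_user == "hdfs"
    assert parsed.dir[0] == ["/tmp", "1777", "HCFS_SUPER_USER", null]
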
http://git-wip-us.apache.org/repos/asf/bigtop/blob/d7e9834d/bigtop-packages/src/common/bigtop-utils/provision.groovy
----------------------------------------------------------------------
diff --git a/bigtop-packages/src/common/bigtop-utils/provision.groovy b/bigtop-packages/src/common/bigtop-utils/provision.groovy
deleted file mode 100644
index 6d60492..0000000
--- a/bigtop-packages/src/common/bigtop-utils/provision.groovy
+++ /dev/null
@@ -1,300 +0,0 @@
-#!/usr/bin/env /usr/lib/bigtop-groovy/bin/groovy
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * <p/>
- * http://www.apache.org/licenses/LICENSE-2.0
- * <p/>
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-import groovy.json.JsonSlurper;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.fs.permission.FsPermission;
-
-def final LOG = LogFactory.getLog(this.getClass());
-def final jsonParser = new JsonSlurper();
-
-def final USAGE = """\
-    *********************************************************************
-    USAGE:
-
-        This script provisions the skeleton of a hadoop file system.
-    It takes a single argument: the JSON schema (a list of lists of
-    4-element tuples).  For an example, see the bigtop init-hcfs.json
-    file.  The main elements of the JSON file are:
-
-    A copy of init-hcfs.json ships with bigtop distributions.
-
-    dir: list of dirs to create, with permissions.
-    user: list of users to set up home dirs for, with permissions.
-    root_user: the root owner of the distributed FS, used to run shell
-    commands.
-
-    To run this script, set up your environment using init-hcfs.json,
-    which defines the properties above, and then invoke this script.
-
-    Details below.
-
-    SETUP YOUR CLUSTER ENVIRONMENT
-
-    As mentioned above, the init-hcfs.json file guides which
-    directories and users get set up.
-    So first edit that file as needed.  Some common
-    modifications:
-
-
-    - Usually the "root_user" on HDFS is just hdfs.  For other file systems
-    the root user might be "root".
-    - The default hadoop users in the init-hcfs.json template ("tom",
-    "alice", etc.) aren't necessarily present on all clusters.
-
-    HOW TO INVOKE:
-
-    1) Simple groovy based method:  Just manually construct a hadoop classpath:
-    
-    groovy -classpath /usr/lib/hadoop/hadoop-common-2.0.6-alpha.jar
-    :/usr/lib/hadoop/lib/guava-11.0.2.jar
-    :/etc/hadoop/conf/:/usr/lib/hadoop/hadoop-common-2.0.6-alpha.jar
-    :/usr/lib/hadoop/lib/commons-configuration-1.6.jar
-    :/usr/lib/hadoop/lib/commons-lang-2.5.jar:/usr/lib/hadoop/hadoop-auth.jar
-    :/usr/lib/hadoop/lib/slf4j-api-1.6.1.jar
-    :/usr/lib/hadoop-hdfs/hadoop-hdfs.jar
-    :/usr/lib/hadoop/lib/protobuf-java-2.4.0a.jar /vagrant/provision.groovy 
-    /vagrant/init-hcfs.json
-
-    2) Another method: follow the instructions at groovy.codehaus.org/Running
-    for setting up the groovy runtime environment with CLASSPATH, and/or
-    append those libraries to the shebang command as necessary, then
-    simply do:
-
-    chmod +x provision.groovy
-    ./provision.groovy init-hcfs.json
-
-    *********************************************************************
-"""
-
-/**
- * The HCFS generic provisioning process:
- *
- *   1) Create a file system skeleton.
- *   2) Create users with home dirs in /user.
- *   3) Copy jars and libs into the DFS for oozie.
- *
- *   In the future we may add more optional steps (e.g. adding libs to
- *   the distributed cache, mounting FUSE over HDFS, etc.).
- **/
-
-def errors = [
-    ("0: No init-hcfs.json input file provided !"): {
-      LOG.info("Checking argument length: " + args.length + " " + args);
-      return args.length == 1
-    },
-    ("1: init-hcfs json not found."): {
-      LOG.info("Checking for file : " + args[0]);
-      return new File(args[0]).exists()
-    }];
-
-errors.each { error_message, passed ->
-  if (!passed.call()) {
-    System.err.println("ERROR:" + error_message);
-    System.err.println(USAGE);
-    System.exit(1);
-  }
-}
-
-def final json = args[0];
-def final parsedData = jsonParser.parse(new FileReader(json));
-
-/**
- * Groovy is smart enough to convert JSON
- * fields to objects for us automagically.
- * */
-def dirs = parsedData.dir as List;
-def users = parsedData.user as List;
-def hcfs_super_user = parsedData.root_user;
-
-def final Configuration conf = new Configuration();
-
-LOG.info("Provisioning file system for file system from Configuration: " +
-    conf.get("fs.defaultFS"));
-
-/**
- * We create a single FileSystem instance to use for all the file system calls.
- * This script makes anywhere from 20-100 file system operations, so it's
- * important to create this only once and cache it.
- * */
-def final FileSystem fs = FileSystem.get(conf);
-
-LOG.info("PROVISIONING WITH FILE SYSTEM : " + fs.getClass());
-
-/**
- * Make a directory.  Note that if nulls are given for the optional
- * parameters, the commands will still work, but behaviour varies
- * depending on the HCFS implementation's ACLs, etc.
- * @param fs The HCFS implementation to create the directory on.
- * @param dname Required.
- * @param mode can be null.
- * @param user can be null.
- * @param group can be null.
- */
-public void mkdir(FileSystem fs, Path dname, FsPermission mode, String user,
-                  String group) {
-  fs.mkdirs(dname);
-  if (user != null) {
-    fs.setOwner(dname, user, group);
-  }
-  if (mode != null) {
-    fs.setPermission(dname, mode);
-    FsPermission result = fs.getFileStatus(dname).getPermission();
-    /** Confirm that the permission took effect.  This check is
-     * important to do while we work on better
-     * docs for modifying and maintaining this new approach
-     * to HCFS provisioning. */
-    if (!fs.getFileStatus(dname).getPermission().equals(mode)) {
-      throw new RuntimeException("Failed at setting permission to " + mode +
-          "... target directory permission is incorrect: " + result);
-    }
-  }
-}
-
-/**
- * Create a perm from raw string representing an octal perm.
- * @param mode The stringified octal mode (e.g. "1777")
- * */
-public FsPermission readPerm(String mode) {
-  Short permValue = Short.decode("0" + mode);
-  //This constructor will decode the octal perm bits
-  //out of the short.
-  return new FsPermission(permValue);
-}
-
-int dirs_created = 0;
-/**
- * Provisioning the directories on the file system.  This is the
- * most important task of this script, as a basic directory skeleton
- * is needed even for basic yarn/mapreduce apps before startup.
- * */
-dirs.each() {
-  def (dname, mode, user, group) = it;
-
-  dname = new Path(dname);
-
-  //We encode permissions as strings, since they are octal.
-  //JSON doesn't support octal natively.
-  if (mode != null)
-    mode = readPerm(mode) as FsPermission;
-
-  if (user?.equals("HCFS_SUPER_USER"))
-    user = hcfs_super_user;
-
-  LOG.info("mkdirs " + dname + " " + user + " " + mode + " " + group);
-  mkdir(fs, dname, mode, user, group);
-
-  dirs_created++;
-}
-
-LOG.info("Succesfully created " + dirs_created + " directories in the DFS.");
-
-/**
- * Now, for most clusters we will generally start out with at least one
- * user.  You should modify your init-hcfs.json file accordingly if you
- * have a set of users you want to setup for using hadoop.
- *
- * For each user we do initial setup, create a home directory, etc...
- * You may also need to do special tasks if running LinuxTaskControllers,
- * etc., which aren't (yet) handled by this provisioner.
- * */
-users.each() {
-  def (user, permission, group) = it;
-  LOG.info("current user: " + user);
-  Path homedir = new Path("/user/" + user);
-
-  //perms come from the JSON; typically 755 (rwx owner, r-x group/other)
-  fs.mkdirs(homedir);
-  fs.setOwner(homedir, user, group);
-  FsPermission perm = readPerm(permission);
-  fs.setPermission(homedir, perm);
-}
-
-
-/**
- * Copies jar files from a local directory into the distributed FS.
- * Built specifically for the common task of getting jars into
- * oozie's classpath so that oozie can run pig/hive/etc. based
- * applications.
- *
- * @param fs An instance of an HCFS FileSystem.
- *
- * @param input The LOCAL DIRECTORY containing jar files.
- *
- * @param jarstr A jar file name filter used to accept/reject jar names.
- * See the script below for an example of how it's used.  Jars matching
- * this string will be copied into the specified "target" directory.
- *
- * @param target The path on the DISTRIBUTED FS where jars should be copied
- * to.
- *
- * @return The total number of jars copied into the DFS.
- */
-public int copyJars(FileSystem fs, File input, String jarstr, Path target) {
-  int copied = 0;
-  input.listFiles(new FilenameFilter() {
-    public boolean accept(File f, String filename) {
-      return filename.contains(jarstr) && filename.endsWith("jar")
-    }
-  }).each({ jar_file ->
-    copied++;
-    fs.copyFromLocalFile(new Path(jar_file.getAbsolutePath()), target)
-  });
-  return copied;
-}
-
-/**
- *  Copy shared libraries into oozie.
- *  Particular applications might want to modify this for example
- *  if one wanted to add a custom file system or always available
- *  custom library to be used in oozie workflows.
- * */
-total_jars = 0;
-
-LOG.info("Now copying Jars into the DFS for oozie ");
-LOG.info("This might take a few seconds...");
-
-def final OOZIE_SHARE = "/user/oozie/share/lib/";
-def final MAPREDUCE = "/usr/lib/hadoop-mapreduce/";
-def final PIG_HOME = "/usr/lib/pig/";
-def final HIVE_HOME = "/usr/lib/hive/";
-
-total_jars += copyJars(fs,
-    new File(HIVE_HOME, "lib"), "",
-    new Path(OOZIE_SHARE, "hive/"))
-
-total_jars += copyJars(fs,
-    new File(MAPREDUCE), "hadoop-streaming/",
-    new Path(OOZIE_SHARE, "lib/mapreduce-streaming/"))
-
-total_jars += copyJars(fs,
-    new File(MAPREDUCE), "hadoop-distcp/",
-    new Path(OOZIE_SHARE, "distcp"))
-
-total_jars += copyJars(fs,
-    new File(PIG_HOME, "lib/"), "",
-    new Path(OOZIE_SHARE, "pig"))
-
-total_jars += copyJars(fs,
-    new File(PIG_HOME), "",
-    new Path(OOZIE_SHARE, "pig"))
-
-LOG.info("Total jars copied into the DFS : " + total_jars);

http://git-wip-us.apache.org/repos/asf/bigtop/blob/d7e9834d/bigtop-packages/src/common/hadoop/init-hdfs.sh
----------------------------------------------------------------------
diff --git a/bigtop-packages/src/common/hadoop/init-hdfs.sh b/bigtop-packages/src/common/hadoop/init-hdfs.sh
index 1bf820f..9b9271b 100755
--- a/bigtop-packages/src/common/hadoop/init-hdfs.sh
+++ b/bigtop-packages/src/common/hadoop/init-hdfs.sh
@@ -15,90 +15,32 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# Use this script to initialize HDFS directory structure for various components to run. This script can be run from any node in the Hadoop cluster but should be run only once, by a single node. If you are planning on using oozie, we recommend that you run this script from a node that has hive, pig, sqoop, etc. installed. Unless you are using a pseudo-distributed cluster, this node is most likely NOT your namenode
+### This script requires the bigtop-groovy package to be installed
+# Use this script to initialize HDFS directory structure for various components
+# to run. This script can be run from any node in the Hadoop cluster but should
+# be run only once, by a single node. If you are planning on using oozie, we
+# recommend that you run this script from a node that has hive, pig, sqoop,
+# etc. installed. Unless you are using a pseudo-distributed cluster, this node
+# is most likely NOT your namenode
 # Steps to be performed before running this script:
 # 1. Stop the namenode and datanode services if running.
 # 2. Format namenode (su -s /bin/bash hdfs hdfs namenode -format).
 # 3. Start the namenode and datanode services on appropriate nodes.
-su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -mkdir /tmp'
-su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -chmod -R 1777 /tmp'
-su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -mkdir /var'
-su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -mkdir /var/log'
-su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -chmod -R 1775 /var/log'
-su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -chown yarn:mapred /var/log'
-su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -mkdir /tmp/hadoop-yarn'
-su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -chown -R mapred:mapred /tmp/hadoop-yarn'
-su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -chmod -R 777 /tmp/hadoop-yarn'
-su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -mkdir -p /var/log/hadoop-yarn/apps'
-su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -chmod -R 1777 /var/log/hadoop-yarn/apps'
-su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -chown yarn:mapred /var/log/hadoop-yarn/apps'
-su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -mkdir /hbase'
-su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -chown hbase:hbase /hbase'
-su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -mkdir /solr'
-su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -chown solr:solr /solr'
-su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -mkdir /benchmarks'
-su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -chmod -R 777 /benchmarks'
-su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -mkdir /user'
-su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -chmod 755 /user'
-su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -chown hdfs  /user'
-su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -mkdir /user/history'
-su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -chown mapred:mapred /user/history'
-su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -chmod 755 /user/history'
-su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -mkdir /user/jenkins'
-su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -chmod -R 777 /user/jenkins'
-su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -chown jenkins /user/jenkins'
-su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -mkdir /user/hive'
-su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -chmod -R 777 /user/hive'
-su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -chown hive /user/hive'
-su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -mkdir /user/root'
-su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -chmod -R 777 /user/root'
-su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -chown root /user/root'
-su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -mkdir /user/hue'
-su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -chmod -R 777 /user/hue'
-su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -chown hue /user/hue'
-su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -mkdir /user/sqoop'
-su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -chmod -R 777 /user/sqoop'
-su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -chown sqoop /user/sqoop'
-su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -mkdir /user/oozie'
-su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -chmod -R 777 /user/oozie'
-su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -chown -R oozie /user/oozie'
-# Do more setup for oozie
-su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -mkdir /user/oozie/share'
-su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -mkdir /user/oozie/share/lib'
-su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -mkdir /user/oozie/share/lib/hive'
-su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -mkdir /user/oozie/share/lib/mapreduce-streaming'
-su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -mkdir /user/oozie/share/lib/distcp'
-su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -mkdir /user/oozie/share/lib/pig'
-# Event log directory for Apache Spark
-su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -mkdir -p /var/log/spark/apps'
-su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -chmod -R 1777 /var/log/spark/apps'
-su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -chown spark:spark /var/log/spark/apps'
 
-# Copy over files from local filesystem to HDFS that oozie might need
-if ls /usr/lib/hive/lib/*.jar &> /dev/null; then
-  su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -put /usr/lib/hive/lib/*.jar /user/oozie/share/lib/hive'
+# Autodetect JAVA_HOME if not defined
+if [ -f /usr/lib/bigtop-utils/bigtop-detect-javahome ]; then
+  . /usr/lib/bigtop-utils/bigtop-detect-javahome
 fi
 
-if ls /usr/lib/hadoop-mapreduce/hadoop-streaming*.jar &> /dev/null; then
-  su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -put /usr/lib/hadoop-mapreduce/hadoop-streaming*.jar /user/oozie/share/lib/mapreduce-streaming'
-fi
-
-if ls /usr/lib/hadoop-mapreduce/hadoop-distcp*.jar &> /dev/null; then
-  su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -put /usr/lib/hadoop-mapreduce/hadoop-distcp*.jar /user/oozie/share/lib/distcp'
-fi
-
-if ls /usr/lib/pig/{lib/,}*.jar &> /dev/null; then
-  su -s /bin/bash hdfs -c '/usr/bin/hadoop fs -put /usr/lib/pig/{lib/,}*.jar /user/oozie/share/lib/pig'
-fi
-
-# Create home directory for the current user if it does not exist
-if [ "$1" = "-u" ] ; then
-  USER="$2"
-  USER=${USER:-$(id -un)}
-  EXIST=$(su -s /bin/bash hdfs -c "/usr/bin/hadoop fs -ls /user/${USER}" &> /dev/null; echo $?)
-  if [ ! $EXIST -eq 0 ]; then
-    su -s /bin/bash hdfs -c "/usr/bin/hadoop fs -mkdir /user/${USER}"
-    su -s /bin/bash hdfs -c "/usr/bin/hadoop fs -chmod -R 755 /user/${USER}"
-    su -s /bin/bash hdfs -c "/usr/bin/hadoop fs -chown ${USER} /user/${USER}"
-  fi
-fi
+HADOOP_LIB_DIR=/usr/lib/hadoop/lib
+HDFS_LIB_DIR=/usr/lib/hadoop-hdfs/lib
+HADOOP_DEPENDENCIES="commons-logging*.jar guava*.jar commons-configuration*.jar commons-collections*.jar slf4j-api*.jar protobuf-java*.jar commons-lang*.jar"
+HDFS_DEPENDENCIES="htrace-core*.jar"
+for i in /usr/lib/hadoop/*.jar; do CLASSPATH=$CLASSPATH:$i; done
+CLASSPATH=/etc/hadoop/conf:$CLASSPATH:/usr/lib/hadoop-hdfs/hadoop-hdfs.jar
+pushd .
+cd $HADOOP_LIB_DIR
+for d in $HADOOP_DEPENDENCIES; do CLASSPATH=$CLASSPATH:$HADOOP_LIB_DIR/$d; done
+for d in $HDFS_DEPENDENCIES;   do CLASSPATH=$CLASSPATH:$HDFS_LIB_DIR/$d; done
+popd
+su -s /bin/bash hdfs -c "/usr/lib/bigtop-groovy/bin/groovy -classpath 
$CLASSPATH /usr/lib/bigtop-utils/init-hcfs.groovy 
/usr/lib/hadoop/libexec/init-hcfs.json"

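All of the permission handling above now happens inside init-hcfs.groovy.
A quick worked example (a sketch, not part of the patch) of what its
readPerm() does with the octal mode strings carried in the JSON:

    import org.apache.hadoop.fs.permission.FsPermission

    // Prefixing "0" makes Short.decode() parse the string as octal
    // (01777 octal == 1023 decimal); FsPermission(short) then unpacks
    // the permission bits, including the sticky bit for modes like "1777".
    Short bits = Short.decode("0" + "1777")
    def perm = new FsPermission(bits)
    assert perm.toString() == "rwxrwxrwt"   // trailing 't' == sticky bit
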
http://git-wip-us.apache.org/repos/asf/bigtop/blob/d7e9834d/bigtop-packages/src/common/hadoop/install_hadoop.sh
----------------------------------------------------------------------
diff --git a/bigtop-packages/src/common/hadoop/install_hadoop.sh b/bigtop-packages/src/common/hadoop/install_hadoop.sh
index 5a3ac83..38b578b 100755
--- a/bigtop-packages/src/common/hadoop/install_hadoop.sh
+++ b/bigtop-packages/src/common/hadoop/install_hadoop.sh
@@ -184,6 +184,7 @@ install -d -m 0755 ${SYSTEM_LIBEXEC_DIR}
 cp ${BUILD_DIR}/libexec/* ${SYSTEM_LIBEXEC_DIR}/
 cp ${DISTRO_DIR}/hadoop-layout.sh ${SYSTEM_LIBEXEC_DIR}/
 install -m 0755 ${DISTRO_DIR}/init-hdfs.sh ${SYSTEM_LIBEXEC_DIR}/
+install -m 0755 ${DISTRO_DIR}/init-hcfs.json ${SYSTEM_LIBEXEC_DIR}/
 
 # hadoop jar
 install -d -m 0755 ${HADOOP_DIR}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/d7e9834d/bigtop-packages/src/deb/hadoop/hadoop-hdfs.install
----------------------------------------------------------------------
diff --git a/bigtop-packages/src/deb/hadoop/hadoop-hdfs.install b/bigtop-packages/src/deb/hadoop/hadoop-hdfs.install
index 9ce5994..3aece60 100644
--- a/bigtop-packages/src/deb/hadoop/hadoop-hdfs.install
+++ b/bigtop-packages/src/deb/hadoop/hadoop-hdfs.install
@@ -3,6 +3,7 @@
 /usr/lib/hadoop-hdfs
 /usr/lib/hadoop/libexec/hdfs-config.sh
 /usr/lib/hadoop/libexec/init-hdfs.sh
+/usr/lib/hadoop/libexec/init-hcfs.json
 /usr/bin/hdfs
 /var/lib/hadoop-hdfs
 /var/log/hadoop-hdfs

http://git-wip-us.apache.org/repos/asf/bigtop/blob/d7e9834d/bigtop-packages/src/rpm/bigtop-utils/SPECS/bigtop-utils.spec
----------------------------------------------------------------------
diff --git a/bigtop-packages/src/rpm/bigtop-utils/SPECS/bigtop-utils.spec b/bigtop-packages/src/rpm/bigtop-utils/SPECS/bigtop-utils.spec
index 9760434..9186540 100644
--- a/bigtop-packages/src/rpm/bigtop-utils/SPECS/bigtop-utils.spec
+++ b/bigtop-packages/src/rpm/bigtop-utils/SPECS/bigtop-utils.spec
@@ -32,8 +32,8 @@ Source2:    bigtop-utils.default
 Source3:    bigtop-detect-javalibs
 Source4:    bigtop-detect-classpath
 Source5:    bigtop-monitor-service
-
-Requires:   bash
+Source6:    init-hcfs.groovy
+Requires:   bash, bigtop-groovy
 
 # "which" command is needed for a lot of projects.
 # It is part of the package "util-linux" on suse and "which" everywhere else
@@ -55,6 +55,7 @@ install -p -m 644 %{SOURCE2} .
 install -p -m 644 %{SOURCE3} .
 install -p -m 644 %{SOURCE4} .
 install -p -m 644 %{SOURCE5} .
+install -p -m 755 %{SOURCE6} .
 
 %build
 
@@ -68,6 +69,7 @@ install -p -m 755 %{SOURCE3} $RPM_BUILD_ROOT%{lib_dir}/
 install -p -m 755 %{SOURCE4} $RPM_BUILD_ROOT%{lib_dir}/
 install -p -m 755 %{SOURCE5} $RPM_BUILD_ROOT%{lib_dir}/
 install -p -m 644 %{SOURCE2} $RPM_BUILD_ROOT/etc/default/bigtop-utils
+install -p -m 644 %{SOURCE6} $RPM_BUILD_ROOT%{lib_dir}/
 
 %clean
 rm -rf $RPM_BUILD_ROOT
@@ -83,4 +85,3 @@ rm -rf $RPM_BUILD_ROOT
 
 %changelog
 
-

http://git-wip-us.apache.org/repos/asf/bigtop/blob/d7e9834d/bigtop-packages/src/rpm/hadoop/SPECS/hadoop.spec
----------------------------------------------------------------------
diff --git a/bigtop-packages/src/rpm/hadoop/SPECS/hadoop.spec b/bigtop-packages/src/rpm/hadoop/SPECS/hadoop.spec
index d21c38f..786e009 100644
--- a/bigtop-packages/src/rpm/hadoop/SPECS/hadoop.spec
+++ b/bigtop-packages/src/rpm/hadoop/SPECS/hadoop.spec
@@ -619,6 +619,7 @@ fi
 %attr(0755,hdfs,hadoop) %{state_hdfs}
 %attr(1777,hdfs,hadoop) %{state_hdfs}/cache
 %{lib_hadoop}/libexec/init-hdfs.sh
+%{lib_hadoop}/libexec/init-hcfs.json
 
 %files mapreduce
 %defattr(-,root,root)

http://git-wip-us.apache.org/repos/asf/bigtop/blob/d7e9834d/bigtop-tests/test-artifacts/package/src/main/resources/yum/bigtop-utils.xml
----------------------------------------------------------------------
diff --git a/bigtop-tests/test-artifacts/package/src/main/resources/yum/bigtop-utils.xml b/bigtop-tests/test-artifacts/package/src/main/resources/yum/bigtop-utils.xml
index c0e70a5..f5b7933 100644
--- a/bigtop-tests/test-artifacts/package/src/main/resources/yum/bigtop-utils.xml
+++ b/bigtop-tests/test-artifacts/package/src/main/resources/yum/bigtop-utils.xml
@@ -2,7 +2,8 @@
   <content>
    <config name='/etc/default/bigtop-utils' owners='1' perm='-rw-r--r--' user='root' group='root' />
    <file name='/usr/libexec/bigtop-detect-javahome' owners='1' perm='-rwxr-xr-x' user='root' group='root' />
+    <file name='/usr/lib/bigtop-utils/init-hcfs.groovy' owners='1' perm='-rw-r--r--' user='root' group='root' />
    <file name='/usr/share/doc/bigtop-utils-BIGTOP-PACKAGE-VERSION' owners='1' perm='drwxr-xr-x' user='root' group='root' />
    <doc  name='/usr/share/doc/bigtop-utils-BIGTOP-PACKAGE-VERSION/LICENSE' owners='1' perm='-rw-r--r--' user='root' group='root' />
   </content>
-</bigtop-utils>
\ No newline at end of file
+</bigtop-utils>
