Author: stack
Date: Tue Oct 19 16:44:06 2010
New Revision: 1024317

URL: http://svn.apache.org/viewvc?rev=1024317&view=rev
Log:
HBASE-3076 Allow disabling the automatic shipping of dependency jars for
mapreduce jobs

Modified:
    hbase/trunk/CHANGES.txt
    
hbase/trunk/src/main/java/org/apache/hadoop/hbase/mapred/TableMapReduceUtil.java
    
hbase/trunk/src/main/java/org/apache/hadoop/hbase/mapreduce/TableMapReduceUtil.java

Modified: hbase/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/hbase/trunk/CHANGES.txt?rev=1024317&r1=1024316&r2=1024317&view=diff
==============================================================================
--- hbase/trunk/CHANGES.txt (original)
+++ hbase/trunk/CHANGES.txt Tue Oct 19 16:44:06 2010
@@ -1014,6 +1014,8 @@ Release 0.21.0 - Unreleased
    HBASE-3097  Merge in hbase-1200 doc on bloomfilters into hbase book
    HBASE-2700  Test of: Handle master failover for regions in transition
    HBASE-3115  HBaseClient wastes 1 TCP packet per RPC
+   HBASE-3076  Allow to disable automatic shipping of dependency jars
+               for mapreduce jobs (Bruno Dumon)
 
   NEW FEATURES
    HBASE-1961  HBase EC2 scripts

Modified: 
hbase/trunk/src/main/java/org/apache/hadoop/hbase/mapred/TableMapReduceUtil.java
URL: 
http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/mapred/TableMapReduceUtil.java?rev=1024317&r1=1024316&r2=1024317&view=diff
==============================================================================
--- 
hbase/trunk/src/main/java/org/apache/hadoop/hbase/mapred/TableMapReduceUtil.java
 (original)
+++ 
hbase/trunk/src/main/java/org/apache/hadoop/hbase/mapred/TableMapReduceUtil.java
 Tue Oct 19 16:44:06 2010
@@ -56,6 +56,26 @@ public class TableMapReduceUtil {
     Class<? extends TableMap> mapper,
     Class<? extends WritableComparable> outputKeyClass,
     Class<? extends Writable> outputValueClass, JobConf job) {
+    initTableMapJob(table, columns, mapper, outputKeyClass, outputValueClass, 
job, true);
+  }
+
+  /**
+   * Use this before submitting a TableMap job. It will
+   * appropriately set up the JobConf.
+   *
+   * @param table  The table name to read from.
+   * @param columns  The columns to scan.
+   * @param mapper  The mapper class to use.
+   * @param outputKeyClass  The class of the output key.
+   * @param outputValueClass  The class of the output value.
+   * @param job  The current job configuration to adjust.
+   * @param addDependencyJars upload HBase jars and jars for any of the 
configured
+   *           job classes via the distributed cache (tmpjars).
+   */
+  public static void initTableMapJob(String table, String columns,
+    Class<? extends TableMap> mapper,
+    Class<? extends WritableComparable> outputKeyClass,
+    Class<? extends Writable> outputValueClass, JobConf job, boolean 
addDependencyJars) {
 
     job.setInputFormat(TableInputFormat.class);
     job.setMapOutputValueClass(outputValueClass);
@@ -63,10 +83,12 @@ public class TableMapReduceUtil {
     job.setMapperClass(mapper);
     FileInputFormat.addInputPaths(job, table);
     job.set(TableInputFormat.COLUMN_LIST, columns);
-    try {
-      addDependencyJars(job);
-    } catch (IOException e) {
-      e.printStackTrace();
+    if (addDependencyJars) {
+      try {
+        addDependencyJars(job);
+      } catch (IOException e) {
+        e.printStackTrace();
+      }
     }
   }
 
@@ -99,6 +121,25 @@ public class TableMapReduceUtil {
   public static void initTableReduceJob(String table,
     Class<? extends TableReduce> reducer, JobConf job, Class partitioner)
   throws IOException {
+    initTableReduceJob(table, reducer, job, partitioner, true);
+  }
+
+  /**
+   * Use this before submitting a TableReduce job. It will
+   * appropriately set up the JobConf.
+   *
+   * @param table  The output table.
+   * @param reducer  The reducer class to use.
+   * @param job  The current job configuration to adjust.
+   * @param partitioner  Partitioner to use. Pass <code>null</code> to use
+   * default partitioner.
+   * @param addDependencyJars upload HBase jars and jars for any of the 
configured
+   *           job classes via the distributed cache (tmpjars).
+   * @throws IOException When determining the region count fails.
+   */
+  public static void initTableReduceJob(String table,
+    Class<? extends TableReduce> reducer, JobConf job, Class partitioner,
+    boolean addDependencyJars) throws IOException {
     job.setOutputFormat(TableOutputFormat.class);
     job.setReducerClass(reducer);
     job.set(TableOutputFormat.OUTPUT_TABLE, table);
@@ -114,7 +155,9 @@ public class TableMapReduceUtil {
     } else if (partitioner != null) {
       job.setPartitionerClass(partitioner);
     }
-    addDependencyJars(job);
+    if (addDependencyJars) {
+      addDependencyJars(job);
+    }
   }
 
   /**

Modified: 
hbase/trunk/src/main/java/org/apache/hadoop/hbase/mapreduce/TableMapReduceUtil.java
URL: 
http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/mapreduce/TableMapReduceUtil.java?rev=1024317&r1=1024316&r2=1024317&view=diff
==============================================================================
--- 
hbase/trunk/src/main/java/org/apache/hadoop/hbase/mapreduce/TableMapReduceUtil.java
 (original)
+++ 
hbase/trunk/src/main/java/org/apache/hadoop/hbase/mapreduce/TableMapReduceUtil.java
 Tue Oct 19 16:44:06 2010
@@ -70,6 +70,31 @@ public class TableMapReduceUtil {
       Class<? extends WritableComparable> outputKeyClass,
       Class<? extends Writable> outputValueClass, Job job)
   throws IOException {
+    initTableMapperJob(table, scan, mapper, outputKeyClass, outputValueClass,
+        job, true);
+  }
+
+  /**
+   * Use this before submitting a TableMap job. It will appropriately set up
+   * the job.
+   *
+   * @param table  The table name to read from.
+   * @param scan  The scan instance with the columns, time range etc.
+   * @param mapper  The mapper class to use.
+   * @param outputKeyClass  The class of the output key.
+   * @param outputValueClass  The class of the output value.
+   * @param job  The current job to adjust.  Make sure the passed job is
+   * carrying all necessary HBase configuration.
+   * @param addDependencyJars upload HBase jars and jars for any of the 
configured
+   *           job classes via the distributed cache (tmpjars).
+   * @throws IOException When setting up the details fails.
+   */
+  public static void initTableMapperJob(String table, Scan scan,
+      Class<? extends TableMapper> mapper,
+      Class<? extends WritableComparable> outputKeyClass,
+      Class<? extends Writable> outputValueClass, Job job,
+      boolean addDependencyJars)
+  throws IOException {
     job.setInputFormatClass(TableInputFormat.class);
     if (outputValueClass != null) job.setMapOutputValueClass(outputValueClass);
     if (outputKeyClass != null) job.setMapOutputKeyClass(outputKeyClass);
@@ -77,7 +102,9 @@ public class TableMapReduceUtil {
     job.getConfiguration().set(TableInputFormat.INPUT_TABLE, table);
     job.getConfiguration().set(TableInputFormat.SCAN,
       convertScanToString(scan));
-    addDependencyJars(job);
+    if (addDependencyJars) {
+      addDependencyJars(job);
+    }
   }
 
   /**
@@ -167,6 +194,38 @@ public class TableMapReduceUtil {
     Class<? extends TableReducer> reducer, Job job,
     Class partitioner, String quorumAddress, String serverClass,
     String serverImpl) throws IOException {
+    initTableReducerJob(table, reducer, job, partitioner, quorumAddress,
+        serverClass, serverImpl, true);
+  }
+
+  /**
+   * Use this before submitting a TableReduce job. It will
+   * appropriately set up the JobConf.
+   *
+   * @param table  The output table.
+   * @param reducer  The reducer class to use.
+   * @param job  The current job to adjust.  Make sure the passed job is
+   * carrying all necessary HBase configuration.
+   * @param partitioner  Partitioner to use. Pass <code>null</code> to use
+   * default partitioner.
+   * @param quorumAddress Distant cluster to write to; default is null for
+   * output to the cluster that is designated in <code>hbase-site.xml</code>.
+   * Set this String to the zookeeper ensemble of an alternate remote cluster
+   * when you would have the reduce write a cluster that is other than the
+   * default; e.g. copying tables between clusters, the source would be
+   * designated by <code>hbase-site.xml</code> and this param would have the
+   * ensemble address of the remote cluster.  The format to pass is particular.
+   * Pass <code> &lt;hbase.zookeeper.quorum> ':' 
&lt;ZOOKEEPER_ZNODE_PARENT></code>.
+   * @param serverClass redefined hbase.regionserver.class
+   * @param serverImpl redefined hbase.regionserver.impl
+   * @param addDependencyJars upload HBase jars and jars for any of the 
configured
+   *           job classes via the distributed cache (tmpjars).
+   * @throws IOException When determining the region count fails.
+   */
+  public static void initTableReducerJob(String table,
+    Class<? extends TableReducer> reducer, Job job,
+    Class partitioner, String quorumAddress, String serverClass,
+    String serverImpl, boolean addDependencyJars) throws IOException {
 
     Configuration conf = job.getConfiguration();
     job.setOutputFormatClass(TableOutputFormat.class);
@@ -198,7 +257,10 @@ public class TableMapReduceUtil {
     } else if (partitioner != null) {
       job.setPartitionerClass(partitioner);
     }
-    addDependencyJars(job);
+
+    if (addDependencyJars) {
+      addDependencyJars(job);
+    }
   }
 
   /**


Reply via email to