Author: hashutosh
Date: Fri Jul 8 01:17:52 2011
New Revision: 1144117
URL: http://svn.apache.org/viewvc?rev=1144117&view=rev
Log:
HCATALOG-54 Javadoc is not being built as part of HCatalog docs
Added:
incubator/hcatalog/branches/branch-0.1/src/docs/overview.html
Modified:
incubator/hcatalog/branches/branch-0.1/CHANGES.txt
incubator/hcatalog/branches/branch-0.1/build.xml
incubator/hcatalog/branches/branch-0.1/src/docs/src/documentation/content/xdocs/site.xml
incubator/hcatalog/branches/branch-0.1/src/java/org/apache/hcatalog/mapreduce/HCatInputStorageDriver.java
incubator/hcatalog/branches/branch-0.1/src/java/org/apache/hcatalog/mapreduce/HCatTableInfo.java
incubator/hcatalog/branches/branch-0.1/src/java/org/apache/hcatalog/mapreduce/JobInfo.java
incubator/hcatalog/branches/branch-0.1/src/java/org/apache/hcatalog/pig/PigHCatUtil.java
Modified: incubator/hcatalog/branches/branch-0.1/CHANGES.txt
URL:
http://svn.apache.org/viewvc/incubator/hcatalog/branches/branch-0.1/CHANGES.txt?rev=1144117&r1=1144116&r2=1144117&view=diff
==============================================================================
--- incubator/hcatalog/branches/branch-0.1/CHANGES.txt (original)
+++ incubator/hcatalog/branches/branch-0.1/CHANGES.txt Fri Jul 8 01:17:52 2011
@@ -15,6 +15,8 @@ Release 0.1.0
(Krishna Kumar via macyang)
IMPROVEMENTS
+ HCAT-54. Javadoc is not being built as part of HCatalog docs (hashutosh)
+
HCAT-44. Add a releaseaudit target to build.xml (gates)
HCAT-40. Remove dependencies from the HCatalog client jar (macyang)
Modified: incubator/hcatalog/branches/branch-0.1/build.xml
URL:
http://svn.apache.org/viewvc/incubator/hcatalog/branches/branch-0.1/build.xml?rev=1144117&r1=1144116&r2=1144117&view=diff
==============================================================================
--- incubator/hcatalog/branches/branch-0.1/build.xml (original)
+++ incubator/hcatalog/branches/branch-0.1/build.xml Fri Jul 8 01:17:52 2011
@@ -317,7 +317,7 @@
Docs Section
================================================================================
-->
- <target name="docs" depends="forrest.check" description="Generate
forrest-based documentation. To use, specify -Dforrest.home=<base of Apache
Forrest installation> on the command line." if="forrest.home">
+ <target name="docs" depends="javadoc, forrest.check" description="Generate
forrest-based documentation. To use, specify -Dforrest.home=<base of Apache
Forrest installation> on the command line." if="forrest.home">
<exec dir="${docs.src}" executable="${forrest.home}/bin/forrest"
failonerror="true">
</exec>
@@ -330,6 +330,18 @@
<fail message="'forrest.home' is not defined. Please pass
-Dforrest.home=<base of Apache Forrest installation> to Ant on the
command-line." />
</target>
+ <target name="javadoc" depends="clientjar" description="Create
documentation">
+ <mkdir dir="${build.javadoc}" />
+ <javadoc overview="${src.dir}/../docs/overview.html"
packagenames="org.apache.hcatalog.*" destdir="${build.javadoc}" author="true"
version="true" use="true" windowtitle="HCatalog ${hcatalog.version} API"
doctitle="HCatalog ${hcatalog.version} API" bottom="Copyright &copy;
${year} The Apache Software Foundation">
+ <packageset dir="${src.dir}" />
+ <classpath>
+ <path refid="classpath" />
+ </classpath>
+ <group title="hcatalog" packages="org.apache.hcatalog.*" />
+ </javadoc>
+ </target>
+
+
<!--
===============================================================================
Distribution Section
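
With this change the "docs" target depends on the new "javadoc" target, so the
API docs are rebuilt whenever the Forrest documentation is built, and the
javadoc can also be produced on its own. Rough invocations (the Forrest path
below is a placeholder, not part of this patch):

    ant javadoc
    ant docs -Dforrest.home=/path/to/apache-forrest
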
Added: incubator/hcatalog/branches/branch-0.1/src/docs/overview.html
URL:
http://svn.apache.org/viewvc/incubator/hcatalog/branches/branch-0.1/src/docs/overview.html?rev=1144117&view=auto
==============================================================================
--- incubator/hcatalog/branches/branch-0.1/src/docs/overview.html (added)
+++ incubator/hcatalog/branches/branch-0.1/src/docs/overview.html Fri Jul 8
01:17:52 2011
@@ -0,0 +1,116 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
"http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<head>
+<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
+<title>Overview </title>
+</head>
+<body>
+<h1>Overview </h1>
+<div id="front-matter">
+<div id="minitoc-area">
+<ul class="minitoc">
+<li>
+<a href="#HCatalog">HCatalog </a>
+</li>
+<li>
+<a href="#HCatalog+Architecture">HCatalog Architecture</a>
+<ul class="minitoc">
+<li>
+<a href="#Interfaces">Interfaces</a>
+</li>
+<li>
+<a href="#Data+Model">Data Model</a>
+</li>
+</ul>
+</li>
+<li>
+<a href="#Data+Flow+Example">Data Flow Example</a>
+</li>
+</ul>
+</div>
+</div>
+
+<a name="HCatalog"></a>
+<h2 class="h3">HCatalog </h2>
+<div class="section">
+<p>HCatalog is a table management and storage management layer for Hadoop that
enables users with different data processing tools – Pig, MapReduce,
Hive, Streaming – to more easily read and write data on the grid.
HCatalog’s table abstraction presents users with a relational view of
data in the Hadoop distributed file system (HDFS) and ensures that users need
not worry about where or in what format their data is stored – RCFile
format, text files, sequence files. </p>
+<p>(Note: In this release, Streaming is not supported. Also, HCatalog supports
only writing RCFile formatted files and only reading PigStorage formatted text
files.)</p>
+<p></p>
+
+
+
+<a name="HCatalog+Architecture"></a>
+<h2 class="h3">HCatalog Architecture</h2>
+<div class="section">
+<p>HCatalog is built on top of the Hive metastore and incorporates components
from the Hive DDL. HCatalog provides read and write interfaces for Pig and
MapReduce and a command line interface for data definitions.</p>
+<p>(Note: HCatalog notification is not available in this release.)</p>
+<p></p>
+<a name="Interfaces"></a>
+<h3 class="h4">Interfaces</h3>
+<p>The HCatalog interface for Pig – HCatLoader and HCatStorer – is
an implementation of the Pig load and store interfaces. HCatLoader accepts a
table to read data from; you can indicate which partitions to scan by
immediately following the load statement with a partition filter statement.
HCatStorer accepts a table to write to and a specification of partition keys to
create a new partition. Currently HCatStorer only supports writing to one
partition. HCatLoader and HCatStorer are implemented on top of HCatInputFormat
and HCatOutputFormat, respectively.</p>
+<p>The HCatalog interface for MapReduce – HCatInputFormat and
HCatOutputFormat – is an implementation of Hadoop InputFormat and
OutputFormat. HCatInputFormat accepts a table to read data from and a selection
predicate to indicate which partitions to scan. HCatOutputFormat accepts a
table to write to and a specification of partition keys to create a new
partition. Currently HCatOutputFormat only supports writing to one
partition.</p>
+<p>
+<strong>Note:</strong> Currently there is no Hive-specific interface. Since
HCatalog uses Hive's metastore, Hive can read data in HCatalog directly as long
as a SerDe for that data already exists. In the future we plan to write a
HCatalogSerDe so that users won't need storage-specific SerDes and so that Hive
users can write data to HCatalog. One path already works today: if a Hive user
writes data in the RCFile format, that data can be read through
HCatalog.</p>
+<p>Data is defined using HCatalog's command line interface (CLI). The HCatalog
CLI supports most of the DDL portion of Hive's query language, allowing users
to create, alter, drop tables, etc. The CLI also supports the data exploration
part of the Hive command line, such as SHOW TABLES, DESCRIBE TABLE, etc.</p>
+<a name="Data+Model"></a>
+<h3 class="h4">Data Model</h3>
+<p>HCatalog presents a relational view of data in HDFS. Data is stored in
tables and these tables can be placed in databases. Tables can also be hash
partitioned on one or more keys; that is, for a given value of a key (or set of
keys) there will be one partition that contains all rows with that value (or
set of values). For example, if a table is partitioned on date and there are
three days of data in the table, there will be three partitions in the table.
New partitions can be added to a table, and partitions can be dropped from a
table. Partitioned tables have no partitions at create time. Unpartitioned
tables effectively have one default partition that must be created at table
creation time. There is no guaranteed read consistency when a partition is
dropped.</p>
+<p>Partitions contain records. Once a partition is created records cannot be
added to it, removed from it, or updated in it. (In the future some ability to
integrate changes to a partition will be added.) Partitions are
multi-dimensional and not hierarchical. Records are divided into columns.
Columns have a name and a datatype. HCatalog supports the same datatypes as
Hive. </p>
+</div>
+
+
+<a name="Data+Flow+Example"></a>
+<h2 class="h3">Data Flow Example</h2>
+<div class="section">
+<p>This simple data flow example shows how HCatalog is used to move data from
the grid into a database.
+ From the database, the data can then be analyzed using Hive.</p>
+<p>
+<strong>First</strong> Joe in data acquisition uses distcp to get data onto
the grid.</p>
+<pre class="code">
+hadoop distcp file:///file.dat hdfs://data/rawevents/20100819/data
+
+hcat "alter table rawevents add partition 20100819
hdfs://data/rawevents/20100819/data"
+</pre>
+<p>
+<strong>Second</strong> Sally in data processing uses Pig to cleanse and
prepare the data.</p>
+<p>Without HCatalog, Sally must be manually informed by Joe that data is
available, or use Oozie to poll HDFS.</p>
+<pre class="code">
+A = load '/data/rawevents/20100819/data' as (alpha:int, beta:chararray,
…);
+B = filter A by bot_finder(zeta) == 0;
+…
+store Z into 'data/processedevents/20100819/data';
+</pre>
+<p>With HCatalog, Oozie will be notified by HCatalog when data is available and
can then start the Pig job.</p>
+<pre class="code">
+A = load 'rawevents' using HCatLoader();
+B = filter A by date == '20100819' and bot_finder(zeta) == 0;
+…
+store Z into 'processedevents' using HCatStorer("date=20100819");
+</pre>
+<p>
+<strong>Third</strong> Robert in client management uses Hive to analyze his
clients' results.</p>
+<p>Without HCatalog, Robert must alter the table to add the required
partition. </p>
+<pre class="code">
+alter table processedevents add partition (date='20100819')
location 'hdfs://data/processedevents/20100819/data';
+
+select advertiser_id, count(clicks)
+from processedevents
+where date = '20100819'
+group by advertiser_id;
+</pre>
+<p>With HCatalog, Robert does not need to modify the table structure.</p>
+<pre class="code">
+select advertiser_id, count(clicks)
+from processedevents
+where date = '20100819'
+group by advertiser_id;
+</pre>
+</div>
+
+<div class="copyright">
+ Copyright ©
+ 2011 <a href="http://www.apache.org/licenses/">The Apache Software
Foundation</a>
+</div>
+</div>
+</body>
+</html>
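
The overview above describes HCatInputFormat and HCatOutputFormat only in
prose. A rough Java sketch of the read side follows. It is not part of this
patch: it assumes the 0.1-era HCatTableInfo input factory method (see the
HCatTableInfo javadoc changes below) and HCatInputFormat.setInput, and the
metastore URI, table name, and output path are placeholders to be checked
against the generated javadoc.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.io.WritableComparable;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
    import org.apache.hcatalog.data.HCatRecord;
    import org.apache.hcatalog.mapreduce.HCatInputFormat;
    import org.apache.hcatalog.mapreduce.HCatTableInfo;

    public class ReadRawEvents {

      /** Emits the first column of each HCatRecord; column order follows
          the table schema published in the metastore. */
      public static class EventMapper
          extends Mapper<WritableComparable, HCatRecord, Text, Text> {
        @Override
        protected void map(WritableComparable key, HCatRecord value, Context ctx)
            throws java.io.IOException, InterruptedException {
          ctx.write(new Text(String.valueOf(value.get(0))), new Text(""));
        }
      }

      public static void main(String[] args) throws Exception {
        Job job = new Job(new Configuration(), "read-rawevents");
        job.setJarByClass(ReadRawEvents.class);

        // Hypothetical metastore URI and table; no Kerberos principal.
        HCatInputFormat.setInput(job,
            HCatTableInfo.getInputTableInfo("thrift://metastore.example.com:9083",
                                            null, "default", "rawevents"));

        job.setInputFormatClass(HCatInputFormat.class);
        job.setMapperClass(EventMapper.class);
        job.setNumReduceTasks(0);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        TextOutputFormat.setOutputPath(job, new Path("/tmp/rawevents-out"));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
      }
    }
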
Modified:
incubator/hcatalog/branches/branch-0.1/src/docs/src/documentation/content/xdocs/site.xml
URL:
http://svn.apache.org/viewvc/incubator/hcatalog/branches/branch-0.1/src/docs/src/documentation/content/xdocs/site.xml?rev=1144117&r1=1144116&r2=1144117&view=diff
==============================================================================
---
incubator/hcatalog/branches/branch-0.1/src/docs/src/documentation/content/xdocs/site.xml
(original)
+++
incubator/hcatalog/branches/branch-0.1/src/docs/src/documentation/content/xdocs/site.xml
Fri Jul 8 01:17:52 2011
@@ -45,6 +45,7 @@ See http://forrest.apache.org/docs/linki
<index label="Cmd Line Interface " href="cli.html" />
<index label="Supported data formats" href="supportedformats.html" />
<index label="Installation" href="install.html" />
- </docs>
+ <api label="API Docs" href="api/index.html"/>
+ </docs>
</site>
Modified:
incubator/hcatalog/branches/branch-0.1/src/java/org/apache/hcatalog/mapreduce/HCatInputStorageDriver.java
URL:
http://svn.apache.org/viewvc/incubator/hcatalog/branches/branch-0.1/src/java/org/apache/hcatalog/mapreduce/HCatInputStorageDriver.java?rev=1144117&r1=1144116&r2=1144117&view=diff
==============================================================================
---
incubator/hcatalog/branches/branch-0.1/src/java/org/apache/hcatalog/mapreduce/HCatInputStorageDriver.java
(original)
+++
incubator/hcatalog/branches/branch-0.1/src/java/org/apache/hcatalog/mapreduce/HCatInputStorageDriver.java
Fri Jul 8 01:17:52 2011
@@ -45,7 +45,7 @@ public abstract class HCatInputStorageDr
/**
* Returns the InputFormat to use with this Storage Driver.
- * @param properties the properties containing parameters required for
initialization of InputFormat
+ * @param howlProperties the properties containing parameters required for
initialization of InputFormat
* @return the InputFormat instance
*/
public abstract InputFormat<? extends WritableComparable, ? extends
Writable> getInputFormat(Properties howlProperties);
@@ -56,7 +56,8 @@ public abstract class HCatInputStorageDr
* Implementers of StorageDriver should look to overwriting this function so
as to convert their
* value type to HowlRecord. Default implementation is provided for
StorageDriver implementations
* on top of an underlying InputFormat that already uses HowlRecord as a
tuple
- * @param value the underlying value to convert to HowlRecord
+ * @param baseKey
+ * @param baseValue the underlying value to convert to HowlRecord
*/
public abstract HCatRecord convertToHCatRecord(WritableComparable baseKey,
Writable baseValue) throws IOException;
@@ -130,7 +131,6 @@ public abstract class HCatInputStorageDr
* the schema it has (like Zebra) or it will use this to create a HowlRecord
matching the output schema.
* @param jobContext the job context object
* @param howlSchema the schema published in Howl for this data
- * @param instantiationState
* @throws IOException Signals that an I/O exception has occurred.
*/
public abstract void setOriginalSchema(JobContext jobContext, HCatSchema
howlSchema) throws IOException;
@@ -149,7 +149,6 @@ public abstract class HCatInputStorageDr
* driver can add the partition key values to the output HowlRecord if the
partition key values are not present on disk.
* @param jobContext the job context object
* @param partitionValues the partition values having a map with partition
key name as key and the HowlKeyValue as value
- * @param instantiationState
* @throws IOException Signals that an I/O exception has occurred.
*/
public abstract void setPartitionValues(JobContext jobContext,
Map<String,String> partitionValues) throws IOException;
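
The javadoc fixes above touch the abstract HCatInputStorageDriver contract. A
skeletal, hypothetical driver is sketched below to show where those methods
fit; it is declared abstract because only the methods whose javadoc changes in
this patch are shown, and the text/DefaultHCatRecord choices are illustrative
only, with import paths to be checked against this branch.

    import java.io.IOException;
    import java.util.Map;
    import java.util.Properties;

    import org.apache.hadoop.io.Writable;
    import org.apache.hadoop.io.WritableComparable;
    import org.apache.hadoop.mapreduce.InputFormat;
    import org.apache.hadoop.mapreduce.JobContext;
    import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
    import org.apache.hcatalog.data.DefaultHCatRecord;
    import org.apache.hcatalog.data.HCatRecord;
    import org.apache.hcatalog.data.schema.HCatSchema;
    import org.apache.hcatalog.mapreduce.HCatInputStorageDriver;

    /** Illustrative sketch of a text-backed input storage driver. */
    public abstract class SketchTextInputStorageDriver extends HCatInputStorageDriver {

      private HCatSchema originalSchema;
      private Map<String, String> partitionValues;

      /** Underlying InputFormat that actually reads the bytes on disk. */
      @Override
      public InputFormat<? extends WritableComparable, ? extends Writable>
          getInputFormat(Properties howlProperties) {
        return new TextInputFormat();
      }

      /** Convert one underlying (key, value) pair into an HCatRecord. */
      @Override
      public HCatRecord convertToHCatRecord(WritableComparable baseKey,
          Writable baseValue) throws IOException {
        DefaultHCatRecord record = new DefaultHCatRecord(1);
        record.set(0, baseValue.toString()); // single-column record, illustration only
        return record;
      }

      /** Remember the schema published in the metastore for this data. */
      @Override
      public void setOriginalSchema(JobContext jobContext, HCatSchema howlSchema)
          throws IOException {
        this.originalSchema = howlSchema;
      }

      /** Remember partition key values so they can be added to output records. */
      @Override
      public void setPartitionValues(JobContext jobContext,
          Map<String, String> partitionValues) throws IOException {
        this.partitionValues = partitionValues;
      }
    }
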
Modified:
incubator/hcatalog/branches/branch-0.1/src/java/org/apache/hcatalog/mapreduce/HCatTableInfo.java
URL:
http://svn.apache.org/viewvc/incubator/hcatalog/branches/branch-0.1/src/java/org/apache/hcatalog/mapreduce/HCatTableInfo.java?rev=1144117&r1=1144116&r2=1144117&view=diff
==============================================================================
---
incubator/hcatalog/branches/branch-0.1/src/java/org/apache/hcatalog/mapreduce/HCatTableInfo.java
(original)
+++
incubator/hcatalog/branches/branch-0.1/src/java/org/apache/hcatalog/mapreduce/HCatTableInfo.java
Fri Jul 8 01:17:52 2011
@@ -25,8 +25,8 @@ import org.apache.hadoop.hive.metastore.
/**
*
- * HCatTableInfo - class to communicate table information to {@link
HowlInputFormat}
- * and {@link HowlOutputFormat}
+ * HCatTableInfo - class to communicate table information to {@link
HCatInputFormat}
+ * and {@link HCatOutputFormat}
*
*/
public class HCatTableInfo implements Serializable {
@@ -67,7 +67,7 @@ public class HCatTableInfo implements Se
private Map<String, String> partitionValues;
/**
- * Initializes a new HCatTableInfo instance to be used with {@link
HowlInputFormat}
+ * Initializes a new HCatTableInfo instance to be used with {@link
HCatInputFormat}
* for reading data from a table.
* @param serverUri the Metadata server uri
* @param serverKerberosPrincipal If the howl server is configured to
@@ -86,7 +86,7 @@ public class HCatTableInfo implements Se
}
/**
- * Initializes a new HCatTableInfo instance to be used with {@link
HowlInputFormat}
+ * Initializes a new HCatTableInfo instance to be used with {@link
HCatInputFormat}
* for reading data from a table.
* @param serverUri the Metadata server uri
* @param serverKerberosPrincipal If the howl server is configured to
@@ -115,7 +115,7 @@ public class HCatTableInfo implements Se
this.filter = filter;
}
/**
- * Initializes a new HCatTableInfo instance to be used with {@link
HowlOutputFormat}
+ * Initializes a new HCatTableInfo instance to be used with {@link
HCatOutputFormat}
* for writing data from a table.
* @param serverUri the Metadata server uri
* @param serverKerberosPrincipal If the howl server is configured to
Modified:
incubator/hcatalog/branches/branch-0.1/src/java/org/apache/hcatalog/mapreduce/JobInfo.java
URL:
http://svn.apache.org/viewvc/incubator/hcatalog/branches/branch-0.1/src/java/org/apache/hcatalog/mapreduce/JobInfo.java?rev=1144117&r1=1144116&r2=1144117&view=diff
==============================================================================
---
incubator/hcatalog/branches/branch-0.1/src/java/org/apache/hcatalog/mapreduce/JobInfo.java
(original)
+++
incubator/hcatalog/branches/branch-0.1/src/java/org/apache/hcatalog/mapreduce/JobInfo.java
Fri Jul 8 01:17:52 2011
@@ -40,7 +40,7 @@ public class JobInfo implements Serializ
/**
* Instantiates a new howl job info.
- * @param tableName the table name
+ * @param howlTableInfo
* @param tableSchema the table schema
* @param partitions the partitions
*/
Modified:
incubator/hcatalog/branches/branch-0.1/src/java/org/apache/hcatalog/pig/PigHCatUtil.java
URL:
http://svn.apache.org/viewvc/incubator/hcatalog/branches/branch-0.1/src/java/org/apache/hcatalog/pig/PigHCatUtil.java?rev=1144117&r1=1144116&r2=1144117&view=diff
==============================================================================
---
incubator/hcatalog/branches/branch-0.1/src/java/org/apache/hcatalog/pig/PigHCatUtil.java
(original)
+++
incubator/hcatalog/branches/branch-0.1/src/java/org/apache/hcatalog/pig/PigHCatUtil.java
Fri Jul 8 01:17:52 2011
@@ -229,7 +229,7 @@ public class PigHCatUtil {
}
/**
- * @param type owl column type
+ * @param hfs schema of the column
* @return corresponding pig type
* @throws IOException
*/