Author: olga
Date: Sat Dec 19 00:01:11 2009
New Revision: 892408
URL: http://svn.apache.org/viewvc?rev=892408&view=rev
Log:
PIG-1163: Pig/Zebra 0.6.0 release (chandec via olgan)
Modified:
hadoop/pig/trunk/CHANGES.txt
hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/site.xml
hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/zebra_mapreduce.xml
hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/zebra_overview.xml
hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/zebra_pig.xml
hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/zebra_reference.xml
hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/zebra_stream.xml
hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/zebra_users.xml
Modified: hadoop/pig/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=892408&r1=892407&r2=892408&view=diff
==============================================================================
--- hadoop/pig/trunk/CHANGES.txt (original)
+++ hadoop/pig/trunk/CHANGES.txt Sat Dec 19 00:01:11 2009
@@ -24,6 +24,8 @@
IMPROVEMENTS
+PIG-1163: Pig/Zebra 0.6.0 release (chandec via olgan)
+
PIG-1156: Add aliases to ExecJobs and PhysicalOperators (dvryaboy via gates)
PIG-1161: add missing license headers (dvryaboy via olgan)
Modified: hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/site.xml
URL:
http://svn.apache.org/viewvc/hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/site.xml?rev=892408&r1=892407&r2=892408&view=diff
==============================================================================
--- hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/site.xml
(original)
+++ hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/site.xml Sat Dec
19 00:01:11 2009
@@ -45,24 +45,24 @@
<tutorial label="Tutorial"
href="tutorial.html" />
</docs>
<docs label="Guides">
- <piglatin label="Pig Latin Users " href="piglatin_users.html" />
- <piglatin label="Pig Latin Reference" href="piglatin_reference.html"
/>
+ <plusers label="Pig Latin Users " href="piglatin_users.html" />
+ <plref label="Pig Latin Reference" href="piglatin_reference.html" />
<cookbook label="Cookbook" href="cookbook.html" />
<udf label="UDFs" href="udf.html" />
</docs>
<docs label="Zebra">
- <piglatin label="Zebra Overview " href="zebra_overview.html" />
- <piglatin label="Zebra Users " href="zebra_users.html" />
- <piglatin label="Zebra Reference " href="zebra_reference.html" />
- <piglatin label="Zebra MapReduce " href="zebra_mapreduce.html" />
- <piglatin label="Zebra Pig " href="zebra_pig.html" />
- <piglatin label="Zebra Streaming " href="zebra_stream.html" />
+ <zover label="Zebra Overview " href="zebra_overview.html" />
+ <zusers label="Zebra Users " href="zebra_users.html" />
+ <zref label="Zebra Reference " href="zebra_reference.html" />
+ <zmr label="Zebra MapReduce " href="zebra_mapreduce.html" />
+ <zpig label="Zebra Pig " href="zebra_pig.html" />
+ <zstream label="Zebra Streaming " href="zebra_stream.html" />
</docs>
<docs label="Miscellaneous">
<api label="API Docs"
href="ext:api"/>
- <wiki label="Wiki" href="ext:wiki" />
- <faq label="FAQ" href="ext:faq" />
- <relnotes label="Release Notes" href="ext:relnotes" />
+ <wiki label="Wiki" href="ext:wiki"
/>
+ <faq label="FAQ" href="ext:faq" />
+ <relnotes label="Release Notes" href="ext:relnotes" />
</docs>
<external-refs>
Modified:
hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/zebra_mapreduce.xml
URL:
http://svn.apache.org/viewvc/hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/zebra_mapreduce.xml?rev=892408&r1=892407&r2=892408&view=diff
==============================================================================
---
hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/zebra_mapreduce.xml
(original)
+++
hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/zebra_mapreduce.xml
Sat Dec 19 00:01:11 2009
@@ -149,6 +149,7 @@
throw new RuntimeException(e);
}
}
+
}
static class ProjectionMap extends MapReduceBase implements
Mapper<BytesWritable, Tuple, Text, IntWritable> {
@@ -209,21 +210,22 @@
jobConf.setOutputFormat(BasicTableOutputFormat.class);
BasicTableOutputFormat.setOutputPath(jobConf, new Path(args[1]));
- // set the output info:
- // ZebraSchema - 2 logical columns
- // ZebraStorageHint - 2 physical column groups (one column each)
- // ZebraSortInfo - unsorted table (null)
+ // set the storage info of logical schema with 2 columns;
+ // and create 2 physical column groups;
+ // unsorted table
BasicTableOutputFormat.setStorageInfo(jobConf,
ZebraSchema.createZebraSchema("word:string, count:int"),
- ZebraStorageHint.createZebraStorageHint("[word];[count]"),
- null);
+ ZebraStorageHint.createZebraStorageHint("[word];[count]"), null);
// set map-only job.
jobConf.setNumReduceTasks(0);
// Run Job
JobClient.runJob(jobConf);
+
+ // Need to close Zebra output streams
+ BasicTableOutputFormat.close(jobConf);
/*
Second MR Job for Table Projection of count column
@@ -264,7 +266,13 @@
<!-- ZEBRA OUTPUT EXAMPLE-->
<section>
<title>Table Input/Output Formats</title>
-<p>This MapReduce example demonstrates the Zebra table input/output formats.
</p>
+<p>
+This MapReduce example demonstrates how to perform a simple union.
+To run this program, we need two basic tables that contain
+the data as in the example above (word, count). In this example they are:
+/user/mapredu/t1 and /user/mapredu/t2. The resulting table is
/user/mapredu2/t.
+</p>
+
<source>
package org.apache.hadoop.zebra.mapred;
@@ -286,13 +294,7 @@
import org.apache.hadoop.zebra.types.TypesUtils;
import org.apache.pig.data.Tuple;
-/**
- * This is a sample to show using zebra table to do a simple basic union in
- * map/reduce * To run this, we need have two basic tables ready. They contain
- * the data as in Sample 1, i.e., (word, count). In this example, they are at:
- * /user/mapredu/t1 /user/mapredu/t2 The resulting table is put at:
/user/mapredu2/t1
- *
- */
+
public class TableMRSample2 {
static class MapClass implements
Mapper<BytesWritable, Tuple, BytesWritable, Tuple> {
Modified:
hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/zebra_overview.xml
URL:
http://svn.apache.org/viewvc/hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/zebra_overview.xml?rev=892408&r1=892407&r2=892408&view=diff
==============================================================================
---
hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/zebra_overview.xml
(original)
+++
hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/zebra_overview.xml
Sat Dec 19 00:01:11 2009
@@ -42,8 +42,8 @@
<title>Prerequisites</title>
<p>Zebra requires:</p>
<ul>
- <li>Pig 0.6.0 </li>
- <li>Hadoop 0.20.1</li>
+ <li>Pig 0.6.0 or later</li>
+ <li>Hadoop 0.20.1 or later</li>
</ul>
<p></p>
<p>Also, make sure the following software is installed on your system:</p>
Modified:
hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/zebra_pig.xml
URL:
http://svn.apache.org/viewvc/hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/zebra_pig.xml?rev=892408&r1=892407&r2=892408&view=diff
==============================================================================
--- hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/zebra_pig.xml
(original)
+++ hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/zebra_pig.xml Sat
Dec 19 00:01:11 2009
@@ -73,7 +73,7 @@
A = LOAD 'studenttab, votertab' USING
org.apache.hadoop.zebra.pig.TableLoader();
</source>
-<p>TableLoader supports efficient column selection; projections are
automatically push down to the loader. This example tells the loader to only
return two columns, name and age.</p>
+<p>TableLoader supports efficient column selection; projections are
automatically pushed down to the loader. This example tells the loader to only
return two columns, name and age.</p>
<source>
A = LOAD 'studenttab' USING org.apache.hadoop.zebra.pig.TableLoader('name,
age');
</source>
@@ -84,7 +84,7 @@
<section>
<title>Map-Side Group and Merge Join</title>
-<p>If the input data is globally sorted, map-side group or merge join can be
used. Please, notice the "sorted" argument passed to the loader. This lets
the loader know that the data is expected to be globally sorted and that a
single key must be given to the same map.</p>
+<p>If the input data is globally sorted, merge join and map-side group can be
used. Please note the "sorted" argument that is passed to the loader. This
lets the loader know that the data is expected to be globally sorted and that a
single key must be given to the same map.</p>
<p>Here is an example of the merge join. Note that the first argument to the
loader is left empty to indicate that all columns are requested.</p>
<source>
@@ -93,7 +93,7 @@
G = JOIN A BY $0, B By $0 USING "merge";
</source>
-<p>Here is an example of a map-side group. Note that multiple sorted files are
passed to the loader and that the loader will perform sort preserving merge to
make sure that the data is globally sorted.</p>
+<p>Here is an example of a map-side group. Note that multiple sorted files are
passed to the loader and that the loader will perform sort-preserving merge to
make sure that the data is globally sorted.</p>
<source>
A = LOAD 'studentsortedtab, studentnullsortedtab' using
org.apache.hadoop.zebra.pig.TableLoader('name, age, gpa, source_table',
'sorted');
B = GROUP A BY $0 USING "collected";
Modified:
hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/zebra_reference.xml
URL:
http://svn.apache.org/viewvc/hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/zebra_reference.xml?rev=892408&r1=892407&r2=892408&view=diff
==============================================================================
---
hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/zebra_reference.xml
(original)
+++
hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/zebra_reference.xml
Sat Dec 19 00:01:11 2009
@@ -150,7 +150,7 @@
(<em>In a future release, the schema will also support type compatibility
between Zebra/Pig-SQL and will guide the underlying serialization formats
provided by Avro for projection, filtering, and so on. </em>)
</p>
- <p>The basic format for the the store schema is shown here.
+ <p>The basic format for the store schema is shown here.
The type name is optional; if not specified, the column defaults to type
bytes.</p>
<p>
<code>
@@ -453,7 +453,7 @@
<p>The Zebra load schema is used to load or read table columns. </p>
<section>
<title>Schema</title>
- <p>The basic format for the the Zebra load (read) schema is shown here. The
column name can be any valid Zebra type.
+ <p>The basic format for the Zebra load (read) schema is shown here. The
column name can be any valid Zebra type.
If no columns are specified, the entire Zebra table is loaded.</p>
<p>
<code>
Modified:
hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/zebra_stream.xml
URL:
http://svn.apache.org/viewvc/hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/zebra_stream.xml?rev=892408&r1=892407&r2=892408&view=diff
==============================================================================
--- hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/zebra_stream.xml
(original)
+++ hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/zebra_stream.xml
Sat Dec 19 00:01:11 2009
@@ -105,7 +105,7 @@
<section>
<title>Locating Frequently Visited Pages</title>
- <p>This perl script sorts the pages on number of page view counts. The script
outputs space padded count
+ <p>This Perl script sorts the pages on number of page view counts. The script
outputs space padded count
so that string sorting results in correct output. The first TAB separates the
key and value for Hadoop streaming.</p>
<source>
Modified:
hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/zebra_users.xml
URL:
http://svn.apache.org/viewvc/hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/zebra_users.xml?rev=892408&r1=892407&r2=892408&view=diff
==============================================================================
--- hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/zebra_users.xml
(original)
+++ hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/zebra_users.xml
Sat Dec 19 00:01:11 2009
@@ -32,31 +32,24 @@
<title>Column Security</title>
<p><strong>NOTE: THIS FEATURE IS EXPERIMENTAL AND SUBJECT TO CHANGE IN THE
FUTURE</strong></p>
- <p>Since Zebra provides columnar storage of user data, we intend to separate
secure and non-secure data into separate columns. We can then have access
control based on HDFS file systesm based security. This would be achieved by an
administrator setting appropriate permissions on the HDFS files contaning
secure data. </p>
+ <p>Since Zebra supports reading and writing data in a column-oriented
fashion, you can store secure and non-secure data in separate columns.
+ Then, using the HDFS file system, you can enable access control by setting
the appropriate permissions on the columns containing secure data.
+ </p>
- <section>
- <title>Design Issues</title>
-<p>Roles:</p>
-<ul>
-<li>Publishers of secure data </li>
-<li>Consumers of secure data </li>
-<li>Administrators of secure data </li>
+ <p>About the data:</p>
+ <ul>
+<li>All the files and directories containing secure data will have the same
permissions and groups within a table. </li>
+<li>If no security information is provided, then the HDFS file system default
behavior applies.</li>
</ul>
-
- <p>How it will work:</p>
+<p></p>
+ <p>About the users:</p>
<ul>
+<li>The user who creates the data will set the file permissions.</li>
+<li>If a permissions-related error happens, it will be communicated to the
user as a normal IO exception. </li>
+<li>A user running a client application needs to have chgrp permissions to
execute the "secure by group" operations on a table. </li>
+<li>If a user running a client application does not have read permissions for
a secure column group, an IO exception is issued.</li>
-<li>Before any data is written in tables, all the corresponding files and
directories need to have right set of ownership and permissions.
-This is necessary because if all the data is written and then the operation to
make it secure is executed, that can leave security holes and paranoid will not
allow that.</li>
-
-<li>All the files and direcories containing secure data will have same
permissions and groups within a table </li>
-<li>User of the MR Job/Pig Script is required to have permissions to execute
chgrp operations on a table. </li>
-<li>If no security information provided, then default behaviour.</li>
-<li>If permissions related error happens, it will be communicated to user as
normal IOException </li>
-<li>If reader does not have read permissions for a column (CG), an IOException
will be thrown </li>
-<li>The publisher/creator of the files will set these permissions when
creating data.</li>
</ul>
- </section>
<p></p>
<p>One simple Pig example:</p>
@@ -70,7 +63,7 @@
zStorageHint = ZebraStorageHint.createZebraStorageHint("[a, b] secure by
group:secure perm:640");
zSchema = …;
zSortInfo = …;
-setStorageInfo(jobConf, zSchema, zStorageHint, zSortInf);
+setStorageInfo(jobConf, zSchema, zStorageHint, zSortInfo);
</source>
</section>
<!-- END COLUMN SECURITY -->
@@ -80,20 +73,20 @@
<title>Drop Column Groups</title>
<p><strong>NOTE: THIS FEATURE IS EXPERIMENTAL AND SUBJECT TO CHANGE IN THE
FUTURE</strong></p>
- <p>Zebra allows you to delete a column group using the column group name.
+ <p>Zebra allows you to delete a column group (CG) using the column group
name.
For examples, see <a href="zebra_mapreduce.html#Drop+Column+Groups">Drop
Column Groups</a>. </p>
<p>Please note the following:</p>
<ul>
<li>Any failures during a drop will leave the table in a consistent state
(either with or without the column group).
-While success of a CG removal guarantees a column removal, a failure does not
imply CG is not removed.
+While success of a column group removal guarantees a column removal, a failure
does not imply the column group is not removed.
In rare cases, you might receive an error but the column could still be
deleted. </li>
<li>MapReduce jobs and other clients that are currently accessing the table
might fail with exceptions.
-It is recommended that the column groups are dropped when there are no
accesses to a table.
+It is recommended that column groups be dropped when there are no accesses to
a table.
It might not be feasible to ensure that there are no readers for a table; in
these cases the readers should handle the exception. </li>
-<li>Once a column group is dropped, the column gruop data is deleted from the
underlying filesystem.
-In the case of HDFS, it may not imply that physical data is actually removed
because of earlier snapshot of the filesystem; handling this is out side the
scope of Zebra. Legal requirements might require an admin finalize HDFS (if it
is not already finalized) before or after performing a deletion. </li>
-<li>Concurrent deletions are supported and their access is serialized. </li>
+<li>Once a column group is dropped, the column group data is deleted from the
underlying file system.
+In the case of the HDFS file system, it may not imply that physical data is
actually removed because of an earlier snapshot of the file system; handling
this is outside the scope of Zebra.</li>
+<li>Concurrent column group deletions are supported and their access is
serialized. </li>
<li>Deleting a non-existent column group or a column group that is already
deleted is not allowed.</li>
<li>If you delete all the remaining columns in a table, it logically leaves an
empty null table. The difference between a non-existent table and a table with
zero columns is that opening a non-existent table causes an error. </li>
</ul>
@@ -104,19 +97,13 @@
<section>
<title>Order-Preserving Sorted Table Union</title>
<p>
-This Zebra functionality is only available on underlying sorted Zebra tables.
-</p>
-
-<section>
-<title>Output Records</title>
-<p>
-This feature groups all records from all "delta tables" on some sort key to
form an output set of records while preserving the sorted ordering of the
records in the origional tables. For instance, if the client application wants
to fetch records from a union of tables of T1, T2 on a column "c1", then all
records from T1 with a particular value of column "c1" and all records from T2
with that value of column "c1" will be output. The ordering of the rows of the
output set of the same value of column "c1" is undefined. As a prerequisite,
both T1 and T2 must be sorted on column "c1". More specifically the input and
results could be as follows:
+With Zebra you can group all records from all "delta tables" on some sort key
to form an output set of records while preserving the sorted ordering of the
records in the original tables. For instance, if the client application wants
to fetch records from a union of tables of T1, T2 on a column "C1", then all
records from T1 with a particular value of column "C1" and all records from T2
with that value of column "C1" will be output. The ordering of the rows of the
output set of the same value of column "C1" is undefined. As a prerequisite,
both T1 and T2 must be sorted on column "C1". More specifically the input and
results could be as follows:
</p>
<p>Table T1: </p>
<source>
C1 C2
---------------
+-------------
A 11
A 12
B 21
@@ -137,7 +124,7 @@
<p>T1 Sort-Unioned with T2: </p>
<source>
source_table C1 C2
----------------------------------------
+------------------------------
0 A 11
1 A 101
0 A 12
@@ -148,27 +135,27 @@
1 D 401
0 D 41
</source>
-</section>
+
<p>
-Note that the sortness is guaranteed per mapper and among all mappers arranged
with certain ordering, but not among mappers arranged in any ordering. For
instance, the outputs generated by 4 mappers, m1, m2, m3 and m4, could be in
total ordering between m1, m3, m2 and m4, but not in any other arrangements.
+Note that the sortedness is guaranteed per mapper and among all mappers arranged
with certain ordering, but not among mappers arranged in any ordering. For
instance, the outputs generated by four mappers, m1, m2, m3 and m4, could be in
total ordering between m1, m3, m2 and m4, but not in any other arrangements.
</p>
<section>
-<title>Grouping and Indexing of Sort-Unioned Output Rows </title>
+<title>Indexing Sort-Unioned Results</title>
<p>
-The order preserving sort-unioned results above could be further indexed by
the component tables if the projection contains column(s) named "source_table".
If so specified, the component table index will be output at the position(s) as
specified in the projection list. If the underlying table is not a union of
sorted tables, use of the special column name in projection will cause an
exception thrown.
-</p>
+The order-preserving sort-unioned results above can be further indexed by the
component tables if the projection contains column(s) named "source_table". If
so specified, the component table index is output at the position(s) as
specified in the projection list.
-<p>
-If an attempt is made to create a table of a column named "source_table", an
excpetion will be thrown as the name is reserved by zebra for the virtual name.
+If the underlying table is not a union of sorted tables, the use of the
special column name in a projection will cause an exception.
+
+If an attempt is made to create a table with a column named "source_table", an
exception will be thrown as the name is reserved by Zebra for the virtual name.
</p>
</section>
<section>
-<title>MapReduce Interface </title>
+<title>MapReduce Jobs</title>
<p>
-TableInputFormat will have a static method, requireSortedTable, that allows
the caller to specify the behavior of a single sorted table or an order
preserving sorted table union as described above. The method will ensure all
tables in a union are sorted. For more information, see <a
href="zebra_reference.html#TableInputFormat">TableInputFormat</a>.
+TableInputFormat has a static method, requireSortedTable, that allows the caller
to specify the behavior of a single sorted table or an order-preserving sorted
table union as described above. The method ensures all tables in a union are
sorted. For more information, see <a
href="zebra_reference.html#TableInputFormat">TableInputFormat</a>.
</p>
<p>One simple example: A order-preserving sorted union B. A and B are sorted
tables. </p>
@@ -182,8 +169,8 @@
</section>
<section>
-<title>Pig Interface </title>
-<p>Pig will take an extra string argument of "sorted" indicating the desire to
load from a sorted table or an order preserving sorted table union.
+<title>Pig Scripts</title>
+<p>Pig takes an extra string argument of "sorted" indicating the desire to
load from a sorted table or an order-preserving sorted table union.
For more information, see <a href="zebra_pig.html#Zebra+Pig+Examples">Zebra
Pig Examples</a>.</p>
<p>One simple example:</p>
@@ -193,8 +180,7 @@
...
</source>
</section>
-
-</section>
+ </section>
<!-- END ORDER PRESERVE SORT-->
<!--MERGE JOIN-->
@@ -206,23 +192,22 @@
<p>One simple example:</p>
<source>
-Class myMapper {
-…
-Object keyGenerator;
-…
-
-public void map(…) {
- bytesKey = BasicTableOutputFormat.getSortKey(keyGenerator, userKey);
- …
- output.collect(bytesKey, valueTuple);
- …
+class myMapper extends Mapper<…> {
+ …
+ Object keyGenerator;
+ …
+ public void map(…) {
+ bytesKey = BasicTableOutputFormat.getSortKey(keyGenerator, userKey);
+ …
+ output.collect(bytesKey, valueTuple);
+ …
+ }
+ public void configure(JobConf job) {
+ keyGenerator = BasicTableOutputFormat.getSortKeyGenerator(job);
+ …
+ }
}
-public void configure(JobConf job)
-{
- keyGenerator = BasicTableOutputFormat.getSortKeyGenerator(job);
-…
-}
</source>
</section>