TableLoader.java

daijy Mon, 12 Apr 2010 14:06:45 -0700

Author: daijy
Date: Mon Apr 12 21:06:19 2010
New Revision: 933415

URL: http://svn.apache.org/viewvc?rev=933415&view=rev
Log:
PIG-1361: [Zebra] Zebra TableLoader.getSchema() should return the 
projectionSchema specified in the constructor of TableLoader instead of the 
projection pruned by Pig


Modified:
    hadoop/pig/trunk/contrib/zebra/CHANGES.txt
    
hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/pig/TableLoader.java

Modified: hadoop/pig/trunk/contrib/zebra/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/CHANGES.txt?rev=933415&r1=933414&r2=933415&view=diff
==============================================================================
--- hadoop/pig/trunk/contrib/zebra/CHANGES.txt (original)
+++ hadoop/pig/trunk/contrib/zebra/CHANGES.txt Mon Apr 12 21:06:19 2010
@@ -18,6 +18,8 @@ Trunk (unreleased changes)
 
   IMPROVEMENTS
 
+    PIG-1361 Zebra TableLoader.getSchema() should return the projectionSchema 
specified in the constructor of TableLoader instead of pruned projection by pig 
(gauravj via daijy)
+
     PIG-1291 Support of virtual column "source_table" on unsorted table (yanz)
 
     PIG-1315 Implementing OrderedLoadFunc interface for Zebra TableLoader 
(xuefux via yanz)

Modified: 
hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/pig/TableLoader.java
URL: 
http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/pig/TableLoader.java?rev=933415&r1=933414&r2=933415&view=diff
==============================================================================
--- 
hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/pig/TableLoader.java
 (original)
+++ 
hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/pig/TableLoader.java
 Mon Apr 12 21:06:19 2010
@@ -158,6 +158,7 @@ public class TableLoader extends LoadFun
         schema = TableInputFormat.getSchema( job );
         sorted = true;
         
+        setSortOrder( job );
         setProjection( job );
 
         try {
@@ -169,36 +170,48 @@ public class TableLoader extends LoadFun
         }
     }
 
+    
     /**
-     * This method does more than set projection. For instance, it also try to 
grab sorting info if required.
+     * Processes the sortedness of the table.
+     * 
+     * @param job
+     * @throws IOException
+     */
+    private void setSortOrder(Job job) throws IOException {
+       Properties properties = UDFContext.getUDFContext().getUDFProperties( 
+              this.getClass(), new String[]{ udfContextSignature } );
+       boolean requireGlobalOrder = "true".equals(properties.getProperty( 
UDFCONTEXT_GLOBAL_SORTING));
+       if (requireGlobalOrder && !sorted)
+         throw new IOException("Global sorting can be only asked on table 
loaded as sorted");
+       if( sorted ) {
+           SplitMode splitMode = 
+             requireGlobalOrder ? SplitMode.GLOBALLY_SORTED : 
SplitMode.LOCALLY_SORTED;
+           TableInputFormat.setSplitMode(job, splitMode, null);
+           sortInfo = TableInputFormat.getSortInfo( job );
+       }       
+    }
+    
+    
+    /**
+     * This method sets projection.
      * 
      * @param job
      * @throws IOException
      */
     private void setProjection(Job job) throws IOException {
       Properties properties = UDFContext.getUDFContext().getUDFProperties( 
-          this.getClass(), new String[]{ udfContextSignature } );
-      boolean requireGlobalOrder = "true".equals(properties.getProperty( 
UDFCONTEXT_GLOBAL_SORTING));
-      if (requireGlobalOrder && !sorted)
-        throw new IOException("Global sorting can be only asked on table 
loaded as sorted");
-        if( sorted ) {
-            SplitMode splitMode = 
-              requireGlobalOrder ? SplitMode.GLOBALLY_SORTED : 
SplitMode.LOCALLY_SORTED;
-            TableInputFormat.setSplitMode(job, splitMode, null);
-            sortInfo = TableInputFormat.getSortInfo( job );
-        }
-        
-        try {
-            String prunedProjStr = properties.getProperty( 
UDFCONTEXT_PROJ_STRING );
-            
-            if( prunedProjStr != null ) {
-                TableInputFormat.setProjection( job, prunedProjStr );
-            } else if( projectionString != null ) {              
-                TableInputFormat.setProjection( job, projectionString );
-            }
-        } catch (ParseException ex) {
-            throw new IOException( "Schema parsing failed : " + 
ex.getMessage() );
-        }
+          this.getClass(), new String[]{ udfContextSignature } );   
+      try {
+          String prunedProjStr = properties.getProperty( 
UDFCONTEXT_PROJ_STRING );
+          
+          if( prunedProjStr != null ) {
+              TableInputFormat.setProjection( job, prunedProjStr );
+          } else if( projectionString != null ) {              
+              TableInputFormat.setProjection( job, projectionString );
+          }
+      } catch (ParseException ex) {
+          throw new IOException( "Schema parsing failed : " + ex.getMessage() 
);
+      }
     }
 
     private KeyGenerator makeKeyBuilder(byte[] elems) {
@@ -283,6 +296,7 @@ public class TableLoader extends LoadFun
 
          // The following obviously goes beyond of set location, but this is 
the only place that we
          // can do and it's suggested by Pig team.
+         setSortOrder( job );
          setProjection( job );
      }
 
@@ -318,10 +332,25 @@ public class TableLoader extends LoadFun
              }
          }
          
-         setProjection( job );
+         // This is needed because it checks whether an unsorted table is loaded as 
sorted
+         // It fails if an unsorted table is loaded as sorted
+         setSortOrder( job );
+         
+         /*
+         As per pig team any changes to this job object will be thrown away.
+         getSchema is needed to return the projectionSchema for the projection
+         string specified in TableLoader constructor. So, projectionString is 
used
+         here. However, setLocation() calls setProjection() because that is 
called after
+         projectionPruning and needs to read projection string from UDFCONTEXT
+         That also sets/calls TableInputFormat.setProjection(job, 
$prunedProj). But the 
+         job object here in getSchema() is a different copy from setLocation() 
and hence 
+         the changes will not be overridden as per PIG TEAM.  
+        */
 
          projectionSchema = tableSchema;
          try {
+                if(projectionString != null)
+                        TableInputFormat.setProjection(job, projectionString); 
                         
              Projection projection = new 
org.apache.hadoop.zebra.types.Projection( tableSchema, 
                      TableInputFormat.getProjection( job ) );
              projectionSchema = projection.getProjectionSchema();

svn commit: r933415 - in /hadoop/pig/trunk/contrib/zebra: CHANGES.txt src/java/org/apache/hadoop/zebra/pig/TableLoader.java

Reply via email to