Author: hashutosh
Date: Sun Feb  7 18:45:36 2010
New Revision: 907463

URL: http://svn.apache.org/viewvc?rev=907463&view=rev
Log:
PIG-1046: join algorithm specification is within double quotes

Modified:
    hadoop/pig/trunk/CHANGES.txt
    hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt
    hadoop/pig/trunk/test/org/apache/pig/test/TestCollectedGroup.java
    hadoop/pig/trunk/test/org/apache/pig/test/TestJoin.java

Modified: hadoop/pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=907463&r1=907462&r2=907463&view=diff
==============================================================================
--- hadoop/pig/trunk/CHANGES.txt (original)
+++ hadoop/pig/trunk/CHANGES.txt Sun Feb  7 18:45:36 2010
@@ -24,6 +24,8 @@
 
 IMPROVEMENTS
 
+PIG-1046: join algorithm specification is within double quotes (ashutoshc)
+
 PIG-1209: Port POJoinPackage to proactively spill (ashutoshc)
 
 PIG-1190: Handling of quoted strings in pig-latin/grunt commands (ashutoshc)

Modified: hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt?rev=907463&r1=907462&r2=907463&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt (original)
+++ hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt Sun Feb  7 18:45:36 2010
@@ -320,6 +320,31 @@
                return cogroup;
        }
        
+    private LogicalOperator parseUsingForGroupBy(String modifier, 
ArrayList<CogroupInput> gis, LogicalPlan lp) throws ParseException, 
PlanException{
+
+      if(modifier.equalsIgnoreCase("collected")){
+            if (gis.size() != 1) {
+                throw new ParseException("Collected group is only supported 
for single input");  
+                }
+            if (!isColumnProjectionsOrStar(gis.get(0))) {
+                throw new ParseException("Collected group is only supported 
for columns or star projection");
+                }
+            LogicalOperator cogroup = parseCogroup(gis, lp, 
LOCogroup.GROUPTYPE.COLLECTED);
+            cogroup.pinOption(LOCogroup.OPTION_GROUPTYPE);
+            return cogroup;
+        }
+
+        else if (modifier.equalsIgnoreCase("regular")){
+            LogicalOperator cogroup = parseCogroup(gis, lp, 
LOCogroup.GROUPTYPE.REGULAR);
+            cogroup.pinOption(LOCogroup.OPTION_GROUPTYPE);
+            return cogroup;
+        }
+
+        else{
+            throw new ParseException("Only COLLECTED or REGULAR are valid 
GROUP modifiers.");
+        }
+    }
+    
        /**
         * Join parser. 
         */
@@ -435,7 +460,42 @@
                return foreach;
        }
 
-       void assertAtomic(LogicalOperator spec, boolean desiredAtomic) throws 
ParseException{
+    private LogicalOperator parseUsingForJoin(String modifier, 
ArrayList<CogroupInput> gis,
+                LogicalPlan lp, boolean isFullOuter, boolean isRightOuter, 
boolean isOuter) throws
+                ParseException, PlanException{
+
+              if (modifier.equalsIgnoreCase("repl") || 
modifier.equalsIgnoreCase("replicated")) {
+              if(isFullOuter || isRightOuter) {
+                  throw new ParseException("Replicated join does not support 
(right|full) outer joins");
+              }
+                    LogicalOperator joinOp = parseJoin(gis, lp, 
LOJoin.JOINTYPE.REPLICATED); 
+                    joinOp.pinOption(LOJoin.OPTION_JOIN);
+                    return joinOp; 
+            }
+             else if (modifier.equalsIgnoreCase("hash") || 
modifier.equalsIgnoreCase("default")) {
+                    LogicalOperator joinOp = parseJoin(gis, lp, 
LOJoin.JOINTYPE.HASH);
+                    joinOp.pinOption(LOJoin.OPTION_JOIN);
+                    return joinOp;
+            }
+            else if (modifier.equalsIgnoreCase("skewed")) {
+                    LogicalOperator joinOp = parseJoin(gis, lp, 
LOJoin.JOINTYPE.SKEWED);
+                    joinOp.pinOption(LOJoin.OPTION_JOIN);
+                    return joinOp;
+            }
+             else if (modifier.equalsIgnoreCase("merge")) {
+                 if(isOuter) {
+                        throw new ParseException("Merge join does not support 
(left|right|full) outer joins");
+                    }
+                    LogicalOperator joinOp = parseJoin(gis, lp, 
LOJoin.JOINTYPE.MERGE);
+                    joinOp.pinOption(LOJoin.OPTION_JOIN);
+                    return joinOp; 
+            }
+            else{
+                    throw new ParseException("Only REPL, REPLICATED, HASH, 
SKEWED and MERGE are vaild JOIN modifiers.");
+            }
+    }
+
+    void assertAtomic(LogicalOperator spec, boolean desiredAtomic) throws 
ParseException{
                Boolean isAtomic = null;
                if ( spec instanceof LOConst || 
                        (spec instanceof LOUserFunc &&
@@ -1713,29 +1773,22 @@
     ArrayList<CogroupInput> gis = new ArrayList<CogroupInput>(); 
     LogicalOperator cogroup = null; 
     log.trace("Entering CoGroupClause");
+    Token t;
 }
 {
-
     (gi = GroupItem(lp) { gis.add(gi); }
         ("," gi = GroupItem(lp) { gis.add(gi); })*
-        (
-            [<USING> ("\"collected\"" { 
-                if (gis.size() != 1) {
-                    throw new ParseException("Collected group is only 
supported for single input");  
-                }
-                if (!isColumnProjectionsOrStar(gis.get(0))) {
-                    throw new ParseException("Collected group is only 
supported for columns or star projection");
-                }
-                cogroup = parseCogroup(gis, lp, LOCogroup.GROUPTYPE.COLLECTED);
-                cogroup.pinOption(LOCogroup.OPTION_GROUPTYPE);
-                }
-                |"\"regular\"" {
-                    cogroup = parseCogroup(gis, lp, 
LOCogroup.GROUPTYPE.REGULAR);
-                    cogroup.pinOption(LOCogroup.OPTION_GROUPTYPE);
-                }
-                )
-            ]                                                                  
      
-        )
+        ([ <USING> (
+          (t = < QUOTEDSTRING> { cogroup = parseUsingForGroupBy(unquote 
(t.image), gis, lp); })
+         |("\"collected\"") {
+            log.info("[WARN] Use of double-quoted string to specify hint is 
deprecated. Please specify hint in single quotes."); 
+            cogroup = parseUsingForGroupBy("collected", gis, lp);
+            }
+         |("\"regular\"") {
+            log.info("[WARN] Use of double-quoted string to specify hint is 
deprecated. Please specify hint in single quotes."); 
+            cogroup = parseUsingForGroupBy("regular", gis, lp);
+            }
+        )])
     )
 
     {
@@ -2033,6 +2086,7 @@
        boolean isRightOuter = false;
        boolean isFullOuter = false;
        boolean isOuter = false;
+       Token t;
 }
 {
        (gi = JoinItem(lp) { gis.add(gi); }
@@ -2081,43 +2135,25 @@
                
        }
        // For all types of join we create LOJoin and mark what type of join it 
is.
-       (
-               [<USING> ("\"replicated\"" { 
-                 if(isFullOuter || isRightOuter) {
-                     throw new ParseException("Replicated join does not 
support (right|full) outer joins");
-                 }
-                                   joinOp = parseJoin(gis, lp, 
LOJoin.JOINTYPE.REPLICATED); 
-                                   joinOp.pinOption(LOJoin.OPTION_JOIN); 
-                           }
-                       | "\"repl\"" {
-                  if(isFullOuter || isRightOuter) {
-                           throw new ParseException("Replicated join does not 
support (right|full) outer joins");
-                 }
-                                   joinOp = parseJoin(gis, lp, 
LOJoin.JOINTYPE.REPLICATED);
-                                   joinOp.pinOption(LOJoin.OPTION_JOIN);
-                  }
-                   |"\"skewed\"" {
-                           joinOp = parseJoin(gis, lp, LOJoin.JOINTYPE.SKEWED);
-                           joinOp.pinOption(LOJoin.OPTION_JOIN);
+       ([<USING> (
+                 (t = <QUOTEDSTRING> { joinOp = 
parseUsingForJoin(unquote(t.image), gis, lp, isFullOuter, isRightOuter, 
isOuter);})
+        | ("\"repl\"" | "\"replicated\"")  {
+                     log.info("[WARN] Use of double-quotes for specifying join 
algorithm is deprecated. Please use single quotes."); 
+              joinOp = parseUsingForJoin("replicated", gis, lp, isFullOuter, 
isRightOuter, isOuter);
+                 }
+           | ("\"skewed\"") {
+              log.info("[WARN] Use of double-quotes for specifying join 
algorithm is deprecated. Please use single quotes."); 
+              joinOp = parseUsingForJoin("skewed", gis, lp, isFullOuter, 
isRightOuter, isOuter);
                        }
-                   |"\"merge\"" { 
-                           if(isOuter) {
-                        throw new ParseException("Merge join does not support 
(left|right|full) outer joins");
-                    }
-                           joinOp = parseJoin(gis, lp, LOJoin.JOINTYPE.MERGE);
-                           joinOp.pinOption(LOJoin.OPTION_JOIN); 
-                       }
-                   |"\"hash\"" {
-                               joinOp = parseJoin(gis, lp, 
LOJoin.JOINTYPE.HASH);
-                               joinOp.pinOption(LOJoin.OPTION_JOIN);
+               | ("\"merge\"") { 
+            log.info("[WARN] Use of double-quotes for specifying join 
algorithm is deprecated. Please use single quotes."); 
+            joinOp = parseUsingForJoin("merge", gis, lp, isFullOuter, 
isRightOuter, isOuter);
+               }
+           | ("\"hash\"" | "\"default\"") {
+                   log.info("[WARN] Use of double-quotes for specifying join 
algorithm is deprecated. Please use single quotes."); 
+            joinOp = parseUsingForJoin("hash", gis, lp, isFullOuter, 
isRightOuter, isOuter);
                        }
-                   |"\"default\"" {
-                               joinOp = parseJoin(gis, lp, 
LOJoin.JOINTYPE.HASH);
-                               joinOp.pinOption(LOJoin.OPTION_JOIN);
-                       })
-           ] 
-    )
-    )
+     )]))
 
        {log.trace("Exiting JoinClause");
        if (joinOp!=null) {

Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestCollectedGroup.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestCollectedGroup.java?rev=907463&r1=907462&r2=907463&view=diff
==============================================================================
--- hadoop/pig/trunk/test/org/apache/pig/test/TestCollectedGroup.java (original)
+++ hadoop/pig/trunk/test/org/apache/pig/test/TestCollectedGroup.java Sun Feb  7 18:45:36 2010
@@ -32,9 +32,12 @@
 import org.apache.pig.data.BagFactory;
 import org.apache.pig.data.DataBag;
 import org.apache.pig.data.Tuple;
+import org.apache.pig.test.utils.LogicalPlanTester;
 import org.apache.pig.test.utils.TestHelper;
 import 
org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POCollectedGroup;
 import 
org.apache.pig.backend.hadoop.executionengine.physicalLayer.plans.PhysicalPlan;
+import org.apache.pig.impl.logicalLayer.LOCogroup;
+import org.apache.pig.impl.logicalLayer.LogicalPlan;
 import org.apache.pig.impl.plan.OperatorKey;
 import org.junit.After;
 import org.junit.Before;
@@ -76,6 +79,22 @@
         Util.deleteFile(cluster, INPUT_FILE);
     }
     
+    public void testCollectedGrpSpecifiedInSingleQuotes1(){
+        
+        LogicalPlanTester lpt = new LogicalPlanTester();
+        lpt.buildPlan("A = LOAD '" + INPUT_FILE + "' as (id, name, grade);");
+        LogicalPlan lp = lpt.buildPlan("B = group A by id using 'collected';");
+        assertEquals(LOCogroup.GROUPTYPE.COLLECTED, 
((LOCogroup)lp.getLeaves().get(0)).getGroupType());
+    }
+    
+    public void testCollectedGrpSpecifiedInSingleQuotes2(){
+        
+        LogicalPlanTester lpt = new LogicalPlanTester();
+        lpt.buildPlan("A = LOAD '" + INPUT_FILE + "' as (id, name, grade);");
+        LogicalPlan lp = lpt.buildPlan("B = group A all using 'regular';");
+        assertEquals(LOCogroup.GROUPTYPE.REGULAR, 
((LOCogroup)lp.getLeaves().get(0)).getGroupType());
+    }
+    
     public void testPOMapsideGroupNoNullPlans() throws IOException {
         POCollectedGroup pmg = new POCollectedGroup(new OperatorKey());
         List<PhysicalPlan> plans = pmg.getPlans();

Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestJoin.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestJoin.java?rev=907463&r1=907462&r2=907463&view=diff
==============================================================================
--- hadoop/pig/trunk/test/org/apache/pig/test/TestJoin.java (original)
+++ hadoop/pig/trunk/test/org/apache/pig/test/TestJoin.java Sun Feb  7 18:45:36 2010
@@ -32,10 +32,12 @@
 import org.apache.pig.data.Tuple;
 import org.apache.pig.data.TupleFactory;
 import org.apache.pig.impl.io.FileLocalizer;
+import org.apache.pig.impl.logicalLayer.LOJoin;
+import org.apache.pig.impl.logicalLayer.LogicalPlan;
+import org.apache.pig.impl.logicalLayer.LOJoin.JOINTYPE;
 import org.apache.pig.impl.logicalLayer.parser.ParseException;
 import org.apache.pig.impl.logicalLayer.schema.Schema;
 import org.apache.pig.impl.util.LogUtils;
-import org.apache.pig.test.utils.Identity;
 import org.apache.pig.test.utils.LogicalPlanTester;
 import org.junit.Before;
 import org.junit.Test;
@@ -95,7 +97,6 @@
         }
     }
 
-    
     @Test
     public void testJoinUnkownSchema() throws Exception {
         // If any of the input schema is unknown, the resulting schema should 
be unknown as well
@@ -109,7 +110,7 @@
             assertTrue(schema == null);
         }
     }
-    
+
     @Test
     public void testDefaultJoin() throws IOException, ParseException {
         for (ExecType execType : execTypes) {
@@ -553,5 +554,54 @@
             deleteInputFile(execType, secondInput);
         }
     }
-
+    
+    @Test
+    public void testLiteralsForJoinAlgoSpecification1() {
+        
+        LogicalPlanTester lpt = new LogicalPlanTester();
+        lpt.buildPlan("a = load 'A'; ");
+        lpt.buildPlan("b = load 'B'; ");
+        LogicalPlan lp = lpt.buildPlan("c = Join a by $0, b by $0 using 
'merge'; ");
+        assertEquals(JOINTYPE.MERGE, 
((LOJoin)lp.getLeaves().get(0)).getJoinType());
+    }
+    
+    @Test
+    public void testLiteralsForJoinAlgoSpecification2() {
+        
+        LogicalPlanTester lpt = new LogicalPlanTester();
+        lpt.buildPlan("a = load 'A'; ");
+        lpt.buildPlan("b = load 'B'; ");
+        LogicalPlan lp = lpt.buildPlan("c = Join a by $0, b by $0 using 
'hash'; ");
+        assertEquals(JOINTYPE.HASH, 
((LOJoin)lp.getLeaves().get(0)).getJoinType());
+    }
+    
+    @Test
+    public void testLiteralsForJoinAlgoSpecification5() {
+        
+        LogicalPlanTester lpt = new LogicalPlanTester();
+        lpt.buildPlan("a = load 'A'; ");
+        lpt.buildPlan("b = load 'B'; ");
+        LogicalPlan lp = lpt.buildPlan("c = Join a by $0, b by $0 using 
'default'; ");
+        assertEquals(JOINTYPE.HASH, 
((LOJoin)lp.getLeaves().get(0)).getJoinType());
+    }
+    
+    @Test
+    public void testLiteralsForJoinAlgoSpecification3() {
+        
+        LogicalPlanTester lpt = new LogicalPlanTester();
+        lpt.buildPlan("a = load 'A'; ");
+        lpt.buildPlan("b = load 'B'; ");
+        LogicalPlan lp = lpt.buildPlan("c = Join a by $0, b by $0 using 
'repl'; ");
+        assertEquals(JOINTYPE.REPLICATED, 
((LOJoin)lp.getLeaves().get(0)).getJoinType());
+    }
+    
+    @Test
+    public void testLiteralsForJoinAlgoSpecification4() {
+        
+        LogicalPlanTester lpt = new LogicalPlanTester();
+        lpt.buildPlan("a = load 'A'; ");
+        lpt.buildPlan("b = load 'B'; ");
+        LogicalPlan lp = lpt.buildPlan("c = Join a by $0, b by $0 using 
'replicated'; ");
+        assertEquals(JOINTYPE.REPLICATED, 
((LOJoin)lp.getLeaves().get(0)).getJoinType());
+    }
 }


Reply via email to