Author: cheolsoo
Date: Mon Apr 27 15:42:21 2015
New Revision: 1676312

URL: http://svn.apache.org/r1676312
Log:
PIG-4511: Add columns to prune from PluckTuple (jbabcock via cheolsoo)

Modified:
    pig/branches/branch-0.15/CHANGES.txt
    pig/branches/branch-0.15/src/docs/src/documentation/content/xdocs/func.xml
    pig/branches/branch-0.15/src/org/apache/pig/builtin/PluckTuple.java
    pig/branches/branch-0.15/test/org/apache/pig/builtin/TestPluckTuple.java

Modified: pig/branches/branch-0.15/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.15/CHANGES.txt?rev=1676312&r1=1676311&r2=1676312&view=diff
==============================================================================
--- pig/branches/branch-0.15/CHANGES.txt (original)
+++ pig/branches/branch-0.15/CHANGES.txt Mon Apr 27 15:42:21 2015
@@ -24,6 +24,8 @@ INCOMPATIBLE CHANGES
  
 IMPROVEMENTS
 
+PIG-4511: Add columns to prune from PluckTuple (jbabcock via cheolsoo)
+
 PIG-4434: Improve auto-parallelism for tez (daijy)
 
 PIG-4495: Better multi-query planning in case of multiple edges (rohini)

Modified: pig/branches/branch-0.15/src/docs/src/documentation/content/xdocs/func.xml
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.15/src/docs/src/documentation/content/xdocs/func.xml?rev=1676312&r1=1676311&r2=1676312&view=diff
==============================================================================
--- pig/branches/branch-0.15/src/docs/src/documentation/content/xdocs/func.xml (original)
+++ pig/branches/branch-0.15/src/docs/src/documentation/content/xdocs/func.xml Mon Apr 27 15:42:21 2015
@@ -915,7 +915,8 @@ DUMP X;
    
    <section id="plucktuple">
      <title>PluckTuple</title>
-     <p>Allows the user to specify a string prefix, and then filter for the columns in a relation that begin with that prefix or match that regex pattern.</p>
+     <p>Allows the user to specify a string prefix, and then filter for the columns in a relation that begin with that prefix or match that regex pattern. Optionally, include flag 'false' to filter
+      for columns that do not match that prefix or match that regex pattern</p>
      
      <section>
        <title>Syntax</title>
@@ -923,6 +924,7 @@ DUMP X;
          <tr>
            <td>
              <p>DEFINE pluck PluckTuple(expression1)</p>
+             <p>DEFINE pluck PluckTuple(expression1,expression3)</p>
              <p>pluck(expression2)</p>
            </td>
          </tr>
@@ -948,6 +950,14 @@ DUMP X;
              <p>The fields to apply the pluck to, usually '*'</p>
            </td>
         </tr>
+    <tr>
+           <td>
+             <p>expression3</p>
+           </td>
+           <td>
+             <p>A boolean flag to indicate whether to include or exclude matching columns</p>
+           </td>
+   </tr>
        </table>
      </section>
      
@@ -964,6 +974,10 @@ describe c;
 c: {a::x: bytearray,a::y: bytearray,b::x: bytearray,b::y: bytearray}
 describe d;
 d: {plucked::a::x: bytearray,plucked::a::y: bytearray}
+DEFINE pluckNegative PluckTuple('a::','false');
+d = foreach c generate FLATTEN(pluckNegative(*));
+describe d;
+d: {plucked::b::x: bytearray,plucked::b::y: bytearray}
 </source>
      </section>
    </section>

Modified: pig/branches/branch-0.15/src/org/apache/pig/builtin/PluckTuple.java
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.15/src/org/apache/pig/builtin/PluckTuple.java?rev=1676312&r1=1676311&r2=1676312&view=diff
==============================================================================
--- pig/branches/branch-0.15/src/org/apache/pig/builtin/PluckTuple.java (original)
+++ pig/branches/branch-0.15/src/org/apache/pig/builtin/PluckTuple.java Mon Apr 27 15:42:21 2015
@@ -66,9 +66,15 @@ public class PluckTuple extends EvalFunc
     private boolean isInitialized = false;
     private int[] indicesToInclude;
     private String prefix;
+    private boolean match;
 
     public PluckTuple(String prefix) {
+        this(prefix,"true");
+    }
+
+    public PluckTuple(String prefix, String match) {
         this.prefix = prefix;
+        this.match = Boolean.valueOf(match);
         pattern = Pattern.compile(prefix);
     }
 
@@ -79,7 +85,10 @@ public class PluckTuple extends EvalFunc
             Schema inputSchema = getInputSchema();
             for (int i = 0; i < inputSchema.size(); i++) {
                 String alias = inputSchema.getField(i).alias;
-                if (alias.startsWith(prefix) || pattern.matcher(alias).matches()) {
+                if ((alias.startsWith(prefix) || pattern.matcher(alias).matches()) && this.match) {
+                    indicesToInclude.add(i);
+                }
+                else if (!alias.startsWith(prefix) && !pattern.matcher(alias).matches() && !this.match){
                     indicesToInclude.add(i);
                 }
             }
@@ -108,7 +117,10 @@ public class PluckTuple extends EvalFunc
                 } catch (FrontendException e) {
                     throw new RuntimeException(e); // Should never happen
                 }
-                if (alias.startsWith(prefix) || pattern.matcher(alias).matches()) {
+                if ((alias.startsWith(prefix) || pattern.matcher(alias).matches()) && this.match) {
+                    indicesToInclude.add(i);
+                }
+                else if (!alias.startsWith(prefix) && !pattern.matcher(alias).matches() && !this.match){
                     indicesToInclude.add(i);
                 }
             }

Modified: pig/branches/branch-0.15/test/org/apache/pig/builtin/TestPluckTuple.java
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.15/test/org/apache/pig/builtin/TestPluckTuple.java?rev=1676312&r1=1676311&r2=1676312&view=diff
==============================================================================
--- pig/branches/branch-0.15/test/org/apache/pig/builtin/TestPluckTuple.java (original)
+++ pig/branches/branch-0.15/test/org/apache/pig/builtin/TestPluckTuple.java Mon Apr 27 15:42:21 2015
@@ -55,6 +55,17 @@ public class TestPluckTuple {
     }
 
     @Test
+    public void testNegativeStartsWith() throws Exception {
+        String query = "a = load 'a' as (x:int,y:chararray,z:long);" +
+                       "b = load 'b' as (x:int,y:chararray,z:long);" +
+                       "c = join a by x, b by x;" +
+                       "define pluck PluckTuple('a::','false');" +
+                       "d = foreach c generate flatten(pluck(*));";
+        pigServer.registerQuery(query);
+        assertTrue(Schema.equals(pigServer.dumpSchema("b"), pigServer.dumpSchema("d"), false, true));
+    }
+
+    @Test
     public void testPatternMatches() throws Exception {
         String query = "a1 = load 'a1' as (x:int,y:chararray,z:long);" +
                 "a2 = load 'a2' as (x:int,y:chararray,z:long);" +
@@ -66,6 +77,17 @@ public class TestPluckTuple {
     }
 
     @Test
+    public void testNegativePatternMatches() throws Exception {
+        String query = "a1 = load 'a1' as (x:int,y:chararray,z:long);" +
+                "a2 = load 'a2' as (x:int,y:chararray,z:long);" +
+                "b = join a1 by x, a2 by x;" +
+                "define pluck PluckTuple('a[2|3]::.*','false');" +
+                "c = foreach b generate flatten(pluck(*));";
+        pigServer.registerQuery(query);
+        assertTrue(Schema.equals(pigServer.dumpSchema("a1"), pigServer.dumpSchema("c"), false, true));
+    }
+
+    @Test
     public void testOutput() throws Exception {
         Data data = resetData(pigServer);
 
@@ -98,4 +120,39 @@ public class TestPluckTuple {
         assertEquals(exp2, it.next());
         assertFalse(it.hasNext());
     }
+
+    @Test
+    public void testNegativeOutput() throws Exception {
+        Data data = resetData(pigServer);
+
+        Tuple exp1 = tuple(1, "sasf", 5L);
+        Tuple exp2 = tuple(2, "woah", 6L);
+
+        data.set("a",
+            Utils.getSchemaFromString("x:int,y:chararray,z:long"),
+            tuple(1, "hey", 2L),
+            tuple(2, "woah", 3L),
+            tuple(3, "c", 4L)
+            );
+        data.set("b",
+            Utils.getSchemaFromString("x:int,y:chararray,z:long"),
+            exp1,
+            exp2,
+            tuple(4, "c", 7L)
+            );
+
+        String query = "a = load 'a' using mock.Storage();" +
+            "b = load 'b' using mock.Storage();" +
+            "c = join a by x, b by x;" +
+            "define pluck PluckTuple('a::','false');" +
+            "d = foreach c generate flatten(pluck(*));";
+        pigServer.registerQuery(query);
+        Iterator<Tuple> it = pigServer.openIterator("d");
+        assertTrue(it.hasNext());
+        assertEquals(exp1, it.next());
+        assertTrue(it.hasNext());
+        assertEquals(exp2, it.next());
+        assertFalse(it.hasNext());
+    }
+
 }
\ No newline at end of file


Reply via email to