Author: cheolsoo
Date: Fri Jan 30 05:56:50 2015
New Revision: 1655940

URL: http://svn.apache.org/r1655940
Log:
PIG-4401: Add pattern matching to PluckTuple (cheolsoo)

Modified:
    pig/trunk/CHANGES.txt
    pig/trunk/src/docs/src/documentation/content/xdocs/func.xml
    pig/trunk/src/org/apache/pig/builtin/PluckTuple.java
    pig/trunk/test/org/apache/pig/builtin/TestPluckTuple.java

Modified: pig/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1655940&r1=1655939&r2=1655940&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Fri Jan 30 05:56:50 2015
@@ -24,6 +24,8 @@ INCOMPATIBLE CHANGES
  
 IMPROVEMENTS
 
+PIG-4401: Add pattern matching to PluckTuple (cheolsoo)
+
 PIG-2692: Make the Pig unit facilities more generalizable and update javadocs 
(razsapps via daijy)
 
 PIG-4379: Make RoundRobinPartitioner public (daijy)

Modified: pig/trunk/src/docs/src/documentation/content/xdocs/func.xml
URL: 
http://svn.apache.org/viewvc/pig/trunk/src/docs/src/documentation/content/xdocs/func.xml?rev=1655940&r1=1655939&r2=1655940&view=diff
==============================================================================
--- pig/trunk/src/docs/src/documentation/content/xdocs/func.xml (original)
+++ pig/trunk/src/docs/src/documentation/content/xdocs/func.xml Fri Jan 30 
05:56:50 2015
@@ -915,7 +915,7 @@ DUMP X;
    
    <section id="plucktuple">
      <title>PluckTuple</title>
-     <p>Allows the user to specify a string prefix, and then filter for the 
columns in a relation that begin with that prefix.</p>
+     <p>Allows the user to specify a string prefix or a regex pattern, and then 
filter for the columns in a relation that begin with that prefix or match that 
pattern.</p>
      
      <section>
        <title>Syntax</title>
@@ -937,7 +937,7 @@ DUMP X;
              <p>expression1</p>
            </td>
            <td>
-             <p>A prefix to pluck by</p>
+             <p>A prefix to pluck by or a regex pattern to pluck by</p>
            </td>
         </tr>
         <tr>

Modified: pig/trunk/src/org/apache/pig/builtin/PluckTuple.java
URL: 
http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/builtin/PluckTuple.java?rev=1655940&r1=1655939&r2=1655940&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/builtin/PluckTuple.java (original)
+++ pig/trunk/src/org/apache/pig/builtin/PluckTuple.java Fri Jan 30 05:56:50 
2015
@@ -20,6 +20,7 @@ package org.apache.pig.builtin;
 
 import java.io.IOException;
 import java.util.List;
+import java.util.regex.Pattern;
 
 import org.apache.pig.EvalFunc;
 import org.apache.pig.data.DataType;
@@ -35,6 +36,8 @@ import com.google.common.collect.Lists;
  * filter for the columns in a relation that begin with that prefix.
  *
  * Example:
+ *
+ * 1) Prefix
  * a = load 'a' as (x, y);
  * b = load 'b' as (x, y);
  * c = join a by x, b by x;
@@ -44,9 +47,21 @@ import com.google.common.collect.Lists;
  * c: {a::x: bytearray,a::y: bytearray,b::x: bytearray,b::y: bytearray}
  * describe d;
  * d: {plucked::a::x: bytearray,plucked::a::y: bytearray}
+ *
+ * 2) Regex
+ * a = load 'a' as (x, y);
+ * b = load 'b' as (x, y);
+ * c = join a by x, b by x;
+ * DEFINE pluck PluckTuple('.*::y');
+ * d = foreach c generate FLATTEN(pluck(*));
+ * describe c;
+ * c: {a::x: bytearray,a::y: bytearray,b::x: bytearray,b::y: bytearray}
+ * describe d;
+ * d: {plucked::a::y: bytearray,plucked::b::y: bytearray}
  */
 public class PluckTuple extends EvalFunc<Tuple> {
     private static final TupleFactory mTupleFactory = 
TupleFactory.getInstance();
+    private static Pattern pattern;
 
     private boolean isInitialized = false;
     private int[] indicesToInclude;
@@ -54,6 +69,7 @@ public class PluckTuple extends EvalFunc
 
     public PluckTuple(String prefix) {
         this.prefix = prefix;
+        pattern = Pattern.compile(prefix);
     }
 
     @Override
@@ -63,7 +79,7 @@ public class PluckTuple extends EvalFunc
             Schema inputSchema = getInputSchema();
             for (int i = 0; i < inputSchema.size(); i++) {
                 String alias = inputSchema.getField(i).alias;
-                if (alias.startsWith(prefix)) {
+                if (alias.startsWith(prefix) || 
pattern.matcher(alias).matches()) {
                     indicesToInclude.add(i);
                 }
             }
@@ -92,7 +108,7 @@ public class PluckTuple extends EvalFunc
                 } catch (FrontendException e) {
                     throw new RuntimeException(e); // Should never happen
                 }
-                if (alias.startsWith(prefix)) {
+                if (alias.startsWith(prefix) || 
pattern.matcher(alias).matches()) {
                     indicesToInclude.add(i);
                 }
             }

Modified: pig/trunk/test/org/apache/pig/builtin/TestPluckTuple.java
URL: 
http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/builtin/TestPluckTuple.java?rev=1655940&r1=1655939&r2=1655940&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/builtin/TestPluckTuple.java (original)
+++ pig/trunk/test/org/apache/pig/builtin/TestPluckTuple.java Fri Jan 30 
05:56:50 2015
@@ -44,7 +44,7 @@ public class TestPluckTuple {
     }
 
     @Test
-    public void testSchema() throws Exception {
+    public void testStartsWith() throws Exception {
         String query = "a = load 'a' as (x:int,y:chararray,z:long);" +
                        "b = load 'b' as (x:int,y:chararray,z:long);" +
                        "c = join a by x, b by x;" +
@@ -55,6 +55,17 @@ public class TestPluckTuple {
     }
 
     @Test
+    public void testPatternMatches() throws Exception {
+        String query = "a1 = load 'a1' as (x:int,y:chararray,z:long);" +
+                "a2 = load 'a2' as (x:int,y:chararray,z:long);" +
+                "b = join a1 by x, a2 by x;" +
+                "define pluck PluckTuple('a[2|3]::.*');" +
+                "c = foreach b generate flatten(pluck(*));";
+        pigServer.registerQuery(query);
+        assertTrue(Schema.equals(pigServer.dumpSchema("a2"), 
pigServer.dumpSchema("c"), false, true));
+    }
+
+    @Test
     public void testOutput() throws Exception {
         Data data = resetData(pigServer);
 


Reply via email to