Author: cheolsoo
Date: Fri Jan 30 05:56:50 2015
New Revision: 1655940
URL: http://svn.apache.org/r1655940
Log:
PIG-4401: Add pattern matching to PluckTuple (cheolsoo)
Modified:
pig/trunk/CHANGES.txt
pig/trunk/src/docs/src/documentation/content/xdocs/func.xml
pig/trunk/src/org/apache/pig/builtin/PluckTuple.java
pig/trunk/test/org/apache/pig/builtin/TestPluckTuple.java
Modified: pig/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1655940&r1=1655939&r2=1655940&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Fri Jan 30 05:56:50 2015
@@ -24,6 +24,8 @@ INCOMPATIBLE CHANGES
IMPROVEMENTS
+PIG-4401: Add pattern matching to PluckTuple (cheolsoo)
+
PIG-2692: Make the Pig unit faciliities more generalizable and update javadocs
(razsapps via daijy)
PIG-4379: Make RoundRobinPartitioner public (daijy)
Modified: pig/trunk/src/docs/src/documentation/content/xdocs/func.xml
URL:
http://svn.apache.org/viewvc/pig/trunk/src/docs/src/documentation/content/xdocs/func.xml?rev=1655940&r1=1655939&r2=1655940&view=diff
==============================================================================
--- pig/trunk/src/docs/src/documentation/content/xdocs/func.xml (original)
+++ pig/trunk/src/docs/src/documentation/content/xdocs/func.xml Fri Jan 30
05:56:50 2015
@@ -915,7 +915,7 @@ DUMP X;
<section id="plucktuple">
<title>PluckTuple</title>
- <p>Allows the user to specify a string prefix, and then filter for the
columns in a relation that begin with that prefix.</p>
+ <p>Allows the user to specify a string prefix or a regex pattern, and then
filter for the columns in a relation that begin with that prefix or match
that pattern.</p>
<section>
<title>Syntax</title>
@@ -937,7 +937,7 @@ DUMP X;
<p>expression1</p>
</td>
<td>
- <p>A prefix to pluck by</p>
+ <p>A prefix or a regex pattern to pluck by</p>
</td>
</tr>
<tr>
Modified: pig/trunk/src/org/apache/pig/builtin/PluckTuple.java
URL:
http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/builtin/PluckTuple.java?rev=1655940&r1=1655939&r2=1655940&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/builtin/PluckTuple.java (original)
+++ pig/trunk/src/org/apache/pig/builtin/PluckTuple.java Fri Jan 30 05:56:50
2015
@@ -20,6 +20,7 @@ package org.apache.pig.builtin;
import java.io.IOException;
import java.util.List;
+import java.util.regex.Pattern;
import org.apache.pig.EvalFunc;
import org.apache.pig.data.DataType;
@@ -35,6 +36,8 @@ import com.google.common.collect.Lists;
* filter for the columns in a relation that begin with that prefix.
*
* Example:
+ *
+ * 1) Prefix
* a = load 'a' as (x, y);
* b = load 'b' as (x, y);
* c = join a by x, b by x;
@@ -44,9 +47,21 @@ import com.google.common.collect.Lists;
* c: {a::x: bytearray,a::y: bytearray,b::x: bytearray,b::y: bytearray}
* describe d;
* d: {plucked::a::x: bytearray,plucked::a::y: bytearray}
+ *
+ * 2) Regex
+ * a = load 'a' as (x, y);
+ * b = load 'b' as (x, y);
+ * c = join a by x, b by x;
+ * DEFINE pluck PluckTuple('.*::y');
+ * d = foreach c generate FLATTEN(pluck(*));
+ * describe c;
+ * c: {a::x: bytearray,a::y: bytearray,b::x: bytearray,b::y: bytearray}
+ * describe d;
+ * d: {plucked::a::y: bytearray,plucked::b::y: bytearray}
*/
public class PluckTuple extends EvalFunc<Tuple> {
private static final TupleFactory mTupleFactory =
TupleFactory.getInstance();
+ // Compiled form of the constructor argument. Kept per instance (not static):
+ // each DEFINE of PluckTuple may pass a different pattern, and a shared static
+ // field would be overwritten by whichever instance was constructed last.
+ private Pattern pattern;
private boolean isInitialized = false;
private int[] indicesToInclude;
@@ -54,6 +69,7 @@ public class PluckTuple extends EvalFunc
public PluckTuple(String prefix) {
this.prefix = prefix;
+ pattern = Pattern.compile(prefix);
}
@Override
@@ -63,7 +79,7 @@ public class PluckTuple extends EvalFunc
Schema inputSchema = getInputSchema();
for (int i = 0; i < inputSchema.size(); i++) {
String alias = inputSchema.getField(i).alias;
- if (alias.startsWith(prefix)) {
+ if (alias.startsWith(prefix) ||
pattern.matcher(alias).matches()) {
indicesToInclude.add(i);
}
}
@@ -92,7 +108,7 @@ public class PluckTuple extends EvalFunc
} catch (FrontendException e) {
throw new RuntimeException(e); // Should never happen
}
- if (alias.startsWith(prefix)) {
+ if (alias.startsWith(prefix) ||
pattern.matcher(alias).matches()) {
indicesToInclude.add(i);
}
}
Modified: pig/trunk/test/org/apache/pig/builtin/TestPluckTuple.java
URL:
http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/builtin/TestPluckTuple.java?rev=1655940&r1=1655939&r2=1655940&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/builtin/TestPluckTuple.java (original)
+++ pig/trunk/test/org/apache/pig/builtin/TestPluckTuple.java Fri Jan 30
05:56:50 2015
@@ -44,7 +44,7 @@ public class TestPluckTuple {
}
@Test
- public void testSchema() throws Exception {
+ public void testStartsWith() throws Exception {
String query = "a = load 'a' as (x:int,y:chararray,z:long);" +
"b = load 'b' as (x:int,y:chararray,z:long);" +
"c = join a by x, b by x;" +
@@ -55,6 +55,17 @@ public class TestPluckTuple {
}
@Test
+ public void testPatternMatches() throws Exception {
+ String query = "a1 = load 'a1' as (x:int,y:chararray,z:long);" +
+ "a2 = load 'a2' as (x:int,y:chararray,z:long);" +
+ "b = join a1 by x, a2 by x;" +
+ "define pluck PluckTuple('a[2|3]::.*');" +
+ "c = foreach b generate flatten(pluck(*));";
+ pigServer.registerQuery(query);
+ assertTrue(Schema.equals(pigServer.dumpSchema("a2"),
pigServer.dumpSchema("c"), false, true));
+ }
+
+ @Test
public void testOutput() throws Exception {
Data data = resetData(pigServer);