Author: cheolsoo
Date: Mon Apr 27 15:38:35 2015
New Revision: 1676308
URL: http://svn.apache.org/r1676308
Log:
PIG-4511: Add columns to prune from PluckTuple (jbabcock via cheolsoo)
Modified:
pig/trunk/CHANGES.txt
pig/trunk/src/docs/src/documentation/content/xdocs/func.xml
pig/trunk/src/org/apache/pig/builtin/PluckTuple.java
pig/trunk/test/org/apache/pig/builtin/TestPluckTuple.java
Modified: pig/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1676308&r1=1676307&r2=1676308&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Mon Apr 27 15:38:35 2015
@@ -32,6 +32,8 @@ INCOMPATIBLE CHANGES
IMPROVEMENTS
+PIG-4511: Add columns to prune from PluckTuple (jbabcock)
+
PIG-4434: Improve auto-parallelism for tez (daijy)
PIG-4495: Better multi-query planning in case of multiple edges (rohini)
Modified: pig/trunk/src/docs/src/documentation/content/xdocs/func.xml
URL:
http://svn.apache.org/viewvc/pig/trunk/src/docs/src/documentation/content/xdocs/func.xml?rev=1676308&r1=1676307&r2=1676308&view=diff
==============================================================================
--- pig/trunk/src/docs/src/documentation/content/xdocs/func.xml (original)
+++ pig/trunk/src/docs/src/documentation/content/xdocs/func.xml Mon Apr 27
15:38:35 2015
@@ -915,7 +915,8 @@ DUMP X;
<section id="plucktuple">
<title>PluckTuple</title>
- <p>Allows the user to specify a string prefix, and then filter for the
columns in a relation that begin with that prefix or match that regex
pattern.</p>
+ <p>Allows the user to specify a string prefix, and then filter for the
columns in a relation that begin with that prefix or match that regex pattern.
Optionally, pass the flag 'false' to filter
+ for columns that neither begin with that prefix nor match that regex pattern.</p>
<section>
<title>Syntax</title>
@@ -923,6 +924,7 @@ DUMP X;
<tr>
<td>
<p>DEFINE pluck PluckTuple(expression1)</p>
+ <p>DEFINE pluck PluckTuple(expression1,expression3)</p>
<p>pluck(expression2)</p>
</td>
</tr>
@@ -948,6 +950,14 @@ DUMP X;
<p>The fields to apply the pluck to, usually '*'</p>
</td>
</tr>
+ <tr>
+ <td>
+ <p>expression3</p>
+ </td>
+ <td>
+ <p>A boolean flag, given as a string: 'true' (the default) includes and 'false' excludes
matching columns</p>
+ </td>
+ </tr>
</table>
</section>
@@ -964,6 +974,10 @@ describe c;
c: {a::x: bytearray,a::y: bytearray,b::x: bytearray,b::y: bytearray}
describe d;
d: {plucked::a::x: bytearray,plucked::a::y: bytearray}
+DEFINE pluckNegative PluckTuple('a::','false');
+d = foreach c generate FLATTEN(pluckNegative(*));
+describe d;
+d: {plucked::b::x: bytearray,plucked::b::y: bytearray}
</source>
</section>
</section>
Modified: pig/trunk/src/org/apache/pig/builtin/PluckTuple.java
URL:
http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/builtin/PluckTuple.java?rev=1676308&r1=1676307&r2=1676308&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/builtin/PluckTuple.java (original)
+++ pig/trunk/src/org/apache/pig/builtin/PluckTuple.java Mon Apr 27 15:38:35
2015
@@ -66,9 +66,15 @@ public class PluckTuple extends EvalFunc
private boolean isInitialized = false;
private int[] indicesToInclude;
private String prefix;
+ private boolean match;
public PluckTuple(String prefix) {
+ this(prefix,"true");
+ }
+
+ public PluckTuple(String prefix, String match) {
this.prefix = prefix;
+ this.match = Boolean.valueOf(match);
pattern = Pattern.compile(prefix);
}
@@ -79,7 +85,10 @@ public class PluckTuple extends EvalFunc
Schema inputSchema = getInputSchema();
for (int i = 0; i < inputSchema.size(); i++) {
String alias = inputSchema.getField(i).alias;
- if (alias.startsWith(prefix) ||
pattern.matcher(alias).matches()) {
+ if ((alias.startsWith(prefix) ||
pattern.matcher(alias).matches()) && this.match) {
+ indicesToInclude.add(i);
+ }
+ else if (!alias.startsWith(prefix) &&
!pattern.matcher(alias).matches() && !this.match){
indicesToInclude.add(i);
}
}
@@ -108,7 +117,10 @@ public class PluckTuple extends EvalFunc
} catch (FrontendException e) {
throw new RuntimeException(e); // Should never happen
}
- if (alias.startsWith(prefix) ||
pattern.matcher(alias).matches()) {
+ if ((alias.startsWith(prefix) ||
pattern.matcher(alias).matches()) && this.match) {
+ indicesToInclude.add(i);
+ }
+ else if (!alias.startsWith(prefix) &&
!pattern.matcher(alias).matches() && !this.match){
indicesToInclude.add(i);
}
}
Modified: pig/trunk/test/org/apache/pig/builtin/TestPluckTuple.java
URL:
http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/builtin/TestPluckTuple.java?rev=1676308&r1=1676307&r2=1676308&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/builtin/TestPluckTuple.java (original)
+++ pig/trunk/test/org/apache/pig/builtin/TestPluckTuple.java Mon Apr 27
15:38:35 2015
@@ -55,6 +55,17 @@ public class TestPluckTuple {
}
@Test
+ public void testNegativeStartsWith() throws Exception {
+ String query = "a = load 'a' as (x:int,y:chararray,z:long);" +
+ "b = load 'b' as (x:int,y:chararray,z:long);" +
+ "c = join a by x, b by x;" +
+ "define pluck PluckTuple('a::','false');" +
+ "d = foreach c generate flatten(pluck(*));";
+ pigServer.registerQuery(query);
+ assertTrue(Schema.equals(pigServer.dumpSchema("b"),
pigServer.dumpSchema("d"), false, true));
+ }
+
+ @Test
public void testPatternMatches() throws Exception {
String query = "a1 = load 'a1' as (x:int,y:chararray,z:long);" +
"a2 = load 'a2' as (x:int,y:chararray,z:long);" +
@@ -66,6 +77,17 @@ public class TestPluckTuple {
}
@Test
+ public void testNegativePatternMatches() throws Exception {
+ String query = "a1 = load 'a1' as (x:int,y:chararray,z:long);" +
+ "a2 = load 'a2' as (x:int,y:chararray,z:long);" +
+ "b = join a1 by x, a2 by x;" +
+ "define pluck PluckTuple('a[2|3]::.*','false');" +
+ "c = foreach b generate flatten(pluck(*));";
+ pigServer.registerQuery(query);
+ assertTrue(Schema.equals(pigServer.dumpSchema("a1"),
pigServer.dumpSchema("c"), false, true));
+ }
+
+ @Test
public void testOutput() throws Exception {
Data data = resetData(pigServer);
@@ -98,4 +120,39 @@ public class TestPluckTuple {
assertEquals(exp2, it.next());
assertFalse(it.hasNext());
}
+
+ @Test
+ public void testNegativeOutput() throws Exception {
+ Data data = resetData(pigServer);
+
+ Tuple exp1 = tuple(1, "sasf", 5L);
+ Tuple exp2 = tuple(2, "woah", 6L);
+
+ data.set("a",
+ Utils.getSchemaFromString("x:int,y:chararray,z:long"),
+ tuple(1, "hey", 2L),
+ tuple(2, "woah", 3L),
+ tuple(3, "c", 4L)
+ );
+ data.set("b",
+ Utils.getSchemaFromString("x:int,y:chararray,z:long"),
+ exp1,
+ exp2,
+ tuple(4, "c", 7L)
+ );
+
+ String query = "a = load 'a' using mock.Storage();" +
+ "b = load 'b' using mock.Storage();" +
+ "c = join a by x, b by x;" +
+ "define pluck PluckTuple('a::','false');" +
+ "d = foreach c generate flatten(pluck(*));";
+ pigServer.registerQuery(query);
+ Iterator<Tuple> it = pigServer.openIterator("d");
+ assertTrue(it.hasNext());
+ assertEquals(exp1, it.next());
+ assertTrue(it.hasNext());
+ assertEquals(exp2, it.next());
+ assertFalse(it.hasNext());
+ }
+
}
\ No newline at end of file