Author: knoguchi
Date: Thu May 25 20:15:13 2017
New Revision: 1796191
URL: http://svn.apache.org/viewvc?rev=1796191&view=rev
Log:
PIG-5231: PigStorage with -schema may produce inconsistent outputs with more
fields (knoguchi)
Modified:
pig/trunk/CHANGES.txt
pig/trunk/src/org/apache/pig/builtin/PigStorage.java
pig/trunk/test/org/apache/pig/test/TestPigStorage.java
Modified: pig/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1796191&r1=1796190&r2=1796191&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Thu May 25 20:15:13 2017
@@ -101,6 +101,8 @@ OPTIMIZATIONS

BUG FIXES
+PIG-5231: PigStorage with -schema may produce inconsistent outputs with more fields (knoguchi)
+
PIG-5224: Extra foreach from ColumnPrune preventing Accumulator usage (knoguchi)
PIG-5235: Typecast with as-clause fails for tuple/bag with an empty schema (knoguchi)
Modified: pig/trunk/src/org/apache/pig/builtin/PigStorage.java
URL:
http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/builtin/PigStorage.java?rev=1796191&r1=1796190&r2=1796191&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/builtin/PigStorage.java (original)
+++ pig/trunk/src/org/apache/pig/builtin/PigStorage.java Thu May 25 20:15:13 2017
@@ -334,6 +334,18 @@ LoadPushDown, LoadMetadata, StoreMetadat
tupleIdx++;
}
}
+ // If input record somehow has more fields than the provided schema
+ // drop the extra fields
+ if( tup.size() > fieldSchemas.length ) {
+ int lastindex = tup.size() - 1;
+ List<Object> list = tup.getAll();
+ for(int i = lastindex; i >= fieldSchemas.length ; i--) {
+ list.remove(i);
+ }
+ // Tuple.getAll() may not return a reference to the internal List,
+ // so creating a new Tuple.
+ tup = mTupleFactory.newTupleNoCopy(list);
+ }
}
return tup;
}
Modified: pig/trunk/test/org/apache/pig/test/TestPigStorage.java
URL:
http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/TestPigStorage.java?rev=1796191&r1=1796190&r2=1796191&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/test/TestPigStorage.java (original)
+++ pig/trunk/test/org/apache/pig/test/TestPigStorage.java Thu May 25 20:15:13 2017
@@ -789,4 +789,35 @@ public class TestPigStorage {
pig.store("a", datadir + "aout", "PigStorage(',')");
}
+ @Test
+ public void testPigStroageSchemaWithMultipleSchema() throws Exception {
+ pigContext.connect();
+ String query = "A = LOAD '" + datadir + "originput' using PigStorage(',') as (f1:chararray, f2:int);"
+ + "B = FOREACH A generate f1, f2, 3 as (f3:int);";
+ pig.registerQuery(query);
+ pig.store("A", datadir + "aout", "PigStorage('\\t', '-schema')");
+ pig.store("B", datadir + "bout", "PigStorage('\\t', '-schema')");
+
+ // We want to test the case when aout/.pig_schema is chosen for loading
+ // aout AND bout.
+ // Picking of schema is not deterministic given it's picked from a SET.
+ // For this test, we simply delete the other schema.
+ new File(datadir + "bout/.pig_schema" ).delete();
+
+ // Loading from 2 directories, each containing 2 fields and 3 fields
+ // respectively.
+ pig.registerQuery("C = LOAD '" + datadir + "aout," + datadir + "bout ' using PigStorage('\\t', '-schema');");
+ Schema a_schema = pig.dumpSchema("A");
+ Schema c_schema = pig.dumpSchema("C");
+ Assert.assertEquals("PigStorage schema should pick up the .pig_schema from A", a_schema, c_schema);
+ Iterator<Tuple> iter = pig.openIterator("C");
+ int counter = 0;
+ while (iter.hasNext()) {
+ Assert.assertEquals("All tuples should only contain 2 fields defined in schema",
+ 2, iter.next().size());
+ counter++;
+ }
+ Assert.assertEquals(20, counter);
+ }
+
}