svn commit: r1804929 - in /pig/trunk: ./ src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/ src/org/apache/pig/backend/hadoop/executionengine/spark/converter/ src/org/apache/pig/data/

2017-08-13 Thread szita
Author: szita
Date: Sun Aug 13 16:09:32 2017
New Revision: 1804929

URL: http://svn.apache.org/viewvc?rev=1804929&view=rev
Log:
PIG-5277: Spark mode is writing nulls among tuples to the output (workaround) 
(szita)

Added:
pig/trunk/src/org/apache/pig/data/NonWritableTuple.java
Modified:
pig/trunk/CHANGES.txt

pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigOutputFormat.java

pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/JoinGroupSparkConverter.java

pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/OutputConsumerIterator.java

Modified: pig/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1804929&r1=1804928&r2=1804929&view=diff
==
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Sun Aug 13 16:09:32 2017
@@ -44,6 +44,8 @@ OPTIMIZATIONS
  
 BUG FIXES
 
+PIG-5277: Spark mode is writing nulls among tuples to the output (workaround) 
(szita)
+
 PIG-5283: Configuration is not passed to SparkPigSplits on the backend (szita)
 
 PIG-5284: Fix flakyness introduced by PIG-3655 (szita)

Modified: 
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigOutputFormat.java
URL: 
http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigOutputFormat.java?rev=1804929&r1=1804928&r2=1804929&view=diff
==
--- 
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigOutputFormat.java
 (original)
+++ 
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigOutputFormat.java
 Sun Aug 13 16:09:32 2017
@@ -37,6 +37,7 @@ import org.apache.pig.backend.hadoop.exe
 import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.StoreFuncDecorator;
 import org.apache.pig.backend.hadoop.executionengine.shims.HadoopShims;
 import org.apache.pig.backend.hadoop.executionengine.util.MapRedUtil;
+import org.apache.pig.data.NonWritableTuple;
 import org.apache.pig.data.Tuple;
 import org.apache.pig.impl.PigContext;
 import org.apache.pig.impl.util.ObjectSerializer;
@@ -141,7 +142,9 @@ public class PigOutputFormat extends Out
         public void write(WritableComparable key, Tuple value)
                 throws IOException, InterruptedException {
             if(mode == Mode.SINGLE_STORE) {
-                storeDecorator.putNext(value);
+                if (!(value instanceof NonWritableTuple)) {
+                    storeDecorator.putNext(value);
+                }
             } else {
                 throw new IOException("Internal Error: Unexpected code path");
             }
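
The new NonWritableTuple.java listed under "Added:" is not shown in this notification. As a hedged sketch of the idea, assuming it is a singleton sentinel that converters emit for STATUS_NULL results (see the JoinGroupSparkConverter hunk below) and that the write() guard above filters out:

    import org.apache.pig.data.DefaultTuple;

    // Illustrative body only; the committed file may differ.
    public class NonWritableTuple extends DefaultTuple {
        public static final NonWritableTuple INSTANCE = new NonWritableTuple();
        private NonWritableTuple() {}
    }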

Modified: 
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/JoinGroupSparkConverter.java
URL: 
http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/JoinGroupSparkConverter.java?rev=1804929&r1=1804928&r2=1804929&view=diff
==
--- 
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/JoinGroupSparkConverter.java
 (original)
+++ 
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/JoinGroupSparkConverter.java
 Sun Aug 13 16:09:32 2017
@@ -34,6 +34,7 @@ import org.apache.pig.backend.hadoop.exe
 import org.apache.pig.backend.hadoop.executionengine.spark.SparkUtil;
 import org.apache.pig.backend.hadoop.executionengine.spark.operator.POGlobalRearrangeSpark;
 import org.apache.pig.backend.hadoop.executionengine.spark.operator.POJoinGroupSpark;
+import org.apache.pig.data.NonWritableTuple;
 import org.apache.pig.data.Tuple;
 import org.apache.pig.impl.io.NullableTuple;
 import org.apache.pig.impl.io.PigNullableWritable;
@@ -209,7 +210,7 @@ public class JoinGroupSparkConverter imp
                     out = (Tuple) result.result;
                     break;
                 case POStatus.STATUS_NULL:
-                    out = null;
+                    out = NonWritableTuple.INSTANCE;
                     break;
                 default:
                     throw new RuntimeException(

Modified: 
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/OutputConsumerIterator.java
URL: 
http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/OutputConsumerIterator.java?rev=1804929&r1=1804928&r2=1804929&view=diff
==
--- 
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/OutputConsumerIterator.java
 (original)
+++ 
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/OutputConsumerIterator.java
 Sun Aug 13 16:09:32 2017
@@ -20,6 +20,7 

svn commit: r1801128 - in /pig/trunk: CHANGES.txt src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MapReduceLauncher.java src/org/apache/pig/tools/pigstats/mapreduce/MRJobStats.java

2017-07-07 Thread szita
Author: szita
Date: Fri Jul  7 08:38:49 2017
New Revision: 1801128

URL: http://svn.apache.org/viewvc?rev=1801128&view=rev
Log:
PIG-5269: MapReduceLauncher and MRJobStats import 
org.python.google.common.collect.Lists instead of 
com.google.common.collect.Lists (nkollar via szita)

Modified:
pig/trunk/CHANGES.txt

pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MapReduceLauncher.java
pig/trunk/src/org/apache/pig/tools/pigstats/mapreduce/MRJobStats.java

Modified: pig/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1801128&r1=1801127&r2=1801128&view=diff
==
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Fri Jul  7 08:38:49 2017
@@ -24,6 +24,8 @@ INCOMPATIBLE CHANGES
  
 IMPROVEMENTS
 
+PIG-5269: MapReduceLauncher and MRJobStats import org.python.google.common.collect.Lists instead of com.google.common.collect.Lists (nkollar via szita)
+
 PIG-4700: Enable progress reporting for Tasks in Tez (satishsaley via rohini)
 
 PIG-5251: Bump joda-time to 2.9.9 (dbist13 via rohini)

Modified: 
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MapReduceLauncher.java
URL: 
http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MapReduceLauncher.java?rev=1801128&r1=1801127&r2=1801128&view=diff
==
--- 
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MapReduceLauncher.java
 (original)
+++ 
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MapReduceLauncher.java
 Fri Jul  7 08:38:49 2017
@@ -87,9 +87,6 @@ import org.apache.pig.tools.pigstats.map
 import org.apache.pig.tools.pigstats.mapreduce.MRPigStatsUtil;
 import org.apache.pig.tools.pigstats.mapreduce.MRScriptState;
 
-import org.python.google.common.collect.Lists;
-
-
 /**
  * Main class that launches pig for Map Reduce
  *

Modified: pig/trunk/src/org/apache/pig/tools/pigstats/mapreduce/MRJobStats.java
URL: 
http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/tools/pigstats/mapreduce/MRJobStats.java?rev=1801128&r1=1801127&r2=1801128&view=diff
==
--- pig/trunk/src/org/apache/pig/tools/pigstats/mapreduce/MRJobStats.java 
(original)
+++ pig/trunk/src/org/apache/pig/tools/pigstats/mapreduce/MRJobStats.java Fri 
Jul  7 08:38:49 2017
@@ -26,6 +26,7 @@ import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 
+import com.google.common.collect.Lists;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
@@ -56,9 +57,6 @@ import org.apache.pig.tools.pigstats.Out
 import org.apache.pig.tools.pigstats.PigStats.JobGraph;
 import org.apache.pig.tools.pigstats.PigStats.JobGraphPrinter;
 
-import org.python.google.common.collect.Lists;
-
-
 /**
  * This class encapsulates the runtime statistics of a MapReduce job.
  * Job statistics is collected when job is completed.




svn commit: r1801161 - in /pig/trunk: ./ test/org/apache/pig/spark/ test/org/apache/pig/test/utils/dotGraph/

2017-07-07 Thread szita
Author: szita
Date: Fri Jul  7 13:38:22 2017
New Revision: 1801161

URL: http://svn.apache.org/viewvc?rev=1801161&view=rev
Log:
PIG-5237: Fix DOT file parsing to enable DOT-based physical plan testing 
(YaShock via szita)

Modified:
pig/trunk/CHANGES.txt
pig/trunk/test/org/apache/pig/spark/TestSparkCompiler.java
pig/trunk/test/org/apache/pig/test/utils/dotGraph/DOTParser.jjt
pig/trunk/test/org/apache/pig/test/utils/dotGraph/DotEdge.java
pig/trunk/test/org/apache/pig/test/utils/dotGraph/DotGraph.java
pig/trunk/test/org/apache/pig/test/utils/dotGraph/DotGraphReader.java
pig/trunk/test/org/apache/pig/test/utils/dotGraph/DotNode.java

Modified: pig/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1801161&r1=1801160&r2=1801161&view=diff
==
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Fri Jul  7 13:38:22 2017
@@ -24,6 +24,8 @@ INCOMPATIBLE CHANGES
  
 IMPROVEMENTS
 
+PIG-5237: Fix DOT file parsing to enable DOT-based physical plan testing 
(YaShock via szita)
+
 PIG-5269: MapReduceLauncher and MRJobStats import org.python.google.common.collect.Lists instead of com.google.common.collect.Lists (nkollar via szita)
 
 PIG-4700: Enable progress reporting for Tasks in Tez (satishsaley via rohini)

Modified: pig/trunk/test/org/apache/pig/spark/TestSparkCompiler.java
URL: 
http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/spark/TestSparkCompiler.java?rev=1801161&r1=1801160&r2=1801161&view=diff
==
--- pig/trunk/test/org/apache/pig/spark/TestSparkCompiler.java (original)
+++ pig/trunk/test/org/apache/pig/spark/TestSparkCompiler.java Fri Jul  7 
13:38:22 2017
@@ -24,7 +24,6 @@ import java.io.FileOutputStream;
 import java.io.PrintStream;
 import java.util.Properties;
 import java.util.Random;
-
 import javax.xml.parsers.ParserConfigurationException;
 import javax.xml.transform.TransformerException;
 
@@ -44,13 +43,15 @@ import org.apache.pig.impl.plan.NodeIdGe
 import org.apache.pig.impl.plan.VisitorException;
 import org.apache.pig.test.Util;
 import org.apache.pig.test.utils.TestHelper;
+import org.apache.pig.test.utils.dotGraph.DotGraph;
+import org.apache.pig.test.utils.dotGraph.DotGraphReader;
 
 import org.junit.AfterClass;
 import org.junit.Before;
 import org.junit.BeforeClass;
 import org.junit.Test;
 
-import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
 
 /**
  * Test cases to test the SparkCompiler. VERY IMPORTANT NOTE: The tests here
@@ -74,9 +75,8 @@ public class TestSparkCompiler {
         public void doPrint(PrintStream ps, SparkOperPlan plan) throws VisitorException, ParserConfigurationException, TransformerException {
             switch (this) {
             case DOT:
-                throw new RuntimeException("Testing in DOT format not supported yet");
-                //(new DotSparkPrinter(plan, ps)).dump();
-                //break;
+                (new DotSparkPrinter(plan, ps)).dump();
+                break;
             case XML:
                 XMLSparkPrinter printer = new XMLSparkPrinter(ps, plan);
                 printer.visit();
@@ -88,6 +88,19 @@ public class TestSparkCompiler {
                 break;
             }
         }
+
+        public boolean compare(String goldenPlan, String compiledPlan) {
+            switch (this) {
+            case DOT:
+                DotGraph a = DotGraphReader.load(goldenPlan);
+                DotGraph b = DotGraphReader.load(compiledPlan);
+                return a.isomorphic(b);
+            case XML:
+            case TEXT:
+            default:
+                return TestHelper.sortUDFs(Util.removeSignature(goldenPlan)).equals(TestHelper.sortUDFs(Util.removeSignature(compiledPlan)));
+            }
+        }
     }
 
 // If for some reason, the golden files need to be regenerated, set this to
@@ -135,8 +148,7 @@ public class TestSparkCompiler {
 
         run(query, "test/org/apache/pig/test/data/GoldenFiles/spark/SPARKC-LoadStore-1-text.gld", PlanPrinter.TEXT);
         run(query, "test/org/apache/pig/test/data/GoldenFiles/spark/SPARKC-LoadStore-1-xml.gld", PlanPrinter.XML);
-        //TODO: enable this when DOT file comparison is supported
-        //run(query, "test/org/apache/pig/test/data/GoldenFiles/spark/SPARKC-LoadStore-1-dot.gld", PlanPrinter.DOT);
+        run(query, "test/org/apache/pig/test/data/GoldenFiles/spark/SPARKC-LoadStore-1-dot.gld", PlanPrinter.DOT);
     }
 
     private void run(String query, String expectedFile, PlanPrinter planPrinter) throws Exception {
@@ -174,8 +186,8 @@ public class TestSparkCompiler {
 
 String goldenPlanClean = Util.standardizeNewline(goldenPlan).trim();
 String compiledPlanC
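
A hedged sketch of exercising the new DOT comparison path outside the test harness; the class names and golden-file path come from the diff above, while reading the file directly and running from the source-tree root are assumptions:

    import java.nio.file.Files;
    import java.nio.file.Paths;
    import org.apache.pig.test.utils.dotGraph.DotGraph;
    import org.apache.pig.test.utils.dotGraph.DotGraphReader;

    public class DotCompareSketch {
        public static void main(String[] args) throws Exception {
            String golden = new String(Files.readAllBytes(Paths.get(
                    "test/org/apache/pig/test/data/GoldenFiles/spark/SPARKC-LoadStore-1-dot.gld")));
            // Isomorphism rather than string equality, so auto-generated node
            // identifiers that differ between runs do not break the comparison.
            DotGraph a = DotGraphReader.load(golden);
            DotGraph b = DotGraphReader.load(golden);
            System.out.println(a.isomorphic(b)); // trivially true for identical input
        }
    }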

svn commit: r1802674 - /pig/trunk/CHANGES.txt

2017-07-22 Thread szita
Author: szita
Date: Sat Jul 22 11:18:59 2017
New Revision: 1802674

URL: http://svn.apache.org/viewvc?rev=1802674&view=rev
Log:
PIG-5157: Adding missing entry in CHANGES.txt

Modified:
pig/trunk/CHANGES.txt

Modified: pig/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1802674&r1=1802673&r2=1802674&view=diff
==
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Sat Jul 22 11:18:59 2017
@@ -24,6 +24,8 @@ INCOMPATIBLE CHANGES
  
 IMPROVEMENTS
 
+PIG-5157: Upgrade to Spark 2.0 (nkollar via liyunzhang)
+
 PIG-5237: Fix DOT file parsing to enable DOT-based physical plan testing 
(YaShock via szita)
 
 PIG-5269: MapReduceLauncher and MRJobStats import org.python.google.common.collect.Lists instead of com.google.common.collect.Lists (nkollar via szita)




svn commit: r1802675 - in /pig/trunk: ./ src/org/apache/pig/backend/hadoop/executionengine/spark/ src/org/apache/pig/backend/hadoop/executionengine/spark/converter/

2017-07-22 Thread szita
Author: szita
Date: Sat Jul 22 11:28:02 2017
New Revision: 1802675

URL: http://svn.apache.org/viewvc?rev=1802675&view=rev
Log:
PIG-5274: TestEvalPipelineLocal#testSetLocationCalledInFE is failing in spark 
mode after PIG-5157 (nkollar via szita)

Modified:
pig/trunk/CHANGES.txt

pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java

pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkShims.java

pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/SkewedJoinConverter.java

Modified: pig/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1802675&r1=1802674&r2=1802675&view=diff
==
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Sat Jul 22 11:28:02 2017
@@ -38,6 +38,8 @@ OPTIMIZATIONS
  
 BUG FIXES
 
+PIG-5274: TestEvalPipelineLocal#testSetLocationCalledInFE is failing in spark 
mode after PIG-5157 (nkollar via szita)
+
 PIG-4767: Partition filter not pushed down when filter clause references 
variable from another load path (knoguchi)
 
 PIG-5270: Typo in Pig Logging (FromAlaska49 via daijy)

Modified: 
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java
URL: 
http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java?rev=1802675&r1=1802674&r2=1802675&view=diff
==
--- 
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java
 (original)
+++ 
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java
 Sat Jul 22 11:28:02 2017
@@ -175,7 +175,6 @@ public class SparkLauncher extends Launc
         SparkPigStats sparkStats = (SparkPigStats) pigContext
                 .getExecutionEngine().instantiatePigStats();
         sparkStats.initialize(pigContext, sparkplan, jobConf);
-        UDFContext.getUDFContext().addJobConf(jobConf);
         PigStats.start(sparkStats);
 
         startSparkIfNeeded(jobConf, pigContext);

Modified: 
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkShims.java
URL: 
http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkShims.java?rev=1802675&r1=1802674&r2=1802675&view=diff
==
--- 
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkShims.java
 (original)
+++ 
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkShims.java
 Sat Jul 22 11:28:02 2017
@@ -60,7 +60,12 @@ public abstract class SparkShims impleme
 
     public static SparkShims getInstance() {
         if (sparkShims == null) {
-            String sparkVersion = UDFContext.getUDFContext().getJobConf().get(SPARK_VERSION, "");
+            String sparkVersion;
+            if (UDFContext.getUDFContext().isFrontend()) {
+                sparkVersion = SparkContext.getOrCreate().version();
+            } else {
+                sparkVersion = UDFContext.getUDFContext().getJobConf().get(SPARK_VERSION, "");
+            }
             LOG.info("Initializing SparkShims for Spark version: " + sparkVersion);
             String sparkMajorVersion = getSparkMajorVersion(sparkVersion);
             try {
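
The frontend/backend split above exists because a live SparkContext can report its version on the frontend, while on the backend the version has to travel in the job configuration. For illustration, a self-contained sketch of the version-to-shims dispatch pattern; Spark1Shims/Spark2Shims below are stand-ins, not necessarily Pig's actual class names:

    public final class ShimSelectorSketch {
        static abstract class Shims { abstract String describe(); }
        static class Spark1Shims extends Shims { String describe() { return "Spark 1.x code path"; } }
        static class Spark2Shims extends Shims { String describe() { return "Spark 2.x code path"; } }

        // Only the major version matters when choosing an implementation.
        static Shims forVersion(String sparkVersion) {
            String major = sparkVersion.split("\\.")[0];
            return "1".equals(major) ? new Spark1Shims() : new Spark2Shims();
        }

        public static void main(String[] args) {
            System.out.println(forVersion("2.1.0").describe()); // Spark 2.x code path
        }
    }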

Modified: 
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/SkewedJoinConverter.java
URL: 
http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/SkewedJoinConverter.java?rev=1802675&r1=1802674&r2=1802675&view=diff
==
--- 
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/SkewedJoinConverter.java
 (original)
+++ 
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/SkewedJoinConverter.java
 Sat Jul 22 11:28:02 2017
@@ -232,8 +232,8 @@ public class SkewedJoinConverter impleme
             }
 
             return result;
-        } catch (Exception e) {
-            log.warn(e);
+        } catch (ExecException e) {
+            log.error(e);
             return null;
         }
     }




svn commit: r1802676 - in /pig/trunk: ./ src/org/apache/pig/ src/org/apache/pig/impl/io/ test/org/apache/pig/data/ test/org/apache/pig/test/

2017-07-22 Thread szita
Author: szita
Date: Sat Jul 22 11:38:47 2017
New Revision: 1802676

URL: http://svn.apache.org/viewvc?rev=1802676&view=rev
Log:
PIG-3655: BinStorage and InterStorage approach to record markers is broken 
(szita)

Modified:
pig/trunk/CHANGES.txt
pig/trunk/src/org/apache/pig/PigConfiguration.java
pig/trunk/src/org/apache/pig/impl/io/InterRecordReader.java
pig/trunk/src/org/apache/pig/impl/io/InterRecordWriter.java
pig/trunk/src/org/apache/pig/impl/io/InterStorage.java
pig/trunk/test/org/apache/pig/data/TestSchemaTuple.java
pig/trunk/test/org/apache/pig/test/TestBinInterSedes.java
pig/trunk/test/org/apache/pig/test/TestFRJoin2.java

Modified: pig/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1802676&r1=1802675&r2=1802676&view=diff
==
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Sat Jul 22 11:38:47 2017
@@ -38,6 +38,8 @@ OPTIMIZATIONS
  
 BUG FIXES
 
+PIG-3655: BinStorage and InterStorage approach to record markers is broken 
(szita)
+
 PIG-5274: TestEvalPipelineLocal#testSetLocationCalledInFE is failing in spark 
mode after PIG-5157 (nkollar via szita)
 
 PIG-4767: Partition filter not pushed down when filter clause references 
variable from another load path (knoguchi)

Modified: pig/trunk/src/org/apache/pig/PigConfiguration.java
URL: 
http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/PigConfiguration.java?rev=1802676&r1=1802675&r2=1802676&view=diff
==
--- pig/trunk/src/org/apache/pig/PigConfiguration.java (original)
+++ pig/trunk/src/org/apache/pig/PigConfiguration.java Sat Jul 22 11:38:47 2017
@@ -40,6 +40,24 @@ public class PigConfiguration {
      */
     public static final String PIG_AUTO_LOCAL_INPUT_MAXBYTES = "pig.auto.local.input.maxbytes";
 
+
+    /**
+     * Sets the length of the record (sync) markers written into the binary files Pig produces between jobs.
+     * A longer byte sequence means less chance of collision with actual data; a shorter one means less overhead.
+     */
+    public static final String PIG_INTERSTORAGE_SYNCMARKER_SIZE = "pig.interstorage.syncmarker.size";
+    public static final int PIG_INTERSTORAGE_SYNCMARKER_SIZE_MAX = 16;
+    public static final int PIG_INTERSTORAGE_SYNCMARKER_SIZE_DEFAULT = 10;
+    public static final int PIG_INTERSTORAGE_SYNCMARKER_SIZE_MIN = 2;
+
+    /**
+     * Defines the interval (in bytes) at which a sync marker should be written into the binary file
+     */
+    public static final String PIG_INTERSTORAGE_SYNCMARKER_INTERVAL = "pig.interstorage.syncmarker.interval";
+    public static final long PIG_INTERSTORAGE_SYNCMARKER_INTERVAL_DEFAULT = 2000;
+
+
     /**
      * Boolean value used to enable or disable fetching without a mapreduce job for DUMP. True by default
      */
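
A short sketch of setting these knobs from client code. The property names and bounds come from the hunk above; the PigServer wiring and the chosen values are illustrative:

    import java.util.Properties;
    import org.apache.pig.ExecType;
    import org.apache.pig.PigServer;

    public class SyncMarkerConfigSketch {
        public static void main(String[] args) throws Exception {
            Properties props = new Properties();
            // 12-byte marker: must lie within the MIN (2) / MAX (16) bounds above.
            props.setProperty("pig.interstorage.syncmarker.size", "12");
            // Emit a marker roughly every 4000 bytes of serialized data.
            props.setProperty("pig.interstorage.syncmarker.interval", "4000");
            PigServer pig = new PigServer(ExecType.LOCAL, props);
            pig.registerQuery("A = LOAD 'input' AS (f1:chararray);");
            pig.store("A", "output");
        }
    }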

Modified: pig/trunk/src/org/apache/pig/impl/io/InterRecordReader.java
URL: 
http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/impl/io/InterRecordReader.java?rev=1802676&r1=1802675&r2=1802676&view=diff
==
--- pig/trunk/src/org/apache/pig/impl/io/InterRecordReader.java (original)
+++ pig/trunk/src/org/apache/pig/impl/io/InterRecordReader.java Sat Jul 22 
11:38:47 2017
@@ -42,16 +42,23 @@ import org.apache.pig.data.Tuple;
 public class InterRecordReader extends RecordReader<Text, Tuple> {
 
   private long start;
-  private long pos;
+  private long lastDataPos;
   private long end;
   private BufferedPositionedInputStream in;
   private Tuple value = null;
-  public static final int RECORD_1 = 0x01;
-  public static final int RECORD_2 = 0x02;
-  public static final int RECORD_3 = 0x03;
   private DataInputStream inData = null;
   private static InterSedes sedes = InterSedesFactory.getInterSedesInstance();
 
+  private byte[] syncMarker;
+  private long lastSyncPos = -1;
+  private long syncMarkerInterval;
+  private long dataBytesSeen = 0;
+
+  public InterRecordReader(int syncMarkerLength, long syncMarkerInterval) {
+  this.syncMarker = new byte[syncMarkerLength];
+  this.syncMarkerInterval = syncMarkerInterval;
+  }
+
   public void initialize(InputSplit genericSplit,
                          TaskAttemptContext context) throws IOException {
     FileSplit split = (FileSplit) genericSplit;
@@ -60,63 +67,131 @@ public class InterRecordReader extends R
     end = start + split.getLength();
     final Path file = split.getPath();
 
-    // open the file and seek to the start of the split
+    // open the file
     FileSystem fs = file.getFileSystem(job);
     FSDataInputStream fileIn = fs.open(split.getPath());
-    if (start != 0) {
-        fileIn.seek(start);
+
+    // read the magic byte sequence serving as record marker but only if the file is not empty
+    if (!(start == 0 && end == 0)) {
+        fileIn.readFully(0, syncMarker,

svn commit: r1804497 - in /pig/trunk: CHANGES.txt src/org/apache/pig/backend/hadoop/executionengine/spark/SparkPigSplit.java

2017-08-09 Thread szita
Author: szita
Date: Wed Aug  9 08:41:45 2017
New Revision: 1804497

URL: http://svn.apache.org/viewvc?rev=1804497&view=rev
Log:
PIG-5283: Configuration is not passed to SparkPigSplits on the backend (szita)

Modified:
pig/trunk/CHANGES.txt

pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkPigSplit.java

Modified: pig/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1804497&r1=1804496&r2=1804497&view=diff
==
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Wed Aug  9 08:41:45 2017
@@ -40,6 +40,8 @@ OPTIMIZATIONS
  
 BUG FIXES
 
+PIG-5283: Configuration is not passed to SparkPigSplits on the backend (szita)
+
 PIG-5284: Fix flakyness introduced by PIG-3655 (szita)
 
 PIG-5278: Unit test failures because of PIG-5264 (nkollar via rohini)

Modified: 
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkPigSplit.java
URL: 
http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkPigSplit.java?rev=1804497&r1=1804496&r2=1804497&view=diff
==
--- 
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkPigSplit.java
 (original)
+++ 
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkPigSplit.java
 Wed Aug  9 08:41:45 2017
@@ -22,16 +22,21 @@ import java.io.DataInput;
 import java.io.DataOutput;
 import java.io.IOException;
 import java.io.Serializable;
+import java.util.List;
 
 import org.apache.hadoop.conf.Configurable;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.CommonConfigurationKeys;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.mapred.SplitLocationInfo;
 import org.apache.hadoop.mapreduce.InputSplit;
 import org.apache.hadoop.mapreduce.lib.input.FileSplit;
+import org.apache.pig.PigConfiguration;
 import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigSplit;
 
+import com.google.common.collect.Lists;
+
 /**
  * Wrapper class for PigSplits in Spark mode
  *
@@ -124,11 +129,13 @@ public interface SparkPigSplit extends W
 
        @Override
        public void readFields(DataInput is) throws IOException {
+           this.getConf().readFields(is);
            pigSplit.readFields(is);
        }
 
        @Override
        public void write(DataOutput os) throws IOException {
+           SparkPigSplitsUtils.writeConfigForPigSplits(this.getConf(), os);
            pigSplit.write(os);
        }
 
@@ -242,11 +249,13 @@ public interface SparkPigSplit extends W
 
         @Override
         public void readFields(DataInput is) throws IOException {
+            this.getConf().readFields(is);
             pigSplit.readFields(is);
         }
 
         @Override
         public void write(DataOutput os) throws IOException {
+            SparkPigSplitsUtils.writeConfigForPigSplits(this.getConf(), os);
             pigSplit.write(os);
         }
 
@@ -301,4 +310,32 @@ public interface SparkPigSplit extends W
         }
     }
 
+    public static class SparkPigSplitsUtils {
+
+        private static final List<String> PIGSPLIT_CONFIG_KEYS = Lists.newArrayList(
+                CommonConfigurationKeys.IO_SERIALIZATIONS_KEY,
+                PigConfiguration.PIG_COMPRESS_INPUT_SPLITS
+        );
+
+        /**
+         * Writes a subset of the originalConf into the output stream os. Only keys in PIGSPLIT_CONFIG_KEYS are
+         * considered, for optimization purposes. During deserialization on a Spark executor we need to take care of
+         * setting the configuration manually because Spark only sets an empty Configuration instance on the PigSplit.
+         * @param originalConf
+         * @param os
+         * @throws IOException
+         */
+        public static void writeConfigForPigSplits(Configuration originalConf, DataOutput os) throws IOException {
+            Configuration conf = new Configuration(false);
+            for (String key : PIGSPLIT_CONFIG_KEYS) {
+                String value = originalConf.get(key);
+                if (value != null) {
+                    conf.set(key, value);
+                }
+            }
+            conf.write(os);
+        }
+
+    }
+
 }
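
The write()/readFields() pairing above amounts to the following round trip. This self-contained sketch uses only Hadoop's Configuration API; the key is one of the two whitelisted above, and on a real executor the reading side is SparkPigSplit.readFields():

    import java.io.*;
    import org.apache.hadoop.conf.Configuration;

    public class ConfRoundTripSketch {
        public static void main(String[] args) throws IOException {
            Configuration trimmed = new Configuration(false);
            trimmed.set("io.serializations",
                    "org.apache.hadoop.io.serializer.WritableSerialization");

            // Producer side: serialize the trimmed configuration, as write() does.
            ByteArrayOutputStream bytes = new ByteArrayOutputStream();
            trimmed.write(new DataOutputStream(bytes));

            // Executor side: repopulate the empty Configuration Spark hands to the split.
            Configuration restored = new Configuration(false);
            restored.readFields(new DataInputStream(
                    new ByteArrayInputStream(bytes.toByteArray())));
            System.out.println(restored.get("io.serializations"));
        }
    }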




svn commit: r1805427 - in /pig/trunk: CHANGES.txt build.xml

2017-08-18 Thread szita
Author: szita
Date: Fri Aug 18 13:34:36 2017
New Revision: 1805427

URL: http://svn.apache.org/viewvc?rev=1805427&view=rev
Log:
PIG-5294: Spark unit tests are always run in spark1 mode (szita)

Modified:
pig/trunk/CHANGES.txt
pig/trunk/build.xml

Modified: pig/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1805427&r1=1805426&r2=1805427&view=diff
==
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Fri Aug 18 13:34:36 2017
@@ -44,6 +44,8 @@ OPTIMIZATIONS
  
 BUG FIXES
 
+PIG-5294: Spark unit tests are always run in spark1 mode (szita)
+
 PIG-5277: Spark mode is writing nulls among tuples to the output (workaround) 
(szita)
 
 PIG-5283: Configuration is not passed to SparkPigSplits on the backend (szita)

Modified: pig/trunk/build.xml
URL: 
http://svn.apache.org/viewvc/pig/trunk/build.xml?rev=1805427&r1=1805426&r2=1805427&view=diff
==
--- pig/trunk/build.xml (original)
+++ pig/trunk/build.xml Fri Aug 18 13:34:36 2017
@@ -456,6 +456,7 @@
 
 
 
+
 
 
 
 
 
-
+
 *** Building Test Sources ***
 *** To compile with all warnings enabled, supply 
-Dall.warnings=1 on command line ***
 *** Else, you will only be warned about deprecations ***
@@ -892,32 +893,32 @@
 
 
 
-
+
 
 Tests failed!
 
 
-
+
 
 Tests failed!
 
 
-
+
 
 Tests failed!
 
 
-
+
 
 Tests failed!
 
 
-
+
 
 Tests failed!
 
 
-
+
 
 Tests failed!
 
@@ -998,7 +999,7 @@
 
 
 
+
depends="setWindowsPath,setLinuxPath,compile-test,jar-simple,debugger.check,jackson-pig-3039-test-download">
 
   
 




svn commit: r1799421 - in /pig/trunk: CHANGES.txt build.xml

2017-06-21 Thread szita
Author: szita
Date: Wed Jun 21 09:59:36 2017
New Revision: 1799421

URL: http://svn.apache.org/viewvc?rev=1799421&view=rev
Log:
PIG-5262: Fix jdiff related issues: fail build upon error, correct xml 
character escaping (szita)

Modified:
pig/trunk/CHANGES.txt
pig/trunk/build.xml

Modified: pig/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1799421&r1=1799420&r2=1799421&view=diff
==
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Wed Jun 21 09:59:36 2017
@@ -32,6 +32,8 @@ OPTIMIZATIONS
  
 BUG FIXES
 
+PIG-5262: Fix jdiff related issues: fail build upon error, correct xml 
character escaping (szita)
+
 PIG-5225: Several unit tests are not annotated with @Test (nkollar via rohini)
 
 

Modified: pig/trunk/build.xml
URL: 
http://svn.apache.org/viewvc/pig/trunk/build.xml?rev=1799421&r1=1799420&r2=1799421&view=diff
==
--- pig/trunk/build.xml (original)
+++ pig/trunk/build.xml Wed Jun 21 09:59:36 2017
@@ -1574,6 +1574,8 @@
 
 
 
+
+
 
 
 
@@ -1601,6 +1603,10 @@
   

 
+
+
+   
+
  
 
  




svn commit: r1799414 [2/2] - in /pig/trunk: build.xml src/docs/jdiff/pig_0.16.0.xml src/docs/jdiff/pig_0.17.0.xml

2017-06-21 Thread szita
Added: pig/trunk/src/docs/jdiff/pig_0.17.0.xml
URL: 
http://svn.apache.org/viewvc/pig/trunk/src/docs/jdiff/pig_0.17.0.xml?rev=1799414&view=auto
==
--- pig/trunk/src/docs/jdiff/pig_0.17.0.xml (added)
+++ pig/trunk/src/docs/jdiff/pig_0.17.0.xml Wed Jun 21 09:29:16 2017
@@ -0,0 +1,93183 @@
[The 93,183 added lines of generated jdiff XML are omitted here; the mail archive stripped their markup, leaving no readable content.]

svn commit: r1799414 [1/2] - in /pig/trunk: build.xml src/docs/jdiff/pig_0.16.0.xml src/docs/jdiff/pig_0.17.0.xml

2017-06-21 Thread szita
Author: szita
Date: Wed Jun 21 09:29:16 2017
New Revision: 1799414

URL: http://svn.apache.org/viewvc?rev=1799414&view=rev
Log:
Jdiff change for 0.17.0

Added:
pig/trunk/src/docs/jdiff/pig_0.17.0.xml
Removed:
pig/trunk/src/docs/jdiff/pig_0.16.0.xml
Modified:
pig/trunk/build.xml

Modified: pig/trunk/build.xml
URL: 
http://svn.apache.org/viewvc/pig/trunk/build.xml?rev=1799414&r1=1799413&r2=1799414&view=diff
==
--- pig/trunk/build.xml (original)
+++ pig/trunk/build.xml Wed Jun 21 09:29:16 2017
@@ -286,7 +286,7 @@
 
 
 
-
+
 http://hadoop.apache.org/${name}/docs/r${jdiff.stable}/api/"/>
 
 




svn commit: r1799423 - in /pig/branches/branch-0.17: CHANGES.txt build.xml

2017-06-21 Thread szita
Author: szita
Date: Wed Jun 21 10:09:51 2017
New Revision: 1799423

URL: http://svn.apache.org/viewvc?rev=1799423&view=rev
Log:
PIG-5262: Fix jdiff related issues: fail build upon error, correct xml 
character escaping (szita)

Modified:
pig/branches/branch-0.17/CHANGES.txt
pig/branches/branch-0.17/build.xml

Modified: pig/branches/branch-0.17/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/pig/branches/branch-0.17/CHANGES.txt?rev=1799423&r1=1799422&r2=1799423&view=diff
==
--- pig/branches/branch-0.17/CHANGES.txt (original)
+++ pig/branches/branch-0.17/CHANGES.txt Wed Jun 21 10:09:51 2017
@@ -111,6 +111,8 @@ OPTIMIZATIONS
  
 BUG FIXES
 
+PIG-5262: Fix jdiff related issues: fail build upon error, correct xml 
character escaping (szita)
+
 PIG-5248: Fix TestCombiner#testGroupByLimit after PigOnSpark merge (rohini)
 
 PIG-5245: TestGrunt.testStopOnFailure is flaky (rohini)

Modified: pig/branches/branch-0.17/build.xml
URL: 
http://svn.apache.org/viewvc/pig/branches/branch-0.17/build.xml?rev=1799423&r1=1799422&r2=1799423&view=diff
==
--- pig/branches/branch-0.17/build.xml (original)
+++ pig/branches/branch-0.17/build.xml Wed Jun 21 10:09:51 2017
@@ -1574,6 +1574,8 @@
 
 
 
+
+
 
 
 
@@ -1601,6 +1603,10 @@
   

 
+
+
+   
+
  
 
  




svn commit: r1799357 - in /pig/site: author/src/documentation/content/xdocs/ publish/ publish/docs/r0.17.0/ publish/docs/r0.17.0/api/ publish/docs/r0.17.0/api/org/ publish/docs/r0.17.0/api/org/apache/

2017-06-20 Thread szita
Author: szita
Date: Tue Jun 20 16:52:49 2017
New Revision: 1799357

URL: http://svn.apache.org/viewvc?rev=1799357&view=rev
Log:
Pig release 0.17.0


[This commit notification would consist of 921 parts, 
which exceeds the limit of 50, so it was shortened to the summary.]


svn commit: r1796452 - in /pig/trunk: CHANGES.txt src/docs/src/documentation/content/xdocs/pig-index.xml

2017-05-27 Thread szita
Author: szita
Date: Sat May 27 20:29:46 2017
New Revision: 1796452

URL: http://svn.apache.org/viewvc?rev=1796452&view=rev
Log:
PIG-5188: Review pig-index.xml (szita)

Modified:
pig/trunk/CHANGES.txt
pig/trunk/src/docs/src/documentation/content/xdocs/pig-index.xml

Modified: pig/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1796452&r1=1796451&r2=1796452&view=diff
==
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Sat May 27 20:29:46 2017
@@ -36,6 +36,8 @@ PIG-5067: Revisit union on numeric type
  
 IMPROVEMENTS
 
+PIG-5188: Review pig-index.xml (szita)
+
 PIG-4924: Translate failures.maxpercent MR setting to Tez (rohini)
 
 PIG-5236: json simple jar not included automatically with piggybank 
AvroStorage (satishsaley via rohini)

Modified: pig/trunk/src/docs/src/documentation/content/xdocs/pig-index.xml
URL: 
http://svn.apache.org/viewvc/pig/trunk/src/docs/src/documentation/content/xdocs/pig-index.xml?rev=1796452&r1=1796451&r2=1796452&view=diff
==
--- pig/trunk/src/docs/src/documentation/content/xdocs/pig-index.xml (original)
+++ pig/trunk/src/docs/src/documentation/content/xdocs/pig-index.xml Sat May 27 
20:29:46 2017
@@ -152,6 +152,8 @@
 
 BinStorage function
 
+Bloom join
+
 Boolean expressions
 
 Boolean operators
@@ -426,6 +428,8 @@
 
 IMPORT (macros) operator
 
+IN function
+
 INDEXOF function
 
 installing Pig
@@ -491,6 +495,7 @@
 JOIN (outer) operator
 
 joins
+ bloom joins
  inner joins
  join 
optimizations
  merge joins
@@ -626,6 +631,8 @@
 
 NATIVE operator
 
+NonFSLoadFunc interface 
+
 nested blocks (FOREACH operator) 
 
 NOT (Boolean)
@@ -821,6 +828,8 @@
 
 REGEX_EXTRACT_ALL function
 
+REGEX_SEARCH function
+
 REGISTER statement
 
 regular expressions. See pattern matching
@@ -1069,8 +1078,9 @@

 
 
-Z (top)   
+Z (top)
 
+Zeppelin
 
   
 




svn commit: r1796639 [12/12] - in /pig/trunk: ./ bin/ ivy/ src/META-INF/services/ src/docs/src/documentation/content/xdocs/ src/org/apache/pig/ src/org/apache/pig/backend/hadoop/executionengine/mapRed

2017-05-29 Thread szita
Modified: pig/trunk/test/org/apache/pig/test/TestPigRunner.java
URL: 
http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/TestPigRunner.java?rev=1796639&r1=1796638&r2=1796639&view=diff
==
--- pig/trunk/test/org/apache/pig/test/TestPigRunner.java (original)
+++ pig/trunk/test/org/apache/pig/test/TestPigRunner.java Mon May 29 15:00:39 
2017
@@ -41,11 +41,13 @@ import org.apache.commons.lang3.ArrayUti
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.mapred.Counters;
+import org.apache.pig.ExecType;
 import org.apache.pig.PigConfiguration;
 import org.apache.pig.PigRunner;
 import org.apache.pig.PigRunner.ReturnCode;
 import org.apache.pig.backend.hadoop.datastorage.ConfigurationUtil;
 import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MRConfiguration;
+import org.apache.pig.backend.hadoop.executionengine.spark.SparkExecType;
 import org.apache.pig.impl.PigContext;
 import org.apache.pig.impl.io.FileLocalizer;
 import org.apache.pig.impl.plan.OperatorPlan;
@@ -58,6 +60,7 @@ import org.apache.pig.tools.pigstats.Pig
 import org.apache.pig.tools.pigstats.PigStatsUtil;
 import org.apache.pig.tools.pigstats.mapreduce.MRJobStats;
 import org.apache.pig.tools.pigstats.mapreduce.MRPigStatsUtil;
+import org.apache.pig.tools.pigstats.spark.SparkJobStats;
 import org.junit.AfterClass;
 import org.junit.Assume;
 import org.junit.Before;
@@ -207,12 +210,13 @@ public class TestPigRunner {
         PigStats stats = PigRunner.run(args, new TestNotificationListener(execType));
 
         assertTrue(stats.isSuccessful());
-        if (execType.toString().startsWith("tez")) {
-            assertEquals(1, stats.getNumberJobs());
-            assertEquals(stats.getJobGraph().size(), 1);
-        } else {
+        if (execType.equals("mapreduce")) {
             assertEquals(2, stats.getNumberJobs());
             assertEquals(stats.getJobGraph().size(), 2);
+        } else {
+            // Tez and Spark
+            assertEquals(1, stats.getNumberJobs());
+            assertEquals(stats.getJobGraph().size(), 1);
         }
 
         Configuration conf = ConfigurationUtil.toConfiguration(stats.getPigProperties());
@@ -274,6 +278,10 @@ public class TestPigRunner {
             assertEquals(stats.getJobGraph().size(), 1);
             // 5 vertices
             assertEquals(stats.getJobGraph().getSources().get(0).getPlan().size(), 5);
+        } else if (execType.equals("spark")) {
+            // In spark mode, the number of spark jobs is determined by the number of POStores.
+            // 1 POStore generates 1 spark job.
+            assertEquals(stats.getJobGraph().size(), 1);
         } else {
             assertEquals(stats.getJobGraph().size(), 4);
         }
@@ -294,7 +302,12 @@ public class TestPigRunner {
             //   Need to investigate
             // assertEquals("B", ((JobStats) stats.getJobGraph().getPredecessors(
             //         js).get(0)).getAlias());
+        } else if (execType.equals("spark")) {
+            assertEquals("A,B", ((JobStats) stats.getJobGraph().getSources().get(
+                    0)).getAlias());
+            // TODO: alias is not set for sample-aggregation/partition/sort job.
         } else {
+
             assertEquals("A", ((JobStats) stats.getJobGraph().getSources().get(
                     0)).getAlias());
             assertEquals("B", ((JobStats) stats.getJobGraph().getPredecessors(
@@ -323,7 +336,14 @@ public class TestPigRunner {
         String[] args = { "-x", execType, PIG_FILE };
         PigStats stats = PigRunner.run(args, new TestNotificationListener(execType));
         assertTrue(stats.isSuccessful());
-        assertTrue(stats.getJobGraph().size() == 1);
+        if (execType.equals("spark")) {
+            // In spark mode, the number of spark jobs is determined by the number of POStores.
+            // 2 POStores generate 2 spark jobs.
+            assertTrue(stats.getJobGraph().size() == 2);
+        } else {
+            assertTrue(stats.getJobGraph().size() == 1);
+        }
+
         // Each output file should include the following:
         // output:
         //   1\t2\t3\n
@@ -372,7 +392,13 @@ public class TestPigRunner {
         String[] args = { "-x", execType, PIG_FILE };
         PigStats stats = PigRunner.run(args, new TestNotificationListener(execType));
         assertTrue(stats.isSuccessful());
-        assertEquals(stats.getJobGraph().size(), 1);
+        if (execType.equals("spark")) {
+            // In spark mode, the number of spark jobs is determined by the number of POStores.
+            // 2 POStores generate 2 spark jobs.
+

svn commit: r1796639 [9/12] - in /pig/trunk: ./ bin/ ivy/ src/META-INF/services/ src/docs/src/documentation/content/xdocs/ src/org/apache/pig/ src/org/apache/pig/backend/hadoop/executionengine/mapRedu

2017-05-29 Thread szita
Added: 
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/plan/SparkOperator.java
URL: 
http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/plan/SparkOperator.java?rev=1796639&view=auto
==
--- 
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/plan/SparkOperator.java
 (added)
+++ 
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/plan/SparkOperator.java
 Mon May 29 15:00:39 2017
@@ -0,0 +1,302 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pig.backend.hadoop.executionengine.spark.plan;
+
+import java.util.ArrayList;
+import java.util.BitSet;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import 
org.apache.pig.backend.hadoop.executionengine.physicalLayer.PhysicalOperator;
+import 
org.apache.pig.backend.hadoop.executionengine.physicalLayer.plans.PhysicalPlan;
+import org.apache.pig.impl.plan.Operator;
+import org.apache.pig.impl.plan.OperatorKey;
+import org.apache.pig.impl.plan.VisitorException;
+import org.apache.pig.impl.util.MultiMap;
+
+/**
+ * An operator model for a Spark job. Acts as a host to the plans that will
+ * execute in spark.
+ */
+public class SparkOperator extends Operator<SparkOpPlanVisitor> {
+    private static enum OPER_FEATURE {
+        NONE,
+        // Indicate if this job is a sampling job
+        SAMPLER,
+        // Indicate if this job is a merge indexer
+        INDEXER,
+        // Indicate if this job is a group by job
+        GROUPBY,
+        // Indicate if this job is a cogroup job
+        COGROUP,
+        // Indicate if this job is a regular join job
+        HASHJOIN,
+        // Indicate if this job is a union job
+        UNION,
+        // Indicate if this job is a native job
+        NATIVE,
+        // Indicate if this job is a limit job
+        LIMIT,
+        // Indicate if this job is a limit job after sort
+        LIMIT_AFTER_SORT;
+    };
+
+    public PhysicalPlan physicalPlan;
+
+    public Set<String> UDFs;
+
+    /* Name of the Custom Partitioner used */
+    public String customPartitioner = null;
+
+    public Set<PhysicalOperator> scalars;
+
+    public int requestedParallelism = -1;
+
+    private BitSet feature = new BitSet();
+
+    private boolean splitter = false;
+
+    // Name of the partition file generated by sampling process,
+    // Used by Skewed Join
+    private String skewedJoinPartitionFile;
+
+    private boolean usingTypedComparator = false;
+
+    private boolean combineSmallSplits = true;
+
+    private List<String> crossKeys = null;
+
+    private MultiMap<OperatorKey, OperatorKey> multiQueryOptimizeConnectionMap = new MultiMap<OperatorKey, OperatorKey>();
+
+    // Indicates if a UDF comparator is used
+    boolean isUDFComparatorUsed = false;
+
+    //The quantiles file name if globalSort is true
+    private String quantFile;
+
+    //Indicates if this job is an order by job
+    private boolean globalSort = false;
+
+    public SparkOperator(OperatorKey k) {
+        super(k);
+        physicalPlan = new PhysicalPlan();
+        UDFs = new HashSet<String>();
+        scalars = new HashSet<PhysicalOperator>();
+    }
+
+    @Override
+    public boolean supportsMultipleInputs() {
+        return true;
+    }
+
+    @Override
+    public boolean supportsMultipleOutputs() {
+        return true;
+    }
+
+    @Override
+    public String name() {
+        String udfStr = getUDFsAsStr();
+        StringBuilder sb = new StringBuilder("Spark" + "("
+                + requestedParallelism + (udfStr.equals("") ? "" : ",")
+                + udfStr + ")" + " - " + mKey.toString());
+        return sb.toString();
+    }
+
+    private String getUDFsAsStr() {
+        StringBuilder sb = new StringBuilder();
+        if (UDFs != null && UDFs.size() > 0) {
+            for (String str : UDFs) {
+                sb.append(str.substring(str.lastIndexOf('.') + 1));
+                sb.append(',');
+            }
+            sb.deleteCharAt(sb.length() - 1);
+        }
+        return sb.toString();
+    }
+
+    public void add(PhysicalOperator physicalOper) {
+        this.physicalPlan.add(physicalOper);
+    }
+
+    @Override
+    public 

svn commit: r1796639 [11/12] - in /pig/trunk: ./ bin/ ivy/ src/META-INF/services/ src/docs/src/documentation/content/xdocs/ src/org/apache/pig/ src/org/apache/pig/backend/hadoop/executionengine/mapRed

2017-05-29 Thread szita
Added: pig/trunk/test/org/apache/pig/spark/TestIndexedKey.java
URL: 
http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/spark/TestIndexedKey.java?rev=1796639&view=auto
==
--- pig/trunk/test/org/apache/pig/spark/TestIndexedKey.java (added)
+++ pig/trunk/test/org/apache/pig/spark/TestIndexedKey.java Mon May 29 15:00:39 
2017
@@ -0,0 +1,164 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pig.spark;
+
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+import 
org.apache.pig.backend.hadoop.executionengine.spark.converter.IndexedKey;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.data.TupleFactory;
+import static org.junit.Assert.assertEquals;
+
+@RunWith(JUnit4.class)
+public class TestIndexedKey {
+
+    /** Case 1: Compare IndexedKeys with the same index value
+     * key1    key2    equal?   hashCode1         hashCode2
+     * foo     null    N        hashCode(foo)     index
+     * null    foo     N        index             hashCode(foo)
+     * foo     foo     Y        hashCode(foo)     hashCode(foo)
+     * null    null    Y        index             index
+     * (1,1)   (1,1)   Y        hashCode((1,1))   hashCode((1,1))
+     * (1,)    (1,)    Y        hashCode((1,))    hashCode((1,))
+     * (1,1)   (1,2)   N        hashCode((1,1))   hashCode((1,2))
+     */
+@Test
+public void testIndexedKeyWithSameIndexValue() throws Exception {
+IndexedKey a0 = new IndexedKey(new Byte("0"), "foo");
+IndexedKey a1 = new IndexedKey(new Byte("0"), null);
+assertEquals(a0.equals(a1), false);
+assertEquals(a0.hashCode()==a1.hashCode(),false);
+
+IndexedKey a2 = new IndexedKey(new Byte("0"), null);
+IndexedKey a3 = new IndexedKey(new Byte("0"), "foo");
+assertEquals(a2.equals(a3),false);
+assertEquals(a2.hashCode()==a3.hashCode(),false);
+
+IndexedKey a4 = new IndexedKey(new Byte("0"), "foo");
+IndexedKey a5 = new IndexedKey(new Byte("0"), "foo");
+assertEquals(a4.equals(a5),true);
+assertEquals(a4.hashCode()==a5.hashCode(),true);
+
+IndexedKey a6 = new IndexedKey(new Byte("0"), null);
+IndexedKey a7 = new IndexedKey(new Byte("0"), null);
+assertEquals(a6.equals(a7),true);
+assertEquals(a6.hashCode()==a7.hashCode(),true);
+
+Tuple t1 = TupleFactory.getInstance().newTuple(2);
+t1.set(0,"1");
+t1.set(1,"1");
+Tuple t2 = TupleFactory.getInstance().newTuple(2);
+t2.set(0,"1");
+t2.set(1,"1");
+IndexedKey a8 = new IndexedKey(new Byte("0"), t1);
+IndexedKey a9 = new IndexedKey(new Byte("0"), t2);
+assertEquals(a8.equals(a9),true);
+assertEquals(a8.hashCode()==a9.hashCode(),true);
+
+Tuple t3 = TupleFactory.getInstance().newTuple(2);
+t3.set(0,"1");
+t3.set(1,null);
+Tuple t4 = TupleFactory.getInstance().newTuple(2);
+t4.set(0,"1");
+t4.set(1,null);
+IndexedKey a10 = new IndexedKey(new Byte("0"), t3);
+IndexedKey a11 = new IndexedKey(new Byte("0"), t4);
+assertEquals(a10.equals(a11),true);
+assertEquals(a10.hashCode()==a11.hashCode(),true);
+
+Tuple t5 = TupleFactory.getInstance().newTuple(2);
+t5.set(0,"1");
+t5.set(1,"1");
+Tuple t6 = TupleFactory.getInstance().newTuple(2);
+t6.set(0,"1");
+t6.set(1,"2");
+IndexedKey a12 = new IndexedKey(new Byte("0"), t5);
+IndexedKey a13 = new IndexedKey(new Byte("0"), t6);
+assertEquals(a12.equals(a13),false);
+assertEquals(a12.hashCode()==a13.hashCode(),false);
+}
+
+    /*
+     * Case 2: Compare IndexedKeys with different index values
+     * key1    key2    equal?   hashCode1         hashCode2
+     * foo     null    N        hashCode(foo)     index2
+     * null    foo     N        index1            hashCode(foo)
+     * foo     foo     Y        hashCode(foo)     hashCode(foo)
+     * null    null    N        index1            index2
+     * (1,1)   (1,1)   Y        hashCode((1,1))   hashCode((1,1))
+     * (1,)    (1,)    N  

svn commit: r1796639 [4/12] - in /pig/trunk: ./ bin/ ivy/ src/META-INF/services/ src/docs/src/documentation/content/xdocs/ src/org/apache/pig/ src/org/apache/pig/backend/hadoop/executionengine/mapRedu

2017-05-29 Thread szita
Added: 
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/FRJoinConverter.java
URL: 
http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/FRJoinConverter.java?rev=1796639&view=auto
==
--- 
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/FRJoinConverter.java
 (added)
+++ 
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/FRJoinConverter.java
 Mon May 29 15:00:39 2017
@@ -0,0 +1,110 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pig.backend.hadoop.executionengine.spark.converter;
+
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import 
org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POFRJoinSpark;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.pig.backend.executionengine.ExecException;
+import org.apache.pig.backend.hadoop.executionengine.physicalLayer.Result;
+import 
org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POFRJoin;
+import org.apache.pig.backend.hadoop.executionengine.spark.SparkPigContext;
+import org.apache.pig.backend.hadoop.executionengine.spark.SparkUtil;
+import org.apache.pig.data.Tuple;
+import org.apache.spark.api.java.function.FlatMapFunction;
+import org.apache.spark.rdd.RDD;
+
+@SuppressWarnings("serial")
+public class FRJoinConverter implements
+        RDDConverter<Tuple, Tuple, POFRJoin> {
+    private static final Log LOG = LogFactory.getLog(FRJoinConverter.class);
+
+    private Set<String> replicatedInputs;
+
+    public RDD<Tuple> convert(List<RDD<Tuple>> predecessors,
+                              POFRJoin poFRJoin) throws IOException {
+        SparkUtil.assertPredecessorSizeGreaterThan(predecessors, poFRJoin, 1);
+        RDD<Tuple> rdd = predecessors.get(0);
+
+        attachReplicatedInputs((POFRJoinSpark) poFRJoin);
+
+        FRJoinFunction frJoinFunction = new FRJoinFunction(poFRJoin);
+        return rdd.toJavaRDD().mapPartitions(frJoinFunction, true).rdd();
+    }
+
+    private void attachReplicatedInputs(POFRJoinSpark poFRJoin) {
+        Map<String, List<Tuple>> replicatedInputMap = new HashMap<>();
+
+        for (String replicatedInput : replicatedInputs) {
+            replicatedInputMap.put(replicatedInput,
+                    SparkPigContext.get().getBroadcastedVars().get(replicatedInput).value());
+        }
+
+        poFRJoin.attachInputs(replicatedInputMap);
+    }
+
+    private static class FRJoinFunction implements
+            FlatMapFunction<Iterator<Tuple>, Tuple>, Serializable {
+
+        private POFRJoin poFRJoin;
+        private FRJoinFunction(POFRJoin poFRJoin) {
+            this.poFRJoin = poFRJoin;
+        }
+
+        @Override
+        public Iterable<Tuple> call(final Iterator<Tuple> input) throws Exception {
+
+            return new Iterable<Tuple>() {
+
+                @Override
+                public Iterator<Tuple> iterator() {
+                    return new OutputConsumerIterator(input) {
+
+                        @Override
+                        protected void attach(Tuple tuple) {
+                            poFRJoin.setInputs(null);
+                            poFRJoin.attachInput(tuple);
+                        }
+
+                        @Override
+                        protected Result getNextResult() throws ExecException {
+                            return poFRJoin.getNextTuple();
+                        }
+
+                        @Override
+                        protected void endOfInput() {
+                        }
+                    };
+                }
+            };
+        }
+
+    }
+
+    public void setReplicatedInputs(Set<String> replicatedInputs) {
+        this.replicatedInputs = replicatedInputs;
+    }
+}
\ No newline at end of file

Added: 
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/FilterConverter.java
URL: 

svn commit: r1796639 [7/12] - in /pig/trunk: ./ bin/ ivy/ src/META-INF/services/ src/docs/src/documentation/content/xdocs/ src/org/apache/pig/ src/org/apache/pig/backend/hadoop/executionengine/mapRedu

2017-05-29 Thread szita
Added: 
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/ParallelismSetter.java
URL: 
http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/ParallelismSetter.java?rev=1796639&view=auto
==
--- 
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/ParallelismSetter.java
 (added)
+++ 
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/ParallelismSetter.java
 Mon May 29 15:00:39 2017
@@ -0,0 +1,55 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.pig.backend.hadoop.executionengine.spark.optimizer;
+
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.pig.FuncSpec;
+import 
org.apache.pig.backend.hadoop.executionengine.spark.operator.NativeSparkOperator;
+import 
org.apache.pig.backend.hadoop.executionengine.spark.plan.SparkOpPlanVisitor;
+import org.apache.pig.backend.hadoop.executionengine.spark.plan.SparkOperPlan;
+import org.apache.pig.backend.hadoop.executionengine.spark.plan.SparkOperator;
+import org.apache.pig.impl.PigContext;
+import org.apache.pig.impl.PigImplConstants;
+import org.apache.pig.impl.builtin.GFCross;
+import org.apache.pig.impl.plan.DependencyOrderWalker;
+import org.apache.pig.impl.plan.VisitorException;
+
+public class ParallelismSetter extends SparkOpPlanVisitor {
+    private JobConf jobConf;
+
+    public ParallelismSetter(SparkOperPlan plan, JobConf jobConf) {
+        super(plan, new DependencyOrderWalker<SparkOperator, SparkOperPlan>(plan));
+        this.jobConf = jobConf;
+    }
+
+    @Override
+    public void visitSparkOp(SparkOperator sparkOp) throws VisitorException {
+        if (sparkOp instanceof NativeSparkOperator) {
+            return;
+        }
+
+        if (sparkOp.getCrossKeys() != null) {
+            for (String key : sparkOp.getCrossKeys()) {
+                jobConf.set(PigImplConstants.PIG_CROSS_PARALLELISM + "." + key,
+                        // TODO: Estimate parallelism. For now we are hard-coding GFCross.DEFAULT_PARALLELISM
+                        Integer.toString(96));
+            }
+        }
+    }
+}
\ No newline at end of file

Added: 
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/SecondaryKeyOptimizerSpark.java
URL: 
http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/SecondaryKeyOptimizerSpark.java?rev=1796639&view=auto
==
--- 
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/SecondaryKeyOptimizerSpark.java
 (added)
+++ 
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/SecondaryKeyOptimizerSpark.java
 Mon May 29 15:00:39 2017
@@ -0,0 +1,217 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.pig.backend.hadoop.executionengine.spark.optimizer;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+import java.util.List;
+
+import org.apache.pig.backend.hadoop.executionengine.optimizer.SecondaryKeyOptimizer;
+import org.apache.pig.backend.hadoop.executionengine.physicalLayer.PhysicalOperator;
+import org.apache.pig.backend.hadoop.executionengine.physicalLayer.plans.PhysicalPlan;
+import 

svn commit: r1796639 [5/12] - in /pig/trunk: ./ bin/ ivy/ src/META-INF/services/ src/docs/src/documentation/content/xdocs/ src/org/apache/pig/ src/org/apache/pig/backend/hadoop/executionengine/mapRedu

2017-05-29 Thread szita
Added: pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/PackageConverter.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/PackageConverter.java?rev=1796639&view=auto
==
--- pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/PackageConverter.java (added)
+++ pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/PackageConverter.java Mon May 29 15:00:39 2017
@@ -0,0 +1,143 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pig.backend.hadoop.executionengine.spark.converter;
+
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.Iterator;
+import java.util.List;
+
+import scala.runtime.AbstractFunction1;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.pig.backend.executionengine.ExecException;
+import org.apache.pig.backend.hadoop.executionengine.physicalLayer.POStatus;
+import org.apache.pig.backend.hadoop.executionengine.physicalLayer.Result;
+import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POPackage;
+import org.apache.pig.backend.hadoop.executionengine.spark.SparkUtil;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.impl.io.NullableTuple;
+import org.apache.pig.impl.io.PigNullableWritable;
+import org.apache.spark.rdd.RDD;
+
+@SuppressWarnings({ "serial" })
+public class PackageConverter implements RDDConverter<Tuple, Tuple, POPackage> {
+    private static final Log LOG = LogFactory.getLog(PackageConverter.class);
+
+    @Override
+    public RDD<Tuple> convert(List<RDD<Tuple>> predecessors,
+            POPackage physicalOperator) throws IOException {
+        SparkUtil.assertPredecessorSize(predecessors, physicalOperator, 1);
+        RDD<Tuple> rdd = predecessors.get(0);
+        // package will generate the group from the result of the local
+        // rearrange
+        return rdd.map(new PackageFunction(physicalOperator),
+                SparkUtil.getManifest(Tuple.class));
+    }
+
+    private static class PackageFunction extends
+            AbstractFunction1<Tuple, Tuple> implements Serializable {
+
+        private final POPackage physicalOperator;
+
+        public PackageFunction(POPackage physicalOperator) {
+            this.physicalOperator = physicalOperator;
+        }
+
+        @Override
+        public Tuple apply(final Tuple t) {
+            // (key, Seq:{(index, key, value without key)})
+            if (LOG.isDebugEnabled())
+                LOG.debug("PackageFunction in " + t);
+            Result result;
+            try {
+                PigNullableWritable key = new PigNullableWritable() {
+
+                    public Object getValueAsPigType() {
+                        try {
+                            Object keyTuple = t.get(0);
+                            return keyTuple;
+                        } catch (ExecException e) {
+                            throw new RuntimeException(e);
+                        }
+                    }
+                };
+                final Iterator<Tuple> bagIterator = (Iterator<Tuple>) t.get(1);
+                Iterator<NullableTuple> iterator = new Iterator<NullableTuple>() {
+                    public boolean hasNext() {
+                        return bagIterator.hasNext();
+                    }
+
+                    public NullableTuple next() {
+                        try {
+                            // we want the value and index only
+                            Tuple next = bagIterator.next();
+                            NullableTuple nullableTuple = new NullableTuple(
+                                    (Tuple) next.get(1));
+                            nullableTuple.setIndex(((Number) next.get(0))
+                                    .byteValue());
+                            if (LOG.isDebugEnabled())
+                                LOG.debug("Setting index to " + next.get(0) +
+                                        " for tuple " + (Tuple) next.get(1));
+                            return nullableTuple;
+                        } catch (ExecException 
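To make the input shape of PackageFunction concrete, a standalone sketch (not part of the commit; only public Tuple/NullableTuple APIs are used) that builds one (index, key, value) element the way the local rearrange emits it, then wraps it exactly as the iterator above does:

import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
import org.apache.pig.impl.io.NullableTuple;

public class PackageInputShapeDemo {
    public static void main(String[] args) throws Exception {
        TupleFactory tf = TupleFactory.getInstance();

        Tuple value = tf.newTuple(1);
        value.set(0, "payload");          // the value with the key stripped

        Tuple element = tf.newTuple(3);
        element.set(0, (byte) 0);         // input index
        element.set(1, "key");            // the group key
        element.set(2, value);            // value without the key

        // Same wrapping PackageFunction's iterator performs per element.
        NullableTuple nt = new NullableTuple((Tuple) element.get(2));
        nt.setIndex(((Number) element.get(0)).byteValue());
        System.out.println(nt.getIndex() + " -> " + nt.getValueAsPigType());
    }
}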

svn commit: r1796639 [1/12] - in /pig/trunk: ./ bin/ ivy/ src/META-INF/services/ src/docs/src/documentation/content/xdocs/ src/org/apache/pig/ src/org/apache/pig/backend/hadoop/executionengine/mapRedu

2017-05-29 Thread szita
Author: szita
Date: Mon May 29 15:00:39 2017
New Revision: 1796639

URL: http://svn.apache.org/viewvc?rev=1796639&view=rev
Log:
PIG-4059: Pig On Spark

Added:

pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/NoopFilterRemoverUtil.java

pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POBroadcastSpark.java

pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POFRJoinSpark.java
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/

pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/JobGraphBuilder.java

pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/JobMetricsListener.java

pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/KryoSerializer.java

pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/MapReducePartitionerWrapper.java

pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkEngineConf.java

pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkExecType.java

pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkExecutionEngine.java

pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java

pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLocalExecType.java

pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkPOUserFuncVisitor.java

pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkPigContext.java

pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkPigRecordReader.java

pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkPigSplit.java

pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkUtil.java

pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/UDFJarsFinder.java
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/

pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/BroadcastConverter.java

pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/CollectedGroupConverter.java

pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/CounterConverter.java

pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/DistinctConverter.java

pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/FRJoinConverter.java

pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/FilterConverter.java

pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/ForEachConverter.java

pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/GlobalRearrangeConverter.java

pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/IndexedKey.java

pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/IteratorTransform.java

pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/JoinGroupSparkConverter.java

pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/LimitConverter.java

pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/LoadConverter.java

pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/LocalRearrangeConverter.java

pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/MergeCogroupConverter.java

pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/MergeJoinConverter.java

pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/OutputConsumerIterator.java

pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/PackageConverter.java

pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/PigSecondaryKeyComparatorSpark.java

pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/PoissonSampleConverter.java

pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/RDDConverter.java

pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/RankConverter.java

pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/ReduceByConverter.java

pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/SecondaryKeySortUtil.java

pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/SkewedJoinConverter.java

pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/SortConverter.java

pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/SparkSampleSortConverter.java

pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/SplitConverter.java

svn commit: r1796639 [10/12] - in /pig/trunk: ./ bin/ ivy/ src/META-INF/services/ src/docs/src/documentation/content/xdocs/ src/org/apache/pig/ src/org/apache/pig/backend/hadoop/executionengine/mapRed

2017-05-29 Thread szita
Added: pig/trunk/src/org/apache/pig/tools/pigstats/spark/SparkJobStats.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/tools/pigstats/spark/SparkJobStats.java?rev=1796639&view=auto
==
--- pig/trunk/src/org/apache/pig/tools/pigstats/spark/SparkJobStats.java (added)
+++ pig/trunk/src/org/apache/pig/tools/pigstats/spark/SparkJobStats.java Mon May 29 15:00:39 2017
@@ -0,0 +1,349 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.pig.tools.pigstats.spark;
+
+import java.util.List;
+import java.util.Map;
+
+import org.apache.pig.tools.pigstats.*;
+import scala.Option;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapred.Counters;
+import org.apache.pig.PigWarning;
+import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLoad;
+import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POStore;
+import org.apache.pig.backend.hadoop.executionengine.spark.JobMetricsListener;
+import org.apache.pig.backend.hadoop.executionengine.spark.plan.SparkOperator;
+import org.apache.pig.impl.logicalLayer.FrontendException;
+import org.apache.pig.newplan.PlanVisitor;
+import org.apache.spark.executor.ShuffleReadMetrics;
+import org.apache.spark.executor.ShuffleWriteMetrics;
+import org.apache.spark.executor.TaskMetrics;
+
+import com.google.common.collect.Maps;
+
+public class SparkJobStats extends JobStats {
+
+    private int jobId;
+    private Map<String, Long> stats = Maps.newLinkedHashMap();
+    private boolean disableCounter;
+    private Counters counters = null;
+    public static String FS_COUNTER_GROUP = "FS_GROUP";
+    private Map<String, SparkCounter<Map<String, Long>>> warningCounters = null;
+
+    protected SparkJobStats(int jobId, PigStats.JobGraph plan, Configuration conf) {
+        this(String.valueOf(jobId), plan, conf);
+        this.jobId = jobId;
+    }
+
+    protected SparkJobStats(String jobId, PigStats.JobGraph plan, Configuration conf) {
+        super(jobId, plan);
+        setConf(conf);
+    }
+
+    public void setConf(Configuration conf) {
+        super.setConf(conf);
+        disableCounter = conf.getBoolean("pig.disable.counter", false);
+        initializeHadoopCounter();
+    }
+
+    public void addOutputInfo(POStore poStore, boolean success,
+                              JobMetricsListener jobMetricsListener) {
+        if (!poStore.isTmpStore()) {
+            long bytes = getOutputSize(poStore, conf);
+            long recordsCount = -1;
+            if (disableCounter == false) {
+                recordsCount = SparkStatsUtil.getRecordCount(poStore);
+            }
+            OutputStats outputStats = new OutputStats(poStore.getSFile().getFileName(),
+                    bytes, recordsCount, success);
+            outputStats.setPOStore(poStore);
+            outputStats.setConf(conf);
+
+            outputs.add(outputStats);
+        }
+    }
+
+    public void addInputStats(POLoad po, boolean success,
+                              boolean singleInput) {
+
+        long recordsCount = -1;
+        if (disableCounter == false) {
+            recordsCount = SparkStatsUtil.getRecordCount(po);
+        }
+        long bytesRead = -1;
+        if (singleInput && stats.get("BytesRead") != null) {
+            bytesRead = stats.get("BytesRead");
+        }
+        InputStats inputStats = new InputStats(po.getLFile().getFileName(),
+                bytesRead, recordsCount, success);
+        inputStats.setConf(conf);
+
+        inputs.add(inputStats);
+    }
+
+    public void collectStats(JobMetricsListener jobMetricsListener) {
+        if (jobMetricsListener != null) {
+            Map<String, List<TaskMetrics>> taskMetrics = jobMetricsListener.getJobMetric(jobId);
+            if (taskMetrics == null) {
+                throw new RuntimeException("No task metrics available for jobId " + jobId);
+            }
+            stats = combineTaskMetrics(taskMetrics);
+        }
+    }
+
+    public Map<String, Long> getStats() {
+        return stats;
+    }
+
+    private Map<String, Long> combineTaskMetrics(Map
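The combineTaskMetrics method is cut off above in the archive; it folds the per-task metric maps into one job-level map. A framework-free sketch of that folding step (plain Map<String, Long> stands in for Spark's TaskMetrics; names are illustrative, not the commit's code):

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

public class MetricsFoldDemo {
    // Sums per-task metric maps (e.g. BytesRead) into a single job-level map.
    static Map<String, Long> combine(List<Map<String, Long>> perTask) {
        Map<String, Long> total = new LinkedHashMap<>();
        for (Map<String, Long> task : perTask) {
            for (Map.Entry<String, Long> e : task.entrySet()) {
                total.merge(e.getKey(), e.getValue(), Long::sum);
            }
        }
        return total;
    }

    public static void main(String[] args) {
        List<Map<String, Long>> tasks = new ArrayList<>();
        Map<String, Long> t1 = new LinkedHashMap<>();
        t1.put("BytesRead", 100L);
        tasks.add(t1);
        Map<String, Long> t2 = new LinkedHashMap<>();
        t2.put("BytesRead", 50L);
        tasks.add(t2);
        System.out.println(combine(tasks)); // {BytesRead=150}
    }
}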

svn commit: r1796639 [3/12] - in /pig/trunk: ./ bin/ ivy/ src/META-INF/services/ src/docs/src/documentation/content/xdocs/ src/org/apache/pig/ src/org/apache/pig/backend/hadoop/executionengine/mapRedu

2017-05-29 Thread szita
Added: pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java?rev=1796639&view=auto
==
--- pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java (added)
+++ pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java Mon May 29 15:00:39 2017
@@ -0,0 +1,735 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pig.backend.hadoop.executionengine.spark;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+import java.util.Set;
+import java.util.UUID;
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.transform.TransformerException;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.pig.PigConfiguration;
+import org.apache.pig.PigException;
+import org.apache.pig.PigWarning;
+import org.apache.pig.backend.BackendException;
+import org.apache.pig.backend.executionengine.ExecException;
+import org.apache.pig.backend.hadoop.datastorage.ConfigurationUtil;
+import org.apache.pig.backend.hadoop.executionengine.Launcher;
+import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MRConfiguration;
+import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigMapReduce;
+import org.apache.pig.backend.hadoop.executionengine.physicalLayer.PhysicalOperator;
+import org.apache.pig.backend.hadoop.executionengine.physicalLayer.plans.PhysicalPlan;
+import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POBroadcastSpark;
+import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POCollectedGroup;
+import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POCounter;
+import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.PODistinct;
+import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POFRJoinSpark;
+import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POFilter;
+import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POForEach;
+import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLimit;
+import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLoad;
+import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLocalRearrange;
+import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POMergeCogroup;
+import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POMergeJoin;
+import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POPackage;
+import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POPreCombinerLocalRearrange;
+import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.PORank;
+import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POSkewedJoin;
+import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POSort;
+import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POSplit;
+import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POStore;
+import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POStream;
+import 

svn commit: r1796639 [6/12] - in /pig/trunk: ./ bin/ ivy/ src/META-INF/services/ src/docs/src/documentation/content/xdocs/ src/org/apache/pig/ src/org/apache/pig/backend/hadoop/executionengine/mapRedu

2017-05-29 Thread szita
Added: pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/StoreConverter.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/StoreConverter.java?rev=1796639&view=auto
==
--- pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/StoreConverter.java (added)
+++ pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/StoreConverter.java Mon May 29 15:00:39 2017
@@ -0,0 +1,175 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pig.backend.hadoop.executionengine.spark.converter;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.pig.tools.pigstats.spark.SparkCounters;
+import org.apache.pig.tools.pigstats.spark.SparkPigStatusReporter;
+import org.apache.pig.tools.pigstats.spark.SparkStatsUtil;
+import scala.Tuple2;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.FileOutputFormat;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat;
+import org.apache.pig.PigConfiguration;
+import org.apache.pig.StoreFuncInterface;
+import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.JobControlCompiler;
+import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigOutputFormat;
+import org.apache.pig.backend.hadoop.executionengine.physicalLayer.PhysicalOperator;
+import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POStore;
+import org.apache.pig.backend.hadoop.executionengine.spark.SparkUtil;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.impl.util.ObjectSerializer;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.function.Function;
+import org.apache.spark.rdd.PairRDDFunctions;
+import org.apache.spark.rdd.RDD;
+
+import com.google.common.collect.Lists;
+
+/**
+ * Converter that takes a POStore and stores its content.
+ */
+@SuppressWarnings({ "serial" })
+public class StoreConverter implements
+        RDDConverter<Tuple, Tuple2<Text, Tuple>, POStore> {
+
+    private static final Log LOG = LogFactory.getLog(StoreConverter.class);
+
+    private JobConf jobConf = null;
+
+    public StoreConverter(JobConf jobConf) {
+        this.jobConf = jobConf;
+    }
+
+    @Override
+    public RDD<Tuple2<Text, Tuple>> convert(List<RDD<Tuple>> predecessors,
+            POStore op) throws IOException {
+        SparkUtil.assertPredecessorSize(predecessors, op, 1);
+        RDD<Tuple> rdd = predecessors.get(0);
+
+        SparkPigStatusReporter.getInstance().createCounter(SparkStatsUtil.SPARK_STORE_COUNTER_GROUP,
+                SparkStatsUtil.getCounterName(op));
+
+        // convert back to KV pairs
+        JavaRDD<Tuple2<Text, Tuple>> rddPairs = rdd.toJavaRDD().map(
+                buildFromTupleFunction(op));
+
+        PairRDDFunctions<Text, Tuple> pairRDDFunctions = new PairRDDFunctions<Text, Tuple>(
+                rddPairs.rdd(), SparkUtil.getManifest(Text.class),
+                SparkUtil.getManifest(Tuple.class), null);
+
+        POStore poStore = configureStorer(jobConf, op);
+
+        if ("true".equalsIgnoreCase(jobConf
+                .get(PigConfiguration.PIG_OUTPUT_LAZY))) {
+            Job storeJob = new Job(jobConf);
+            LazyOutputFormat.setOutputFormatClass(storeJob,
+                    PigOutputFormat.class);
+            jobConf = (JobConf) storeJob.getConfiguration();
+            jobConf.setOutputKeyClass(Text.class);
+            jobConf.setOutputValueClass(Tuple.class);
+            String fileName = poStore.getSFile().getFileName();
+            Path filePath = new Path(fileName);
+            FileOutputFormat.setOutputPath(jobConf, filePath);
+            pairRDDFunctions.saveAsNewAPIHadoopDataset(jobConf);
+        } else {
+            pairRDDFunctions.saveAsNewAPIHadoopFile(poStore.getSFile()
+                    .getFileName(), 
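For reference, a minimal sketch of the LazyOutputFormat wiring used in the lazy branch above, on a plain Hadoop job (hypothetical path and output format, not from this commit). The point of the indirection is that the real output format is only instantiated on the first write, so tasks that emit nothing leave no empty part files behind:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class LazyOutputDemo {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "lazy-output-demo");
        // The real format is wrapped; records only create output on first write.
        LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        FileOutputFormat.setOutputPath(job, new Path("/tmp/lazy-output-demo"));
    }
}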

svn commit: r1796647 - in /pig/trunk: CHANGES.txt src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java src/org/apache/pig/builtin/HiveUDAF.java

2017-05-29 Thread szita
Author: szita
Date: Mon May 29 15:19:17 2017
New Revision: 1796647

URL: http://svn.apache.org/viewvc?rev=1796647&view=rev
Log:
PIG-5194: HiveUDF fails with Spark exec type (szita)

Modified:
pig/trunk/CHANGES.txt

pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java
pig/trunk/src/org/apache/pig/builtin/HiveUDAF.java

Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1796647&r1=1796646&r2=1796647&view=diff
==
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Mon May 29 15:19:17 2017
@@ -109,6 +109,8 @@ OPTIMIZATIONS
  
 BUG FIXES
 
+PIG-5194: HiveUDF fails with Spark exec type (szita)
+
 PIG-5231: PigStorage with -schema may produce inconsistent outputs with more fields (knoguchi)
 
 PIG-5224: Extra foreach from ColumnPrune preventing Accumulator usage (knoguchi)

Modified: pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java?rev=1796647&r1=1796646&r2=1796647&view=diff
==
--- pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java (original)
+++ pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java Mon May 29 15:19:17 2017
@@ -20,6 +20,7 @@ package org.apache.pig.backend.hadoop.ex
 import java.io.File;
 import java.io.IOException;
 import java.io.PrintStream;
+import java.net.URL;
 import java.nio.file.Files;
 import java.nio.file.Paths;
 import java.util.ArrayList;
@@ -387,8 +388,8 @@ public class SparkLauncher extends Launc
 for (String file : shipFiles.split(",")) {
 File shipFile = new File(file.trim());
 if (shipFile.exists()) {
-                    addResourceToSparkJobWorkingDirectory(shipFile,
-                            shipFile.getName(), ResourceType.FILE);
+                    addResourceToSparkJobWorkingDirectory(shipFile, shipFile.getName(),
+                            shipFile.getName().endsWith(".jar") ? ResourceType.JAR : ResourceType.FILE );
 }
 }
 }
@@ -429,7 +430,7 @@ public class SparkLauncher extends Launc
 Set<String> allJars = new HashSet<String>();
 LOG.info("Add default jars to Spark Job");
 allJars.addAll(JarManager.getDefaultJars());
-LOG.info("Add extra jars to Spark Job");
+LOG.info("Add script jars to Spark Job");
 for (String scriptJar : pigContext.scriptJars) {
 allJars.add(scriptJar);
 }
@@ -448,6 +449,11 @@ public class SparkLauncher extends Launc
 allJars.add(scriptUDFJarFile.getAbsolutePath().toString());
 }
 
+LOG.info("Add extra jars to Spark job");
+for (URL extraJarUrl : pigContext.extraJars) {
+allJars.add(extraJarUrl.getFile());
+}
+
 //Upload all jars to spark working directory
 for (String jar : allJars) {
 File jarFile = new File(jar);
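The first hunk above changes ship-file handling so that anything ending in ".jar" is registered as a JAR and therefore lands on the executor classpath. The suffix check in isolation, as a runnable sketch (toy enum; the real ResourceType is internal to SparkLauncher):

import java.io.File;

public class ShipFileTypeDemo {
    enum ResourceType { JAR, FILE }

    // Mirrors the ternary the hunk introduces: ".jar" ship files become JARs.
    static ResourceType typeOf(File f) {
        return f.getName().endsWith(".jar") ? ResourceType.JAR : ResourceType.FILE;
    }

    public static void main(String[] args) {
        System.out.println(typeOf(new File("udfs.jar")));   // JAR
        System.out.println(typeOf(new File("lookup.txt"))); // FILE
    }
}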

Modified: pig/trunk/src/org/apache/pig/builtin/HiveUDAF.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/builtin/HiveUDAF.java?rev=1796647&r1=1796646&r2=1796647&view=diff
==
--- pig/trunk/src/org/apache/pig/builtin/HiveUDAF.java (original)
+++ pig/trunk/src/org/apache/pig/builtin/HiveUDAF.java Mon May 29 15:19:17 2017
@@ -135,11 +135,11 @@ public class HiveUDAF extends HiveUDFBas
 return;
 }
 
-                if (m == Mode.PARTIAL1 || m == Mode.FINAL) {
+                if (m == Mode.PARTIAL1 || m == Mode.PARTIAL2 || m == Mode.FINAL) {
                     intermediateOutputObjectInspector = evaluator.init(Mode.PARTIAL1, inputObjectInspectorAsArray);
                     intermediateOutputTypeInfo = TypeInfoUtils.getTypeInfoFromObjectInspector(intermediateOutputObjectInspector);
 
-                    if (m == Mode.FINAL) {
+                    if (m == Mode.PARTIAL2 || m == Mode.FINAL) {
                         intermediateInputObjectInspector = HiveUtils.createObjectInspector(intermediateOutputTypeInfo);
                         intermediateInputObjectInspectorAsArray = new ObjectInspector[] {intermediateInputObjectInspector};
                         outputObjectInspector = evaluator.init(Mode.FINAL, intermediateInputObjectInspectorAsArray);
@@ -208,20 +208,41 @@
         }
 
     static public class Initial extends EvalFunc<Tuple> {
+
+        private boolean inited = false;
+        private String funcName;
+        ConstantObjectInspectInfo constantsInfo;
+        private SchemaAndEvaluatorInfo schemaAndEvaluatorInfo = new Schem
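For orientation, the Hive evaluator modes the fix above touches, summarized as a runnable sketch. The enum and descriptions restate standard GenericUDAFEvaluator.Mode semantics (PARTIAL2 both consumes and produces the intermediate type, which is why it needs the same inspectors initialized as FINAL on the input side); this is illustrative, not Hive's code:

public class HiveModeMapping {
    enum Mode { PARTIAL1, PARTIAL2, FINAL, COMPLETE }

    // Rough mapping between evaluator mode and aggregation phase.
    static String describe(Mode m) {
        switch (m) {
            case PARTIAL1: return "raw input    -> intermediate (map side)";
            case PARTIAL2: return "intermediate -> intermediate (combiner)";
            case FINAL:    return "intermediate -> final (reduce side)";
            default:       return "raw input    -> final (no combiner)";
        }
    }

    public static void main(String[] args) {
        for (Mode m : Mode.values()) {
            System.out.println(m + ": " + describe(m));
        }
    }
}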

svn commit: r1796703 - in /pig/trunk: CHANGES.txt src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/plans/PhysicalPlan.java src/org/apache/pig/backend/hadoop/executionengine/spark/optimi

2017-05-29 Thread szita
Author: szita
Date: Mon May 29 21:45:33 2017
New Revision: 1796703

URL: http://svn.apache.org/viewvc?rev=1796703&view=rev
Log:
PIG-5207: BugFix e2e tests fail on spark (szita)

Modified:
pig/trunk/CHANGES.txt

pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/plans/PhysicalPlan.java

pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/CombinerOptimizer.java

Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1796703&r1=1796702&r2=1796703&view=diff
==
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Mon May 29 21:45:33 2017
@@ -109,6 +109,8 @@ OPTIMIZATIONS
  
 BUG FIXES
 
+PIG-5207: BugFix e2e tests fail on spark (szita)
+
 PIG-5194: HiveUDF fails with Spark exec type (szita)
 
 PIG-5231: PigStorage with -schema may produce inconsistent outputs with more 
fields (knoguchi)

Modified: pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/plans/PhysicalPlan.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/plans/PhysicalPlan.java?rev=1796703&r1=1796702&r2=1796703&view=diff
==
--- pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/plans/PhysicalPlan.java (original)
+++ pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/plans/PhysicalPlan.java Mon May 29 21:45:33 2017
@@ -24,6 +24,7 @@ import java.io.PrintStream;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;
+import java.util.Comparator;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
@@ -40,6 +41,8 @@ import org.apache.pig.impl.plan.PlanExce
 import org.apache.pig.impl.plan.VisitorException;
 import org.apache.pig.impl.util.MultiMap;
 
+import com.google.common.collect.HashBiMap;
+
 /**
  *
  * The base class for all types of physical plans.
@@ -304,6 +307,16 @@ public class PhysicalPlan extends Operat
 }
 }
 
+        //Fix order of edges in mToEdges lists
+        Map<PhysicalOperator, PhysicalOperator> invertedMatches = HashBiMap.create(matches).inverse();
+        for (PhysicalOperator newOp : clone.mToEdges.keySet()) {
+            List<PhysicalOperator> newList = clone.mToEdges.get(newOp);
+            if (newList.size() > 1) {
+                List<PhysicalOperator> originalList = this.mToEdges.get(invertedMatches.get(newOp));
+                Collections.sort(newList, new EdgeOrderHelper(originalList, invertedMatches));
+            }
+        }
+
 return clone;
 }
 
@@ -315,4 +328,21 @@ public class PhysicalPlan extends Operat
 {
 opmap = null;
 }
+
+
+    private static class EdgeOrderHelper implements Comparator<PhysicalOperator> {
+
+        private final Map<PhysicalOperator, PhysicalOperator> invertedMatches;
+        private final List<PhysicalOperator> originalList;
+
+        public EdgeOrderHelper(List<PhysicalOperator> originalList, Map<PhysicalOperator, PhysicalOperator> invertedMatches) {
+            this.originalList = originalList;
+            this.invertedMatches = invertedMatches;
+        }
+
+        @Override
+        public int compare(PhysicalOperator o1, PhysicalOperator o2) {
+            return originalList.indexOf(invertedMatches.get(o1)) - originalList.indexOf(invertedMatches.get(o2));
+        }
+    }
 }
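A self-contained illustration of the ordering idea behind EdgeOrderHelper (toy String operators instead of PhysicalOperators, not code from the commit): sort the cloned successors so they keep the relative order of their originals.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class EdgeOrderDemo {
    public static void main(String[] args) {
        List<String> originalOrder = Arrays.asList("opA", "opB", "opC");

        // clone -> original mapping, as produced by plan cloning.
        Map<String, String> invertedMatches = new HashMap<>();
        invertedMatches.put("cloneC", "opC");
        invertedMatches.put("cloneA", "opA");
        invertedMatches.put("cloneB", "opB");

        List<String> clones = new ArrayList<>(Arrays.asList("cloneC", "cloneA", "cloneB"));
        // Same comparison EdgeOrderHelper performs on PhysicalOperators.
        clones.sort((c1, c2) ->
                originalOrder.indexOf(invertedMatches.get(c1))
              - originalOrder.indexOf(invertedMatches.get(c2)));
        System.out.println(clones); // [cloneA, cloneB, cloneC]
    }
}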

Modified: pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/CombinerOptimizer.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/CombinerOptimizer.java?rev=1796703&r1=1796702&r2=1796703&view=diff
==
--- pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/CombinerOptimizer.java (original)
+++ pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/CombinerOptimizer.java Mon May 29 21:45:33 2017
@@ -296,8 +296,13 @@ public class CombinerOptimizer extends S
                 );
                 newProj.setResultType(DataType.BAG);
 
-                PhysicalOperator udfInput = pplan.getPredecessors(combineUdf).get(0);
-                pplan.disconnect(udfInput, combineUdf);
+                for (PhysicalOperator originalUdfInput : pplan.getPredecessors(combineUdf).toArray(new PhysicalOperator[0])) {
+                    if (pplan.getPredecessors(originalUdfInput) != null) {
+                        pplan.trimAbove(originalUdfInput);
+                    }
+                    pplan.remove(originalUdfInput);
+                }
+
                 pplan.add(newProj);
                 pplan.connect(newProj, combineUdf);
                 i++;




svn commit: r1796822 - in /pig/trunk: CHANGES.txt build.xml test/excluded-tests-mr test/excluded-tests-tez test/org/apache/pig/test/TestEvalPipeline2.java

2017-05-30 Thread szita
Author: szita
Date: Tue May 30 09:32:31 2017
New Revision: 1796822

URL: http://svn.apache.org/viewvc?rev=1796822&view=rev
Log:
PIG-5244: Several unit tests are failing in Tez mode after merging spark branch (nkollar via szita)

Modified:
pig/trunk/CHANGES.txt
pig/trunk/build.xml
pig/trunk/test/excluded-tests-mr
pig/trunk/test/excluded-tests-tez
pig/trunk/test/org/apache/pig/test/TestEvalPipeline2.java

Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1796822&r1=1796821&r2=1796822&view=diff
==
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Tue May 30 09:32:31 2017
@@ -122,6 +122,8 @@ OPTIMIZATIONS
  
 BUG FIXES
 
+PIG-5244: Several unit tests are failing in Tez mode after merging spark branch (nkollar via szita)
+
 PIG-5207: BugFix e2e tests fail on spark (szita)
 
 PIG-5194: HiveUDF fails with Spark exec type (szita)

Modified: pig/trunk/build.xml
URL: http://svn.apache.org/viewvc/pig/trunk/build.xml?rev=1796822&r1=1796821&r2=1796822&view=diff
==
--- pig/trunk/build.xml (original)
+++ pig/trunk/build.xml Tue May 30 09:32:31 2017
@@ -909,6 +909,9 @@
 
 
 
+
+
+
 
 
 

Modified: pig/trunk/test/excluded-tests-mr
URL: http://svn.apache.org/viewvc/pig/trunk/test/excluded-tests-mr?rev=1796822&r1=1796821&r2=1796822&view=diff
==
--- pig/trunk/test/excluded-tests-mr (original)
+++ pig/trunk/test/excluded-tests-mr Tue May 30 09:32:31 2017
@@ -1,2 +1,2 @@
 **/tez/*.java
-+**/spark/*.java
\ No newline at end of file
+**/spark/*.java
\ No newline at end of file

Modified: pig/trunk/test/excluded-tests-tez
URL: http://svn.apache.org/viewvc/pig/trunk/test/excluded-tests-tez?rev=1796822&r1=1796821&r2=1796822&view=diff
==
--- pig/trunk/test/excluded-tests-tez (original)
+++ pig/trunk/test/excluded-tests-tez Tue May 30 09:32:31 2017
@@ -1,2 +1,2 @@
 **/Test*MR.java
-+**/spark/*.java
\ No newline at end of file
+**/spark/*.java
\ No newline at end of file

Modified: pig/trunk/test/org/apache/pig/test/TestEvalPipeline2.java
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/TestEvalPipeline2.java?rev=1796822&r1=1796821&r2=1796822&view=diff
==
--- pig/trunk/test/org/apache/pig/test/TestEvalPipeline2.java (original)
+++ pig/trunk/test/org/apache/pig/test/TestEvalPipeline2.java Tue May 30 09:32:31 2017
@@ -1597,9 +1597,8 @@ public class TestEvalPipeline2 {
 
 String[] expected = new String[] {"(1,A)", "(1,B)", "(2,C)"};
 
-        Util.checkQueryOutputs(iter, expected,
-                org.apache.pig.newplan.logical.Util.translateSchema(pigServer.dumpSchema("flattened")),
-                Util.isSparkExecType(cluster.getExecType()));
+        Util.checkQueryOutputsAfterSortRecursive(iter, expected,
+                org.apache.pig.newplan.logical.Util.translateSchema(pigServer.dumpSchema("flattened")));
 }
 
 // See PIG-2237
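The test change above swaps an order-sensitive check for an order-insensitive one, since row order is engine-dependent. In the spirit of checkQueryOutputsAfterSortRecursive, a minimal sketch of sort-before-compare (illustrative only, not Util's actual implementation):

import java.util.Arrays;

public class SortedCompareDemo {
    // Compares two result sets while ignoring row order.
    static boolean sameRows(String[] actual, String[] expected) {
        String[] a = actual.clone();
        String[] e = expected.clone();
        Arrays.sort(a);
        Arrays.sort(e);
        return Arrays.equals(a, e);
    }

    public static void main(String[] args) {
        String[] actual = {"(2,C)", "(1,A)", "(1,B)"};
        String[] expected = {"(1,A)", "(1,B)", "(2,C)"};
        System.out.println(sameRows(actual, expected)); // true
    }
}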




svn commit: r1796824 - in /pig/branches/branch-0.17: CHANGES.txt build.xml test/excluded-tests-mr test/excluded-tests-tez test/org/apache/pig/test/TestEvalPipeline2.java

2017-05-30 Thread szita
Author: szita
Date: Tue May 30 09:51:27 2017
New Revision: 1796824

URL: http://svn.apache.org/viewvc?rev=1796824&view=rev
Log:
PIG-5244: Several unit tests are failing in Tez mode after merging spark branch (nkollar via szita)

Modified:
pig/branches/branch-0.17/CHANGES.txt
pig/branches/branch-0.17/build.xml
pig/branches/branch-0.17/test/excluded-tests-mr
pig/branches/branch-0.17/test/excluded-tests-tez
pig/branches/branch-0.17/test/org/apache/pig/test/TestEvalPipeline2.java

Modified: pig/branches/branch-0.17/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.17/CHANGES.txt?rev=1796824&r1=1796823&r2=1796824&view=diff
==
--- pig/branches/branch-0.17/CHANGES.txt (original)
+++ pig/branches/branch-0.17/CHANGES.txt Tue May 30 09:51:27 2017
@@ -111,6 +111,8 @@ OPTIMIZATIONS
  
 BUG FIXES
 
+PIG-5244: Several unit tests are failing in Tez mode after merging spark branch (nkollar via szita)
+
 PIG-5207: BugFix e2e tests fail on spark (szita)
 
 PIG-5194: HiveUDF fails with Spark exec type (szita)

Modified: pig/branches/branch-0.17/build.xml
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.17/build.xml?rev=1796824&r1=1796823&r2=1796824&view=diff
==
--- pig/branches/branch-0.17/build.xml (original)
+++ pig/branches/branch-0.17/build.xml Tue May 30 09:51:27 2017
@@ -909,6 +909,9 @@
 
 
 
+
+
+
 
 
 

Modified: pig/branches/branch-0.17/test/excluded-tests-mr
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.17/test/excluded-tests-mr?rev=1796824&r1=1796823&r2=1796824&view=diff
==
--- pig/branches/branch-0.17/test/excluded-tests-mr (original)
+++ pig/branches/branch-0.17/test/excluded-tests-mr Tue May 30 09:51:27 2017
@@ -1,2 +1,2 @@
 **/tez/*.java
-+**/spark/*.java
\ No newline at end of file
+**/spark/*.java
\ No newline at end of file

Modified: pig/branches/branch-0.17/test/excluded-tests-tez
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.17/test/excluded-tests-tez?rev=1796824&r1=1796823&r2=1796824&view=diff
==
--- pig/branches/branch-0.17/test/excluded-tests-tez (original)
+++ pig/branches/branch-0.17/test/excluded-tests-tez Tue May 30 09:51:27 2017
@@ -1,2 +1,2 @@
 **/Test*MR.java
-+**/spark/*.java
\ No newline at end of file
+**/spark/*.java
\ No newline at end of file

Modified: pig/branches/branch-0.17/test/org/apache/pig/test/TestEvalPipeline2.java
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.17/test/org/apache/pig/test/TestEvalPipeline2.java?rev=1796824&r1=1796823&r2=1796824&view=diff
==
--- pig/branches/branch-0.17/test/org/apache/pig/test/TestEvalPipeline2.java (original)
+++ pig/branches/branch-0.17/test/org/apache/pig/test/TestEvalPipeline2.java Tue May 30 09:51:27 2017
@@ -1597,9 +1597,8 @@ public class TestEvalPipeline2 {
 
 String[] expected = new String[] {"(1,A)", "(1,B)", "(2,C)"};
 
-        Util.checkQueryOutputs(iter, expected,
-                org.apache.pig.newplan.logical.Util.translateSchema(pigServer.dumpSchema("flattened")),
-                Util.isSparkExecType(cluster.getExecType()));
+        Util.checkQueryOutputsAfterSortRecursive(iter, expected,
+                org.apache.pig.newplan.logical.Util.translateSchema(pigServer.dumpSchema("flattened")));
 }
 
 // See PIG-2237




svn commit: r1797327 - in /pig/branches/branch-0.17: RELEASE_NOTES.txt build.xml src/docs/src/documentation/content/xdocs/perf.xml src/docs/src/documentation/content/xdocs/start.xml src/docs/src/docum

2017-06-02 Thread szita
Author: szita
Date: Fri Jun  2 08:29:36 2017
New Revision: 1797327

URL: http://svn.apache.org/viewvc?rev=1797327&view=rev
Log:
Updating release notes and version numbers for Pig 0.17 release

Modified:
pig/branches/branch-0.17/RELEASE_NOTES.txt
pig/branches/branch-0.17/build.xml
pig/branches/branch-0.17/src/docs/src/documentation/content/xdocs/perf.xml
pig/branches/branch-0.17/src/docs/src/documentation/content/xdocs/start.xml
pig/branches/branch-0.17/src/docs/src/documentation/content/xdocs/test.xml

Modified: pig/branches/branch-0.17/RELEASE_NOTES.txt
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.17/RELEASE_NOTES.txt?rev=1797327&r1=1797326&r2=1797327&view=diff
==
--- pig/branches/branch-0.17/RELEASE_NOTES.txt (original)
+++ pig/branches/branch-0.17/RELEASE_NOTES.txt Fri Jun  2 08:29:36 2017
@@ -1,26 +1,28 @@
-These notes are for Pig 0.3.0 release.
+These notes are for Pig 0.17.0 release.
 
 Highlights
 ==
 
-The main focus of this release is multiquery support that allows to optimize multiple queries within the same script that share a computation.
+The highlights of this release includes Pig on Spark
 
 System Requirements
 ===
 
-1. Java 1.6.x or newer, preferably from Sun. Set JAVA_HOME to the root of your
+1. Java 1.7.x or newer, preferably from Sun. Set JAVA_HOME to the root of your
 Java installation
 2. Ant build tool:  http://ant.apache.org - to build source only
-3. Cygwin: http://www.cygwin.com/ - to run under Windows 
-4. This release is compatible with Hadoop 0.18.x releases
+3. Run under Unix and Windows
+4. This release is compatible with Hadoop 2.5+ releases. Note Hadoop 1.X is not
+   supported anymore.
+5. For using Spark execution engine Spark 1.6.x is required. (Spark 2 support is
+   likely to come in the next release)
 
 Trying the Release
 ==
 
-1. Download pig-0.3.0.tar.gz
-2. Unpack the file: tar -xzvf pig-0.3.0.tar.gz
-3. Move into the installation directory: cd pig-0.3.0
+1. Download pig-0.17.0.tar.gz
+2. Unpack the file: tar -xzvf pig-0.17.0.tar.gz
+3. Move into the installation directory: cd pig-0.17.0
 4. To run pig without Hadoop cluster, execute the command below. This will
 take you into an interactive shell called grunt that allows you to navigate
 the local file system and execute Pig commands against the local files
@@ -36,7 +38,6 @@ export PIG_CLASSPATH=/hadoop/conf
 7. To run unit tests run
 ant test 
 8. To build jar file with available user defined functions run commands below.
-This currently only works with Java 1.6.x.
 cd contrib/piggybank/java
 ant
 9. To build the tutorial:
@@ -47,9 +48,6 @@ This currently only works with Java 1.6.
 Relevant Documentation
 ==
 
-Pig Language Manual(including Grunt commands):
-http://wiki.apache.org/pig-data/attachments/FrontPage/attachments/plrm.htm 
-UDF Manual: http://wiki.apache.org/pig/UDFManual
-Piggy Bank: http://wiki.apache.org/pig/PiggyBank
-Pig Tutorial: http://wiki.apache.org/pig/PigTutorial
-Pig Eclipse Plugin (PigPen):  http://wiki.apache.org/pig/PigPen
+Pig Documentation: http://pig.apache.org/docs/r0.17.0/
+Pig Wiki: https://cwiki.apache.org/confluence/display/PIG/Index
+Pig Tutorial: https://cwiki.apache.org/confluence/display/PIG/PigTutorial
\ No newline at end of file

Modified: pig/branches/branch-0.17/build.xml
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.17/build.xml?rev=1797327&r1=1797326&r2=1797327&view=diff
==
--- pig/branches/branch-0.17/build.xml (original)
+++ pig/branches/branch-0.17/build.xml Fri Jun  2 08:29:36 2017
@@ -42,7 +42,7 @@
 
 
 
-
+
 
 
 

Modified: pig/branches/branch-0.17/src/docs/src/documentation/content/xdocs/perf.xml
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.17/src/docs/src/documentation/content/xdocs/perf.xml?rev=1797327&r1=1797326&r2=1797327&view=diff
==
--- pig/branches/branch-0.17/src/docs/src/documentation/content/xdocs/perf.xml (original)
+++ pig/branches/branch-0.17/src/docs/src/documentation/content/xdocs/perf.xml Fri Jun  2 08:29:36 2017
@@ -42,7 +42,7 @@
   
   
 Tez session/container reuse
-One downside of MapReduce is the startup cost for a job is very high. That hurts the performance especially for small job. Tez alleviate the problem by using session and container reuse, so it is not necessary to start an application master for every job, and start a JVM for every task. By default, session/container reuse is on and we usually shall not turn it off. JVM reuse might cause some side effect if static variable is used since static variable might live across different jobs. So if static variable is used in EvalFunc/LoadFunc/StoreFunc, be sure to implement a cleanup function and register with http
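The removed line above is cut off mid-sentence in the archive; the registration mechanism it refers to is most likely Pig's JVMReuseManager. A hedged sketch, assuming the JVMReuseManager/@StaticDataCleanup API, of an EvalFunc that resets its static state between jobs sharing a reused container JVM:

import java.io.IOException;

import org.apache.pig.EvalFunc;
import org.apache.pig.JVMReuseManager;
import org.apache.pig.StaticDataCleanup;
import org.apache.pig.data.Tuple;

public class CountingFunc extends EvalFunc<Long> {
    private static long invocations = 0;

    static {
        // Assumed API: ask Pig to invoke the @StaticDataCleanup hook below
        // whenever a reused JVM moves on to the next job.
        JVMReuseManager.getInstance().registerForStaticDataCleanup(CountingFunc.class);
    }

    @StaticDataCleanup
    public static void staticDataCleanup() {
        invocations = 0; // forget state from the previous job in this JVM
    }

    @Override
    public Long exec(Tuple input) throws IOException {
        return ++invocations;
    }
}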

svn commit: r1797379 - /pig/trunk/build.xml

2017-06-02 Thread szita
Author: szita
Date: Fri Jun  2 11:58:31 2017
New Revision: 1797379

URL: http://svn.apache.org/viewvc?rev=1797379&view=rev
Log:
PIG-4923: Drop Hadoop 1.x support in Pig 0.17 (PIG-4923.srcReleaseFix.patch)

Modified:
pig/trunk/build.xml

Modified: pig/trunk/build.xml
URL: http://svn.apache.org/viewvc/pig/trunk/build.xml?rev=1797379&r1=1797378&r2=1797379&view=diff
==
--- pig/trunk/build.xml (original)
+++ pig/trunk/build.xml Fri Jun  2 11:58:31 2017
@@ -1177,7 +1177,8 @@
 
 
 
-
+
+
 
 
 




svn commit: r1797377 - /pig/branches/branch-0.17/build.xml

2017-06-02 Thread szita
Author: szita
Date: Fri Jun  2 11:57:07 2017
New Revision: 1797377

URL: http://svn.apache.org/viewvc?rev=1797377&view=rev
Log:
PIG-4923: Drop Hadoop 1.x support in Pig 0.17 (PIG-4923.srcReleaseFix.patch)

Modified:
pig/branches/branch-0.17/build.xml

Modified: pig/branches/branch-0.17/build.xml
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.17/build.xml?rev=1797377&r1=1797376&r2=1797377&view=diff
==
--- pig/branches/branch-0.17/build.xml (original)
+++ pig/branches/branch-0.17/build.xml Fri Jun  2 11:57:07 2017
@@ -1177,7 +1177,8 @@
 
 
 
-
+
+
 
 
 




svn commit: r1797388 - /pig/tags/release-0.17.0-rc0/

2017-06-02 Thread szita
Author: szita
Date: Fri Jun  2 13:24:52 2017
New Revision: 1797388

URL: http://svn.apache.org/viewvc?rev=1797388&view=rev
Log:
Pig 0.17.0-rc0 release.

Added:
pig/tags/release-0.17.0-rc0/   (props changed)
  - copied from r1797387, pig/branches/branch-0.17/

Propchange: pig/tags/release-0.17.0-rc0/
--
--- svn:ignore (added)
+++ svn:ignore Fri Jun  2 13:24:52 2017
@@ -0,0 +1,4 @@
+
+dist
+depend
+pig.jar

Propchange: pig/tags/release-0.17.0-rc0/
--
svn:mergeinfo = /hadoop/pig/branches/multiquery:741727-770826




svn commit: r20044 - in /dev/pig: ./ pig-0.17.0/

2017-06-16 Thread szita
Author: szita
Date: Fri Jun 16 07:29:23 2017
New Revision: 20044

Log:
Pig 0.17.0 release.

Added:
dev/pig/HEADER.html
dev/pig/KEYS
dev/pig/latest   (with props)
dev/pig/pig-0.17.0/README.txt
dev/pig/pig-0.17.0/RELEASE_NOTES.txt
dev/pig/pig-0.17.0/pig-0.17.0-src.tar.gz   (with props)
dev/pig/pig-0.17.0/pig-0.17.0-src.tar.gz.asc
dev/pig/pig-0.17.0/pig-0.17.0-src.tar.gz.md5
dev/pig/pig-0.17.0/pig-0.17.0.tar.gz   (with props)
dev/pig/pig-0.17.0/pig-0.17.0.tar.gz.asc
dev/pig/pig-0.17.0/pig-0.17.0.tar.gz.md5

Added: dev/pig/HEADER.html
==
--- dev/pig/HEADER.html (added)
+++ dev/pig/HEADER.html Fri Jun 16 07:29:23 2017
@@ -0,0 +1,7 @@
+<h1><a href="http://pig.apache.org/">Pig Releases</a></h1>
+
+<p>Please make sure you're downloading from <a href="http://www.apache.org/dyn/closer.cgi/pig/">a nearby
+mirror site</a>, not from www.apache.org.</p>
+
+<p>Older releases are available from the <a href="http://archive.apache.org/dist/pig/">archives</a>.</p>

Added: dev/pig/KEYS
==
--- dev/pig/KEYS (added)
+++ dev/pig/KEYS Fri Jun 16 07:29:23 2017
@@ -0,0 +1,211 @@
+pub  1024D/06687D96 2008-07-14 Olga Natkovich <ol...@yahoo-inc.com>
+sig 3   06687D96 2008-07-14   Olga Natkovich <ol...@yahoo-inc.com>
+sub  1024g/B6456039 2008-07-14
+sig 06687D96 2008-07-14   Olga Natkovich <ol...@yahoo-inc.com>
+
+-BEGIN PGP PUBLIC KEY BLOCK-
+Version: GnuPG v1.2.6 (GNU/Linux)
+
+mQGiBEz/+9QRBACtgXbyja8lZlYinW6pGW+a7A8jhaEUS1FN7IGTIqJtqwcN7pS3
+jL9x/8DpuMzI2H7/uEElqa3vUqwGsHZy1mK8xJ2IWfvDJkx8KKDBRFc/5gG4KrPz
+zhEtqiYy+UYAyYTIa31G0Yi0tUtl7w4NMrk4SbAXvidWOLlc7fPbnzNwewCg29NA
+d+DhtjMsGISw+9UdXskuK5kD/A2rRkkUGSXMMG3F6t8UxoEFS2hp6LFpjhPpp7bp
+MEx4Dd3SaWYMgv2kX2Lw4GKY/cWMCBuRvSkoGvbUrLbyFk3eUFDxBthc9VudFmga
+JpCXT/hIR1xJ8cqBJmfCc6K/z9U5kjyFfxm4lX/Y+iUL5f3QTvIAmVlawnZARAFu
+H2aPA/4/s+qQDulGGYXRNtNoHzppzfO24mPPvjgfDJ4jupZwOISm3/Jc4zY/dGxi
+x3/61cQIrZXJ+OBlsfIfPnz5zh1e+I4y1XzozZlvBPUaVoMKUZHUXyQsi9UlgvIz
+QdU7iK0MpYEpXC+DC1KQtkXwNSdx63+a40wyh+vW5QZ583Fmz7QkT2xnYSBOYXRr
+b3ZpY2ggPG9sZ2FuQHlhaG9vLWluYy5jb20+iF4EExECAB4FAkz/+9QCGwMGCwkI
+BwMCAxUCAwMWAgECHgECF4AACgkQd9QuHGc7InkMSQCeKR0Q2Qst2zc6t/JA7T6y
+X/ukBVIAnR5de4YI3+LsToqZJFUvPDg4uG5TuQENBEz/+9YQBADcTndyZ0Vmh46R
+UjqNRmv27rcsH62fvLIE7vUE2AQaDE1hEMwawjZXhtFa6Bzh6WWRXqkdNUWnx0E8
+HIHhI6jAOJ2Zgc5zickkFOC/zLuipogCt0n1saK0ZXyKOLWOEX/ggxb9QUcHjpT2
+Rih5Sf95XQ8q5jI6kPvv6YwX0rwMhwADBQQAqSbFpnQIb+V1xo6cwqBpjizVSadM
+M5lXmsVngW+QMRp5hrw2bGfzkyDo2eowItUVyleCzTl7npFFFHKtNDS2g40QOB3z
+YzKXLa+dsst2BUzbZryXpHUZLQ0IynsY14m3CTjuhoBe4awQl2bZrl6Yuvad/RNy
+eJYZ4TCJB3/0BnCISQQYEQIACQUCTP/71gIbDAAKCRB31C4cZzsieTujAKCfvkXU
+b6o3+hGaiQ91NNa06K3eOQCffz+zGQJQ4Ab18AVUfjq69TRHEHA=
+=0PGJ
+-END PGP PUBLIC KEY BLOCK-
+
+-BEGIN PGP PUBLIC KEY BLOCK-
+Version: GnuPG v1.2.6 (GNU/Linux)
+
+mQGiBEnNUFYRBACdB2/nTuzObFu/B6dqTo301jF0BiD0J4Nl1qz9cP5IwrJArCzw
+BQGSzN5UX0gKTpi9FWzXPWrc/On3jAk65q9FgkUTbKQqwtRK2UUjD7GpsMiryIBx
+3+f1d8MEkOsFVg+bOzKqIY2VbvksM3GElCusKaWTZpgdsY27UkdHZIj/3wCg4Qq4
+ftOTc25XTEPNgAiAE8kO5qcD/27pjtQu3nzA47VdAvWGNAiNjdwsan/bWUUAvl81
+XmQ0GHlo9D0iyOd2GX9WHuguZ4/tf203f0oOHmgvYFllsF6OttBTIfd57HXyrdQd
+VuI8JuKxqM0FYaQkDKNseJZH1X80d604IOWWPVcxPs0Aqdcw1F9e6e5XUPqkBXye
+6IKeA/9YxJpY5QFb4EP/AyyGjIj1CsmukwBHJ+fNKribdyY5YgaX0THAjXlYLRmC
+HZugmtVIF71EiDutHfq9RPiLP1O13nh3zapo9MD9CrqJqPE7SavGSQ+l1Tnedp6Y
+UGWffmL7e0XBCSvB3QKf+ZxOLIK2s72Wl5Mwd6gfBEdcBxBqsLQtQWxhbiBHYXRl
+cyAoTm8gY29tbWVudCkgPGdhdGVzQHlhaG9vLWluYy5jb20+iF4EExECAB4FAknN
+UFYCGwMGCwkIBwMCAxUCAwMWAgECHgECF4AACgkQiL0/VwTZuDLWUACfWFyJwm6D
+oAW88ITpvypdOtRakYsAoM22YGm4jla+y9lryous9eIHNu1VuQENBEnNUFgQBADK
+2OL+zY/4V80Bans/v0sRf8cUzB82eW84vMgxRmMS+Kwty8CBwEV4sgWbv0vJwifo
+9ZhlMLjqmBwGTR3wIXqtRQAyk8rLYod31KWFyt64vZaubbxZNDxiM5CMFO+q3xjL
+hbsMnIC/QliKT2d0K2radTp+jNz7lOkmSvZ9iQ7/0wADBgP/TWsRYmViLcUcOa1N
+4Cij8Y3c1tD2qYI5b9eDY5GiOeECss0CudJN/cIvDNstLtLa4JbX5INRpskVTVsx
+Duermrsj5/tONUb9GwBnhUuzA0GW1WCkpZXJG2Z2iwKcJ8wQ5KaPj9TNdahF5h7q
+outJyNeVe9TC2PytS0tfCzd2lnOISQQYEQIACQUCSc1QWAIbDAAKCRCIvT9XBNm4
+MrvDAJ4ySDj+5CSCB+DQ8PotTK5oX7fDcQCfc0d5dQrqc2ul8/4WDB/LmNcXuhI=
+=B5eJ
+-END PGP PUBLIC KEY BLOCK-
+-BEGIN PGP PUBLIC KEY BLOCK-
+Version: GnuPG v1.2.6 (GNU/Linux)
+
+mQGiBEvhqBsRBACLI1GZhvYhVqep2oyGFPKuB/xaK55TYMoWQT60dQ1iUMGgKXww
+yfsQ0Jd2sn/VJ4b1WmO7jfZ+5+ijHnk22j9ogVLJnFt/FAtYNonwt3lA5El6kbsD
+gU/juBRLXVnSo9tu9a5cy6l02iT3Ndq5yGuV6VBJnGEqX+D7UtS9oCBLswCgwMR3
+UWOhvo0dUQdZ7sIuko40GxsEAIRIpdh479C2Q6fGlp2FdqoRXzEUt13a3S0le/Pf
+1uIZLs8Ns9L1nrCT66bq//4qRweN8PmEaFT6UZk3tK29a46Vbc+fSbmGRYtAtmiU
+1P8jUTSKUZBb4MNAHq9MfICd9bxfMyBupUQhH2mlZqKafLg48D8G1jaOECnXO4YV
+aR0WA/9yJ95sfC4L6MN3iIaHpQcuLStGLyIGqWp44t2ok0AhY5CJm6YaKALV0Aki
++uiGNirMLdZTSNiDZvUu2XxShzPkzTW87Xh5QkkeNB/SeSTvox2O/5TZchCr+6qF
+rOd9qoxrfDzJeZkc3xY7G/bil40TWbOTbOPmK039vMI8PXe5VrQdRGFuaWVsIERh
+aSA8ZGFpanljQGdtYWlsLmNvbT6IYAQTEQIAIAUCS+GoGwIbAwYLCQgHAwIEFQII
+AwQWAgMBAh4BAheAAAoJEI3MXbca8D1y8/8Ao

svn commit: r1798663 - in /pig/tags: release-0.17.0-rc0/ release-0.17.0/

2017-06-14 Thread szita
Author: szita
Date: Wed Jun 14 11:08:58 2017
New Revision: 1798663

URL: http://svn.apache.org/viewvc?rev=1798663&view=rev
Log:
Pig 0.17.0 release.

Added:
pig/tags/release-0.17.0/   (props changed)
  - copied from r1798662, pig/tags/release-0.17.0-rc0/
Removed:
pig/tags/release-0.17.0-rc0/

Propchange: pig/tags/release-0.17.0/
--
--- svn:ignore (added)
+++ svn:ignore Wed Jun 14 11:08:58 2017
@@ -0,0 +1,4 @@
+
+dist
+depend
+pig.jar

Propchange: pig/tags/release-0.17.0/
--
svn:mergeinfo = /hadoop/pig/branches/multiquery:741727-770826




svn commit: r1809162 - in /pig/trunk: CHANGES.txt contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestAllLoader.java ivy.xml ivy/libraries.properties ivy/pig-template.xml iv

2017-09-21 Thread szita
Author: szita
Date: Thu Sep 21 13:40:47 2017
New Revision: 1809162

URL: http://svn.apache.org/viewvc?rev=1809162&view=rev
Log:
PIG-5298: Verify if org.mortbay.jetty is removable (nkollar via szita)

Modified:
pig/trunk/CHANGES.txt

pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestAllLoader.java
pig/trunk/ivy.xml
pig/trunk/ivy/libraries.properties
pig/trunk/ivy/pig-template.xml
pig/trunk/ivy/piggybank-template.xml

Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1809162&r1=1809161&r2=1809162&view=diff
==
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Thu Sep 21 13:40:47 2017
@@ -26,6 +26,8 @@ PIG-5282: Upgade to Java 8 (satishsaley
  
 IMPROVEMENTS
 
+PIG-5298: Verify if org.mortbay.jetty is removable (nkollar via szita)
+
 PIG-5268: Review of org.apache.pig.backend.hadoop.datastorage.HDataStorage 
(belugabehr via daijy)
 
 PIG-5288: Improve performance of PigTextRawBytesComparator (rohini)

Modified: pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestAllLoader.java
URL: http://svn.apache.org/viewvc/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestAllLoader.java?rev=1809162&r1=1809161&r2=1809162&view=diff
==
--- pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestAllLoader.java (original)
+++ pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestAllLoader.java Thu Sep 21 13:40:47 2017
@@ -56,9 +56,11 @@ import org.apache.pig.test.Util;
 import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;
-import org.mortbay.log.Log;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 public class TestAllLoader extends TestCase {
+    private static final Logger LOG = LoggerFactory.getLogger(TestAllLoader.class);
 
 enum TYPE {
 HIVERC(".rc", new HiveRCFileTestWriter()), GZIP_PLAIN(".gz",
@@ -424,7 +426,7 @@ public class TestAllLoader extends TestC
 count++;
 }
 
-        Log.info("Validating expected: " + totalRowCount + " against " + count);
+        LOG.info("Validating expected: " + totalRowCount + " against " + count);
 assertEquals(totalRowCount, count);
 }
 

Modified: pig/trunk/ivy.xml
URL: http://svn.apache.org/viewvc/pig/trunk/ivy.xml?rev=1809162&r1=1809161&r2=1809162&view=diff
==
--- pig/trunk/ivy.xml (original)
+++ pig/trunk/ivy.xml Thu Sep 21 13:40:47 2017
@@ -109,9 +109,7 @@
   conf="hadoop2->master"/>
 
-
-
 
@@ -168,12 +166,6 @@
   rev="${hadoop-mapreduce.version}" conf="hadoop2->master"/>
 
-
-  
-
-
 
Modified: pig/trunk/ivy/libraries.properties
URL: http://svn.apache.org/viewvc/pig/trunk/ivy/libraries.properties?rev=1809162&r1=1809161&r2=1809162&view=diff
==
--- pig/trunk/ivy/libraries.properties (original)
+++ pig/trunk/ivy/libraries.properties Thu Sep 21 13:40:47 2017
@@ -36,7 +36,6 @@ xmlenc.version=0.52
 jersey.version=1.8
 checkstyle.version=4.2
 ivy.version=2.2.0
-jasper.version=6.1.14
 groovy.version=2.4.5
 guava.version=11.0
 hadoop-common.version=2.7.3
@@ -55,7 +54,6 @@ jaxb-impl.version=2.2.3-1
 jdeb.version=0.8
 jdiff.version=1.0.9
 jettison.version=1.3.4
-jetty.version=6.1.26
 jline.version=2.11
 joda-time.version=2.9.9
 jopt.version=4.1
@@ -99,3 +97,4 @@ curator.version=2.6.0
 htrace.version=3.1.0-incubating
 commons-lang3.version=3.1
 scala-xml.version=1.0.5
+glassfish.el.version=3.0.1-b08
\ No newline at end of file

Modified: pig/trunk/ivy/pig-template.xml
URL: http://svn.apache.org/viewvc/pig/trunk/ivy/pig-template.xml?rev=1809162&r1=1809161&r2=1809162&view=diff
==
--- pig/trunk/ivy/pig-template.xml (original)
+++ pig/trunk/ivy/pig-template.xml Thu Sep 21 13:40:47 2017
@@ -49,26 +49,6 @@
   <version>3.1</version>
 </dependency>
 <dependency>
-  <groupId>org.mortbay.jetty</groupId>
-  <artifactId>jetty</artifactId>
-  <version>6.1.26</version>
-</dependency>
-<dependency>
-  <groupId>org.mortbay.jetty</groupId>
-  <artifactId>jetty-util</artifactId>
-  <version>6.1.26</version>
-</dependency>
-<dependency>
-  <groupId>org.mortbay.jetty</groupId>
-  <artifactId>jsp-api-2.1</artifactId>
-  <version>6.1.14</version>
-</dependency>
-<dependency>
-  <groupId>org.mortbay.jetty</groupId>
-  <artifactId>jsp-2.1</artifactId>
-  <version>6.1.14</version>
-</dependency>
-<dependency>
   <groupId>commons-el</groupId>
   <artifactId>commons-el</artifactId>
   <version>1.0</version>
@@ -134,16 +114,16 @@
   <version>2.9.9</version>
 </dependency>
 <dependency>
+  <groupId>org.glassfish</groupId>
+  <artifactId>javax.el</artifactId>
+  <version>3.0.1-b08</version>
+</dependency>
+<dependency>
   <groupId>org.apache.avro</groupId>
   <artifactId>avro</artifactId>
   <version>1.7.5</version>
   
 
-  
-  <groupId>org.mortbay.jetty</groupId>
-  <artifactId>jetty</artifactId>
-
-
   
   org.apache.ant
   ant

Modified: pig/trunk/ivy/piggybank-template.xml
URL: http://svn.apache.org/viewvc/pig/trunk/ivy/piggybank-template.xml?rev=1809162&r1=1809161&r2=1809162&view=diff

svn commit: r1811322 - in /pig/trunk: CHANGES.txt build.xml src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java src/org/apache/pig/impl/util/JarManager.java test/org/apache/pig/

2017-10-06 Thread szita
Author: szita
Date: Fri Oct  6 12:02:22 2017
New Revision: 1811322

URL: http://svn.apache.org/viewvc?rev=1811322&view=rev
Log:
PIG-5305: Enable yarn-client mode execution of tests in Spark (1) mode (szita)

Modified:
pig/trunk/CHANGES.txt
pig/trunk/build.xml

pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java
pig/trunk/src/org/apache/pig/impl/util/JarManager.java
pig/trunk/test/org/apache/pig/builtin/TestOrcStoragePushdown.java

Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1811322&r1=1811321&r2=1811322&view=diff
==
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Fri Oct  6 12:02:22 2017
@@ -26,6 +26,8 @@ PIG-5282: Upgade to Java 8 (satishsaley
  
 IMPROVEMENTS
 
+PIG-5305: Enable yarn-client mode execution of tests in Spark (1) mode (szita)
+
 PIG-4120: Broadcast the index file in case of POMergeCoGroup and POMergeJoin (satishsaley via rohini)
 
 PIG-5306: REGEX_EXTRACT() logs every line that doesn't match (satishsaley via rohini)

Modified: pig/trunk/build.xml
URL: http://svn.apache.org/viewvc/pig/trunk/build.xml?rev=1811322&r1=1811321&r2=1811322&view=diff
==
--- pig/trunk/build.xml (original)
+++ pig/trunk/build.xml Fri Oct  6 12:02:22 2017
@@ -118,6 +118,7 @@
 
 
 
+
 
 
 
@@ -160,7 +161,6 @@
 
 
 
-
 
 
 
@@ -278,6 +278,7 @@
   
value="${mvnrepo}/org/codehaus/jackson/jackson-core-asl/${jackson-pig-3039-test.version}/jackson-core-asl-${jackson-pig-3039-test.version}.jar"/>
 
+
 
 
 
@@ -893,32 +894,32 @@
 
 
 
-
+
 
 Tests failed!
 
 
-
+
 
 Tests failed!
 
 
-
+
 
 Tests failed!
 
 
-
+
 
 Tests failed!
 
 
-
+
 
 Tests failed!
 
 
-
+
 
 Tests failed!
 
@@ -957,6 +958,7 @@
 
 
 
+
 
 
 
@@ -999,7 +1001,7 @@
 
 
 
+
depends="setWindowsPath,setLinuxPath,compile-test,debugger.check,jackson-pig-3039-test-download">
 
   
 
@@ -1072,6 +1074,16 @@
 
 
 
+
+ *** Creating pigtest.jar ***
+
+
+
+
+
+
+
+
 
 
 

Modified: 
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java?rev=1811322&r1=1811321&r2=1811322&view=diff
==============================================================================
--- 
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java
 (original)
+++ 
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java
 Fri Oct  6 12:02:22 2017
@@ -431,6 +431,7 @@ public class SparkLauncher extends Launc
 Set<String> allJars = new HashSet<String>();
 LOG.info("Add default jars to Spark Job");
 allJars.addAll(JarManager.getDefaultJars());
+JarManager.addPigTestJarIfPresent(allJars);
 LOG.info("Add script jars to Spark Job");
 for (String scriptJar : pigContext.scriptJars) {
 allJars.add(scriptJar);
@@ -536,23 +537,35 @@ public class SparkLauncher extends Launc
 return sparkPlan;
 }
 
+
+private static String getMaster(PigContext pc){
+String master = null;
+if (pc.getExecType().isLocal()) {
+master = "local";
+} else {
+master = System.getenv("SPARK_MASTER");
+if (master == null) {
+LOG.info("SPARK_MASTER not specified, using \"local\"");
+master = "local";
+}
+}
+return master;
+}
+
 /**
  * Only one SparkContext may be active per JVM (SPARK-2243). When multiple threads start SparkLauncher,
- * the static member sparkContext should be initialized only once
+ * the static member sparkContext should be initialized only by either local or cluster mode at a time.
+ *
+ * In case it was already initialized with a different mode than what the new pigContext instance wants, it will
+ * close down the existing SparkContext and re-initialize it with the new mode.
  */
 private static synchronized void startSparkIfNeeded(JobConf jobConf, 
PigContext pc) throws PigException {
+String master = getMaster(pc);
+if (sparkContext != null && !master.equals(sparkContext.master())){
+sparkContext.close();
+  
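
For readers tracing the lifecycle rule above: a minimal, self-contained sketch of the restart-on-master-change pattern the javadoc describes. The holder class and method names below are illustrative only, not Pig's actual API:

    import org.apache.spark.SparkConf;
    import org.apache.spark.api.java.JavaSparkContext;

    // Illustrative JVM-wide holder: one SparkContext at a time (SPARK-2243),
    // rebuilt whenever a caller asks for a different master.
    public final class SparkContextHolder {
        private static JavaSparkContext sparkContext; // guarded by the class lock

        public static synchronized JavaSparkContext get(String master, String appName) {
            if (sparkContext != null && !master.equals(sparkContext.sc().master())) {
                // A context for another mode (e.g. "local" vs yarn-client) is
                // active; close it before creating one with the new master.
                sparkContext.close();
                sparkContext = null;
            }
            if (sparkContext == null) {
                sparkContext = new JavaSparkContext(
                        new SparkConf().setMaster(master).setAppName(appName));
            }
            return sparkContext;
        }
    }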

svn commit: r1816542 - in /pig/trunk: CHANGES.txt src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java

2017-11-28 Thread szita
Author: szita
Date: Tue Nov 28 13:14:51 2017
New Revision: 1816542

URL: http://svn.apache.org/viewvc?rev=1816542&view=rev
Log:
PIG-5316: Initialize mapred.task.id property for PoS jobs (nkollar via szita)

Modified:
pig/trunk/CHANGES.txt

pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java

Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1816542&r1=1816541&r2=1816542&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Tue Nov 28 13:14:51 2017
@@ -26,6 +26,8 @@ PIG-5282: Upgade to Java 8 (satishsaley
  
 IMPROVEMENTS
 
+PIG-5316: Initialize mapred.task.id property for PoS jobs (nkollar via szita)
+
 PIG-5302: Remove HttpClient dependency (nkollar via szita)
 
 PIG-5305: Enable yarn-client mode execution of tests in Spark (1) mode (szita)

Modified: 
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java?rev=1816542&r1=1816541&r2=1816542&view=diff
==============================================================================
--- 
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java
 (original)
+++ 
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java
 Tue Nov 28 13:14:51 2017
@@ -42,6 +42,7 @@ import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.TaskAttemptID;
 import org.apache.pig.PigConfiguration;
 import org.apache.pig.PigException;
 import org.apache.pig.PigWarning;
@@ -182,6 +183,7 @@ public class SparkLauncher extends Launc
 jobGroupID = String.format("%s-%s",sparkContext.getConf().getAppId(),
 UUID.randomUUID().toString());
 jobConf.set(MRConfiguration.JOB_ID,jobGroupID);
+jobConf.set(MRConfiguration.TASK_ID, new TaskAttemptID().toString());
 
 sparkContext.setJobGroup(jobGroupID, "Pig query to Spark cluster",
 false);
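
The one-line addition matters because storage handlers commonly parse this property, and when Pig runs on Spark nothing else sets it. A sketch of such a consumer (the class below is hypothetical, not part of Pig):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.mapred.TaskAttemptID;

    // Hypothetical consumer of mapred.task.id, similar in spirit to what
    // OutputFormats and StoreFuncs do when naming task-side files.
    public class TaskIdAwareWriter {
        public static TaskAttemptID currentTaskId(Configuration conf) {
            String id = conf.get("mapred.task.id");
            if (id == null) {
                // Before the fix above, Pig-on-Spark jobs could land here and
                // fail inside TaskAttemptID.forName(null).
                throw new IllegalStateException("mapred.task.id is not set");
            }
            return TaskAttemptID.forName(id);
        }
    }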




svn commit: r1816554 - /pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java

2017-11-28 Thread szita
Author: szita
Date: Tue Nov 28 15:25:15 2017
New Revision: 1816554

URL: http://svn.apache.org/viewvc?rev=1816554&view=rev
Log:
PIG-5316: Initialize mapred.task.id property for PoS jobs (fix)

Modified:

pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java

Modified: 
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java?rev=1816554&r1=1816553&r2=1816554&view=diff
==============================================================================
--- 
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java
 (original)
+++ 
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java
 Tue Nov 28 15:25:15 2017
@@ -42,7 +42,6 @@ import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.TaskAttemptID;
 import org.apache.pig.PigConfiguration;
 import org.apache.pig.PigException;
 import org.apache.pig.PigWarning;
@@ -75,6 +74,7 @@ import org.apache.pig.backend.hadoop.exe
 import 
org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POStore;
 import 
org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POStream;
 import 
org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POUnion;
+import org.apache.pig.backend.hadoop.executionengine.shims.HadoopShims;
 import 
org.apache.pig.backend.hadoop.executionengine.spark.converter.BroadcastConverter;
 import 
org.apache.pig.backend.hadoop.executionengine.spark.converter.CollectedGroupConverter;
 import 
org.apache.pig.backend.hadoop.executionengine.spark.converter.CounterConverter;
@@ -183,7 +183,7 @@ public class SparkLauncher extends Launc
 jobGroupID = String.format("%s-%s",sparkContext.getConf().getAppId(),
 UUID.randomUUID().toString());
 jobConf.set(MRConfiguration.JOB_ID,jobGroupID);
-jobConf.set(MRConfiguration.TASK_ID, new TaskAttemptID().toString());
+jobConf.set(MRConfiguration.TASK_ID, 
HadoopShims.getNewTaskAttemptID().toString());
 
 sparkContext.setJobGroup(jobGroupID, "Pig query to Spark cluster",
 false);




svn commit: r1817995 - in /pig/trunk: ./ test/org/apache/pig/test/

2017-12-13 Thread szita
Author: szita
Date: Wed Dec 13 10:29:18 2017
New Revision: 1817995

URL: http://svn.apache.org/viewvc?rev=1817995&view=rev
Log:
PIG-5318: Unit test failures on Pig on Spark with Spark 2.2 (nkollar via szita)

Modified:
pig/trunk/CHANGES.txt
pig/trunk/test/org/apache/pig/test/TestAssert.java
pig/trunk/test/org/apache/pig/test/TestEvalPipeline.java
pig/trunk/test/org/apache/pig/test/TestEvalPipeline2.java
pig/trunk/test/org/apache/pig/test/TestGrunt.java
pig/trunk/test/org/apache/pig/test/TestScalarAliases.java
pig/trunk/test/org/apache/pig/test/TestStoreBase.java
pig/trunk/test/org/apache/pig/test/Util.java

Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1817995&r1=1817994&r2=1817995&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Wed Dec 13 10:29:18 2017
@@ -60,6 +60,8 @@ OPTIMIZATIONS
  
 BUG FIXES
 
+PIG-5318: Unit test failures on Pig on Spark with Spark 2.2 (nkollar via szita)
+
 PIG-5201: Null handling on FLATTEN (knoguchi)
 
 PIG-5315: pig.script is not set for scripts run via PigServer (satishsaley via 
rohini)

Modified: pig/trunk/test/org/apache/pig/test/TestAssert.java
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/TestAssert.java?rev=1817995&r1=1817994&r2=1817995&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/test/TestAssert.java (original)
+++ pig/trunk/test/org/apache/pig/test/TestAssert.java Wed Dec 13 10:29:18 2017
@@ -25,6 +25,7 @@ import java.io.ByteArrayInputStream;
 import java.io.InputStream;
 import java.util.List;
 
+import org.apache.commons.lang3.exception.ExceptionUtils;
 import org.junit.Assert;
 
 import org.apache.pig.PigServer;
@@ -118,7 +119,7 @@ public class TestAssert {
   } catch (FrontendException fe) {
   if 
(pigServer.getPigContext().getExecType().toString().startsWith("TEZ")
   || 
pigServer.getPigContext().getExecType().toString().startsWith("SPARK")) {
-  Assert.assertTrue(fe.getCause().getMessage().contains(
+  
Assert.assertTrue(ExceptionUtils.getRootCause(fe).getMessage().contains(
   "Assertion violated: i should be greater than 1"));
   } else {
   Assert.assertTrue(fe.getCause().getMessage().contains(
@@ -150,7 +151,7 @@ public class TestAssert {
   } catch (FrontendException fe) {
   if 
(pigServer.getPigContext().getExecType().toString().startsWith("TEZ")
   || 
pigServer.getPigContext().getExecType().toString().startsWith("SPARK")) {
-  Assert.assertTrue(fe.getCause().getMessage().contains(
+  
Assert.assertTrue(ExceptionUtils.getRootCause(fe).getMessage().contains(
   "Assertion violated: i should be greater than 1"));
   } else {
   Assert.assertTrue(fe.getCause().getMessage().contains(

Modified: pig/trunk/test/org/apache/pig/test/TestEvalPipeline.java
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/TestEvalPipeline.java?rev=1817995&r1=1817994&r2=1817995&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/test/TestEvalPipeline.java (original)
+++ pig/trunk/test/org/apache/pig/test/TestEvalPipeline.java Wed Dec 13 
10:29:18 2017
@@ -24,7 +24,6 @@ import java.io.PrintStream;
 import java.io.PrintWriter;
 import java.io.StringWriter;
 import java.util.ArrayList;
-import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Iterator;

Modified: pig/trunk/test/org/apache/pig/test/TestEvalPipeline2.java
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/TestEvalPipeline2.java?rev=1817995&r1=1817994&r2=1817995&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/test/TestEvalPipeline2.java (original)
+++ pig/trunk/test/org/apache/pig/test/TestEvalPipeline2.java Wed Dec 13 
10:29:18 2017
@@ -34,6 +34,7 @@ import java.util.Map;
 import java.util.Properties;
 import java.util.Random;
 
+import org.apache.commons.lang3.exception.ExceptionUtils;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -1459,7 +1460,8 @@ public class TestEvalPipeline2 {
 pigServer.openIterator("b");
 Assert.fail();
 } catch (Exception e) {
-
Assert.assertTrue(e.getMessage().contains(ArrayList.class.getName()));
+
Assert.assertTrue(ExceptionUtils.getRootCause(e).getMessage().contains(
+"Unexpected data type " + ArrayList.class.getName() + 
" found in stream."));
 }
 }
 finally 
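
The substitution of fe.getCause() with ExceptionUtils.getRootCause(fe) is the point of these test changes: Tez and Spark wrap the original failure in extra layers, so a single getCause() no longer reaches the assertion message. A small standalone illustration using the commons-lang3 class imported above:

    import org.apache.commons.lang3.exception.ExceptionUtils;

    public class RootCauseDemo {
        public static void main(String[] args) {
            Exception root = new IllegalArgumentException(
                    "Assertion violated: i should be greater than 1");
            Exception wrapped = new RuntimeException("job failed",
                    new RuntimeException("stage failed", root));

            // getCause() peels exactly one layer; getRootCause() walks the chain.
            System.out.println(wrapped.getCause().getMessage());                   // stage failed
            System.out.println(ExceptionUtils.getRootCause(wrapped).getMessage()); // Assertion violated: ...
        }
    }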

svn commit: r1815189 - in /pig/trunk: CHANGES.txt NOTICE.txt build.xml ivy.xml ivy/libraries.properties ivy/pig-template.xml ivy/piggybank-template.xml ivy/pigunit-template.xml

2017-11-14 Thread szita
Author: szita
Date: Tue Nov 14 08:53:16 2017
New Revision: 1815189

URL: http://svn.apache.org/viewvc?rev=1815189&view=rev
Log:
PIG-5302: Remove HttpClient dependency (nkollar via szita)

Modified:
pig/trunk/CHANGES.txt
pig/trunk/NOTICE.txt
pig/trunk/build.xml
pig/trunk/ivy.xml
pig/trunk/ivy/libraries.properties
pig/trunk/ivy/pig-template.xml
pig/trunk/ivy/piggybank-template.xml
pig/trunk/ivy/pigunit-template.xml

Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1815189&r1=1815188&r2=1815189&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Tue Nov 14 08:53:16 2017
@@ -26,6 +26,8 @@ PIG-5282: Upgade to Java 8 (satishsaley
  
 IMPROVEMENTS
 
+PIG-5302: Remove HttpClient dependency (nkollar via szita)
+
 PIG-5305: Enable yarn-client mode execution of tests in Spark (1) mode (szita)
 
 PIG-4120: Broadcast the index file in case of POMergeCoGroup and POMergeJoin 
(satishsaley via rohini)

Modified: pig/trunk/NOTICE.txt
URL: http://svn.apache.org/viewvc/pig/trunk/NOTICE.txt?rev=1815189&r1=1815188&r2=1815189&view=diff
==============================================================================
--- pig/trunk/NOTICE.txt (original)
+++ pig/trunk/NOTICE.txt Tue Nov 14 08:53:16 2017
@@ -36,10 +36,6 @@ This product includes/uses HyperSQL (htt
 Copyright (c) 2001-2010, The HSQL Development Group
 All rights reserved.
 
-This product includes/uses xmlenc (http://xmlenc.sourceforge.net/)
-Copyright 2003-2005, Ernst de Haan <wfe.deh...@gmail.com>
-All rights reserved.
-
 This product includes/uses Joda (http://joda-time.sourceforge.net/)
 Copyright 2001-2006 Stephen Colebourne
 

Modified: pig/trunk/build.xml
URL: http://svn.apache.org/viewvc/pig/trunk/build.xml?rev=1815189&r1=1815188&r2=1815189&view=diff
==============================================================================
--- pig/trunk/build.xml (original)
+++ pig/trunk/build.xml Tue Nov 14 08:53:16 2017
@@ -776,7 +776,6 @@
 
 
 
-
 
 
 

Modified: pig/trunk/ivy.xml
URL: http://svn.apache.org/viewvc/pig/trunk/ivy.xml?rev=1815189&r1=1815188&r2=1815189&view=diff
==============================================================================
--- pig/trunk/ivy.xml (original)
+++ pig/trunk/ivy.xml Tue Nov 14 08:53:16 2017
@@ -55,12 +55,8 @@
   
 
-
 
-
 
 
 
-
 
Modified: pig/trunk/ivy/libraries.properties
URL: http://svn.apache.org/viewvc/pig/trunk/ivy/libraries.properties?rev=1815189&r1=1815188&r2=1815189&view=diff
==============================================================================
--- pig/trunk/ivy/libraries.properties (original)
+++ pig/trunk/ivy/libraries.properties Tue Nov 14 08:53:16 2017
@@ -26,13 +26,11 @@ commons-codec.version=1.4
 commons-io.version=2.3
 commons-el.version=1.0
 commons-logging.version=1.1.1
-commons-lang.version=2.4
+commons-lang.version=2.6
 commons-configuration.version=1.6
 commons-collections.version=3.2.1
 commons-collections4.version=4.0
-commons-httpclient.version=3.1
 commons-math3.version=3.1.1
-xmlenc.version=0.52
 jersey.version=1.8
 checkstyle.version=4.2
 ivy.version=2.2.0
@@ -44,7 +42,7 @@ hadoop-mapreduce.version=2.7.3
 hbase1.version=1.2.4
 hsqldb.version=1.8.0.10
 hive.version=1.2.1
-httpcomponents.version=4.1
+httpcomponents.version=4.4
 jackson.version=1.9.13
 jackson-pig-3039-test.version=1.9.9
 javacc.version=4.2
@@ -95,6 +93,6 @@ snappy.version=0.2
 leveldbjni.version=1.8
 curator.version=2.6.0
 htrace.version=3.1.0-incubating
-commons-lang3.version=3.1
+commons-lang3.version=3.6
 scala-xml.version=1.0.5
 glassfish.el.version=3.0.1-b08
\ No newline at end of file

Modified: pig/trunk/ivy/pig-template.xml
URL: http://svn.apache.org/viewvc/pig/trunk/ivy/pig-template.xml?rev=1815189&r1=1815188&r2=1815189&view=diff
==============================================================================
--- pig/trunk/ivy/pig-template.xml (original)
+++ pig/trunk/ivy/pig-template.xml Tue Nov 14 08:53:16 2017
@@ -28,16 +28,6 @@
   commons-cli
   1.2
 
-   
-  xmlenc
-  xmlenc
-  0.52
-
-
-  commons-httpclient
-  commons-httpclient
-  3.1
-
 
   commons-codec
   commons-codec

Modified: pig/trunk/ivy/piggybank-template.xml
URL: http://svn.apache.org/viewvc/pig/trunk/ivy/piggybank-template.xml?rev=1815189&r1=1815188&r2=1815189&view=diff
==============================================================================
--- pig/trunk/ivy/piggybank-template.xml (original)
+++ pig/trunk/ivy/piggybank-template.xml Tue Nov 14 08:53:16 2017
@@ -26,7 +26,7 @@
 
   commons-lang
   commons-lang
-  2.4
+  2.6
 
 
   log4j

Modified: pig/trunk/ivy/pigunit-template.xml
URL: 
http://svn.apache.org/viewvc/pig/trunk/ivy/pigunit-template.xml?rev=1815189=1815188=1815189=diff
==
--- pig/trunk/ivy/p

svn commit: r1834854 - in /pig/trunk: CHANGES.txt ivy.xml ivy/libraries.properties

2018-07-02 Thread szita
Author: szita
Date: Mon Jul  2 15:25:55 2018
New Revision: 1834854

URL: http://svn.apache.org/viewvc?rev=1834854&view=rev
Log:
PIG-5344: Update Apache HTTPD LogParser to latest version (nielsbasjes via 
szita)

Modified:
pig/trunk/CHANGES.txt
pig/trunk/ivy.xml
pig/trunk/ivy/libraries.properties

Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1834854&r1=1834853&r2=1834854&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Mon Jul  2 15:25:55 2018
@@ -26,6 +26,8 @@ PIG-5282: Upgade to Java 8 (satishsaley
  
 IMPROVEMENTS
 
+PIG-5344: Update Apache HTTPD LogParser to latest version (nielsbasjes via 
szita)
+
 PIG-4092: Predicate pushdown for Parquet (nkollar via rohini)
 
 PIG-5317: Upgrade old dependencies: commons-lang, hsqldb, commons-logging 
(nkollar via rohini)

Modified: pig/trunk/ivy.xml
URL: http://svn.apache.org/viewvc/pig/trunk/ivy.xml?rev=1834854&r1=1834853&r2=1834854&view=diff
==============================================================================
--- pig/trunk/ivy.xml (original)
+++ pig/trunk/ivy.xml Mon Jul  2 15:25:55 2018
@@ -74,13 +74,9 @@
 
 
-
-
-
+  conf="compile->master">
+  
+
 
Modified: pig/trunk/ivy/libraries.properties
URL: http://svn.apache.org/viewvc/pig/trunk/ivy/libraries.properties?rev=1834854&r1=1834853&r2=1834854&view=diff
==============================================================================
--- pig/trunk/ivy/libraries.properties (original)
+++ pig/trunk/ivy/libraries.properties Mon Jul  2 15:25:55 2018
@@ -19,7 +19,7 @@ apacheant.version=1.7.1
 apacherat.version=0.8
 automaton.version=1.11-8
 avro.version=1.7.5
-basjes-httpdlog-pigloader.version=2.4
+basjes-httpdlog-pigloader.version=5.0
 commons-beanutils.version=1.7.0
 commons-cli.version=1.2
 commons-codec.version=1.4




svn commit: r1820631 - in /pig/trunk: CHANGES.txt src/org/apache/pig/newplan/logical/relational/LOStore.java test/org/apache/pig/test/TestSchema.java

2018-01-09 Thread szita
Author: szita
Date: Tue Jan  9 10:03:07 2018
New Revision: 1820631

URL: http://svn.apache.org/viewvc?rev=1820631&view=rev
Log:
PIG-5325: Schema disambiguation can't be turned off for nested schemas (szita)

Modified:
pig/trunk/CHANGES.txt
pig/trunk/src/org/apache/pig/newplan/logical/relational/LOStore.java
pig/trunk/test/org/apache/pig/test/TestSchema.java

Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1820631&r1=1820630&r2=1820631&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Tue Jan  9 10:03:07 2018
@@ -64,6 +64,8 @@ OPTIMIZATIONS
  
 BUG FIXES
 
+PIG-5325: Schema disambiguation can't be turned off for nested schemas (szita)
+
 PIG-5311: POReservoirSample fails for more than Integer.MAX_VALUE records 
(rohini)
 
 PIG-3864: ToDate(userstring, format, timezone) computes DateTime with strange 
handling of Daylight Saving Time with location based timezones (daijy via 
rohini)

Modified: pig/trunk/src/org/apache/pig/newplan/logical/relational/LOStore.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/newplan/logical/relational/LOStore.java?rev=1820631&r1=1820630&r2=1820631&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/newplan/logical/relational/LOStore.java 
(original)
+++ pig/trunk/src/org/apache/pig/newplan/logical/relational/LOStore.java Tue 
Jan  9 10:03:07 2018
@@ -63,16 +63,31 @@ public class LOStore extends LogicalRela
 public LogicalSchema getSchema() throws FrontendException {
 schema = 
((LogicalRelationalOperator)plan.getPredecessors(this).get(0)).getSchema();
 
-if (!disambiguationEnabled && schema != null && schema.getFields() != 
null) {
+if (!disambiguationEnabled) {
 //If requested try and remove parent alias substring including 
colon(s)
+removeDisambiguation(schema);
+}
+
+return schema;
+}
+
+/**
+ * Removes schema disambiguation parts (parent alias and :) from field 
aliases
+ * @param schema
+ * @return
+ */
+private static LogicalSchema removeDisambiguation(LogicalSchema schema) {
+if (schema != null && schema.getFields() != null) {
 for (LogicalSchema.LogicalFieldSchema field : schema.getFields()) {
+if (field.schema != null) {
+removeDisambiguation(field.schema);
+}
 if (field.alias == null || !field.alias.contains(":")) {
 continue;
 }
 field.alias = 
field.alias.substring(field.alias.lastIndexOf(":") + 1);
 }
 }
-
 return schema;
 }
 

Modified: pig/trunk/test/org/apache/pig/test/TestSchema.java
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/TestSchema.java?rev=1820631&r1=1820630&r2=1820631&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/test/TestSchema.java (original)
+++ pig/trunk/test/org/apache/pig/test/TestSchema.java Tue Jan  9 10:03:07 2018
@@ -990,6 +990,33 @@ public class TestSchema {
 }
 
 @Test
+public void testDisabledDisambiguationContainsNoColonsForNestedSchema() 
throws IOException {
+resetDisambiguationTestPropertyOverride();
+
+String inputFileName = "testPrepend-nested-input.txt";
+String[] inputData = new String[]{"apple\t1\tred", 
"orange\t2\torange", "kiwi\t3\tgreen", "orange\t4\torange"};
+Util.createInputFile(cluster, inputFileName, inputData);
+
+String script = "A = LOAD '" + inputFileName + "' AS (fruit:chararray, 
foo:int, color: chararray);" +
+"B = LOAD '" + inputFileName + "' AS (id:chararray, bar:int);" 
+
+"C = JOIN A by fruit, B by id;" +
+"D = GROUP C by fruit;" +
+"E = LOAD '" + inputFileName + "' AS (name:chararray, 
qwe:int);" +
+"F = JOIN E by name, D by group;";
+
+Util.registerMultiLineQuery(pigServer, script);
+
+//Prepending should happen with default settings
+assertEquals("{E::name: chararray,E::qwe: int,D::group: 
chararray,D::C: {(A::fruit: chararray,A::foo: int,A::color: chararray,B::id: 
chararray,B::bar: int)}}", pigServer.dumpSchema("F").toString());
+
+//Override prepend property setting (check for flatten, join)
+
pigServer.getPigContext().getProperties().setProperty(PigConfiguration.PIG_STORE_SCHEMA_DISAMBIGUATE,
 "false");
+assertEquals("{name: chararray,qwe: int,group: chararray,C: {(fruit: 
chararray,foo: int,color: chararray,id:" +
+ 

svn commit: r1839568 - in /pig/trunk: ./ ivy/ src/org/apache/pig/backend/hadoop/hbase/ test/org/apache/pig/test/

2018-08-29 Thread szita
Author: szita
Date: Wed Aug 29 11:33:28 2018
New Revision: 1839568

URL: http://svn.apache.org/viewvc?rev=1839568&view=rev
Log:
PIG-5191: Pig HBase 2.0.0 support (nkollar via szita, reviewed by rohini)

Modified:
pig/trunk/CHANGES.txt
pig/trunk/build.xml
pig/trunk/ivy.xml
pig/trunk/ivy/libraries.properties
pig/trunk/src/org/apache/pig/backend/hadoop/hbase/HBaseStorage.java
pig/trunk/src/org/apache/pig/backend/hadoop/hbase/HBaseTableInputFormat.java
pig/trunk/test/org/apache/pig/test/TestHBaseStorage.java
pig/trunk/test/org/apache/pig/test/TestJobSubmission.java

Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1839568&r1=1839567&r2=1839568&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Wed Aug 29 11:33:28 2018
@@ -26,6 +26,8 @@ PIG-5282: Upgade to Java 8 (satishsaley
  
 IMPROVEMENTS
 
+PIG-5191: Pig HBase 2.0.0 support (nkollar via szita, reviewed by rohini)
+
 PIG-5344: Update Apache HTTPD LogParser to latest version (nielsbasjes via 
szita)
 
 PIG-4092: Predicate pushdown for Parquet (nkollar via rohini)

Modified: pig/trunk/build.xml
URL: http://svn.apache.org/viewvc/pig/trunk/build.xml?rev=1839568&r1=1839567&r2=1839568&view=diff
==============================================================================
--- pig/trunk/build.xml (original)
+++ pig/trunk/build.xml Wed Aug 29 11:33:28 2018
@@ -1713,7 +1713,7 @@


+ 
pattern="${ivy.lib.dir.spark}/[artifact]-[revision](-[classifier]).[ext]" 
conf="spark${sparkversion},hbase${hbaseversion}"/>

  
 

Modified: pig/trunk/ivy.xml
URL: http://svn.apache.org/viewvc/pig/trunk/ivy.xml?rev=1839568&r1=1839567&r2=1839568&view=diff
==============================================================================
--- pig/trunk/ivy.xml (original)
+++ pig/trunk/ivy.xml Wed Aug 29 11:33:28 2018
@@ -40,6 +40,7 @@
 
 
 
+
 
 
   
@@ -308,6 +309,167 @@
   
   
 
+
+
+
+
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+
+
+
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+
+
+
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+
+
+
+  
+  
+  
+
+
+
+  
+  
+
+
+
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+
+
+
+  
+  
+  
+
+
+
+  
+  
+
+
+
+  
+  
+
+
+
+  
+  
+
+
+
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+
+
+
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+
+
+
+  
+
+
+
+
+
+
+
+
+  
+  
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
 
 
 
@@ -316,7 +478,6 @@
 
   
 
-
 
 
 
@@ -420,6 +581,7 @@
 
 
 
+
 
 
 

Modified: pig/trunk/ivy/libraries.properties
URL: http://svn.apache.org/viewvc/pig/trunk/ivy/libraries.properties?rev=1839568&r1=1839567&r2=1839568&view=diff
==============================================================================
--- pig/trunk/ivy/libraries.properties (original)
+++ pig/trunk/ivy/libraries.properties Wed Aug 29 11:33:28 2018
@@ -39,6 +39,7 @@ hadoop-common.version=2.7.3
 hadoop-hdfs.version=2.7.3
 hadoop-mapreduce.version=2.7.3
 hbase1.version=1.2.4
+hbase2.version=2.0.0
 hsqldb.version=2.4.0
 hive.version=1.2.1
 httpcomponents.version=4.4
@@ -64,7 +65,7 @@ antlr.version=3.4
 stringtemplate.version=4.0.4
 log4j.version=1.2.16
 netty.version=3.6.6.Final
-netty-all.version=4.0.23.Final
+netty-all.version=4.1.1.Final
 rats-lib.version=0.5.1
 slf4j-api.version=1.6.1
 slf4j-log4j12.version=1.6.1
@@ -92,6 +93,7 @@ snappy.version=0.2
 leveldbjni.version=1.8
 curator.version=2.6.0
 htrace.version=3.1.0-incubating
+htrace4.version=4.0.1-incubating
 commons-lang3.version=3.6
 scala-xml.version=1.0.5
 glassfish.el.version=3.0.1-b08
\ No newline at end of file

Modified: pig/trunk/src/org/apache/pig/backend/hadoop/hbase/HBaseStorage.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/hbase/HBaseStorage.java?rev=1839568&r1=1839567&r2=1839568&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/backend/hadoop/hbase/HBaseStorage.java 
(original)
+++ pig/trunk/src/org/apache/pig/backend/hadoop/hbase/HBaseStorage.java Wed Aug 
29 11:33:28 2018
@@ -45,7 +45,10 @@ import org.apache.commons.logging.LogFac
 import org.apache.hadoop.conf.Configuration;
 import org.apache.

svn commit: r1840299 - in /pig/trunk: BUILDING.md CHANGES.txt build.xml dev-support/docker/Dockerfile

2018-09-07 Thread szita
Author: szita
Date: Fri Sep  7 13:33:33 2018
New Revision: 1840299

URL: http://svn.apache.org/viewvc?rev=1840299&view=rev
Log:
PIG-5343: Upgrade developer build environment (nielsbasjes via szita)

Modified:
pig/trunk/BUILDING.md
pig/trunk/CHANGES.txt
pig/trunk/build.xml
pig/trunk/dev-support/docker/Dockerfile

Modified: pig/trunk/BUILDING.md
URL: http://svn.apache.org/viewvc/pig/trunk/BUILDING.md?rev=1840299&r1=1840298&r2=1840299&view=diff
==============================================================================
--- pig/trunk/BUILDING.md (original)
+++ pig/trunk/BUILDING.md Fri Sep  7 13:33:33 2018
@@ -3,7 +3,7 @@
 ## Requirements:
 
 * Unix System
-* JDK 1.7+
+* JDK 1.8+
 * Ant 1.8.1+
 * Findbugs 3.x+
 * Forrest 0.9 (for building the documentation)

Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1840299&r1=1840298&r2=1840299&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Fri Sep  7 13:33:33 2018
@@ -26,6 +26,8 @@ PIG-5282: Upgade to Java 8 (satishsaley
  
 IMPROVEMENTS
 
+PIG-5343: Upgrade developer build environment (nielsbasjes via szita)
+
 PIG-5191: Pig HBase 2.0.0 support (nkollar via szita, reviewed by rohini)
 
 PIG-5344: Update Apache HTTPD LogParser to latest version (nielsbasjes via 
szita)

Modified: pig/trunk/build.xml
URL: http://svn.apache.org/viewvc/pig/trunk/build.xml?rev=1840299&r1=1840298&r2=1840299&view=diff
==============================================================================
--- pig/trunk/build.xml (original)
+++ pig/trunk/build.xml Fri Sep  7 13:33:33 2018
@@ -952,7 +952,7 @@
 
 
 
-
+
 
 
 

Modified: pig/trunk/dev-support/docker/Dockerfile
URL: http://svn.apache.org/viewvc/pig/trunk/dev-support/docker/Dockerfile?rev=1840299&r1=1840298&r2=1840299&view=diff
==============================================================================
--- pig/trunk/dev-support/docker/Dockerfile (original)
+++ pig/trunk/dev-support/docker/Dockerfile Fri Sep  7 13:33:33 2018
@@ -17,7 +17,7 @@
 # Dockerfile for installing the necessary dependencies for building Apache Pig.
 # See BUILDING.md.
 
-FROM ubuntu:trusty
+FROM ubuntu:bionic
 
 # Define working directory.
 WORKDIR /root
@@ -29,16 +29,17 @@ RUN sed -i 's/# \(.*multiverse$\)/\1/g'
 apt-get install -y build-essential && \
 apt-get install -y software-properties-common && \
 apt-get install --no-install-recommends -y \
+sudo \
 git subversion \
 byobu htop man unzip vim \
 cabal-install \
 curl wget \
-openjdk-7-jdk \
+openjdk-8-jdk \
 ant ant-contrib ant-optional make maven \
 cmake gcc g++ protobuf-compiler \
 build-essential libtool \
 zlib1g-dev pkg-config libssl-dev \
-snappy libsnappy-dev \
+ubuntu-snappy ubuntu-snappy-cli libsnappy-dev \
 bzip2 libbz2-dev \
 libjansson-dev \
 fuse libfuse-dev \
@@ -47,11 +48,7 @@ RUN sed -i 's/# \(.*multiverse$\)/\1/g'
 rm -rf /var/lib/apt/lists/*
 
 # Define commonly used JAVA_HOME variable
-ENV JAVA_HOME /usr/lib/jvm/java-7-openjdk-amd64
-
-# Fixing the Apache commons / Maven dependency problem under Ubuntu:
-# See http://wiki.apache.org/commons/VfsProblems
-RUN cd /usr/share/maven/lib && ln -s ../../java/commons-lang.jar .
+ENV JAVA_HOME /usr/lib/jvm/java-8-openjdk-amd64
 
 # Avoid out of memory errors in builds
 ENV MAVEN_OPTS -Xms256m -Xmx512m
@@ -64,12 +61,9 @@ RUN mkdir -p /opt/findbugs && \
 ENV FINDBUGS_HOME /opt/findbugs
 
 # Install Forrest in /usr/local/apache-forrest
-# Screenscrape the download page for a local mirror URL
-RUN cd /usr/local/ && \
-curl https://forrest.apache.org/mirrors.cgi | \
-fgrep href | fgrep apache-forrest-0.9 | \
-sed 's@^.*"\(http[^"]*apache-forrest-[^"]*.tar.gz\)".*@\1@' | \
-xargs -n1 -r wget
+# Download
+RUN cd /usr/local/ && wget "http://www.apache.org/dyn/closer.lua?action=download&filename=/forrest/apache-forrest-0.9-sources.tar.gz" -O "apache-forrest-0.9-sources.tar.gz"
+RUN cd /usr/local/ && wget "http://www.apache.org/dyn/closer.lua?action=download&filename=/forrest/apache-forrest-0.9-dependencies.tar.gz" -O "apache-forrest-0.9-dependencies.tar.gz"
 
 # Unpack Apache Forrest
 RUN cd /usr/local/ && \




svn commit: r1850723 - in /pig/trunk: CHANGES.txt src/org/apache/pig/impl/io/InterRecordReader.java

2019-01-08 Thread szita
Author: szita
Date: Tue Jan  8 10:37:57 2019
New Revision: 1850723

URL: http://svn.apache.org/viewvc?rev=1850723&view=rev
Log:
PIG-5374: Use CircularFifoBuffer in InterRecordReader (szita)

Modified:
pig/trunk/CHANGES.txt
pig/trunk/src/org/apache/pig/impl/io/InterRecordReader.java

Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1850723&r1=1850722&r2=1850723&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Tue Jan  8 10:37:57 2019
@@ -88,6 +88,8 @@ OPTIMIZATIONS
  
 BUG FIXES
 
+PIG-5374: Use CircularFifoBuffer in InterRecordReader (szita)
+
 PIG-5373: InterRecordReader might skip records if certain sync markers are 
used (szita)
 
 PIG-5370: Union onschema + columnprune dropping used fields (knoguchi)

Modified: pig/trunk/src/org/apache/pig/impl/io/InterRecordReader.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/impl/io/InterRecordReader.java?rev=1850723&r1=1850722&r2=1850723&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/impl/io/InterRecordReader.java (original)
+++ pig/trunk/src/org/apache/pig/impl/io/InterRecordReader.java Tue Jan  8 
10:37:57 2019
@@ -20,7 +20,7 @@ package org.apache.pig.impl.io;
 import java.io.DataInputStream;
 import java.io.IOException;
 
-import org.apache.commons.collections4.queue.CircularFifoQueue;
+import org.apache.commons.collections.buffer.CircularFifoBuffer;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.FileSystem;
@@ -92,7 +92,7 @@ public class InterRecordReader extends R
  */
   public boolean skipUntilMarkerOrSplitEndOrEOF() throws IOException {
   int b = Integer.MIN_VALUE;
-  CircularFifoQueue<Integer> queue = new CircularFifoQueue<Integer>(syncMarker.length);
+  CircularFifoBuffer queue = new CircularFifoBuffer(syncMarker.length);
   outer:while (b != -1) {
   //There may be a case where we read through a whole split without a 
marker, then we shouldn't proceed
   // because the records are from the next split which another reader 
would pick up too
@@ -107,13 +107,13 @@ public class InterRecordReader extends R
   if (b == -1) return false;
 
   queue.add(b);
-  if (queue.size() != queue.maxSize()) {
+  if (!queue.isFull()) {
   //Not enough bytes read yet
   continue outer;
   }
   int i = 0;
-  for (Integer seenByte : queue){
-  if (syncMarker[i++] != seenByte.byteValue()) {
+  for (Object seenByte : queue){
+  if (syncMarker[i++] != ((Integer)seenByte).byteValue()) {
   continue outer;
   }
   }
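
The scan this commit ports from collections4 to commons-collections 3 is easiest to see outside Pig's reader: keep the last N bytes in a fixed-size ring buffer and compare against the marker after every byte. A standalone sketch over an in-memory array (Pig's real code reads from the split's input stream instead):

    import org.apache.commons.collections.buffer.CircularFifoBuffer;

    public class MarkerScanDemo {
        // Returns the offset just past the first marker occurrence, or -1.
        static int findMarkerEnd(byte[] data, byte[] marker) {
            CircularFifoBuffer lastBytes = new CircularFifoBuffer(marker.length);
            for (int pos = 0; pos < data.length; pos++) {
                lastBytes.add(Integer.valueOf(data[pos] & 0xff));
                if (!lastBytes.isFull()) {
                    continue; // fewer than marker.length bytes seen so far
                }
                int i = 0;
                boolean match = true;
                for (Object seenByte : lastBytes) {
                    if (marker[i++] != ((Integer) seenByte).byteValue()) {
                        match = false;
                        break;
                    }
                }
                if (match) {
                    return pos + 1;
                }
            }
            return -1;
        }
    }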




svn commit: r1850245 - in /pig/trunk: CHANGES.txt src/org/apache/pig/impl/io/InterRecordReader.java test/org/apache/pig/test/TestBinInterSedes.java

2019-01-03 Thread szita
Author: szita
Date: Thu Jan  3 15:54:11 2019
New Revision: 1850245

URL: http://svn.apache.org/viewvc?rev=1850245&view=rev
Log:
PIG-5373: InterRecordReader might skip records if certain sync markers are used 
(szita)

Modified:
pig/trunk/CHANGES.txt
pig/trunk/src/org/apache/pig/impl/io/InterRecordReader.java
pig/trunk/test/org/apache/pig/test/TestBinInterSedes.java

Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1850245&r1=1850244&r2=1850245&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Thu Jan  3 15:54:11 2019
@@ -88,6 +88,8 @@ OPTIMIZATIONS
  
 BUG FIXES
 
+PIG-5373: InterRecordReader might skip records if certain sync markers are 
used (szita)
+
 PIG-5370: Union onschema + columnprune dropping used fields (knoguchi)
 
 PIG-5362: Parameter substitution of shell cmd results doesn't handle backslash 
(wlauer via rohini)

Modified: pig/trunk/src/org/apache/pig/impl/io/InterRecordReader.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/impl/io/InterRecordReader.java?rev=1850245&r1=1850244&r2=1850245&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/impl/io/InterRecordReader.java (original)
+++ pig/trunk/src/org/apache/pig/impl/io/InterRecordReader.java Thu Jan  3 
15:54:11 2019
@@ -20,6 +20,7 @@ package org.apache.pig.impl.io;
 import java.io.DataInputStream;
 import java.io.IOException;
 
+import org.apache.commons.collections4.queue.CircularFifoQueue;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.FileSystem;
@@ -89,35 +90,34 @@ public class InterRecordReader extends R
  * @return true if marker was observed, false if EOF or EndOfSplit was 
reached
  * @throws IOException
  */
-  private boolean skipUntilMarkerOrSplitEndOrEOF() throws IOException {
+  public boolean skipUntilMarkerOrSplitEndOrEOF() throws IOException {
   int b = Integer.MIN_VALUE;
-outer:while (b != -1) {
-  if (b != syncMarker[0]) {
+  CircularFifoQueue<Integer> queue = new CircularFifoQueue<Integer>(syncMarker.length);
+  outer:while (b != -1) {
+  //There may be a case where we read through a whole split without a 
marker, then we shouldn't proceed
+  // because the records are from the next split which another reader 
would pick up too
+  //One exception of reading past split end is if at least the first 
byte of the marker was seen before split
+  // end.
+  if (in.getPosition() >= (end+syncMarker.length-1)) {
+  return false;
+  }
+  b = in.read();
 
-  //There may be a case where we read through a whole split 
without a marker, then we shouldn't proceed
-  // because the records are from the next split which another 
reader would pick up too
-  if (in.getPosition() >= end) {
-  return false;
-  }
-  b = in.read();
-  if ((byte) b != syncMarker[0] && b != -1) {
-  continue;
-  }
-  if (b == -1) return false;
+  //EOF reached
+  if (b == -1) return false;
+
+  queue.add(b);
+  if (queue.size() != queue.maxSize()) {
+  //Not enough bytes read yet
+  continue outer;
   }
-  int i = 1;
-  while (i < syncMarker.length) {
-  b = in.read();
-  if (b == -1) return false;
-  if ((byte) b != syncMarker[i]) {
-  if (in.getPosition() > end) {
-  //Again we should not read past the split end, only if 
at least the first byte of marker was seen before it
-  return false;
-  }
+  int i = 0;
+  for (Integer seenByte : queue){
+  if (syncMarker[i++] != seenByte.byteValue()) {
   continue outer;
   }
-  ++i;
   }
+  //Found marker: queue content equals sync marker
   lastSyncPos = in.getPosition();
   return true;
   }

Modified: pig/trunk/test/org/apache/pig/test/TestBinInterSedes.java
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/TestBinInterSedes.java?rev=1850245&r1=1850244&r2=1850245&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/test/TestBinInterSedes.java (original)
+++ pig/trunk/test/org/apache/pig/test/TestBinInterSedes.java Thu Jan  3 
15:54:11 2019
@@ -18,6 +18,7 @@
 package org.apache.pig.test;
 
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
 
 import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
@@ -26,6 +27,7 @@ import java.io.DataOutput;
 import java.io.DataOutputStream;
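
The subtle part of the new loop is the split-boundary guard, in.getPosition() >= end + syncMarker.length - 1. A worked example with made-up numbers may help: with a split ending at byte 1000 and a 10-byte marker, a marker starting on the last in-split byte can run through byte 1008, so scanning must stop only at 1009:

    public class SplitBoundaryDemo {
        // A reader may read past its split end only while it could still be
        // finishing a marker that started inside the split; reading further
        // would duplicate records the next split's reader also picks up.
        static boolean mayKeepScanning(long pos, long end, int markerLen) {
            return pos < end + markerLen - 1;
        }

        public static void main(String[] args) {
            long end = 1000;    // hypothetical split end
            int markerLen = 10; // hypothetical sync marker length
            System.out.println(mayKeepScanning(1005, end, markerLen)); // true
            System.out.println(mayKeepScanning(1009, end, markerLen)); // false
        }
    }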

svn commit: r1841248 - in /pig/trunk: CHANGES.txt build.xml

2018-09-18 Thread szita
Author: szita
Date: Tue Sep 18 18:56:27 2018
New Revision: 1841248

URL: http://svn.apache.org/viewvc?rev=1841248&view=rev
Log:
PIG-5358: Remove hive-contrib jar from lib directory (szita)

Modified:
pig/trunk/CHANGES.txt
pig/trunk/build.xml

Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1841248&r1=1841247&r2=1841248&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Tue Sep 18 18:56:27 2018
@@ -26,6 +26,8 @@ PIG-5282: Upgade to Java 8 (satishsaley
  
 IMPROVEMENTS
 
+PIG-5358: Remove hive-contrib jar from lib directory (szita)
+
 PIG-5343: Upgrade developer build environment (nielsbasjes via szita)
 
 PIG-5191: Pig HBase 2.0.0 support (nkollar via szita, reviewed by rohini)

Modified: pig/trunk/build.xml
URL: http://svn.apache.org/viewvc/pig/trunk/build.xml?rev=1841248&r1=1841247&r2=1841248&view=diff
==============================================================================
--- pig/trunk/build.xml (original)
+++ pig/trunk/build.xml Tue Sep 18 18:56:27 2018
@@ -731,7 +731,7 @@
 
 
 
-
+
 
 
 




svn commit: r1873947 - in /pig/trunk: ./ ivy/ shims/src/hive1/ shims/src/hive1/org/ shims/src/hive1/org/apache/ shims/src/hive1/org/apache/pig/ shims/src/hive1/org/apache/pig/hive/ shims/src/hive3/ sh

2020-02-12 Thread szita
Author: szita
Date: Wed Feb 12 15:27:05 2020
New Revision: 1873947

URL: http://svn.apache.org/viewvc?rev=1873947&view=rev
Log:
PIG-4764: Make Pig work with Hive 3.1 (szita)

Added:
pig/trunk/shims/src/hive1/
pig/trunk/shims/src/hive1/org/
pig/trunk/shims/src/hive1/org/apache/
pig/trunk/shims/src/hive1/org/apache/pig/
pig/trunk/shims/src/hive1/org/apache/pig/hive/
pig/trunk/shims/src/hive1/org/apache/pig/hive/HiveShims.java
pig/trunk/shims/src/hive3/
pig/trunk/shims/src/hive3/org/
pig/trunk/shims/src/hive3/org/apache/
pig/trunk/shims/src/hive3/org/apache/pig/
pig/trunk/shims/src/hive3/org/apache/pig/hive/
pig/trunk/shims/src/hive3/org/apache/pig/hive/HiveShims.java
Modified:
pig/trunk/CHANGES.txt
pig/trunk/build.xml
pig/trunk/ivy.xml
pig/trunk/ivy/libraries.properties
pig/trunk/src/org/apache/pig/Expression.java
pig/trunk/src/org/apache/pig/builtin/HiveUDAF.java
pig/trunk/src/org/apache/pig/builtin/HiveUDFBase.java
pig/trunk/src/org/apache/pig/builtin/OrcStorage.java
pig/trunk/src/org/apache/pig/impl/util/hive/HiveUtils.java
pig/trunk/src/org/apache/pig/newplan/FilterExtractor.java
pig/trunk/test/org/apache/pig/builtin/TestOrcStorage.java
pig/trunk/test/org/apache/pig/builtin/TestOrcStoragePushdown.java
pig/trunk/test/org/apache/pig/test/TestLoaderStorerShipCacheFiles.java

Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1873947&r1=1873946&r2=1873947&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Wed Feb 12 15:27:05 2020
@@ -26,6 +26,8 @@ PIG-5282: Upgade to Java 8 (satishsaley
  
 IMPROVEMENTS
 
+PIG-4764: Make Pig work with Hive 3.1 (szita)
+
 PIG-5352: Please add OWASP Dependency Check to the build ivy.xml (knoguchi)
 
 PIG-5385: Skip calling extra gc() before spilling large bag when unnecessary 
(knoguchi)

Modified: pig/trunk/build.xml
URL: http://svn.apache.org/viewvc/pig/trunk/build.xml?rev=1873947&r1=1873946&r2=1873947&view=diff
==============================================================================
--- pig/trunk/build.xml (original)
+++ pig/trunk/build.xml Wed Feb 12 15:27:05 2020
@@ -154,7 +154,7 @@
 
   
 
-   
+
 
 
 
@@ -241,6 +241,7 @@
 
 
 
+
 
 
 
@@ -248,6 +249,7 @@
 
 
 
+
 
 https://repository.apache.org"/>
 
@@ -353,6 +355,7 @@
 
 
 
+
 
 
 
@@ -568,8 +571,8 @@
 *** Building Main Sources ***
 *** To compile with all warnings enabled, supply 
-Dall.warnings=1 on command line ***
 *** Else, you will only be warned about deprecations ***
-*** Hadoop version used: ${hadoopversion} ; HBase version used: ${hbaseversion} ; Spark version used: ${sparkversion} ***
+*** Hadoop version used: ${hadoopversion} ; HBase version used: ${hbaseversion} ; Spark version used: ${sparkversion} ; Hive version used: ${hiveversion} ***
+
 
 
@@ -734,6 +737,7 @@
 
 
 
+
 
 
 
@@ -1161,6 +1165,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -1236,6 +1244,7 @@
 
 
 
+
 
 
 
@@ -1723,7 +1732,7 @@
 
  

-   *** Ivy resolve with Hadoop ${hadoopversion}, Spark ${sparkversion} and HBase ${hbaseversion} ***
+   *** Ivy resolve with Hadoop ${hadoopversion}, Spark ${sparkversion}, HBase ${hbaseversion}, Hive ${hiveversion} ***


  

Modified: pig/trunk/ivy.xml
URL: http://svn.apache.org/viewvc/pig/trunk/ivy.xml?rev=1873947&r1=1873946&r2=1873947&view=diff
==============================================================================
--- pig/trunk/ivy.xml (original)
+++ pig/trunk/ivy.xml Wed Feb 12 15:27:05 2020
@@ -31,7 +31,7 @@
 
 
 
-
+
 
 
 
@@ -43,6 +43,8 @@
 
 
 
+
+
 
   
   
@@ -525,23 +527,48 @@
 
 
-
+
+
+
+  
+
+
+
+
+
+
+
+
+
   
 
 
+conf="hive3->master" />
 
+conf="hive3->master" />
 
+conf="hive3->master" />
 
+
 
+conf="hive3->master" />
+
+
+
+
 
-
+
+
 
 

Modified: pig/trunk/ivy/libraries.properties
URL: http://svn.apache.org/viewvc/pig/trunk/ivy/libraries.properties?rev=1873947&r1=1873946&r2=1873947&view=diff
==============================================================================
--- pig/trunk/ivy/libraries.properties (original)
+++ pig

svn commit: r1872871 - in /pig/trunk: CHANGES.txt build.xml ivy/ivysettings.xml test/e2e/pig/build.xml

2020-01-16 Thread szita
Author: szita
Date: Thu Jan 16 10:20:43 2020
New Revision: 1872871

URL: http://svn.apache.org/viewvc?rev=1872871&view=rev
Log:
PIG-5395: Pig build is failing due to maven repo access point change (szita)

Modified:
pig/trunk/CHANGES.txt
pig/trunk/build.xml
pig/trunk/ivy/ivysettings.xml
pig/trunk/test/e2e/pig/build.xml

Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1872871&r1=1872870&r2=1872871&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Thu Jan 16 10:20:43 2020
@@ -96,6 +96,8 @@ OPTIMIZATIONS
  
 BUG FIXES
 
+PIG-5395: Pig build is failing due to maven repo access point change (szita)
+
 PIG-5375: NullPointerException for multi-level self unions with Tez 
UnionOptimizer (knoguchi)
 
 PIG-5386: Pig local mode with bundled Hadoop broken (nkollar)

Modified: pig/trunk/build.xml
URL: http://svn.apache.org/viewvc/pig/trunk/build.xml?rev=1872871&r1=1872870&r2=1872871&view=diff
==============================================================================
--- pig/trunk/build.xml (original)
+++ pig/trunk/build.xml Thu Jan 16 10:20:43 2020
@@ -251,7 +251,7 @@
 
 https://repository.apache.org"/>
 
-http://repo2.maven.org/maven2"/>
+https://repo1.maven.org/maven2"/>
 
 
 

Modified: pig/trunk/ivy/ivysettings.xml
URL: http://svn.apache.org/viewvc/pig/trunk/ivy/ivysettings.xml?rev=1872871&r1=1872870&r2=1872871&view=diff
==============================================================================
--- pig/trunk/ivy/ivysettings.xml (original)
+++ pig/trunk/ivy/ivysettings.xml Thu Jan 16 10:20:43 2020
@@ -21,7 +21,7 @@
   see http://www.jayasoft.org/ivy/doc/configuration
   -->
   
   




svn commit: r1876880 - in /pig/trunk: CHANGES.txt src/org/apache/pig/tools/parameters/ParamLoader.jj src/org/apache/pig/tools/parameters/PigFileParser.jj

2020-04-23 Thread szita
Author: szita
Date: Thu Apr 23 11:25:37 2020
New Revision: 1876880

URL: http://svn.apache.org/viewvc?rev=1876880&view=rev
Log:
PIG-5362: Parameter substitution of shell cmd results doesn't handle backslash 
addendum (szita)

Modified:
pig/trunk/CHANGES.txt
pig/trunk/src/org/apache/pig/tools/parameters/ParamLoader.jj
pig/trunk/src/org/apache/pig/tools/parameters/PigFileParser.jj

Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1876880&r1=1876879&r2=1876880&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Thu Apr 23 11:25:37 2020
@@ -100,6 +100,8 @@ OPTIMIZATIONS
  
 BUG FIXES
 
+PIG-5362: Parameter substitution of shell cmd results doesn't handle backslash 
addendum (szita)
+
 PIG-5395: Pig build is failing due to maven repo access point change (szita)
 
 PIG-5375: NullPointerException for multi-level self unions with Tez 
UnionOptimizer (knoguchi)

Modified: pig/trunk/src/org/apache/pig/tools/parameters/ParamLoader.jj
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/tools/parameters/ParamLoader.jj?rev=1876880&r1=1876879&r2=1876880&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/tools/parameters/ParamLoader.jj (original)
+++ pig/trunk/src/org/apache/pig/tools/parameters/ParamLoader.jj Thu Apr 23 
11:25:37 2020
@@ -43,16 +43,6 @@ public class ParamLoader {
 public void setContext(PreprocessorContext pc) {
 this.pc = pc;
 }
-
-private static String unquote(String s)
-{
-if (s.charAt(0) == '\'' && s.charAt(s.length()-1) == '\'')
-return s.substring(1, s.length()-1);
-else if (s.charAt(0) == '"' && s.charAt(s.length()-1) == '"')
-return s.substring(1, s.length()-1);
-else
-return s;
-}
 }
 
 PARSER_END(ParamLoader)
@@ -81,16 +71,52 @@ TOKEN :
 |
 
 |
-
-|
 
 |
 
+}
+
+MORE :
+{
+ : DOUBLE_QUOTE
+|
+ : SINGLE_QUOTE
+}
+
+ TOKEN :
+{
+ {
+image.deleteCharAt(image.length()-1);
+image.deleteCharAt(0);
+matchedToken.image = image.toString();
+} : DEFAULT
+} 
 
+ TOKEN :
+{
+ {
+image.deleteCharAt(image.length()-1);
+image.deleteCharAt(0);
+matchedToken.image = image.toString();
+} : DEFAULT 
+}
+
+ MORE :
+{
+ { image.replace(image.length()-2, 
image.length(), "\""); }
+}
+
+ MORE :
+{
+ { image.replace(image.length()-2, 
image.length(), "'"); }
 }
 
+ MORE:
+{
+< (~[]) >
+}
 
 
 boolean Parse() throws IOException  :
@@ -117,7 +143,10 @@ boolean Parse() throws IOException  :
 |
 val=  { pc.processShellCmd(id.image , val.image);}
 |
-val= { s = unquote(val.image); 
pc.processOrdLine(id.image , s); }
+val= { pc.processOrdLine(id.image , 
val.image); }
+|
+val= { pc.processOrdLine(id.image , 
val.image); }
+
 )
 )
 |
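
The net effect of the new DOUBLE_QUOTE/SINGLE_QUOTE lexical states is that the tokenizer itself now strips the surrounding quotes and resolves \" and \' escape sequences, which the removed unquote() helper never did. A plain-Java sketch of the equivalent behavior (an illustration, not the generated parser code):

    // Strips matching surrounding quotes and collapses escaped quotes of the
    // same kind, mirroring what the grammar's quote states produce.
    public class QuoteDemo {
        static String unquoteWithEscapes(String s) {
            if (s.length() < 2) return s;
            char q = s.charAt(0);
            if ((q != '"' && q != '\'') || s.charAt(s.length() - 1) != q) {
                return s; // not a quoted literal
            }
            String body = s.substring(1, s.length() - 1);
            return body.replace("\\" + q, String.valueOf(q));
        }

        public static void main(String[] args) {
            System.out.println(unquoteWithEscapes("'it\\'s'"));        // it's
            System.out.println(unquoteWithEscapes("\"a \\\"b\\\"\"")); // a "b"
        }
    }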

Modified: pig/trunk/src/org/apache/pig/tools/parameters/PigFileParser.jj
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/tools/parameters/PigFileParser.jj?rev=1876880&r1=1876879&r2=1876880&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/tools/parameters/PigFileParser.jj (original)
+++ pig/trunk/src/org/apache/pig/tools/parameters/PigFileParser.jj Thu Apr 23 
11:25:37 2020
@@ -223,25 +223,22 @@ TOKEN_MGR_DECLS : {
 }: DEFAULT
 }
 
-TOKEN : 
+< DEFAULT, IN_DECLARE > TOKEN : 
 {
-
+ : DEFAULT
 |
 
 |
 
-}
-
-// comments(single line and multi-line)
-TOKEN : 
-{
-  
+>   
 }
 
-TOKEN:
+< DEFAULT, IN_DECLARE >TOKEN:
 {
 <#LETTER : ["a"-"z", "A"-"Z"] >
 |
@@ -254,22 +251,31 @@ TOKEN:
 
 TOKEN :
 {
-
+ : IN_DECLARE
 |
- 
+ : IN_DECLARE
+|
+ : IN_REGISTER
+|
+
 }
 
-
-TOKEN : 
+< DEFAULT, IN_DECLARE > TOKEN:
 {
- : IN_REGISTER
-|
 )*( |  | )*>
 |
-
-|
-
+ : DEFAULT
+}
+
+< IN_DECLARE > MORE :
+{
+ : DOUBLE_QUOTE
 |
+ : SINGLE_QUOTE
+}
+
+< DEFAULT, IN_DECLARE > TOKEN:
+{
 // see others() rule for use of OTHER and NOT_OTHER_CHAR
 // others() is supposed to match 'everything else'. To ensure that others()
 // don't swallow other(all the ones above) tokens, it uses two tokens 
OTHER and NOT_OTHER_CHAR
@@ -281,6 +287,39 @@ TOKEN :
 
 }
 
+< DOUBLE_QUOTE > TOKEN :
+{
+ {
+image.deleteCharAt(image.length()-1);
+image.deleteCharAt(0);
+matchedToken.image = image.toString();
+} : DEFAULT
+} 
+
+< SINGLE_QUOTE > TOKEN :
+{
+ {
+image.delete