Modified: pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/MaxTupleBy1stField.java URL: http://svn.apache.org/viewvc/pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/MaxTupleBy1stField.java?rev=1784224&r1=1784223&r2=1784224&view=diff ============================================================================== --- pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/MaxTupleBy1stField.java (original) +++ pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/MaxTupleBy1stField.java Fri Feb 24 03:34:37 2017 @@ -21,7 +21,6 @@ package org.apache.pig.piggybank.evaluat import java.io.IOException; import java.util.Iterator; -import org.apache.pig.Accumulator; import org.apache.pig.Algebraic; import org.apache.pig.EvalFunc; import org.apache.pig.backend.executionengine.ExecException; @@ -44,7 +43,7 @@ import org.apache.pig.impl.logicalLayer. * * @author Vadim Zaliva <[email protected]> */ -public class MaxTupleBy1stField extends EvalFunc<Tuple> implements Algebraic, Accumulator<Tuple> +public class MaxTupleBy1stField extends EvalFunc<Tuple> implements Algebraic { /** * Indicates once for how many items progress hartbeat should be sent. @@ -132,11 +131,6 @@ public class MaxTupleBy1stField extends protected static Tuple max(Tuple input, PigProgressable reporter) throws ExecException { DataBag values = (DataBag) input.get(0); - return max(values,reporter); - } - - protected static Tuple max(DataBag values, PigProgressable reporter) throws ExecException - { // if we were handed an empty bag, return NULL // this is in compliance with SQL standard @@ -189,44 +183,4 @@ public class MaxTupleBy1stField extends return Final.class.getName(); } - - /** - * Accumulator implementation - */ - - private Tuple intermediate = null; - - /** - * Accumulate implementation - calls max() on the incoming tuple set including intermediate tuple if already exists - * @param b A tuple containing a single field, which is a bag. The bag will contain the set - * @throws IOException - */ - @Override - public void accumulate(Tuple b) throws IOException { - try{ - DataBag values = BagFactory.getInstance().newDefaultBag(); - values.addAll((DataBag) b.get(0)); - - if (intermediate != null) { - values.add(intermediate); - } - intermediate = max(values,reporter); - - }catch (ExecException ee){ - IOException oughtToBeEE = new IOException(); - oughtToBeEE.initCause(ee); - throw oughtToBeEE; - } - } - - @Override - public Tuple getValue() { - return intermediate; - } - - @Override - public void cleanup() { - intermediate = null; - } - }
Modified: pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/Over.java URL: http://svn.apache.org/viewvc/pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/Over.java?rev=1784224&r1=1784223&r2=1784224&view=diff ============================================================================== --- pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/Over.java (original) +++ pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/Over.java Fri Feb 24 03:34:37 2017 @@ -23,13 +23,10 @@ import java.util.Iterator; import java.util.List; import org.apache.pig.EvalFunc; +import org.apache.pig.FuncSpec; import org.apache.pig.PigException; import org.apache.pig.backend.executionengine.ExecException; import org.apache.pig.builtin.AVG; -import org.apache.pig.builtin.BigDecimalAvg; -import org.apache.pig.builtin.BigDecimalMax; -import org.apache.pig.builtin.BigDecimalMin; -import org.apache.pig.builtin.BigDecimalSum; import org.apache.pig.builtin.COUNT; import org.apache.pig.builtin.DoubleAvg; import org.apache.pig.builtin.DoubleMax; @@ -57,7 +54,6 @@ import org.apache.pig.data.Tuple; import org.apache.pig.data.TupleFactory; import org.apache.pig.impl.logicalLayer.FrontendException; import org.apache.pig.impl.logicalLayer.schema.Schema; -import org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema; /** * Given an aggregate function, a bag, and possibly a window definition, @@ -77,27 +73,23 @@ import org.apache.pig.impl.logicalLayer. * <li>sum(int)</li> * <li>sum(long)</li> * <li>sum(bytearray)</li> - * <li>sum(bigdecimal)</li> * <li>avg(double)</li> * <li>avg(float)</li> * <li>avg(long)</li> * <li>avg(int)</li> * <li>avg(bytearray)</li> - * <li>avg(bigdecimal)</li> * <li>min(double)</li> * <li>min(float)</li> * <li>min(long)</li> * <li>min(int)</li> * <li>min(chararray)</li> * <li>min(bytearray)</li> - * <li>min(bigdecimal)</li> * <li>max(double)</li> * <li>max(float)</li> * <li>max(long)</li> * <li>max(int)</li> * <li>max(chararray)</li> * <li>max(bytearray)</li> - * <li>max(bigdecimal)</li> * <li>row_number</li> * <li>first_value</li> * <li>last_value</li> @@ -161,8 +153,7 @@ import org.apache.pig.impl.logicalLayer. * current row and 3 following) over T;</tt> * * <p>Over accepts a constructor argument specifying the name and type, - * colon-separated, of its return schema. If the argument option is 'true' use the inner-search, - * take the name and type of bag and return a schema with alias+'_over' and the same type</p> + * colon-separated, of its return schema.</p> * * <p><pre> * DEFINE IOver org.apache.pig.piggybank.evaluation.Over('state_rk:int'); @@ -197,14 +188,12 @@ public class Over extends EvalFunc<DataB private Object[] udfArgs; private byte returnType; private String returnName; - private boolean searchInnerType; public Over() { initialized = false; udfArgs = null; func = null; returnType = DataType.UNKNOWN; - searchInnerType = false; } public Over(String typespec) { @@ -213,16 +202,12 @@ public class Over extends EvalFunc<DataB String[] fn_tn = typespec.split(":", 2); this.returnName = fn_tn[0]; this.returnType = DataType.findTypeByName(fn_tn[1]); - } else if(Boolean.parseBoolean(typespec)) { - searchInnerType = Boolean.parseBoolean(typespec); - }else{ + } else { this.returnName = "result"; this.returnType = DataType.findTypeByName(typespec); - } + } } - - @Override public DataBag exec(Tuple input) throws IOException { if (input == null || input.size() < 2) { @@ -270,42 +255,19 @@ public class Over extends EvalFunc<DataB @Override public Schema outputSchema(Schema inputSch) { try { - FieldSchema field; - - if (searchInnerType) { - field = new FieldSchema(inputSch.getField(0)); - while (searchInnerType) { - if (field.schema != null - && field.schema.getFields().size() > 1) { - searchInnerType = false; - } else { - if (field.type == DataType.TUPLE - || field.type == DataType.BAG) { - field = new FieldSchema(field.schema.getField(0)); - } else { - field.alias = field.alias + "_over"; - searchInnerType = false; - } - } - } - - searchInnerType = true; - } else if (returnType == DataType.UNKNOWN) { + if (returnType == DataType.UNKNOWN) { return Schema.generateNestedSchema(DataType.BAG, DataType.NULL); } else { - field = new Schema.FieldSchema(returnName, returnType); + Schema outputTupleSchema = new Schema(new Schema.FieldSchema(returnName, returnType)); + return new Schema(new Schema.FieldSchema( + getSchemaName(this.getClass().getName().toLowerCase(), inputSch), + outputTupleSchema, + DataType.BAG)); } - - Schema outputTupleSchema = new Schema(field); - return new Schema(new Schema.FieldSchema(getSchemaName(this - .getClass().getName().toLowerCase(), inputSch), - outputTupleSchema, DataType.BAG)); - } catch (FrontendException fe) { throw new RuntimeException("Unable to create nested schema", fe); } } - private void init(Tuple input) throws IOException { initialized = true; @@ -367,8 +329,6 @@ public class Over extends EvalFunc<DataB func = new LongSum(); } else if ("sum(bytearray)".equalsIgnoreCase(agg)) { func = new SUM(); - } else if ("sum(bigdecimal)".equalsIgnoreCase(agg)) { - func = new BigDecimalSum(); } else if ("avg(double)".equalsIgnoreCase(agg)) { func = new DoubleAvg(); } else if ("avg(float)".equalsIgnoreCase(agg)) { @@ -379,8 +339,6 @@ public class Over extends EvalFunc<DataB func = new IntAvg(); } else if ("avg(bytearray)".equalsIgnoreCase(agg)) { func = new AVG(); - } else if ("avg(bigdecimal)".equalsIgnoreCase(agg)) { - func = new BigDecimalAvg(); } else if ("min(double)".equalsIgnoreCase(agg)) { func = new DoubleMin(); } else if ("min(float)".equalsIgnoreCase(agg)) { @@ -393,8 +351,6 @@ public class Over extends EvalFunc<DataB func = new StringMin(); } else if ("min(bytearray)".equalsIgnoreCase(agg)) { func = new MIN(); - } else if ("min(bigdecimal)".equalsIgnoreCase(agg)) { - func = new BigDecimalMin(); } else if ("max(double)".equalsIgnoreCase(agg)) { func = new DoubleMax(); } else if ("max(float)".equalsIgnoreCase(agg)) { @@ -407,8 +363,6 @@ public class Over extends EvalFunc<DataB func = new StringMax(); } else if ("max(bytearray)".equalsIgnoreCase(agg)) { func = new MAX(); - } else if ("max(bigdecimal)".equalsIgnoreCase(agg)) { - func = new BigDecimalMax(); } else if ("row_number".equalsIgnoreCase(agg)) { func = new RowNumber(); } else if ("first_value".equalsIgnoreCase(agg)) { Modified: pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/util/apachelogparser/SearchEngineExtractor.java URL: http://svn.apache.org/viewvc/pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/util/apachelogparser/SearchEngineExtractor.java?rev=1784224&r1=1784223&r2=1784224&view=diff ============================================================================== --- pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/util/apachelogparser/SearchEngineExtractor.java (original) +++ pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/util/apachelogparser/SearchEngineExtractor.java Fri Feb 24 03:34:37 2017 @@ -363,15 +363,6 @@ public class SearchEngineExtractor exten searchEngines.put("search.lycos.com", "Lycos"); searchEngines.put("search.msn.co.uk", "MSN UK"); searchEngines.put("search.msn.com", "MSN"); - searchEngines.put("bing.com", "Bing"); - searchEngines.put("ssl.bing.com", "Bing"); - searchEngines.put("cn.bing.com", "Bing China"); - searchEngines.put("br.bing.com", "Bing Brazil"); - searchEngines.put("it.bing.com", "Bing Italy"); - searchEngines.put("be.bing.com", "Bing Netherlands"); - searchEngines.put("uk.bing.com", "Bing UK"); - searchEngines.put("hk.bing.com", "Bing Hong Kong"); - searchEngines.put("nz.bing.com", "Bing New Zeland"); searchEngines.put("search.myway.com", "MyWay"); searchEngines.put("search.mywebsearch.com", "My Web Search"); searchEngines.put("search.ntlworld.com", "NTLWorld"); Modified: pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/xml/XPath.java URL: http://svn.apache.org/viewvc/pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/xml/XPath.java?rev=1784224&r1=1784223&r2=1784224&view=diff ============================================================================== --- pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/xml/XPath.java (original) +++ pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/xml/XPath.java Fri Feb 24 03:34:37 2017 @@ -16,11 +16,8 @@ package org.apache.pig.piggybank.evaluat import java.io.IOException; import java.io.StringReader; import java.util.ArrayList; -import java.util.Iterator; import java.util.List; -import javax.xml.XMLConstants; -import javax.xml.namespace.NamespaceContext; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.xpath.XPathFactory; @@ -52,7 +49,8 @@ public class XPath extends EvalFunc<Stri private static boolean cache = true; private static boolean ignoreNamespace = true; - + public static final String EMPTY_STRING = ""; + /** * input should contain: 1) xml 2) xpath * 3) optional cache xml doc flag @@ -97,13 +95,8 @@ public class XPath extends EvalFunc<Stri return null; } - if(input.size() > 2) { + if(input.size() > 2) cache = (Boolean) input.get(2); - } - - if (input.size() > 3) { - ignoreNamespace = (Boolean) input.get(3); - } if (!cache || xpath == null || !xml.equals(this.xml)) { final InputSource source = new InputSource(new StringReader(xml)); @@ -111,7 +104,6 @@ public class XPath extends EvalFunc<Stri this.xml = xml; // track the xml for subsequent calls to this udf final DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); - dbf.setNamespaceAware(!ignoreNamespace); final DocumentBuilder db = dbf.newDocumentBuilder(); this.document = db.parse(source); @@ -120,32 +112,14 @@ public class XPath extends EvalFunc<Stri this.xpath = xpathFactory.newXPath(); - if (!ignoreNamespace) { - xpath.setNamespaceContext(new NamespaceContext() { - @Override - public String getNamespaceURI(String prefix) { - if (prefix.equals(XMLConstants.DEFAULT_NS_PREFIX)) { - return document.lookupNamespaceURI(null); - } else { - return document.lookupNamespaceURI(prefix); - } - } - - @Override - public String getPrefix(String namespaceURI) { - return document.lookupPrefix(namespaceURI); - } - - @Override - public Iterator getPrefixes(String namespaceURI) { - return null; - } - }); - } } String xpathString = (String) input.get(1); + if (ignoreNamespace) { + xpathString = createNameSpaceIgnoreXpathString(xpathString); + } + final String value = xpath.evaluate(xpathString, document); return value; @@ -191,6 +165,34 @@ public class XPath extends EvalFunc<Stri } return true; } + + + /** + * Returns a new the xPathString by adding additional parameters + * in the existing xPathString for ignoring the namespace during compilation. + * + * @param String xpathString + * @return String modified xpathString + */ + private String createNameSpaceIgnoreXpathString(final String xpathString) { + final String QUERY_PREFIX = "//*"; + final String LOCAL_PREFIX = "[local-name()='"; + final String LOCAL_POSTFIX = "']"; + final String SPLITTER = "/"; + + try { + String xpathStringWithLocalName = EMPTY_STRING; + String[] individualNodes = xpathString.split(SPLITTER); + + for (String node : individualNodes) { + xpathStringWithLocalName = xpathStringWithLocalName.concat(QUERY_PREFIX + LOCAL_PREFIX + node + + LOCAL_POSTFIX); + } + return xpathStringWithLocalName; + } catch (Exception ex) { + return xpathString; + } + } /** * Returns argument schemas of the UDF. Modified: pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/CSVExcelStorage.java URL: http://svn.apache.org/viewvc/pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/CSVExcelStorage.java?rev=1784224&r1=1784223&r2=1784224&view=diff ============================================================================== --- pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/CSVExcelStorage.java (original) +++ pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/CSVExcelStorage.java Fri Feb 24 03:34:37 2017 @@ -580,7 +580,7 @@ public class CSVExcelStorage extends Pig } } else if (b == DOUBLE_QUOTE) { // Does a double quote immediately follow? - if ((i < recordLen-1) && (buf[i+1] == DOUBLE_QUOTE) && (fieldBuffer.position() != 0)) { + if ((i < recordLen-1) && (buf[i+1] == DOUBLE_QUOTE)) { fieldBuffer.put(b); nextTupleSkipChar = true; continue; Modified: pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/DBStorage.java URL: http://svn.apache.org/viewvc/pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/DBStorage.java?rev=1784224&r1=1784223&r2=1784224&view=diff ============================================================================== --- pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/DBStorage.java (original) +++ pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/DBStorage.java Fri Feb 24 03:34:37 2017 @@ -91,7 +91,6 @@ public class DBStorage extends StoreFunc /** * Write the tuple to Database directly here. */ - @Override public void putNext(Tuple tuple) throws IOException { int sqlPos = 1; try { @@ -374,9 +373,4 @@ public class DBStorage extends StoreFunc p.setProperty(SCHEMA_SIGNATURE, s.toString()); } - @Override - public Boolean supportsParallelWriteToStoreLocation() { - return false; - } - } Modified: pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/IndexedStorage.java URL: http://svn.apache.org/viewvc/pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/IndexedStorage.java?rev=1784224&r1=1784223&r2=1784224&view=diff ============================================================================== --- pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/IndexedStorage.java (original) +++ pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/IndexedStorage.java Fri Feb 24 03:34:37 2017 @@ -60,6 +60,7 @@ import org.apache.pig.data.TupleFactory; import org.apache.pig.impl.util.StorageUtil; import org.apache.pig.data.DataType; import org.apache.pig.data.DataByteArray; +import org.apache.pig.backend.hadoop.executionengine.shims.HadoopShims; /** * <code>IndexedStorage</code> is a form of <code>PigStorage</code> that supports a Modified: pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/MultiStorage.java URL: http://svn.apache.org/viewvc/pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/MultiStorage.java?rev=1784224&r1=1784223&r2=1784224&view=diff ============================================================================== --- pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/MultiStorage.java (original) +++ pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/MultiStorage.java Fri Feb 24 03:34:37 2017 @@ -16,9 +16,7 @@ import java.io.ByteArrayOutputStream; import java.io.DataOutputStream; import java.io.IOException; import java.text.NumberFormat; -import java.util.ArrayList; import java.util.HashMap; -import java.util.List; import java.util.Map; import org.apache.hadoop.conf.Configuration; @@ -44,9 +42,6 @@ import org.apache.pig.backend.executione import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MRConfiguration; import org.apache.pig.data.Tuple; import org.apache.pig.impl.util.StorageUtil; -import org.apache.xml.utils.StringBufferPool; - -import com.google.common.base.Strings; /** * The UDF is useful for splitting the output data into a bunch of directories @@ -78,21 +73,13 @@ import com.google.common.base.Strings; * If the output is compressed,then the sub directories and the output files will * be having the extension. Say for example in the above case if bz2 is used one file * will look like ;/my/home/output.bz2/a1.bz2/a1-0000.bz2 - * - * Key field can also be a comma separated list of indices e.g. '0,1' - in this case - * storage will be multi-level: - * /my/home/output/a1/b1/a1-b1-0000 - * /my/home/output/a1/b2/a1-b2-0000 - * There is also an option to leave key values out of storage, see isRemoveKeys. */ public class MultiStorage extends StoreFunc { - private static final String KEYFIELD_DELIMETER = ","; private Path outputPath; // User specified output Path - private final List<Integer> splitFieldIndices= new ArrayList<Integer>(); // Indices of the key fields + private int splitFieldIndex = -1; // Index of the key field private String fieldDel; // delimiter of the output record. private Compression comp; // Compression type of output data. - private boolean isRemoveKeys = false; // Compression types supported by this store enum Compression { @@ -108,14 +95,9 @@ public class MultiStorage extends StoreF this(parentPathStr, splitFieldIndex, compression, "\\t"); } - public MultiStorage(String parentPathStr, String splitFieldIndex, - String compression, String fieldDel) { - this(parentPathStr, splitFieldIndex, compression, fieldDel, "false"); - } - /** * Constructor - * + * * @param parentPathStr * Parent output dir path (this will be specified in store statement, * so MultiStorage don't use this parameter in reality. However, we don't @@ -126,26 +108,18 @@ public class MultiStorage extends StoreF * 'bz2', 'bz', 'gz' or 'none' * @param fieldDel * Output record field delimiter. - * @param isRemoveKeys - * Removes key columns from result during write. */ public MultiStorage(String parentPathStr, String splitFieldIndex, - String compression, String fieldDel, String isRemoveKeys) { - this.isRemoveKeys = Boolean.parseBoolean(isRemoveKeys); + String compression, String fieldDel) { this.outputPath = new Path(parentPathStr); - - String[] splitFieldIndices = splitFieldIndex.split(KEYFIELD_DELIMETER); - for (String splitFieldIndexString : splitFieldIndices){ - this.splitFieldIndices.add(Integer.parseInt(splitFieldIndexString)); - } - + this.splitFieldIndex = Integer.parseInt(splitFieldIndex); this.fieldDel = fieldDel; try { this.comp = (compression == null) ? Compression.none : Compression - .valueOf(compression.toLowerCase()); + .valueOf(compression.toLowerCase()); } catch (IllegalArgumentException e) { System.err.println("Exception when converting compression string: " - + compression + " to enum. No compression will be used"); + + compression + " to enum. No compression will be used"); this.comp = Compression.none; } } @@ -153,26 +127,22 @@ public class MultiStorage extends StoreF //-------------------------------------------------------------------------- // Implementation of StoreFunc - private RecordWriter<List<String>, Tuple> writer; + private RecordWriter<String, Tuple> writer; @Override public void putNext(Tuple tuple) throws IOException { - for (int splitFieldIndex : this.splitFieldIndices) { - if (tuple.size() <= splitFieldIndex) { - throw new IOException("split field index:" + splitFieldIndex - + " >= tuple size:" + tuple.size()); - } + if (tuple.size() <= splitFieldIndex) { + throw new IOException("split field index:" + this.splitFieldIndex + + " >= tuple size:" + tuple.size()); } - List<String> fields = new ArrayList<String>(); - for (int splitFieldIndex : this.splitFieldIndices){ - try { - fields.add(String.valueOf(tuple.get(splitFieldIndex))); - } catch (ExecException exec) { - throw new IOException(exec); - } + Object field = null; + try { + field = tuple.get(splitFieldIndex); + } catch (ExecException exec) { + throw new IOException(exec); } try { - writer.write(fields, tuple); + writer.write(String.valueOf(field), tuple); } catch (InterruptedException e) { throw new IOException(e); } @@ -183,9 +153,6 @@ public class MultiStorage extends StoreF public OutputFormat getOutputFormat() throws IOException { MultiStorageOutputFormat format = new MultiStorageOutputFormat(); format.setKeyValueSeparator(fieldDel); - if (this.isRemoveKeys){ - format.setSkipIndices(this.splitFieldIndices); - } return format; } @@ -207,33 +174,27 @@ public class MultiStorage extends StoreF FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class); } } - - @Override - public Boolean supportsParallelWriteToStoreLocation() { - return false; - } - + //-------------------------------------------------------------------------- // Implementation of OutputFormat public static class MultiStorageOutputFormat extends - TextOutputFormat<List<String>, Tuple> { + TextOutputFormat<String, Tuple> { private String keyValueSeparator = "\\t"; private byte fieldDel = '\t'; - private List<Integer> skipIndices = null; - + @Override - public RecordWriter<List<String>, Tuple> + public RecordWriter<String, Tuple> getRecordWriter(TaskAttemptContext context ) throws IOException, InterruptedException { final TaskAttemptContext ctx = context; - return new RecordWriter<List<String>, Tuple>() { + return new RecordWriter<String, Tuple>() { - private Map<List<String>, MyLineRecordWriter> storeMap = - new HashMap<List<String>, MyLineRecordWriter>(); + private Map<String, MyLineRecordWriter> storeMap = + new HashMap<String, MyLineRecordWriter>(); private static final int BUFFER_SIZE = 1024; @@ -241,7 +202,7 @@ public class MultiStorage extends StoreF new ByteArrayOutputStream(BUFFER_SIZE); @Override - public void write(List<String> key, Tuple val) throws IOException { + public void write(String key, Tuple val) throws IOException { int sz = val.size(); for (int i = 0; i < sz; i++) { Object field; @@ -251,13 +212,9 @@ public class MultiStorage extends StoreF throw ee; } - boolean skipCurrentField = skipIndices != null && skipIndices.contains(i); + StorageUtil.putField(mOut, field); - if (!skipCurrentField) { - StorageUtil.putField(mOut, field); - } - - if (i != sz - 1 && !skipCurrentField) { + if (i != sz - 1) { mOut.write(fieldDel); } } @@ -274,17 +231,17 @@ public class MultiStorage extends StoreF } } - private MyLineRecordWriter getStore(List<String> fieldValues) throws IOException { - MyLineRecordWriter store = storeMap.get(fieldValues); + private MyLineRecordWriter getStore(String fieldValue) throws IOException { + MyLineRecordWriter store = storeMap.get(fieldValue); if (store == null) { - DataOutputStream os = createOutputStream(fieldValues); + DataOutputStream os = createOutputStream(fieldValue); store = new MyLineRecordWriter(os, keyValueSeparator); - storeMap.put(fieldValues, store); + storeMap.put(fieldValue, store); } return store; } - private DataOutputStream createOutputStream(List<String> fieldValues) throws IOException { + private DataOutputStream createOutputStream(String fieldValue) throws IOException { Configuration conf = ctx.getConfiguration(); TaskID taskId = ctx.getTaskAttemptID().getTaskID(); @@ -302,21 +259,7 @@ public class MultiStorage extends StoreF NumberFormat nf = NumberFormat.getInstance(); nf.setMinimumIntegerDigits(4); - StringBuffer pathStringBuffer = new StringBuffer(); - for (String fieldValue : fieldValues){ - String safeFieldValue = fieldValue.replaceAll("\\/","-"); - pathStringBuffer.append(safeFieldValue); - pathStringBuffer.append("/"); - } - pathStringBuffer.deleteCharAt(pathStringBuffer.length()-1); - String pathString = pathStringBuffer.toString(); - String idString = pathString.replaceAll("\\/","-"); - - if (!Strings.isNullOrEmpty(extension)){ - pathString = pathString.replaceAll("\\/",extension+"\\/"); - } - - Path path = new Path(pathString+extension, idString + '-' + Path path = new Path(fieldValue+extension, fieldValue + '-' + nf.format(taskId.getId())+extension); Path workOutputPath = ((FileOutputCommitter)getOutputCommitter(ctx)).getWorkPath(); Path file = new Path(workOutputPath, path); @@ -336,12 +279,8 @@ public class MultiStorage extends StoreF keyValueSeparator = sep; fieldDel = StorageUtil.parseFieldDel(keyValueSeparator); } - - public void setSkipIndices(List<Integer> skipIndices) { - this.skipIndices = skipIndices; - } - - //------------------------------------------------------------------------ + + //------------------------------------------------------------------------ // protected static class MyLineRecordWriter Modified: pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/evaluation/TestOver.java URL: http://svn.apache.org/viewvc/pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/evaluation/TestOver.java?rev=1784224&r1=1784223&r2=1784224&view=diff ============================================================================== --- pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/evaluation/TestOver.java (original) +++ pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/evaluation/TestOver.java Fri Feb 24 03:34:37 2017 @@ -18,11 +18,12 @@ package org.apache.pig.piggybank.evaluation; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; -import java.math.BigDecimal; import java.util.Iterator; +import java.util.List; import java.util.Random; import org.apache.pig.backend.executionengine.ExecException; @@ -33,6 +34,8 @@ import org.apache.pig.data.DataType; import org.apache.pig.data.Tuple; import org.apache.pig.data.TupleFactory; import org.apache.pig.impl.logicalLayer.schema.Schema; + +import org.junit.Before; import org.junit.Test; public class TestOver { @@ -63,25 +66,11 @@ public class TestOver { out = func.outputSchema(in); assertEquals("{org.apache.pig.piggybank.evaluation.over_3: {result: double}}", out.toString()); - // bigdecimal - func = new Over("BIGDECIMAL"); - in = Schema.generateNestedSchema(DataType.BAG, DataType.INTEGER); - out = func.outputSchema(in); - assertEquals("{org.apache.pig.piggybank.evaluation.over_4: {result: bigdecimal}}", out.toString()); - // named func = new Over("bob:chararray"); in = Schema.generateNestedSchema(DataType.BAG, DataType.INTEGER); out = func.outputSchema(in); - assertEquals("{org.apache.pig.piggybank.evaluation.over_5: {bob: chararray}}", out.toString()); - - - // Search inner alias and type - func = new Over("true"); - in = Schema.generateNestedSchema(DataType.BAG, DataType.BIGDECIMAL); - in.getField(0).schema.getField(0).alias="test"; - out = func.outputSchema(in); - assertEquals("{org.apache.pig.piggybank.evaluation.over_6: {test_over: bigdecimal}}", out.toString()); + assertEquals("{org.apache.pig.piggybank.evaluation.over_4: {bob: chararray}}", out.toString()); } @Test @@ -408,28 +397,6 @@ public class TestOver { assertEquals(new Long(10), to.get(0)); } } - - @Test - public void testSumBigDecimal() throws Exception { - Over func = new Over(); - DataBag inbag = BagFactory.getInstance().newDefaultBag(); - for (int i = 0; i < 10; i++) { - Tuple t = TupleFactory.getInstance().newTuple(1); - t.set(0, new BigDecimal(1)); - inbag.add(t); - } - Tuple t = TupleFactory.getInstance().newTuple(4); - t.set(0, inbag); - t.set(1, "sum(bigdecimal)"); - t.set(2, -1); - t.set(3, -1); - DataBag outbag = func.exec(t); - assertEquals(10, outbag.size()); - for (Tuple to : outbag) { - assertEquals(1, to.size()); - assertEquals(new BigDecimal(10), to.get(0)); - } - } @Test public void testAvgDouble() throws Exception { @@ -542,29 +509,6 @@ public class TestOver { } @Test - public void testAvgBigDecimal() throws Exception { - Over func = new Over(); - DataBag inbag = BagFactory.getInstance().newDefaultBag(); - for (int i = 0; i < 10; i++) { - Tuple t = TupleFactory.getInstance().newTuple(1); - t.set(0, new BigDecimal(i)); - inbag.add(t); - } - Tuple t = TupleFactory.getInstance().newTuple(4); - t.set(0, inbag); - t.set(1, "avg(bigdecimal)"); - t.set(2, -1); - t.set(3, -1); - DataBag outbag = func.exec(t); - assertEquals(10, outbag.size()); - for (Tuple to : outbag) { - assertEquals(1, to.size()); - assertEquals(new BigDecimal(4.5), to.get(0)); - } - } - - - @Test public void testMinDouble() throws Exception { Over func = new Over(); DataBag inbag = BagFactory.getInstance().newDefaultBag(); @@ -683,26 +627,6 @@ public class TestOver { assertEquals("0", to.get(0)); } } - - @Test - public void testMinBigDecimal() throws Exception { - Over func = new Over(); - DataBag inbag = BagFactory.getInstance().newDefaultBag(); - for (int i = 0; i < 10; i++) { - Tuple t = TupleFactory.getInstance().newTuple(1); - t.set(0, new BigDecimal(i)); - inbag.add(t); - } - Tuple t = TupleFactory.getInstance().newTuple(2); - t.set(0, inbag); - t.set(1, "min(bigdecimal)"); - DataBag outbag = func.exec(t); - assertEquals(10, outbag.size()); - for (Tuple to : outbag) { - assertEquals(1, to.size()); - assertEquals(new BigDecimal(0), to.get(0)); - } - } @Test public void testMaxDouble() throws Exception { @@ -830,28 +754,6 @@ public class TestOver { assertEquals("9", to.get(0)); } } - - @Test - public void testMaxBigDecimal() throws Exception { - Over func = new Over(); - DataBag inbag = BagFactory.getInstance().newDefaultBag(); - for (int i = 0; i < 10; i++) { - Tuple t = TupleFactory.getInstance().newTuple(1); - t.set(0, new BigDecimal(i)); - inbag.add(t); - } - Tuple t = TupleFactory.getInstance().newTuple(2); - t.set(0, inbag); - t.set(1, "max(bigdecimal)"); - DataBag outbag = func.exec(t); - assertEquals(10, outbag.size()); - int count = 0; - for (Tuple to : outbag) { - assertEquals(1, to.size()); - assertEquals(new BigDecimal(count++), to.get(0)); - } - } - @Test public void testRowNumber() throws Exception { Modified: pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/xml/XPathTest.java URL: http://svn.apache.org/viewvc/pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/xml/XPathTest.java?rev=1784224&r1=1784223&r2=1784224&view=diff ============================================================================== --- pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/xml/XPathTest.java (original) +++ pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/xml/XPathTest.java Fri Feb 24 03:34:37 2017 @@ -151,95 +151,6 @@ public class XPathTest { } @Test - public void testExecTupleWithDontIgnoreNamespace() throws Exception { - - final XPath xpath = new XPath(); - - final Tuple tuple = mock(Tuple.class); - - when(tuple.get(0)).thenReturn("<?xml version='1.0'?>\n" + - "<foo:document xmlns:foo=\"http://apache.org/foo\" xmlns:bar=\"http://apache.org/bar\">" + - "<bar:element>MyBar</bar:element>" + - "</foo:document>"); - - when(tuple.size()).thenReturn(4); - when(tuple.get(2)).thenReturn(true); - when(tuple.get(3)).thenReturn(false); - - when(tuple.get(1)).thenReturn("/foo:document/bar:element"); - assertEquals("MyBar", xpath.exec(tuple)); - - } - - @Test - public void testExecTupleWithDontIgnoreNamespace() throws Exception { - - final XPath xpath = new XPath(); - - final Tuple tuple = mock(Tuple.class); - - when(tuple.get(0)).thenReturn("<?xml version='1.0'?>\n" + - "<foo:document xmlns:foo=\"http://apache.org/foo\" xmlns:bar=\"http://apache.org/bar\">" + - "<bar:element>MyBar</bar:element>" + - "</foo:document>"); - - when(tuple.size()).thenReturn(4); - when(tuple.get(2)).thenReturn(true); - when(tuple.get(3)).thenReturn(false); - - when(tuple.get(1)).thenReturn("/foo:document/bar:element"); - assertEquals("MyBar", xpath.exec(tuple)); - - } - - @Test - public void testExecTupleWithDontIgnoreNamespace() throws Exception { - - final XPath xpath = new XPath(); - - final Tuple tuple = mock(Tuple.class); - - when(tuple.get(0)).thenReturn("<?xml version='1.0'?>\n" + - "<foo:document xmlns:foo=\"http://apache.org/foo\" xmlns:bar=\"http://apache.org/bar\">" + - "<bar:element>MyBar</bar:element>" + - "</foo:document>"); - - when(tuple.size()).thenReturn(4); - when(tuple.get(2)).thenReturn(true); - when(tuple.get(3)).thenReturn(false); - - when(tuple.get(1)).thenReturn("/foo:document/bar:element"); - assertEquals("MyBar", xpath.exec(tuple)); - - } - - - @Test - public void testFunctionInXPath() throws Exception { - - final XPath xpath = new XPath(); - - final Tuple tuple = mock(Tuple.class); - - when(tuple.get(0)).thenReturn("<Aa name=\"test1\">" + - "<Bb Cc=\"1\"/>" + - "<Bb Cc=\"1\"/>" + - "<Bb Cc=\"1\"/>" + - "<Bb Cc=\"1\"/>" + - "<Dd>test2</Dd>" + - "</Aa>"); - - when(tuple.size()).thenReturn(4); - when(tuple.get(1)).thenReturn("sum(Aa/Bb/@Cc)"); - when(tuple.get(2)).thenReturn(true); - when(tuple.get(3)).thenReturn(true); - - assertEquals("4", xpath.exec(tuple)); - - } - - - @Test public void testExecTupleWithElementNodeWithComplexNameSpace() throws Exception { final XPath xpath = new XPath(); @@ -299,31 +210,7 @@ public class XPathTest { assertEquals("4 stars3.5 stars4 stars4.2 stars3.5 stars", xpath.exec(tuple)); } - - @Test - public void testFunctionInXPath() throws Exception { - - final XPath xpath = new XPath(); - - final Tuple tuple = mock(Tuple.class); - - when(tuple.get(0)).thenReturn("<Aa name=\"test1\">" + - "<Bb Cc=\"1\"/>" + - "<Bb Cc=\"1\"/>" + - "<Bb Cc=\"1\"/>" + - "<Bb Cc=\"1\"/>" + - "<Dd>test2</Dd>" + - "</Aa>"); - - when(tuple.size()).thenReturn(4); - when(tuple.get(1)).thenReturn("sum(Aa/Bb/@Cc)"); - when(tuple.get(2)).thenReturn(true); - when(tuple.get(3)).thenReturn(true); - - assertEquals("4", xpath.exec(tuple)); - - } - + @Ignore //--optional test @Test public void testCacheBenefit() throws Exception{ Modified: pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestCSVExcelStorage.java URL: http://svn.apache.org/viewvc/pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestCSVExcelStorage.java?rev=1784224&r1=1784223&r2=1784224&view=diff ============================================================================== --- pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestCSVExcelStorage.java (original) +++ pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestCSVExcelStorage.java Fri Feb 24 03:34:37 2017 @@ -218,7 +218,7 @@ public class TestCSVExcelStorage { Util.registerMultiLineQuery(pig, script); Iterator<Tuple> it = pig.openIterator("a"); Assert.assertEquals(Util.createTuple(new String[] {"foo,\"bar\",baz"}), it.next()); - Assert.assertEquals(Util.createTuple(new String[] {"\"\"\""}), it.next()); + Assert.assertEquals(Util.createTuple(new String[] {"\"\"\"\""}), it.next()); } // Handle newlines in fields Modified: pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestLogFormatLoader.java URL: http://svn.apache.org/viewvc/pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestLogFormatLoader.java?rev=1784224&r1=1784223&r2=1784224&view=diff ============================================================================== --- pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestLogFormatLoader.java (original) +++ pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestLogFormatLoader.java Fri Feb 24 03:34:37 2017 @@ -109,7 +109,7 @@ public class TestLogFormatLoader { Tuple actual = out.get(0); Tuple expected = tuple( "2001:980:91c0:1:8d31:a232:25e5:85d", - "05/Sep/2010:11:27:50 +0200", + "[05/Sep/2010:11:27:50 +0200]", "koken-pannen_303_hs-koken-pannen-afj-120601_B3_product_1_9200000002876066", map( "promo" , "koken-pannen_303_hs-koken-pannen-afj-120601_B3_product_1_9200000002876066", Modified: pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestMultiStorage.java URL: http://svn.apache.org/viewvc/pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestMultiStorage.java?rev=1784224&r1=1784223&r2=1784224&view=diff ============================================================================== --- pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestMultiStorage.java (original) +++ pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestMultiStorage.java Fri Feb 24 03:34:37 2017 @@ -18,41 +18,34 @@ import java.io.FileWriter; import java.io.IOException; import java.io.InputStreamReader; import java.io.PrintWriter; -import java.util.List; -import java.util.Map; - import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathFilter; +import org.apache.pig.ExecType; import org.apache.pig.PigServer; -import org.apache.pig.backend.executionengine.ExecJob; -import org.apache.pig.test.MiniGenericCluster; +import org.apache.pig.backend.executionengine.ExecException; +import org.apache.pig.test.MiniCluster; import org.apache.pig.test.Util; -import org.apache.pig.tools.pigstats.JobStats; -import org.apache.pig.tools.pigstats.OutputStats; -import org.apache.pig.tools.pigstats.PigStats; -import org.apache.pig.tools.pigstats.mapreduce.SimplePigStats; import org.junit.After; -import org.junit.AfterClass; import org.junit.Before; import org.junit.Test; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; +import junit.framework.Assert; +import junit.framework.TestCase; -public class TestMultiStorage { +public class TestMultiStorage extends TestCase { private static final String INPUT_FILE = "MultiStorageInput.txt"; private PigServer pigServer; private PigServer pigServerLocal; - private static MiniGenericCluster cluster = MiniGenericCluster.buildCluster(); + private MiniCluster cluster = MiniCluster.buildCluster(); - public TestMultiStorage() throws Exception { - pigServer = new PigServer(cluster.getExecType(), cluster.getProperties()); - pigServerLocal = new PigServer(Util.getLocalTestMode()); + public TestMultiStorage() throws ExecException, IOException { + pigServer = new PigServer(ExecType.MAPREDUCE, cluster.getProperties()); + pigServerLocal = new PigServer(ExecType.LOCAL); } public static final PathFilter hiddenPathFilter = new PathFilter() { @@ -81,83 +74,59 @@ public class TestMultiStorage { Util.copyFromLocalToCluster(cluster, INPUT_FILE, INPUT_FILE); } + @Override @Before public void setUp() throws Exception { createFile(); FileSystem fs = FileSystem.getLocal(new Configuration()); Path localOut = new Path("local-out"); + Path dummy = new Path("dummy"); if (fs.exists(localOut)) { fs.delete(localOut, true); } + if (fs.exists(dummy)) { + fs.delete(dummy, true); + } } + @Override @After public void tearDown() throws Exception { new File(INPUT_FILE).delete(); Util.deleteFile(cluster, INPUT_FILE); - } - - @AfterClass - public static void shutdown() { cluster.shutDown(); } enum Mode { local, cluster - } + }; @Test public void testMultiStorage() throws IOException { final String LOAD = "A = LOAD '" + INPUT_FILE + "' as (id, name, n);"; final String MULTI_STORE_CLUSTER = "STORE A INTO 'mr-out' USING " + "org.apache.pig.piggybank.storage.MultiStorage('mr-out', '1');"; - final String MULTI_STORE_LOCAL = "STORE A INTO 'local-out' USING " + final String MULTI_STORE_LOCAL = "STORE A INTO 'dummy' USING " + "org.apache.pig.piggybank.storage.MultiStorage('local-out', '1');"; System.out.print("Testing in LOCAL mode: ..."); - testMultiStorage(Mode.local, "local-out", LOAD, MULTI_STORE_LOCAL); + //testMultiStorage(Mode.local, "local-out", LOAD, MULTI_STORE_LOCAL); System.out.println("Succeeded!"); - + System.out.print("Testing in CLUSTER mode: ..."); testMultiStorage( Mode.cluster, "mr-out", LOAD, MULTI_STORE_CLUSTER); System.out.println("Succeeded!"); + + } - @Test - public void testOutputStats() throws IOException { - FileSystem fs = cluster.getFileSystem(); - - pigServer.setBatchOn(); - pigServer.registerQuery("A = LOAD '" + INPUT_FILE + "' as (id, name, n);"); - pigServer.registerQuery("B = FILTER A BY name == 'apple';"); - pigServer.registerQuery("STORE A INTO 'out1' USING org.apache.pig.piggybank.storage.MultiStorage('out1', '1');"); //153 bytes - pigServer.registerQuery("STORE B INTO 'out2' USING org.apache.pig.piggybank.storage.MultiStorage('out2', '1');"); // 45 bytes - - ExecJob job = pigServer.executeBatch().get(0); - - PigStats stats = job.getStatistics(); - PigStats.JobGraph jobGraph = stats.getJobGraph(); - JobStats jobStats = (JobStats) jobGraph.getSinks().get(0); - Map<String, Long> multiStoreCounters = jobStats.getMultiStoreCounters(); - List<OutputStats> outputStats = SimplePigStats.get().getOutputStats(); - OutputStats outputStats1 = "out1".equals(outputStats.get(0).getName()) ? outputStats.get(0) : outputStats.get(1); - OutputStats outputStats2 = "out2".equals(outputStats.get(0).getName()) ? outputStats.get(0) : outputStats.get(1); - - assertEquals(153 + 45, stats.getBytesWritten()); - assertEquals(2, outputStats.size()); // 2 split conditions - assertEquals(153, outputStats1.getBytes()); - assertEquals(45, outputStats2.getBytes()); - assertEquals(9, outputStats1.getRecords()); - assertEquals(3, outputStats2.getRecords()); - assertEquals(3L, multiStoreCounters.get("Output records in _1_out2").longValue()); - assertEquals(9L, multiStoreCounters.get("Output records in _0_out1").longValue()); - - fs.delete(new Path("out1"), true); - fs.delete(new Path("out2"), true); - } - - /** - * The actual method that run the test in local or cluster mode. + /** + * The actual method that run the test in local or cluster mode. + * + * @param pigServer + * @param mode + * @param queries + * @throws IOException */ private void testMultiStorage( Mode mode, String outPath, String... queries) throws IOException { @@ -173,38 +142,42 @@ public class TestMultiStorage { /** * Test if records are split into directories corresponding to split field * values + * + * @param mode + * @throws IOException */ private void verifyResults(Mode mode, String outPath) throws IOException { FileSystem fs = (Mode.local == mode ? FileSystem .getLocal(new Configuration()) : cluster.getFileSystem()); Path output = new Path(outPath); - assertTrue("Output dir does not exists!", fs.exists(output) + Assert.assertTrue("Output dir does not exists!", fs.exists(output) && fs.getFileStatus(output).isDir()); Path[] paths = FileUtil.stat2Paths(fs.listStatus(output, hiddenPathFilter)); - assertTrue("Split field dirs not found!", paths != null); + Assert.assertTrue("Split field dirs not found!", paths != null); for (Path path : paths) { String splitField = path.getName(); Path[] files = FileUtil.stat2Paths(fs.listStatus(path, hiddenPathFilter)); - assertTrue("No files found for path: " + path.toUri().getPath(), + Assert.assertTrue("No files found for path: " + path.toUri().getPath(), files != null); for (Path filePath : files) { - assertTrue("This shouldn't be a directory", fs.isFile(filePath)); + Assert.assertTrue("This shouldn't be a directory", fs.isFile(filePath)); + BufferedReader reader = new BufferedReader(new InputStreamReader(fs .open(filePath))); String line = ""; int count = 0; while ((line = reader.readLine()) != null) { String[] fields = line.split("\\t"); - assertEquals(fields.length, 3); - assertEquals("Unexpected field value in the output record", + Assert.assertEquals(fields.length, 3); + Assert.assertEquals("Unexpected field value in the output record", splitField, fields[1]); count++; System.out.println("field: " + fields[1]); - } + } reader.close(); - assertEquals(count, 3); + Assert.assertEquals(count, 3); } } } Modified: pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestMultiStorageCompression.java URL: http://svn.apache.org/viewvc/pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestMultiStorageCompression.java?rev=1784224&r1=1784223&r2=1784224&view=diff ============================================================================== --- pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestMultiStorageCompression.java (original) +++ pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestMultiStorageCompression.java Fri Feb 24 03:34:37 2017 @@ -21,14 +21,11 @@ import java.io.FileNotFoundException; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; -import java.util.Collection; import java.util.Collections; import java.util.List; -import java.util.Set; import junit.framework.TestCase; -import org.apache.commons.io.FileUtils; import org.apache.hadoop.conf.Configurable; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.compress.BZip2Codec; @@ -40,10 +37,6 @@ import org.apache.pig.backend.executione import org.apache.pig.impl.logicalLayer.FrontendException; import org.apache.pig.test.Util; -import com.google.common.collect.Sets; - -import org.junit.Assert; - public class TestMultiStorageCompression extends TestCase { private static String patternString = "(\\d+)!+(\\w+)~+(\\w+)"; @@ -66,8 +59,8 @@ public class TestMultiStorageCompression filesToDelete.add(outputPath); try { - runQuery(outputPath, "0", type); - verifyResults(type, outputPath); + runQuery(outputPath, type); + verifyResults(type, filesToDelete, outputPath); } finally { cleanUpDirs(filesToDelete); } @@ -84,22 +77,22 @@ public class TestMultiStorageCompression filesToDelete.add(outputPath); try { - runQuery(outputPath, "0", type); - verifyResults(type, outputPath); + runQuery(outputPath, type); + verifyResults(type, filesToDelete, outputPath); } finally { cleanUpDirs(filesToDelete); } } - private void cleanUpDirs(List<String> filesToDelete) throws IOException { + private void cleanUpDirs(List<String> filesToDelete) { // Delete files recursively Collections.reverse(filesToDelete); for (String string : filesToDelete) - FileUtils.deleteDirectory(new File(string)); + new File(string).delete(); } - private void verifyResults(String type, + private void verifyResults(String type, List<String> filesToDelete, String outputPath) throws IOException, FileNotFoundException { // Verify the output File outputDir = new File(outputPath); @@ -121,10 +114,12 @@ public class TestMultiStorageCompression continue; String topFolder = outputPath + File.separator + indexFolder; File indexFolderFile = new File(topFolder); + filesToDelete.add(topFolder); String[] list = indexFolderFile.list(); for (String outputFile : list) { String file = topFolder + File.separator + outputFile; + filesToDelete.add(file); // Skip off any file starting with . if (outputFile.startsWith(".")) @@ -164,7 +159,7 @@ public class TestMultiStorageCompression } } - private void runQuery(String outputPath, String keyColIndices, String compressionType) + private void runQuery(String outputPath, String compressionType) throws Exception, ExecException, IOException, FrontendException { // create a data file @@ -177,7 +172,7 @@ public class TestMultiStorageCompression String query2 = "STORE A INTO '" + Util.encodeEscape(outputPath) + "' USING org.apache.pig.piggybank.storage.MultiStorage" + "('" - + Util.encodeEscape(outputPath) + "','"+keyColIndices+"', '" + compressionType + "', '\\t');"; + + Util.encodeEscape(outputPath) + "','0', '" + compressionType + "', '\\t');"; // Run Pig pig.setBatchOn(); @@ -187,32 +182,5 @@ public class TestMultiStorageCompression pig.executeBatch(); } - public void testMultiStorageShouldSupportMultiLevelAndGz() throws Exception { - String type = "gz"; - String outputDir = "output001.multi." + type; - List<String> filesToDelete = new ArrayList<String>(); - - String tmpDir = System.getProperty("java.io.tmpdir"); - String outputPath = tmpDir + File.separator + outputDir; - - filesToDelete.add(outputPath); - try { - runQuery(outputPath, "1,0", type); - Collection<File> fileList = FileUtils.listFiles(new File(outputPath),null,true); - Set<String> expectedPaths = Sets.newHashSet( "output001.multi.gz/a.gz/f1.gz/a-f1-0,000.gz", - "output001.multi.gz/b.gz/f2.gz/b-f2-0,000.gz", - "output001.multi.gz/c.gz/f3.gz/c-f3-0,000.gz", - "output001.multi.gz/d.gz/f4.gz/d-f4-0,000.gz"); - for (File file : fileList){ - String foundPath = file.getAbsolutePath().substring(file.getAbsolutePath().indexOf(outputDir)); - if (expectedPaths.contains(foundPath)){ - expectedPaths.remove(foundPath); - } - } - Assert.assertTrue(expectedPaths.isEmpty()); - } finally { - cleanUpDirs(filesToDelete); - } - } } Modified: pig/branches/spark/ivy.xml URL: http://svn.apache.org/viewvc/pig/branches/spark/ivy.xml?rev=1784224&r1=1784223&r2=1784224&view=diff ============================================================================== --- pig/branches/spark/ivy.xml (original) +++ pig/branches/spark/ivy.xml Fri Feb 24 03:34:37 2017 @@ -38,8 +38,10 @@ <conf name="jdiff" visibility="private"/> <conf name="checkstyle" visibility="private"/> <conf name="buildJar" extends="compile,test" visibility="private"/> - <conf name="hadoop2" visibility="private"/> - <conf name="hbase1" visibility="private"/> + <conf name="hadoop20" visibility="private"/> + <conf name="hadoop23" visibility="private"/> + <conf name="hbase94" visibility="private"/> + <conf name="hbase95" visibility="private"/> <conf name="spark" visibility="private" /> </configurations> <publications> @@ -59,17 +61,17 @@ <dependency org="commons-beanutils" name="commons-beanutils-core" rev="${commons-beanutils.version}" conf="checkstyle->master"/> <dependency org="xmlenc" name="xmlenc" rev="${xmlenc.version}" - conf="hadoop2->master"/> + conf="hadoop23->master"/> <dependency org="com.sun.jersey" name="jersey-bundle" rev="${jersey.version}" - conf="hadoop2->master"/> + conf="hadoop23->master"/> <dependency org="com.sun.jersey" name="jersey-server" rev="${jersey.version}" - conf="hadoop2->master"/> + conf="hadoop23->master"/> <dependency org="com.sun.jersey.contribs" name="jersey-guice" rev="${jersey.version}" - conf="hadoop2->master"/> + conf="hadoop23->master"/> <dependency org="commons-codec" name="commons-codec" rev="${commons-codec.version}" - conf="hadoop2->master"/> + conf="hadoop23->master"/> <dependency org="commons-httpclient" name="commons-httpclient" rev="${commons-httpclient.version}" - conf="hadoop2->master"/> + conf="hadoop23->master"/> <dependency org="commons-el" name="commons-el" rev="${commons-el.version}" conf="compile->master"/> <dependency org="commons-io" name="commons-io" rev="${commons-io.version}" @@ -87,86 +89,92 @@ <dependency org="nl.basjes.parse" name="parser-core" rev="${basjes-httpdlog-pigloader.version}" conf="compile->master"/> <dependency org="commons-configuration" name="commons-configuration" rev="${commons-configuration.version}" - conf="hadoop2->master"/> + conf="hadoop23->master"/> <dependency org="commons-collections" name="commons-collections" rev="${commons-collections.version}" - conf="hadoop2->master"/> + conf="hadoop23->master"/> <dependency org="javax.servlet" name="servlet-api" rev="${servlet-api.version}" - conf="hadoop2->master"/> + conf="hadoop23->master"/> <dependency org="javax.ws.rs" name="jsr311-api" rev="${jsr311-api.version}" - conf="hadoop2->master"/> + conf="hadoop23->master"/> <dependency org="com.google.protobuf" name="protobuf-java" rev="${protobuf-java.version}" - conf="hadoop2->master"/> + conf="hadoop23->master"/> <dependency org="javax.inject" name="javax.inject" rev="${javax-inject.version}" - conf="hadoop2->master"/> + conf="hadoop23->master"/> <dependency org="javax.xml.bind" name="jaxb-api" rev="${jaxb-api.version}" - conf="hadoop2->master"/> + conf="hadoop23->master"/> <dependency org="com.sun.xml.bind" name="jaxb-impl" rev="${jaxb-impl.version}" - conf="hadoop2->master"/> + conf="hadoop23->master"/> <dependency org="com.google.inject" name="guice" rev="${guice.version}" - conf="hadoop2->master"/> + conf="hadoop23->master"/> <dependency org="com.google.inject.extensions" name="guice-servlet" rev="${guice-servlet.version}" - conf="hadoop2->master"/> + conf="hadoop23->master"/> <dependency org="aopalliance" name="aopalliance" rev="${aopalliance.version}" - conf="hadoop2->master"/> + conf="hadoop23->master"/> <dependency org="org.mortbay.jetty" name="jsp-2.1" rev="${jasper.version}" - conf="hadoop2->master"/> + conf="hadoop23->master"/> <dependency org="org.mortbay.jetty" name="jsp-api-2.1" rev="${jasper.version}" - conf="hadoop2->master"/> + conf="hadoop23->master"/> <dependency org="log4j" name="log4j" rev="${log4j.version}" conf="compile->master"/> - <dependency org="org.apache.hadoop" name="hadoop-annotations" - rev="${hadoop-common.version}" conf="hadoop2->master"/> + <dependency org="com.sun.jersey" name="jersey-core" rev="${jersey-core.version}" + conf="hadoop20->default"/> + <dependency org="org.apache.hadoop" name="hadoop-core" rev="${hadoop-core.version}" + conf="hadoop20->default"/> + <dependency org="org.apache.hadoop" name="hadoop-test" rev="${hadoop-test.version}" + conf="hadoop20->default"/> + <dependency org="org.apache.hadoop" name="hadoop-annotations" + rev="${hadoop-common.version}" conf="hadoop23->master"/> <dependency org="org.apache.hadoop" name="hadoop-auth" - rev="${hadoop-common.version}" conf="hadoop2->master"/> + rev="${hadoop-common.version}" conf="hadoop23->master"/> <dependency org="org.apache.hadoop" name="hadoop-common" - rev="${hadoop-common.version}" conf="hadoop2->master"> + rev="${hadoop-common.version}" conf="hadoop23->master"> <artifact name="hadoop-common" ext="jar" /> <artifact name="hadoop-common" type="tests" ext="jar" m:classifier="tests" /> </dependency> <dependency org="org.apache.hadoop" name="hadoop-hdfs" - rev="${hadoop-hdfs.version}" conf="hadoop2->master"> + rev="${hadoop-hdfs.version}" conf="hadoop23->master"> <artifact name="hadoop-hdfs" ext="jar" /> <artifact name="hadoop-hdfs" type="tests" ext="jar" m:classifier="tests" /> </dependency> <dependency org="org.apache.hadoop" name="hadoop-mapreduce-client-core" rev="${hadoop-mapreduce.version}" - conf="hadoop2->master"/> + conf="hadoop23->master"/> <dependency org="org.apache.hadoop" name="hadoop-mapreduce-client-jobclient" rev="${hadoop-mapreduce.version}" - conf="hadoop2->master"> + conf="hadoop23->master"> <artifact name="hadoop-mapreduce-client-jobclient" ext="jar" /> <artifact name="hadoop-mapreduce-client-jobclient" type="tests" ext="jar" m:classifier="tests"/> <exclude org="commons-daemon" module="commons-daemon"/><!--bad POM--> <exclude org="org.apache.commons" module="commons-daemon"/><!--bad POM--> </dependency> <dependency org="org.apache.hadoop" name="hadoop-yarn-server-tests" rev="${hadoop-mapreduce.version}" - conf="hadoop2->master"> + conf="hadoop23->master"> <artifact name="hadoop-yarn-server-tests" type="jar" m:classifier="tests"/> </dependency> <dependency org="org.apache.hadoop" name="hadoop-mapreduce-client-app" rev="${hadoop-mapreduce.version}" - conf="hadoop2->master" /> + conf="hadoop23->master" /> <dependency org="org.apache.hadoop" name="hadoop-mapreduce-client-shuffle" rev="${hadoop-mapreduce.version}" - conf="hadoop2->master" /> + conf="hadoop23->master" /> <dependency org="org.apache.hadoop" name="hadoop-mapreduce-client-common" - rev="${hadoop-mapreduce.version}" conf="hadoop2->master"/> + rev="${hadoop-mapreduce.version}" conf="hadoop23->master"/> <dependency org="org.apache.hadoop" name="hadoop-yarn-api" - rev="${hadoop-mapreduce.version}" conf="hadoop2->master"/> + rev="${hadoop-mapreduce.version}" conf="hadoop23->master"/> <dependency org="org.apache.hadoop" name="hadoop-yarn-common" - rev="${hadoop-mapreduce.version}" conf="hadoop2->master"/> + rev="${hadoop-mapreduce.version}" conf="hadoop23->master"/> <dependency org="org.apache.hadoop" name="hadoop-yarn-server" - rev="${hadoop-mapreduce.version}" conf="hadoop2->master"/> + rev="${hadoop-mapreduce.version}" conf="hadoop23->master"/> <dependency org="org.apache.hadoop" name="hadoop-yarn-server-web-proxy" - rev="${hadoop-mapreduce.version}" conf="hadoop2->master"/> + rev="${hadoop-mapreduce.version}" conf="hadoop23->master"/> <dependency org="org.apache.hadoop" name="hadoop-yarn-server-common" - rev="${hadoop-mapreduce.version}" conf="hadoop2->master"/> + rev="${hadoop-mapreduce.version}" conf="hadoop23->master"/> <dependency org="org.apache.hadoop" name="hadoop-yarn-server-nodemanager" - rev="${hadoop-mapreduce.version}" conf="hadoop2->master"/> + rev="${hadoop-mapreduce.version}" conf="hadoop23->master"/> <dependency org="org.apache.hadoop" name="hadoop-yarn-server-resourcemanager" - rev="${hadoop-mapreduce.version}" conf="hadoop2->master"/> + rev="${hadoop-mapreduce.version}" conf="hadoop23->master"/> <dependency org="org.apache.hadoop" name="hadoop-yarn-client" - rev="${hadoop-mapreduce.version}" conf="hadoop2->master"/> + rev="${hadoop-mapreduce.version}" conf="hadoop23->master"/> <dependency org="org.apache.hadoop" name="hadoop-yarn-server-applicationhistoryservice" - rev="${hadoop-mapreduce.version}" conf="hadoop2->master"/> + rev="${hadoop-mapreduce.version}" conf="hadoop23->master"/> <dependency org="org.apache.hadoop" name="hadoop-mapreduce-client-hs" - rev="${hadoop-mapreduce.version}" conf="hadoop2->master"/> + rev="${hadoop-mapreduce.version}" conf="hadoop23->master"/> <dependency org="org.mortbay.jetty" name="jetty" rev="${jetty.version}" conf="compile->master"> <artifact name="jetty" ext="jar" /> @@ -185,7 +193,13 @@ <exclude org="org.codehaus.jackson" module="jackson-mapper-asl"/> </dependency> <dependency org="org.apache.avro" name="avro-mapred" rev="${avro.version}" - conf="hadoop2->default;checkstyle->master"> + conf="hadoop20->default;checkstyle->master"> + <exclude org="org.codehaus.jackson" module="jackson-core-asl"/> + <exclude org="org.codehaus.jackson" module="jackson-mapper-asl"/> + <exclude org="io.netty" module="netty"/> + </dependency> + <dependency org="org.apache.avro" name="avro-mapred" rev="${avro.version}" + conf="hadoop23->default;checkstyle->master"> <artifact name="avro-mapred" type="jar" m:classifier="hadoop2"/> <exclude org="org.codehaus.jackson" module="jackson-core-asl"/> <exclude org="org.codehaus.jackson" module="jackson-mapper-asl"/> @@ -247,14 +261,37 @@ <dependency org="org.antlr" name="ST4" rev="${stringtemplate.version}" conf="compile->default"/> <dependency org="org.apache.zookeeper" name="zookeeper" rev="${zookeeper.version}" conf="compile->master"/> <dependency org="io.netty" name="netty" rev="${netty.version}" conf="test->master"/> - <dependency org="io.netty" name="netty-all" rev="${netty-all.version}" conf="test->master" /> <dependency org="dk.brics.automaton" name="automaton" rev="1.11-8" conf="compile->default"/> <dependency org="org.jruby" name="jruby-complete" rev="${jruby.version}" conf="compile->master"/> <dependency org="asm" name="asm" rev="${asm.version}" conf="compile->default"/> + <!-- HBase dependency in format for releases up to 0.94 (including) --> + <dependency org="org.apache.hbase" name="hbase" rev="${hbase94.version}" conf="hbase94->master"> + <artifact name="hbase" type="jar"/> + <artifact name="hbase" type="test-jar" ext="jar" m:classifier="tests"/> + <exclude org="org.apache.thrift" module="thrift"/> + <exclude org="org.apache.hadoop" module="hadoop-core"/> + <exclude org="org.apache.ant" module="ant" /> + <exclude org="org.slf4j" module="slf4j"/> + <exclude org="org.slf4j" module="slf4j-api"/> + <exclude org="org.slf4j" module="slf4j-log4j12" /> + <exclude org="org.slf4j" module="log4j12"/> + <exclude org="org.slf4j" module="log4j-over-slf4j"/> + <exclude org="stax" module="stax-api" /> + <exclude org="javax.xml.bind" module="jaxb-api" /> + <exclude org="javax.ws.rs" module="jsr311-api" /> + <exclude org="tomcat" module="jasper-runtime"/> + <exclude org="tomcat" module="jasper-compiler"/> + <exclude org="com.google.protobuf" module="protobuf-java"/> + <exclude org="com.sun.jersey" module="jersey-core"/> + <exclude org="com.sun.jersey" module="jersey-server"/> + <exclude org="com.sun.jersey" module="jersey-json"/> + <exclude org="asm" module="asm"/> + </dependency> + <!-- HBase dependency in format for releases higher or equal to 0.95 --> - <dependency org="org.apache.hbase" name="hbase-client" rev="${hbase1.version}" conf="hbase1->master"> + <dependency org="org.apache.hbase" name="hbase-client" rev="${hbase95.version}" conf="hbase95->master"> <artifact name="hbase-client" type="jar"/> <artifact name="hbase-client" type="test-jar" ext="jar" m:classifier="tests"/> <exclude org="org.slf4j" module="slf4j-api"/> @@ -270,7 +307,7 @@ <exclude org="asm" module="asm"/> </dependency> - <dependency org="org.apache.hbase" name="hbase-common" rev="${hbase1.version}" conf="hbase1->master"> + <dependency org="org.apache.hbase" name="hbase-common" rev="${hbase95.version}" conf="hbase95->master"> <artifact name="hbase-common" type="jar"/> <artifact name="hbase-common" type="test-jar" ext="jar" m:classifier="tests"/> <exclude org="org.apache.hadoop" module="hadoop-core"/> @@ -285,7 +322,7 @@ <exclude org="asm" module="asm"/> </dependency> - <dependency org="org.apache.hbase" name="hbase-server" rev="${hbase1.version}" conf="hbase1->master"> + <dependency org="org.apache.hbase" name="hbase-server" rev="${hbase95.version}" conf="hbase95->master"> <artifact name="hbase-server" type="jar"/> <artifact name="hbase-server" type="test-jar" ext="jar" m:classifier="tests"/> <exclude org="org.apache.hadoop" module="hadoop-core"/> @@ -302,20 +339,20 @@ <exclude org="asm" module="asm"/> </dependency> - <dependency org="org.apache.hbase" name="hbase-protocol" rev="${hbase1.version}" conf="hbase1->master"> + <dependency org="org.apache.hbase" name="hbase-protocol" rev="${hbase95.version}" conf="hbase95->master"> <artifact name="hbase-protocol" type="jar"/> <artifact name="hbase-protocol" type="test-jar" ext="jar" m:classifier="tests"/> <exclude org="com.google.protobuf" module="protobuf-java"/> </dependency> - <dependency org="org.apache.hbase" name="hbase-hadoop-compat" rev="${hbase1.version}" conf="hbase1->master"> + <dependency org="org.apache.hbase" name="hbase-hadoop-compat" rev="${hbase95.version}" conf="hbase95->master"> <artifact name="hbase-hadoop-compat" type="jar"/> <artifact name="hbase-hadoop-compat" type="test-jar" ext="jar" m:classifier="tests"/> </dependency> - <dependency org="org.apache.hbase" name="hbase-hadoop2-compat" rev="${hbase1.version}" conf="hbase1->master"> - <artifact name="hbase-hadoop2-compat" type="jar"/> - <artifact name="hbase-hadoop2-compat" type="test-jar" ext="jar" m:classifier="tests"/> + <dependency org="org.apache.hbase" name="hbase-${hbase.hadoop.version}-compat" rev="${hbase95.version}" conf="hbase95->master"> + <artifact name="hbase-${hbase.hadoop.version}-compat" type="jar"/> + <artifact name="hbase-${hbase.hadoop.version}-compat" type="test-jar" ext="jar" m:classifier="tests"/> <exclude org="org.apache.hadoop" module="hadoop-core"/> <exclude org="org.slf4j" module="slf4j-api"/> <exclude org="stax" module="stax-api" /> @@ -328,17 +365,20 @@ <exclude org="asm" module="asm"/> </dependency> - <dependency org="org.htrace" name="htrace-core" rev="3.0.4" conf="hadoop2->master"/> - <dependency org="org.apache.htrace" name="htrace-core" rev="${htrace.version}" conf="hadoop2->master"/> + <dependency org="org.htrace" name="htrace-core" rev="3.0.4" conf="hadoop23->master"/> + <dependency org="org.apache.htrace" name="htrace-core" rev="${htrace.version}" conf="hadoop23->master"/> <dependency org="org.fusesource.leveldbjni" name="leveldbjni-all" rev="${leveldbjni.version}" - conf="hadoop2->master"/> - <dependency org="org.cloudera.htrace" name="htrace-core" rev="2.00" conf="hbase1->master"> + conf="hadoop23->master"/> + <dependency org="org.cloudera.htrace" name="htrace-core" rev="2.00" conf="hbase95->master"> <artifact name="htrace-core" type="jar"/> </dependency> - <dependency org="com.lmax" name="disruptor" rev="3.3.0" conf="hbase1->master"/> + <dependency org="com.lmax" name="disruptor" rev="3.3.0" conf="hbase95->master"/> <!-- for TestHBaseStorage --> - <dependency org="org.apache.hbase" name="hbase-procedure" rev="${hbase1.version}" conf="test->master"/> + <dependency org="com.github.stephenc.high-scale-lib" name="high-scale-lib" rev="${high-scale-lib.version}" + conf="test->default"/> + <dependency org="com.google.protobuf" name="protobuf-java" rev="${protobuf-java.version}" + conf="test->default"/> <dependency org="com.yammer.metrics" name="metrics-core" rev="${metrics-core.version}" conf="test->default"/> @@ -388,10 +428,10 @@ conf="compile->master" /> <dependency org="org.apache.hive.shims" name="hive-shims-common" rev="${hive.version}" changing="true" conf="compile->master" /> - <dependency org="org.apache.hive" name="hive-contrib" rev="${hive.version}" changing="true" - conf="test->master" /> <dependency org="org.apache.hive.shims" name="hive-shims-0.23" rev="${hive.version}" changing="true" - conf="hadoop2->master" /> + conf="hadoop23->master" /> + <dependency org="org.apache.hive.shims" name="hive-shims-0.20S" rev="${hive.version}" changing="true" + conf="hadoop20->master" /> <dependency org="org.iq80.snappy" name="snappy" rev="${snappy.version}" conf="test->master" /> <dependency org="com.esotericsoftware.kryo" name="kryo" rev="${kryo.version}" @@ -426,31 +466,31 @@ <!-- for Tez integration --> <dependency org="org.apache.tez" name="tez" rev="${tez.version}" - conf="hadoop2->master"/> + conf="hadoop23->master"/> <dependency org="org.apache.tez" name="tez-common" rev="${tez.version}" - conf="hadoop2->master"/> + conf="hadoop23->master"/> <dependency org="org.apache.tez" name="tez-api" rev="${tez.version}" - conf="hadoop2->master"/> + conf="hadoop23->master"/> <dependency org="org.apache.tez" name="tez-dag" rev="${tez.version}" - conf="hadoop2->master"/> + conf="hadoop23->master"/> <dependency org="org.apache.tez" name="tez-runtime-internals" rev="${tez.version}" - conf="hadoop2->master"/> + conf="hadoop23->master"/> <dependency org="org.apache.tez" name="tez-runtime-library" rev="${tez.version}" - conf="hadoop2->master"/> + conf="hadoop23->master"/> <dependency org="org.apache.tez" name="tez-mapreduce" rev="${tez.version}" - conf="hadoop2->master"/> + conf="hadoop23->master"/> <dependency org="org.apache.tez" name="tez-yarn-timeline-history-with-acls" rev="${tez.version}" - conf="hadoop2->master"/> + conf="hadoop23->master"/> <dependency org="org.apache.commons" name="commons-collections4" rev="${commons-collections4.version}" - conf="hadoop2->master"/> + conf="hadoop23->master"/> <dependency org="org.codehaus.jettison" name="jettison" rev="${jettison.version}" - conf="hadoop2->master"/> + conf="hadoop23->master"/> <dependency org="org.apache.commons" name="commons-math3" rev="${commons-math3.version}" - conf="hadoop2->master"/> + conf="hadoop23->master"/> <dependency org="org.apache.curator" name="curator-framework" rev="${curator.version}" - conf="hadoop2->master"/> + conf="hadoop23->master"/> <dependency org="org.apache.curator" name="curator-client" rev="${curator.version}" - conf="hadoop2->master"/> + conf="hadoop23->master"/> </dependencies> </ivy-module> Modified: pig/branches/spark/ivy/libraries.properties URL: http://svn.apache.org/viewvc/pig/branches/spark/ivy/libraries.properties?rev=1784224&r1=1784223&r2=1784224&view=diff ============================================================================== --- pig/branches/spark/ivy/libraries.properties (original) +++ pig/branches/spark/ivy/libraries.properties Fri Feb 24 03:34:37 2017 @@ -40,10 +40,14 @@ ivy.version=2.2.0 jasper.version=6.1.14 groovy.version=2.4.5 guava.version=11.0 -hadoop-common.version=2.7.3 -hadoop-hdfs.version=2.7.3 -hadoop-mapreduce.version=2.7.3 -hbase1.version=1.2.4 +jersey-core.version=1.8 +hadoop-core.version=1.0.4 +hadoop-test.version=1.0.4 +hadoop-common.version=2.6.0 +hadoop-hdfs.version=2.6.0 +hadoop-mapreduce.version=2.6.0 +hbase94.version=0.94.1 +hbase95.version=0.98.12-${hbase.hadoop.version} hsqldb.version=1.8.0.10 hive.version=1.2.1 httpcomponents.version=4.1 @@ -57,20 +61,19 @@ jdeb.version=0.8 jdiff.version=1.0.9 jettison.version=1.3.4 jetty.version=6.1.26 -jline.version=2.11 -joda-time.version=2.9.3 +jline.version=1.0 +joda-time.version=2.8.2 jopt.version=4.1 json-simple.version=1.1 junit.version=4.11 jruby.version=1.6.7 -jython.version=2.7.0 -kryo.version=2.22 +jython.version=2.5.3 +kryo.version=2.21 rhino.version=1.7R2 antlr.version=3.4 stringtemplate.version=4.0.4 log4j.version=1.2.16 netty.version=3.6.6.Final -netty-all.version=4.0.23.Final rats-lib.version=0.5.1 slf4j-api.version=1.6.1 slf4j-log4j12.version=1.6.1 @@ -82,6 +85,7 @@ zookeeper.version=3.4.5 servlet.version=4.0.6 servlet-api.version=2.5 protobuf-java.version=2.5.0 +high-scale-lib.version=1.1.1 metrics-core.version=2.1.2 guice.version=3.0 guice-servlet.version=3.0 Modified: pig/branches/spark/ivy/pig-template.xml URL: http://svn.apache.org/viewvc/pig/branches/spark/ivy/pig-template.xml?rev=1784224&r1=1784223&r2=1784224&view=diff ============================================================================== --- pig/branches/spark/ivy/pig-template.xml (original) +++ pig/branches/spark/ivy/pig-template.xml Fri Feb 24 03:34:37 2017 @@ -46,7 +46,7 @@ <dependency> <groupId>commons-net</groupId> <artifactId>commons-net</artifactId> - <version>3.1</version> + <version>1.4.1</version> </dependency> <dependency> <groupId>org.mortbay.jetty</groupId> @@ -59,6 +59,16 @@ <version>6.1.26</version> </dependency> <dependency> + <groupId>tomcat</groupId> + <artifactId>jasper-runtime</artifactId> + <version>5.5.12</version> + </dependency> + <dependency> + <groupId>tomcat</groupId> + <artifactId>jasper-compiler</artifactId> + <version>5.5.12</version> + </dependency> + <dependency> <groupId>org.mortbay.jetty</groupId> <artifactId>jsp-api-2.1</artifactId> <version>6.1.14</version> @@ -76,17 +86,27 @@ <dependency> <groupId>net.java.dev.jets3t</groupId> <artifactId>jets3t</artifactId> - <version>0.9.0</version> + <version>0.7.1</version> </dependency> <dependency> - <groupId>javax.servlet</groupId> - <artifactId>servlet-api</artifactId> - <version>2.5</version> + <groupId>commons-net</groupId> + <artifactId>commons-net</artifactId> + <version>1.4.1</version> + </dependency> + <dependency> + <groupId>org.mortbay.jetty</groupId> + <artifactId>servlet-api-2.5</artifactId> + <version>6.1.14</version> + </dependency> + <dependency> + <groupId>net.sf.kosmosfs</groupId> + <artifactId>kfs</artifactId> + <version>0.3</version> </dependency> <dependency> <groupId>junit</groupId> <artifactId>junit</artifactId> - <version>4.11</version> + <version>4.8.1</version> </dependency> <dependency> <groupId>hsqldb</groupId> @@ -94,9 +114,14 @@ <version>1.8.0.10</version> </dependency> <dependency> + <groupId>oro</groupId> + <artifactId>oro</artifactId> + <version>2.0.8</version> + </dependency> + <dependency> <groupId>jline</groupId> <artifactId>jline</artifactId> - <version>2.11</version> + <version>1.0</version> </dependency> <dependency> <groupId>org.antlr</groupId> @@ -121,22 +146,22 @@ <dependency> <groupId>org.codehaus.jackson</groupId> <artifactId>jackson-mapper-asl</artifactId> - <version>1.9.13</version> + <version>1.8.8</version> </dependency> <dependency> <groupId>org.codehaus.jackson</groupId> <artifactId>jackson-core-asl</artifactId> - <version>1.9.13</version> + <version>1.8.8</version> </dependency> <dependency> <groupId>joda-time</groupId> <artifactId>joda-time</artifactId> - <version>2.9.3</version> + <version>2.1</version> </dependency> <dependency> <groupId>org.apache.avro</groupId> <artifactId>avro</artifactId> - <version>1.7.5</version> + <version>1.7.4</version> <exclusions> <exclusion> <!-- Don't pull in Avro's (later) version of Jetty.--> @@ -158,7 +183,7 @@ <dependency> <groupId>org.codehaus.groovy</groupId> <artifactId>groovy-all</artifactId> - <version>2.4.5</version> + <version>1.8.6</version> </dependency> </dependencies> </project> Modified: pig/branches/spark/ivy/piggybank-template.xml URL: http://svn.apache.org/viewvc/pig/branches/spark/ivy/piggybank-template.xml?rev=1784224&r1=1784223&r2=1784224&view=diff ============================================================================== --- pig/branches/spark/ivy/piggybank-template.xml (original) +++ pig/branches/spark/ivy/piggybank-template.xml Fri Feb 24 03:34:37 2017 @@ -51,7 +51,7 @@ <dependency> <groupId>joda-time</groupId> <artifactId>joda-time</artifactId> - <version>2.9.3</version> + <version>2.1</version> </dependency> <dependency> <groupId>com.googlecode.json-simple</groupId> @@ -61,7 +61,7 @@ <dependency> <groupId>org.apache.avro</groupId> <artifactId>avro</artifactId> - <version>1.7.5</version> + <version>1.7.4</version> <exclusions> <exclusion> <!-- Don't pull in Avro's (later) version of Jetty.--> @@ -78,7 +78,7 @@ <dependency> <groupId>nl.basjes.parse.httpdlog</groupId> <artifactId>httpdlog-pigloader</artifactId> - <version>2.4</version> + <version>2.2</version> </dependency> <dependency> <groupId>org.apache.pig</groupId> Modified: pig/branches/spark/src/META-INF/services/org.apache.pig.ExecType URL: http://svn.apache.org/viewvc/pig/branches/spark/src/META-INF/services/org.apache.pig.ExecType?rev=1784224&r1=1784223&r2=1784224&view=diff ============================================================================== --- pig/branches/spark/src/META-INF/services/org.apache.pig.ExecType (original) +++ pig/branches/spark/src/META-INF/services/org.apache.pig.ExecType Fri Feb 24 03:34:37 2017 @@ -13,7 +13,7 @@ # org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.LocalExecType org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MRExecType -org.apache.pig.backend.hadoop.executionengine.tez.TezLocalExecType -org.apache.pig.backend.hadoop.executionengine.tez.TezExecType +#org.apache.pig.backend.hadoop.executionengine.tez.TezLocalExecType +#org.apache.pig.backend.hadoop.executionengine.tez.TezExecType org.apache.pig.backend.hadoop.executionengine.spark.SparkExecType org.apache.pig.backend.hadoop.executionengine.spark.SparkLocalExecType
