Author: xuefu
Date: Fri Mar 4 18:17:39 2016
New Revision: 1733627
URL: http://svn.apache.org/viewvc?rev=1733627&view=rev
Log:
PIG-4820: Merge trunk[3] into spark branch (Pallavi via Xuefu)
Added:
pig/branches/spark/conf/ivysettings.xml
- copied unchanged from r1733612, pig/trunk/conf/ivysettings.xml
pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/string/REPLACE_MULTI.java
- copied unchanged from r1733612,
pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/string/REPLACE_MULTI.java
pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/apachelog/LogFormatLoader.java
- copied unchanged from r1733612,
pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/apachelog/LogFormatLoader.java
pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/string/TestReplaceMulti.java
- copied unchanged from r1733612,
pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/string/TestReplaceMulti.java
pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestLogFormatLoader.java
- copied unchanged from r1733612,
pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestLogFormatLoader.java
pig/branches/spark/src/docs/jdiff/pig_0.15.0.xml
- copied unchanged from r1733612, pig/trunk/src/docs/jdiff/pig_0.15.0.xml
pig/branches/spark/src/org/apache/hadoop/
- copied from r1733612, pig/trunk/src/org/apache/hadoop/
pig/branches/spark/src/org/apache/pig/CounterBasedErrorHandler.java
- copied unchanged from r1733612,
pig/trunk/src/org/apache/pig/CounterBasedErrorHandler.java
pig/branches/spark/src/org/apache/pig/ErrorHandler.java
- copied unchanged from r1733612,
pig/trunk/src/org/apache/pig/ErrorHandler.java
pig/branches/spark/src/org/apache/pig/ErrorHandling.java
- copied unchanged from r1733612,
pig/trunk/src/org/apache/pig/ErrorHandling.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigWritableComparators.java
- copied unchanged from r1733612,
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigWritableComparators.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/UDFEndOfAllInputNeededVisitor.java
- copied unchanged from r1733612,
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/UDFEndOfAllInputNeededVisitor.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/StoreFuncDecorator.java
- copied unchanged from r1733612,
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/StoreFuncDecorator.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/optimizer/TezEstimatedParallelismClearer.java
- copied unchanged from r1733612,
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/optimizer/TezEstimatedParallelismClearer.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/udf/IsFirstReduceOfKeyTez.java
- copied unchanged from r1733612,
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/udf/IsFirstReduceOfKeyTez.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/tez/runtime/PigGraceShuffleVertexManager.java
- copied unchanged from r1733612,
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/tez/runtime/PigGraceShuffleVertexManager.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/tez/runtime/PigInputFormatTez.java
- copied unchanged from r1733612,
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/tez/runtime/PigInputFormatTez.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/tez/util/TezRuntimeUtil.java
- copied unchanged from r1733612,
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/tez/util/TezRuntimeUtil.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/tez/util/TezUDFContextSeparator.java
- copied unchanged from r1733612,
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/tez/util/TezUDFContextSeparator.java
pig/branches/spark/src/org/apache/pig/builtin/HiveUDAF.java
- copied unchanged from r1733612,
pig/trunk/src/org/apache/pig/builtin/HiveUDAF.java
pig/branches/spark/src/org/apache/pig/builtin/HiveUDF.java
- copied unchanged from r1733612,
pig/trunk/src/org/apache/pig/builtin/HiveUDF.java
pig/branches/spark/src/org/apache/pig/builtin/HiveUDFBase.java
- copied unchanged from r1733612,
pig/trunk/src/org/apache/pig/builtin/HiveUDFBase.java
pig/branches/spark/src/org/apache/pig/builtin/HiveUDTF.java
- copied unchanged from r1733612,
pig/trunk/src/org/apache/pig/builtin/HiveUDTF.java
pig/branches/spark/src/org/apache/pig/data/UnlimitedNullTuple.java
- copied unchanged from r1733612,
pig/trunk/src/org/apache/pig/data/UnlimitedNullTuple.java
pig/branches/spark/src/org/apache/pig/impl/builtin/IsFirstReduceOfKey.java
- copied unchanged from r1733612,
pig/trunk/src/org/apache/pig/impl/builtin/IsFirstReduceOfKey.java
pig/branches/spark/src/org/apache/pig/impl/io/compress/
- copied from r1733612, pig/trunk/src/org/apache/pig/impl/io/compress/
pig/branches/spark/src/org/apache/pig/impl/util/UDFContextSeparator.java
- copied unchanged from r1733612,
pig/trunk/src/org/apache/pig/impl/util/UDFContextSeparator.java
pig/branches/spark/src/org/apache/pig/impl/util/hive/
- copied from r1733612, pig/trunk/src/org/apache/pig/impl/util/hive/
pig/branches/spark/src/org/apache/pig/parser/RegisterResolver.java
- copied unchanged from r1733612,
pig/trunk/src/org/apache/pig/parser/RegisterResolver.java
pig/branches/spark/src/org/apache/pig/tools/DownloadResolver.java
- copied unchanged from r1733612,
pig/trunk/src/org/apache/pig/tools/DownloadResolver.java
pig/branches/spark/test/e2e/pig/udfs/groovy/
- copied from r1733612, pig/trunk/test/e2e/pig/udfs/groovy/
pig/branches/spark/test/e2e/pig/udfs/java/org/apache/pig/test/udf/evalfunc/DummyContextUDF.java
- copied unchanged from r1733612,
pig/trunk/test/e2e/pig/udfs/java/org/apache/pig/test/udf/evalfunc/DummyContextUDF.java
pig/branches/spark/test/e2e/pig/udfs/js/
- copied from r1733612, pig/trunk/test/e2e/pig/udfs/js/
pig/branches/spark/test/excluded-tests-mr
- copied unchanged from r1733612, pig/trunk/test/excluded-tests-mr
pig/branches/spark/test/excluded-tests-tez
- copied unchanged from r1733612, pig/trunk/test/excluded-tests-tez
pig/branches/spark/test/org/apache/pig/builtin/TestTOMAP.java
- copied unchanged from r1733612,
pig/trunk/test/org/apache/pig/builtin/TestTOMAP.java
pig/branches/spark/test/org/apache/pig/builtin/TestUtf8StorageConverter.java
- copied unchanged from r1733612,
pig/trunk/test/org/apache/pig/builtin/TestUtf8StorageConverter.java
pig/branches/spark/test/org/apache/pig/builtin/avro/code/java/
- copied from r1733612,
pig/trunk/test/org/apache/pig/builtin/avro/code/java/
pig/branches/spark/test/org/apache/pig/builtin/avro/schema/RecordPojo.avsc
- copied unchanged from r1733612,
pig/trunk/test/org/apache/pig/builtin/avro/schema/RecordPojo.avsc
pig/branches/spark/test/org/apache/pig/builtin/avro/schema/nullableArrayInMap.avsc
- copied unchanged from r1733612,
pig/trunk/test/org/apache/pig/builtin/avro/schema/nullableArrayInMap.avsc
pig/branches/spark/test/org/apache/pig/builtin/avro/schema/nullableRecordInMap.avsc
- copied unchanged from r1733612,
pig/trunk/test/org/apache/pig/builtin/avro/schema/nullableRecordInMap.avsc
pig/branches/spark/test/org/apache/pig/builtin/avro/schema/recordInMap.avsc
- copied unchanged from r1733612,
pig/trunk/test/org/apache/pig/builtin/avro/schema/recordInMap.avsc
pig/branches/spark/test/org/apache/pig/impl/util/
- copied from r1733612, pig/trunk/test/org/apache/pig/impl/util/
pig/branches/spark/test/org/apache/pig/test/TestErrorHandlingStoreFunc.java
- copied unchanged from r1733612,
pig/trunk/test/org/apache/pig/test/TestErrorHandlingStoreFunc.java
pig/branches/spark/test/org/apache/pig/test/TestReadToEndLoader.java
- copied unchanged from r1733612,
pig/trunk/test/org/apache/pig/test/TestReadToEndLoader.java
pig/branches/spark/test/org/apache/pig/test/TestRegisterParser.java
- copied unchanged from r1733612,
pig/trunk/test/org/apache/pig/test/TestRegisterParser.java
pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Cross-1.gld
- copied unchanged from r1733612,
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Cross-1.gld
pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Cross-2.gld
- copied unchanged from r1733612,
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Cross-2.gld
pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Cross-3.gld
- copied unchanged from r1733612,
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Cross-3.gld
pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-LoadStore-1.gld
- copied unchanged from r1733612,
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-LoadStore-1.gld
pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-LoadStore-2-JDK7.gld
- copied unchanged from r1733612,
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-LoadStore-2-JDK7.gld
pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-LoadStore-2.gld
- copied unchanged from r1733612,
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-LoadStore-2.gld
pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-MQ-6-OPTOFF.gld
- copied unchanged from r1733612,
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-MQ-6-OPTOFF.gld
pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-MQ-6.gld
- copied unchanged from r1733612,
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-MQ-6.gld
pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-MQ-7-OPTOFF.gld
- copied unchanged from r1733612,
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-MQ-7-OPTOFF.gld
pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-MQ-7.gld
- copied unchanged from r1733612,
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-MQ-7.gld
pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-MQ-8-OPTOFF.gld
- copied unchanged from r1733612,
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-MQ-8-OPTOFF.gld
pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-MQ-8.gld
- copied unchanged from r1733612,
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-MQ-8.gld
pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Native-1.gld
- copied unchanged from r1733612,
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Native-1.gld
pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Order-2.gld
- copied unchanged from r1733612,
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Order-2.gld
pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Order-3.gld
- copied unchanged from r1733612,
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Order-3.gld
pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-SelfJoin-1.gld
- copied unchanged from r1733612,
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-SelfJoin-1.gld
pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-SelfJoin-2.gld
- copied unchanged from r1733612,
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-SelfJoin-2.gld
pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-SelfJoin-3.gld
- copied unchanged from r1733612,
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-SelfJoin-3.gld
pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-SelfJoin-4.gld
- copied unchanged from r1733612,
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-SelfJoin-4.gld
pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-SelfJoin-5.gld
- copied unchanged from r1733612,
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-SelfJoin-5.gld
pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-SelfJoin-6.gld
- copied unchanged from r1733612,
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-SelfJoin-6.gld
pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-SkewJoin-2.gld
- copied unchanged from r1733612,
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-SkewJoin-2.gld
pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-1-DummyStore-OPTOFF.gld
- copied unchanged from r1733612,
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-1-DummyStore-OPTOFF.gld
pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-11-OPTOFF.gld
- copied unchanged from r1733612,
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-11-OPTOFF.gld
pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-11.gld
- copied unchanged from r1733612,
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-11.gld
pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-12-OPTOFF.gld
- copied unchanged from r1733612,
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-12-OPTOFF.gld
pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-12.gld
- copied unchanged from r1733612,
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-12.gld
pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-13-OPTOFF.gld
- copied unchanged from r1733612,
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-13-OPTOFF.gld
pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-13.gld
- copied unchanged from r1733612,
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-13.gld
pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-14-OPTOFF.gld
- copied unchanged from r1733612,
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-14-OPTOFF.gld
pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-14.gld
- copied unchanged from r1733612,
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-14.gld
pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-15-OPTOFF.gld
- copied unchanged from r1733612,
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-15-OPTOFF.gld
pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-15.gld
- copied unchanged from r1733612,
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-15.gld
pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-16-OPTOFF.gld
- copied unchanged from r1733612,
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-16-OPTOFF.gld
pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-16.gld
- copied unchanged from r1733612,
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-16.gld
pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-17-OPTOFF.gld
- copied unchanged from r1733612,
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-17-OPTOFF.gld
pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-17.gld
- copied unchanged from r1733612,
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-17.gld
pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-18-OPTOFF.gld
- copied unchanged from r1733612,
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-18-OPTOFF.gld
pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-18.gld
- copied unchanged from r1733612,
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-18.gld
pig/branches/spark/test/org/apache/pig/test/data/testivysettings.xml
- copied unchanged from r1733612,
pig/trunk/test/org/apache/pig/test/data/testivysettings.xml
pig/branches/spark/test/org/apache/pig/test/utils/CloseAwareFSDataInputStream.java
- copied unchanged from r1733612,
pig/trunk/test/org/apache/pig/test/utils/CloseAwareFSDataInputStream.java
pig/branches/spark/test/org/apache/pig/test/utils/CloseAwareOutputStream.java
- copied unchanged from r1733612,
pig/trunk/test/org/apache/pig/test/utils/CloseAwareOutputStream.java
pig/branches/spark/test/org/apache/pig/tez/TestTezGraceParallelism.java
- copied unchanged from r1733612,
pig/trunk/test/org/apache/pig/tez/TestTezGraceParallelism.java
Removed:
pig/branches/spark/src/docs/jdiff/pig_0.14.0.xml
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/partitioners/RollupHIIPartitioner.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/PORollupHIIForEach.java
pig/branches/spark/src/org/apache/pig/impl/util/orc/OrcUtils.java
pig/branches/spark/src/org/apache/pig/newplan/logical/relational/LORollupHIIForEach.java
pig/branches/spark/src/org/apache/pig/newplan/logical/rules/RollupHIIOptimizer.java
pig/branches/spark/test/tez-local-tests
pig/branches/spark/test/tez-tests
Modified:
pig/branches/spark/ (props changed)
pig/branches/spark/CHANGES.txt
pig/branches/spark/README.txt
pig/branches/spark/bin/pig
pig/branches/spark/build.xml
pig/branches/spark/conf/pig.properties
pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/IsInt.java
pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/math/RANDOM.java
pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/CSVExcelStorage.java
pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/allloader/LoadFuncHelper.java
pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/avro/AvroStorage.java
pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/avro/PigAvroRecordReader.java
pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestCSVExcelStorage.java
pig/branches/spark/ivy.xml
pig/branches/spark/ivy/libraries.properties
pig/branches/spark/ivy/pig-template.xml
pig/branches/spark/ivy/piggybank-template.xml
pig/branches/spark/lib-src/bzip2/org/apache/tools/bzip2r/CBZip2InputStream.java
pig/branches/spark/shims/test/hadoop20/org/apache/pig/test/MiniCluster.java
pig/branches/spark/shims/test/hadoop23/org/apache/pig/test/TezMiniCluster.java
pig/branches/spark/src/docs/src/documentation/content/xdocs/basic.xml
pig/branches/spark/src/docs/src/documentation/content/xdocs/func.xml
pig/branches/spark/src/docs/src/documentation/content/xdocs/perf.xml
pig/branches/spark/src/docs/src/documentation/content/xdocs/pig-index.xml
pig/branches/spark/src/docs/src/documentation/content/xdocs/start.xml
pig/branches/spark/src/docs/src/documentation/content/xdocs/tabs.xml
pig/branches/spark/src/docs/src/documentation/content/xdocs/udf.xml
pig/branches/spark/src/org/apache/pig/EvalFunc.java
pig/branches/spark/src/org/apache/pig/JVMReuseImpl.java
pig/branches/spark/src/org/apache/pig/Main.java
pig/branches/spark/src/org/apache/pig/PigConfiguration.java
pig/branches/spark/src/org/apache/pig/PigConstants.java
pig/branches/spark/src/org/apache/pig/PigServer.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/HDataType.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/datastorage/ConfigurationUtil.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/datastorage/HPath.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/HExecutionEngine.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/Launcher.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/fetch/FetchOptimizer.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/InputSizeReducerEstimator.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/JobControlCompiler.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MRCompiler.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigBigDecimalRawComparator.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigBigIntegerRawComparator.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigBooleanRawComparator.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigBytesRawComparator.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigCombiner.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigDateTimeRawComparator.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigDoubleRawComparator.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigFloatRawComparator.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigGenericMapBase.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigGenericMapReduce.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigInputFormat.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigIntRawComparator.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigLongRawComparator.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigOutputFormat.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigSecondaryKeyComparator.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigSplit.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigTextRawComparator.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigTupleDefaultRawComparator.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigTupleSortComparator.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/ProgressableReporter.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/partitioners/SkewedPartitioner.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/plans/EndOfAllInputSetter.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/PhysicalOperator.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/POCast.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/POProject.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/PORelationToExprProject.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/POUserFunc.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/UnaryComparisonOperator.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/plans/PhyPlanVisitor.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POCounter.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POCross.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/PODistinct.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POFRJoin.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POFilter.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POForEach.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POLimit.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POLocalRearrange.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POMergeCogroup.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POMergeJoin.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POOptimizedForEach.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POPackage.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POPartialAgg.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POPartitionRearrange.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POPoissonSample.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POPreCombinerLocalRearrange.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/PORank.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POReservoirSample.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POSort.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POSplit.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POStore.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POStream.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/Packager.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/util/PlanHelper.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/tez/TezDagBuilder.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/tez/TezJob.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/tez/TezJobCompiler.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/tez/TezLauncher.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/tez/TezResourceManager.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/tez/TezSessionManager.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/TezCompiler.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/TezEdgeDescriptor.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/TezOperPlan.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/TezOperator.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/TezPOPackageAnnotator.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/TezPlanContainer.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/TezPlanContainerPrinter.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/TezPrinter.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/operator/POCounterStatsTez.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/operator/POFRJoinTez.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/operator/POIdentityInOutTez.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/operator/POLocalRearrangeTez.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/operator/POPartitionRearrangeTez.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/operator/PORankTez.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/operator/POShuffleTezLoad.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/operator/POShuffledValueInputTez.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/operator/POSimpleTezLoad.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/operator/POStoreTez.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/operator/POValueInputTez.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/operator/POValueOutputTez.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/optimizer/LoaderProcessor.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/optimizer/MultiQueryOptimizerTez.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/optimizer/ParallelismSetter.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/optimizer/SecondaryKeyOptimizerTez.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/optimizer/TezOperDependencyParallelismEstimator.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/optimizer/UnionOptimizer.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/udf/ReadScalarsTez.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/tez/runtime/PartitionerDefinedVertexManager.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/tez/runtime/PigOutputFormatTez.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/tez/runtime/PigProcessor.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/tez/runtime/SkewedPartitionerTez.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/tez/util/MRToTezHelper.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/tez/util/TezCompilerUtil.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/util/CombinerOptimizerUtil.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/hbase/HBaseStorage.java
pig/branches/spark/src/org/apache/pig/builtin/AVG.java
pig/branches/spark/src/org/apache/pig/builtin/AlgebraicBigDecimalMathBase.java
pig/branches/spark/src/org/apache/pig/builtin/AlgebraicBigIntegerMathBase.java
pig/branches/spark/src/org/apache/pig/builtin/AlgebraicByteArrayMathBase.java
pig/branches/spark/src/org/apache/pig/builtin/AlgebraicDoubleMathBase.java
pig/branches/spark/src/org/apache/pig/builtin/AlgebraicFloatMathBase.java
pig/branches/spark/src/org/apache/pig/builtin/AlgebraicIntMathBase.java
pig/branches/spark/src/org/apache/pig/builtin/AlgebraicLongMathBase.java
pig/branches/spark/src/org/apache/pig/builtin/AvroStorage.java
pig/branches/spark/src/org/apache/pig/builtin/BigDecimalAvg.java
pig/branches/spark/src/org/apache/pig/builtin/BigIntegerAvg.java
pig/branches/spark/src/org/apache/pig/builtin/DateTimeMax.java
pig/branches/spark/src/org/apache/pig/builtin/DateTimeMin.java
pig/branches/spark/src/org/apache/pig/builtin/Distinct.java
pig/branches/spark/src/org/apache/pig/builtin/DoubleAvg.java
pig/branches/spark/src/org/apache/pig/builtin/FloatAvg.java
pig/branches/spark/src/org/apache/pig/builtin/IntAvg.java
pig/branches/spark/src/org/apache/pig/builtin/JsonLoader.java
pig/branches/spark/src/org/apache/pig/builtin/LongAvg.java
pig/branches/spark/src/org/apache/pig/builtin/OrcStorage.java
pig/branches/spark/src/org/apache/pig/builtin/PigStorage.java
pig/branches/spark/src/org/apache/pig/builtin/PluckTuple.java
pig/branches/spark/src/org/apache/pig/builtin/RANDOM.java
pig/branches/spark/src/org/apache/pig/builtin/REPLACE.java
pig/branches/spark/src/org/apache/pig/builtin/RollupDimensions.java
pig/branches/spark/src/org/apache/pig/builtin/StringMax.java
pig/branches/spark/src/org/apache/pig/builtin/StringMin.java
pig/branches/spark/src/org/apache/pig/builtin/TOMAP.java
pig/branches/spark/src/org/apache/pig/builtin/TOP.java
pig/branches/spark/src/org/apache/pig/builtin/TextLoader.java
pig/branches/spark/src/org/apache/pig/builtin/ToDate.java
pig/branches/spark/src/org/apache/pig/builtin/Utf8StorageConverter.java
pig/branches/spark/src/org/apache/pig/builtin/VALUELIST.java
pig/branches/spark/src/org/apache/pig/builtin/VALUESET.java
pig/branches/spark/src/org/apache/pig/builtin/mock/Storage.java
pig/branches/spark/src/org/apache/pig/data/BinInterSedes.java
pig/branches/spark/src/org/apache/pig/data/DataReaderWriter.java
pig/branches/spark/src/org/apache/pig/data/DataType.java
pig/branches/spark/src/org/apache/pig/data/SchemaTuple.java
pig/branches/spark/src/org/apache/pig/impl/PigContext.java
pig/branches/spark/src/org/apache/pig/impl/builtin/GFCross.java
pig/branches/spark/src/org/apache/pig/impl/builtin/GetMemNumRows.java
pig/branches/spark/src/org/apache/pig/impl/builtin/PoissonSampleLoader.java
pig/branches/spark/src/org/apache/pig/impl/builtin/ReadScalars.java
pig/branches/spark/src/org/apache/pig/impl/io/NullablePartitionWritable.java
pig/branches/spark/src/org/apache/pig/impl/io/ReadToEndLoader.java
pig/branches/spark/src/org/apache/pig/impl/logicalLayer/schema/SchemaUtil.java
pig/branches/spark/src/org/apache/pig/impl/plan/OperatorKey.java
pig/branches/spark/src/org/apache/pig/impl/streaming/OutputHandler.java
pig/branches/spark/src/org/apache/pig/impl/streaming/PigStreamingUDF.java
pig/branches/spark/src/org/apache/pig/impl/util/CompilerUtils.java
pig/branches/spark/src/org/apache/pig/impl/util/PropertiesUtil.java
pig/branches/spark/src/org/apache/pig/impl/util/SpillableMemoryManager.java
pig/branches/spark/src/org/apache/pig/impl/util/UDFContext.java
pig/branches/spark/src/org/apache/pig/impl/util/Utils.java
pig/branches/spark/src/org/apache/pig/impl/util/avro/AvroBagWrapper.java
pig/branches/spark/src/org/apache/pig/impl/util/avro/AvroMapWrapper.java
pig/branches/spark/src/org/apache/pig/impl/util/avro/AvroStorageDataConversionUtilities.java
pig/branches/spark/src/org/apache/pig/impl/util/avro/AvroStorageSchemaConversionUtilities.java
pig/branches/spark/src/org/apache/pig/impl/util/avro/AvroTupleWrapper.java
pig/branches/spark/src/org/apache/pig/newplan/logical/expression/ExpToPhyTranslationVisitor.java
pig/branches/spark/src/org/apache/pig/newplan/logical/expression/UserFuncExpression.java
pig/branches/spark/src/org/apache/pig/newplan/logical/optimizer/LogicalPlanOptimizer.java
pig/branches/spark/src/org/apache/pig/newplan/logical/relational/LOCogroup.java
pig/branches/spark/src/org/apache/pig/newplan/logical/relational/LOCube.java
pig/branches/spark/src/org/apache/pig/newplan/logical/relational/LogToPhyTranslationVisitor.java
pig/branches/spark/src/org/apache/pig/newplan/logical/relational/LogicalPlan.java
pig/branches/spark/src/org/apache/pig/newplan/logical/relational/LogicalRelationalNodesVisitor.java
pig/branches/spark/src/org/apache/pig/newplan/logical/relational/LogicalSchema.java
pig/branches/spark/src/org/apache/pig/newplan/logical/relational/MapSideMergeValidator.java
pig/branches/spark/src/org/apache/pig/newplan/logical/rules/OptimizerUtils.java
pig/branches/spark/src/org/apache/pig/newplan/logical/rules/PartitionFilterOptimizer.java
pig/branches/spark/src/org/apache/pig/newplan/logical/rules/PredicatePushdownOptimizer.java
pig/branches/spark/src/org/apache/pig/newplan/logical/visitor/TypeCheckingExpVisitor.java
pig/branches/spark/src/org/apache/pig/newplan/logical/visitor/TypeCheckingRelVisitor.java
pig/branches/spark/src/org/apache/pig/parser/AliasMasker.g
pig/branches/spark/src/org/apache/pig/parser/AstPrinter.g
pig/branches/spark/src/org/apache/pig/parser/AstValidator.g
pig/branches/spark/src/org/apache/pig/parser/LogicalPlanBuilder.java
pig/branches/spark/src/org/apache/pig/parser/LogicalPlanGenerator.g
pig/branches/spark/src/org/apache/pig/parser/QueryLexer.g
pig/branches/spark/src/org/apache/pig/parser/QueryParser.g
pig/branches/spark/src/org/apache/pig/parser/QueryParserDriver.java
pig/branches/spark/src/org/apache/pig/pen/ExampleGenerator.java
pig/branches/spark/src/org/apache/pig/scripting/BoundScript.java
pig/branches/spark/src/org/apache/pig/scripting/groovy/GroovyEvalFunc.java
pig/branches/spark/src/org/apache/pig/tools/grunt/GruntParser.java
pig/branches/spark/src/org/apache/pig/tools/pigscript/parser/PigScriptParser.jj
pig/branches/spark/src/org/apache/pig/tools/pigstats/OutputStats.java
pig/branches/spark/src/org/apache/pig/tools/pigstats/PigStatsUtil.java
pig/branches/spark/src/org/apache/pig/tools/pigstats/PigStatusReporter.java
pig/branches/spark/src/org/apache/pig/tools/pigstats/ScriptState.java
pig/branches/spark/src/org/apache/pig/tools/pigstats/mapreduce/MRScriptState.java
pig/branches/spark/src/org/apache/pig/tools/pigstats/tez/TezDAGStats.java
pig/branches/spark/src/org/apache/pig/tools/pigstats/tez/TezPigScriptStats.java
pig/branches/spark/src/org/apache/pig/tools/pigstats/tez/TezScriptState.java
pig/branches/spark/src/org/apache/pig/tools/pigstats/tez/TezVertexStats.java
pig/branches/spark/src/pig-default.properties (contents, props changed)
pig/branches/spark/src/python/streaming/controller.py
pig/branches/spark/test/e2e/pig/build.xml
pig/branches/spark/test/e2e/pig/drivers/TestDriverPig.pm
pig/branches/spark/test/e2e/pig/tests/hcat.conf
pig/branches/spark/test/e2e/pig/tests/multiquery.conf
pig/branches/spark/test/e2e/pig/tests/nightly.conf
pig/branches/spark/test/e2e/pig/tests/streaming.conf
pig/branches/spark/test/e2e/pig/tests/turing_jython.conf
pig/branches/spark/test/e2e/pig/udfs/java/build.xml
pig/branches/spark/test/org/apache/pig/TestMain.java
pig/branches/spark/test/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/TestInputSizeReducerEstimator.java
pig/branches/spark/test/org/apache/pig/builtin/TestAvroStorage.java
pig/branches/spark/test/org/apache/pig/builtin/TestPluckTuple.java
pig/branches/spark/test/org/apache/pig/builtin/TestTOP.java
pig/branches/spark/test/org/apache/pig/builtin/mock/TestMockStorage.java
pig/branches/spark/test/org/apache/pig/impl/builtin/TestStreamingUDF.java
pig/branches/spark/test/org/apache/pig/impl/streaming/TestPigStreamingUDF.java
pig/branches/spark/test/org/apache/pig/impl/streaming/TestStreamingUDFOutputHandler.java
pig/branches/spark/test/org/apache/pig/newplan/logical/optimizer/TestImplicitSplitOnTuple.java
pig/branches/spark/test/org/apache/pig/test/TestAccumulator.java
pig/branches/spark/test/org/apache/pig/test/TestAssert.java
pig/branches/spark/test/org/apache/pig/test/TestBZip.java
pig/branches/spark/test/org/apache/pig/test/TestBuiltin.java
pig/branches/spark/test/org/apache/pig/test/TestCubeOperator.java
pig/branches/spark/test/org/apache/pig/test/TestEvalPipeline.java
pig/branches/spark/test/org/apache/pig/test/TestEvalPipeline2.java
pig/branches/spark/test/org/apache/pig/test/TestFRJoin.java
pig/branches/spark/test/org/apache/pig/test/TestGrunt.java
pig/branches/spark/test/org/apache/pig/test/TestHBaseStorage.java
pig/branches/spark/test/org/apache/pig/test/TestHBaseStorageParams.java
pig/branches/spark/test/org/apache/pig/test/TestJobSubmission.java
pig/branches/spark/test/org/apache/pig/test/TestLimitVariable.java
pig/branches/spark/test/org/apache/pig/test/TestLoaderStorerShipCacheFiles.java
pig/branches/spark/test/org/apache/pig/test/TestLogicalPlanBuilder.java
pig/branches/spark/test/org/apache/pig/test/TestMRCompiler.java
pig/branches/spark/test/org/apache/pig/test/TestMergeJoin.java
pig/branches/spark/test/org/apache/pig/test/TestMultiQuery.java
pig/branches/spark/test/org/apache/pig/test/TestMultiQueryLocal.java
pig/branches/spark/test/org/apache/pig/test/TestPOCast.java
pig/branches/spark/test/org/apache/pig/test/TestPOPartialAgg.java
pig/branches/spark/test/org/apache/pig/test/TestPigRunner.java
pig/branches/spark/test/org/apache/pig/test/TestPigServerLocal.java
pig/branches/spark/test/org/apache/pig/test/TestPigStats.java
pig/branches/spark/test/org/apache/pig/test/TestPigStatsMR.java
pig/branches/spark/test/org/apache/pig/test/TestRank1.java
pig/branches/spark/test/org/apache/pig/test/TestRank2.java
pig/branches/spark/test/org/apache/pig/test/TestRank3.java
pig/branches/spark/test/org/apache/pig/test/TestRegisteredJarVisibility.java
pig/branches/spark/test/org/apache/pig/test/TestScalarAliases.java
pig/branches/spark/test/org/apache/pig/test/TestScalarAliasesLocal.java
pig/branches/spark/test/org/apache/pig/test/TestSecondarySort.java
pig/branches/spark/test/org/apache/pig/test/TestSkewedJoin.java
pig/branches/spark/test/org/apache/pig/test/TestStreaming.java
pig/branches/spark/test/org/apache/pig/test/TestStringUDFs.java
pig/branches/spark/test/org/apache/pig/test/TestTypeCheckingValidatorNewLP.java
pig/branches/spark/test/org/apache/pig/test/TestUDFContext.java
pig/branches/spark/test/org/apache/pig/test/Util.java
pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Limit-2.gld
pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-MQ-2-OPTOFF.gld
pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-MQ-2.gld
pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Order-1.gld
pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-SkewJoin-1.gld
pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-1-OPTOFF.gld
pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-1.gld
pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-10-OPTOFF.gld
pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-10.gld
pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-2-OPTOFF.gld
pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-2.gld
pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-9-OPTOFF.gld
pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-9.gld
pig/branches/spark/test/org/apache/pig/test/pigunit/TestPigTest.java
pig/branches/spark/test/org/apache/pig/tez/TestGroupConstParallelTez.java
pig/branches/spark/test/org/apache/pig/tez/TestJobSubmissionTez.java
pig/branches/spark/test/org/apache/pig/tez/TestPigStatsTez.java
pig/branches/spark/test/org/apache/pig/tez/TestTezAutoParallelism.java
pig/branches/spark/test/org/apache/pig/tez/TestTezCompiler.java
pig/branches/spark/test/org/apache/pig/tez/TestTezJobControlCompiler.java
pig/branches/spark/test/org/apache/pig/tez/TestTezLauncher.java
pig/branches/spark/test/perf/pigmix/bin/runpigmix.pl
Propchange: pig/branches/spark/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Fri Mar 4 18:17:39 2016
@@ -1,2 +1,2 @@
/hadoop/pig/branches/multiquery:741727-770826
-/pig/trunk:1621676-1654952
+/pig/trunk:1621676-1733612
Modified: pig/branches/spark/CHANGES.txt
URL:
http://svn.apache.org/viewvc/pig/branches/spark/CHANGES.txt?rev=1733627&r1=1733626&r2=1733627&view=diff
==============================================================================
--- pig/branches/spark/CHANGES.txt (original)
+++ pig/branches/spark/CHANGES.txt Fri Mar 4 18:17:39 2016
@@ -19,11 +19,240 @@
Pig Change Log
Trunk (unreleased changes)
+
+INCOMPATIBLE CHANGES
+
+IMPROVEMENTS
+
+PIG-4817: Bump HTTP Logparser to version 2.4 (nielsbasjes via daijy)
+
+PIG-4811: Upgrade groovy library to address MethodClosure vulnerability
(daijy)
+
+PIG-4803: Improve performance of regex-based builtin functions (eyal via daijy)
+
+PIG-4802: Autoparallelism should estimate less when there is combiner (rohini)
+
+PIG-4761: Add more information to front end error messages (eyal via daijy)
+
+PIG-4792: Do not add java and sun system properties to jobconf (rohini)
+
+PIG-4787: Log JSONLoader exception while parsing records (rohini)
+
+PIG-4763: Insufficient check for the number of arguments in runpigmix.pl
(sekikn via rohini)
+
+PIG-4411: Support for vertex level configuration like speculative execution
(rohini)
+
+PIG-4775: Better default values for shuffle bytes per reducer (rohini)
+
+PIG-4753: Pigmix should have option to delete outputs after completing the
tests (mitdesai via rohini)
+
+PIG-4744: Honor tez.staging-dir setting in tez-site.xml (rohini via daijy)
+
+PIG-4742: Document Pig's Register Artifact Command added in PIG-4417
(akshayrai09 via daijy)
+
+PIG-4417: Pig's register command should support automatic fetching of jars
from repo (akshayrai09 via daijy)
+
+PIG-4713: Document Bloom UDF (gliptak via daijy)
+
+PIG-3251: Bzip2TextInputFormat requires double the memory of maximum record
size (knoguchi)
+
+PIG-4704: Customizable Error Handling for Storers in Pig (siddhimehta via
daijy)
+
+PIG-4717: Update Apache HTTPD LogParser to latest version (nielsbasjes via
daijy)
+
+PIG-4468: Pig's jackson version conflicts with that of hadoop 2.6.0 or newer
(zjffdu via daijy)
+
+PIG-4708: Upgrade joda-time to 2.8 (rohini)
+
+PIG-4697: Pig needs to serialize only part of the udfcontext for each vertex
(rohini)
+
+PIG-4702: Load once for sampling and partitioning in order by for certain
LoadFuncs (rohini)
+
+PIG-4699: Print Job stats information in Tez like mapreduce (rohini)
+
+PIG-4554: Compress pig.script before encoding (sandyridgeracer via rohini)
+
+PIG-4670: Embedded Python scripts still parse line by line (rohini)
+
+PIG-4663: HBaseStorage should allow the MaxResultsPerColumnFamily limit to
avoid memory or scan timeout issues (pmazak via rohini)
+
+PIG-4673: Built In UDF - REPLACE_MULTI : For a given string, search and replace all occurrences
+ of search keys with replacement values ([email protected] via daijy)
+
+PIG-4674: TOMAP should infer schema (daijy)
+
+PIG-4676: Upgrade Hive to 1.2.1 (daijy)
+
+PIG-4574: Eliminate identity vertex for order by and skewed join right after
LOAD (rohini)
+
+PIG-4365: TOP udf should implement Accumulator interface (eyal via rohini)
+
+PIG-4570: Allow AvroStorage to use a class for the schema (pmazak via daijy)
+
+PIG-4405: Adding 'map[]' support to mock/Storage (nielsbasjes via daijy)
+
+PIG-4638: Allow TOMAP to accept dynamically sized input (nielsbasjes via daijy)
+
+PIG-4639: Add better parser for Apache HTTPD access log (nielsbasjes via daijy)
+
+BUG FIXES
+
+PIG-4819: RANDOM() udf can lead to missing or redundant records (knoguchi)
+
+PIG-4816: Read a null scalar causing a Tez failure (daijy)
+
+PIG-4818: Single quote inside comment in GENERATE is not being ignored
(knoguchi)
+
+PIG-4814: AvroStorage does not take namenode HA as part of schema file url
(daijy)
+
+PIG-4812: Register Groovy UDF with relative path does not work (daijy)
+
+PIG-4806: UDFContext can be reset in the middle during Tez input and output
initialization (rohini)
+
+PIG-4808: PluckTuple overwrites regex if used more than once in the same
script (eyal via daijy)
+
+PIG-4801: Provide backward compatibility with mapreduce mapred.task settings
(rohini)
+
+PIG-4759: Fix Classresolution_1 e2e failure (rohini)
+
+PIG-4800: EvalFunc.getCacheFiles() fails for different namenode (rohini)
+
+PIG-4790: Join after union fail due to UnionOptimizer (rohini)
+
+PIG-4686: Backend code should not call AvroStorageUtils.getPaths (mitdesai via
rohini)
+
+PIG-4795: Flushing ObjectOutputStream before calling toByteArray on the
underlying ByteArrayOutputStream (emopers via daijy)
+
+PIG-4690: Union with self replicate join will fail in Tez (rohini)
+
+PIG-4791: PORelationToExprProject filters records instead of returning
emptybag in nested foreach after union (rohini)
+
+PIG-4779: testBZ2Concatenation[pig.bzip.use.hadoop.inputformat = true] failing
due to successful read (knoguchi)
+
+PIG-4587: Applying isFirstReduceOfKey for Skewed left outer join skips records
(rohini)
+
+PIG-4782: OutOfMemoryError: GC overhead limit exceeded with POPartialAgg
(rohini)
+
+PIG-4737: Check and fix clone implementation for all classes extending
PhysicalOperator (rohini)
+
+PIG-4770: OOM with POPartialAgg in some cases (rohini)
+
+PIG-4773: [Pig on Tez] Secondary key descending sort in nested foreach after
union does ascending instead (rohini)
+
+PIG-4774: Fix NPE in SUM,AVG,MIN,MAX UDFs for null bag input (rohini)
+
+PIG-4757: Job stats on successfully read/output records wrong with multiple
inputs/outputs (rohini)
+
+PIG-4769: UnionOptimizer hits errors when merging vertex group into split
(rohini)
+
+PIG-4768: EvalFunc reporter is null in Tez (rohini)
+
+PIG-4760: TezDAGStats.convertToHadoopCounters is not used, but impose MR
counter limit (daijy)
+
+PIG-4755: Typo in runpigmix script (mitdesai via daijy)
+
+PIG-4736: Removing empty keys in UDFContext broke one LoadFunc (rohini)
+
+PIG-4733: Avoid NullPointerException in JVMReuseImpl for builtin classes
(rohini)
+
+PIG-4722: [Pig on Tez] NPE while running Combiner (rohini)
+
+PIG-4730: [Pig on Tez] Total parallelism estimation does not account load
parallelism (rohini)
+
+PIG-4689: CSV Writes incorrect header if two CSV files are created in one
script (nielsbasjes via daijy)
+
+PIG-4727: Incorrect types table for AVG in docs (nsmith via daijy)
+
+PIG-4725: Typo in FrontendException messages "Incompatable" (nsmith via daijy)
+
+PIG-4721: IsEmpty documentation error (nsmith via daijy)
+
+PIG-4712: [Pig on Tez] NPE in Bloom UDF after Union (rohini)
+
+PIG-4707: [Pig on Tez] Streaming job hangs with pig.exec.mapPartAgg=true
(rohini)
+
+PIG-4703: TezOperator.stores shall not ship to backend (daijy)
+
+PIG-4696: Empty map returned by a streaming_python udf wrongly contains a null
key (cheolsoo)
+
+PIG-4691: [Pig on Tez] Support for whitelisting storefuncs for union
optimization (rohini)
+
+PIG-3957: Refactor out resetting input key in TezDagBuilder (rohini)
+
+PIG-4688: Limit followed by POPartialAgg can give empty or partial results in
Tez (rohini)
+
+PIG-4635: NPE while running pig script in tez mode (daijy)
+
+PIG-4683: Nested order is broken after PIG-3591 in some cases (daijy)
+
+PIG-4679: Performance degradation due to InputSizeReducerEstimator since
PIG-3754 (daijy)
+
+PIG-4315: MergeJoin or Split followed by order by gives NPE in Tez (rohini)
+
+PIG-4654: Reduce tez memory.reserve-fraction and clear spillables for better
memory utilization (rohini)
+
+PIG-4628: Pig 0.14 job with order by fails in mapreduce mode with Oozie
(knoguchi)
+
+PIG-4651: Optimize NullablePartitionWritable serialization for skewed join
(rohini)
+
+PIG-4627: [Pig on Tez] Self join does not handle null values correctly (rohini)
+
+PIG-4644: PORelationToExprProject.clone() is broken (erwaman via rohini)
+
+PIG-4650: ant mvn-deploy target is broken (daijy)
+
+PIG-4649: [Pig on Tez] Union followed by HCatStorer misses some data (rohini)
+
+PIG-4636: Occurred spelled incorrectly in error message for Launcher and
POMergeCogroup (stevenmz via daijy)
+
+PIG-4624: Error on ORC empty file without schema (daijy)
+
+PIG-3622: Allow casting bytearray fields to bytearray type (redisliu via daijy)
+
+PIG-4618: When use tez as the engine , set pig.user.cache.enabled=true do not
take effect (wisgood via rohini)
+
+PIG-4533: Document error: Pig does support concatenated gz file (xhudik via
daijy)
+
+PIG-4578: ToDateISO should support optional ' ' space variant used by JDBC
(michaelthoward via daijy)
+
+Release 0.15.0
INCOMPATIBLE CHANGES
IMPROVEMENTS
+PIG-4560: Pig 0.15.0 Documentation (daijy)
+
+PIG-4429: Add Pig alias information and Pig script to the DAG view in Tez UI
(daijy)
+
+PIG-3994: Implement getting backend exception for Tez (rohini)
+
+PIG-4563: Upgrade to released Tez 0.7.0 (daijy)
+
+PIG-4525: Clarify "Scalar has more than one row in the output." (Niels Basjes
via gates)
+
+PIG-4511: Add columns to prune from PluckTuple (jbabcock via cheolsoo)
+
+PIG-4434: Improve auto-parallelism for tez (daijy)
+
+PIG-4495: Better multi-query planning in case of multiple edges (rohini)
+
+PIG-3294: Allow Pig use Hive UDFs (daijy)
+
+PIG-4476: Fix logging in AvroStorage* classes and SchemaTuple class (rdsr via
rohini)
+
+PIG-4458: Support UDFs in a FOREACH Before a Merge Join (wattsinabox via daijy)
+
+PIG-4454: Upgrade tez to 0.6.0 (daijy)
+
+PIG-4451: Log partition and predicate filter pushdown information and fix
optimizer looping (rohini)
+
+PIG-4430: Pig should support reading log4j.properties file from classpath as
well (rdsr via daijy)
+
+PIG-4407: Allow specifying a replication factor for jarcache (jira.shegalov
via rohini)
+
+PIG-4401: Add pattern matching to PluckTuple (cheolsoo)
+
PIG-2692: Make the Pig unit faciliities more generalizable and update javadocs
(razsapps via daijy)
PIG-4379: Make RoundRobinPartitioner public (daijy)
@@ -38,12 +267,173 @@ PIG-4360: HBaseStorage should support se
PIG-4337: Split Types and MultiQuery e2e tests into multiple groups (rohini)
-PIG-4066: An optimization for ROLLUP operation in Pig (hxquangnhat via
cheolsoo)
-
PIG-4333: Split BigData tests into multiple groups (rohini)
BUG FIXES
+PIG-4592: Pig 0.15 stopped working with Hadoop 1.x (daijy)
+
+PIG-4580: Fix TestTezAutoParallelism.testSkewedJoinIncreaseParallelism test
failure (daijy)
+
+PIG-4571: TestPigRunner.testGetHadoopCounters fail on Windows (daijy)
+
+PIG-4541: Skewed full outer join does not return records if any relation is empty. Outer join does not
+ return any record if left relation is empty (daijy)
+
+PIG-4564: Pig can deadlock in POPartialAgg if there is a bag (rohini via daijy)
+
+PIG-4569: Fix e2e test Rank_1 failure (rohini)
+
+PIG-4490: MIN/MAX builtin UDFs return wrong results when accumulating for
strings (xplenty via rohini)
+
+PIG-4418: NullPointerException in JVMReuseImpl (rohini)
+
+PIG-4562: Typo in DataType.toDateTime (daijy)
+
+PIG-4559: Fix several new tez e2e test failures (daijy)
+
+PIG-4506: binstorage fails to write biginteger (ssavvides via daijy)
+
+PIG-4556: Local mode is broken in some case by PIG-4247 (daijy)
+
+PIG-4523: Tez engine should use tez config rather than mr config whenever
possible (daijy)
+
+PIG-4452: Embedded SQL using "SQL" instead of "sql" fails with string index
out of range: -1 error (daijy)
+
+PIG-4543: TestEvalPipelineLocal.testRankWithEmptyReduce fail on Hadoop 1
(daijy)
+
+PIG-4544: Upgrade Hbase to 0.98.12 (daijy)
+
+PIG-4481: e2e tests ComputeSpec_1, ComputeSpec_2 and StreamingPerformance_3
produce different result on Windows (daijy)
+
+PIG-4496: Fix CBZip2InputStream to close underlying stream (petersla via daijy)
+
+PIG-4528: Fix a typo in src/docs/src/documentation/content/xdocs/basic.xml
(namusyaka via daijy)
+
+PIG-4532: Pig Documentation contains typo for AvroStorage
(fredericschmaljohann via daijy)
+
+PIG-4377: Skewed outer join produce wrong result in some cases (daijy)
+
+PIG-4538: Pig script fail with CNF in follow up MR job (daijy)
+
+PIG-4537: Fix unit test failure introduced by TEZ-2392: TestCollectedGroup,
TestLimitVariable, TestMapSideCogroup, etc (daijy)
+
+PIG-4530: StackOverflow in TestMultiQueryLocal running under hadoop20
(nielsbasjes via rohini)
+
+PIG-4529: Pig on tez hit counter limit imposed by MR (daijy)
+
+PIG-4524: Pig Minicluster unit tests broken by TEZ-2333 (daijy)
+
+PIG-4527: NON-ASCII Characters in Javadoc break 'ant docs' (nielsbasjes via
daijy)
+
+PIG-4494: Pig's htrace version conflicts with that of hadoop 2.6.0 (daijy)
+
+PIG-4519: Correct link to Contribute page (gliptak via daijy)
+
+PIG-4514: pig trunk compilation is broken -
VertexManagerPluginContext.reconfigureVertex change (thejas)
+
+PIG-4503: [Pig on Tez] NPE in UnionOptimizer with multiple levels of union
(rohini)
+
+PIG-4509: [Pig on Tez] Unassigned applications not killed on shutdown (rohini)
+
+PIG-4508: [Pig on Tez] PigProcessor check for commit only on MROutput (rohini)
+
+PIG-4505: [Pig on Tez] Auto adjust AM memory can hit OOM with 3.5GXmx (rohini)
+
+PIG-4502: E2E tests build fail with udfs compile (nmaheshwari via daijy)
+
+PIG-4498: AvroStorage in Piggbank does not handle bad records and fails (viraj
via rohini)
+
+PIG-4499: mvn-build miss tez classes in pig-h2.jar (daijy)
+
+PIG-4488: Pig on tez mask tez.queue.name (daijy)
+
+PIG-4497: [Pig on Tez] NPE for null scalar (rohini)
+
+PIG-4493: Pig on Tez gives wrong results if Union is followed by Split (rohini)
+
+PIG-4491: Streaming Python Bytearray Bugs (jeremykarn via daijy)
+
+PIG-4487: Pig on Tez gives wrong success message on failure in case of
multiple outputs (rohini)
+
+PIG-4483: Pig on Tez output statistics shows storing to same directory twice
for union (rohini)
+
+PIG-4480: Pig script failure on Tez with split and order by due to missing
sample collection (rohini)
+
+PIG-4484: Ant pull jetty-6.1.26.zip on some platform (daijy)
+
+PIG-4479: Pig script with union within nested splits followed by join failed
on Tez (rohini)
+
+PIG-4457: Error is thrown by JobStats.getOutputSize() when storing to a MySql
table (rohini)
+
+PIG-4475: Keys in AvroMapWrapper are not proper Pig types (rdsr via daijy)
+
+PIG-4478: TestCSVExcelStorage fails with jdk8 (rohini)
+
+PIG-4474: Increasing intermediate parallelism has issue with default
parallelism (rohini)
+
+PIG-4465: Pig streaming ship fails for relative paths on Tez (rohini)
+
+PIG-4461: Use benchmarks for Windows Pig e2e tests (nmaheshwari via daijy)
+
+PIG-4463: AvroMapWrapper still leaks Avro data types and AvroStorageDataConversionUtilities do not handle
+ Pig maps (rdsr via daijy)
+
+PIG-4460: TestBuiltIn testValueListOutputSchemaComplexType and testValueSetOutputSchemaComplexType tests
+ create bags whose inner schema is not a tuple (erwaman via daijy)
+
+PIG-4448: AvroMapWrapper leaks Avro data types when the map values are complex
avro records (rdsr via daijy)
+
+PIG-4453: Remove test-tez-local target (daijy)
+
+PIG-4443: Write inputsplits in Tez to disk if the size is huge and option to
compress pig input splits (rohini)
+
+PIG-4447: Pig Cannot handle nullable values (arrays and records) in avro
records (rdsr via daijy)
+
+PIG-4444: Fix unit test failure TestTezAutoParallelism (daijy)
+
+PIG-4445: VALUELIST and VALUESET outputSchema does not match actual schema of data returned when map value schema
+ is complex (erwaman via daijy)
+
+PIG-4442: Eliminate redundant RPC call to get file information in HPath
(cnauroth via daijy)
+
+PIG-4440: Some code samples in documentation use Unicode left/right single quotes, which cause a
+ parse failure (cnauroth via daijy)
+
+PIG-4264: Port TestAvroStorage to tez local mode (daijy)
+
+PIG-4437: Fix tez unit test failure TestJoinSmoke, TestSkewedJoin (daijy)
+
+PIG-4432: Built-in VALUELIST and VALUESET UDFs do not preserve the schema when the map value type is
+ a complex type (erwaman via daijy)
+
+PIG-4408: Merge join should support replicated join as a predecessor (bridiver
via daijy)
+
+PIG-4389: Flag to run selected test suites in e2e tests (daijy)
+
+PIG-4385: testDefaultBootup fails because it cannot find "pig.properties"
(mkudlej via daijy)
+
+PIG-4397: CSVExcelStorage incorrect output if last field value is null (daijy)
+
+PIG-4431: ReadToEndLoader does not close the record reader for the last input
split (rdsr via daijy)
+
+PIG-4426: RowNumber(simple) Rank not producing correct results (knoguchi)
+
+PIG-4433: Loading bigdecimal in nested tuple does not work (kpriceyahoo via
daijy)
+
+PIG-4410: Fix testRankWithEmptyReduce in tez mode (daijy)
+
+PIG-4392: RANK BY fails when default_parallel is greater than cardinality of
field being ranked by (daijy)
+
+PIG-4403: Combining -Dpig.additional.jars.uris with -useHCatalog breaks due to combination
+ with colon instead of comma (ovlaere via daijy)
+
+PIG-4402: JavaScript UDF example in the doc is broken (cheolsoo)
+
+PIG-4394: Fix Split_9 and Union_5 e2e failures (rohini)
+
+PIG-4391: Fix TestPigStats test failure (rohini)
+
PIG-4387: Honor yarn settings in tez-site.xml and optimize dag status fetch
(rohini)
PIG-4352: Port local mode tests to Tez - TestUnionOnSchema (daijy)
@@ -123,6 +513,10 @@ IMPROVEMENTS
BUG FIXES
+PIG-4409: fs.defaultFS is overwritten in JobConf by replicated join at runtime
(cheolsoo)
+
+PIG-4404: LOAD with HBaseStorage on secure cluster is broken in Tez (rohini)
+
PIG-4375: ObjectCache should use ProcessorContext.getObjectRegistry() (rohini)
PIG-4334: PigProcessor does not set pig.datetime.default.tz (rohini)
@@ -219,6 +613,9 @@ PIG-3939: SPRINTF function to format str
PIG-3970: Merge Tez branch into trunk (daijy)
OPTIMIZATIONS
+
+PIG-4657: [Pig on Tez] Optimize GroupBy and Distinct key comparison (rohini)
+
BUG FIXES
Modified: pig/branches/spark/README.txt
URL:
http://svn.apache.org/viewvc/pig/branches/spark/README.txt?rev=1733627&r1=1733626&r2=1733627&view=diff
==============================================================================
--- pig/branches/spark/README.txt (original)
+++ pig/branches/spark/README.txt Fri Mar 4 18:17:39 2016
@@ -35,5 +35,4 @@ Contributing to the Project
===========================
We welcome all contributions. For the details, please, visit
-http://wiki.apache.org/pig/HowToContribute.
-
+https://cwiki.apache.org/confluence/display/PIG/HowToContribute
Modified: pig/branches/spark/bin/pig
URL:
http://svn.apache.org/viewvc/pig/branches/spark/bin/pig?rev=1733627&r1=1733626&r2=1733627&view=diff
==============================================================================
--- pig/branches/spark/bin/pig (original)
+++ pig/branches/spark/bin/pig Fri Mar 4 18:17:39 2016
@@ -190,7 +190,7 @@ if [ "$includeHCatalog" == "true" ]; the
HCAT_CLASSPATHS=$hiveMetaStoreVersion:$thriftVersion:$hiveExecVersion:$fbJarVersion:$jdoECJarVersion:$slfJarVersion:$hbaseHiveVersion:$hcatJarPath:$hbaseHCatJarPath:$pigHCatJarPath
ADDITIONAL_CLASSPATHS=file://$hiveMetaStoreVersion,file://$thriftVersion,file://$hiveExecVersion,file://$fbJarVersion,file://$jdoECJarVersion,file://$slfJarVersion,file://$hbaseHiveVersion,file://$hcatJarPath,file://$hbaseHCatJarPath,file://$pigHCatJarPath
if [ "$additionalJars" != "" ]; then
- ADDITIONAL_CLASSPATHS=$ADDITIONAL_CLASSPATHS:$additionalJars
+ ADDITIONAL_CLASSPATHS=$ADDITIONAL_CLASSPATHS,$additionalJars
fi
CLASSPATH=${CLASSPATH}:$HCAT_CLASSPATHS:$HIVE_HOME/conf
fi
@@ -355,7 +355,7 @@ PIG_OPTS="$PIG_OPTS -Dpig.home.dir=$PIG_
if [ "$includeHCatalog" == "true" ]; then
addJars=`echo $PIG_OPTS | awk '{ for (i=1; i<=NF; i++) print $i; }' | grep "\-Dpig.additional.jars.uris=" | sed s/-Dpig.additional.jars.uris=//`
if [ "$addJars" != "" ]; then
- ADDITIONAL_CLASSPATHS=$addJars:$ADDITIONAL_CLASSPATHS
+ ADDITIONAL_CLASSPATHS=$addJars,$ADDITIONAL_CLASSPATHS
PIG_OPTS=`echo $PIG_OPTS | sed 's/-Dpig.additional.jars.uris=[^ ]*//'`
fi
PIG_OPTS="$PIG_OPTS -Dpig.additional.jars.uris=$ADDITIONAL_CLASSPATHS"
Modified: pig/branches/spark/build.xml
URL:
http://svn.apache.org/viewvc/pig/branches/spark/build.xml?rev=1733627&r1=1733626&r2=1733627&view=diff
==============================================================================
--- pig/branches/spark/build.xml (original)
+++ pig/branches/spark/build.xml Fri Mar 4 18:17:39 2016
@@ -31,7 +31,7 @@
<property name="pigsmoke.pom" value="${basedir}/ivy/pigsmoke.pom" />
<property name="pigunit.pom" value="${basedir}/ivy/pigunit.pom" />
<property name="piggybank.pom" value="${basedir}/ivy/piggybank.pom" />
- <property name="pig.version" value="0.15.0" />
+ <property name="pig.version" value="0.16.0" />
<property name="pig.version.suffix" value="-SNAPSHOT" />
<property name="version" value="${pig.version}${pig.version.suffix}" />
<property name="final.name" value="${name}-${version}" />
@@ -104,13 +104,13 @@
<property name="test.unit.file" value="${test.src.dir}/unit-tests"/>
<property name="test.smoke.file" value="${test.src.dir}/smoke-tests"/>
<property name="test.all.file" value="${test.src.dir}/all-tests"/>
- <property name="test.tez.file" value="${test.src.dir}/tez-tests"/>
- <property name="test.tez_local.file"
value="${test.src.dir}/tez-local-tests"/>
<property name="test.spark.file" value="${test.src.dir}/spark-tests"/>
<property name="test.spark_local.file"
value="${test.src.dir}/spark-local-tests"/>
<property name="test.exclude.file" value="${test.src.dir}/excluded-tests"/>
<property name="test.exclude.file.20"
value="${test.src.dir}/excluded-tests-20"/>
<property name="test.exclude.file.23"
value="${test.src.dir}/excluded-tests-23"/>
+ <property name="test.exclude.file.mr"
value="${test.src.dir}/excluded-tests-mr"/>
+ <property name="test.exclude.file.tez"
value="${test.src.dir}/excluded-tests-tez"/>
<property name="pigunit.jarfile" value="pigunit.jar" />
<property name="piggybank.jarfile"
value="${basedir}/contrib/piggybank/java/piggybank.jar" />
<property name="smoke.tests.jarfile"
value="${build.dir}/${final.name}-smoketests.jar" />
@@ -283,7 +283,7 @@
<property name="xerces.jar"
value="${ivy.lib.dir}/xercesImpl-${xerces.version}.jar"/>
<property name="jdiff.build.dir" value="${build.docs}/jdiff"/>
<property name="jdiff.xml.dir" value="${docs.dir}/jdiff"/>
- <property name="jdiff.stable" value="0.14.0"/>
+ <property name="jdiff.stable" value="0.15.0"/>
<property name="jdiff.stable.javadoc"
value="http://hadoop.apache.org/${name}/docs/r${jdiff.stable}/api/"/>
<!-- Packaging properties -->
@@ -758,6 +758,9 @@
<fileset dir="${ivy.lib.dir}" includes="accumulo-*.jar"
excludes="accumulo-minicluster*.jar"/>
<fileset dir="${ivy.lib.dir}" includes="json-simple-*.jar"/>
<fileset dir="${ivy.lib.dir}" includes="kryo-*.jar"/>
+ <fileset dir="${ivy.lib.dir}"
includes="httpdlog-*-${basjes-httpdlog-pigloader.version}.jar"/>
+ <fileset dir="${ivy.lib.dir}"
includes="parser-core-${basjes-httpdlog-pigloader.version}.jar"/>
+ <fileset dir="${ivy.lib.dir}" includes="ivy-*.jar"/>
</copy>
</target>
@@ -816,22 +819,13 @@
project.setProperty(attributes.get("name"),
attributes.get("value"));
</scriptdef>
- <target name="mvn-jar" description="Create pig.jar for maven deploy for
Hadoop2/23 and Hadoop1/20">
- <propertyreset name="hadoopversion" value="23" />
- <propertyreset name="isHadoop" value="true" />
- <propertyreset name="src.shims.dir"
value="${basedir}/shims/src/hadoop${hadoopversion}" />
- <antcall target="clean" inheritRefs="true" inheritall="true"/>
- <antcall target="jar" inheritRefs="true" inheritall="true"/>
- <!-- Move and rename pig jar for Hadoop2/23 to a different location
before compiling for Hadoop1/20 -->
- <move file="${output.jarfile.core}"
tofile="${basedir}/${artifact.pig-h2.jar}"/>
- <delete dir="${build.dir}" />
- <propertyreset name="hadoopversion" value="20" />
- <propertyreset name="isHadoop" value="" />
- <propertyreset name="src.shims.dir"
value="${basedir}/shims/src/hadoop${hadoopversion}" />
- <antcall target="jar" inheritRefs="true" inheritall="true"/>
- <move file="${output.jarfile.core}"
tofile="${output.jarfile.core-h1}"/>
- <move file="${basedir}/${artifact.pig-h2.jar}"
tofile="${output.jarfile.core-h2}"/>
- </target>
+ <macrodef name="propertycopy">
+ <attribute name="name"/>
+ <attribute name="from"/>
+ <sequential>
+ <propertyreset name="@{name}" value="${@{from}}"/>
+ </sequential>
+ </macrodef>
<!-- ================================================================== -->
<!-- macrodef: buildJar -->
@@ -856,6 +850,7 @@
<zipgroupfileset refid="@{includedJars}" />
<fileset file="${basedir}/src/pig-default.properties" />
<fileset file="${basedir}/src/main/jruby/pigudf.rb" />
+ <fileset file="${basedir}/conf/ivysettings.xml" />
<exclude name="hadoop-site.xml" />
</jar>
</sequential>
@@ -920,15 +915,10 @@
</target>
<target name="test-tez"
depends="setTezEnv,setWindowsPath,setLinuxPath,compile-test,jar,debugger.check,jackson-pig-3039-test-download"
description="Run tez unit tests">
- <macro-test-runner test.file="${test.tez.file}"
tests.failed="test-tez.failed"/>
+ <macro-test-runner test.file="${test.all.file}"
tests.failed="test-tez.failed"/>
<fail if="test-tez.failed">Tests failed!</fail>
</target>
- <target name="test-tez-local"
depends="setTezEnv,setWindowsPath,setLinuxPath,compile-test,jar,debugger.check,jackson-pig-3039-test-download"
description="Run tez local mode unit tests">
- <macro-test-runner test.file="${test.tez_local.file}"
tests.failed="test-tez.failed"/>
- <fail if="test-tez.failed">Tests failed!</fail>
- </target>
-
<target name="test-spark"
depends="setSparkEnv,setWindowsPath,setLinuxPath,compile-test,jar,debugger.check,jackson-pig-3039-test-download"
description="Run Spark unit tests in Spark cluster-local mode">
<macro-test-runner test.file="${test.spark.file}"
tests.failed="test-spark.failed"/>
<fail if="test-spark.failed">Tests failed!</fail>
@@ -957,6 +947,8 @@
<mkdir dir="${test.log.dir}"/>
<tempfile property="junit.tmp.dir" prefix="pig_junit_tmp"
destDir="${java.io.tmpdir}" />
<mkdir dir="${junit.tmp.dir}/"/>
+ <propertycopy name="test.exclude.file.for.exectype"
from="test.exclude.file.${test.exec.type}"/>
+ <echo>Tests in ${test.exclude.file.for.exectype} will be excluded</echo>
<junit showoutput="${test.output}" printsummary="yes"
haltonfailure="no" fork="yes" maxmemory="2048m" dir="${basedir}"
timeout="${test.timeout}" errorProperty="@{tests.failed}"
failureProperty="@{tests.failed}">
<sysproperty key="hadoopversion" value="${hadoopversion}" />
<sysproperty key="test.exec.type" value="${test.exec.type}" />
@@ -989,6 +981,7 @@
<excludesfile name="${test.exclude.file}"
if="test.exclude.file"/>
<excludesfile name="${test.exclude.file.20}"
unless="isHadoop23"/>
<excludesfile name="${test.exclude.file.23}"
if="isHadoop23"/>
+ <excludesfile name="${test.exclude.file.for.exectype}"/>
</patternset>
<exclude name="**/${exclude.testcase}.java"
if="exclude.testcase" />
<exclude name="**/TestRegisteredJarVisibility.java"
if="offline"/>
@@ -1033,7 +1026,7 @@
<echo message="=======================" />
<propertyreset name="test.exec.type" value="tez" />
<propertyreset name="test.log.dir"
value="${test.build.dir}/logs/${test.exec.type}" />
- <macro-test-runner test.file="${test.tez.file}"
tests.failed="test.tez.failed"/>
+ <macro-test-runner test.file="${test.all.file}"
tests.failed="test.tez.failed"/>
<condition property="any.tests.failed">
<or>
<isset property="test.mr.failed"/>
@@ -1295,7 +1288,7 @@
uri="urn:maven-artifact-ant"
classpathref="mvn-ant-task.classpath"/>
</target>
- <target name="mvn-install" depends="mvn-taskdef,mvn-jar, set-version,
source-jar,
+ <target name="mvn-install" depends="mvn-taskdef,jar-h12, set-version,
source-jar,
javadoc-jar, pigunit-jar, smoketests-jar, piggybank"
description="To install pig to local filesystem's m2 cache">
<artifact:pom file="${pig.pom}" id="pig"/>
@@ -1319,9 +1312,11 @@
</artifact:install>
</target>
- <target name="mvn-build" depends="mvn-jar, source-jar,
+ <target name="mvn-build" depends="jar-h12, source-jar,
javadoc-jar, smoketests-jar,
pigunit-jar, piggybank"
description="To build the pig jar artifacts to be deployed to apache
maven repository">
+ <move file="${output.jarfile.backcompat-core-h1}"
tofile="${output.jarfile.core}"/>
+ <move file="${output.jarfile.backcompat-core-h2}"
tofile="${output.jarfile.core-h2}"/>
</target>
<!-- Expects that mvn-build has already been run but does not run it. In
some cases building
@@ -1375,7 +1370,7 @@
<target name="simpledeploy" unless="staging">
<artifact:pom file="${pig.pom}" id="pig"/>
<artifact:install-provider artifactId="wagon-http"
version="${wagon-http.version}"/>
- <artifact:deploy file="${output.jarfile.core-h1}">
+ <artifact:deploy file="${output.jarfile.core}">
<remoteRepository id="${snapshots_repo_id}"
url="${asfsnapshotrepo}"/>
<pom refid="pig"/>
<attach file="${output.jarfile.core-h2}" classifier="h2" />
Modified: pig/branches/spark/conf/pig.properties
URL:
http://svn.apache.org/viewvc/pig/branches/spark/conf/pig.properties?rev=1733627&r1=1733626&r2=1733627&view=diff
==============================================================================
--- pig/branches/spark/conf/pig.properties (original)
+++ pig/branches/spark/conf/pig.properties Fri Mar 4 18:17:39 2016
@@ -91,6 +91,8 @@
#
# * mapreduce (default): use the Hadoop cluster defined in your Hadoop config
files
# * local: use local mode
+# * tez: use Tez on Hadoop cluster
+# * tez_local: use Tez local mode
#
# exectype=mapreduce
@@ -150,11 +152,22 @@
#
# pig.user.cache.location=/tmp
+# Replication factor for cached jars. If not specified
mapred.submit.replication
+# is used, whose default is 10.
+#
+# pig.user.cache.replication=10
+
# Default UTC offset. (default: the host's current UTC offset) Supply a UTC
# offset in Java's timezone format: e.g., +08:00.
#
# pig.datetime.default.tz=
+# Path to download the artifacts when registering ivy coordinates. This
defaults
+# to the directory grape uses for downloading libraries.
+# (default: ~/.groovy/grapes)
+#
+# pig.artifacts.download.location=
+
############################################################################
#
# Memory impacting properties
@@ -370,6 +383,14 @@
#
# pig.auto.local.input.maxbytes=100000000
+
+#
+# Should use hadoop's BZipCodec for bzip2 input? (for PigStorage and
TextLoader)
+# Only available for Hadoop 2.x and later; ignored for other versions. (Default:
true)
+#
+# pig.bzip.use.hadoop.inputformat=true
+
+
############################################################################
#
# Security Features
@@ -561,10 +582,21 @@ hcat.bin=/usr/local/hcat/bin/hcat
#
# opt.fetch=true
-# Enable auto parallelism in tez. This should be used by default unless
-# you encounter some bug in automatic parallelism. If set to false, use 1 as
-# default parallelism
-pig.tez.auto.parallelism=true
+#########################################################################
+#
+# Error Handling Properties
+#
+# By default, a Pig job fails immediately on encountering an error while writing
Tuples for Store.
+# If you want Pig to allow certain errors before failing you can set this
property.
+# If the property is set to true and the StoreFunc implements ErrorHandling, it
will allow a configurable number of errors
+# based on the OutputErrorHandler implementation.
+# pig.allow.store.errors = false
+#
+# Controls the minimum number of errors for store
+# pig.errors.min.records = 0
+#
+# Set the threshold for percentage of errors
+# pig.error.threshold.percent = 0.0f
###########################################################################
#
@@ -592,3 +624,33 @@ pig.tez.auto.parallelism=true
# python2.7.
#
# pig.streaming.udf.python.command=python
+
+###########################################################################
+#
+# Tez specific properties
+#
+
+# Enable auto/grace parallelism in tez. Default is true and these should be
+# used by default unless you encounter some bug in automatic parallelism.
+# If pig.tez.auto.parallelism is set to false, 1 is used as default parallelism
+
+#pig.tez.auto.parallelism=true
+#pig.tez.grace.parallelism=true
+
+# Union optimization (pig.tez.opt.union=true) in tez uses vertex groups to
store
+# output from different vertices into one final output location.
+# If a StoreFunc's OutputCommitter does not work with multiple vertices
+# writing to same location, then you can disable union optimization just
+# for that StoreFunc. Refer to PIG-4649. You can also specify a whitelist of
StoreFuncs
+# that are known to work with multiple vertices writing to the same location
instead of a blacklist.
+
+#pig.tez.opt.union.unsupported.storefuncs=org.apache.hcatalog.pig.HCatStorer,org.apache.hive.hcatalog.pig.HCatStorer
+#pig.tez.opt.union.supported.storefuncs=
+
+
+# Pig only reads once from datasource for LoadFuncs specified here during sort
instead of
+# loading once for sampling and loading again for partitioning.
+# Used to avoid hitting external non-filesystem datasources like HBase and
Accumulo twice.
+
+pig.sort.readonce.loadfuncs=org.apache.pig.backend.hadoop.hbase.HBaseStorage,org.apache.pig.backend.hadoop.accumulo.AccumuloStorage
+
Modified:
pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/IsInt.java
URL:
http://svn.apache.org/viewvc/pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/IsInt.java?rev=1733627&r1=1733626&r2=1733627&view=diff
==============================================================================
---
pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/IsInt.java
(original)
+++
pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/IsInt.java
Fri Mar 4 18:17:39 2016
@@ -28,7 +28,7 @@ import org.apache.pig.impl.logicalLayer.
/**
* This UDF is used to check whether the String input is an Integer.
- * Note this function checks for Integer range −2,147,483,648 to
2,147,483,647.
+ * Note this function checks for Integer range -2,147,483,648 to 2,147,483,647.
* If range is not important, use IsNumeric instead if you would like to check
if a String is numeric.
* Also IsNumeric performs slightly better compared to this function.
*/
Modified:
pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/math/RANDOM.java
URL:
http://svn.apache.org/viewvc/pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/math/RANDOM.java?rev=1733627&r1=1733626&r2=1733627&view=diff
==============================================================================
---
pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/math/RANDOM.java
(original)
+++
pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/math/RANDOM.java
Fri Mar 4 18:17:39 2016
@@ -18,21 +18,5 @@
package org.apache.pig.piggybank.evaluation.math;
-import java.io.IOException;
-
-import org.apache.pig.EvalFunc;
-import org.apache.pig.data.Tuple;
-import org.apache.pig.impl.logicalLayer.schema.Schema;
-import org.apache.pig.data.DataType;
-
-public class RANDOM extends EvalFunc<Double>{
-
- public Double exec(Tuple input) throws IOException {
- return Math.random();
- }
-
- @Override
- public Schema outputSchema(Schema input) {
- return new Schema(new
Schema.FieldSchema(getSchemaName(this.getClass().getName().toLowerCase(),
input), DataType.DOUBLE));
- }
+public class RANDOM extends org.apache.pig.builtin.RANDOM {
}
Modified:
pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/CSVExcelStorage.java
URL:
http://svn.apache.org/viewvc/pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/CSVExcelStorage.java?rev=1733627&r1=1733626&r2=1733627&view=diff
==============================================================================
---
pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/CSVExcelStorage.java
(original)
+++
pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/CSVExcelStorage.java
Fri Mar 4 18:17:39 2016
@@ -305,6 +305,7 @@ public class CSVExcelStorage extends Pig
// Substitute a null value with an empty string. See PIG-2470.
if (field == null) {
+ fieldStr = null;
mProtoTuple.add("");
continue;
}
@@ -669,6 +670,11 @@ public class CSVExcelStorage extends Pig
this.udfContextSignature = signature;
}
+ @Override
+ public void setStoreFuncUDFContextSignature(String signature) {
+ this.udfContextSignature = signature;
+ }
+
@Override
public List<OperatorSet> getFeatures() {
return Arrays.asList(LoadPushDown.OperatorSet.PROJECTION);
Modified:
pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/allloader/LoadFuncHelper.java
URL:
http://svn.apache.org/viewvc/pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/allloader/LoadFuncHelper.java?rev=1733627&r1=1733626&r2=1733627&view=diff
==============================================================================
---
pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/allloader/LoadFuncHelper.java
(original)
+++
pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/allloader/LoadFuncHelper.java
Fri Mar 4 18:17:39 2016
@@ -162,7 +162,7 @@ public class LoadFuncHelper {
path = getFirstFile(fileSystem, path);
if (path == null) {
- throw new FrontendException(path + " has no files");
+ throw new FrontendException(location + " has no files");
}
}
Modified:
pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/avro/AvroStorage.java
URL:
http://svn.apache.org/viewvc/pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/avro/AvroStorage.java?rev=1733627&r1=1733626&r2=1733627&view=diff
==============================================================================
---
pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/avro/AvroStorage.java
(original)
+++
pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/avro/AvroStorage.java
Fri Mar 4 18:17:39 2016
@@ -186,18 +186,20 @@ public class AvroStorage extends FileInp
}
}
- Configuration conf = job.getConfiguration();
- Set<Path> paths = AvroStorageUtils.getPaths(location, conf, true);
- if (!paths.isEmpty()) {
- // Set top level directories in input format. Adding all files will
- // bloat configuration size
- FileInputFormat.setInputPaths(job, paths.toArray(new
Path[paths.size()]));
- // Scan all directories including sub directories for schema
- if (inputAvroSchema == null) {
- setInputAvroSchema(paths, conf);
+ if (inputAvroSchema == null ||
UDFContext.getUDFContext().isFrontend()) {
+ Configuration conf = job.getConfiguration();
+ Set<Path> paths = AvroStorageUtils.getPaths(location, conf, true);
+ if (!paths.isEmpty()) {
+ // Set top level directories in input format. Adding all files
will
+ // bloat configuration size
+ FileInputFormat.setInputPaths(job, paths.toArray(new
Path[paths.size()]));
+ // Scan all directories including sub directories for schema
+ if (inputAvroSchema == null) {
+ setInputAvroSchema(paths, conf);
+ }
+ } else {
+ throw new IOException("Input path \'" + location + "\' is not
found");
}
- } else {
- throw new IOException("Input path \'" + location + "\' is not
found");
}
}
Modified:
pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/avro/PigAvroRecordReader.java
URL:
http://svn.apache.org/viewvc/pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/avro/PigAvroRecordReader.java?rev=1733627&r1=1733626&r2=1733627&view=diff
==============================================================================
---
pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/avro/PigAvroRecordReader.java
(original)
+++
pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/avro/PigAvroRecordReader.java
Fri Mar 4 18:17:39 2016
@@ -194,30 +194,43 @@ public class PigAvroRecordReader extends
@Override
public Writable getCurrentValue() throws IOException, InterruptedException
{
- Object obj = reader.next();
- Tuple result = null;
- if (obj instanceof Tuple) {
- AvroStorageLog.details("Class =" + obj.getClass());
- result = (Tuple) obj;
- } else {
- if (obj != null) {
- AvroStorageLog.details("Wrap class " + obj.getClass() + " as a
tuple.");
+ try {
+ Object obj = reader.next();
+ Tuple result = null;
+ if (obj instanceof Tuple) {
+ AvroStorageLog.details("Class =" + obj.getClass());
+ result = (Tuple) obj;
+ } else {
+ if (obj != null) {
+ AvroStorageLog.details("Wrap class " + obj.getClass() + "
as a tuple.");
+ }
+ else {
+ AvroStorageLog.details("Wrap null as a tuple.");
+ }
+ result = wrapAsTuple(obj);
}
- else {
- AvroStorageLog.details("Wrap null as a tuple.");
+ if (schemaToMergedSchemaMap != null) {
+ // remap the position of fields to the merged schema
+ Map<Integer, Integer> map = schemaToMergedSchemaMap.get(path);
+ if (map == null) {
+ throw new IOException("The schema of '" + path + "' " +
+ "is not merged by AvroStorage.");
+ }
+ result = remap(result, map);
}
- result = wrapAsTuple(obj);
+ return result;
}
- if (schemaToMergedSchemaMap != null) {
- // remap the position of fields to the merged schema
- Map<Integer, Integer> map = schemaToMergedSchemaMap.get(path);
- if (map == null) {
- throw new IOException("The schema of '" + path + "' " +
- "is not merged by AvroStorage.");
+ catch(Exception e) {
+ if (ignoreBadFiles) {
+ LOG.warn("Ignoring bad record for '" + path + "'.");
+ return null;
+ }
+ else {
+ //re-throw exception
+ LOG.error("Bad record for '" + path + "'.");
+ throw new IOException(e);
}
- result = remap(result, map);
}
- return result;
}
/**