Author: praveen
Date: Tue Jan 27 02:27:45 2015
New Revision: 1654955
URL: http://svn.apache.org/r1654955
Log:
PIG-4398: Merge from trunk (2) [Spark Branch] (praveen)
Added:
pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/xml/XPathAll.java
- copied unchanged from r1654952,
pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/xml/XPathAll.java
pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/xml/XPathAllTest.java
- copied unchanged from r1654952,
pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/xml/XPathAllTest.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/partitioners/RollupHIIPartitioner.java
- copied unchanged from r1654952,
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/partitioners/RollupHIIPartitioner.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/PORollupHIIForEach.java
- copied unchanged from r1654952,
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/PORollupHIIForEach.java
pig/branches/spark/src/org/apache/pig/builtin/RoundRobinPartitioner.java
- copied unchanged from r1654952,
pig/trunk/src/org/apache/pig/builtin/RoundRobinPartitioner.java
pig/branches/spark/src/org/apache/pig/newplan/logical/relational/LORollupHIIForEach.java
- copied unchanged from r1654952,
pig/trunk/src/org/apache/pig/newplan/logical/relational/LORollupHIIForEach.java
pig/branches/spark/src/org/apache/pig/newplan/logical/rules/RollupHIIOptimizer.java
- copied unchanged from r1654952,
pig/trunk/src/org/apache/pig/newplan/logical/rules/RollupHIIOptimizer.java
pig/branches/spark/test/e2e/pig/tests/utf8.conf
- copied unchanged from r1654952, pig/trunk/test/e2e/pig/tests/utf8.conf
pig/branches/spark/test/e2e/pig/udfs/java/org/apache/pig/test/udf/evalfunc/ENCODE.java
- copied unchanged from r1654952,
pig/trunk/test/e2e/pig/udfs/java/org/apache/pig/test/udf/evalfunc/ENCODE.java
pig/branches/spark/test/org/apache/pig/builtin/avro/data/json/recordsWithMapsOfArrayOfRecords.json
- copied unchanged from r1654952,
pig/trunk/test/org/apache/pig/builtin/avro/data/json/recordsWithMapsOfArrayOfRecords.json
pig/branches/spark/test/org/apache/pig/builtin/avro/schema/recordsWithMapsOfArrayOfRecords.avsc
- copied unchanged from r1654952,
pig/trunk/test/org/apache/pig/builtin/avro/schema/recordsWithMapsOfArrayOfRecords.avsc
pig/branches/spark/test/org/apache/pig/test/TestJoinBase.java
- copied unchanged from r1654952,
pig/trunk/test/org/apache/pig/test/TestJoinBase.java
pig/branches/spark/test/org/apache/pig/test/TestJoinLocal.java
- copied unchanged from r1654952,
pig/trunk/test/org/apache/pig/test/TestJoinLocal.java
pig/branches/spark/test/org/apache/pig/test/TestPOPartialAggPlanMR.java
- copied unchanged from r1654952,
pig/trunk/test/org/apache/pig/test/TestPOPartialAggPlanMR.java
pig/branches/spark/test/org/apache/pig/test/TestPigServerLocal.java
- copied unchanged from r1654952,
pig/trunk/test/org/apache/pig/test/TestPigServerLocal.java
pig/branches/spark/test/org/apache/pig/test/TestPigServerWithMacrosRemote.java
- copied unchanged from r1654952,
pig/trunk/test/org/apache/pig/test/TestPigServerWithMacrosRemote.java
pig/branches/spark/test/org/apache/pig/test/TestPigStatsMR.java
- copied unchanged from r1654952,
pig/trunk/test/org/apache/pig/test/TestPigStatsMR.java
pig/branches/spark/test/org/apache/pig/test/TestScalarAliasesLocal.java
- copied unchanged from r1654952,
pig/trunk/test/org/apache/pig/test/TestScalarAliasesLocal.java
pig/branches/spark/test/org/apache/pig/test/TestStoreBase.java
- copied unchanged from r1654952,
pig/trunk/test/org/apache/pig/test/TestStoreBase.java
pig/branches/spark/test/org/apache/pig/test/TestStoreLocal.java
- copied unchanged from r1654952,
pig/trunk/test/org/apache/pig/test/TestStoreLocal.java
pig/branches/spark/test/org/apache/pig/tez/TestPOPartialAggPlanTez.java
- copied unchanged from r1654952,
pig/trunk/test/org/apache/pig/tez/TestPOPartialAggPlanTez.java
pig/branches/spark/test/org/apache/pig/tez/TestPigStatsTez.java
- copied unchanged from r1654952,
pig/trunk/test/org/apache/pig/tez/TestPigStatsTez.java
pig/branches/spark/test/org/apache/pig/tez/TezUtil.java
- copied unchanged from r1654952,
pig/trunk/test/org/apache/pig/tez/TezUtil.java
Removed:
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/tez/runtime/RoundRobinPartitioner.java
Modified:
pig/branches/spark/ (props changed)
pig/branches/spark/CHANGES.txt
pig/branches/spark/build.xml
pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/xml/XPath.java
pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/CSVExcelStorage.java
pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/xml/XPathTest.java
pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestCSVExcelStorage.java
pig/branches/spark/ivy.xml
pig/branches/spark/ivy/libraries.properties
pig/branches/spark/shims/test/hadoop23/org/apache/pig/test/MiniCluster.java
pig/branches/spark/shims/test/hadoop23/org/apache/pig/test/TezMiniCluster.java
pig/branches/spark/src/docs/src/documentation/content/xdocs/test.xml
pig/branches/spark/src/org/apache/pig/Main.java
pig/branches/spark/src/org/apache/pig/PigConstants.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/JobControlCompiler.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MRCompiler.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigCombiner.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigGenericMapReduce.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/POUserFunc.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/plans/PhyPlanVisitor.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/plans/PhysicalPlan.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POPackage.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POReservoirSample.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/util/PlanHelper.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/tez/TezExecutionEngine.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/tez/TezJob.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/tez/TezLauncher.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/TezCompiler.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/TezOperator.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/TezPrinter.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/operator/POShuffledValueInputTez.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/operator/POValueOutputTez.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/optimizer/MultiQueryOptimizerTez.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/optimizer/UnionOptimizer.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/tez/runtime/ObjectCache.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/tez/runtime/PigProcessor.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/tez/util/TezCompilerUtil.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/util/MapRedUtil.java
pig/branches/spark/src/org/apache/pig/backend/hadoop/hbase/HBaseStorage.java
pig/branches/spark/src/org/apache/pig/builtin/JsonLoader.java
pig/branches/spark/src/org/apache/pig/builtin/RollupDimensions.java
pig/branches/spark/src/org/apache/pig/builtin/Utf8StorageConverter.java
pig/branches/spark/src/org/apache/pig/builtin/mock/Storage.java
pig/branches/spark/src/org/apache/pig/impl/plan/DependencyOrderWalker.java
pig/branches/spark/src/org/apache/pig/impl/util/avro/AvroStorageSchemaConversionUtilities.java
pig/branches/spark/src/org/apache/pig/newplan/logical/expression/DereferenceExpression.java
pig/branches/spark/src/org/apache/pig/newplan/logical/expression/ExpToPhyTranslationVisitor.java
pig/branches/spark/src/org/apache/pig/newplan/logical/expression/UserFuncExpression.java
pig/branches/spark/src/org/apache/pig/newplan/logical/optimizer/LogicalPlanOptimizer.java
pig/branches/spark/src/org/apache/pig/newplan/logical/relational/LOCogroup.java
pig/branches/spark/src/org/apache/pig/newplan/logical/relational/LOCube.java
pig/branches/spark/src/org/apache/pig/newplan/logical/relational/LogToPhyTranslationVisitor.java
pig/branches/spark/src/org/apache/pig/newplan/logical/relational/LogicalPlan.java
pig/branches/spark/src/org/apache/pig/newplan/logical/relational/LogicalRelationalNodesVisitor.java
pig/branches/spark/src/org/apache/pig/newplan/logical/rules/OptimizerUtils.java
pig/branches/spark/src/org/apache/pig/parser/AliasMasker.g
pig/branches/spark/src/org/apache/pig/parser/AstPrinter.g
pig/branches/spark/src/org/apache/pig/parser/AstValidator.g
pig/branches/spark/src/org/apache/pig/parser/DryRunGruntParser.java
pig/branches/spark/src/org/apache/pig/parser/LogicalPlanBuilder.java
pig/branches/spark/src/org/apache/pig/parser/LogicalPlanGenerator.g
pig/branches/spark/src/org/apache/pig/parser/QueryLexer.g
pig/branches/spark/src/org/apache/pig/parser/QueryParser.g
pig/branches/spark/src/org/apache/pig/tools/grunt/GruntParser.java
pig/branches/spark/src/org/apache/pig/tools/pigscript/parser/PigScriptParser.jj
pig/branches/spark/src/org/apache/pig/tools/pigstats/tez/TezScriptState.java
pig/branches/spark/test/e2e/harness/build.xml
pig/branches/spark/test/e2e/harness/xmlReport.pl
pig/branches/spark/test/e2e/pig/build.xml
pig/branches/spark/test/e2e/pig/deployers/ExistingClusterDeployer.pm
pig/branches/spark/test/e2e/pig/drivers/TestDriverPig.pm
pig/branches/spark/test/e2e/pig/tests/bigdata.conf
pig/branches/spark/test/e2e/pig/tests/multiquery.conf
pig/branches/spark/test/e2e/pig/tests/nightly.conf
pig/branches/spark/test/e2e/pig/tools/generate/generate_data.pl
pig/branches/spark/test/excluded-tests-20
pig/branches/spark/test/org/apache/pig/TestLoadStoreFuncLifeCycle.java
pig/branches/spark/test/org/apache/pig/builtin/TestAvroStorage.java
pig/branches/spark/test/org/apache/pig/builtin/TestOrcStorage.java
pig/branches/spark/test/org/apache/pig/builtin/TestOrcStoragePushdown.java
pig/branches/spark/test/org/apache/pig/builtin/TestPluckTuple.java
pig/branches/spark/test/org/apache/pig/builtin/mock/TestMockStorage.java
pig/branches/spark/test/org/apache/pig/impl/builtin/TestStreamingUDF.java
pig/branches/spark/test/org/apache/pig/newplan/logical/optimizer/TestImplicitSplitOnTuple.java
pig/branches/spark/test/org/apache/pig/newplan/logical/optimizer/TestSchemaResetter.java
pig/branches/spark/test/org/apache/pig/newplan/logical/relational/TestLocationInPhysicalPlan.java
pig/branches/spark/test/org/apache/pig/parser/TestColumnAliasConversion.java
pig/branches/spark/test/org/apache/pig/parser/TestLogicalPlanGenerator.java
pig/branches/spark/test/org/apache/pig/pigunit/PigTest.java
pig/branches/spark/test/org/apache/pig/test/MiniGenericCluster.java
pig/branches/spark/test/org/apache/pig/test/TestAlgebraicEvalLocal.java
pig/branches/spark/test/org/apache/pig/test/TestAssert.java
pig/branches/spark/test/org/apache/pig/test/TestAutoLocalMode.java
pig/branches/spark/test/org/apache/pig/test/TestBatchAliases.java
pig/branches/spark/test/org/apache/pig/test/TestBlackAndWhitelistValidator.java
pig/branches/spark/test/org/apache/pig/test/TestBuiltInBagToTupleOrString.java
pig/branches/spark/test/org/apache/pig/test/TestBuiltin.java
pig/branches/spark/test/org/apache/pig/test/TestBuiltinInvoker.java
pig/branches/spark/test/org/apache/pig/test/TestCase.java
pig/branches/spark/test/org/apache/pig/test/TestCommit.java
pig/branches/spark/test/org/apache/pig/test/TestConversions.java
pig/branches/spark/test/org/apache/pig/test/TestCubeOperator.java
pig/branches/spark/test/org/apache/pig/test/TestDataBagAccess.java
pig/branches/spark/test/org/apache/pig/test/TestEvalPipelineLocal.java
pig/branches/spark/test/org/apache/pig/test/TestFetch.java
pig/branches/spark/test/org/apache/pig/test/TestFilterOpNumeric.java
pig/branches/spark/test/org/apache/pig/test/TestFilterOpString.java
pig/branches/spark/test/org/apache/pig/test/TestForEachNestedPlan.java
pig/branches/spark/test/org/apache/pig/test/TestForEachNestedPlanLocal.java
pig/branches/spark/test/org/apache/pig/test/TestForEachStar.java
pig/branches/spark/test/org/apache/pig/test/TestHBaseStorage.java
pig/branches/spark/test/org/apache/pig/test/TestIn.java
pig/branches/spark/test/org/apache/pig/test/TestInfixArithmetic.java
pig/branches/spark/test/org/apache/pig/test/TestInputOutputFileValidator.java
pig/branches/spark/test/org/apache/pig/test/TestJoin.java
pig/branches/spark/test/org/apache/pig/test/TestJsonLoaderStorage.java
pig/branches/spark/test/org/apache/pig/test/TestLOLoadDeterminedSchema.java
pig/branches/spark/test/org/apache/pig/test/TestLimitSchemaStore.java
pig/branches/spark/test/org/apache/pig/test/TestLoad.java
pig/branches/spark/test/org/apache/pig/test/TestLocal.java
pig/branches/spark/test/org/apache/pig/test/TestLocal2.java
pig/branches/spark/test/org/apache/pig/test/TestMacroExpansion.java
pig/branches/spark/test/org/apache/pig/test/TestMultiQuery.java
pig/branches/spark/test/org/apache/pig/test/TestMultiQueryBasic.java
pig/branches/spark/test/org/apache/pig/test/TestMultiQueryLocal.java
pig/branches/spark/test/org/apache/pig/test/TestNestedForeach.java
pig/branches/spark/test/org/apache/pig/test/TestNewPlanColumnPrune.java
pig/branches/spark/test/org/apache/pig/test/TestNullConstant.java
pig/branches/spark/test/org/apache/pig/test/TestOrderBy3.java
pig/branches/spark/test/org/apache/pig/test/TestPONegative.java
pig/branches/spark/test/org/apache/pig/test/TestPOPartialAggPlan.java
pig/branches/spark/test/org/apache/pig/test/TestParamSubPreproc.java
pig/branches/spark/test/org/apache/pig/test/TestParser.java
pig/branches/spark/test/org/apache/pig/test/TestPigContext.java
pig/branches/spark/test/org/apache/pig/test/TestPigRunner.java
pig/branches/spark/test/org/apache/pig/test/TestPigServer.java
pig/branches/spark/test/org/apache/pig/test/TestPigServerWithMacros.java
pig/branches/spark/test/org/apache/pig/test/TestPigSplit.java
pig/branches/spark/test/org/apache/pig/test/TestPigStats.java
pig/branches/spark/test/org/apache/pig/test/TestPigStorage.java
pig/branches/spark/test/org/apache/pig/test/TestPoissonSampleLoader.java
pig/branches/spark/test/org/apache/pig/test/TestProject.java
pig/branches/spark/test/org/apache/pig/test/TestProjectStarExpander.java
pig/branches/spark/test/org/apache/pig/test/TestPruneColumn.java
pig/branches/spark/test/org/apache/pig/test/TestRank1.java
pig/branches/spark/test/org/apache/pig/test/TestRank2.java
pig/branches/spark/test/org/apache/pig/test/TestRank3.java
pig/branches/spark/test/org/apache/pig/test/TestRegisteredJarVisibility.java
pig/branches/spark/test/org/apache/pig/test/TestRelationToExprProject.java
pig/branches/spark/test/org/apache/pig/test/TestScalarAliases.java
pig/branches/spark/test/org/apache/pig/test/TestScriptingLanguagePython.java
pig/branches/spark/test/org/apache/pig/test/TestSplit.java
pig/branches/spark/test/org/apache/pig/test/TestSplitIndex.java
pig/branches/spark/test/org/apache/pig/test/TestStore.java
pig/branches/spark/test/org/apache/pig/test/TestStoreInstances.java
pig/branches/spark/test/org/apache/pig/test/TestStreaming.java
pig/branches/spark/test/org/apache/pig/test/TestStreamingLocal.java
pig/branches/spark/test/org/apache/pig/test/TestTypedMap.java
pig/branches/spark/test/org/apache/pig/test/TestUDF.java
pig/branches/spark/test/org/apache/pig/test/TestUDFContext.java
pig/branches/spark/test/org/apache/pig/test/TestUDFGroovy.java
pig/branches/spark/test/org/apache/pig/test/TestUDFWithoutParameter.java
pig/branches/spark/test/org/apache/pig/test/TestUTF8.java
pig/branches/spark/test/org/apache/pig/test/TestUnion.java
pig/branches/spark/test/org/apache/pig/test/TestUnionOnSchema.java
pig/branches/spark/test/org/apache/pig/test/Util.java
pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Limit-2.gld
pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Limit-3.gld
pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-MQ-1-OPTOFF.gld
pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-MQ-2-OPTOFF.gld
pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-MQ-2.gld
pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-MQ-3.gld
pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-MQ-4-OPTOFF.gld
pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-MQ-5-OPTOFF.gld
pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Order-1.gld
pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Rank-1.gld
pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Rank-2.gld
pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-SkewJoin-1.gld
pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-1-OPTOFF.gld
pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-10-OPTOFF.gld
pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-2-OPTOFF.gld
pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-2.gld
pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-4-OPTOFF.gld
pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-6-OPTOFF.gld
pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-6.gld
pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-7-OPTOFF.gld
pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-7.gld
pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-8-OPTOFF.gld
pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-8.gld
pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-9-OPTOFF.gld
pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-9.gld
pig/branches/spark/test/org/apache/pig/test/pigunit/TestPigTest.java
pig/branches/spark/test/org/apache/pig/test/pigunit/pig/TestGruntParser.java
pig/branches/spark/test/org/apache/pig/tez/TestTezJobControlCompiler.java
pig/branches/spark/test/org/apache/pig/tools/grunt/TestGruntParser.java
pig/branches/spark/test/tez-local-tests
pig/branches/spark/test/tez-tests
Propchange: pig/branches/spark/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Tue Jan 27 02:27:45 2015
@@ -1,2 +1,2 @@
/hadoop/pig/branches/multiquery:741727-770826
-/pig/trunk:1621676-1642118
+/pig/trunk:1621676-1654952
Modified: pig/branches/spark/CHANGES.txt
URL:
http://svn.apache.org/viewvc/pig/branches/spark/CHANGES.txt?rev=1654955&r1=1654954&r2=1654955&view=diff
==============================================================================
--- pig/branches/spark/CHANGES.txt (original)
+++ pig/branches/spark/CHANGES.txt Tue Jan 27 02:27:45 2015
@@ -24,10 +24,78 @@ INCOMPATIBLE CHANGES
IMPROVEMENTS
+PIG-2692: Make the Pig unit facilities more generalizable and update javadocs
(razsapps via daijy)
+
+PIG-4379: Make RoundRobinPartitioner public (daijy)
+
+PIG-4378: Better way to fix tez local mode test hanging (daijy)
+
+PIG-4358: Add test cases for utf8 chinese in Pig (nmaheshwari via daijy)
+
+PIG-4370: HBaseStorage should support delete markers (bridiver via daijy)
+
+PIG-4360: HBaseStorage should support setting the timestamp field (bridiver
via daijy)
+
+PIG-4337: Split Types and MultiQuery e2e tests into multiple groups (rohini)
+
+PIG-4066: An optimization for ROLLUP operation in Pig (hxquangnhat via
cheolsoo)
+
PIG-4333: Split BigData tests into multiple groups (rohini)
BUG FIXES
+PIG-4387: Honor yarn settings in tez-site.xml and optimize dag status fetch
(rohini)
+
+PIG-4352: Port local mode tests to Tez - TestUnionOnSchema (daijy)
+
+PIG-4359: Port local mode tests to Tez - part4 (daijy)
+
+PIG-4340: PigStorage fails parsing empty map (daijy)
+
+PIG-4366: Port local mode tests to Tez - part5 (daijy)
+
+PIG-4381: PIG grunt shell DEFINE commands fails when it spans multiple lines
(daijy)
+
+PIG-4384: TezLauncher thread should be daemon thread (zjffdu via daijy)
+
+PIG-4376: NullPointerException accessing a field of an invalid bag from a
nested foreach
+ (kspringborn via daijy)
+
+PIG-4355: Piggybank: XPath can't handle namespace in xpath, nor can it return
more than one match
+ (cavanaug via daijy)
+
+PIG-4371: Duplicate snappy.version in libraries.properties (daijy)
+
+PIG-4368: Port local mode tests to Tez - TestLoadStoreFuncLifeCycle (daijy)
+
+PIG-4367: Port local mode tests to Tez - TestMultiQueryBasic (daijy)
+
+PIG-4339: e2e test framework assumes default exectype as mapred (rohini)
+
+PIG-2949: JsonLoader only reads arrays of objects (eyal via daijy)
+
+PIG-4213: CSVExcelStorage not quoting texts containing \r (CR) when storing
(alfonso.nishikawa via daijy)
+
+PIG-2647: Split Combining drops splits with empty getLocations() (tmwoodruff
via daijy)
+
+PIG-4294: Enable unit test "TestNestedForeach" for spark (kellyzly via rohini)
+
+PIG-4282: Enable unit test "TestForEachNestedPlan" for spark (kellyzly via
rohini)
+
+PIG-4361: Fix perl script problem in TestStreaming.java (kellyzly via xuefu)
+
+PIG-4354: Port local mode tests to Tez - part3 (daijy)
+
+PIG-4338: Fix test failures with JDK8 (rohini)
+
+PIG-4351: TestPigRunner.simpleTest2 fail on trunk (daijy)
+
+PIG-4350: Port local mode tests to Tez - part2 (daijy)
+
+PIG-4326: AvroStorageSchemaConversionUtilities does not properly convert
schema for maps of arrays of records (mprim via daijy)
+
+PIG-4345: e2e test "RubyUDFs_13" fails because of the different result of
"group a all" in different engines like "spark", "mapreduce" (kellyzly via
rohini)
+
PIG-4332: Remove redundant jars packaged into pig-withouthadoop.jar for hadoop
2 (rohini)
PIG-4331: update README, '-x' option in usage to include tez (thejas via daijy)
@@ -55,6 +123,8 @@ IMPROVEMENTS
BUG FIXES
+PIG-4375: ObjectCache should use ProcessorContext.getObjectRegistry() (rohini)
+
PIG-4334: PigProcessor does not set pig.datetime.default.tz (rohini)
PIG-4342: Pig 0.14 cannot identify the uppercase of DECLARE and DEFAULT (daijy)
Modified: pig/branches/spark/build.xml
URL:
http://svn.apache.org/viewvc/pig/branches/spark/build.xml?rev=1654955&r1=1654954&r2=1654955&view=diff
==============================================================================
--- pig/branches/spark/build.xml (original)
+++ pig/branches/spark/build.xml Tue Jan 27 02:27:45 2015
@@ -61,7 +61,7 @@
<property name="javac.debug" value="on" />
<property name="javac.optimize" value="on" />
<property name="javac.deprecation" value="off" />
- <property name="javac.version" value="1.5" />
+ <property name="javac.version" value="1.7" />
<property name="javac.args" value="" />
<condition property="javac.args.warnings" value="-Xmaxwarns 1000000 -Xlint
-Xlint:-deprecation" else="-Xmaxwarns 1000000">
<isset property="all.warnings" />
Modified:
pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/xml/XPath.java
URL:
http://svn.apache.org/viewvc/pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/xml/XPath.java?rev=1654955&r1=1654954&r2=1654955&view=diff
==============================================================================
---
pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/xml/XPath.java
(original)
+++
pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/xml/XPath.java
Tue Jan 27 02:27:45 2015
@@ -38,113 +38,203 @@ import org.xml.sax.InputSource;
*/
public class XPath extends EvalFunc<String> {
- /** Hold onto last xpath & xml in case the next call to xpath() is feeding
the same xml document
- * The reason for this is because creating an xpath object is costly. */
+ /**
+ * Hold onto last xpath & xml in case the next call to xpath() is feeding
+ * the same xml document The reason for this is because creating an xpath
+ * object is costly.
+ */
private javax.xml.xpath.XPath xpath = null;
private String xml = null;
private Document document;
private static boolean cache = true;
+ private static boolean ignoreNamespace = true;
+ public static final String EMPTY_STRING = "";
/**
- * input should contain: 1) xml 2) xpath 3) optional cache xml doc flag
+ * input should contain: 1) xml 2) xpath
+ * 3) optional cache xml doc flag
+ * 4) optional ignore namespace flag
*
* Usage:
* 1) XPath(xml, xpath)
* 2) XPath(xml, xpath, false)
+ * 3) XPath(xml, xpath, false, false)
*
- * @param 1st element should to be the xml
+ * @param input
+ *            1st element should be the xml
* 2nd element should be the xpath
* 3rd optional boolean cache flag (default true)
+ * 4th optional boolean ignore namespace flag (default true)
+ *
+ *
+     * This UDF will cache the last xml document. This is helpful when
+     * multiple consecutive xpath calls are made for the same xml
+     * document. Caching can be turned off to ensure that the UDF
+     * recreates the internal javax.xml.xpath.XPath for every call
*
- * This UDF will cache the last xml document. This is helpful when
multiple consecutive xpath calls are made for the same xml document.
- * Caching can be turned off to ensure that the UDF's recreates the
internal javax.xml.xpath.XPath for every call
+ * This UDF will also support ignoring the namespace in the xml
tags.
+ * This will help to search xpath items by ignoring its namespace.
+ * Ignoring of the namespace can be turned off for special cases
using
+ * a fourth argument in the UDF.
*
* @return chararrary result or null if no match
*/
@Override
public String exec(final Tuple input) throws IOException {
- if (input == null || input.size() <= 1) {
- warn("Error processing input, not enough parameters or null input"
+ input,
- PigWarning.UDF_WARNING_1);
- return null;
- }
-
-
- if (input.size() > 3) {
- warn("Error processing input, too many parameters" + input,
- PigWarning.UDF_WARNING_1);
+ if (!isArgsValid(input)) { // Validate arguments
return null;
}
try {
final String xml = (String) input.get(0);
+
if (xml == null) {
return null;
}
if(input.size() > 2)
cache = (Boolean) input.get(2);
-
- if(!cache || xpath == null || !xml.equals(this.xml))
- {
+
+ if (!cache || xpath == null || !xml.equals(this.xml)) {
final InputSource source = new InputSource(new
StringReader(xml));
-
- this.xml = xml; //track the xml for subsequent calls to this
udf
+
+ this.xml = xml; // track the xml for subsequent calls to this
udf
final DocumentBuilderFactory dbf =
DocumentBuilderFactory.newInstance();
final DocumentBuilder db = dbf.newDocumentBuilder();
-
+
this.document = db.parse(source);
final XPathFactory xpathFactory = XPathFactory.newInstance();
this.xpath = xpathFactory.newXPath();
-
+
+ }
+
+ String xpathString = (String) input.get(1);
+
+ if (ignoreNamespace) {
+ xpathString = createNameSpaceIgnoreXpathString(xpathString);
}
-
- final String xpathString = (String) input.get(1);
final String value = xpath.evaluate(xpathString, document);
return value;
} catch (Exception e) {
- warn("Error processing input " + input.getType(0),
- PigWarning.UDF_WARNING_1);
-
+ warn("Error processing input " + input.getType(0),
PigWarning.UDF_WARNING_1);
+
return null;
}
}
+
+ /**
+ * Validates values of the input parameters.
+ *
+ * @param Tuple
+ * @return boolean
+ */
+ private boolean isArgsValid(final Tuple input) {
+ if (input == null || input.size() <= 1) {
+ warn("Error processing input, not enough parameters or null input"
+ input, PigWarning.UDF_WARNING_1);
+ return false;
+ }
+
+ if (input.size() > 4) {
+ warn("Error processing input, too many parameters" + input,
PigWarning.UDF_WARNING_1);
+ return false;
+ }
+
+ try {
+ // 3rd Parameter - CACHE
+ if (input.size() > 2 && !(input.get(2) instanceof Boolean)) {
+ warn("Error processing input, invalid value in 3rd parameter"
+ input, PigWarning.UDF_WARNING_1);
+ return false;
+ }
+
+            // 4th Parameter IGNORE_NAMESPACE
+ if (input.size() > 3 && !(input.get(3) instanceof Boolean)) {
+ warn("Error processing input, invalid value in 4th parameter"
+ input, PigWarning.UDF_WARNING_1);
+ return false;
+ }
+ } catch (Exception ex) {
+ return false;
+ }
+ return true;
+ }
+
+
+ /**
+ * Returns a new the xPathString by adding additional parameters
+ * in the existing xPathString for ignoring the namespace during
compilation.
+ *
+ * @param String xpathString
+ * @return String modified xpathString
+ */
+ private String createNameSpaceIgnoreXpathString(final String xpathString) {
+ final String QUERY_PREFIX = "//*";
+ final String LOCAL_PREFIX = "[local-name()='";
+ final String LOCAL_POSTFIX = "']";
+ final String SPLITTER = "/";
+
+ try {
+ String xpathStringWithLocalName = EMPTY_STRING;
+ String[] individualNodes = xpathString.split(SPLITTER);
+
+ for (String node : individualNodes) {
+ xpathStringWithLocalName =
xpathStringWithLocalName.concat(QUERY_PREFIX + LOCAL_PREFIX + node
+ + LOCAL_POSTFIX);
+ }
+ return xpathStringWithLocalName;
+ } catch (Exception ex) {
+ return xpathString;
+ }
+ }
+
+ /**
+ * Returns argument schemas of the UDF.
+ *
+ * @return List
+ */
+ @Override
+ public List<FuncSpec> getArgToFuncMapping() throws FrontendException {
- @Override
- public List<FuncSpec> getArgToFuncMapping() throws FrontendException {
+ final List<FuncSpec> funcList = new ArrayList<FuncSpec>();
- final List<FuncSpec> funcList = new ArrayList<FuncSpec>();
+ /* either two chararray arguments */
+ List<FieldSchema> fields = new ArrayList<FieldSchema>();
+ fields.add(new Schema.FieldSchema(null, DataType.CHARARRAY));
+ fields.add(new Schema.FieldSchema(null, DataType.CHARARRAY));
- /*either two chararray arguments*/
- List<FieldSchema> fields = new ArrayList<FieldSchema>();
- fields.add(new Schema.FieldSchema(null, DataType.CHARARRAY));
- fields.add(new Schema.FieldSchema(null, DataType.CHARARRAY));
+ Schema twoArgInSchema = new Schema(fields);
- Schema twoArgInSchema = new Schema(fields);
+ funcList.add(new FuncSpec(this.getClass().getName(), twoArgInSchema));
- funcList.add(new FuncSpec(this.getClass().getName(),
twoArgInSchema));
+ /* or two chararray and a boolean argument */
+ fields = new ArrayList<FieldSchema>();
+ fields.add(new Schema.FieldSchema(null, DataType.CHARARRAY));
+ fields.add(new Schema.FieldSchema(null, DataType.CHARARRAY));
+ fields.add(new Schema.FieldSchema(null, DataType.BOOLEAN));
- /*or two chararray and a boolean argument*/
- fields = new ArrayList<FieldSchema>();
- fields.add(new Schema.FieldSchema(null, DataType.CHARARRAY));
- fields.add(new Schema.FieldSchema(null, DataType.CHARARRAY));
- fields.add(new Schema.FieldSchema(null, DataType.BOOLEAN));
+ Schema threeArgInSchema = new Schema(fields);
- Schema threeArgInSchema = new Schema(fields);
+ funcList.add(new FuncSpec(this.getClass().getName(),
threeArgInSchema));
- funcList.add(new FuncSpec(this.getClass().getName(),
threeArgInSchema));
+ /* or two chararray and two boolean arguments */
+ fields = new ArrayList<FieldSchema>();
+ fields.add(new Schema.FieldSchema(null, DataType.CHARARRAY));
+ fields.add(new Schema.FieldSchema(null, DataType.CHARARRAY));
+ fields.add(new Schema.FieldSchema(null, DataType.BOOLEAN));
+ fields.add(new Schema.FieldSchema(null, DataType.BOOLEAN));
- return funcList;
- }
+ Schema fourArgInSchema = new Schema(fields);
-}
+ funcList.add(new FuncSpec(this.getClass().getName(), fourArgInSchema));
+ return funcList;
+ }
+
+}
Modified:
pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/CSVExcelStorage.java
URL:
http://svn.apache.org/viewvc/pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/CSVExcelStorage.java?rev=1654955&r1=1654954&r2=1654955&view=diff
==============================================================================
---
pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/CSVExcelStorage.java
(original)
+++
pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/CSVExcelStorage.java
Tue Jan 27 02:27:45 2015
@@ -103,6 +103,7 @@ public class CSVExcelStorage extends Pig
public static enum Headers { DEFAULT, READ_INPUT_HEADER,
SKIP_INPUT_HEADER, WRITE_OUTPUT_HEADER, SKIP_OUTPUT_HEADER }
protected final static byte LINEFEED = '\n';
+ protected final static byte CARRIAGE_RETURN = '\r';
protected final static byte DOUBLE_QUOTE = '"';
protected final static byte RECORD_DEL = LINEFEED;
@@ -293,6 +294,7 @@ public class CSVExcelStorage extends Pig
ArrayList<Object> mProtoTuple = new ArrayList<Object>();
int embeddedNewlineIndex = -1;
+ int embeddedCarriageReturn = -1;
String fieldStr = null;
// For good debug messages:
int fieldCounter = -1;
@@ -315,13 +317,15 @@ public class CSVExcelStorage extends Pig
// If any field delimiters are in the field, or if we did replace
// any double quotes with a pair of double quotes above,
// or if the string includes a newline character (LF:\n:0x0A)
+ // or includes a carriage return (CR:\r:0x0D)
// and we are to allow newlines in fields,
// then the entire field must be enclosed in double quotes:
embeddedNewlineIndex = fieldStr.indexOf(LINEFEED);
+ embeddedCarriageReturn = fieldStr.indexOf(CARRIAGE_RETURN);
if ((fieldStr.indexOf(fieldDelimiter) != -1) ||
(fieldStr.indexOf(DOUBLE_QUOTE) != -1) ||
- (multilineTreatment == Multiline.YES) && (embeddedNewlineIndex
!= -1)) {
+ (multilineTreatment == Multiline.YES) && (embeddedNewlineIndex
!= -1 || embeddedCarriageReturn != -1)) {
fieldStr = "\"" + fieldStr + "\"";
}
Modified:
pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/xml/XPathTest.java
URL:
http://svn.apache.org/viewvc/pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/xml/XPathTest.java?rev=1654955&r1=1654954&r2=1654955&view=diff
==============================================================================
---
pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/xml/XPathTest.java
(original)
+++
pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/xml/XPathTest.java
Tue Jan 27 02:27:45 2015
@@ -21,6 +21,7 @@ import org.apache.commons.lang.math.Rand
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
import org.apache.pig.piggybank.evaluation.xml.XPath;
+import org.junit.Ignore;
import org.junit.Test;
public class XPathTest {
@@ -121,8 +122,97 @@ public class XPathTest {
}
+ @Test
+ public void testExecTupleWithNamespace() throws Exception {
+
+ final XPath xpath = new XPath();
+
+ final Tuple tuple = mock(Tuple.class);
+
+ when(tuple.get(0)).thenReturn(
+ "<ann:book id=\"bk101\">" + "<author>Gambardella,
Matthew</author>"
+ + "<title>XML Developer's Guide</title>" +
"<genre>Computer</genre>" + "<price>44.95</price>"
+ + "<publish_date>2000-10-01</publish_date>"
+ + "<description>An in-depth look at creating
applications with XML.</description>"
+ + "</ann:book>");
+
+ when(tuple.size()).thenReturn(4);
+ when(tuple.get(2)).thenReturn(true);
+ when(tuple.get(3)).thenReturn(true);
+
+ when(tuple.get(1)).thenReturn("book/author");
+ assertEquals("Gambardella, Matthew", xpath.exec(tuple));
+ assertNotEquals("Someone else", xpath.exec(tuple));
+
+ when(tuple.get(1)).thenReturn("book/price");
+ assertEquals("44.95", xpath.exec(tuple));
+ assertNotEquals("00.00", xpath.exec(tuple));
+
+ }
+
+ @Test
+ public void testExecTupleWithElementNodeWithComplexNameSpace() throws
Exception {
+
+ final XPath xpath = new XPath();
+
+ final Tuple tuple = mock(Tuple.class);
+
+ when(tuple.get(0)).thenReturn(
+
+ "<cbs:bookstore>"
+ +"<cbs:book>"
+ + "<bsbi:authors>"
+ + "<bsbi:author_1>Gambardella</bsbi:author_1>"
+ + "<bsbi:author_2>Matthew</bsbi:author_2>"
+ + "<bsbi:author_3>Mike</bsbi:author_3>"
+ + "</bsbi:authors>"
+ + "<bsbi:title>23</bsbi:title>"
+ + "<bsbi:genre>semiAutomatic</bsbi:genre>"
+ + "<bsbi:price>enabled</bsbi:price>"
+ +
"<bsbi:publish_date>leftToRight</bsbi:publish_date>"
+ + "<bsbi:description>282</bsbi:description>"
+ + "<bsbi:reviews>"
+ + "<review_1>4 stars</review_1>"
+ + "<review_2>3.5 stars</review_2>"
+ + "<review_3>4 stars</review_3>"
+ + "<review_4>4.2 stars</review_4>"
+ + "<review_5>3.5 stars</review_5>"
+ + "</bsbi:reviews>"
+ + "</cbs:book>"
+ + "<cbs:book>"
+ + "<bsbi:authors>"
+ + "<bsbi:author_1>O'Brien</bsbi:author_1>"
+ + "<bsbi:author_2>Tim</bsbi:author_2>"
+ + "</bsbi:authors>"
+ + "<bsbi:title>23</bsbi:title>"
+ + "<bsbi:genre>semiAutomatic</bsbi:genre>"
+ + "<bsbi:price>enabled</bsbi:price>"
+ + "<bsbi:publish_date>leftToRight</bsbi:publish_date>"
+ + "<bsbi:description>282</bsbi:description>"
+ + "<bsbi:reviews>"
+ + "<bsbi:review_1>3.5 stars</bsbi:review_1>"
+ + "<bsbi:review_2>4 stars</bsbi:review_2>"
+ + "<bsbi:review_3>3.5 stars</bsbi:review_3>"
+ + "<bsbi:review_4>4.2 stars</bsbi:review_4>"
+ + "<bsbi:review_5>4 stars</bsbi:review_5>"
+
+ + "</bsbi:reviews>"
+ + "</cbs:book></cbs:bookstore>");
+
+ when(tuple.size()).thenReturn(4);
+ when(tuple.get(2)).thenReturn(true);
+ when(tuple.get(3)).thenReturn(true);
+
+ when(tuple.get(1)).thenReturn("bookstore/book/authors");
+ assertEquals("GambardellaMatthewMike", xpath.exec(tuple));
+
+ when(tuple.get(1)).thenReturn("bookstore/book/reviews");
+ assertEquals("4 stars3.5 stars4 stars4.2 stars3.5 stars",
xpath.exec(tuple));
+
+ }
- //@Test --optional test
+ @Ignore //--optional test
+ @Test
public void testCacheBenefit() throws Exception{
final XPath xpath = new XPath();
Modified:
pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestCSVExcelStorage.java
URL:
http://svn.apache.org/viewvc/pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestCSVExcelStorage.java?rev=1654955&r1=1654954&r2=1654955&view=diff
==============================================================================
---
pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestCSVExcelStorage.java
(original)
+++
pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestCSVExcelStorage.java
Tue Jan 27 02:27:45 2015
@@ -19,6 +19,7 @@
package org.apache.pig.piggybank.test.storage;
import java.io.File;
+import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
@@ -27,10 +28,11 @@ import java.util.Properties;
import junit.framework.Assert;
import org.apache.commons.lang.StringUtils;
-
import org.apache.pig.ExecType;
import org.apache.pig.PigServer;
import
org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MRConfiguration;
+import org.apache.pig.builtin.mock.Storage;
+import org.apache.pig.builtin.mock.Storage.Data;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.Tuple;
import org.apache.pig.tools.parameters.ParseException;
@@ -58,6 +60,7 @@ public class TestCSVExcelStorage {
"1st Field,\"A poem that continues\n" +
"for several lines\n" +
"do we\n" +
+ "(even with \r)" +
"handle that?\",Good,Fairy\n";
String[] testStrCommaArray =
@@ -69,7 +72,7 @@ public class TestCSVExcelStorage {
"\"Conrad\nEmil\",Dinger,40",
"Emil,\"\nDinger\",40",
"Quote problem,\"My \"\"famous\"\"\nsong\",60",
- "1st Field,\"A poem that continues\nfor several lines\ndo
we\nhandle that?\",Good,Fairy",
+ "1st Field,\"A poem that continues\nfor several lines\ndo
we\n(even with \r)handle that?\",Good,Fairy",
};
@SuppressWarnings("serial")
@@ -83,7 +86,7 @@ public class TestCSVExcelStorage {
add(Util.createTuple(new String[] {"Conrad\nEmil", "Dinger",
"40"}));
add(Util.createTuple(new String[] {"Emil", "\nDinger", "40"}));
add(Util.createTuple(new String[] {"Quote problem", "My
\"famous\"\nsong", "60"}));
- add(Util.createTuple(new String[] {"1st Field", "A poem that
continues\nfor several lines\ndo we\nhandle that?", "Good", "Fairy"}));
+ add(Util.createTuple(new String[] {"1st Field", "A poem that
continues\nfor several lines\ndo we\n(even with \n)handle that?", "Good",
"Fairy"}));
}
};
@@ -104,7 +107,8 @@ public class TestCSVExcelStorage {
add(Util.createTuple(new String[] {"1st Field", "A poem that
continues"}));
add(Util.createTuple(new String[] {"for several lines"}));
add(Util.createTuple(new String[] {"do we"}));
- add(Util.createTuple(new String[] {"handle that?,Good,Fairy"}));
// Trailing double quote eats rest of line
+ add(Util.createTuple(new String[] {"(even with "}));
+ add(Util.createTuple(new String[] {")handle that?,Good,Fairy"}));
// Trailing double quote eats rest of line
}
};
@@ -161,6 +165,7 @@ public class TestCSVExcelStorage {
"1,,,,\"",
"qwe",
"rty\", uiop",
+ "1,10,2.718,3.14159,\"abc\rdef\",uiop",
"1,,,,\"qwe,rty\",uiop",
"1,,,,\"q\"\"wert\"\"y\", uiop",
"1,,,,qwerty,\"u\"\"io\"\"p\""
@@ -338,6 +343,7 @@ public class TestCSVExcelStorage {
"(1,10,,3.15159,,uiop)", // extra field (input data
has "moose" after "uiop")
"(1,,2.718,,qwerty,uiop)", // quoted regular fields
(2.718, qwerty, and uiop in quotes)
"(1,,,,\nqwe\nrty, uiop)", // newlines in quotes
+ "(1,10,2.718,3.14159,abc\ndef,uiop)",// After LOAD \r => \n
(PIG-4213)
"(1,,,,qwe,rty,uiop)", // commas in quotes
"(1,,,,q\"wert\"y, uiop)", // quotes in quotes
"(1,,,,qwerty,u\"io\"p)" // quotes in quotes at the
end of a line
@@ -384,6 +390,8 @@ public class TestCSVExcelStorage {
"(1,,,,\")", // since we are just using
TextLoader for verification
"(qwe)", // it treats the
linebreaks as meaning separate records
"(rty\", uiop)", // but as shown in the
load() test, CSVExcelStorage will read these properly
+ "(1,10,2.718,3.14159,\"abc)",
+ "(def\",uiop)",
"(1,,,,\"qwe,rty\",uiop)",
"(1,,,,\"q\"\"wert\"\"y\", uiop)",
"(1,,,,qwerty,\"u\"\"io\"\"p\")"
@@ -426,4 +434,48 @@ public class TestCSVExcelStorage {
Assert.assertEquals(StringUtils.join(expected, "\n"),
StringUtils.join(data, "\n"));
}
+
+ // Test that STORE stores CR (\r) quoted/unquoted in
yes_multiline/no_multiline
+ @Test
+ public void storeCR() throws IOException {
+ ArrayList<Tuple> inputTuples = new ArrayList<Tuple>();
+ inputTuples.add(Storage.tuple(1,"text","a line\rand another line to
write"));
+ String expected = "1,text,\"a line\rand another line to write\"\n";
+ String expectedNoMultiline = "1,text,a line\rand another line to
write\n";
+
+ // Prepare the input using mock.Storage() since this will not
interpret \r
+ Data data = Storage.resetData(pig);
+ data.set("inputTuples", inputTuples);
+
+ // Test for quoted when YES_MULTILINE
+ // Execute
+ String testOut = dataDir + "csv_cr_quoted_output_yes_multiline" ;
+ String script = "A = load 'inputTuples' USING mock.Storage() as
(f1:int, f2:chararray, f3:chararray);" +
+ "STORE A INTO '" + testOut + "' USING " +
+ "org.apache.pig.piggybank.storage.CSVExcelStorage(',',
'YES_MULTILINE', 'UNIX');";
+ Util.registerMultiLineQuery(pig, script);
+ // Load result
+ FileInputStream resultFile = new FileInputStream(testOut +
"/part-m-00000");
+ byte[] actualBytes = new byte[resultFile.available()];
+ resultFile.read(actualBytes);
+ resultFile.close();
+ String actual = new String(actualBytes);
+ Assert.assertEquals(expected, actual);
+
+ // Test for unquoted when NO_MULTILINE
+ // Execute
+ testOut = dataDir + "csv_cr_quoted_output_no_multiline" ;
+ script = "A = load 'inputTuples' USING mock.Storage() as (f1:int,
f2:chararray, f3:chararray);" +
+ "STORE A INTO '" + testOut + "' USING " +
+ "org.apache.pig.piggybank.storage.CSVExcelStorage(',',
'NO_MULTILINE', 'UNIX');";
+ Util.registerMultiLineQuery(pig, script);
+ // Load result
+ resultFile = new FileInputStream(testOut + "/part-m-00000");
+ actualBytes = new byte[resultFile.available()];
+ resultFile.read(actualBytes);
+ resultFile.close();
+ actual = new String(actualBytes);
+ Assert.assertEquals(expectedNoMultiline, actual);
+ }
+
}
Modified: pig/branches/spark/ivy.xml
URL:
http://svn.apache.org/viewvc/pig/branches/spark/ivy.xml?rev=1654955&r1=1654954&r2=1654955&view=diff
==============================================================================
--- pig/branches/spark/ivy.xml (original)
+++ pig/branches/spark/ivy.xml Tue Jan 27 02:27:45 2015
@@ -213,7 +213,7 @@
</dependency>
<dependency org="net.sf.jopt-simple" name="jopt-simple"
rev="${jopt.version}"
conf="test->default" />
- <dependency org="org.xerial.snappy" name="snappy-java"
rev="${snappy.version}"
+ <dependency org="org.xerial.snappy" name="snappy-java"
rev="${snappy-java.version}"
conf="compile->default;checkstyle->master"/>
<dependency org="com.googlecode.json-simple" name="json-simple"
rev="${json-simple.version}"
conf="compile->master;checkstyle->master"/>
Modified: pig/branches/spark/ivy/libraries.properties
URL:
http://svn.apache.org/viewvc/pig/branches/spark/ivy/libraries.properties?rev=1654955&r1=1654954&r2=1654955&view=diff
==============================================================================
--- pig/branches/spark/ivy/libraries.properties (original)
+++ pig/branches/spark/ivy/libraries.properties Tue Jan 27 02:27:45 2015
@@ -94,7 +94,7 @@ jsr311-api.version=1.1.1
mockito.version=1.8.4
jansi.version=1.9
asm.version=3.3.1
-snappy.version=1.1.0.1
-tez.version=0.5.2
+snappy-java.version=1.1.0.1
+tez.version=0.5.3
parquet-pig-bundle.version=1.2.3
snappy.version=0.2
Modified:
pig/branches/spark/shims/test/hadoop23/org/apache/pig/test/MiniCluster.java
URL:
http://svn.apache.org/viewvc/pig/branches/spark/shims/test/hadoop23/org/apache/pig/test/MiniCluster.java?rev=1654955&r1=1654954&r2=1654955&view=diff
==============================================================================
--- pig/branches/spark/shims/test/hadoop23/org/apache/pig/test/MiniCluster.java
(original)
+++ pig/branches/spark/shims/test/hadoop23/org/apache/pig/test/MiniCluster.java
Tue Jan 27 02:27:45 2015
@@ -27,7 +27,9 @@ import org.apache.hadoop.hdfs.MiniDFSClu
import org.apache.hadoop.mapreduce.filecache.DistributedCache;
import org.apache.hadoop.mapreduce.v2.MiniMRYarnCluster;
import org.apache.pig.ExecType;
+import org.apache.pig.backend.hadoop.executionengine.Launcher;
import
org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MRConfiguration;
+import
org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher;
/**
* This class builds a single instance of itself with the Singleton
@@ -128,4 +130,8 @@ public class MiniCluster extends MiniGen
if (m_mr != null) { m_mr.stop(); }
m_mr = null;
}
+
+ static public Launcher getLauncher() {
+ return new MapReduceLauncher();
+ }
}
Modified:
pig/branches/spark/shims/test/hadoop23/org/apache/pig/test/TezMiniCluster.java
URL:
http://svn.apache.org/viewvc/pig/branches/spark/shims/test/hadoop23/org/apache/pig/test/TezMiniCluster.java?rev=1654955&r1=1654954&r2=1654955&view=diff
==============================================================================
---
pig/branches/spark/shims/test/hadoop23/org/apache/pig/test/TezMiniCluster.java
(original)
+++
pig/branches/spark/shims/test/hadoop23/org/apache/pig/test/TezMiniCluster.java
Tue Jan 27 02:27:45 2015
@@ -33,7 +33,9 @@ import org.apache.hadoop.mapreduce.v2.Mi
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.pig.ExecType;
import org.apache.pig.PigConfiguration;
+import org.apache.pig.backend.hadoop.executionengine.Launcher;
import org.apache.pig.backend.hadoop.executionengine.tez.TezExecType;
+import org.apache.pig.backend.hadoop.executionengine.tez.TezLauncher;
import org.apache.pig.backend.hadoop.executionengine.tez.TezSessionManager;
import org.apache.tez.dag.api.TezConfiguration;
import org.apache.tez.mapreduce.hadoop.MRJobConfig;
@@ -185,4 +187,8 @@ public class TezMiniCluster extends Mini
YARN_CONF_FILE.delete();
}
}
+
+ static public Launcher getLauncher() {
+ return new TezLauncher();
+ }
}
Modified: pig/branches/spark/src/docs/src/documentation/content/xdocs/test.xml
URL:
http://svn.apache.org/viewvc/pig/branches/spark/src/docs/src/documentation/content/xdocs/test.xml?rev=1654955&r1=1654954&r2=1654955&view=diff
==============================================================================
--- pig/branches/spark/src/docs/src/documentation/content/xdocs/test.xml
(original)
+++ pig/branches/spark/src/docs/src/documentation/content/xdocs/test.xml Tue
Jan 27 02:27:45 2015
@@ -963,6 +963,50 @@ junit.framework.ComparisonFailure: null
at org.apache.pig.pigunit.PigTest.assertEquals(PigTest.java:272)
</source>
</section>
+
+<!-- +++++++++++++++++++++++++++++++++++++++ -->
+ <section>
+ <title>Mocking</title>
+
+ <p>Sometimes you need to mock out the data in specific aliases. Using
PigTest's
+ mocking you can override an alias everywhere it is assigned. If you do
not know
+ the schema (or want to keep your test dynamic) you can use
PigTest.getAliasToSchemaMap()
+ to determine the schema. If you chose to go this route, you should
cache the map for
+ the specific script to ensure efficient execution.
+ </p>
+
+ <source>
+ @Test
+ public void testTop2Queries() {
+ String[] args = {
+ "n=2",
+ };
+
+ PigTest test = new PigTest("top_queries.pig", args);
+
+ String[] mockData = {
+ "yahoo",
+ "yahoo",
+ "yahoo",
+ "twitter",
+ "facebook",
+ "facebook",
+ "linkedin",
+ };
+
+ //You should cache the map if you can
+ String schema = test.getAliasToSchemaMap().get("data");
+ test.mockAlias("data", mockData, schema);
+
+ String[] output = {
+ "(yahoo,3)",
+ "(facebook,2)",
+ };
+
+ test.assertOutputAnyOrder("queries_limit", output);
+ }
+</source>
+ </section>
</section>
<!-- +++++++++++++++++++++++++++++++++++++++ -->
Modified: pig/branches/spark/src/org/apache/pig/Main.java
URL:
http://svn.apache.org/viewvc/pig/branches/spark/src/org/apache/pig/Main.java?rev=1654955&r1=1654954&r2=1654955&view=diff
==============================================================================
--- pig/branches/spark/src/org/apache/pig/Main.java (original)
+++ pig/branches/spark/src/org/apache/pig/Main.java Tue Jan 27 02:27:45 2015
@@ -546,7 +546,7 @@ public class Main {
// Interactive
mode = ExecMode.SHELL;
//Reader is created by first loading
"pig.load.default.statements" or .pigbootup file if available
- ConsoleReader reader = new
ConsoleReaderWithParamSub(Utils.getCompositeStream(System.in, properties), new
OutputStreamWriter(System.out), pigContext);
+ ConsoleReader reader = new
ConsoleReader(Utils.getCompositeStream(System.in, properties), new
OutputStreamWriter(System.out));
reader.setDefaultPrompt("grunt> ");
final String HISTORYFILE = ".pig_history";
String historyFile = System.getProperty("user.home") +
File.separator + HISTORYFILE;
@@ -895,6 +895,7 @@ public class Main {
System.out.println(" GroupByConstParallelSetter - Force
parallel 1 for \"group all\" statement");
System.out.println(" PartitionFilterOptimizer -
Pushdown partition filter conditions to loader implementing LoadMetaData");
System.out.println(" PredicatePushdownOptimizer -
Pushdown filter predicates to loader implementing LoadPredicatePushDown");
+ System.out.println(" RollupHIIOptimizer - Apply Rollup
HII optimization");
System.out.println(" All - Disable all optimizations");
System.out.println(" All optimizations listed here are
enabled by default. Optimization values are case insensitive.");
System.out.println(" -v, -verbose - Print all error messages to
screen");
@@ -1072,23 +1073,4 @@ public class Main {
: ReturnCode.SUCCESS;
}
- static class ConsoleReaderWithParamSub extends ConsoleReader {
- PigContext pc;
- ConsoleReaderWithParamSub(InputStream in, Writer out, PigContext
pigContext) throws IOException {
- super(in, out);
- pc = pigContext;
- }
-
- @Override
- public String readLine() throws IOException {
- String line = super.readLine();
- if (null == line) {
- return line;
- }
- String paramSubLine = pc.doParamSubstitution(new
BufferedReader(new StringReader(line)));
- return paramSubLine;
- }
-
- }
-
}
Modified: pig/branches/spark/src/org/apache/pig/PigConstants.java
URL:
http://svn.apache.org/viewvc/pig/branches/spark/src/org/apache/pig/PigConstants.java?rev=1654955&r1=1654954&r2=1654955&view=diff
==============================================================================
--- pig/branches/spark/src/org/apache/pig/PigConstants.java (original)
+++ pig/branches/spark/src/org/apache/pig/PigConstants.java Tue Jan 27 02:27:45
2015
@@ -59,4 +59,45 @@ public class PigConstants {
public static final String TIME_UDFS_ELAPSED_TIME_COUNTER =
"approx_microsecs";
public static final String TASK_INDEX = "mapreduce.task.index";
+
+ /**
+ * This parameter is used to check if the rollup is optimizable or not
after going
+ * through the RollupHIIOptimizer
+ */
+ public static final String PIG_HII_ROLLUP_OPTIMIZABLE =
"pig.hii.rollup.optimizable";
+
+ /**
+ * This parameter stores the value of the pivot position. If the rollup is
not optimizable
+ * this value will be -1; If the rollup is optimizable: if the user did
specify the pivot
+ * in the rollup clause, this parameter will get that value; if the user
did not specify
+ * the pivot in the rollup clause, this parameter will get the value of
the median position
+ * of the fields in the rollup clause
+ */
+ public static final String PIG_HII_ROLLUP_PIVOT = "pig.hii.rollup.pivot";
+
+ /**
+ * This parameter stores the index of the first field involved in the
rollup (or the first field
+ * involved in the rollup after changing the position of rollup to the end
in case of having cube)
+ */
+ public static final String PIG_HII_ROLLUP_FIELD_INDEX =
"pig.hii.rollup.field.index";
+
+ /**
+ * This parameter stores the index of the first field involved in the
rollup before
+ * changing the position of rollup to the end in case of having cube
+ */
+ public static final String PIG_HII_ROLLUP_OLD_FIELD_INDEX =
"pig.hii.rollup.old.field.index";
+
+ /**
+ * This parameter stores the total number of fields involved in the
CUBE clause. For example, we
+ * have two CUBE clauses:
+ * B = CUBE A BY CUBE(year, month, day), ROLLUP(hour, minute, second);
+ * B = CUBE A BY ROLLUP(year, month, day, hour, minute, second);
+ * So this parameter will be 6 at both cases.
+ */
+ public static final String PIG_HII_NUMBER_TOTAL_FIELD =
"pig.hii.number.total.field";
+
+ /**
+ * This parameter stores the number of algebraic functions that are used
after rollup.
+ */
+ public static final String PIG_HII_NUMBER_ALGEBRAIC =
"pig.hii.number.algebraic";
}
\ No newline at end of file
Modified:
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/JobControlCompiler.java
URL:
http://svn.apache.org/viewvc/pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/JobControlCompiler.java?rev=1654955&r1=1654954&r2=1654955&view=diff
==============================================================================
---
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/JobControlCompiler.java
(original)
+++
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/JobControlCompiler.java
Tue Jan 27 02:27:45 2015
@@ -66,12 +66,14 @@ import org.apache.pig.FuncSpec;
import org.apache.pig.LoadFunc;
import org.apache.pig.OverwritableStoreFunc;
import org.apache.pig.PigConfiguration;
+import org.apache.pig.PigConstants;
import org.apache.pig.PigException;
import org.apache.pig.StoreFuncInterface;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.backend.hadoop.HDataType;
import org.apache.pig.backend.hadoop.datastorage.ConfigurationUtil;
import org.apache.pig.backend.hadoop.executionengine.JobCreationException;
+import
org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.partitioners.RollupHIIPartitioner;
import
org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.partitioners.SecondaryKeyPartitioner;
import
org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.partitioners.SkewedPartitioner;
import
org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.partitioners.WeightedRangePartitioner;
@@ -183,6 +185,8 @@ public class JobControlCompiler{
public static final String PIG_MAP_STORES = "pig.map.stores";
public static final String PIG_REDUCE_STORES = "pig.reduce.stores";
+ private static final String ROLLUP_PARTITIONER =
RollupHIIPartitioner.class.getName();
+
// A mapping of job to pair of store locations and tmp locations for that
job
private Map<Job, Pair<List<POStore>, Path>> jobStoreMap;
@@ -524,6 +528,9 @@ public class JobControlCompiler{
configureCompression(conf);
try{
+ //Set default value for PIG_HII_ROLLUP_OPTIMIZABLE to false
+ conf.setBoolean(PigConstants.PIG_HII_ROLLUP_OPTIMIZABLE, false);
+
//Process the POLoads
List<POLoad> lds = PlanHelper.getPhysicalOperators(mro.mapPlan,
POLoad.class);
@@ -836,13 +843,52 @@ public class JobControlCompiler{
log.info("Setting identity combiner class.");
}
pack = (POPackage)mro.reducePlan.getRoots().get(0);
- if(!pigContext.inIllustrator)
+
+ if(pack!=null) {
+ if(pack.getPivot()!=-1) {
+ //Set value for PIG_HII_ROLLUP_OPTIMIZABLE to true
+
conf.setBoolean(PigConstants.PIG_HII_ROLLUP_OPTIMIZABLE, true);
+ //Set the pivot value
+ conf.setInt(PigConstants.PIG_HII_ROLLUP_PIVOT,
pack.getPivot());
+ //Set the index of the first field involves in ROLLUP
+ conf.setInt(PigConstants.PIG_HII_ROLLUP_FIELD_INDEX,
pack.getRollupFieldIndex());
+ //Set the original index of the first field involves
in ROLLUP in case it was moved to the end
+ //(if we have the combination of cube and rollup)
+
conf.setInt(PigConstants.PIG_HII_ROLLUP_OLD_FIELD_INDEX,
pack.getRollupOldFieldIndex());
+ //Set the size of total fields that involve in CUBE
clause
+ conf.setInt(PigConstants.PIG_HII_NUMBER_TOTAL_FIELD,
pack.getDimensionSize());
+ //Set number of algebraic functions that used after
rollup
+ conf.setInt(PigConstants.PIG_HII_NUMBER_ALGEBRAIC,
pack.getNumberAlgebraic());
+ //Set number of reducer to 1 due to using IRG algorithm
+ if(pack.getPivot() == 0 && !mro.reducePlan.isEmpty()) {
+ updateNumReducers(plan, mro, nwJob);
+ }
+ }
+ }
+
+ if (!pigContext.inIllustrator) {
mro.reducePlan.remove(pack);
- nwJob.setMapperClass(PigMapReduce.Map.class);
+ }
+
+ if (pack!=null && pack.getPivot()!=-1) {
+ nwJob.setMapperClass(PigMapReduce.MapRollupHII.class);
+ } else {
+ nwJob.setMapperClass(PigMapReduce.Map.class);
+ }
+
nwJob.setReducerClass(PigMapReduce.Reduce.class);
- if (mro.customPartitioner != null)
-
nwJob.setPartitionerClass(PigContext.resolveClassName(mro.customPartitioner));
+ // Set Rollup Partitioner in case the pivot is not equal to -1
+ // and the customPartitioner name is our rollup partitioner.
+ if (mro.customPartitioner != null) {
+ if (mro.customPartitioner.equals(ROLLUP_PARTITIONER)) {
+ if (pack.getPivot()!=-1) {
+
nwJob.setPartitionerClass(PigContext.resolveClassName(mro.customPartitioner));
+ }
+ } else {
+
nwJob.setPartitionerClass(PigContext.resolveClassName(mro.customPartitioner));
+ }
+ }
if(!pigContext.inIllustrator)
conf.set("pig.mapPlan",
ObjectSerializer.serialize(mro.mapPlan));
@@ -1058,6 +1104,26 @@ public class JobControlCompiler{
}
/**
+ * If pivot position is zero, we use only one reducer
+ * @param plan the MR plan
+ * @param mro the MR operator
+ * @param nwJob the current job
+ * @throws IOException
+ */
+ public void updateNumReducers(MROperPlan plan, MapReduceOper mro,
+ org.apache.hadoop.mapreduce.Job nwJob) throws IOException {
+ // Change number of reducer to 1 if only IRG is used
+ if (mro.customPartitioner != null &&
mro.customPartitioner.equals(ROLLUP_PARTITIONER)) {
+ log.info("Changing Parallelism to 1 due to using IRG");
+ }
+ conf.setInt("pig.info.reducers.default.parallel", 1);
+ conf.setInt("pig.info.reducers.requested.parallel", 1);
+ conf.setInt("pig.info.reducers.estimated.parallel", 1);
+ conf.setInt(MRConfiguration.REDUCE_TASKS, 1);
+ nwJob.setNumReduceTasks(1);
+ }
+
+ /**
* Calculate the runtime #reducers based on the default_parallel,
requested parallel and estimated
* parallel, and save it to MapReduceOper's runtimeParallelism.
* @return the runtimeParallelism
Modified:
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MRCompiler.java
URL:
http://svn.apache.org/viewvc/pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MRCompiler.java?rev=1654955&r1=1654954&r2=1654955&view=diff
==============================================================================
---
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MRCompiler.java
(original)
+++
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MRCompiler.java
Tue Jan 27 02:27:45 2015
@@ -76,6 +76,7 @@ import org.apache.pig.backend.hadoop.exe
import
org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POPackage;
import
org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POPartitionRearrange;
import
org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.PORank;
+import
org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.PORollupHIIForEach;
import
org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POSkewedJoin;
import
org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POSort;
import
org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POSplit;
@@ -1098,6 +1099,11 @@ public class MRCompiler extends PhyPlanV
}
@Override
+ public void visitPORollupHIIForEach(PORollupHIIForEach op) throws
VisitorException {
+ visitPOForEach(op);
+ }
+
+ @Override
public void visitGlobalRearrange(POGlobalRearrange op) throws
VisitorException{
try{
blocking(op);
Modified:
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigCombiner.java
URL:
http://svn.apache.org/viewvc/pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigCombiner.java?rev=1654955&r1=1654954&r2=1654955&view=diff
==============================================================================
---
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigCombiner.java
(original)
+++
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigCombiner.java
Tue Jan 27 02:27:45 2015
@@ -268,6 +268,8 @@ public class PigCombiner {
leaf = null;
pack = null;
pigReporter = null;
+ // Avoid OOM in Tez.
+ PhysicalOperator.setReporter(null);
pigContext = null;
roots = null;
cp = null;
Modified:
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigGenericMapReduce.java
URL:
http://svn.apache.org/viewvc/pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigGenericMapReduce.java?rev=1654955&r1=1654954&r2=1654955&view=diff
==============================================================================
---
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigGenericMapReduce.java
(original)
+++
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigGenericMapReduce.java
Tue Jan 27 02:27:45 2015
@@ -30,6 +30,7 @@ import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.jobcontrol.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.mapreduce.Mapper.Context;
import org.apache.pig.JVMReuseManager;
import org.apache.pig.PigConstants;
import org.apache.pig.PigException;
@@ -50,6 +51,7 @@ import org.apache.pig.data.DataBag;
import org.apache.pig.data.DataType;
import org.apache.pig.data.SchemaTupleBackend;
import org.apache.pig.data.Tuple;
+import org.apache.pig.data.TupleFactory;
import org.apache.pig.impl.PigContext;
import org.apache.pig.impl.io.NullablePartitionWritable;
import org.apache.pig.impl.io.NullableTuple;
@@ -137,6 +139,92 @@ public class PigGenericMapReduce {
}
/**
+ * This map is only used for the Rollup when the RollupHIIOptimizer is
enabled
+ *
+ */
+ public static class MapRollupHII extends PigMapBase {
+ @Override
+ public void collect(Context oc, Tuple tuple)
+ throws InterruptedException, IOException {
+
+ Byte index = (Byte)tuple.get(0);
+ PigNullableWritable key =
+ HDataType.getWritableComparableTypes(tuple.get(1), keyType);
+ NullableTuple val = new NullableTuple((Tuple)tuple.get(2));
+
+ // Both the key and the value need the index. The key needs it so
+ // that it can be sorted on the index in addition to the key
+ // value. The value needs it so that POPackage can properly
+ // assign the tuple to its slot in the projection.
+ key.setIndex(index);
+ val.setIndex(index);
+
+ oc.write(key, val);
+ }
+
+ @Override
+ public void cleanup(Context oc)
+ throws InterruptedException, IOException {
+
+ Configuration jConf = oc.getConfiguration();
+
+ boolean isHII =
jConf.getBoolean(PigConstants.PIG_HII_ROLLUP_OPTIMIZABLE, false);
+ //If our rule is enabled and in use, there will be a PORollupHIIForEach.
+ //We will create marker tuples which serve as markers telling reducers
+ //to compute the remaining results once a reducer reaches the end of its
+ //input records. A marker tuple is larger than the default by one
+ //dimension. This additional dimension holds a value ranging from 0 to
+ //the number of reducers. This addition makes sure that every reducer
+ //receives one of these marker tuples and can finish its work.
+ if(isHII) {
+ int reducerNo = jConf.getInt("mapred.reduce.tasks", 0);
+ int length =
jConf.getInt(PigConstants.PIG_HII_NUMBER_TOTAL_FIELD, 0);
+ int nAlgebraic =
jConf.getInt(PigConstants.PIG_HII_NUMBER_ALGEBRAIC, 1);
+
+ if(length == 0)
+ return;
+
+ TupleFactory mTupleFactory = TupleFactory.getInstance();
+ //An array of marker tuples whose size equals the number of
reducers
+ Tuple group[] = new Tuple[reducerNo];
+ int count = 0;
+ //Make sure that all reducers will receive those marker tuples
+ while(count < reducerNo) {
+ //Create a marker tuple whose last field is the reducer's
index;
+ //the rest are null.
+ group[count] = mTupleFactory.newTuple();
+ for (int k = 0; k <= length; k++) {
+ if(k < length) {
+ group[count].append(null);
+ } else {
+ group[count].append(count);
+ }
+ }
+
+ Tuple value = mTupleFactory.newTuple();
+ Tuple []tmp = new Tuple[nAlgebraic];
+ long valtmp = 1;
+ for(int i = 0; i < nAlgebraic; i++){
+ tmp[i] = mTupleFactory.newTuple();
+ tmp[i].append(valtmp);
+ value.append(tmp[i]);
+ }
+ Tuple out = mTupleFactory.newTuple();
+ out.append(0);
+ out.append(group[count]);
+ out.append(value);
+
+ PigNullableWritable key =
HDataType.getWritableComparableTypes(out.get(1), keyType);
+ NullableTuple val = new NullableTuple((Tuple)out.get(2));
+ oc.write(key, val);
+ count++;
+ }
+ }
+ super.cleanup(oc);
+ }
+ }
+
+ /**
* This "specialized" map class is ONLY to be used in pig queries with
* order by a udf. A UDF used for comparison in the order by expects
* to be handed tuples. Hence this map class ensures that the "key" used
Modified:
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/POUserFunc.java
URL:
http://svn.apache.org/viewvc/pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/POUserFunc.java?rev=1654955&r1=1654954&r2=1654955&view=diff
==============================================================================
---
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/POUserFunc.java
(original)
+++
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/POUserFunc.java
Tue Jan 27 02:27:45 2015
@@ -45,6 +45,7 @@ import org.apache.pig.backend.hadoop.exe
import
org.apache.pig.backend.hadoop.executionengine.physicalLayer.plans.PhyPlanVisitor;
import
org.apache.pig.backend.hadoop.executionengine.physicalLayer.util.MonitoredUDFExecutor;
import org.apache.pig.builtin.MonitoredUDF;
+import org.apache.pig.builtin.RollupDimensions;
import org.apache.pig.data.DataType;
import org.apache.pig.data.SchemaTupleClassGenerator.GenContext;
import org.apache.pig.data.SchemaTupleFactory;
@@ -86,6 +87,28 @@ public class POUserFunc extends Expressi
private long timingFrequency = 100L;
private boolean doTiming = false;
+ private static final String ROLLUP_UDF = RollupDimensions.class.getName();
+ //the pivot value
+ private int pivot = -1;
+
+ private boolean rollupHIIoptimizable = false;
+
+ public void setPivot(int pvt) {
+ this.pivot = pvt;
+ }
+
+ public int getPivot() {
+ return this.pivot;
+ }
+
+ public void setRollupHIIOptimizable(boolean check) {
+ this.rollupHIIoptimizable = check;
+ }
+
+ public boolean getRollupHIIOptimizable() {
+ return this.rollupHIIoptimizable;
+ }
+
public PhysicalOperator getReferencedOperator() {
return referencedOperator;
}
@@ -131,6 +154,17 @@ public class POUserFunc extends Expressi
if (func.getClass().isAnnotationPresent(MonitoredUDF.class)) {
executor = new MonitoredUDFExecutor(func);
}
+
+ if (funcSpec.getClassName().equals(ROLLUP_UDF) &&
this.rollupHIIoptimizable != false) {
+ try {
+ ((RollupDimensions) func).setPivot(this.pivot);
+ } catch (IOException e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ }
+ ((RollupDimensions)
func).setRollupHIIOptimizable(this.rollupHIIoptimizable);
+ }
+
//the next couple of initializations do not work as intended for the
following reasons
//the reporter and pigLogger are member variables of PhysicalOperator
//when instanitateFunc is invoked at deserialization time, both
Modified:
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/plans/PhyPlanVisitor.java
URL:
http://svn.apache.org/viewvc/pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/plans/PhyPlanVisitor.java?rev=1654955&r1=1654954&r2=1654955&view=diff
==============================================================================
---
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/plans/PhyPlanVisitor.java
(original)
+++
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/plans/PhyPlanVisitor.java
Tue Jan 27 02:27:45 2015
@@ -67,6 +67,7 @@ import org.apache.pig.backend.hadoop.exe
import
org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POPreCombinerLocalRearrange;
import
org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.PORank;
import
org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POReservoirSample;
+import
org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.PORollupHIIForEach;
import
org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POSkewedJoin;
import
org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POSort;
import
org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POSplit;
@@ -152,6 +153,10 @@ public class PhyPlanVisitor extends Plan
}
}
+ public void visitPORollupHIIForEach(PORollupHIIForEach nhfe) throws
VisitorException {
+ visitPOForEach(nhfe);
+ }
+
public void visitUnion(POUnion un) throws VisitorException{
//do nothing
}
Modified:
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/plans/PhysicalPlan.java
URL:
http://svn.apache.org/viewvc/pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/plans/PhysicalPlan.java?rev=1654955&r1=1654954&r2=1654955&view=diff
==============================================================================
---
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/plans/PhysicalPlan.java
(original)
+++
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/plans/PhysicalPlan.java
Tue Jan 27 02:27:45 2015
@@ -23,6 +23,7 @@ import java.io.OutputStream;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Collection;
+import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@@ -229,7 +230,10 @@ public class PhysicalPlan extends Operat
// clone.
Map<PhysicalOperator, PhysicalOperator> matches =
new HashMap<PhysicalOperator, PhysicalOperator>(mOps.size());
- for (PhysicalOperator op : mOps.keySet()) {
+ // Sort just so that the explain output (scope ids) is the same on
JDK 7 and JDK 8
+ List<PhysicalOperator> opsToClone = new
ArrayList<PhysicalOperator>(mOps.keySet());
+ Collections.sort(opsToClone);
+ for (PhysicalOperator op : opsToClone) {
PhysicalOperator c = op.clone();
clone.add(c);
if (opmap != null)
Modified:
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POPackage.java
URL:
http://svn.apache.org/viewvc/pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POPackage.java?rev=1654955&r1=1654954&r2=1654955&view=diff
==============================================================================
---
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POPackage.java
(original)
+++
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POPackage.java
Tue Jan 27 02:27:45 2015
@@ -87,6 +87,58 @@ public class POPackage extends PhysicalO
private transient boolean useDefaultBag;
private transient int accumulativeBatchSize;
+ //the pivot value
+ private int pivot = -1;
+ //the index of the first field involved in the ROLLUP
+ protected int rollupFieldIndex = 0;
+ //the original index of the first field involved in the ROLLUP, in case it was
moved to the end
+ //(if we have the combination of cube and rollup)
+ private int rollupOldFieldIndex = 0;
+ //the total number of fields involved in the CUBE clause
+ private int dimensionSize = 0;
+ //the number of algebraic functions used after the rollup
+ private int nAlgebraic = 0;
+
+ public void setPivot(int pvt) {
+ this.pivot = pvt;
+ }
+
+ public int getPivot() {
+ return this.pivot;
+ }
+
+ public void setDimensionSize(int ds) {
+ this.dimensionSize = ds;
+ }
+
+ public int getDimensionSize() {
+ return this.dimensionSize;
+ }
+
+ public void setNumberAlgebraic(int na) {
+ this.nAlgebraic = na;
+ }
+
+ public int getNumberAlgebraic() {
+ return this.nAlgebraic;
+ }
+
+ public void setRollupOldFieldIndex(int rofi) {
+ this.rollupOldFieldIndex = rofi;
+ }
+
+ public int getRollupOldFieldIndex() {
+ return this.rollupOldFieldIndex;
+ }
+
+ public void setRollupFieldIndex(int rfi) {
+ this.rollupFieldIndex = rfi;
+ }
+
+ public int getRollupFieldIndex() {
+ return this.rollupFieldIndex;
+ }
+
public POPackage(OperatorKey k) {
this(k, -1, null);
}
Modified:
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POReservoirSample.java
URL:
http://svn.apache.org/viewvc/pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POReservoirSample.java?rev=1654955&r1=1654954&r2=1654955&view=diff
==============================================================================
---
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POReservoirSample.java
(original)
+++
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POReservoirSample.java
Tue Jan 27 02:27:45 2015
@@ -108,23 +108,24 @@ public class POReservoirSample extends P
}
}
- Random randGen = new Random();
+ if (res.returnStatus != POStatus.STATUS_EOP) {
+ Random randGen = new Random();
+ while (true) {
+ // pick this as sample
+ res = processInput();
+ if (res.returnStatus == POStatus.STATUS_NULL) {
+ continue;
+ } else if (res.returnStatus != POStatus.STATUS_OK) {
+ break;
+ }
- while (true) {
- // pick this as sample
- res = processInput();
- if (res.returnStatus == POStatus.STATUS_NULL) {
- continue;
- } else if (res.returnStatus != POStatus.STATUS_OK) {
- break;
+ // collect samples until input is exhausted
+ int rand = randGen.nextInt(rowProcessed);
+ if (rand < numSamples) {
+ samples[rand] = res;
+ }
+ rowProcessed++;
}
-
- // collect samples until input is exhausted
- int rand = randGen.nextInt(rowProcessed);
- if (rand < numSamples) {
- samples[rand] = res;
- }
- rowProcessed++;
}
if (this.parentPlan.endOfAllInput && res.returnStatus ==
POStatus.STATUS_EOP) {
Modified:
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/util/PlanHelper.java
URL:
http://svn.apache.org/viewvc/pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/util/PlanHelper.java?rev=1654955&r1=1654954&r2=1654955&view=diff
==============================================================================
---
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/util/PlanHelper.java
(original)
+++
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/util/PlanHelper.java
Tue Jan 27 02:27:45 2015
@@ -72,6 +72,7 @@ import org.apache.pig.backend.hadoop.exe
import
org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POPoissonSample;
import
org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POPreCombinerLocalRearrange;
import
org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POReservoirSample;
+import
org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.PORollupHIIForEach;
import
org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POSkewedJoin;
import
org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POSort;
import
org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POSplit;
@@ -236,6 +237,12 @@ public class PlanHelper {
}
@Override
+ public void visitPORollupHIIForEach(PORollupHIIForEach hfe) throws
VisitorException {
+ super.visitPORollupHIIForEach(hfe);
+ visit(hfe);
+ }
+
+ @Override
public void visitUnion(POUnion un) throws VisitorException {
super.visitUnion(un);
visit(un);