Author: olga Date: Mon Dec 21 20:13:34 2009 New Revision: 892962 URL: http://svn.apache.org/viewvc?rev=892962&view=rev Log: PIG-1158: pig command line -M option doesn't support table union correctly (comma separated paths) (rding via olgan)
Modified: hadoop/pig/trunk/CHANGES.txt hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt hadoop/pig/trunk/test/org/apache/pig/test/TestLoad.java Modified: hadoop/pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=892962&r1=892961&r2=892962&view=diff ============================================================================== --- hadoop/pig/trunk/CHANGES.txt (original) +++ hadoop/pig/trunk/CHANGES.txt Mon Dec 21 20:13:34 2009 @@ -68,6 +68,9 @@ BUG FIXES +PIG-1158: pig command line -M option doesn't support table union correctly +(comma seperated paths) (rding via olgan) + PIG-1143: Poisson Sample Loader should compute the number of samples required only once (sriranjan via olgan) Modified: hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt?rev=892962&r1=892961&r2=892962&view=diff ============================================================================== --- hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt (original) +++ hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt Mon Dec 21 20:13:34 2009 @@ -206,27 +206,6 @@ ArrayList<String> pathStrings = new ArrayList<String>(); - // If multiquery is off we revert to the old behavior, which - // did not try to convert paths to their absolute location. - boolean isMultiQuery = "true".equalsIgnoreCase(pigContext.getProperties().getProperty("opt.multiquery","true")); - if (!isMultiQuery) { - if (!isLoad) { // stores do not require any change - return filename; - } - - // Local loads in the hadoop context require copying the - // file to dfs first. 
- if (pigContext.getExecType() != ExecType.LOCAL) { - for (String strname : fnames) { - if (strname.startsWith(FileLocalizer.LOCAL_PREFIX)) { - strname = FileLocalizer.hadoopify(strname, pigContext); - } - pathStrings.add(strname); - } - } - return StringUtils.join(pathStrings, ","); - } - boolean hadoopify = false; for (String strname : fnames) { Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestLoad.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestLoad.java?rev=892962&r1=892961&r2=892962&view=diff ============================================================================== --- hadoop/pig/trunk/test/org/apache/pig/test/TestLoad.java (original) +++ hadoop/pig/trunk/test/org/apache/pig/test/TestLoad.java Mon Dec 21 20:13:34 2009 @@ -17,17 +17,24 @@ */ package org.apache.pig.test; -import java.util.*; +import static org.junit.Assert.*; import java.io.BufferedReader; import java.io.File; import java.io.FileReader; +import java.util.HashMap; +import java.util.Map; import junit.framework.Assert; import org.apache.pig.ExecType; import org.apache.pig.FuncSpec; +import org.apache.pig.PigServer; +import org.apache.pig.backend.datastorage.DataStorage; import org.apache.pig.backend.executionengine.ExecException; +import org.apache.pig.backend.hadoop.executionengine.physicalLayer.POStatus; +import org.apache.pig.backend.hadoop.executionengine.physicalLayer.Result; +import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLoad; import org.apache.pig.builtin.PigStorage; import org.apache.pig.data.DataBag; import org.apache.pig.data.DataByteArray; @@ -35,28 +42,20 @@ import org.apache.pig.data.DefaultTuple; import org.apache.pig.data.Tuple; import org.apache.pig.impl.PigContext; -import org.apache.pig.impl.io.FileSpec; -import org.apache.pig.impl.plan.OperatorKey; -import org.apache.pig.PigServer; import org.apache.pig.impl.io.FileLocalizer; -import 
org.apache.pig.backend.hadoop.executionengine.physicalLayer.POStatus; -import org.apache.pig.backend.hadoop.executionengine.physicalLayer.Result; -import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLoad; -import org.apache.pig.test.utils.GenPhyOp; -import org.apache.pig.test.utils.TestHelper; +import org.apache.pig.impl.io.FileSpec; import org.apache.pig.impl.logicalLayer.LOLoad; import org.apache.pig.impl.logicalLayer.LogicalOperator; import org.apache.pig.impl.logicalLayer.LogicalPlan; import org.apache.pig.impl.logicalLayer.LogicalPlanBuilder; -import org.apache.pig.backend.datastorage.ContainerDescriptor; -import org.apache.pig.backend.datastorage.DataStorage; -import org.apache.pig.backend.datastorage.DataStorageException; -import org.apache.pig.backend.datastorage.ElementDescriptor; +import org.apache.pig.impl.plan.OperatorKey; +import org.apache.pig.test.utils.GenPhyOp; +import org.apache.pig.test.utils.TestHelper; import org.junit.After; import org.junit.Before; import org.junit.Test; -public class TestLoad extends junit.framework.TestCase { +public class TestLoad { FileSpec inpFSpec; POLoad ld; PigContext pc; @@ -66,6 +65,7 @@ PigServer pig; static MiniCluster cluster = MiniCluster.buildCluster(); + @Before public void setUp() throws Exception { curDir = System.getProperty("user.dir"); @@ -95,8 +95,6 @@ } } - - @After public void tearDown() throws Exception { } @@ -198,37 +196,41 @@ } private void checkLoadPath(String orig, String expected, boolean isTmp) throws Exception { - pc.getProperties().setProperty("opt.multiquery",""+true); - - DataStorage dfs = pc.getDfs(); - dfs.setActiveContainer(dfs.asContainer("/tmp")); - Map<LogicalOperator, LogicalPlan> aliases = new HashMap<LogicalOperator, LogicalPlan>(); - Map<OperatorKey, LogicalOperator> logicalOpTable = new HashMap<OperatorKey, LogicalOperator>(); - Map<String, LogicalOperator> aliasOp = new HashMap<String, LogicalOperator>(); - Map<String, String> fileNameMap = 
new HashMap<String, String>(); + boolean[] multiquery = {true, false}; - LogicalPlanBuilder builder = new LogicalPlanBuilder(pc); - - String query = "a = load '"+orig+"';"; - LogicalPlan lp = builder.parse("Test-Load", - query, - aliases, - logicalOpTable, - aliasOp, - fileNameMap); - Assert.assertTrue(lp.size()>0); - LogicalOperator op = lp.getRoots().get(0); - - Assert.assertTrue(op instanceof LOLoad); - LOLoad load = (LOLoad)op; - - String p = load.getInputFile().getFileName(); - p = p.replaceAll("hdfs://[0-9a-zA-Z:\\.]*/","/"); - - if (isTmp) { - Assert.assertTrue(p.matches("/tmp.*")); - } else { - Assert.assertEquals(p, expected); + for (boolean b : multiquery) { + pc.getProperties().setProperty("opt.multiquery", "" + b); + + DataStorage dfs = pc.getDfs(); + dfs.setActiveContainer(dfs.asContainer("/tmp")); + Map<LogicalOperator, LogicalPlan> aliases = new HashMap<LogicalOperator, LogicalPlan>(); + Map<OperatorKey, LogicalOperator> logicalOpTable = new HashMap<OperatorKey, LogicalOperator>(); + Map<String, LogicalOperator> aliasOp = new HashMap<String, LogicalOperator>(); + Map<String, String> fileNameMap = new HashMap<String, String>(); + + LogicalPlanBuilder builder = new LogicalPlanBuilder(pc); + + String query = "a = load '"+orig+"';"; + LogicalPlan lp = builder.parse("Test-Load", + query, + aliases, + logicalOpTable, + aliasOp, + fileNameMap); + Assert.assertTrue(lp.size()>0); + LogicalOperator op = lp.getRoots().get(0); + + Assert.assertTrue(op instanceof LOLoad); + LOLoad load = (LOLoad)op; + + String p = load.getInputFile().getFileName(); + p = p.replaceAll("hdfs://[0-9a-zA-Z:\\.]*/","/"); + + if (isTmp) { + Assert.assertTrue(p.matches("/tmp.*")); + } else { + Assert.assertEquals(p, expected); + } } } }