Author: olga
Date: Mon Dec 21 20:13:34 2009
New Revision: 892962
URL: http://svn.apache.org/viewvc?rev=892962&view=rev
Log:
PIG-1158: pig command line -M option doesn't support table union correctly
(comma separated paths) (rding via olgan)
Modified:
hadoop/pig/trunk/CHANGES.txt
hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt
hadoop/pig/trunk/test/org/apache/pig/test/TestLoad.java
Modified: hadoop/pig/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=892962&r1=892961&r2=892962&view=diff
==============================================================================
--- hadoop/pig/trunk/CHANGES.txt (original)
+++ hadoop/pig/trunk/CHANGES.txt Mon Dec 21 20:13:34 2009
@@ -68,6 +68,9 @@
BUG FIXES
+PIG-1158: pig command line -M option doesn't support table union correctly
+(comma seperated paths) (rding via olgan)
+
PIG-1143: Poisson Sample Loader should compute the number of samples required
only once (sriranjan via olgan)
Modified:
hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt
URL:
http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt?rev=892962&r1=892961&r2=892962&view=diff
==============================================================================
---
hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt
(original)
+++
hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt
Mon Dec 21 20:13:34 2009
@@ -206,27 +206,6 @@
ArrayList<String> pathStrings = new ArrayList<String>();
- // If multiquery is off we revert to the old behavior, which
- // did not try to convert paths to their absolute location.
- boolean isMultiQuery =
"true".equalsIgnoreCase(pigContext.getProperties().getProperty("opt.multiquery","true"));
- if (!isMultiQuery) {
- if (!isLoad) { // stores do not require any change
- return filename;
- }
-
- // Local loads in the hadoop context require copying the
- // file to dfs first.
- if (pigContext.getExecType() != ExecType.LOCAL) {
- for (String strname : fnames) {
- if (strname.startsWith(FileLocalizer.LOCAL_PREFIX)) {
- strname = FileLocalizer.hadoopify(strname, pigContext);
- }
- pathStrings.add(strname);
- }
- }
- return StringUtils.join(pathStrings, ",");
- }
-
boolean hadoopify = false;
for (String strname : fnames) {
Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestLoad.java
URL:
http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestLoad.java?rev=892962&r1=892961&r2=892962&view=diff
==============================================================================
--- hadoop/pig/trunk/test/org/apache/pig/test/TestLoad.java (original)
+++ hadoop/pig/trunk/test/org/apache/pig/test/TestLoad.java Mon Dec 21 20:13:34
2009
@@ -17,17 +17,24 @@
*/
package org.apache.pig.test;
-import java.util.*;
+import static org.junit.Assert.*;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
+import java.util.HashMap;
+import java.util.Map;
import junit.framework.Assert;
import org.apache.pig.ExecType;
import org.apache.pig.FuncSpec;
+import org.apache.pig.PigServer;
+import org.apache.pig.backend.datastorage.DataStorage;
import org.apache.pig.backend.executionengine.ExecException;
+import org.apache.pig.backend.hadoop.executionengine.physicalLayer.POStatus;
+import org.apache.pig.backend.hadoop.executionengine.physicalLayer.Result;
+import
org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLoad;
import org.apache.pig.builtin.PigStorage;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.DataByteArray;
@@ -35,28 +42,20 @@
import org.apache.pig.data.DefaultTuple;
import org.apache.pig.data.Tuple;
import org.apache.pig.impl.PigContext;
-import org.apache.pig.impl.io.FileSpec;
-import org.apache.pig.impl.plan.OperatorKey;
-import org.apache.pig.PigServer;
import org.apache.pig.impl.io.FileLocalizer;
-import org.apache.pig.backend.hadoop.executionengine.physicalLayer.POStatus;
-import org.apache.pig.backend.hadoop.executionengine.physicalLayer.Result;
-import
org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLoad;
-import org.apache.pig.test.utils.GenPhyOp;
-import org.apache.pig.test.utils.TestHelper;
+import org.apache.pig.impl.io.FileSpec;
import org.apache.pig.impl.logicalLayer.LOLoad;
import org.apache.pig.impl.logicalLayer.LogicalOperator;
import org.apache.pig.impl.logicalLayer.LogicalPlan;
import org.apache.pig.impl.logicalLayer.LogicalPlanBuilder;
-import org.apache.pig.backend.datastorage.ContainerDescriptor;
-import org.apache.pig.backend.datastorage.DataStorage;
-import org.apache.pig.backend.datastorage.DataStorageException;
-import org.apache.pig.backend.datastorage.ElementDescriptor;
+import org.apache.pig.impl.plan.OperatorKey;
+import org.apache.pig.test.utils.GenPhyOp;
+import org.apache.pig.test.utils.TestHelper;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
-public class TestLoad extends junit.framework.TestCase {
+public class TestLoad {
FileSpec inpFSpec;
POLoad ld;
PigContext pc;
@@ -66,6 +65,7 @@
PigServer pig;
static MiniCluster cluster = MiniCluster.buildCluster();
+
@Before
public void setUp() throws Exception {
curDir = System.getProperty("user.dir");
@@ -95,8 +95,6 @@
}
}
-
-
@After
public void tearDown() throws Exception {
}
@@ -198,37 +196,41 @@
}
private void checkLoadPath(String orig, String expected, boolean isTmp)
throws Exception {
- pc.getProperties().setProperty("opt.multiquery",""+true);
-
- DataStorage dfs = pc.getDfs();
- dfs.setActiveContainer(dfs.asContainer("/tmp"));
- Map<LogicalOperator, LogicalPlan> aliases = new
HashMap<LogicalOperator, LogicalPlan>();
- Map<OperatorKey, LogicalOperator> logicalOpTable = new
HashMap<OperatorKey, LogicalOperator>();
- Map<String, LogicalOperator> aliasOp = new HashMap<String,
LogicalOperator>();
- Map<String, String> fileNameMap = new HashMap<String, String>();
+ boolean[] multiquery = {true, false};
- LogicalPlanBuilder builder = new LogicalPlanBuilder(pc);
-
- String query = "a = load '"+orig+"';";
- LogicalPlan lp = builder.parse("Test-Load",
- query,
- aliases,
- logicalOpTable,
- aliasOp,
- fileNameMap);
- Assert.assertTrue(lp.size()>0);
- LogicalOperator op = lp.getRoots().get(0);
-
- Assert.assertTrue(op instanceof LOLoad);
- LOLoad load = (LOLoad)op;
-
- String p = load.getInputFile().getFileName();
- p = p.replaceAll("hdfs://[0-9a-zA-Z:\\.]*/","/");
-
- if (isTmp) {
- Assert.assertTrue(p.matches("/tmp.*"));
- } else {
- Assert.assertEquals(p, expected);
+ for (boolean b : multiquery) {
+ pc.getProperties().setProperty("opt.multiquery", "" + b);
+
+ DataStorage dfs = pc.getDfs();
+ dfs.setActiveContainer(dfs.asContainer("/tmp"));
+ Map<LogicalOperator, LogicalPlan> aliases = new
HashMap<LogicalOperator, LogicalPlan>();
+ Map<OperatorKey, LogicalOperator> logicalOpTable = new
HashMap<OperatorKey, LogicalOperator>();
+ Map<String, LogicalOperator> aliasOp = new HashMap<String,
LogicalOperator>();
+ Map<String, String> fileNameMap = new HashMap<String, String>();
+
+ LogicalPlanBuilder builder = new LogicalPlanBuilder(pc);
+
+ String query = "a = load '"+orig+"';";
+ LogicalPlan lp = builder.parse("Test-Load",
+ query,
+ aliases,
+ logicalOpTable,
+ aliasOp,
+ fileNameMap);
+ Assert.assertTrue(lp.size()>0);
+ LogicalOperator op = lp.getRoots().get(0);
+
+ Assert.assertTrue(op instanceof LOLoad);
+ LOLoad load = (LOLoad)op;
+
+ String p = load.getInputFile().getFileName();
+ p = p.replaceAll("hdfs://[0-9a-zA-Z:\\.]*/","/");
+
+ if (isTmp) {
+ Assert.assertTrue(p.matches("/tmp.*"));
+ } else {
+ Assert.assertEquals(p, expected);
+ }
}
}
}