Author: pradeepkth
Date: Fri May 29 21:27:27 2009
New Revision: 780113
URL: http://svn.apache.org/viewvc?rev=780113&view=rev
Log:
PIG-816: PigStorage() does not accept Unicode characters in its constructor
(pradeepkth)
Modified:
hadoop/pig/trunk/CHANGES.txt
hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/JobControlCompiler.java
hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MapReducePOStoreImpl.java
hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/util/MapRedUtil.java
hadoop/pig/trunk/test/org/apache/pig/test/TestEvalPipeline2.java
Modified: hadoop/pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=780113&r1=780112&r2=780113&view=diff
==============================================================================
--- hadoop/pig/trunk/CHANGES.txt (original)
+++ hadoop/pig/trunk/CHANGES.txt Fri May 29 21:27:27 2009
@@ -48,6 +48,8 @@
BUG FIXES
+PIG-816: PigStorage() does not accept Unicode characters in its constructor (pradeepkth)
+
PIG-818: Explain doesn't handle PODemux properly (hagleitn via olgan)
PIG-819: run -param -param; is a valid grunt command (milindb via olgan)
Modified: hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/JobControlCompiler.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/JobControlCompiler.java?rev=780113&r1=780112&r2=780113&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/JobControlCompiler.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/JobControlCompiler.java Fri May 29 21:27:27 2009
@@ -383,8 +383,14 @@
String outputPath = st.getSFile().getFileName();
FuncSpec outputFuncSpec = st.getSFile().getFuncSpec();
FileOutputFormat.setOutputPath(jobConf, new Path(outputPath));
-
- jobConf.set("pig.storeFunc", outputFuncSpec.toString());
+
+ // serialize the store func spec using ObjectSerializer
+ // ObjectSerializer.serialize() uses default java serialization
+ // and then further encodes the output so that control characters
+ // get encoded as regular characters. Otherwise any control characters
+ // in the store funcspec would break the job.xml which is created by
+ // hadoop from the jobconf.
+ jobConf.set("pig.storeFunc", ObjectSerializer.serialize(outputFuncSpec.toString()));
jobConf.set(PIG_STORE_CONFIG,
ObjectSerializer.serialize(new
StoreConfig(outputPath, st.getSchema())));
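
For readers unfamiliar with ObjectSerializer, here is a minimal round-trip sketch (not part of this change) of what the comment above describes. It assumes ObjectSerializer lives in org.apache.pig.impl.util and uses only the serialize()/deserialize() calls that appear in this patch; the funcspec value is made up for illustration.

import org.apache.pig.impl.util.ObjectSerializer;

public class StoreFuncSpecRoundTrip {
    public static void main(String[] args) throws Exception {
        // A funcspec whose constructor argument is a control character (Ctrl-A),
        // the case PIG-816 fixes.
        String funcSpec = "org.apache.pig.builtin.PigStorage('\u0001')";

        // serialize() java-serializes the string and then encodes the bytes so the
        // result contains only regular characters, safe to put into the jobconf.
        String encoded = ObjectSerializer.serialize(funcSpec);

        // The consumer reverses the encoding and recovers the original spec.
        String decoded = (String) ObjectSerializer.deserialize(encoded);
        System.out.println(funcSpec.equals(decoded));   // expected: true
    }
}
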
Modified: hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MapReducePOStoreImpl.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MapReducePOStoreImpl.java?rev=780113&r1=780112&r2=780113&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MapReducePOStoreImpl.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MapReducePOStoreImpl.java Fri May 29 21:27:27 2009
@@ -109,7 +109,13 @@
// PigOuputFormat will look for pig.storeFunc to actually
// write stuff out.
- outputConf.set("pig.storeFunc", sFile.getFuncSpec().toString());
+ // serialize the store func spec using ObjectSerializer
+ // ObjectSerializer.serialize() uses default java serialization
+ // and then further encodes the output so that control characters
+ // get encoded as regular characters. Otherwise any control characters
+ // in the store funcspec would break the job.xml which is created by
+ // hadoop from the jobconf.
+ outputConf.set("pig.storeFunc", ObjectSerializer.serialize(sFile.getFuncSpec().toString()));
// We set the output dir to the final location of the output,
// the output dir set in the original job config points to the
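
To see why the unencoded value was a problem, below is a small self-contained sketch (not part of this change) of the failure mode the comment describes: a raw control character such as U+0001 is not a legal XML 1.0 character, so a job.xml-style property value containing one cannot be parsed back. Only the property name "pig.storeFunc" is taken from the patch; the XML snippet and class name are made up.

import java.io.ByteArrayInputStream;
import javax.xml.parsers.DocumentBuilderFactory;

public class CtrlCharBreaksJobXml {
    public static void main(String[] args) throws Exception {
        // Roughly the kind of entry hadoop would write into job.xml if the
        // store funcspec were stored unencoded.
        String xml = "<configuration><property><name>pig.storeFunc</name>"
                + "<value>PigStorage('\u0001')</value></property></configuration>";
        try {
            DocumentBuilderFactory.newInstance().newDocumentBuilder()
                    .parse(new ByteArrayInputStream(xml.getBytes("UTF-8")));
            System.out.println("parsed ok (not expected)");
        } catch (org.xml.sax.SAXException e) {
            // Typically fails with "An invalid XML character (Unicode: 0x1) was found ..."
            System.out.println("unparseable job.xml: " + e.getMessage());
        }
    }
}
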
Modified: hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/util/MapRedUtil.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/util/MapRedUtil.java?rev=780113&r1=780112&r2=780113&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/util/MapRedUtil.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/util/MapRedUtil.java Fri May 29 21:27:27 2009
@@ -34,18 +34,19 @@
*/
public static StoreFunc getStoreFunc(JobConf conf) throws ExecException {
StoreFunc store;
- String storeFunc = conf.get("pig.storeFunc", "");
- if (storeFunc.length() == 0) {
- store = new PigStorage();
- } else {
- try {
+ try {
+ String storeFunc = conf.get("pig.storeFunc", "");
+ if (storeFunc.length() == 0) {
+ store = new PigStorage();
+ } else {
+ storeFunc = (String) ObjectSerializer.deserialize(storeFunc);
store = (StoreFunc) PigContext
.instantiateFuncFromSpec(storeFunc);
- } catch (Exception e) {
- int errCode = 2081;
- String msg = "Unable to setup the store function.";
- throw new ExecException(msg, errCode, PigException.BUG, e);
}
+ } catch (Exception e) {
+ int errCode = 2081;
+ String msg = "Unable to setup the store function.";
+ throw new ExecException(msg, errCode, PigException.BUG, e);
}
return store;
}
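
A brief usage sketch (not part of this change) of the read side above: the producer puts the encoded funcspec under "pig.storeFunc", MapRedUtil.getStoreFunc() decodes and instantiates it, and an unset value still falls back to PigStorage(). The funcspec string is made up; the imports assume the package layout shown in this commit's file paths.

import org.apache.hadoop.mapred.JobConf;
import org.apache.pig.StoreFunc;
import org.apache.pig.backend.hadoop.executionengine.util.MapRedUtil;
import org.apache.pig.impl.util.ObjectSerializer;

public class GetStoreFuncSketch {
    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf();

        // No "pig.storeFunc" set: getStoreFunc() returns the default PigStorage().
        StoreFunc dflt = MapRedUtil.getStoreFunc(conf);

        // Encoded funcspec with a ctrl-char delimiter: decodes and instantiates cleanly.
        conf.set("pig.storeFunc",
                ObjectSerializer.serialize("org.apache.pig.builtin.PigStorage('\u0001')"));
        StoreFunc store = MapRedUtil.getStoreFunc(conf);

        System.out.println(dflt.getClass().getName() + " / " + store.getClass().getName());
    }
}
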
Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestEvalPipeline2.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestEvalPipeline2.java?rev=780113&r1=780112&r2=780113&view=diff
==============================================================================
--- hadoop/pig/trunk/test/org/apache/pig/test/TestEvalPipeline2.java (original)
+++ hadoop/pig/trunk/test/org/apache/pig/test/TestEvalPipeline2.java Fri May 29 21:27:27 2009
@@ -290,5 +290,33 @@
Util.deleteFile(cluster, "table_bs_ac_clxt");
}
+
+ @Test
+ public void testPigStorageWithCtrlChars() throws Exception {
+ String[] inputData = { "hello\u0001world", "good\u0001morning", "nice\u0001day" };
+ Util.createInputFile(cluster, "testPigStorageWithCtrlCharsInput.txt", inputData);
+ String script = "a = load 'testPigStorageWithCtrlCharsInput.txt' using PigStorage('\u0001');" +
+ "b = foreach a generate $0, CONCAT($0, '\u0005'), $1; " +
+ "store b into 'testPigStorageWithCtrlCharsOutput.txt' using PigStorage('\u0001');" +
+ "c = load 'testPigStorageWithCtrlCharsOutput.txt' using PigStorage('\u0001') as (f1:chararray, f2:chararray, f3:chararray);";
+ Util.registerQuery(pigServer, script);
+ Iterator<Tuple> it = pigServer.openIterator("c");
+ HashMap<String, Tuple> expectedResults = new HashMap<String, Tuple>();
+ expectedResults.put("hello", (Tuple) Util.getPigConstant("('hello','hello\u0005','world')"));
+ expectedResults.put("good", (Tuple) Util.getPigConstant("('good','good\u0005','morning')"));
+ expectedResults.put("nice", (Tuple) Util.getPigConstant("('nice','nice\u0005','day')"));
+ HashMap<String, Boolean> seen = new HashMap<String, Boolean>();
+ int numRows = 0;
+ while(it.hasNext()) {
+ Tuple t = it.next();
+ String firstCol = (String) t.get(0);
+ assertFalse(seen.containsKey(firstCol));
+ seen.put(firstCol, true);
+ assertEquals(expectedResults.get(firstCol), t);
+ numRows++;
+ }
+ assertEquals(3, numRows);
+
+ }
}