Author: pradeepkth Date: Fri May 29 21:27:27 2009 New Revision: 780113 URL: http://svn.apache.org/viewvc?rev=780113&view=rev Log: PIG-816: PigStorage() does not accept Unicode characters in its constructor (pradeepkth)
Modified: hadoop/pig/trunk/CHANGES.txt hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/JobControlCompiler.java hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MapReducePOStoreImpl.java hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/util/MapRedUtil.java hadoop/pig/trunk/test/org/apache/pig/test/TestEvalPipeline2.java Modified: hadoop/pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=780113&r1=780112&r2=780113&view=diff ============================================================================== --- hadoop/pig/trunk/CHANGES.txt (original) +++ hadoop/pig/trunk/CHANGES.txt Fri May 29 21:27:27 2009 @@ -48,6 +48,8 @@ BUG FIXES +PIG-816: PigStorage() does not accept Unicode characters in its contructor (pradeepkth) + PIG-818: Explain doesn't handle PODemux properly (hagleitn via olgan) PIG-819: run -param -param; is a valid grunt command (milindb via olgan) Modified: hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/JobControlCompiler.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/JobControlCompiler.java?rev=780113&r1=780112&r2=780113&view=diff ============================================================================== --- hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/JobControlCompiler.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/JobControlCompiler.java Fri May 29 21:27:27 2009 @@ -383,8 +383,14 @@ String outputPath = st.getSFile().getFileName(); FuncSpec outputFuncSpec = st.getSFile().getFuncSpec(); FileOutputFormat.setOutputPath(jobConf, new Path(outputPath)); - - jobConf.set("pig.storeFunc", outputFuncSpec.toString()); + + // serialize the store func spec using ObjectSerializer + // ObjectSerializer.serialize() uses default java serialization 
+ // and then further encodes the output so that control characters + // get encoded as regular characters. Otherwise any control characters + // in the store funcspec would break the job.xml which is created by + // hadoop from the jobconf. + jobConf.set("pig.storeFunc", ObjectSerializer.serialize(outputFuncSpec.toString())); jobConf.set(PIG_STORE_CONFIG, ObjectSerializer.serialize(new StoreConfig(outputPath, st.getSchema()))); Modified: hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MapReducePOStoreImpl.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MapReducePOStoreImpl.java?rev=780113&r1=780112&r2=780113&view=diff ============================================================================== --- hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MapReducePOStoreImpl.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MapReducePOStoreImpl.java Fri May 29 21:27:27 2009 @@ -109,7 +109,13 @@ // PigOuputFormat will look for pig.storeFunc to actually // write stuff out. - outputConf.set("pig.storeFunc", sFile.getFuncSpec().toString()); + // serialize the store func spec using ObjectSerializer + // ObjectSerializer.serialize() uses default java serialization + // and then further encodes the output so that control characters + // get encoded as regular characters. Otherwise any control characters + // in the store funcspec would break the job.xml which is created by + // hadoop from the jobconf. 
+ outputConf.set("pig.storeFunc", ObjectSerializer.serialize(sFile.getFuncSpec().toString())); // We set the output dir to the final location of the output, // the output dir set in the original job config points to the Modified: hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/util/MapRedUtil.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/util/MapRedUtil.java?rev=780113&r1=780112&r2=780113&view=diff ============================================================================== --- hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/util/MapRedUtil.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/util/MapRedUtil.java Fri May 29 21:27:27 2009 @@ -34,18 +34,19 @@ */ public static StoreFunc getStoreFunc(JobConf conf) throws ExecException { StoreFunc store; - String storeFunc = conf.get("pig.storeFunc", ""); - if (storeFunc.length() == 0) { - store = new PigStorage(); - } else { - try { + try { + String storeFunc = conf.get("pig.storeFunc", ""); + if (storeFunc.length() == 0) { + store = new PigStorage(); + } else { + storeFunc = (String) ObjectSerializer.deserialize(storeFunc); store = (StoreFunc) PigContext .instantiateFuncFromSpec(storeFunc); - } catch (Exception e) { - int errCode = 2081; - String msg = "Unable to setup the store function."; - throw new ExecException(msg, errCode, PigException.BUG, e); } + } catch (Exception e) { + int errCode = 2081; + String msg = "Unable to setup the store function."; + throw new ExecException(msg, errCode, PigException.BUG, e); } return store; } Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestEvalPipeline2.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestEvalPipeline2.java?rev=780113&r1=780112&r2=780113&view=diff ============================================================================== --- 
hadoop/pig/trunk/test/org/apache/pig/test/TestEvalPipeline2.java (original) +++ hadoop/pig/trunk/test/org/apache/pig/test/TestEvalPipeline2.java Fri May 29 21:27:27 2009 @@ -290,5 +290,33 @@ Util.deleteFile(cluster, "table_bs_ac_clxt"); } + + @Test + public void testPigStorageWithCtrlChars() throws Exception { + String[] inputData = { "hello\u0001world", "good\u0001morning", "nice\u0001day" }; + Util.createInputFile(cluster, "testPigStorageWithCtrlCharsInput.txt", inputData); + String script = "a = load 'testPigStorageWithCtrlCharsInput.txt' using PigStorage('\u0001');" + + "b = foreach a generate $0, CONCAT($0, '\u0005'), $1; " + + "store b into 'testPigStorageWithCtrlCharsOutput.txt' using PigStorage('\u0001');" + + "c = load 'testPigStorageWithCtrlCharsOutput.txt' using PigStorage('\u0001') as (f1:chararray, f2:chararray, f3:chararray);"; + Util.registerQuery(pigServer, script); + Iterator<Tuple> it = pigServer.openIterator("c"); + HashMap<String, Tuple> expectedResults = new HashMap<String, Tuple>(); + expectedResults.put("hello", (Tuple) Util.getPigConstant("('hello','hello\u0005','world')")); + expectedResults.put("good", (Tuple) Util.getPigConstant("('good','good\u0005','morning')")); + expectedResults.put("nice", (Tuple) Util.getPigConstant("('nice','nice\u0005','day')")); + HashMap<String, Boolean> seen = new HashMap<String, Boolean>(); + int numRows = 0; + while(it.hasNext()) { + Tuple t = it.next(); + String firstCol = (String) t.get(0); + assertFalse(seen.containsKey(firstCol)); + seen.put(firstCol, true); + assertEquals(expectedResults.get(firstCol), t); + numRows++; + } + assertEquals(3, numRows); + + } }