breed
Fri, 14 Mar 2008 15:23:37 -0700
Author: breed Date: Fri Mar 14 15:23:05 2008 New Revision: 637293 URL: http://svn.apache.org/viewvc?rev=637293&view=rev Log: [#PIG-123] cannot escape single quotes in single quoted strings when using the eq or match operator Added: incubator/pig/trunk/test/org/apache/pig/test/TestPigScriptParser.java Modified: incubator/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt incubator/pig/trunk/src/org/apache/pig/tools/pigscript/parser/PigScriptParser.jj Modified: incubator/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt URL: http://svn.apache.org/viewvc/incubator/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt?rev=637293&r1=637292&r2=637293&view=diff ============================================================================== --- incubator/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt (original) +++ incubator/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt Fri Mar 14 15:23:05 2008 @@ -113,7 +113,7 @@ } static String unquote(String s) { - return s.substring(1, s.length()-1); + return StringUtils.unescapeInputString(s.substring(1, s.length()-1)) ; } static int undollar(String s) { @@ -245,6 +245,91 @@ } + +class StringUtils { + + public static String unescapeInputString(String input) { + + if (input == null) { + return new String() ; + } + + // Needed variables + // preset the size so our StringBuilders don't have to grow + int inputlength = input.length(); + StringBuilder unicode = new StringBuilder(4); + StringBuilder output = new StringBuilder(inputlength) ; + boolean hadSlash = false; + boolean inUnicode = false; + + // The main loop + for (int i = 0; i < inputlength; i++) { + char ch = input.charAt(i); + // currently doing unicode mode + if (inUnicode) { + unicode.append(ch); + if (unicode.length() == 4) { + // unicode now contains the four hex digits + try { + int value = Integer.parseInt(unicode.toString(), 0x10); + output.append((char) value) ; + // reuse the StringBuilder + unicode.setLength(0); + inUnicode = false; + hadSlash = false; + } catch (NumberFormatException nfe) { + throw new RuntimeException("Unable to parse unicode value: " + unicode, nfe); + } + } + continue; + } + if (hadSlash) { + // handle an escaped value + hadSlash = false; + switch (ch) { + case '\\': + output.append('\\'); + break; + case '\'': + output.append('\''); + break; + case 'r': + output.append('\r'); + break; + case 'f': + output.append('\f'); + break; + case 't': + output.append('\t'); + break; + case 'n': + output.append('\n'); + break; + case 'b': + output.append('\b'); + break; + case 'u': + { + // switch to unicode mode + inUnicode = true; + break; + } + default : + output.append(ch); + break; + } + continue; + } else if (ch == '\\') { + hadSlash = true; + continue; + } + output.append(ch); + } + + return output.toString() ; + } +} + PARSER_END(QueryParser) @@ -318,7 +403,20 @@ | < INTEGER: ( <DIGIT> )+ > } -TOKEN : { <QUOTEDSTRING : "'" (~["'"])* "'"> } +TOKEN : { <QUOTEDSTRING : "'" + ( (~["'","\\","\n","\r"]) + | ("\\" + ( ["n","t","b","r","f","\\","'"] ) + ) + | ("\\u" + ["0"-"9","A"-"F","a"-"f"] + ["0"-"9","A"-"F","a"-"f"] + ["0"-"9","A"-"F","a"-"f"] + ["0"-"9","A"-"F","a"-"f"] + ) + )* + "'"> } + TOKEN : { <EXECCOMMAND : "`" (~["`"])* "`"> } // Pig has special variables starting with $ TOKEN : { <DOLLARVAR : "$" <INTEGER> > } Modified: incubator/pig/trunk/src/org/apache/pig/tools/pigscript/parser/PigScriptParser.jj URL: http://svn.apache.org/viewvc/incubator/pig/trunk/src/org/apache/pig/tools/pigscript/parser/PigScriptParser.jj?rev=637293&r1=637292&r2=637293&view=diff ============================================================================== --- incubator/pig/trunk/src/org/apache/pig/tools/pigscript/parser/PigScriptParser.jj (original) +++ incubator/pig/trunk/src/org/apache/pig/tools/pigscript/parser/PigScriptParser.jj Fri Mar 14 15:23:05 2008 @@ -188,7 +188,8 @@ <IN_STRING> MORE : { - <"'"> { SwitchTo(prevState);} + <"\\'"> +| <"'"> { SwitchTo(prevState);} | <("\n" | "\r" | "\r\n")> {secondary_prompt();} | <(~[])> } @@ -248,7 +249,19 @@ TOKEN: {<IDENTIFIER: (<LETTER>)+(<DIGIT> | <LETTER> | <SPECIALCHAR>)*>} TOKEN: {<PATH: (~["(", ")", ";", "\r", " ", "\t", "\n"])+>} -TOKEN : { <QUOTEDSTRING : "'" (~["'"])* "'"> } +TOKEN : { <QUOTEDSTRING : "'" + ( (~["'","\\","\n","\r"]) + | ("\\" + ( ["n","t","b","r","f","\\","'"] ) + ) + | ("\\u" + ["0"-"9","A"-"F","a"-"f"] + ["0"-"9","A"-"F","a"-"f"] + ["0"-"9","A"-"F","a"-"f"] + ["0"-"9","A"-"F","a"-"f"] + ) + )* + "'"> } void parse() throws IOException: { Token t1, t2; Added: incubator/pig/trunk/test/org/apache/pig/test/TestPigScriptParser.java URL: http://svn.apache.org/viewvc/incubator/pig/trunk/test/org/apache/pig/test/TestPigScriptParser.java?rev=637293&view=auto ============================================================================== --- incubator/pig/trunk/test/org/apache/pig/test/TestPigScriptParser.java (added) +++ incubator/pig/trunk/test/org/apache/pig/test/TestPigScriptParser.java Fri Mar 14 15:23:05 2008 @@ -0,0 +1,111 @@ +package org.apache.pig.test; + +import java.io.ByteArrayInputStream; +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.PrintStream; +import java.util.Map; +import java.util.HashMap; + +import org.junit.Test; +import junit.framework.TestCase; + +import org.apache.pig.PigServer; +import org.apache.pig.impl.PigContext; +import org.apache.pig.impl.logicalLayer.* ; +import org.apache.pig.impl.logicalLayer.parser.* ; +import org.apache.pig.impl.eval.* ; +import org.apache.pig.impl.eval.cond.* ; + +public class TestPigScriptParser extends TestCase { + + @Test + public void testParserWithEscapeCharacters() throws Exception { + + // All the needed variables + Map<String, LogicalPlan> aliases = new HashMap<String, LogicalPlan>() ; + Map<OperatorKey, LogicalOperator> opTable = new HashMap<OperatorKey, LogicalOperator>() ; + PigContext pigContext = new PigContext(PigServer.ExecType.LOCAL) ; + + String tempFile = this.prepareTempFile() ; + + // Start the real parsing job + { + // Initial statement + String query = String.format("A = LOAD '%s' ;", tempFile) ; + ByteArrayInputStream in = new ByteArrayInputStream(query.getBytes()); + QueryParser parser = new QueryParser(in, pigContext, "scope", aliases, opTable) ; + LogicalPlan lp = parser.Parse() ; + aliases.put(lp.getAlias(), lp) ; + } + + { + // Normal condition + String query = "B1 = filter A by $0 eq 'This is a test string' ;" ; + checkParsedConstContent(aliases, opTable, pigContext, + query, "This is a test string") ; + } + + { + // single-quote condition + String query = "B2 = filter A by $0 eq 'This is a test \\'string' ;" ; + checkParsedConstContent(aliases, opTable, pigContext, + query, "This is a test 'string") ; + } + + { + // newline condition + String query = "B3 = filter A by $0 eq 'This is a test \\nstring' ;" ; + checkParsedConstContent(aliases, opTable, pigContext, + query, "This is a test \nstring") ; + } + + { + // Unicode + String query = "B4 = filter A by $0 eq 'This is a test \\uD30C\\uC774string' ;" ; + checkParsedConstContent(aliases, opTable, pigContext, + query, "This is a test \uD30C\uC774string") ; + } + } + + private void checkParsedConstContent(Map<String, LogicalPlan> aliases, + Map<OperatorKey, LogicalOperator> opTable, + PigContext pigContext, + String query, + String expectedContent) + throws Exception { + // Run the parser + ByteArrayInputStream in = new ByteArrayInputStream(query.getBytes()); + QueryParser parser = new QueryParser(in, pigContext, "scope", aliases, opTable) ; + LogicalPlan lp = parser.Parse() ; + aliases.put(lp.getAlias(), lp) ; + + // Digging down the tree + LOEval eval = (LOEval)opTable.get(lp.getRoot()) ; + CompCond compCond = ((CompCond)(((FilterSpec) eval.getSpec()).cond)) ; + + // Here is the actual check logic + if (compCond.left instanceof ConstSpec) { + ConstSpec constSpec = (ConstSpec) compCond.left ; + assertTrue("Must be equal", + constSpec.constant.equals(expectedContent)) ; + } + // If not left, it must be right. + else { + ConstSpec constSpec = (ConstSpec) compCond.right ; + assertTrue("Must be equal", + constSpec.constant.equals(expectedContent)) ; + } + } + + private String prepareTempFile() throws IOException { + File inputFile = File.createTempFile("test", "txt"); + inputFile.deleteOnExit() ; + PrintStream ps = new PrintStream(new FileOutputStream(inputFile)); + ps.println("hohoho") ; + ps.close(); + return inputFile.getPath() ; + } + +}