Author: olga
Date: Thu Jul 17 15:52:31 2008
New Revision: 677768

URL: http://svn.apache.org/viewvc?rev=677768&view=rev
Log:
merge of PIG-123: escaping

Added:
    
incubator/pig/branches/types/test/org/apache/pig/test/TestPigScriptParser.java
Modified:
    
incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt
    
incubator/pig/branches/types/src/org/apache/pig/tools/pigscript/parser/PigScriptParser.jj

Modified: 
incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt
URL: 
http://svn.apache.org/viewvc/incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt?rev=677768&r1=677767&r2=677768&view=diff
==============================================================================
--- 
incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt
 (original)
+++ 
incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt
 Thu Jul 17 15:52:31 2008
@@ -141,7 +141,7 @@
     }
 
     static String unquote(String s) {
-               return s.substring(1, s.length()-1);
+               return StringUtils.unescapeInputString(s.substring(1, 
s.length()-1)) ;
        }
        
        static int undollar(String s) {
@@ -397,6 +397,91 @@
 
 }
 
+/**
+ * Helper that decodes backslash escape sequences in string text.
+ * In this file it is called from unquote() on the text between the quotes.
+ */
+class StringUtils {
+    /**
+     * Returns a copy of {@code input} with backslash escapes replaced by the
+     * characters they denote.
+     * <ul>
+     *   <li>backslash followed by a backslash or a single quote yields that
+     *       character;</li>
+     *   <li>backslash followed by one of {@code r f t n b} yields carriage
+     *       return, form feed, tab, line feed or backspace;</li>
+     *   <li>backslash, {@code u} and the next four characters: the four
+     *       characters are parsed as a base-16 integer and appended as a
+     *       single {@code char};</li>
+     *   <li>backslash followed by any other character yields that character
+     *       without the backslash.</li>
+     * </ul>
+     * NOTE(review): a backslash that is the last character of the input, or
+     * a unicode escape cut short by the end of the input, contributes
+     * nothing to the result and no error is raised. Confirm this is intended.
+     * NOTE(review): Integer.parseInt also accepts a leading sign, so four
+     * characters such as "-001" are not rejected by this method.
+     *
+     * @param input text to decode; may be {@code null}
+     * @return the decoded text, or an empty string when {@code input} is
+     *         {@code null}
+     * @throws RuntimeException wrapping the NumberFormatException raised when
+     *         the four characters of a unicode escape cannot be parsed
+     */
+       public static String unescapeInputString(String input)  {
+
+            if (input == null) {
+                return new String() ; // null is mapped to an empty string, never returned as null
+            }
+            
+            // Working state for the single pass over the input.
+            // Both builders are presized: 4 for the hex digits, input length for the result (the result is never longer than the input).
+            int inputlength = input.length();       
+            StringBuilder unicode = new StringBuilder(4);
+            StringBuilder output = new StringBuilder(inputlength) ;
+            boolean hadSlash = false; // true when the previous character was an unconsumed backslash
+            boolean inUnicode = false; // true while collecting the four characters of a unicode escape
+            
+            // One pass, one character at a time; the two flags select the mode
+            for (int i = 0; i < inputlength; i++) {
+                char ch = input.charAt(i);
+                // unicode mode: every character is buffered, none is copied to the output directly
+                if (inUnicode) {
+                    unicode.append(ch);
+                    if (unicode.length() == 4) {
+                        // four characters collected; parse them as one base-16 number
+                        try {
+                            int value = Integer.parseInt(unicode.toString(), 
0x10);
+                            output.append((char) value) ;
+                            // empty the buffer so the next escape starts clean
+                            unicode.setLength(0);
+                            inUnicode = false;
+                            hadSlash = false;
+                        } catch (NumberFormatException nfe) {
+                            throw new RuntimeException("Unable to parse 
unicode value: " + unicode, nfe);
+                        }
+                    }
+                    continue; // either still collecting, or the escape was just emitted
+                }
+                if (hadSlash) {
+                    // ch is the character right after a backslash
+                    hadSlash = false;
+                    switch (ch) {
+                        case '\\':
+                            output.append('\\');
+                            break;
+                        case '\'':
+                            output.append('\'');
+                            break;
+                        case 'r':
+                            output.append('\r');
+                            break;
+                        case 'f':
+                            output.append('\f');
+                            break;
+                        case 't':
+                            output.append('\t');
+                            break;
+                        case 'n':
+                            output.append('\n');
+                            break;
+                        case 'b':
+                            output.append('\b');
+                            break;
+                        case 'u':
+                            {
+                                // the next four characters are consumed by the unicode branch above
+                                inUnicode = true;
+                                break;
+                            }
+                        default :
+                            output.append(ch); // unrecognized escape: keep the character, drop the backslash
+                            break;
+                    }
+                    continue;
+                } else if (ch == '\\') {
+                    hadSlash = true; // remember the backslash; it is not copied to the output
+                    continue;
+                }
+                output.append(ch); // ordinary character, copied unchanged
+            }
+            
+            return output.toString() ;
+        }
+}
+
        
 PARSER_END(QueryParser)
 
@@ -478,7 +563,20 @@
 |   < FLOATNUMBER: <DOUBLENUMBER> (["f","F"])? >
 }
 
-TOKEN : { <QUOTEDSTRING : "'" (~["'"])* "'"> }
+TOKEN : { <QUOTEDSTRING :  "'" // opening single quote, then zero or more of the three alternatives below
+(   (~["'","\\","\n","\r"]) // 1) any character except single quote, backslash, line feed, carriage return
+  | ("\\" // 2) a backslash followed by exactly one of the characters listed on the next line
+      ( ["n","t","b","r","f","\\","'"] )
+    )
+  | ("\\u" // 3) a backslash, the letter u, and exactly four hexadecimal digits
+        ["0"-"9","A"-"F","a"-"f"]
+        ["0"-"9","A"-"F","a"-"f"]
+        ["0"-"9","A"-"F","a"-"f"]
+        ["0"-"9","A"-"F","a"-"f"]
+    )
+)*
+"'"> } // closing single quote
+
 // Pig has special variables starting with $
 TOKEN : { <DOLLARVAR : "$" <INTEGER> > }
 

Modified: 
incubator/pig/branches/types/src/org/apache/pig/tools/pigscript/parser/PigScriptParser.jj
URL: 
http://svn.apache.org/viewvc/incubator/pig/branches/types/src/org/apache/pig/tools/pigscript/parser/PigScriptParser.jj?rev=677768&r1=677767&r2=677768&view=diff
==============================================================================
--- 
incubator/pig/branches/types/src/org/apache/pig/tools/pigscript/parser/PigScriptParser.jj
 (original)
+++ 
incubator/pig/branches/types/src/org/apache/pig/tools/pigscript/parser/PigScriptParser.jj
 Thu Jul 17 15:52:31 2008
@@ -187,7 +187,8 @@
 
 <IN_STRING> MORE :
 {
-       <"'"> { SwitchTo(prevState);}
+       <"\\'">
+|      <"'"> { SwitchTo(prevState);}
 |      <("\n" | "\r" | "\r\n")> {secondary_prompt();}
 |      <(~[])>
 }
@@ -247,7 +248,19 @@
 
 TOKEN: {<IDENTIFIER: (<LETTER>)+(<DIGIT> | <LETTER> | <SPECIALCHAR>)*>}
 TOKEN: {<PATH: (~["(", ")", ";", "\r", " ", "\t", "\n"])+>}
-TOKEN : { <QUOTEDSTRING : "'" (~["'"])* "'"> }
+TOKEN : { <QUOTEDSTRING :  "'" // opening single quote, then zero or more of the three alternatives below
+      (   (~["'","\\","\n","\r"]) // 1) any character except single quote, backslash, line feed, carriage return
+        | ("\\" // 2) a backslash followed by exactly one of the characters listed on the next line
+            ( ["n","t","b","r","f","\\","'"] )
+          )
+        | ("\\u" // 3) a backslash, the letter u, and exactly four hexadecimal digits
+            ["0"-"9","A"-"F","a"-"f"]
+            ["0"-"9","A"-"F","a"-"f"]
+            ["0"-"9","A"-"F","a"-"f"]
+            ["0"-"9","A"-"F","a"-"f"]
+          )
+      )*
+      "'"> } // closing single quote
 void parse() throws IOException:
 {
        Token t1, t2;

Added: 
incubator/pig/branches/types/test/org/apache/pig/test/TestPigScriptParser.java
URL: 
http://svn.apache.org/viewvc/incubator/pig/branches/types/test/org/apache/pig/test/TestPigScriptParser.java?rev=677768&view=auto
==============================================================================
--- 
incubator/pig/branches/types/test/org/apache/pig/test/TestPigScriptParser.java 
(added)
+++ 
incubator/pig/branches/types/test/org/apache/pig/test/TestPigScriptParser.java 
Thu Jul 17 15:52:31 2008
@@ -0,0 +1,117 @@
+package org.apache.pig.test;
+
+import java.io.ByteArrayInputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.util.List;
+import java.util.Map;
+import java.util.HashMap;
+
+import org.junit.Test;
+import junit.framework.TestCase;
+
+import org.apache.pig.PigServer;
+import org.apache.pig.ExecType;
+import org.apache.pig.impl.PigContext;
+import org.apache.pig.impl.plan.OperatorKey;
+import org.apache.pig.impl.logicalLayer.* ;
+import org.apache.pig.impl.logicalLayer.parser.* ;
+/**
+ * Checks that string constants written with backslash escapes in a query
+ * come out of {@code QueryParser} holding the unescaped characters.
+ * Only {@code QueryParser} is instantiated by this class.
+ * NOTE(review): the class extends the JUnit 3 TestCase and also carries a
+ * JUnit 4 annotation; which mechanism picks the test up depends on the runner.
+ */
+public class TestPigScriptParser extends TestCase {
+    /**
+     * Parses a LOAD statement for alias A over a temporary file, then parses
+     * four filter statements over A and checks the string constant of each:
+     * a plain string, an escaped single quote, an escaped line feed, and two
+     * four-digit unicode escapes.
+     *
+     * @throws Exception whatever the parser or the temp-file setup throws
+     */
+    @Test
+    public void testParserWithEscapeCharacters() throws Exception {
+
+        // Maps and context handed to every QueryParser created below (the same instances are reused for all statements)
+        Map<LogicalOperator, LogicalPlan> aliases = new 
HashMap<LogicalOperator, LogicalPlan>();
+        Map<OperatorKey, LogicalOperator> opTable = new HashMap<OperatorKey, 
LogicalOperator>() ;
+        Map<String, LogicalOperator> aliasOp = new HashMap<String, 
LogicalOperator>() ;
+        Map<String, ExpressionOperator> defineAliases = new HashMap<String, 
ExpressionOperator>();
+        PigContext pigContext = new PigContext(ExecType.LOCAL) ;
+        
+        String tempFile = this.prepareTempFile() ;
+        
+        // Start the real parsing job
+        {
+               // Statement defining alias A; the returned plan is not inspected here
+               String query = String.format("A = LOAD '%s' ;", tempFile) ; // NOTE(review): the path is spliced in unescaped; a path containing a backslash or a quote would presumably not tokenize as intended - confirm
+               ByteArrayInputStream in = new 
ByteArrayInputStream(query.getBytes()); 
+               QueryParser parser = new QueryParser(in, pigContext, "scope", 
aliases, opTable, aliasOp, defineAliases) ;
+               LogicalPlan lp = parser.Parse() ; 
+        }
+        
+        {
+               // No escapes: the constant must equal the literal text
+               String query = "B1 = filter A by $0 eq 'This is a test string' 
;" ;
+               checkParsedConstContent(aliases, opTable, pigContext, aliasOp, 
defineAliases,
+                                       query, "This is a test string") ;       
+        }
+        
+        {
+               // The query text holds backslash + single quote; expected constant holds just the quote
+               String query = "B2 = filter A by $0 eq 'This is a test 
\\'string' ;" ;
+               checkParsedConstContent(aliases, opTable, pigContext, aliasOp, 
defineAliases,
+                                       query, "This is a test 'string") ;      
+        }
+        
+        {
+               // The query text holds backslash + n; expected constant holds a real line feed
+               String query = "B3 = filter A by $0 eq 'This is a test 
\\nstring' ;" ;
+               checkParsedConstContent(aliases, opTable, pigContext, aliasOp, 
defineAliases,
+                                       query, "This is a test \nstring") ;     
+        }
+        
+        {
+               // The query text holds two backslash-u escapes; expected constant holds the two characters themselves
+               String query = "B4 = filter A by $0 eq 'This is a test 
\\uD30C\\uC774string' ;" ;
+               checkParsedConstContent(aliases, opTable, pigContext, aliasOp, 
defineAliases,
+                                       query, "This is a test 
\uD30C\uC774string") ;   
+        }
+    }
+    /**
+     * Parses {@code query} with a fresh QueryParser and asserts that the
+     * constant in its filter condition equals {@code expectedContent}.
+     * The plan's first root is taken, its first successor is cast to
+     * LOFilter, and the first two roots of that filter's comparison plan are
+     * examined: whichever is the LOConst supplies the value to compare.
+     *
+     * @param aliases         map passed through to the QueryParser constructor
+     * @param opTable         map passed through to the QueryParser constructor
+     * @param pigContext      context passed through to the QueryParser constructor
+     * @param aliasOp         map passed through to the QueryParser constructor
+     * @param defineAliases   map passed through to the QueryParser constructor
+     * @param query           statement text to parse
+     * @param expectedContent value the string constant must equal
+     * @throws Exception whatever the parser throws; a ClassCastException
+     *         results if the examined operators are not of the cast types
+     */
+       private void checkParsedConstContent(Map<LogicalOperator, LogicalPlan> 
aliases,
+                                         Map<OperatorKey, LogicalOperator> 
opTable,
+                                         PigContext pigContext,
+                                         Map<String, LogicalOperator> aliasOp,
+                                         Map<String, ExpressionOperator> 
defineAliases,
+                                         String query,
+                                         String expectedContent)
+                                        throws Exception {
+        // Run the parser
+        ByteArrayInputStream in = new ByteArrayInputStream(query.getBytes()); // NOTE(review): getBytes() without a charset uses the platform default - confirm the parser reads with the same one
+        QueryParser parser = new QueryParser(in, pigContext, "scope", aliases, 
opTable, aliasOp, defineAliases) ;
+        LogicalPlan lp = parser.Parse() ; 
+        
+        // Walk from the plan root to the filter, then into its comparison plan
+        LogicalOperator root = lp.getRoots().get(0) ;
+        LogicalOperator filter = lp.getSuccessors(root).get(0);
+        LogicalPlan comparisonPlan = ((LOFilter)filter).getComparisonPlan();
+        List<LogicalOperator> comparisonPlanRoots = comparisonPlan.getRoots();
+        LogicalOperator compRootOne = comparisonPlanRoots.get(0);
+        LogicalOperator compRootTwo = comparisonPlanRoots.get(1);
+
+        
+        // The constant may be either of the two roots, so check which one it is
+        if (compRootOne instanceof LOConst) {
+            assertTrue("Must be equal", 
+                        
((String)((LOConst)compRootOne).getValue()).equals(expectedContent)) ;
+        } 
+        // Not the first root, so the second is cast to LOConst without a prior instanceof check.
+        else {
+            assertTrue("Must be equal", 
+                        
((String)((LOConst)compRootTwo).getValue()).equals(expectedContent)) ;
+        }
+    }
+    /**
+     * Creates a temporary file containing the single line "hohoho", marks it
+     * for deletion when the JVM exits, and returns its path.
+     * NOTE(review): the suffix passed is "txt" without a dot, so the file
+     * name ends in "txt" rather than ".txt".
+     *
+     * @return path of the newly written temporary file
+     * @throws IOException if the file cannot be created or opened for writing
+     */
+    private String prepareTempFile() throws IOException {
+        File inputFile = File.createTempFile("test", "txt");
+        inputFile.deleteOnExit() ;
+        PrintStream ps = new PrintStream(new FileOutputStream(inputFile));
+        ps.println("hohoho") ;
+        ps.close();
+        return inputFile.getPath() ;
+    }
+
+}


Reply via email to