Author: pradeepkth
Date: Tue Sep 22 17:33:24 2009
New Revision: 817739

URL: http://svn.apache.org/viewvc?rev=817739&view=rev
Log:
PIG-738: Regexp passed from pigscript fails in UDF  (pradeepkth)

Added:
    hadoop/pig/trunk/test/org/apache/pig/test/RegexGroupCount.java
Modified:
    hadoop/pig/trunk/CHANGES.txt
    hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt
    hadoop/pig/trunk/test/org/apache/pig/test/TestPigScriptParser.java

Modified: hadoop/pig/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=817739&r1=817738&r2=817739&view=diff
==============================================================================
--- hadoop/pig/trunk/CHANGES.txt (original)
+++ hadoop/pig/trunk/CHANGES.txt Tue Sep 22 17:33:24 2009
@@ -32,6 +32,8 @@
 
 BUG FIXES
 
+PIG-738: Regexp passed from pigscript fails in UDF  (pradeepkth)
+
 PIG-942: Maps are not implicitly casted (pradeepkth)
 
 PIG-513:  Removed unecessary bounds check in DefaultTuple (ashutoshc via

Modified: 
hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt
URL: 
http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt?rev=817739&r1=817738&r2=817739&view=diff
==============================================================================
--- 
hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt 
(original)
+++ 
hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt 
Tue Sep 22 17:33:24 2009
@@ -1277,8 +1277,8 @@
 {
        (
        (
-       t = <QUOTEDSTRING> {sb.append(t.image);}
-       ( "," t = <QUOTEDSTRING> {sb.append(",");sb.append(t.image);} )*
+       t = <QUOTEDSTRING> 
{sb.append(StringUtils.unescapeInputString(t.image));}
+       ( "," t = <QUOTEDSTRING> 
{sb.append(",");sb.append(StringUtils.unescapeInputString(t.image));} )*
        )
        | {}
        )

Added: hadoop/pig/trunk/test/org/apache/pig/test/RegexGroupCount.java
URL: 
http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/RegexGroupCount.java?rev=817739&view=auto
==============================================================================
--- hadoop/pig/trunk/test/org/apache/pig/test/RegexGroupCount.java (added)
+++ hadoop/pig/trunk/test/org/apache/pig/test/RegexGroupCount.java Tue Sep 22 
17:33:24 2009
@@ -0,0 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * A test UDF that verifies users can pass an escaped dot
+ * as part of a regex in a UDF constructor argument.
+ * Example: in Perl the regex would be "www\.abc\.com" -
+ * the user's intent is to supply a regex pattern
+ * in which the dot (.) is escaped. Written as a Java string
+ * literal this is "www\\.abc\\.com"; the parser should
+ * eventually hand that same string to the UDF. In a Pig script
+ * the user likewise writes it as 'www\\.abc\\.com'.
+ */
+package org.apache.pig.test;
+
+import java.io.IOException;
+
+import java.util.regex.Matcher;
+
+import java.util.regex.Pattern;
+
+import org.apache.pig.EvalFunc;
+
+import org.apache.pig.data.Tuple;
+
+import org.apache.pig.impl.util.WrappedIOException;
+
+public class RegexGroupCount extends EvalFunc<Integer> {
+
+    private final Pattern pattern_;
+
+    public RegexGroupCount(String patternStr) {
+
+       System.out.println("My pattern supplied is "+patternStr);
+
+       System.out.println("Equality test 
"+patternStr.equals("www\\.xyz\\.com/sports"));
+        
+       pattern_ = Pattern.compile(patternStr, 
Pattern.DOTALL|Pattern.CASE_INSENSITIVE);  
+
+    } 
+
+    //@Override
+
+    public Integer exec(Tuple input)  throws IOException {
+
+              int i = 9999;
+
+              if (input == null || input.size() == 0) {   return 8888;  }
+
+              String istr = (String) input.get(0);
+
+              System.out.println("My input is: "+istr);
+
+                try {
+
+                    i = 0;
+
+                    Matcher matcher = pattern_.matcher(istr);
+
+                    while (matcher.find()) {
+
+                               i++;
+
+                    }
+
+                } catch (NullPointerException e) {
+
+                    i = 7777;
+
+                }  catch (Exception e) {
+
+                    i = 6666;
+
+                    throw WrappedIOException.wrap("Caught exception processing 
RegexGroupCount", e);
+
+                }
+
+                return i;
+
+    }
+
+}

Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestPigScriptParser.java
URL: 
http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestPigScriptParser.java?rev=817739&r1=817738&r2=817739&view=diff
==============================================================================
--- hadoop/pig/trunk/test/org/apache/pig/test/TestPigScriptParser.java 
(original)
+++ hadoop/pig/trunk/test/org/apache/pig/test/TestPigScriptParser.java Tue Sep 
22 17:33:24 2009
@@ -22,6 +22,7 @@
 import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.PrintStream;
+import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import java.util.HashMap;
@@ -32,6 +33,7 @@
 
 import org.apache.pig.PigServer;
 import org.apache.pig.ExecType;
+import org.apache.pig.data.Tuple;
 import org.apache.pig.impl.PigContext;
 import org.apache.pig.impl.plan.OperatorKey;
 import org.apache.pig.impl.logicalLayer.* ;
@@ -77,6 +79,17 @@
         }
         
         {
+            // escaping dot
+            // The query below has 4 backslashes because we want the Pig script to contain two backslashes,
+            // and in a Java string literal each backslash must itself be escaped with another
+            // backslash - hence 4. In a Pig script in a file, this would be
+            // \\.string
+            String query = "B2 = filter A by $0 eq 'This is a test 
\\\\.string' ;" ;
+            checkParsedConstContent(aliases, opTable, pigContext, aliasOp, 
fileNameMap,
+                                    query, "This is a test \\.string") ;  
+        }
+        
+        {
                // newline condition
                String query = "B3 = filter A by $0 eq 'This is a test 
\\nstring' ;" ;
                checkParsedConstContent(aliases, opTable, pigContext, aliasOp, 
fileNameMap, 
@@ -90,7 +103,38 @@
                                        query, "This is a test 
\uD30C\uC774string") ;   
         }
     }
-
+    
+    @Test
+    public void testDefineUDF() throws Exception {
+        String inputData[] = {
+                
"dshfdskfwww.xyz.com/sportsjoadfjdslpdshfdskfwww.xyz.com/sportsjoadfjdsl" ,
+                "kas;dka;sd" ,
+                "jsjsjwww.xyz.com/sports" ,
+                "jsdLSJDcom/sports" ,
+                "wwwJxyzMcom/sports"
+        };
+        File f = Util.createFile(inputData);
+        String[] queryLines = new String[] { 
+                // The define below has 4 backslashes because we want the Pig script to contain two backslashes,
+                // and in a Java string literal each backslash must itself be escaped with another
+                // backslash - hence 4. In a Pig script in a file, this would be
+                // www\\.xyz\\.com/sports
+                "define minelogs 
org.apache.pig.test.RegexGroupCount('www\\\\.xyz\\\\.com/sports');" ,
+                       "A = load 'file://" + f.getAbsolutePath() + "'  using 
PigStorage() as (source : chararray);" ,
+                       "B = foreach A generate minelogs(source) as 
sportslogs;" };
+        PigServer ps = new PigServer(ExecType.LOCAL);
+        for (String line : queryLines) {
+            ps.registerQuery(line);
+        }
+        Iterator<Tuple> it = ps.openIterator("B");
+        int[] expectedResults = new int[] {2,0,1,0,0};
+        int i = 0;
+        while(it.hasNext()) {
+            Tuple t = it.next();
+            assertEquals(expectedResults[i++], t.get(0));
+        }
+    }
+    
        private void checkParsedConstContent(Map<LogicalOperator, LogicalPlan> 
aliases,
                                              Map<OperatorKey, LogicalOperator> 
opTable,
                                              PigContext pigContext,


Reply via email to