Author: daijy
Date: Mon Feb 15 06:32:36 2016
New Revision: 1730455

URL: http://svn.apache.org/viewvc?rev=1730455&view=rev
Log:
PIG-4803: Improve performance of regex-based builtin functions

Modified:
    pig/trunk/CHANGES.txt
    pig/trunk/src/org/apache/pig/builtin/REPLACE.java
    pig/trunk/test/org/apache/pig/test/TestStringUDFs.java

Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1730455&r1=1730454&r2=1730455&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Mon Feb 15 06:32:36 2016
@@ -24,6 +24,8 @@ INCOMPATIBLE CHANGES
 
 IMPROVEMENTS
 
+PIG-4803: Improve performance of regex-based builtin functions (eyal via daijy)
+
 PIG-4802: Autoparallelism should estimate less when there is combiner (rohini)
 
 PIG-4761: Add more information to front end error messages (eyal via daijy)

Modified: pig/trunk/src/org/apache/pig/builtin/REPLACE.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/builtin/REPLACE.java?rev=1730455&r1=1730454&r2=1730455&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/builtin/REPLACE.java (original)
+++ pig/trunk/src/org/apache/pig/builtin/REPLACE.java Mon Feb 15 06:32:36 2016
@@ -21,14 +21,15 @@ package org.apache.pig.builtin;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
+import java.util.regex.Pattern;
 
 import org.apache.pig.EvalFunc;
 import org.apache.pig.FuncSpec;
 import org.apache.pig.PigWarning;
-import org.apache.pig.data.Tuple;
 import org.apache.pig.data.DataType;
-import org.apache.pig.impl.logicalLayer.schema.Schema;
+import org.apache.pig.data.Tuple;
 import org.apache.pig.impl.logicalLayer.FrontendException;
+import org.apache.pig.impl.logicalLayer.schema.Schema;
 
 /**
  * REPLACE implements eval function to replace part of a string.
@@ -42,6 +43,8 @@ import org.apache.pig.impl.logicalLayer.
  */
 public class REPLACE extends EvalFunc<String>
 {
+    private Pattern mPattern = null;
+
     /**
      * Method invoked on every tuple during foreach evaluation
      * @param input tuple; first column is assumed to have the column to convert
@@ -52,13 +55,29 @@ public class REPLACE extends EvalFunc<St
         if (input == null || input.size() < 3)
             return null;
 
-        try{
-            String source = (String)input.get(0);
-            String target = (String)input.get(1);
-            String replacewith = (String)input.get(2);
-            return source.replaceAll(target, replacewith);
+        String source = (String)input.get(0);
+        String target = (String)input.get(1);
+
+        if (target == null) {
+            warn("Replace : Regular expression is null", PigWarning.UDF_WARNING_1);
+            return null;
+        }
+        
+        if (mPattern == null || ! target.equals(mPattern.pattern())) {
+            try {
+                mPattern = Pattern.compile(target);
+            } catch (Exception e) {
+                warn("Replace : Mal-Formed Regular expression : " + target, PigWarning.UDF_WARNING_1);
+                return null;
+            }
+        }            
+        
+        String replacewith = (String)input.get(2);
+        
+        try {    
+           return mPattern.matcher(source).replaceAll(replacewith);
         }catch(Exception e){
-            warn("Failed to process input; error - " + e.getMessage(), PigWarning.UDF_WARNING_1);
+            warn("Replace : Failed to process input; error - " + e.getMessage(), PigWarning.UDF_WARNING_1);
             return null;
         }
     }

Modified: pig/trunk/test/org/apache/pig/test/TestStringUDFs.java
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/TestStringUDFs.java?rev=1730455&r1=1730454&r2=1730455&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/test/TestStringUDFs.java (original)
+++ pig/trunk/test/org/apache/pig/test/TestStringUDFs.java Mon Feb 15 06:32:36 2016
@@ -128,7 +128,10 @@ public class TestStringUDFs {
         REPLACE replace = new REPLACE();
         Tuple testTuple = Util.buildTuple("foobar", "z", "x");
         assertEquals("foobar".replace("z", "x"), replace.exec(testTuple));
-        
+
+        // Use cached version of pattern in REPLACE
+        assertEquals("foobar".replace("z", "x"), replace.exec(testTuple));
+
         testTuple = Util.buildTuple("foobar", "oo", "aa");
         assertEquals("foobar".replace("oo", "aa"), replace.exec(testTuple));
     }


Reply via email to