Author: daijy
Date: Mon Feb 15 06:32:36 2016
New Revision: 1730455
URL: http://svn.apache.org/viewvc?rev=1730455&view=rev
Log:
PIG-4803: Improve performance of regex-based builtin functions
Modified:
pig/trunk/CHANGES.txt
pig/trunk/src/org/apache/pig/builtin/REPLACE.java
pig/trunk/test/org/apache/pig/test/TestStringUDFs.java
Modified: pig/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1730455&r1=1730454&r2=1730455&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Mon Feb 15 06:32:36 2016
@@ -24,6 +24,8 @@ INCOMPATIBLE CHANGES
IMPROVEMENTS
+PIG-4803: Improve performance of regex-based builtin functions (eyal via daijy)
+
PIG-4802: Autoparallelism should estimate less when there is combiner (rohini)
PIG-4761: Add more information to front end error messages (eyal via daijy)
Modified: pig/trunk/src/org/apache/pig/builtin/REPLACE.java
URL:
http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/builtin/REPLACE.java?rev=1730455&r1=1730454&r2=1730455&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/builtin/REPLACE.java (original)
+++ pig/trunk/src/org/apache/pig/builtin/REPLACE.java Mon Feb 15 06:32:36 2016
@@ -21,14 +21,15 @@ package org.apache.pig.builtin;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
+import java.util.regex.Pattern;
import org.apache.pig.EvalFunc;
import org.apache.pig.FuncSpec;
import org.apache.pig.PigWarning;
-import org.apache.pig.data.Tuple;
import org.apache.pig.data.DataType;
-import org.apache.pig.impl.logicalLayer.schema.Schema;
+import org.apache.pig.data.Tuple;
import org.apache.pig.impl.logicalLayer.FrontendException;
+import org.apache.pig.impl.logicalLayer.schema.Schema;
/**
* REPLACE implements eval function to replace part of a string.
@@ -42,6 +43,8 @@ import org.apache.pig.impl.logicalLayer.
*/
public class REPLACE extends EvalFunc<String>
{
+ private Pattern mPattern = null;
+
/**
* Method invoked on every tuple during foreach evaluation
* @param input tuple; first column is assumed to have the column to convert
@@ -52,13 +55,29 @@ public class REPLACE extends EvalFunc<St
if (input == null || input.size() < 3)
return null;
- try{
- String source = (String)input.get(0);
- String target = (String)input.get(1);
- String replacewith = (String)input.get(2);
- return source.replaceAll(target, replacewith);
+ String source = (String)input.get(0);
+ String target = (String)input.get(1);
+
+ if (target == null) {
+ warn("Replace : Regular expression is null",
PigWarning.UDF_WARNING_1);
+ return null;
+ }
+
+ if (mPattern == null || ! target.equals(mPattern.pattern())) {
+ try {
+ mPattern = Pattern.compile(target);
+ } catch (Exception e) {
+ warn("Replace : Mal-Formed Regular expression : " + target,
PigWarning.UDF_WARNING_1);
+ return null;
+ }
+ }
+
+ String replacewith = (String)input.get(2);
+
+ try {
+ return mPattern.matcher(source).replaceAll(replacewith);
}catch(Exception e){
- warn("Failed to process input; error - " + e.getMessage(),
PigWarning.UDF_WARNING_1);
+ warn("Replace : Failed to process input; error - " +
e.getMessage(), PigWarning.UDF_WARNING_1);
return null;
}
}
Modified: pig/trunk/test/org/apache/pig/test/TestStringUDFs.java
URL:
http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/TestStringUDFs.java?rev=1730455&r1=1730454&r2=1730455&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/test/TestStringUDFs.java (original)
+++ pig/trunk/test/org/apache/pig/test/TestStringUDFs.java Mon Feb 15 06:32:36 2016
@@ -128,7 +128,10 @@ public class TestStringUDFs {
REPLACE replace = new REPLACE();
Tuple testTuple = Util.buildTuple("foobar", "z", "x");
assertEquals("foobar".replace("z", "x"), replace.exec(testTuple));
-
+
+ // Use cached version of pattern in REPLACE
+ assertEquals("foobar".replace("z", "x"), replace.exec(testTuple));
+
testTuple = Util.buildTuple("foobar", "oo", "aa");
assertEquals("foobar".replace("oo", "aa"), replace.exec(testTuple));
}