Author: olga
Date: Tue Jun 30 16:08:44 2009
New Revision: 789814

URL: http://svn.apache.org/viewvc?rev=789814&view=rev
Log:
PIG-868: added string manipulation functions (bennies via olgan)

Added:
    
hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/string/INDEXOF.java
    
hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/string/LASTINDEXOF.java
    
hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/string/LOWER.java
    
hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/string/REPLACE.java
    
hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/string/SUBSTRING.java
Modified:
    hadoop/pig/trunk/contrib/CHANGES.txt
    
hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/TestEvalString.java

Modified: hadoop/pig/trunk/contrib/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/CHANGES.txt?rev=789814&r1=789813&r2=789814&view=diff
==============================================================================
--- hadoop/pig/trunk/contrib/CHANGES.txt (original)
+++ hadoop/pig/trunk/contrib/CHANGES.txt Tue Jun 30 16:08:44 2009
@@ -1,3 +1,4 @@
+PIG-868: added string manipulation functions (bennies via olgan)
 PIG-273: addition of Top and SearchQuery UDFs (ankur via olgan)
 PIG-246: created UDF repository (olgan)
 PIG-245: UDF wrappers for Java Math functions (ajaygarg via olgan)

Added: 
hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/string/INDEXOF.java
URL: 
http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/string/INDEXOF.java?rev=789814&view=auto
==============================================================================
--- 
hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/string/INDEXOF.java
 (added)
+++ 
hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/string/INDEXOF.java
 Tue Jun 30 16:08:44 2009
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.pig.piggybank.evaluation.string;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.ArrayList;
+
+import org.apache.pig.EvalFunc;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.data.DataType;
+import org.apache.pig.impl.logicalLayer.schema.Schema;
+import org.apache.pig.impl.logicalLayer.FrontendException;
+import org.apache.pig.FuncSpec;
+
+
+/**
+ * string.INDEXOF implements eval function to search for a string.
+ * The result is the index of the first occurrence of the search string,
+ * or -1 if the search string does not occur.
+ * Example:
+ *      register pigudfs.jar;
+ *      A = load 'mydata' as (name);
+ *      B = foreach A generate string.INDEXOF(name, ',');
+ *      dump B;
+ */
+public class INDEXOF extends EvalFunc<Integer>
+{
+    /**
+     * Method invoked on every tuple during foreach evaluation
+     * @param input tuple; the first column is the string to search in,
+     *              the second column is the string we search for,
+     *              the third is an optional column holding the index from
+     *              where to start the search (0 when it is not given)
+     * @return index of the first occurrence at or after the start index,
+     *         -1 if there is none, or null if the input is null, empty or
+     *         cannot be processed
+     * @exception java.io.IOException
+     */
+    public Integer exec(Tuple input) throws IOException {
+        if (input == null || input.size() == 0)
+            return null;
+
+        try{
+            String str = (String)input.get(0);
+            String search = (String)input.get(1);
+            int fromIndex = 0;
+            // The optional start index is the third column (position 2);
+            // position 1 holds the search string and is not an Integer.
+            if (input.size() == 3)
+                fromIndex = (Integer)input.get(2);
+            return str.indexOf(search, fromIndex);
+        }catch(Exception e){
+            // Best effort: report the problem and yield a null result
+            // instead of failing the whole job on one bad record.
+            System.err.println("Failed to process input; error - "
+                    + e.getMessage());
+            return null;
+        }
+    }
+
+    /**
+     * Describes the result as a single integer field with a null name.
+     */
+    @Override
+    public Schema outputSchema(Schema input) {
+        return new Schema(new Schema.FieldSchema(null, DataType.INTEGER));
+    }
+
+}
\ No newline at end of file

Added: 
hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/string/LASTINDEXOF.java
URL: 
http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/string/LASTINDEXOF.java?rev=789814&view=auto
==============================================================================
--- 
hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/string/LASTINDEXOF.java
 (added)
+++ 
hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/string/LASTINDEXOF.java
 Tue Jun 30 16:08:44 2009
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.pig.piggybank.evaluation.string;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.ArrayList;
+
+import org.apache.pig.EvalFunc;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.data.DataType;
+import org.apache.pig.impl.logicalLayer.schema.Schema;
+import org.apache.pig.impl.logicalLayer.FrontendException;
+import org.apache.pig.FuncSpec;
+
+
+/**
+ * string.LASTINDEXOF implements eval function to search for the last
+ * occurrence of a string
+ * Example:
+ *      register pigudfs.jar;
+ *      A = load 'mydata' as (name);
+ *      B = foreach A generate string.LASTINDEXOF(name, ',');
+ *      dump B;
+ */
+public class LASTINDEXOF extends EvalFunc<Integer>
+{
+    /**
+     * Method invoked on every tuple during foreach evaluation
+     * @param input tuple; the first column is the string to search in,
+     *              the second column is the string we search for
+     * @return index of the last occurrence of the search string, -1 if
+     *         there is none, or null if the input is null, empty or
+     *         cannot be processed
+     * @exception java.io.IOException
+     */
+    public Integer exec(Tuple input) throws IOException {
+        final boolean nothingToSearch = input == null || input.size() == 0;
+        if (nothingToSearch) {
+            return null;
+        }
+
+        Integer position;
+        try {
+            final String haystack = (String) input.get(0);
+            final String needle = (String) input.get(1);
+            position = haystack.lastIndexOf(needle);
+        } catch (Exception failure) {
+            // Best effort: report the problem and yield a null result.
+            System.err.println("Failed to process input; error - "
+                    + failure.getMessage());
+            position = null;
+        }
+        return position;
+    }
+
+    /**
+     * Describes the result as a single integer field with a null name.
+     */
+    @Override
+    public Schema outputSchema(Schema input) {
+        final Schema.FieldSchema resultField =
+                new Schema.FieldSchema(null, DataType.INTEGER);
+        return new Schema(resultField);
+    }
+
+}
\ No newline at end of file

Added: 
hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/string/LOWER.java
URL: 
http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/string/LOWER.java?rev=789814&view=auto
==============================================================================
--- 
hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/string/LOWER.java
 (added)
+++ 
hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/string/LOWER.java
 Tue Jun 30 16:08:44 2009
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.pig.piggybank.evaluation.string;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.ArrayList;
+
+import org.apache.pig.EvalFunc;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.data.DataType;
+import org.apache.pig.impl.logicalLayer.schema.Schema;
+import org.apache.pig.impl.logicalLayer.FrontendException;
+import org.apache.pig.FuncSpec;
+
+
+/**
+ * string.LOWER implements eval function to convert a string to lower case
+ * Example:
+ *      register pigudfs.jar;
+ *      A = load 'mydata' as (name);
+ *      B = foreach A generate string.LOWER(name);
+ *      dump B;
+ */
+public class LOWER extends EvalFunc<String>
+{
+    /**
+     * Method invoked on every tuple during foreach evaluation
+     * @param input tuple; first column is assumed to have the column to
+     *              convert
+     * @return the lower-cased string, or null if the input is null, empty
+     *         or cannot be processed
+     * @exception java.io.IOException
+     */
+    public String exec(Tuple input) throws IOException {
+        if (input != null && input.size() != 0) {
+            try {
+                final String original = (String) input.get(0);
+                return original.toLowerCase();
+            } catch (Exception problem) {
+                // Best effort: report the problem and fall through to
+                // the null result below.
+                System.err.println("Failed to process input; error - "
+                        + problem.getMessage());
+            }
+        }
+        return null;
+    }
+
+    /**
+     * This method gives a name to the column.
+     * @param input - schema of the input data
+     * @return schema of the output: a single chararray field whose name
+     *         comes from getSchemaName, given the lower-cased class name
+     *         and the input schema
+     */
+    public Schema outputSchema(Schema input) {
+        final String prefix = this.getClass().getName().toLowerCase();
+        final String fieldName = getSchemaName(prefix, input);
+        return new Schema(
+                new Schema.FieldSchema(fieldName, DataType.CHARARRAY));
+    }
+
+    /* (non-Javadoc)
+     * Registers one mapping: this class for a single chararray argument.
+     * @see org.apache.pig.EvalFunc#getArgToFuncMapping()
+     */
+    @Override
+    public List<FuncSpec> getArgToFuncMapping() throws FrontendException {
+        final Schema chararrayArg = new Schema(
+                new Schema.FieldSchema(null, DataType.CHARARRAY));
+        final FuncSpec spec =
+                new FuncSpec(this.getClass().getName(), chararrayArg);
+
+        final List<FuncSpec> mapping = new ArrayList<FuncSpec>();
+        mapping.add(spec);
+        return mapping;
+    }
+
+}
\ No newline at end of file

Added: 
hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/string/REPLACE.java
URL: 
http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/string/REPLACE.java?rev=789814&view=auto
==============================================================================
--- 
hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/string/REPLACE.java
 (added)
+++ 
hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/string/REPLACE.java
 Tue Jun 30 16:08:44 2009
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.pig.piggybank.evaluation.string;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.ArrayList;
+
+import org.apache.pig.EvalFunc;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.data.DataType;
+import org.apache.pig.impl.logicalLayer.schema.Schema;
+import org.apache.pig.impl.logicalLayer.FrontendException;
+import org.apache.pig.FuncSpec;
+
+
+/**
+ * string.REPLACE implements eval function to replace part of a string.
+ * The search string is handed to String.replaceAll, so it is interpreted
+ * as a regular expression and every match is replaced.
+ * Example:
+ *      register pigudfs.jar;
+ *      A = load 'mydata' as (name);
+ *      B = foreach A generate string.REPLACE(name, 'blabla', 'bla');
+ *      dump B;
+ */
+public class REPLACE extends EvalFunc<String>
+{
+    /**
+     * Method invoked on every tuple during foreach evaluation
+     * @param input tuple; the first column is the string to convert,
+     *              the second column is the regular expression to look
+     *              for, the third column is the replacement
+     * @return the string with every match replaced, or null if the input
+     *         is null, empty or cannot be processed
+     * @exception java.io.IOException
+     */
+    public String exec(Tuple input) throws IOException {
+        if (input == null) {
+            return null;
+        }
+        if (input.size() == 0) {
+            return null;
+        }
+
+        try {
+            final String text = (String) input.get(0);
+            final String pattern = (String) input.get(1);
+            final String substitute = (String) input.get(2);
+            return text.replaceAll(pattern, substitute);
+        } catch (Exception cause) {
+            // Best effort: report the problem and yield a null result.
+            System.err.println("Failed to process input; error - "
+                    + cause.getMessage());
+            return null;
+        }
+    }
+
+    /**
+     * Describes the result as a single chararray field with a null name.
+     */
+    @Override
+    public Schema outputSchema(Schema input) {
+        final Schema.FieldSchema resultField =
+                new Schema.FieldSchema(null, DataType.CHARARRAY);
+        return new Schema(resultField);
+    }
+
+}
\ No newline at end of file

Added: 
hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/string/SUBSTRING.java
URL: 
http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/string/SUBSTRING.java?rev=789814&view=auto
==============================================================================
--- 
hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/string/SUBSTRING.java
 (added)
+++ 
hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/string/SUBSTRING.java
 Tue Jun 30 16:08:44 2009
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.pig.piggybank.evaluation.string;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.ArrayList;
+
+import org.apache.pig.EvalFunc;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.data.DataType;
+import org.apache.pig.impl.logicalLayer.schema.Schema;
+import org.apache.pig.impl.logicalLayer.FrontendException;
+import org.apache.pig.FuncSpec;
+
+
+/**
+ * string.SUBSTRING implements eval function to get a part of a string.
+ * Example:
+ *      register pigudfs.jar;
+ *      A = load 'mydata' as (name);
+ *      B = foreach A generate string.SUBSTRING(name, 10, 12);
+ *      dump B;
+ */
+public class SUBSTRING extends EvalFunc<String>
+{
+    /**
+     * Method invoked on every tuple during foreach evaluation
+     * @param input tuple; the first column is the source string, the
+     *              second column is the begin index (inclusive), the
+     *              third column is the end index (exclusive)
+     * @return the requested part of the string, or null if the input is
+     *         null, empty or cannot be processed (for example when an
+     *         index lies outside the string)
+     * @exception java.io.IOException
+     */
+    public String exec(Tuple input) throws IOException {
+        if (input == null || input.size() == 0)
+            return null;
+
+        String piece = null;
+        try {
+            final String whole = (String) input.get(0);
+            final Integer from = (Integer) input.get(1);
+            final Integer to = (Integer) input.get(2);
+            piece = whole.substring(from, to);
+        } catch (Exception trouble) {
+            // Best effort: report the problem; the result stays null.
+            System.err.println("Failed to process input; error - "
+                    + trouble.getMessage());
+        }
+        return piece;
+    }
+
+    /**
+     * Describes the result as a single chararray field with a null name.
+     */
+    @Override
+    public Schema outputSchema(Schema input) {
+        final Schema.FieldSchema resultField =
+                new Schema.FieldSchema(null, DataType.CHARARRAY);
+        return new Schema(resultField);
+    }
+
+}
\ No newline at end of file

Modified: 
hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/TestEvalString.java
URL: 
http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/TestEvalString.java?rev=789814&r1=789813&r2=789814&view=diff
==============================================================================
--- 
hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/TestEvalString.java
 (original)
+++ 
hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/TestEvalString.java
 Tue Jun 30 16:08:44 2009
@@ -21,6 +21,8 @@
 import java.io.PrintWriter;
 import java.util.Iterator;
 import java.util.Properties;
+import java.util.LinkedList;
+import java.util.List;
 
 import junit.framework.TestCase;
 
@@ -30,7 +32,7 @@
 import org.apache.pig.data.DefaultTupleFactory;
 import org.apache.pig.impl.logicalLayer.schema.Schema;
 
-import org.apache.pig.piggybank.evaluation.string.UPPER;
+import org.apache.pig.piggybank.evaluation.string.*;
 
 // This class tests all string eval functions.
 
@@ -56,4 +58,83 @@
         //assertTrue(outSchema.toString().equals("upper_" + fieldName));
 
     }
+
+    @Test
+    public void testLOWER() throws Exception {
+        // A mixed-case string must come back fully lower-cased.
+        final String expected = "hello world!";
+        Tuple input =
+            DefaultTupleFactory.getInstance().newTuple("Hello World!");
+
+        String output = new LOWER().exec(input);
+
+        assertTrue(output.equals(expected));
+    }
+
+    @Test
+    public void testINDEXOF() throws Exception {
+        // The first "o" in "Hello World!" sits at index 4.
+        List<Object> columns = new LinkedList<Object>();
+        columns.add("Hello World!");
+        columns.add("o");
+        Tuple input = DefaultTupleFactory.getInstance().newTuple(columns);
+
+        Integer output = new INDEXOF().exec(input);
+
+        assertTrue(output.intValue() == 4);
+    }
+
+    @Test
+    public void testLASTINDEXOF() throws Exception {
+        // The last "o" in "Hello World!" sits at index 7.
+        List<Object> columns = new LinkedList<Object>();
+        columns.add("Hello World!");
+        columns.add("o");
+        Tuple input = DefaultTupleFactory.getInstance().newTuple(columns);
+
+        Integer output = new LASTINDEXOF().exec(input);
+
+        assertTrue(output.intValue() == 7);
+    }
+
+    @Test
+    public void testREPLACE() throws Exception {
+        // Every "o" must be replaced by "a".
+        final String expected = "Hella Warld!";
+        List<Object> columns = new LinkedList<Object>();
+        columns.add("Hello World!");
+        columns.add("o");
+        columns.add("a");
+        Tuple input = DefaultTupleFactory.getInstance().newTuple(columns);
+
+        String output = new REPLACE().exec(input);
+
+        assertTrue(output.equals(expected));
+    }
+
+    @Test
+    public void testSUBSTRING() throws Exception {
+        // Characters 1 (inclusive) to 5 (exclusive) spell "ello".
+        final String expected = "ello";
+        List<Object> columns = new LinkedList<Object>();
+        columns.add("Hello World!");
+        columns.add(1);
+        columns.add(5);
+        Tuple input = DefaultTupleFactory.getInstance().newTuple(columns);
+
+        String output = new SUBSTRING().exec(input);
+
+        assertTrue(output.equals(expected));
+    }
+
 }


Reply via email to