Author: olga Date: Tue Jun 30 16:08:44 2009 New Revision: 789814 URL: http://svn.apache.org/viewvc?rev=789814&view=rev Log: PIG-868: added string manipulation functions (bennies via olgan)
Added: hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/string/INDEXOF.java hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/string/LASTINDEXOF.java hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/string/LOWER.java hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/string/REPLACE.java hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/string/SUBSTRING.java Modified: hadoop/pig/trunk/contrib/CHANGES.txt hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/TestEvalString.java Modified: hadoop/pig/trunk/contrib/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/CHANGES.txt?rev=789814&r1=789813&r2=789814&view=diff ============================================================================== --- hadoop/pig/trunk/contrib/CHANGES.txt (original) +++ hadoop/pig/trunk/contrib/CHANGES.txt Tue Jun 30 16:08:44 2009 @@ -1,3 +1,4 @@ +PIG-868: added string manipulation functions (bennies via olgan) PIG-273: addition of Top and SearchQuery UDFs (ankur via olgan) PIG-246: created UDF repository (olgan) PIG-245: UDF wrappers for Java Math functions (ajaygarg via olgan) Added: hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/string/INDEXOF.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/string/INDEXOF.java?rev=789814&view=auto ============================================================================== --- hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/string/INDEXOF.java (added) +++ hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/string/INDEXOF.java Tue Jun 30 16:08:44 2009 @@ -0,0 +1,72 @@ +/* + * Licensed to the
Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.pig.piggybank.evaluation.string; + +import java.io.IOException; +import java.util.List; +import java.util.ArrayList; + +import org.apache.pig.EvalFunc; +import org.apache.pig.data.Tuple; +import org.apache.pig.data.DataType; +import org.apache.pig.impl.logicalLayer.schema.Schema; +import org.apache.pig.impl.logicalLayer.FrontendException; +import org.apache.pig.FuncSpec; + + +/** + * string.INDEXOF implements eval function to search for the first occurrence of a string + * Example: + * register pigudfs.jar; + * A = load 'mydata' as (name); + * B = foreach A generate string.INDEXOF(name, ','); + * dump B; + */ +public class INDEXOF extends EvalFunc<Integer> +{ + /** + * Method invoked on every tuple during foreach evaluation; returns null when the input cannot be processed + * @param input tuple; first column is the string to search in + * the second column is the string we search for + * the third is an optional column from where to start the search + * @exception java.io.IOException + */ + public Integer exec(Tuple input) throws IOException { + if (input == null || input.size() == 0) + return null; + + try{ + String str = (String)input.get(0); + String search = (String)input.get(1); + int fromIndex = 0; + if (input.size()
==3) + fromIndex = (Integer)input.get(2); + return str.indexOf(search, fromIndex); + }catch(Exception e){ + System.err.println("Failed to process input; error - " + e.getMessage()); + return null; + } + } + + @Override + public Schema outputSchema(Schema input) { + return new Schema(new Schema.FieldSchema(null, DataType.INTEGER)); + } + +} \ No newline at end of file Added: hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/string/LASTINDEXOF.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/string/LASTINDEXOF.java?rev=789814&view=auto ============================================================================== --- hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/string/LASTINDEXOF.java (added) +++ hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/string/LASTINDEXOF.java Tue Jun 30 16:08:44 2009 @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.apache.pig.piggybank.evaluation.string; + +import java.io.IOException; +import java.util.List; +import java.util.ArrayList; + +import org.apache.pig.EvalFunc; +import org.apache.pig.data.Tuple; +import org.apache.pig.data.DataType; +import org.apache.pig.impl.logicalLayer.schema.Schema; +import org.apache.pig.impl.logicalLayer.FrontendException; +import org.apache.pig.FuncSpec; + + +/** + * string.LASTINDEXOF implements eval function to search for the last occurrence of a string + * Example: + * register pigudfs.jar; + * A = load 'mydata' as (name); + * B = foreach A generate string.LASTINDEXOF(name, ','); + * dump B; + */ +public class LASTINDEXOF extends EvalFunc<Integer> +{ + /** + * Method invoked on every tuple during foreach evaluation; returns null when the input cannot be processed + * @param input tuple; first column is the string to search in, second column is the string to search for + * @exception java.io.IOException + */ + public Integer exec(Tuple input) throws IOException { + if (input == null || input.size() == 0) + return null; + + try{ + String str = (String)input.get(0); + String search = (String)input.get(1); + return str.lastIndexOf(search); + }catch(Exception e){ + System.err.println("Failed to process input; error - " + e.getMessage()); + return null; + } + } + + @Override + public Schema outputSchema(Schema input) { + return new Schema(new Schema.FieldSchema(null, DataType.INTEGER)); + } + +} \ No newline at end of file Added: hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/string/LOWER.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/string/LOWER.java?rev=789814&view=auto ============================================================================== --- hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/string/LOWER.java (added) +++ hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/string/LOWER.java Tue Jun
30 16:08:44 2009 @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.pig.piggybank.evaluation.string; + +import java.io.IOException; +import java.util.List; +import java.util.ArrayList; + +import org.apache.pig.EvalFunc; +import org.apache.pig.data.Tuple; +import org.apache.pig.data.DataType; +import org.apache.pig.impl.logicalLayer.schema.Schema; +import org.apache.pig.impl.logicalLayer.FrontendException; +import org.apache.pig.FuncSpec; + + +/** + * string.LOWER implements eval function to convert a string to lower case + * Example: + * register pigudfs.jar; + * A = load 'mydata' as (name); + * B = foreach A generate string.LOWER(name); + * dump B; + */ +public class LOWER extends EvalFunc<String> +{ + /** + * Method invoked on every tuple during foreach evaluation; returns null when the input cannot be processed + * @param input tuple; first column is the string to convert + * @exception java.io.IOException + */ + public String exec(Tuple input) throws IOException { + if (input == null || input.size() == 0) + return null; + + try{ + String str = (String)input.get(0); + return str.toLowerCase(); + }catch(Exception e){ + System.err.println("Failed to process input; error - " + e.getMessage()); + return null;
+ } + } + + //@Override + /** + * This method gives a name to the output column. + * @param input - schema of the input data + * @return schema of the output data: a single chararray field named via getSchemaName + */ + public Schema outputSchema(Schema input) { + return new Schema(new Schema.FieldSchema(getSchemaName(this.getClass().getName().toLowerCase(), input), DataType.CHARARRAY)); + } + + /* (non-Javadoc) + * @see org.apache.pig.EvalFunc#getArgToFuncMapping() + */ + @Override + public List<FuncSpec> getArgToFuncMapping() throws FrontendException { + List<FuncSpec> funcList = new ArrayList<FuncSpec>(); + funcList.add(new FuncSpec(this.getClass().getName(), new Schema(new Schema.FieldSchema(null, DataType.CHARARRAY)))); + + return funcList; + } + +} \ No newline at end of file Added: hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/string/REPLACE.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/string/REPLACE.java?rev=789814&view=auto ============================================================================== --- hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/string/REPLACE.java (added) +++ hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/string/REPLACE.java Tue Jun 30 16:08:44 2009 @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.pig.piggybank.evaluation.string; + +import java.io.IOException; +import java.util.List; +import java.util.ArrayList; + +import org.apache.pig.EvalFunc; +import org.apache.pig.data.Tuple; +import org.apache.pig.data.DataType; +import org.apache.pig.impl.logicalLayer.schema.Schema; +import org.apache.pig.impl.logicalLayer.FrontendException; +import org.apache.pig.FuncSpec; + + +/** + * string.REPLACE implements eval function to replace part of a string. + * Example: + * register pigudfs.jar; + * A = load 'mydata' as (name); + * B = foreach A generate string.REPLACE(name, 'blabla', 'bla'); + * dump B; + */ +public class REPLACE extends EvalFunc<String> +{ + /** + * Method invoked on every tuple during foreach evaluation; returns null when the input cannot be processed + * @param input tuple; first column is the source string, second is passed to String.replaceAll as a regular expression, third is the replacement + * @exception java.io.IOException + */ + public String exec(Tuple input) throws IOException { + if (input == null || input.size() == 0) + return null; + + try{ + String source = (String)input.get(0); + String target = (String)input.get(1); + String replacewith = (String)input.get(2); + return source.replaceAll(target, replacewith); + }catch(Exception e){ + System.err.println("Failed to process input; error - " + e.getMessage()); + return null; + } + } + + //@Override +// /** +// * This method gives a name to the column.
+// * @param input - schema of the input data +// * @return schema of the input data +// */ +// public Schema outputSchema(Schema input) { +// return new Schema(new Schema.FieldSchema(getSchemaName(this.getClass().getName().toLowerCase(), input), DataType.CHARARRAY)); +// } + + @Override + public Schema outputSchema(Schema input) { + return new Schema(new Schema.FieldSchema(null, DataType.CHARARRAY)); + } + +} \ No newline at end of file Added: hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/string/SUBSTRING.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/string/SUBSTRING.java?rev=789814&view=auto ============================================================================== --- hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/string/SUBSTRING.java (added) +++ hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/string/SUBSTRING.java Tue Jun 30 16:08:44 2009 @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.apache.pig.piggybank.evaluation.string; + +import java.io.IOException; +import java.util.List; +import java.util.ArrayList; + +import org.apache.pig.EvalFunc; +import org.apache.pig.data.Tuple; +import org.apache.pig.data.DataType; +import org.apache.pig.impl.logicalLayer.schema.Schema; +import org.apache.pig.impl.logicalLayer.FrontendException; +import org.apache.pig.FuncSpec; + + +/** + * string.SUBSTRING implements eval function to get a part of a string. + * Example: + * register pigudfs.jar; + * A = load 'mydata' as (name); + * B = foreach A generate string.SUBSTRING(name, 10, 12); + * dump B; + */ +public class SUBSTRING extends EvalFunc<String> +{ + /** + * Method invoked on every tuple during foreach evaluation; returns null when the input cannot be processed + * @param input tuple; first column is the source string, second is the begin index (inclusive), third is the end index (exclusive, clamped to the string length) + * @exception java.io.IOException + */ + public String exec(Tuple input) throws IOException { + if (input == null || input.size() == 0) + return null; + + try{ + String source = (String)input.get(0); + Integer beginindex = (Integer)input.get(1); + Integer endindex = Math.min(source.length(), (Integer)input.get(2)); + return source.substring(beginindex, endindex); + }catch(Exception e){ + System.err.println("Failed to process input; error - " + e.getMessage()); + return null; + } + } + + //@Override +// /** +// * This method gives a name to the column.
+// * @param input - schema of the input data +// * @return schema of the input data +// */ +// public Schema outputSchema(Schema input) { +// return new Schema(new Schema.FieldSchema(getSchemaName(this.getClass().getName().toLowerCase(), input), DataType.CHARARRAY)); +// } + + @Override + public Schema outputSchema(Schema input) { + return new Schema(new Schema.FieldSchema(null, DataType.CHARARRAY)); + } + +} \ No newline at end of file Modified: hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/TestEvalString.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/TestEvalString.java?rev=789814&r1=789813&r2=789814&view=diff ============================================================================== --- hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/TestEvalString.java (original) +++ hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/TestEvalString.java Tue Jun 30 16:08:44 2009 @@ -21,6 +21,8 @@ import java.io.PrintWriter; import java.util.Iterator; import java.util.Properties; +import java.util.LinkedList; +import java.util.List; import junit.framework.TestCase; @@ -30,7 +32,7 @@ import org.apache.pig.data.DefaultTupleFactory; import org.apache.pig.impl.logicalLayer.schema.Schema; -import org.apache.pig.piggybank.evaluation.string.UPPER; +import org.apache.pig.piggybank.evaluation.string.*; // This class tests all string eval functions.
@@ -56,4 +58,83 @@ //assertTrue(outSchema.toString().equals("upper_" + fieldName)); } + + @Test + public void testLOWER() throws Exception { + LOWER func = new LOWER(); + + // test execution: every letter of the input is lower-cased + String in = "Hello World!"; + String expected = "hello world!"; + + Tuple input = DefaultTupleFactory.getInstance().newTuple(in); + + String output = func.exec(input); + assertTrue(output.equals(expected)); + } + + @Test + public void testINDEXOF() throws Exception { + INDEXOF func = new INDEXOF(); + + // test execution: the first o of the input is at index 4 + List l = new LinkedList(); + l.add("Hello World!"); + l.add("o"); + + Tuple input = DefaultTupleFactory.getInstance().newTuple(l); + + Integer output = func.exec(input); + assertTrue(output.intValue()==4); + } + + @Test + public void testLASTINDEXOF() throws Exception { + LASTINDEXOF func = new LASTINDEXOF(); + + // test execution: the last o of the input is at index 7 + List l = new LinkedList(); + l.add("Hello World!"); + l.add("o"); + + Tuple input = DefaultTupleFactory.getInstance().newTuple(l); + + Integer output = func.exec(input); + assertTrue(output.intValue()==7); + } + + @Test + public void testREPLACE() throws Exception { + REPLACE func = new REPLACE(); + + // test execution: every o of the input is replaced by a + List l = new LinkedList(); + l.add("Hello World!"); + l.add("o"); + l.add("a"); + String expected = "Hella Warld!"; + + Tuple input = DefaultTupleFactory.getInstance().newTuple(l); + + String output = func.exec(input); + assertTrue(output.equals(expected)); + } + + @Test + public void testSUBSTRING() throws Exception { + SUBSTRING func = new SUBSTRING(); + + // test execution: characters at indices 1 to 4 of the input are returned + List l = new LinkedList(); + l.add("Hello World!"); + l.add(1); + l.add(5); + String expected = "ello"; + + Tuple input = DefaultTupleFactory.getInstance().newTuple(l); + + String output = func.exec(input); + assertTrue(output.equals(expected)); + } + }