Author: gates Date: Sat Apr 24 00:59:20 2010 New Revision: 937570 URL: http://svn.apache.org/viewvc?rev=937570&view=rev Log: PIG-1385 UDF to create tuples and bags.
Added: hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/util/ToBag.java hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/util/ToTuple.java hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/util/TestToBagToTuple.java Modified: hadoop/pig/trunk/contrib/CHANGES.txt Modified: hadoop/pig/trunk/contrib/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/CHANGES.txt?rev=937570&r1=937569&r2=937570&view=diff ============================================================================== --- hadoop/pig/trunk/contrib/CHANGES.txt (original) +++ hadoop/pig/trunk/contrib/CHANGES.txt Sat Apr 24 00:59:20 2010 @@ -24,6 +24,8 @@ INCOMPATIBLE CHANGES IMPROVEMENTS +PIG-1385 UDF to create tuples and bags (hcbusy via gates) + PIG-1331 Add Owl as a contrib project (ajaykidave via gates) OPTIMIZATIONS Added: hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/util/ToBag.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/util/ToBag.java?rev=937570&view=auto ============================================================================== --- hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/util/ToBag.java (added) +++ hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/util/ToBag.java Sat Apr 24 00:59:20 2010 @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pig.piggybank.evaluation.util; + + +import java.io.IOException; + +import org.apache.pig.EvalFunc; +import org.apache.pig.data.BagFactory; +import org.apache.pig.data.DataBag; +import org.apache.pig.data.Tuple; +import org.apache.pig.data.TupleFactory; + +/** + * This class takes a list of items and puts them into a bag. + * + * T = foreach U generate ToBag($0, $1, $2); + * + * It's like saying this: + * + * T = foreach U generate {($0), ($1), ($2)} + * + * Each non-null field of the input tuple is wrapped in its own + * single-field tuple before being added to the bag. Null fields are + * skipped, so the bag may hold fewer tuples than the input has fields. + * + */ +public class ToBag extends EvalFunc<DataBag> { + + /** + * Builds a new default bag from the fields of the input tuple. + * + * @param input tuple whose fields become the contents of the bag + * @return a bag holding one single-field tuple per non-null input field + * @throws RuntimeException wrapping any exception raised while reading + * the input or filling the bag + */ + @Override + public DataBag exec(Tuple input) throws IOException { + try { + DataBag bag = BagFactory.getInstance().newDefaultBag(); + + for (int i = 0; i < input.size(); ++i) { + final Object object = input.get(i); + if (object != null) { + Tuple tp2 = TupleFactory.getInstance().newTuple(1); + tp2.set(0, object); + bag.add(tp2); + } + } + + return bag; + } catch (Exception ee) { + throw new RuntimeException("Error while creating a bag", ee); + } + } + +} Added: hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/util/ToTuple.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/util/ToTuple.java?rev=937570&view=auto ============================================================================== --- hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/util/ToTuple.java (added) +++ 
hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/util/ToTuple.java Sat Apr 24 00:59:20 2010 @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pig.piggybank.evaluation.util; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.pig.EvalFunc; +import org.apache.pig.data.DataType; +import org.apache.pig.data.Tuple; +import org.apache.pig.data.TupleFactory; +import org.apache.pig.impl.logicalLayer.schema.Schema; + +/** + * This class makes a tuple out of the parameters. + * + * T = foreach U generate ToTuple($0, $1, $2); + * + * It generates a tuple containing $0, $1, and $2 + * + * The fields are copied in order into a newly created tuple; no null + * check is applied to them. + * + */ +public class ToTuple extends EvalFunc<Tuple> { + + /** + * Copies every field of the input tuple, in order, into a new tuple. + * + * @param input tuple whose fields are copied + * @return a tuple created by the TupleFactory from the copied fields + * @throws RuntimeException wrapping any exception raised while reading + * the input or creating the tuple + */ + @Override + public Tuple exec(Tuple input) throws IOException { + try { + List<Object> items = new ArrayList<Object>(); + for (int i = 0; i < input.size(); ++i) { + items.add(input.get(i)); + } + return TupleFactory.getInstance().newTuple(items); + } catch (Exception e) { + throw new RuntimeException("Error while creating a tuple", e); + } + } + + /** + * Describes the output as a single field of type TUPLE whose inner + * schema repeats the field schemas of the input, in order. The field + * alias is obtained from getSchemaName using the lower-cased class name. + * + * @param input schema of the arguments passed to this function + * @return the output schema, or null if any exception is raised while + * building it + */ + @Override + public Schema outputSchema(Schema input) { + try { + Schema tupleSchema = new Schema(); + for (int i 
= 0; i < input.size(); ++i) { + tupleSchema.add(input.getField(i)); + } + return new Schema(new Schema.FieldSchema(getSchemaName(this + .getClass().getName().toLowerCase(), input), tupleSchema, + DataType.TUPLE)); + } catch (Exception e) { + return null; + } + } + +} Added: hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/util/TestToBagToTuple.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/util/TestToBagToTuple.java?rev=937570&view=auto ============================================================================== --- hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/util/TestToBagToTuple.java (added) +++ hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/util/TestToBagToTuple.java Sat Apr 24 00:59:20 2010 @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.pig.piggybank.test.evaluation.util; + +import java.util.HashSet; +import java.util.Set; + +import junit.framework.Assert; + +import org.apache.pig.data.DataBag; +import org.apache.pig.data.Tuple; +import org.apache.pig.data.TupleFactory; +import org.apache.pig.piggybank.evaluation.util.ToBag; +import org.apache.pig.piggybank.evaluation.util.ToTuple; +import org.junit.Test; + +/** + * Unit tests for the ToBag and ToTuple functions. + */ +public class TestToBagToTuple { + /** + * Passes a tuple holding the integers 0 to 99 to ToBag and checks that + * the first fields of the tuples in the returned bag are exactly those + * 100 distinct integers. + */ + @Test + public void toBag() throws Exception{ + ToBag tb = new ToBag(); + + Tuple input = TupleFactory.getInstance().newTuple(); + for (int i = 0; i < 100; ++i) { + input.append(i); + } + + Set<Integer> s = new HashSet<Integer>(); + DataBag db = tb.exec(input); + for (Tuple t : db) { + s.add((Integer) t.get(0)); + } + + // finally check the bag had everything we put in the tuple. + Assert.assertEquals(100, s.size()); + for (int i = 0; i < 100; ++i) { + Assert.assertTrue(s.contains(i)); + } + } + + /** + * Passes a tuple holding the integers 0 to 99 to ToTuple and checks that + * the result is a different object that is equal to the input. + */ + @Test + public void toTuple() throws Exception{ + ToTuple tb = new ToTuple(); + + Tuple input = TupleFactory.getInstance().newTuple(); + for (int i = 0; i < 100; ++i) { + input.append(i); + } + + Tuple output = tb.exec(input); + Assert.assertFalse(input == output); + Assert.assertEquals(input, output); + } +}