Author: gates
Date: Sat Apr 24 00:59:20 2010
New Revision: 937570

URL: http://svn.apache.org/viewvc?rev=937570&view=rev
Log:
PIG-1385 UDF to create tuples and bags.

Added:
    
hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/util/ToBag.java
    
hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/util/ToTuple.java
    
hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/util/TestToBagToTuple.java
Modified:
    hadoop/pig/trunk/contrib/CHANGES.txt

Modified: hadoop/pig/trunk/contrib/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/CHANGES.txt?rev=937570&r1=937569&r2=937570&view=diff
==============================================================================
--- hadoop/pig/trunk/contrib/CHANGES.txt (original)
+++ hadoop/pig/trunk/contrib/CHANGES.txt Sat Apr 24 00:59:20 2010
@@ -24,6 +24,8 @@ INCOMPATIBLE CHANGES
 
 IMPROVEMENTS
 
+PIG-1385 UDF to create tuples and bags (hcbusy via gates)
+
 PIG-1331 Add Owl as a contrib project (ajaykidave via gates)
 
 OPTIMIZATIONS

Added: 
hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/util/ToBag.java
URL: 
http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/util/ToBag.java?rev=937570&view=auto
==============================================================================
--- 
hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/util/ToBag.java
 (added)
+++ 
hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/util/ToBag.java
 Sat Apr 24 00:59:20 2010
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pig.piggybank.evaluation.util;
+
+
+import java.io.IOException;
+
+import org.apache.pig.EvalFunc;
+import org.apache.pig.data.BagFactory;
+import org.apache.pig.data.DataBag;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.data.TupleFactory;
+
+/**
+ * This class takes a list of items and puts each non-null item into a bag
+ * 
+ * T = foreach U generate ToBag($0, $1, $2);
+ * 
+ * It's like saying this:
+ * 
+ * T = foreach U generate {($0), ($1), ($2)}
+ *
+ */
+public class ToBag extends EvalFunc<DataBag> {
+
+    /** Wraps every non-null field of {@code input} in its own tuple and bags them. */
+    @Override
+    public DataBag exec(Tuple input) throws IOException {
+        try {
+            DataBag result = BagFactory.getInstance().newDefaultBag();
+            for (int pos = 0; pos < input.size(); ++pos) {
+                final Object field = input.get(pos);
+                if (field == null) {
+                    continue; // null fields are left out of the bag
+                }
+                Tuple wrapper = TupleFactory.getInstance().newTuple(1);
+                wrapper.set(0, field);
+                result.add(wrapper);
+            }
+            return result;
+        } catch (Exception ee) {
+            // any exception is rethrown unchecked with the original cause attached
+            throw new RuntimeException("Error while creating a bag", ee);
+        }
+    }
+}

Added: 
hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/util/ToTuple.java
URL: 
http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/util/ToTuple.java?rev=937570&view=auto
==============================================================================
--- 
hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/util/ToTuple.java
 (added)
+++ 
hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/util/ToTuple.java
 Sat Apr 24 00:59:20 2010
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pig.piggybank.evaluation.util;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.pig.EvalFunc;
+import org.apache.pig.data.DataType;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.data.TupleFactory;
+import org.apache.pig.impl.logicalLayer.schema.Schema;
+
+/**
+ * This class makes a tuple out of its parameters
+ *
+ * T = foreach U generate ToTuple($0, $1, $2);
+ * 
+ * It generates a tuple containing $0, $1, and $2
+ *
+ *
+ */
+public class ToTuple extends EvalFunc<Tuple> {
+
+    @Override
+    public Tuple exec(Tuple input) throws IOException {
+        try {
+            List<Object> items = new ArrayList<Object>();
+            for (int i = 0; i < input.size(); ++i) {
+                items.add(input.get(i));
+            }
+            return TupleFactory.getInstance().newTuple(items);
+        } catch (Exception e) {
+            throw new RuntimeException("Error while creating a tuple", e);
+        }
+    }
+
+    @Override
+    public Schema outputSchema(Schema input) {
+        try {
+            Schema tupleSchema = new Schema();
+            for (int i = 0; i < input.size(); ++i) {
+                tupleSchema.add(input.getField(i));
+            }
+            return new Schema(new Schema.FieldSchema(getSchemaName(this
+                    .getClass().getName().toLowerCase(), input), tupleSchema,
+                    DataType.TUPLE));
+        } catch (Exception e) {
+            return null;
+        }
+    }
+
+}

Added: 
hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/util/TestToBagToTuple.java
URL: 
http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/util/TestToBagToTuple.java?rev=937570&view=auto
==============================================================================
--- 
hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/util/TestToBagToTuple.java
 (added)
+++ 
hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/util/TestToBagToTuple.java
 Sat Apr 24 00:59:20 2010
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pig.piggybank.test.evaluation.util;
+
+import java.util.HashSet;
+import java.util.Set;
+
+import junit.framework.Assert;
+
+import org.apache.pig.data.DataBag;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.data.TupleFactory;
+import org.apache.pig.piggybank.evaluation.util.ToBag;
+import org.apache.pig.piggybank.evaluation.util.ToTuple;
+import org.junit.Test;
+
+public class TestToBagToTuple {
+    /** ToBag over 100 distinct integers must yield each of them in the bag. */
+    @Test
+    public void toBag() throws Exception {
+        ToBag udf = new ToBag();
+        Tuple source = TupleFactory.getInstance().newTuple();
+        for (int value = 0; value < 100; ++value) {
+            source.append(value);
+        }
+
+        DataBag produced = udf.exec(source);
+        Set<Integer> seen = new HashSet<Integer>();
+        for (Tuple wrapper : produced) {
+            seen.add((Integer) wrapper.get(0));
+        }
+
+        // the bag must hold exactly the values that went into the tuple
+        Assert.assertEquals(100, seen.size());
+        for (int value = 0; value < 100; ++value) {
+            Assert.assertTrue(seen.contains(value));
+        }
+    }
+
+    /** ToTuple must return a distinct tuple instance that equals its input. */
+    @Test
+    public void toTuple() throws Exception {
+        ToTuple udf = new ToTuple();
+        Tuple source = TupleFactory.getInstance().newTuple();
+        for (int value = 0; value < 100; ++value) {
+            source.append(value);
+        }
+
+        Tuple copy = udf.exec(source);
+        Assert.assertFalse(source == copy);
+        Assert.assertEquals(source, copy);
+    }
+}


Reply via email to