http://git-wip-us.apache.org/repos/asf/bigtop/blob/41213f98/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/util/IntBoolConversionPigTests.java ---------------------------------------------------------------------- diff --git a/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/util/IntBoolConversionPigTests.java b/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/util/IntBoolConversionPigTests.java new file mode 100644 index 0000000..505783a --- /dev/null +++ b/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/util/IntBoolConversionPigTests.java @@ -0,0 +1,59 @@ +package org.apache.bigtop.itest.datafu.util; + +import org.apache.pig.pigunit.PigTest; +import org.junit.Test; + +import org.apache.bigtop.itest.datafu.PigTests; + +public class IntBoolConversionPigTests extends PigTests +{ + @Test + public void intToBoolTest() throws Exception + { + PigTest test = createPigTest("datafu/util/intToBoolTest.pig"); + + String[] input = { + "", // null + "0", + "1" + }; + + String[] output = { + "(false)", + "(false)", + "(true)" + }; + + test.assertOutput("data",input,"data2",output); + } + + @Test + public void intToBoolToIntTest() throws Exception + { + PigTest test = createPigTest("datafu/util/intToBoolToIntTest.pig"); + + String[] input = { + "", // null + "0", + "1", + "2", + "-1", + "-2", + "0", + "" + }; + + String[] output = { + "(0)", + "(0)", + "(1)", + "(1)", + "(1)", + "(1)", + "(0)", + "(0)" + }; + + test.assertOutput("data",input,"data3",output); + } +}
http://git-wip-us.apache.org/repos/asf/bigtop/blob/41213f98/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/aliasBagFieldsTest.pig ---------------------------------------------------------------------- diff --git a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/aliasBagFieldsTest.pig b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/aliasBagFieldsTest.pig new file mode 100644 index 0000000..247c832 --- /dev/null +++ b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/aliasBagFieldsTest.pig @@ -0,0 +1,20 @@ +register $JAR_PATH + +define AliasBagFields datafu.pig.bags.AliasBagFields('[a#alpha,b#numeric]'); + +data = LOAD 'input' AS (data: bag {T: tuple(a:CHARARRAY, b:INT, c:INT)}); + +data2 = FOREACH data GENERATE AliasBagFields(data) as data; + +describe data2; + +data3 = FOREACH data2 GENERATE FLATTEN(data); + +describe data3; + +data4 = FOREACH data3 GENERATE data::alpha, data::numeric; + +describe data4; + +STORE data4 INTO 'output'; + http://git-wip-us.apache.org/repos/asf/bigtop/blob/41213f98/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/appendToBagTest.pig ---------------------------------------------------------------------- diff --git a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/appendToBagTest.pig b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/appendToBagTest.pig new file mode 100644 index 0000000..d906bc4 --- /dev/null +++ b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/appendToBagTest.pig @@ -0,0 +1,9 @@ +register $JAR_PATH + +define AppendToBag datafu.pig.bags.AppendToBag(); + +data = LOAD 'input' AS (key:INT, B: bag{T: tuple(v:INT)}, T: tuple(v:INT)); + +data2 = FOREACH data GENERATE key, AppendToBag(B,T) as B; + +STORE data2 INTO 'output'; http://git-wip-us.apache.org/repos/asf/bigtop/blob/41213f98/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/bagConcatTest.pig ---------------------------------------------------------------------- diff --git a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/bagConcatTest.pig b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/bagConcatTest.pig new file mode 100644 index 0000000..30d46a0 --- /dev/null +++ b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/bagConcatTest.pig @@ -0,0 +1,11 @@ +register $JAR_PATH + +define BagConcat datafu.pig.bags.BagConcat(); + +data = LOAD 'input' AS (A: bag{T: tuple(v:INT)}, B: bag{T: tuple(v:INT)}, C: bag{T: tuple(v:INT)}); + +data2 = FOREACH data GENERATE BagConcat(A,B,C); + +describe data2 + +STORE data2 INTO 'output'; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/bigtop/blob/41213f98/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/bagSplitTest.pig ---------------------------------------------------------------------- diff --git a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/bagSplitTest.pig b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/bagSplitTest.pig new file mode 100644 index 0000000..ee4f538 --- /dev/null +++ b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/bagSplitTest.pig @@ -0,0 +1,14 @@ +register $JAR_PATH + +define BagSplit datafu.pig.bags.BagSplit(); + +data = LOAD 'input' AS (B:bag{T:tuple(val1:INT,val2:INT)}); + +data2 = FOREACH data GENERATE BagSplit($MAX,B); +describe data2; + +data3 = FOREACH data2 GENERATE FLATTEN($0); + +describe data3 + +STORE data3 INTO 'output'; http://git-wip-us.apache.org/repos/asf/bigtop/blob/41213f98/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/bagSplitWithBagNumTest.pig ---------------------------------------------------------------------- diff --git a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/bagSplitWithBagNumTest.pig b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/bagSplitWithBagNumTest.pig new file mode 100644 index 0000000..833e912 --- /dev/null +++ b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/bagSplitWithBagNumTest.pig @@ -0,0 +1,11 @@ +register $JAR_PATH + +define BagSplit datafu.pig.bags.BagSplit('true'); + +data = LOAD 'input' AS (B:bag{T:tuple(val1:INT,val2:INT)}); + +data2 = FOREACH data GENERATE BagSplit($MAX,B); + +data3 = FOREACH data2 GENERATE FLATTEN($0); + +STORE data3 INTO 'output'; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/bigtop/blob/41213f98/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/comprehensiveBagSplitAndEnumerate.pig ---------------------------------------------------------------------- diff --git a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/comprehensiveBagSplitAndEnumerate.pig b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/comprehensiveBagSplitAndEnumerate.pig new file mode 100644 index 0000000..88d7392 --- /dev/null +++ b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/comprehensiveBagSplitAndEnumerate.pig @@ -0,0 +1,26 @@ +register $JAR_PATH + +define BagSplit datafu.pig.bags.BagSplit(); +define Enumerate datafu.pig.bags.Enumerate('1'); + +data = LOAD 'input' AS (data: bag {T: tuple(name:CHARARRAY, score:double)}); + +data2 = FOREACH data GENERATE BagSplit(3,data) as the_bags; + +describe data2 + +data3 = FOREACH data2 GENERATE Enumerate(the_bags) as enumerated_bags; + +describe data3 + +data4 = FOREACH data3 GENERATE FLATTEN(enumerated_bags) as (data,i); + +describe data4 + +data5 = FOREACH data4 GENERATE data as the_data, i as the_key; + +describe data5 + +data_out = FOREACH data5 GENERATE FLATTEN(the_data), the_key; + +describe data_out \ No newline at end of file http://git-wip-us.apache.org/repos/asf/bigtop/blob/41213f98/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/distinctByTest.pig ---------------------------------------------------------------------- diff --git a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/distinctByTest.pig b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/distinctByTest.pig new file mode 100644 index 0000000..9532d07 --- /dev/null +++ b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/distinctByTest.pig @@ -0,0 +1,12 @@ +register $JAR_PATH + +define DistinctBy datafu.pig.bags.DistinctBy('0'); + +data = LOAD 'input' AS (data: bag {T: tuple(a:CHARARRAY, b:INT, c:INT)}); + +data2 = FOREACH data GENERATE DistinctBy(data); + +describe data2; + +STORE data2 INTO 'output'; + http://git-wip-us.apache.org/repos/asf/bigtop/blob/41213f98/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/enumerateTest.pig ---------------------------------------------------------------------- diff --git a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/enumerateTest.pig b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/enumerateTest.pig new file mode 100644 index 0000000..1647485 --- /dev/null +++ b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/enumerateTest.pig @@ -0,0 +1,16 @@ +register $JAR_PATH + +define Enumerate datafu.pig.bags.Enumerate(); + +data = LOAD 'input' AS (data: bag {T: tuple(v1:INT,B: bag{T: tuple(v2:INT)})}); + +data2 = FOREACH data GENERATE Enumerate(data); +describe data2; + +data3 = FOREACH data2 GENERATE FLATTEN($0); +describe data3; + +data4 = FOREACH data3 GENERATE $0 as v1, $1 as B, $2 as i; +describe data4; + +STORE data4 INTO 'output'; http://git-wip-us.apache.org/repos/asf/bigtop/blob/41213f98/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/enumerateWithReverseTest.pig ---------------------------------------------------------------------- diff --git a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/enumerateWithReverseTest.pig b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/enumerateWithReverseTest.pig new file mode 100644 index 0000000..1f04b04 --- /dev/null +++ b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/enumerateWithReverseTest.pig @@ -0,0 +1,16 @@ +register $JAR_PATH + +define Enumerate datafu.pig.bags.Enumerate('1', 'true'); + +data = LOAD 'input' AS (data: bag {T: tuple(v1:INT,B: bag{T: tuple(v2:INT)})}); + +data2 = FOREACH data GENERATE Enumerate(data); +describe data2; + +data3 = FOREACH data2 GENERATE FLATTEN($0); +describe data3; + +data4 = FOREACH data3 GENERATE $0 as v1, $1 as B, $2 as i; +describe data4; + +STORE data4 INTO 'output'; http://git-wip-us.apache.org/repos/asf/bigtop/blob/41213f98/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/enumerateWithStartTest.pig ---------------------------------------------------------------------- diff --git a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/enumerateWithStartTest.pig b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/enumerateWithStartTest.pig new file mode 100644 index 0000000..d288a6e --- /dev/null +++ b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/enumerateWithStartTest.pig @@ -0,0 +1,16 @@ +register $JAR_PATH + +define Enumerate datafu.pig.bags.Enumerate('1'); + +data = LOAD 'input' AS (data: bag {T: tuple(v1:INT,B: bag{T: tuple(v2:INT)})}); + +data2 = FOREACH data GENERATE Enumerate(data); +describe data2; + +data3 = FOREACH data2 GENERATE FLATTEN($0); +describe data3; + +data4 = FOREACH data3 GENERATE $0 as v1, $1 as B, $2 as i; +describe data4; + +STORE data4 INTO 'output'; http://git-wip-us.apache.org/repos/asf/bigtop/blob/41213f98/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/firstTupleFromBagTest.pig ---------------------------------------------------------------------- diff --git a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/firstTupleFromBagTest.pig b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/firstTupleFromBagTest.pig new file mode 100644 index 0000000..921787e --- /dev/null +++ b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/firstTupleFromBagTest.pig @@ -0,0 +1,9 @@ +register $JAR_PATH + +define FirstTupleFromBag datafu.pig.bags.FirstTupleFromBag(); + +data = LOAD 'input' AS (key:INT, B: bag{T: tuple(v:INT)}); + +data2 = FOREACH data GENERATE key, FirstTupleFromBag(B, null) as B; + +STORE data2 INTO 'output'; http://git-wip-us.apache.org/repos/asf/bigtop/blob/41213f98/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/nullToEmptyBagTest.pig ---------------------------------------------------------------------- diff --git a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/nullToEmptyBagTest.pig b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/nullToEmptyBagTest.pig new file mode 100644 index 0000000..3e809b3 --- /dev/null +++ b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/nullToEmptyBagTest.pig @@ -0,0 +1,14 @@ +register $JAR_PATH + +define NullToEmptyBag datafu.pig.bags.NullToEmptyBag(); + +data = LOAD 'input' AS (B: bag {T: tuple(v:INT)}); + +dump data; + +data2 = FOREACH data GENERATE NullToEmptyBag(B) as P; + +dump data2; + +STORE data2 INTO 'output'; + http://git-wip-us.apache.org/repos/asf/bigtop/blob/41213f98/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/prependToBagTest.pig ---------------------------------------------------------------------- diff --git a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/prependToBagTest.pig b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/prependToBagTest.pig new file mode 100644 index 0000000..c852346 --- /dev/null +++ b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/prependToBagTest.pig @@ -0,0 +1,9 @@ +register $JAR_PATH + +define PrependToBag datafu.pig.bags.PrependToBag(); + +data = LOAD 'input' AS (key:INT, B: bag{T: tuple(v:INT)}, T: tuple(v:INT)); + +data2 = FOREACH data GENERATE key, PrependToBag(B,T) as B; + +STORE data2 INTO 'output'; http://git-wip-us.apache.org/repos/asf/bigtop/blob/41213f98/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/sets/setIntersectTest.pig ---------------------------------------------------------------------- diff --git a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/sets/setIntersectTest.pig b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/sets/setIntersectTest.pig new file mode 100644 index 0000000..6f590e8 --- /dev/null +++ b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/sets/setIntersectTest.pig @@ -0,0 +1,9 @@ +register $JAR_PATH + +define SetIntersect datafu.pig.bags.sets.SetIntersect(); + +data = LOAD 'input' AS (B1:bag{T:tuple(val1:int,val2:int)},B2:bag{T:tuple(val1:int,val2:int)}); + +data2 = FOREACH data GENERATE SetIntersect(B1,B2); + +STORE data2 INTO 'output'; http://git-wip-us.apache.org/repos/asf/bigtop/blob/41213f98/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/sets/setUnionTest.pig ---------------------------------------------------------------------- diff --git a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/sets/setUnionTest.pig b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/sets/setUnionTest.pig new file mode 100644 index 0000000..a5e1c4d --- /dev/null +++ b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/sets/setUnionTest.pig @@ -0,0 +1,13 @@ +register $JAR_PATH + +define SetUnion datafu.pig.bags.sets.SetUnion(); + +data = LOAD 'input' AS (B1:bag{T:tuple(val1:int,val2:int)},B2:bag{T:tuple(val1:int,val2:int)}); + +dump data + +data2 = FOREACH data GENERATE SetUnion(B1,B2); + +dump data2 + +STORE data2 INTO 'output'; http://git-wip-us.apache.org/repos/asf/bigtop/blob/41213f98/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/unorderedPairsTests.pig ---------------------------------------------------------------------- diff --git a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/unorderedPairsTests.pig b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/unorderedPairsTests.pig new file mode 100644 index 0000000..1bf68bd --- /dev/null +++ b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/unorderedPairsTests.pig @@ -0,0 +1,16 @@ +register $JAR_PATH + +define UnorderedPairs datafu.pig.bags.UnorderedPairs(); + +data = LOAD 'input' AS (B: bag {T: tuple(v:INT)}); + +data2 = FOREACH data GENERATE UnorderedPairs(B) as P; + +data3 = FOREACH data2 GENERATE FLATTEN(P); + +data4 = FOREACH data3 GENERATE FLATTEN(elem1), FLATTEN(elem2); + +data5 = ORDER data4 BY $0, $1; + +STORE data5 INTO 'output'; + http://git-wip-us.apache.org/repos/asf/bigtop/blob/41213f98/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/unorderedPairsTests2.pig ---------------------------------------------------------------------- diff --git a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/unorderedPairsTests2.pig b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/unorderedPairsTests2.pig new file mode 100644 index 0000000..aada011 --- /dev/null +++ b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/unorderedPairsTests2.pig @@ -0,0 +1,12 @@ +register $JAR_PATH + +define UnorderedPairs datafu.pig.bags.UnorderedPairs(); + +data = LOAD 'input' AS (A:int, B: bag {T: tuple(v:INT)}); + +data2 = FOREACH data GENERATE A, UnorderedPairs(B) as P; + +data3 = FOREACH data2 GENERATE A, FLATTEN(P); + +STORE data3 INTO 'output'; + http://git-wip-us.apache.org/repos/asf/bigtop/blob/41213f98/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/date/timeCountPageViewsTest.pig ---------------------------------------------------------------------- diff --git a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/date/timeCountPageViewsTest.pig b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/date/timeCountPageViewsTest.pig new file mode 100644 index 0000000..1e23a41 --- /dev/null +++ b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/date/timeCountPageViewsTest.pig @@ -0,0 +1,13 @@ +register $JAR_PATH + +define TimeCount datafu.pig.date.TimeCount('$TIME_WINDOW'); + +views = LOAD 'input' AS (user_id:int, page_id:int, time:chararray); + +views_grouped = GROUP views BY (user_id, page_id); +view_counts = foreach views_grouped { + views = order views by time; + generate group.user_id as user_id, group.page_id as page_id, TimeCount(views.(time)) as count; +} + +STORE view_counts INTO 'output'; http://git-wip-us.apache.org/repos/asf/bigtop/blob/41213f98/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/geo/haversineTest.pig ---------------------------------------------------------------------- diff --git a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/geo/haversineTest.pig b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/geo/haversineTest.pig new file mode 100644 index 0000000..e52cc1f --- /dev/null +++ b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/geo/haversineTest.pig @@ -0,0 +1,9 @@ +register $JAR_PATH + +define HaversineDistInMiles datafu.pig.geo.HaversineDistInMiles(); + +data = LOAD 'input' AS (lat1:double,lng1:double,lat2:double,lng2:double); + +data2 = FOREACH data GENERATE HaversineDistInMiles(lat1,lng1,lat2,lng2); + +STORE data2 INTO 'output'; http://git-wip-us.apache.org/repos/asf/bigtop/blob/41213f98/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/hash/md5Base64Test.pig ---------------------------------------------------------------------- diff --git a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/hash/md5Base64Test.pig b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/hash/md5Base64Test.pig new file mode 100644 index 0000000..5a12c2e --- /dev/null +++ b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/hash/md5Base64Test.pig @@ -0,0 +1,9 @@ +register $JAR_PATH + +define MD5 datafu.pig.hash.MD5Base64(); + +data_in = LOAD 'input' as (val:chararray); + +data_out = FOREACH data_in GENERATE MD5(val) as val; + +STORE data_out INTO 'output'; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/bigtop/blob/41213f98/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/hash/md5Test.pig ---------------------------------------------------------------------- diff --git a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/hash/md5Test.pig b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/hash/md5Test.pig new file mode 100644 index 0000000..3fc6aaa --- /dev/null +++ b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/hash/md5Test.pig @@ -0,0 +1,9 @@ +register $JAR_PATH + +define MD5 datafu.pig.hash.MD5(); + +data_in = LOAD 'input' as (val:chararray); + +data_out = FOREACH data_in GENERATE MD5(val) as val; + +STORE data_out INTO 'output'; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/bigtop/blob/41213f98/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/linkanalysis/pageRankTest.pig ---------------------------------------------------------------------- diff --git a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/linkanalysis/pageRankTest.pig b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/linkanalysis/pageRankTest.pig new file mode 100644 index 0000000..a0e439c --- /dev/null +++ b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/linkanalysis/pageRankTest.pig @@ -0,0 +1,25 @@ +register $JAR_PATH + +/* Need to enable dangling node handling since the Wikipedia example has them, + otherwise the ranks won't be right. */ +define PageRank datafu.pig.linkanalysis.PageRank('dangling_nodes','true'); + +data = LOAD 'input' AS (topic:INT,source:INT,dest:INT,weight:DOUBLE); + +data_grouped = GROUP data by (topic,source); + +data_grouped = foreach data_grouped { + generate group.topic as topic, group.source as source, data.(dest,weight) as edges; +}; + +data_grouped2 = GROUP data_grouped by topic; +data_grouped2 = foreach data_grouped2 { + generate group as topic, FLATTEN(PageRank(data_grouped.(source,edges))) as (source,rank); +}; + +data_grouped3 = FOREACH data_grouped2 GENERATE + topic, + source, + rank; + +STORE data_grouped3 INTO 'output'; http://git-wip-us.apache.org/repos/asf/bigtop/blob/41213f98/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/numbers/randomIntRangeTest.pig ---------------------------------------------------------------------- diff --git a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/numbers/randomIntRangeTest.pig b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/numbers/randomIntRangeTest.pig new file mode 100644 index 0000000..3ca45c7 --- /dev/null +++ b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/numbers/randomIntRangeTest.pig @@ -0,0 +1,8 @@ +register $JAR_PATH + +define RandInt datafu.pig.numbers.RandInt(); + +data = LOAD 'input' AS (key:INT); +data2 = FOREACH data GENERATE key, RandInt($MIN,$MAX) as val; + +STORE data2 INTO 'output'; http://git-wip-us.apache.org/repos/asf/bigtop/blob/41213f98/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/sessions/sessionizeTest.pig ---------------------------------------------------------------------- diff --git a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/sessions/sessionizeTest.pig b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/sessions/sessionizeTest.pig new file mode 100644 index 0000000..6a4939e --- /dev/null +++ b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/sessions/sessionizeTest.pig @@ -0,0 +1,17 @@ +register $JAR_PATH + +define Sessionize datafu.pig.sessions.Sessionize('$TIME_WINDOW'); + +views = LOAD 'input' AS (time:chararray, user_id:int, value:int); + +views_grouped = GROUP views BY user_id; +view_counts = FOREACH views_grouped { + views = ORDER views BY time; + GENERATE flatten(Sessionize(views)) as (time,user_id,value,session_id); +} + +max_value = GROUP view_counts BY (user_id, session_id); + +max_value = FOREACH max_value GENERATE group.user_id, MAX(view_counts.value) AS val; + +STORE max_value INTO 'output'; http://git-wip-us.apache.org/repos/asf/bigtop/blob/41213f98/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/stats/markovPairDefault.pig ---------------------------------------------------------------------- diff --git a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/stats/markovPairDefault.pig b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/stats/markovPairDefault.pig new file mode 100644 index 0000000..a121cb1 --- /dev/null +++ b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/stats/markovPairDefault.pig @@ -0,0 +1,14 @@ +register $JAR_PATH + +define markovPairs datafu.pig.stats.MarkovPairs(); + +data = load 'input' as $schema; +describe data; + +data_out1 = foreach data generate data as orig_bag; +describe data_out1; + +data_out = foreach data_out1 generate markovPairs(orig_bag) as markov_bag; +describe data_out; + +store data_out into 'output'; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/bigtop/blob/41213f98/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/stats/markovPairLookahead.pig ---------------------------------------------------------------------- diff --git a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/stats/markovPairLookahead.pig b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/stats/markovPairLookahead.pig new file mode 100644 index 0000000..269a1bc --- /dev/null +++ b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/stats/markovPairLookahead.pig @@ -0,0 +1,14 @@ +register $JAR_PATH + +define markovPairs datafu.pig.stats.MarkovPairs('$lookahead'); + +data = load 'input' as $schema; +describe data; + +data_out1 = foreach data generate data as orig_bag; +describe data_out1; + +data_out = foreach data_out1 generate markovPairs(orig_bag) as markov_bag; +describe data_out; + +store data_out into 'output'; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/bigtop/blob/41213f98/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/stats/medianTest.pig ---------------------------------------------------------------------- diff --git a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/stats/medianTest.pig b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/stats/medianTest.pig new file mode 100644 index 0000000..0a439ce --- /dev/null +++ b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/stats/medianTest.pig @@ -0,0 +1,21 @@ +register $JAR_PATH + +define Median datafu.pig.stats.Median(); + +data_in = LOAD 'input' as (val:int); + +/*describe data_in;*/ + +data_out = GROUP data_in ALL; + +/*describe data_out;*/ + +data_out = FOREACH data_out { + sorted = ORDER data_in BY val; + GENERATE Median(sorted) as medians; +} +data_out = FOREACH data_out GENERATE FLATTEN(medians); + +/*describe data_out;*/ + +STORE data_out into 'output'; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/bigtop/blob/41213f98/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/stats/quantileTest.pig ---------------------------------------------------------------------- diff --git a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/stats/quantileTest.pig b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/stats/quantileTest.pig new file mode 100644 index 0000000..604d179 --- /dev/null +++ b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/stats/quantileTest.pig @@ -0,0 +1,21 @@ +register $JAR_PATH + +define Quantile datafu.pig.stats.Quantile($QUANTILES); + +data_in = LOAD 'input' as (val:int); + +/*describe data_in;*/ + +data_out = GROUP data_in ALL; + +/*describe data_out;*/ + +data_out = FOREACH data_out { + sorted = ORDER data_in BY val; + GENERATE Quantile(sorted) as quantiles; +} +data_out = FOREACH data_out GENERATE FLATTEN(quantiles); + +/*describe data_out;*/ + +STORE data_out into 'output'; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/bigtop/blob/41213f98/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/stats/streamingMedianTest.pig ---------------------------------------------------------------------- diff --git a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/stats/streamingMedianTest.pig b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/stats/streamingMedianTest.pig new file mode 100644 index 0000000..27d64f3 --- /dev/null +++ b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/stats/streamingMedianTest.pig @@ -0,0 +1,21 @@ +register $JAR_PATH + +define Median datafu.pig.stats.StreamingMedian(); + +data_in = LOAD 'input' as (val:int); + +/*describe data_in;*/ + +data_out = GROUP data_in ALL; + +/*describe data_out;*/ + +data_out = FOREACH data_out { + sorted = ORDER data_in BY val; + GENERATE Median(sorted) as medians; +} +data_out = FOREACH data_out GENERATE FLATTEN(medians); + +/*describe data_out;*/ + +STORE data_out into 'output'; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/bigtop/blob/41213f98/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/stats/streamingQuantileTest.pig ---------------------------------------------------------------------- diff --git a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/stats/streamingQuantileTest.pig b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/stats/streamingQuantileTest.pig new file mode 100644 index 0000000..51c3bc5 --- /dev/null +++ b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/stats/streamingQuantileTest.pig @@ -0,0 +1,18 @@ +register $JAR_PATH + +define Quantile datafu.pig.stats.StreamingQuantile($QUANTILES); + +data_in = LOAD 'input' as (val:int); + +/*describe data_in;*/ + +data_out = GROUP data_in ALL; + +/*describe data_out;*/ + +data_out = FOREACH data_out GENERATE Quantile(data_in.val) as quantiles; +data_out = FOREACH data_out GENERATE FLATTEN(quantiles); + +/*describe data_out;*/ + +STORE data_out into 'output'; http://git-wip-us.apache.org/repos/asf/bigtop/blob/41213f98/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/stats/wilsonBinConfTests.pig ---------------------------------------------------------------------- diff --git a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/stats/wilsonBinConfTests.pig b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/stats/wilsonBinConfTests.pig new file mode 100644 index 0000000..19fa466 --- /dev/null +++ b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/stats/wilsonBinConfTests.pig @@ -0,0 +1,11 @@ +register $JAR_PATH + +define WilsonBinConf datafu.pig.stats.WilsonBinConf('$alpha'); + +data = load 'input' as (successes:long, totals:long); +describe data; + +data_out = FOREACH data GENERATE WilsonBinConf(successes, totals) as interval; +data_out = FOREACH data_out GENERATE FLATTEN(interval); + +store data_out into 'output'; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/bigtop/blob/41213f98/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/urls/userAgentTest.pig ---------------------------------------------------------------------- diff --git a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/urls/userAgentTest.pig b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/urls/userAgentTest.pig new file mode 100644 index 0000000..4548755 --- /dev/null +++ b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/urls/userAgentTest.pig @@ -0,0 +1,8 @@ +register $JAR_PATH + +define UserAgentClassify datafu.pig.urls.UserAgentClassify(); + +data = load 'input' as (usr_agent:chararray); +data_out = foreach data generate UserAgentClassify(usr_agent) as class; +describe data_out; +store data_out into 'output'; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/bigtop/blob/41213f98/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/util/assertWithMessageTest.pig ---------------------------------------------------------------------- diff --git a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/util/assertWithMessageTest.pig b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/util/assertWithMessageTest.pig new file mode 100644 index 0000000..f240987 --- /dev/null +++ b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/util/assertWithMessageTest.pig @@ -0,0 +1,10 @@ +register $JAR_PATH + +define ASSERT datafu.pig.util.ASSERT(); + +data = LOAD 'input' AS (val:INT); + +data2 = FILTER data BY ASSERT(val,'assertion appears to have failed, doh!'); + +STORE data2 INTO 'output'; + http://git-wip-us.apache.org/repos/asf/bigtop/blob/41213f98/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/util/assertWithoutMessageTest.pig ---------------------------------------------------------------------- diff --git a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/util/assertWithoutMessageTest.pig b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/util/assertWithoutMessageTest.pig new file mode 100644 index 0000000..c6368e7 --- /dev/null +++ b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/util/assertWithoutMessageTest.pig @@ -0,0 +1,10 @@ +register $JAR_PATH + +define ASSERT datafu.pig.util.ASSERT(); + +data = LOAD 'input' AS (val:INT); + +data2 = FILTER data BY ASSERT(val); + +STORE data2 INTO 'output'; + http://git-wip-us.apache.org/repos/asf/bigtop/blob/41213f98/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/util/intToBoolTest.pig ---------------------------------------------------------------------- diff --git a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/util/intToBoolTest.pig b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/util/intToBoolTest.pig new file mode 100644 index 0000000..18cda42 --- /dev/null +++ b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/util/intToBoolTest.pig @@ -0,0 +1,10 @@ +register $JAR_PATH + +define IntToBool datafu.pig.util.IntToBool(); + +data = LOAD 'input' AS (val:INT); + +data2 = FOREACH data GENERATE IntToBool(val); + +STORE data2 INTO 'output'; + http://git-wip-us.apache.org/repos/asf/bigtop/blob/41213f98/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/util/intToBoolToIntTest.pig ---------------------------------------------------------------------- diff --git a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/util/intToBoolToIntTest.pig b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/util/intToBoolToIntTest.pig new file mode 100644 index 0000000..82d3ee0 --- /dev/null +++ b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/util/intToBoolToIntTest.pig @@ -0,0 +1,12 @@ +register $JAR_PATH + +define IntToBool datafu.pig.util.IntToBool(); +define BoolToInt datafu.pig.util.BoolToInt(); + +data = LOAD 'input' AS (val:INT); + +data2 = FOREACH data GENERATE IntToBool(val) as val; +data3 = FOREACH data2 GENERATE BoolToInt(val) as val; + +STORE data3 INTO 'output'; + http://git-wip-us.apache.org/repos/asf/bigtop/blob/41213f98/bigtop-tests/test-artifacts/pom.xml ---------------------------------------------------------------------- diff --git a/bigtop-tests/test-artifacts/pom.xml b/bigtop-tests/test-artifacts/pom.xml index e3777b7..ed05bae 100644 --- a/bigtop-tests/test-artifacts/pom.xml +++ b/bigtop-tests/test-artifacts/pom.xml @@ -45,6 +45,7 @@ <module>hue</module> <module>solr</module> <module>crunch</module> + <module>datafu</module> <module>fatjar</module> </modules> http://git-wip-us.apache.org/repos/asf/bigtop/blob/41213f98/bigtop-tests/test-execution/smokes/datafu/pom.xml ---------------------------------------------------------------------- diff --git a/bigtop-tests/test-execution/smokes/datafu/pom.xml b/bigtop-tests/test-execution/smokes/datafu/pom.xml new file mode 100644 index 0000000..50cb38d --- /dev/null +++ b/bigtop-tests/test-execution/smokes/datafu/pom.xml @@ -0,0 +1,123 @@ +<project xmlns="http://maven.apache.org/POM/4.0.0" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd"> + <modelVersion>4.0.0</modelVersion> + + <parent> + <groupId>org.apache.bigtop.itest</groupId> + <artifactId>smoke-tests</artifactId> + <version>0.6.0-SNAPSHOT</version> + <relativePath>../pom.xml</relativePath> + </parent> + + <groupId>org.apache.bigtop.itest</groupId> + <artifactId>datafu-smoke-execution</artifactId> + <version>0.6.0-SNAPSHOT</version> + <name>datafu smoke test execution</name> + + <properties> + <org.apache.maven-dependency-plugin.groupId>org.apache.bigtop.itest</org.apache.maven-dependency-plugin.groupId> + <org.apache.maven-dependency-plugin.artifactId>datafu-smoke</org.apache.maven-dependency-plugin.artifactId> + <org.apache.maven-dependency-plugin.version>${project.version}</org.apache.maven-dependency-plugin.version> + <org.apache.maven-dependency-plugin.output>${project.build.directory}</org.apache.maven-dependency-plugin.output> + <org.apache.maven-dependency-plugin.type>jar</org.apache.maven-dependency-plugin.type> + <org.apache.maven-failsafe-plugin.testInclude>**/*Tests*</org.apache.maven-failsafe-plugin.testInclude> + + <HADOOP_MAPRED_HOME>${env.HADOOP_MAPRED_HOME}</HADOOP_MAPRED_HOME> + <HADOOP_CONF_DIR>${env.HADOOP_CONF_DIR}</HADOOP_CONF_DIR> + <PIG_HOME>${env.PIG_HOME}</PIG_HOME> + </properties> + + <dependencies> + <dependency> + <groupId>${org.apache.maven-dependency-plugin.groupId}</groupId> + <artifactId>${org.apache.maven-dependency-plugin.artifactId}</artifactId> + <version>${project.version}</version> + </dependency> + </dependencies> + + <build> + <plugins> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-install-plugin</artifactId> + <executions> + <execution> + <phase>initialize</phase> + <goals> + <goal>install-file</goal> + </goals> + </execution> + </executions> + <configuration> + <file>${PIG_HOME}/pig.jar</file> + <groupId>org.apache.pig</groupId> + <artifactId>pig</artifactId> + <version>${pig.version}</version> + <packaging>jar</packaging> + </configuration> + </plugin> + + + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-dependency-plugin</artifactId> + </plugin> + + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-enforcer-plugin</artifactId> + <version>1.0</version> + <executions> + <execution> + <id>enforce-property</id> + <goals> + <goal>enforce</goal> + </goals> + <configuration> + <rules> + <requireProperty> + <property>HADOOP_MAPRED_HOME</property> + <message>HADOOP_MAPRED_HOME env. variable has to be set</message> + </requireProperty> + <requireProperty> + <property>HADOOP_CONF_DIR</property> + <message>HADOOP_CONF_DIR env. variable has to be set</message> + </requireProperty> + <requireProperty> + <property>PIG_HOME</property> + <message>PIG_HOME env. variable has to be set</message> + </requireProperty> + </rules> + <fail>true</fail> + </configuration> + </execution> + </executions> + </plugin> + + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-failsafe-plugin</artifactId> + <version>2.11</version> + <configuration> + <forkMode>always</forkMode> + <argLine>-Dpigunit.exectype.cluster=true</argLine> + <additionalClasspathElements> + <additionalClasspathElement>${HADOOP_CONF_DIR}</additionalClasspathElement> + </additionalClasspathElements> + <systemPropertyVariables> + <datafu.jar.dir>${PIG_HOME}</datafu.jar.dir> + </systemPropertyVariables> + </configuration> + + <!-- configuration> + <testSourceDirectory>/root/stacks/smokes/datafu/target/com/cloudera/itest/datafu/</testSourceDirectory> + <testClassesDirectory>/root/.m2/repository/com/cloudera/itest/datafu/4.1-cdh4u1-SNAPSHOT/</testClassesDirectory> + <skipTests>false</skipTests> + <testFailureIgnore>false</testFailureIgnore> + <argLine>-Dsun.lang.ClassLoader.allowArraySyntax=true -Djava.endorsed.dirs=${project.build.testOutputDirectory}/endorsed</argLine> + </configuration --> + </plugin> + </plugins> + </build> +</project> http://git-wip-us.apache.org/repos/asf/bigtop/blob/41213f98/pom.xml ---------------------------------------------------------------------- diff --git a/pom.xml b/pom.xml index a8017ad..0f3a632 100644 --- a/pom.xml +++ b/pom.xml @@ -179,13 +179,11 @@ <groupId>org.apache.pig</groupId> <artifactId>pig</artifactId> <version>${pig.version}</version> - <scope>test</scope> </dependency> <dependency> <groupId>org.apache.pig</groupId> <artifactId>pigsmoke</artifactId> <version>${pig-smoke.version}</version> - <scope>test</scope> </dependency> <dependency> <groupId>org.apache.zookeeper</groupId> @@ -206,7 +204,6 @@ <groupId>org.apache.sqoop</groupId> <artifactId>sqoop</artifactId> <version>${sqoop.version}</version> - <scope>test</scope> </dependency> </dependencies> </dependencyManagement>
