BIGTOP-3086: Drop datafu packaging

Since pig has been removed with BIGTOP-3075,
we should remove datafu as well, since datafu-pig depends on pig.

Change-Id: Ide8d8cb5e8223cf6307a48f7e31a606a02dcefdb
Signed-off-by: Yuqi Gu <[email protected]>


Project: http://git-wip-us.apache.org/repos/asf/bigtop/repo
Commit: http://git-wip-us.apache.org/repos/asf/bigtop/commit/4cee56bd
Tree: http://git-wip-us.apache.org/repos/asf/bigtop/tree/4cee56bd
Diff: http://git-wip-us.apache.org/repos/asf/bigtop/diff/4cee56bd

Branch: refs/heads/master
Commit: 4cee56bdc831b254805a121d43c33fc6a3a9c53e
Parents: aaffc1e
Author: Yuqi Gu <[email protected]>
Authored: Mon Oct 8 06:11:23 2018 +0000
Committer: Jun He <[email protected]>
Committed: Mon Dec 10 05:17:39 2018 +0000

----------------------------------------------------------------------
 bigtop-ci/jenkins/jobsCreator.groovy            |   4 +-
 .../src/common/datafu/do-component-build        |  22 -
 .../src/common/datafu/install_datafu.sh         |  80 ----
 bigtop-packages/src/deb/datafu/changelog        |   1 -
 bigtop-packages/src/deb/datafu/compat           |   1 -
 bigtop-packages/src/deb/datafu/control          |  38 --
 bigtop-packages/src/deb/datafu/copyright        |  15 -
 .../src/deb/datafu/pig-udf-datafu.install       |   1 -
 bigtop-packages/src/deb/datafu/rules            |  36 --
 bigtop-packages/src/deb/datafu/source/format    |   1 -
 bigtop-packages/src/rpm/datafu/BUILD/.gitignore |   0
 bigtop-packages/src/rpm/datafu/RPMS/.gitignore  |   0
 .../src/rpm/datafu/SOURCES/.gitignore           |   0
 .../src/rpm/datafu/SPECS/datafu.spec            |  72 ---
 bigtop-packages/src/rpm/datafu/SRPMS/.gitignore |   0
 bigtop-tests/test-artifacts/datafu/pom.xml      |  68 ---
 .../java/datafu/linkanalysis/PageRank.java      | 441 -------------------
 .../java/datafu/pig/linkanalysis/PageRank.java  | 372 ----------------
 .../apache/bigtop/itest/datafu/PigTests.java    | 211 ---------
 .../bigtop/itest/datafu/bags/BagTests.java      | 308 -------------
 .../bigtop/itest/datafu/bags/sets/SetTests.java |  74 ----
 .../bigtop/itest/datafu/date/TimeTests.java     |  65 ---
 .../bigtop/itest/datafu/geo/GeoTests.java       |  75 ----
 .../bigtop/itest/datafu/hash/HashTests.java     |  63 ---
 .../itest/datafu/linkanalysis/PageRankTest.java | 299 -------------
 .../datafu/linkanalysis/PageRankTests.java      | 120 -----
 .../itest/datafu/numbers/NumberTests.java       |  65 ---
 .../itest/datafu/sessions/SessionTests.java     |  92 ----
 .../itest/datafu/stats/MarkovPairTests.java     | 105 -----
 .../itest/datafu/stats/QuantileTests.java       | 196 ---------
 .../itest/datafu/stats/WilsonBinConfTests.java  |  81 ----
 .../bigtop/itest/datafu/urls/UserAgentTest.java |  57 ---
 .../bigtop/itest/datafu/util/AssertTests.java   |  93 ----
 .../datafu/util/IntBoolConversionPigTests.java  |  77 ----
 .../datafu/bags/aliasBagFieldsTest.pig          |  20 -
 .../resources/datafu/bags/appendToBagTest.pig   |   9 -
 .../resources/datafu/bags/bagConcatTest.pig     |  11 -
 .../main/resources/datafu/bags/bagSplitTest.pig |  14 -
 .../datafu/bags/bagSplitWithBagNumTest.pig      |  11 -
 .../bags/comprehensiveBagSplitAndEnumerate.pig  |  26 --
 .../resources/datafu/bags/distinctByTest.pig    |  12 -
 .../resources/datafu/bags/enumerateTest.pig     |  16 -
 .../datafu/bags/enumerateWithReverseTest.pig    |  16 -
 .../datafu/bags/enumerateWithStartTest.pig      |  16 -
 .../datafu/bags/firstTupleFromBagTest.pig       |   9 -
 .../datafu/bags/nullToEmptyBagTest.pig          |  14 -
 .../resources/datafu/bags/prependToBagTest.pig  |   9 -
 .../datafu/bags/sets/setIntersectTest.pig       |   9 -
 .../resources/datafu/bags/sets/setUnionTest.pig |  13 -
 .../datafu/bags/unorderedPairsTests.pig         |  16 -
 .../datafu/bags/unorderedPairsTests2.pig        |  12 -
 .../datafu/date/timeCountPageViewsTest.pig      |  13 -
 .../main/resources/datafu/geo/haversineTest.pig |   9 -
 .../resources/datafu/hash/md5Base64Test.pig     |   9 -
 .../src/main/resources/datafu/hash/md5Test.pig  |   9 -
 .../datafu/linkanalysis/pageRankTest.pig        |  25 --
 .../datafu/numbers/randomIntRangeTest.pig       |   8 -
 .../datafu/sessions/sessionizeTest.pig          |  17 -
 .../datafu/stats/markovPairDefault.pig          |  14 -
 .../datafu/stats/markovPairLookahead.pig        |  14 -
 .../main/resources/datafu/stats/medianTest.pig  |  21 -
 .../resources/datafu/stats/quantileTest.pig     |  21 -
 .../datafu/stats/streamingMedianTest.pig        |  21 -
 .../datafu/stats/streamingQuantileTest.pig      |  18 -
 .../datafu/stats/wilsonBinConfTests.pig         |  11 -
 .../resources/datafu/urls/userAgentTest.pig     |   8 -
 .../datafu/util/assertWithMessageTest.pig       |  10 -
 .../datafu/util/assertWithoutMessageTest.pig    |  10 -
 .../resources/datafu/util/intToBoolTest.pig     |  10 -
 .../datafu/util/intToBoolToIntTest.pig          |  12 -
 .../package/src/main/resources/package_data.xml |  19 -
 bigtop-tests/test-artifacts/pom.xml             |   1 -
 .../test-execution/smokes/datafu/pom.xml        | 140 ------
 bigtop.bom                                      |  11 -
 74 files changed, 2 insertions(+), 3795 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-ci/jenkins/jobsCreator.groovy
----------------------------------------------------------------------
diff --git a/bigtop-ci/jenkins/jobsCreator.groovy 
b/bigtop-ci/jenkins/jobsCreator.groovy
index 5ced028..3d5240d 100644
--- a/bigtop-ci/jenkins/jobsCreator.groovy
+++ b/bigtop-ci/jenkins/jobsCreator.groovy
@@ -16,8 +16,8 @@
  */
 
 // FIXME: it would be nice to extract the following from bigtop.mk on the fly
-def bigtopComponents = ["bigtop-groovy", "bigtop-jsvc", "bigtop-tomcat", 
"bigtop-utils", 
-                        "zookeeper", "hadoop", "hbase", "hive", "pig", 
"crunch", "datafu", 
+def bigtopComponents = ["bigtop-groovy", "bigtop-jsvc", "bigtop-tomcat", 
"bigtop-utils",
+                        "zookeeper", "hadoop", "hbase", "hive", "pig", 
"crunch",
                         "flume", "giraph", "ignite-hadoop", "mahout", "oozie", 
"phoenix",
                         "solr", "spark", "sqoop", "alluxio", "whirr"]
 // FIXME: it would be nice to extract the following from some static 
configuration file

http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-packages/src/common/datafu/do-component-build
----------------------------------------------------------------------
diff --git a/bigtop-packages/src/common/datafu/do-component-build 
b/bigtop-packages/src/common/datafu/do-component-build
deleted file mode 100644
index e8ea9a7..0000000
--- a/bigtop-packages/src/common/datafu/do-component-build
+++ /dev/null
@@ -1,22 +0,0 @@
-#!/bin/bash
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-set -ex
-
-. `dirname $0`/bigtop.bom
-
-gradle -b bootstrap.gradle
-./gradlew clean assemble

http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-packages/src/common/datafu/install_datafu.sh
----------------------------------------------------------------------
diff --git a/bigtop-packages/src/common/datafu/install_datafu.sh 
b/bigtop-packages/src/common/datafu/install_datafu.sh
deleted file mode 100755
index df65c9e..0000000
--- a/bigtop-packages/src/common/datafu/install_datafu.sh
+++ /dev/null
@@ -1,80 +0,0 @@
-#!/bin/bash
-
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-set -e
-
-usage() {
-  echo "
-usage: $0 <options>
-  Required not-so-options:
-     --build-dir=DIR             path to pig dist.dir
-     --prefix=PREFIX             path to install into
-
-  Optional options:
-     --lib-dir=DIR               path to install pig home [/usr/lib/pig]
-     --build-dir=DIR             path to pig dist dir
-     ... [ see source for more similar options ]
-  "
-  exit 1
-}
-
-OPTS=$(getopt \
-  -n $0 \
-  -o '' \
-  -l 'prefix:' \
-  -l 'lib-dir:' \
-  -l 'build-dir:' -- "$@")
-
-if [ $? != 0 ] ; then
-    usage
-fi
-
-eval set -- "$OPTS"
-while true ; do
-    case "$1" in
-        --prefix)
-        PREFIX=$2 ; shift 2
-        ;;
-        --build-dir)
-        BUILD_DIR=$2 ; shift 2
-        ;;
-        --lib-dir)
-        LIB_DIR=$2 ; shift 2
-        ;;
-        --)
-        shift ; break
-        ;;
-        *)
-        echo "Unknown option: $1"
-        usage
-        exit 1
-        ;;
-    esac
-done
-
-for var in PREFIX BUILD_DIR ; do
-  if [ -z "$(eval "echo \$$var")" ]; then
-    echo Missing param: $var
-    usage
-  fi
-done
-
-LIB_DIR=${LIB_DIR:-/usr/lib/pig}
-
-# First we'll move everything into lib
-install -d -m 0755 $PREFIX/$LIB_DIR
-cp $BUILD_DIR/datafu-*.jar $PREFIX/$LIB_DIR

http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-packages/src/deb/datafu/changelog
----------------------------------------------------------------------
diff --git a/bigtop-packages/src/deb/datafu/changelog 
b/bigtop-packages/src/deb/datafu/changelog
deleted file mode 100644
index 547ed02..0000000
--- a/bigtop-packages/src/deb/datafu/changelog
+++ /dev/null
@@ -1 +0,0 @@
---- This is auto-generated 

http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-packages/src/deb/datafu/compat
----------------------------------------------------------------------
diff --git a/bigtop-packages/src/deb/datafu/compat 
b/bigtop-packages/src/deb/datafu/compat
deleted file mode 100644
index 7f8f011..0000000
--- a/bigtop-packages/src/deb/datafu/compat
+++ /dev/null
@@ -1 +0,0 @@
-7

http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-packages/src/deb/datafu/control
----------------------------------------------------------------------
diff --git a/bigtop-packages/src/deb/datafu/control 
b/bigtop-packages/src/deb/datafu/control
deleted file mode 100644
index 04a5a65..0000000
--- a/bigtop-packages/src/deb/datafu/control
+++ /dev/null
@@ -1,38 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-Source: pig-udf-datafu 
-Section: misc
-Priority: extra
-Maintainer: Bigtop <[email protected]>
-Build-Depends: debhelper (>= 7.0.50~)
-Standards-Version: 3.8.0
-Homepage: https://github.com/linkedin/datafu
-
-Package: pig-udf-datafu
-Architecture: all
-Depends: pig
-Description: A collection of user-defined functions for Hadoop and Pig.
- DataFu is a collection of user-defined functions for working with large-scale 
- data in Hadoop and Pig. This library was born out of the need for a stable, 
- well-tested library of UDFs for data mining and statistics. It is used 
- at LinkedIn in many of our off-line workflows for data derived products like 
- "People You May Know" and "Skills". 
- .
- It contains functions for: PageRank, Quantiles (median), variance, 
Sessionization, 
- Convenience bag functions (e.g., set operations, enumerating bags, etc), 
- Convenience utility functions (e.g., assertions, easier writing of EvalFuncs)
- and more...
-

http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-packages/src/deb/datafu/copyright
----------------------------------------------------------------------
diff --git a/bigtop-packages/src/deb/datafu/copyright 
b/bigtop-packages/src/deb/datafu/copyright
deleted file mode 100644
index 422ec82..0000000
--- a/bigtop-packages/src/deb/datafu/copyright
+++ /dev/null
@@ -1,15 +0,0 @@
-Format: http://dep.debian.net/deps/dep5
-Source: https://github.com/linkedin/datafu
-Upstream-Name: DataFu
-
-Files: *
-Copyright: 2010, LinkedIn, Inc
-License: Apache-2.0
-
-Files debian/*
-Copyright: 2011, The Apache Software Foundation
-License: Apache-2.0
-
-License: Apache-2.0
- On Debian systems, the complete text of the Apache 2.0 license
- can be found in "/usr/share/common-licenses/Apache-2.0".

http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-packages/src/deb/datafu/pig-udf-datafu.install
----------------------------------------------------------------------
diff --git a/bigtop-packages/src/deb/datafu/pig-udf-datafu.install 
b/bigtop-packages/src/deb/datafu/pig-udf-datafu.install
deleted file mode 100644
index 6a9697b..0000000
--- a/bigtop-packages/src/deb/datafu/pig-udf-datafu.install
+++ /dev/null
@@ -1 +0,0 @@
-/usr/lib/pig

http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-packages/src/deb/datafu/rules
----------------------------------------------------------------------
diff --git a/bigtop-packages/src/deb/datafu/rules 
b/bigtop-packages/src/deb/datafu/rules
deleted file mode 100755
index 24a5f38..0000000
--- a/bigtop-packages/src/deb/datafu/rules
+++ /dev/null
@@ -1,36 +0,0 @@
-#!/usr/bin/make -f
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# -*- makefile -*-
-
-# Uncomment this to turn on verbose mode.
-export DH_VERBOSE=1
-
-# This has to be exported to make some magic below work.
-export DH_OPTIONS
-
-%:
-       dh $@
-
-override_dh_auto_build:
-       # we'll just use the build from the tarball.
-       bash debian/do-component-build -Divy.home=${HOME}/.ivy2
-
-override_dh_auto_install:
-       sh -x debian/install_datafu.sh \
-         --build-dir=datafu-pig/build/libs \
-         --prefix=debian/tmp

http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-packages/src/deb/datafu/source/format
----------------------------------------------------------------------
diff --git a/bigtop-packages/src/deb/datafu/source/format 
b/bigtop-packages/src/deb/datafu/source/format
deleted file mode 100644
index 163aaf8..0000000
--- a/bigtop-packages/src/deb/datafu/source/format
+++ /dev/null
@@ -1 +0,0 @@
-3.0 (quilt)

http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-packages/src/rpm/datafu/BUILD/.gitignore
----------------------------------------------------------------------
diff --git a/bigtop-packages/src/rpm/datafu/BUILD/.gitignore 
b/bigtop-packages/src/rpm/datafu/BUILD/.gitignore
deleted file mode 100644
index e69de29..0000000

http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-packages/src/rpm/datafu/RPMS/.gitignore
----------------------------------------------------------------------
diff --git a/bigtop-packages/src/rpm/datafu/RPMS/.gitignore 
b/bigtop-packages/src/rpm/datafu/RPMS/.gitignore
deleted file mode 100644
index e69de29..0000000

http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-packages/src/rpm/datafu/SOURCES/.gitignore
----------------------------------------------------------------------
diff --git a/bigtop-packages/src/rpm/datafu/SOURCES/.gitignore 
b/bigtop-packages/src/rpm/datafu/SOURCES/.gitignore
deleted file mode 100644
index e69de29..0000000

http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-packages/src/rpm/datafu/SPECS/datafu.spec
----------------------------------------------------------------------
diff --git a/bigtop-packages/src/rpm/datafu/SPECS/datafu.spec 
b/bigtop-packages/src/rpm/datafu/SPECS/datafu.spec
deleted file mode 100644
index 0185736..0000000
--- a/bigtop-packages/src/rpm/datafu/SPECS/datafu.spec
+++ /dev/null
@@ -1,72 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-%define datafu_name datafu
-%define lib_datafu /usr/lib/pig
-
-%if  %{?suse_version:1}0
-%define doc_datafu %{_docdir}/datafu-doc
-%else
-%define doc_datafu %{_docdir}/datafu-doc-%{datafu_version}
-%endif
-
-# disable repacking jars
-%define __os_install_post %{nil}
-
-Name: pig-udf-datafu
-Version: %{datafu_version}
-Release: %{datafu_release}
-Summary: A collection of user-defined functions for Hadoop and Pig.
-URL: https://github.com/linkedin/datafu
-Group: Development/Libraries
-BuildArch: noarch
-Buildroot: %(mktemp -ud 
%{_tmppath}/%{datafu_name}-%{version}-%{release}-XXXXXX)
-License: ASL 2.0
-Source0: %{datafu_name}-%{datafu_base_version}.tar.gz
-Source1: do-component-build 
-Source2: install_%{datafu_name}.sh
-Requires: hadoop-client, bigtop-utils >= 0.7
-
-
-%description 
-DataFu is a collection of user-defined functions for working with large-scale
-data in Hadoop and Pig. This library was born out of the need for a stable,
-well-tested library of UDFs for data mining and statistics. It is used
-at LinkedIn in many of our off-line workflows for data derived products like
-"People You May Know" and "Skills".
-
-It contains functions for: PageRank, Quantiles (median), variance, 
Sessionization,
-Convenience bag functions (e.g., set operations, enumerating bags, etc),
-Convenience utility functions (e.g., assertions, easier writing of EvalFuncs)
-and more...
-
-%prep
-%setup -n apache-%{datafu_name}-incubating-sources-%{datafu_base_version}
-
-%build
-bash $RPM_SOURCE_DIR/do-component-build
-
-%install
-%__rm -rf $RPM_BUILD_ROOT
-sh $RPM_SOURCE_DIR/install_datafu.sh \
-          --build-dir=datafu-pig/build/libs \
-          --prefix=$RPM_BUILD_ROOT
-
-#######################
-#### FILES SECTION ####
-#######################
-%files 
-%defattr(-,root,root,755)
-%{lib_datafu}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-packages/src/rpm/datafu/SRPMS/.gitignore
----------------------------------------------------------------------
diff --git a/bigtop-packages/src/rpm/datafu/SRPMS/.gitignore 
b/bigtop-packages/src/rpm/datafu/SRPMS/.gitignore
deleted file mode 100644
index e69de29..0000000

http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-tests/test-artifacts/datafu/pom.xml
----------------------------------------------------------------------
diff --git a/bigtop-tests/test-artifacts/datafu/pom.xml 
b/bigtop-tests/test-artifacts/datafu/pom.xml
deleted file mode 100644
index f0ed55b..0000000
--- a/bigtop-tests/test-artifacts/datafu/pom.xml
+++ /dev/null
@@ -1,68 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!--
-   Licensed to the Apache Software Foundation (ASF) under one or more
-   contributor license agreements.  See the NOTICE file distributed with
-   this work for additional information regarding copyright ownership.
-   The ASF licenses this file to You under the Apache License, Version 2.0
-   (the "License"); you may not use this file except in compliance with
-   the License.  You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
--->
-<project xmlns="http://maven.apache.org/POM/4.0.0" 
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 
http://maven.apache.org/xsd/maven-4.0.0.xsd">
-
-  <parent>
-    <groupId>org.apache.bigtop.itest</groupId>
-    <artifactId>bigtop-smokes</artifactId>
-    <version>1.3.1-SNAPSHOT</version>
-    <relativePath>../pom.xml</relativePath>
-  </parent>
-
-  <modelVersion>4.0.0</modelVersion>
-  <groupId>org.apache.bigtop.itest</groupId>
-  <artifactId>datafu-smoke</artifactId>
-  <version>1.3.1-SNAPSHOT</version>
-  <name>datafusmoke</name>
-
-  <dependencies>
-    <dependency>
-      <groupId>org.apache.hadoop</groupId>
-      <artifactId>hadoop-client</artifactId>
-    </dependency>
-    <dependency>
-      <groupId>org.apache.pig</groupId>
-      <artifactId>pig</artifactId>
-      <version>0.11.1</version>
-    </dependency>
-    <dependency>
-      <groupId>org.apache.pig</groupId>
-      <artifactId>pigunit</artifactId>
-      <version>0.11.1</version>
-    </dependency>
-    <dependency>
-      <groupId>com.google.guava</groupId>
-      <artifactId>guava</artifactId>
-      <version>r06</version>
-    </dependency>
-    <dependency>
-      <groupId>joda-time</groupId>
-      <artifactId>joda-time</artifactId>
-      <version>1.6</version>
-    </dependency>
-    <dependency>
-      <groupId>it.unimi.dsi</groupId>
-      <artifactId>fastutil</artifactId>
-      <version>6.3</version>
-    </dependency>
-  </dependencies>
-
-  <build>
-  </build>
-
-</project>

http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-tests/test-artifacts/datafu/src/main/groovy/java/datafu/linkanalysis/PageRank.java
----------------------------------------------------------------------
diff --git 
a/bigtop-tests/test-artifacts/datafu/src/main/groovy/java/datafu/linkanalysis/PageRank.java
 
b/bigtop-tests/test-artifacts/datafu/src/main/groovy/java/datafu/linkanalysis/PageRank.java
deleted file mode 100644
index 2cadcf9..0000000
--- 
a/bigtop-tests/test-artifacts/datafu/src/main/groovy/java/datafu/linkanalysis/PageRank.java
+++ /dev/null
@@ -1,441 +0,0 @@
-/*
- * Copyright 2010 LinkedIn, Inc
- * 
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy 
of
- * the License at
- * 
- * http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations 
under
- * the License.
- */
- 
-package datafu.linkanalysis;
-
-import it.unimi.dsi.fastutil.floats.FloatArrayList;
-import it.unimi.dsi.fastutil.ints.Int2IntMap;
-import it.unimi.dsi.fastutil.ints.Int2IntOpenHashMap;
-import it.unimi.dsi.fastutil.ints.IntArrayList;
-
-import java.io.BufferedInputStream;
-import java.io.BufferedOutputStream;
-import java.io.DataInputStream;
-import java.io.DataOutputStream;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.Map;
-
-import com.google.common.collect.AbstractIterator;
-
-/**
- * An implementation of {@link <a href="http://en.wikipedia.org/wiki/PageRank" 
target="_blank">PageRank</a>}.
- * This implementation is not distributed.  It is intended for graphs of a 
reasonable size which can be processed
- * on a single machine.  Nodes are stored in memory.  Edges are stored in 
memory and can optionally be spilled to
- * disk once a certain limit is reached.  
- */
-public class PageRank
-{    
-  private float totalRankChange;
-  private long edgeCount;
-  private long nodeCount;
-  
-  // the damping factor
-  private static float ALPHA = 0.85f;
-  
-  // edge weights (which are doubles) are multiplied by this value so they can 
be stored as integers internally
-  private static float EDGE_WEIGHT_MULTIPLIER = 100000;
-    
-  private final Int2IntOpenHashMap nodeIndices = new Int2IntOpenHashMap();
-  private final FloatArrayList nodeData = new FloatArrayList(); // rank, total 
weight, contribution, (repeat)
-  
-  private final IntArrayList danglingNodes = new IntArrayList();
-  
-  private final IntArrayList edges = new IntArrayList(); // source, dest node 
count... dest id, weight pos, (repeat)
-  
-  private boolean shouldHandleDanglingNodes = false;
-  private boolean shouldCacheEdgesOnDisk = false;
-  private long edgeCachingThreshold;
-  
-  private File edgesFile;
-  private DataOutputStream edgeDataOutputStream;
-  private boolean usingEdgeDiskCache;
-  
-  public interface ProgressIndicator
-  {
-    void progress();
-  }
-  
-  public void clear() throws IOException
-  {
-    this.edgeCount = 0;
-    this.nodeCount = 0;
-    this.totalRankChange = 0.0f;
-    
-    this.nodeIndices.clear();
-    this.nodeData.clear();
-    this.edges.clear();
-    this.danglingNodes.clear();
-    
-    if (edgeDataOutputStream != null)
-    {
-      this.edgeDataOutputStream.close();
-      this.edgeDataOutputStream = null;
-    }
-    
-    this.usingEdgeDiskCache = false;
-    this.edgesFile = null;
-  }
-  
-  /**
-   * Gets whether disk is being used to cache edges.
-   * @return True if the edges are cached on disk.
-   */
-  public boolean isUsingEdgeDiskCache()
-  {
-    return usingEdgeDiskCache;
-  }
-  
-  /**
-   * Enable disk caching of edges once there are too many (disabled by 
default).
-   */
-  public void enableEdgeDiskCaching()
-  {
-    shouldCacheEdgesOnDisk = true;
-  }
-  
-  /**
-   * Disable disk caching of edges once there are too many (disabled by 
default).
-   */
-  public void disableEdgeDiskCaching()
-  {
-    shouldCacheEdgesOnDisk = false;
-  }
-  
-  /**
-   * Gets whether edge disk caching is enabled.
-   * @return True if edge disk caching is enabled.
-   */
-  public boolean isEdgeDiskCachingEnabled()
-  {
-    return shouldCacheEdgesOnDisk;
-  }
-  
-  /**
-   * Gets the number of edges past which they will be cached on disk instead 
of in memory.
-   * Edge disk caching must be enabled for this to have any effect.
-   * @return Edge count past which caching occurs
-   */
-  public long getEdgeCachingThreshold()
-  {
-    return edgeCachingThreshold;
-  }
-
-  /**
-   * Set the number of edges past which they will be cached on disk instead of 
in memory.
-   * Edge disk caching must be enabled for this to have any effect.
-   * @param count Edge count past which caching occurs
-   */
-  public void setEdgeCachingThreshold(long count)
-  {
-    edgeCachingThreshold = count;
-  }
-  
-  /**
-   * Enables dangling node handling (disabled by default).
-   */
-  public void enableDanglingNodeHandling()
-  {
-    shouldHandleDanglingNodes = true;
-  }
-  
-  /**
-   * Disables dangling node handling (disabled by default).
-   */
-  public void disableDanglingNodeHandling()
-  {
-    shouldHandleDanglingNodes = false;
-  }
-  
-  public long nodeCount()
-  {
-    return this.nodeCount;
-  }
-  
-  public long edgeCount()
-  {
-    return this.edgeCount;
-  }
-
-  public Int2IntMap.FastEntrySet getNodeIds()
-  {
-    return this.nodeIndices.int2IntEntrySet();
-  }
-  
-  public float getNodeRank(int nodeId)
-  {
-    int nodeIndex = this.nodeIndices.get(nodeId);
-    return nodeData.get(nodeIndex);
-  }
-  
-  public float getTotalRankChange()
-  {
-    return this.totalRankChange;
-  }
-  
-  private void maybeCreateNode(int nodeId)
-  {
-    // create from node if it doesn't already exist
-    if (!nodeIndices.containsKey(nodeId))
-    {      
-      int index = this.nodeData.size();
-      
-      this.nodeData.add(0.0f); // rank
-      this.nodeData.add(0.0f); // total weight
-      this.nodeData.add(0.0f); // contribution
-      
-      this.nodeIndices.put(nodeId, index);
-      
-      this.nodeCount++;
-    }
-  }
-  
-  public void addEdges(Integer sourceId, ArrayList<Map<String,Object>> 
sourceEdges) throws IOException
-  {
-    int source = sourceId.intValue();
-   
-    maybeCreateNode(source);
-    
-    if (this.shouldCacheEdgesOnDisk && !usingEdgeDiskCache && 
(sourceEdges.size() + this.edgeCount) >= this.edgeCachingThreshold)
-    {
-      writeEdgesToDisk();
-    }
-    
-    // store the source node id itself
-    appendEdgeData(source);
-    
-    // store how many outgoing edges this node has
-    appendEdgeData(sourceEdges.size());
-    
-    // store the outgoing edges
-    for (Map<String,Object> edge : sourceEdges)
-    {
-      int dest = ((Integer)edge.get("dest")).intValue();
-      float weight = ((Double)edge.get("weight")).floatValue();
-            
-      maybeCreateNode(dest);
-      
-      appendEdgeData(dest);
-      
-      // location of weight in weights array
-      appendEdgeData(Math.max(1, (int)(weight * EDGE_WEIGHT_MULTIPLIER)));
-      
-      this.edgeCount++;
-    }
-  }
-  
-  private void appendEdgeData(int data) throws IOException
-  {
-    if (this.edgeDataOutputStream != null)
-    {
-      this.edgeDataOutputStream.writeInt(data);
-    }
-    else
-    {
-      this.edges.add(data);
-    }
-  }
-    
-  public void init(ProgressIndicator progressIndicator) throws IOException
-  {
-    if (this.edgeDataOutputStream != null)
-    {
-      this.edgeDataOutputStream.close();
-      this.edgeDataOutputStream = null;
-    }
-    
-    // initialize all nodes to an equal share of the total rank (1.0)
-    float nodeRank = 1.0f / this.nodeCount;        
-    for (int j=0; j<this.nodeData.size(); j+=3)
-    {
-      nodeData.set(j, nodeRank);      
-      progressIndicator.progress();
-    }      
-    
-    Iterator<Integer> edgeData = getEdgeData();
-    
-    while(edgeData.hasNext())
-    {
-      int sourceId = edgeData.next();
-      int nodeEdgeCount = edgeData.next();
-      
-      while (nodeEdgeCount-- > 0)
-      {
-        // skip the destination node id
-        edgeData.next();
-        
-        float weight = edgeData.next();
-                
-        int nodeIndex = this.nodeIndices.get(sourceId);
-        
-        float totalWeight = this.nodeData.getFloat(nodeIndex+1); 
-        totalWeight += weight;
-        this.nodeData.set(nodeIndex+1, totalWeight);
-        
-        progressIndicator.progress();
-      }
-    }
-    
-    // if handling dangling nodes, get a list of them by finding those nodes with no outgoing
-    // edges (i.e. total outgoing edge weight is 0.0)
-    if (shouldHandleDanglingNodes)
-    {
-      for (Map.Entry<Integer,Integer> e : nodeIndices.entrySet())
-      {
-        int nodeId = e.getKey();
-        int nodeIndex = e.getValue();
-        float totalWeight = nodeData.getFloat(nodeIndex+1);
-        if (totalWeight == 0.0f)
-        {
-          danglingNodes.add(nodeId);
-        }
-      }
-    }
-  }
-  
-  public float nextIteration(ProgressIndicator progressIndicator) throws 
IOException
-  {
-    distribute(progressIndicator);
-    commit(progressIndicator);
-    
-    return getTotalRankChange();
-  }
-  
-  public void distribute(ProgressIndicator progressIndicator) throws 
IOException
-  {    
-    Iterator<Integer> edgeData = getEdgeData();
-    
-    while(edgeData.hasNext())
-    {
-      int sourceId = edgeData.next();
-      int nodeEdgeCount = edgeData.next();
-      
-      while (nodeEdgeCount-- > 0)
-      {
-        int toId = edgeData.next();
-        float weight = edgeData.next();
-                
-        int fromNodeIndex = this.nodeIndices.get(sourceId);
-        int toNodeIndex = this.nodeIndices.get(toId);
-        
-        float contributionChange = weight * 
this.nodeData.getFloat(fromNodeIndex) / this.nodeData.getFloat(fromNodeIndex+1);
-        
-        float currentContribution = this.nodeData.getFloat(toNodeIndex+2);
-        this.nodeData.set(toNodeIndex+2, currentContribution + 
contributionChange);
-        
-        progressIndicator.progress();
-      }      
-    }
-    
-    if (shouldHandleDanglingNodes)
-    {
-      // get the rank from each of the dangling nodes
-      float totalRank = 0.0f;
-      for (int nodeId : danglingNodes)
-      {
-        int nodeIndex = nodeIndices.get(nodeId);
-        float rank = nodeData.get(nodeIndex);
-        totalRank += rank;
-      }
-      
-      // distribute the dangling node ranks to all the nodes in the graph
-      // note: the alpha factor is applied in the commit stage
-      float contributionIncrease = totalRank / this.nodeCount;
-      for (int i=2; i<nodeData.size(); i += 3)
-      {
-        float contribution = nodeData.getFloat(i);
-        contribution += contributionIncrease;
-        nodeData.set(i, contribution);
-      }
-    }
-  }
-  
-  public void commit(ProgressIndicator progressIndicator)
-  {
-    this.totalRankChange = 0.0f;
-    
-    for (int id : nodeIndices.keySet())
-    {
-      int nodeIndex = this.nodeIndices.get(id);
-      
-      float alpha = datafu.linkanalysis.PageRank.ALPHA;
-      float newRank = (1.0f - alpha)/nodeCount + alpha * 
this.nodeData.get(nodeIndex+2);
-      
-      this.nodeData.set(nodeIndex+2, 0.0f);
-      
-      float lastRankDiff = newRank - this.nodeData.get(nodeIndex);
-      
-      this.nodeData.set(nodeIndex, newRank);
-      
-      this.totalRankChange += Math.abs(lastRankDiff);
-      
-      progressIndicator.progress();
-    }
-  }
-  
-  private void writeEdgesToDisk() throws IOException
-  { 
-    this.edgesFile = File.createTempFile("fastgraph", null);
-    
-    FileOutputStream outStream = new FileOutputStream(this.edgesFile);
-    BufferedOutputStream bufferedStream = new BufferedOutputStream(outStream);
-    this.edgeDataOutputStream = new DataOutputStream(bufferedStream);
-    
-    for (int edgeData : edges)
-    {
-      this.edgeDataOutputStream.writeInt(edgeData);
-    }
-    
-    this.edges.clear();
-    usingEdgeDiskCache = true;
-  }
-  
-  private Iterator<Integer> getEdgeData() throws IOException
-  {
-    if (!usingEdgeDiskCache)
-    {
-      return this.edges.iterator();
-    }
-    else
-    {
-      FileInputStream fileInputStream = new FileInputStream(this.edgesFile);
-      BufferedInputStream inputStream = new 
BufferedInputStream(fileInputStream);
-      final DataInputStream dataInputStream = new DataInputStream(inputStream);
-      
-      return new AbstractIterator<Integer>() {
-        
-        @Override
-        protected Integer computeNext()
-        {
-          try
-          {
-            return dataInputStream.readInt();
-          }
-          catch (IOException e)
-          {
-            return endOfData();
-          }
-        }
-        
-      };
-    }
-  }
-}
-

http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-tests/test-artifacts/datafu/src/main/groovy/java/datafu/pig/linkanalysis/PageRank.java
----------------------------------------------------------------------
diff --git a/bigtop-tests/test-artifacts/datafu/src/main/groovy/java/datafu/pig/linkanalysis/PageRank.java b/bigtop-tests/test-artifacts/datafu/src/main/groovy/java/datafu/pig/linkanalysis/PageRank.java
deleted file mode 100644
index 2460fc2..0000000
--- a/bigtop-tests/test-artifacts/datafu/src/main/groovy/java/datafu/pig/linkanalysis/PageRank.java
+++ /dev/null
@@ -1,372 +0,0 @@
-/*
- * Copyright 2010 LinkedIn, Inc
- * 
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- * 
- * http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
- 
-package datafu.pig.linkanalysis;
-
-import it.unimi.dsi.fastutil.ints.Int2IntMap;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-import org.apache.pig.Accumulator;
-import org.apache.pig.EvalFunc;
-import org.apache.pig.data.BagFactory;
-import org.apache.pig.data.DataBag;
-import org.apache.pig.data.DataType;
-import org.apache.pig.data.Tuple;
-import org.apache.pig.data.TupleFactory;
-import org.apache.pig.impl.logicalLayer.FrontendException;
-import org.apache.pig.impl.logicalLayer.schema.Schema;
-
-import datafu.linkanalysis.PageRank.ProgressIndicator;
-
-
-/**
- * A UDF which implements {@link <a href="http://en.wikipedia.org/wiki/PageRank" target="_blank">PageRank</a>}.  
- * Each graph is stored in memory while running the algorithm, with edges optionally 
- * spilled to disk to conserve memory.  This can be used to distribute the execution of PageRank on a large number of 
- * reasonable sized graphs.  It does not distribute execuion of PageRank on a single graph.  Each graph is identified
- * by an integer valued topic ID.
- * <p>
- * Example:
- * <pre>
- * {@code
- * 
- * topic_edges = LOAD 'input_edges' as (topic:INT,source:INT,dest:INT,weight:DOUBLE);
- * 
- * topic_edges_grouped = GROUP topic_edges by (topic, source) ;
- * topic_edges_grouped = FOREACH topic_edges_grouped GENERATE
- *    group.topic as topic,
- *    group.source as source,
- *    topic_edges.(dest,weight) as edges;
- * 
- * topic_edges_grouped_by_topic = GROUP topic_edges_grouped BY topic; 
- * 
- * topic_ranks = FOREACH topic_edges_grouped_by_topic GENERATE
- *    group as topic,
- *    FLATTEN(PageRank(topic_edges_grouped.(source,edges))) as (source,rank);
- *
- * skill_ranks = FOREACH skill_ranks GENERATE
- *    topic, source, rank;
- * 
- * }
- * </pre> 
- */
-public class PageRank extends EvalFunc<DataBag> implements Accumulator<DataBag>
-{
-  private final datafu.linkanalysis.PageRank graph = new 
datafu.linkanalysis.PageRank();
-
-  private int maxNodesAndEdges = 100000000;
-  private int maxEdgesInMemory = 30000000;
-  private double tolerance = 1e-16;
-  private int maxIters = 150;
-  private boolean useEdgeDiskStorage = false;
-  private boolean enableDanglingNodeHandling = false;
-  private boolean aborted = false;
-
-  TupleFactory tupleFactory = TupleFactory.getInstance();
-  BagFactory bagFactory = BagFactory.getInstance();
-  
-  public PageRank()
-  {
-    initialize();
-  }
-
-  public PageRank(String... parameters)
-  {
-    if (parameters.length % 2 != 0)
-    {
-      throw new RuntimeException("Invalid parameters list");
-    }
-
-    for (int i=0; i<parameters.length; i+=2)
-    {
-      String parameterName = parameters[i];
-      String value = parameters[i+1];
-      if (parameterName.equals("max_nodes_and_edges"))
-      {
-        maxNodesAndEdges = Integer.parseInt(value);
-      }
-      else if (parameterName.equals("max_edges_in_memory"))
-      {
-        maxEdgesInMemory = Integer.parseInt(value);
-      }
-      else if (parameterName.equals("tolerance"))
-      {
-        tolerance = Double.parseDouble(value);
-      }
-      else if (parameterName.equals("max_iters"))
-      {
-        maxIters = Integer.parseInt(value);
-      }
-      else if (parameterName.equals("spill_to_edge_disk_storage"))
-      {
-        useEdgeDiskStorage = Boolean.parseBoolean(value);
-      }
-      else if (parameterName.equals("dangling_nodes"))
-      {
-        enableDanglingNodeHandling = Boolean.parseBoolean(value);
-      }
-    }
-
-    initialize();
-  }
-
-  private void initialize()
-  {
-    long heapSize = Runtime.getRuntime().totalMemory();
-    long heapMaxSize = Runtime.getRuntime().maxMemory();
-    long heapFreeSize = Runtime.getRuntime().freeMemory();
-//    System.out.println(String.format("Heap size: %d, Max heap size: %d, Heap free size: %d", heapSize, heapMaxSize, heapFreeSize));
-
-    if (useEdgeDiskStorage)
-    {
-      this.graph.enableEdgeDiskCaching();
-    }
-    else
-    {
-      this.graph.disableEdgeDiskCaching();
-    }
-
-    if (enableDanglingNodeHandling)
-    {
-      this.graph.enableDanglingNodeHandling();
-    }
-    else
-    {
-      this.graph.disableDanglingNodeHandling();
-    }
-
-    this.graph.setEdgeCachingThreshold(maxEdgesInMemory);
-  }
-
-  @Override
-  public void accumulate(Tuple t) throws IOException
-  {
-    if (aborted)
-    {
-      return;
-    }
-    
-    DataBag bag = (DataBag) t.get(0);
-    if (bag == null || bag.size() == 0)
-      return;
-    
-    for (Tuple sourceTuple : bag) 
-    {
-      Integer sourceId = (Integer)sourceTuple.get(0);
-      DataBag edges = (DataBag)sourceTuple.get(1);
-
-      ArrayList<Map<String,Object>> edgesMapList = new ArrayList<Map<String, 
Object>>();
-
-      for (Tuple edgeTuple : edges)
-      {
-        Integer destId = (Integer)edgeTuple.get(0);
-        Double weight = (Double)edgeTuple.get(1);
-        HashMap<String,Object> edgeMap = new HashMap<String, Object>();
-        edgeMap.put("dest",destId);
-        edgeMap.put("weight",weight);
-        edgesMapList.add(edgeMap);
-      }
-
-      graph.addEdges(sourceId, edgesMapList);
-
-      if (graph.nodeCount() + graph.edgeCount() > maxNodesAndEdges)
-      {
-        System.out.println(String.format("There are too many nodes and edges 
(%d + %d > %d). Aborting.", graph.nodeCount(), graph.edgeCount(), 
maxNodesAndEdges));
-        aborted = true;
-      }
-
-      reporter.progress();
-    }
-  }
-
-  @Override
-  public DataBag getValue()
-  {
-    if (aborted)
-    {
-      return null;
-    }
-    
-    System.out.println(String.format("Nodes: %d, Edges: %d", 
graph.nodeCount(), graph.edgeCount()));
-    
-    ProgressIndicator progressIndicator = getProgressIndicator();
-    System.out.println("Finished loading graph.");
-    long startTime = System.nanoTime();
-    System.out.println("Initializing.");
-    try
-    {
-      graph.init(progressIndicator);
-    }
-    catch (IOException e)
-    {
-      e.printStackTrace();
-      return null;
-    }
-    System.out.println(String.format("Done, took %f ms", (System.nanoTime() - 
startTime)/10.0e6));
-
-    float totalDiff;
-    int iter = 0;
-
-    System.out.println("Beginning iterations");
-    startTime = System.nanoTime();
-    do
-    {
-      // TODO log percentage complete every 5 minutes
-      try
-      {
-        totalDiff = graph.nextIteration(progressIndicator);
-      }
-      catch (IOException e)
-      {
-        e.printStackTrace();
-        return null;
-      }
-      iter++;
-    } while(iter < maxIters && totalDiff > tolerance);
-    System.out.println(String.format("Done, %d iterations took %f ms", iter, 
(System.nanoTime() - startTime)/10.0e6));
-
-    DataBag output = bagFactory.newDefaultBag();
-
-    for (Int2IntMap.Entry node : graph.getNodeIds())
-    {
-      int nodeId = node.getIntKey();
-      float rank = graph.getNodeRank(nodeId);
-      List nodeData = new ArrayList(2);
-      nodeData.add(nodeId);
-      nodeData.add(rank);
-      output.add(tupleFactory.newTuple(nodeData));
-    }
-
-    return output;
-  }
-
-  @Override
-  public void cleanup()
-  {
-    try
-    {
-      aborted = false;
-      this.graph.clear();
-    }
-    catch (IOException e)
-    { 
-      e.printStackTrace();
-    }
-  }
-
-  @Override
-  public DataBag exec(Tuple input) throws IOException
-  {
-    try
-    {
-      accumulate(input);
-      
-      return getValue();
-    }
-    finally
-    {
-      cleanup();
-    }
-  }
-
-  private ProgressIndicator getProgressIndicator()
-  {
-    return new ProgressIndicator()
-        {
-          @Override
-          public void progress()
-          {
-            reporter.progress();
-          }
-        };
-  }
-
-  @Override
-  public Schema outputSchema(Schema input)
-  {
-    try
-    {
-      Schema.FieldSchema inputFieldSchema = input.getField(0);
-
-      if (inputFieldSchema.type != DataType.BAG)
-      {
-        throw new RuntimeException("Expected a BAG as input");
-      }
-
-      Schema inputBagSchema = inputFieldSchema.schema;
-
-      if (inputBagSchema.getField(0).type != DataType.TUPLE)
-      {
-        throw new RuntimeException(String.format("Expected input bag to 
contain a TUPLE, but instead found %s",
-                                                 
DataType.findTypeName(inputBagSchema.getField(0).type)));
-      }
-      
-      Schema inputTupleSchema = inputBagSchema.getField(0).schema;
-      
-      if (inputTupleSchema.getField(0).type != DataType.INTEGER)
-      {
-        throw new RuntimeException(String.format("Expected source to be an 
INTEGER, but instead found %s",
-                                                 
DataType.findTypeName(inputTupleSchema.getField(0).type)));
-      }
-
-      if (inputTupleSchema.getField(1).type != DataType.BAG)
-      {
-        throw new RuntimeException(String.format("Expected edges to be 
represented with a BAG"));
-      }
-
-      Schema.FieldSchema edgesFieldSchema = inputTupleSchema.getField(1);
-
-      if (edgesFieldSchema.schema.getField(0).type != DataType.TUPLE)
-      {
-        throw new RuntimeException(String.format("Expected edges field to 
contain a TUPLE, but instead found %s",
-                                                 
DataType.findTypeName(edgesFieldSchema.schema.getField(0).type)));
-      }
-      
-      Schema edgesTupleSchema = edgesFieldSchema.schema.getField(0).schema;
-      
-      if (edgesTupleSchema.getField(0).type != DataType.INTEGER)
-      {
-        throw new RuntimeException(String.format("Expected destination edge ID 
to an INTEGER, but instead found %s",
-                                                 
DataType.findTypeName(edgesFieldSchema.schema.getField(0).type)));
-      }
-
-      if (edgesTupleSchema.getField(1).type != DataType.DOUBLE)
-      {
-        throw new RuntimeException(String.format("Expected destination edge 
weight to a DOUBLE, but instead found %s",
-                                                 
DataType.findTypeName(edgesFieldSchema.schema.getField(1).type)));
-      }
-
-      Schema tupleSchema = new Schema();
-      tupleSchema.add(new Schema.FieldSchema("node",DataType.INTEGER));
-      tupleSchema.add(new Schema.FieldSchema("rank",DataType.FLOAT));
-
-      return new Schema(new Schema.FieldSchema(getSchemaName(this.getClass()
-                                                                 .getName()
-                                                                 
.toLowerCase(), input),
-                                               tupleSchema,
-                                               DataType.BAG));
-    }
-    catch (FrontendException e)
-    {
-      throw new RuntimeException(e);
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/PigTests.java
----------------------------------------------------------------------
diff --git a/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/PigTests.java b/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/PigTests.java
deleted file mode 100644
index 8b11111..0000000
--- a/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/PigTests.java
+++ /dev/null
@@ -1,211 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * <p/>
- * http://www.apache.org/licenses/LICENSE-2.0
- * <p/>
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.bigtop.itest.datafu;
-
-import static org.junit.Assert.*;
-
-import java.io.BufferedInputStream;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileWriter;
-import java.io.FilenameFilter;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
-import java.util.regex.Pattern;
-
-import org.apache.commons.io.IOUtils;
-import org.apache.pig.data.Tuple;
-import org.apache.pig.pigunit.PigTest;
-import org.apache.pig.tools.parameters.ParseException;
-
-public abstract class PigTests
-{    
-  protected String[] getDefaultArgs()
-  {
-    String[] args = {
-        "JAR_PATH=" + getJarPath()
-      };
-    return args;
-  }
-  
-  protected List<String> getDefaultArgsAsList()
-  {
-    String[] args = getDefaultArgs();
-    List<String> argsList = new ArrayList<String>(args.length);
-    for (String arg : args)
-    {
-      argsList.add(arg);
-    }
-    return argsList;
-  }
-  
-  protected PigTest createPigTest(String scriptPath, String... args) throws 
IOException
-  {
-    // append args to list of default args
-    List<String> theArgs = getDefaultArgsAsList();
-    for (String arg : args)
-    {
-      theArgs.add(arg);
-    }
-    
-    String[] lines = getLinesFromFile(scriptPath);
-    
-    for (String arg : theArgs)
-    {
-      String[] parts = arg.split("=",2);
-      if (parts.length == 2)
-      {
-        for (int i=0; i<lines.length; i++)
-        {
-          lines[i] = lines[i].replaceAll(Pattern.quote("$" + parts[0]), 
parts[1]);
-        }
-      }
-    }
-    
-    return new PigTest(lines);
-  }
-  
-  protected PigTest createPigTest(String scriptPath) throws IOException
-  {
-    return createPigTest(scriptPath, getDefaultArgs());
-  }
-  
-  protected String getJarPath()
-  {
-    String jarDir = "dist";
-    if (System.getProperty("datafu.jar.dir") != null)
-    {
-      jarDir = System.getProperty("datafu.jar.dir");
-    }
-    
-    String jarDirPath = new File(/* System.getProperty("user.dir"), */ 
jarDir).getAbsolutePath();
-            
-    File userDir = new File(jarDirPath);
-    
-    String[] files = userDir.list(new FilenameFilter() {
-
-      @Override
-      public boolean accept(File dir, String name)
-      {
-        return name.startsWith("datafu") && name.endsWith(".jar") && 
!name.contains("sources") && !name.contains("javadoc");
-      }
-      
-    });
-    
-    if (files.length == 0)
-    {
-      throw new RuntimeException("Could not find JAR file");
-    }
-    else if (files.length > 1)
-    {
-      throw new RuntimeException("Found more JAR files than expected");
-    }
-    
-    return  userDir.getAbsolutePath() + "/" + files[0];
-  }
-  
-  protected List<Tuple> getLinesForAlias(PigTest test, String alias) throws 
IOException, ParseException
-  {
-    return getLinesForAlias(test,alias,true);
-  }
-  
-  protected List<Tuple> getLinesForAlias(PigTest test, String alias, boolean 
logValues) throws IOException, ParseException
-  {
-    Iterator<Tuple> tuplesIterator = test.getAlias(alias);
-    List<Tuple> tuples = new ArrayList<Tuple>();
-    if (logValues)
-    {
-      System.out.println(String.format("Values for %s: ", alias));
-    }
-    while (tuplesIterator.hasNext())
-    {
-      Tuple tuple = tuplesIterator.next();
-      if (logValues)
-      {
-        System.out.println(tuple.toString());
-      }
-      tuples.add(tuple);
-    }
-    return tuples;
-  }
-    
-  protected void writeLinesToFile(String fileName, String... lines) throws 
IOException
-  {
-    File inputFile = deleteIfExists(getFile(fileName));
-    writeLinesToFile(inputFile, lines);
-  }
-  
-  protected void writeLinesToFile(File file, String[] lines) throws IOException
-  {
-    FileWriter writer = new FileWriter(file);
-    for (String line : lines)
-    {
-      writer.write(line + "\n");
-    }
-    writer.close();
-  }
-
-  protected void assertOutput(PigTest test, String alias, String... expected) 
throws IOException, ParseException
-  {
-    List<Tuple> tuples = getLinesForAlias(test, alias);
-    assertEquals(expected.length, tuples.size());
-    int i=0;
-    for (String e : expected)
-    {
-      assertEquals(e, tuples.get(i++).toString());
-    }
-  }
-  
-  protected File deleteIfExists(File file)
-  {
-    if (file.exists())
-    {
-      file.delete();
-    }
-    return file;
-  }
-  
-  protected File getFile(String fileName)
-  {
-    return new File(System.getProperty("user.dir"), 
fileName).getAbsoluteFile();
-  }
-  
-  /**
-   * Gets the lines from a given file.
-   * 
-   * @param relativeFilePath The path relative to the datafu-tests project.
-   * @return The lines from the file
-   * @throws IOException
-   */
-  protected String[] getLinesFromFile(String relativeFilePath) throws 
IOException
-  {
-    // assume that the working directory is the datafu-tests project
-    File file = new File(System.getProperty("user.dir"), 
relativeFilePath).getAbsoluteFile();
-    BufferedInputStream content = new BufferedInputStream(new 
FileInputStream(file));
-    Object[] lines = IOUtils.readLines(content).toArray();
-    String[] result = new String[lines.length];
-    for (int i=0; i<lines.length; i++)
-    {
-      result[i] = (String)lines[i];
-    }
-    return result;
-  }
-}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/bags/BagTests.java
----------------------------------------------------------------------
diff --git a/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/bags/BagTests.java b/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/bags/BagTests.java
deleted file mode 100644
index 8e72846..0000000
--- a/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/bags/BagTests.java
+++ /dev/null
@@ -1,308 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * <p/>
- * http://www.apache.org/licenses/LICENSE-2.0
- * <p/>
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.bigtop.itest.datafu.bags;
-
-import org.apache.pig.pigunit.PigTest;
-import org.junit.Test;
-
-import org.apache.bigtop.itest.datafu.PigTests;
-
-
-public class BagTests extends PigTests
-{
-  @Test
-  public void nullToEmptyBagTest() throws Exception
-  {
-    PigTest test = createPigTest("datafu/bags/nullToEmptyBagTest.pig");
-            
-    writeLinesToFile("input", 
-                     "({(1),(2),(3),(4),(5)})",
-                     "()",
-                     "{(4),(5)})");
-            
-    test.runScript();
-        
-    assertOutput(test, "data2",
-                 "({(1),(2),(3),(4),(5)})",
-                 "({})",
-                 "({(4),(5)})");
-  }
-  
-  @Test
-  public void appendToBagTest() throws Exception
-  {
-    PigTest test = createPigTest("datafu/bags/appendToBagTest.pig");
-    
-    writeLinesToFile("input", 
-                     "1\t{(1),(2),(3)}\t(4)",
-                     "2\t{(10),(20),(30),(40),(50)}\t(60)");
-                  
-    test.runScript();
-            
-    assertOutput(test, "data2",
-                 "(1,{(1),(2),(3),(4)})",
-                 "(2,{(10),(20),(30),(40),(50),(60)})");
-  }
-
-   @Test
-  public void firstTupleFromBagTest() throws Exception
-  {
-    PigTest test = createPigTest("datafu/bags/firstTupleFromBagTest.pig");
-
-    writeLinesToFile("input", "1\t{(4),(9),(16)}");
-
-    test.runScript();
-
-    assertOutput(test, "data2", "(1,(4))");
-  }
-
-  
-  @Test
-  public void prependToBagTest() throws Exception
-  {
-    PigTest test = createPigTest("datafu/bags/prependToBagTest.pig");
-    
-    writeLinesToFile("input", 
-                     "1\t{(1),(2),(3)}\t(4)",
-                     "2\t{(10),(20),(30),(40),(50)}\t(60)");
-                  
-    test.runScript();
-            
-    assertOutput(test, "data2",
-                 "(1,{(4),(1),(2),(3)})",
-                 "(2,{(60),(10),(20),(30),(40),(50)})");
-  }
-  
-  @Test
-  public void bagConcatTest() throws Exception
-  {
-    PigTest test = createPigTest("datafu/bags/bagConcatTest.pig");
-
-    writeLinesToFile("input", 
-                     "({(1),(2),(3)}\t{(3),(5),(6)}\t{(10),(13)})",
-                     "({(2),(3),(4)}\t{(5),(5)}\t{(20)})");
-                  
-    test.runScript();
-            
-    assertOutput(test, "data2",
-                 "({(1),(2),(3),(3),(5),(6),(10),(13)})",
-                 "({(2),(3),(4),(5),(5),(20)})");
-  }
-  
-  @Test
-  public void unorderedPairsTest() throws Exception
-  {
-    PigTest test = createPigTest("datafu/bags/unorderedPairsTests.pig");
-    
-    String[] input = {
-      "{(1),(2),(3),(4),(5)}"
-    };
-    
-    String[] output = {
-        "(1,2)",
-        "(1,3)",
-        "(1,4)",
-        "(1,5)",
-        "(2,3)",
-        "(2,4)",
-        "(2,5)",
-        "(3,4)",
-        "(3,5)",
-        "(4,5)"
-      };
-    
-    test.assertOutput("data",input,"data4",output);
-  }
-  
-  @Test
-  public void unorderedPairsTest2() throws Exception
-  {
-    PigTest test = createPigTest("datafu/bags/unorderedPairsTests2.pig");
-        
-    this.writeLinesToFile("input", "1\t{(1),(2),(3),(4),(5)}");
-    
-    String[] output = {
-        "(1,2)",
-        "(1,3)",
-        "(1,4)",
-        "(1,5)",
-        "(2,3)",
-        "(2,4)",
-        "(2,5)",
-        "(3,4)",
-        "(3,5)",
-        "(4,5)"
-      };
-    
-    test.runScript();
-    this.getLinesForAlias(test, "data3");
-    
-    this.assertOutput(test, "data3",
-                      "(1,(1),(2))",
-                      "(1,(1),(3))",
-                      "(1,(1),(4))",
-                      "(1,(1),(5))",
-                      "(1,(2),(3))",
-                      "(1,(2),(4))",
-                      "(1,(2),(5))",
-                      "(1,(3),(4))",
-                      "(1,(3),(5))",
-                      "(1,(4),(5))");    
-  }
- 
-  @Test
-  public void bagSplitTest() throws Exception
-  {
-    PigTest test = createPigTest("datafu/bags/bagSplitTest.pig",
-                                 "MAX=5");
-    
-    writeLinesToFile("input", 
-                     
"{(1,11),(2,22),(3,33),(4,44),(5,55),(6,66),(7,77),(8,88),(9,99),(10,1010),(11,1111),(12,1212)}");
-    
-    test.runScript();
-    
-    assertOutput(test, "data3",
-                 "({(1,11),(2,22),(3,33),(4,44),(5,55)})",
-                 "({(6,66),(7,77),(8,88),(9,99),(10,1010)})",
-                 "({(11,1111),(12,1212)})");
-  }
-  
-  @Test
-  public void bagSplitWithBagNumTest() throws Exception
-  {
-    PigTest test = createPigTest("datafu/bags/bagSplitWithBagNumTest.pig",
-                                 "MAX=10");
-    
-    writeLinesToFile("input", 
-                     
"{(1,11),(2,22),(3,33),(4,44),(5,55),(6,66),(7,77),(8,88),(9,99),(10,1010),(11,1111),(12,1212)}");
-    
-    test.runScript();
-    
-    assertOutput(test, "data3",
-                 
"({(1,11),(2,22),(3,33),(4,44),(5,55),(6,66),(7,77),(8,88),(9,99),(10,1010)},0)",
-                 "({(11,1111),(12,1212)},1)");
-  }
-  
-  @Test
-  public void enumerateWithReverseTest() throws Exception
-  {
-    PigTest test = createPigTest("datafu/bags/enumerateWithReverseTest.pig");
-       
-    writeLinesToFile("input", 
-                     
"({(10,{(1),(2),(3)}),(20,{(4),(5),(6)}),(30,{(7),(8)}),(40,{(9),(10),(11)}),(50,{(12),(13),(14),(15)})})");
-    
-    test.runScript();
-    
-    assertOutput(test, "data4",
-                 "(10,{(1),(2),(3)},5)",
-                 "(20,{(4),(5),(6)},4)",
-                 "(30,{(7),(8)},3)",
-                 "(40,{(9),(10),(11)},2)",
-                 "(50,{(12),(13),(14),(15)},1)");
-  }
-  
-  @Test
-  public void enumerateWithStartTest() throws Exception
-  {
-    PigTest test = createPigTest("datafu/bags/enumerateWithStartTest.pig");
-       
-    writeLinesToFile("input", 
-                     
"({(10,{(1),(2),(3)}),(20,{(4),(5),(6)}),(30,{(7),(8)}),(40,{(9),(10),(11)}),(50,{(12),(13),(14),(15)})})");
-    
-    test.runScript();
-    
-    assertOutput(test, "data4",
-                 "(10,{(1),(2),(3)},1)",
-                 "(20,{(4),(5),(6)},2)",
-                 "(30,{(7),(8)},3)",
-                 "(40,{(9),(10),(11)},4)",
-                 "(50,{(12),(13),(14),(15)},5)");
-  }
-  
-  @Test
-  public void enumerateTest() throws Exception
-  {
-    PigTest test = createPigTest("datafu/bags/enumerateTest.pig");
-       
-    writeLinesToFile("input",
-                     
"({(10,{(1),(2),(3)}),(20,{(4),(5),(6)}),(30,{(7),(8)}),(40,{(9),(10),(11)}),(50,{(12),(13),(14),(15)})})");
-    
-    test.runScript();
-    
-    assertOutput(test, "data4",
-                 "(10,{(1),(2),(3)},0)",
-                 "(20,{(4),(5),(6)},1)",
-                 "(30,{(7),(8)},2)",
-                 "(40,{(9),(10),(11)},3)",
-                 "(50,{(12),(13),(14),(15)},4)");
-  }
-  
-  @Test
-  public void comprehensiveBagSplitAndEnumerate() throws Exception
-  {
-    PigTest test = 
createPigTest("datafu/bags/comprehensiveBagSplitAndEnumerate.pig");
-    
-    writeLinesToFile("input",
-                     "({(A,1.0),(B,2.0),(C,3.0),(D,4.0),(E,5.0)})");
-    
-    test.runScript();
-    
-    assertOutput(test, "data_out",
-                 // bag #1
-                 "(A,1.0,1)",
-                 "(B,2.0,1)",
-                 "(C,3.0,1)",
-                 // bag #2
-                 "(D,4.0,2)",
-                 "(E,5.0,2)");
-  }
-  
-  @Test
-  public void aliasBagFieldsTest() throws Exception
-  {
-    PigTest test = createPigTest("datafu/bags/aliasBagFieldsTest.pig");
-    
-    writeLinesToFile("input",
-                     "({(A,1,0),(B,2,0),(C,3,0),(D,4,0),(E,5,0)})");
-    
-    test.runScript();
-    
-    assertOutput(test, "data4",
-                 "(A,1)",
-                 "(B,2)",
-                 "(C,3)",
-                 "(D,4)",
-                 "(E,5)");
-  }
-
-  @Test
-  public void distinctByTest() throws Exception
-  {
-    PigTest test = createPigTest("datafu/bags/distinctByTest.pig");
-    
-    writeLinesToFile("input",
-                     
"({(Z,1,0),(A,1,0),(A,1,0),(B,2,0),(B,22,1),(C,3,0),(D,4,0),(E,5,0)})");
-    
-    test.runScript();
-    
-    assertOutput(test, "data2",
-                 "({(Z,1,0),(A,1,0),(B,2,0),(C,3,0),(D,4,0),(E,5,0)})");
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/bags/sets/SetTests.java
----------------------------------------------------------------------
diff --git a/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/bags/sets/SetTests.java b/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/bags/sets/SetTests.java
deleted file mode 100644
index 938ef3a..0000000
--- a/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/bags/sets/SetTests.java
+++ /dev/null
@@ -1,74 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * <p/>
- * http://www.apache.org/licenses/LICENSE-2.0
- * <p/>
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.bigtop.itest.datafu.bags.sets;
-
-import org.apache.pig.pigunit.PigTest;
-import org.junit.Test;
-
-import org.apache.bigtop.itest.datafu.PigTests;
-
-public class SetTests extends PigTests
-{
-  @Test
-  public void setIntersectTest() throws Exception
-  {
-    PigTest test = createPigTest("datafu/bags/sets/setIntersectTest.pig");
-    
-    String[] input = {
-      "{(1,10),(2,20),(3,30),(4,40),(5,50),(6,60)}\t{(0,0),(2,20),(4,40),(8,80)}",
-      "{(1,10),(1,10),(2,20),(3,30),(3,30),(4,40),(4,40)}\t{(1,10),(3,30)}"
-    };
-    
-    String[] output = {
-        "({(2,20),(4,40)})",
-        "({(1,10),(3,30)})"
-      };
-    
-    test.assertOutput("data",input,"data2",output);
-  }
-  
-  @Test
-  public void setIntersectOutOfOrderTest() throws Exception
-  {
-    PigTest test = createPigTest("datafu/bags/sets/setIntersectTest.pig");
-    
-    this.writeLinesToFile("input", 
-                          "{(1,10),(3,30),(2,20),(4,40),(5,50),(6,60)}\t{(0,0),(2,20),(4,40),(8,80)}");
-        
-    test.runScript();
-    
-    this.getLinesForAlias(test, "data2");
-  }
-  
-  @Test
-  public void setUnionTest() throws Exception
-  {
-    PigTest test = createPigTest("datafu/bags/sets/setUnionTest.pig");
-    
-    String[] input = {
-        "{(1,10),(1,20),(1,30),(1,40),(1,50),(1,60),(1,80)}\t{(1,1),(1,20),(1,25),(1,25),(1,25),(1,40),(1,70),(1,80)}"
-    };
-    
-    String[] output = {
-        "({(1,10),(1,20),(1,30),(1,40),(1,50),(1,60),(1,80),(1,1),(1,25),(1,70)})"
-      };
-    
-    test.assertOutput("data",input,"data2",output);
-  }
-}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/date/TimeTests.java
----------------------------------------------------------------------
diff --git a/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/date/TimeTests.java b/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/date/TimeTests.java
deleted file mode 100644
index 87fab7b..0000000
--- a/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/date/TimeTests.java
+++ /dev/null
@@ -1,65 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * <p/>
- * http://www.apache.org/licenses/LICENSE-2.0
- * <p/>
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.bigtop.itest.datafu.date;
-
-import org.apache.pig.pigunit.PigTest;
-import org.junit.Test;
-
-import org.apache.bigtop.itest.datafu.PigTests;
-
-public class TimeTests extends PigTests
-{  
-  @Test
-  public void timeCountPageViewsTest() throws Exception
-  {
-    PigTest test = createPigTest("datafu/date/timeCountPageViewsTest.pig",
-                                 "TIME_WINDOW=30m",
-                                 "JAR_PATH=" + getJarPath());
-        
-    String[] input = {
-      "1\t100\t2010-01-01T01:00:00Z",
-      "1\t100\t2010-01-01T01:15:00Z",
-      "1\t100\t2010-01-01T01:31:00Z",
-      "1\t100\t2010-01-01T01:35:00Z",
-      "1\t100\t2010-01-01T02:30:00Z",
-
-      "1\t101\t2010-01-01T01:00:00Z",
-      "1\t101\t2010-01-01T01:31:00Z",
-      "1\t101\t2010-01-01T02:10:00Z",
-      "1\t101\t2010-01-01T02:40:30Z",
-      "1\t101\t2010-01-01T03:30:00Z",      
-
-      "1\t102\t2010-01-01T01:00:00Z",
-      "1\t102\t2010-01-01T01:01:00Z",
-      "1\t102\t2010-01-01T01:02:00Z",
-      "1\t102\t2010-01-01T01:10:00Z",
-      "1\t102\t2010-01-01T01:15:00Z",
-      "1\t102\t2010-01-01T01:25:00Z",
-      "1\t102\t2010-01-01T01:30:00Z"
-    };
-    
-    String[] output = {
-        "(1,100,2)",
-        "(1,101,5)",
-        "(1,102,1)"
-      };
-    
-    test.assertOutput("views",input,"view_counts",output);
-  }
-}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/geo/GeoTests.java
----------------------------------------------------------------------
diff --git a/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/geo/GeoTests.java b/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/geo/GeoTests.java
deleted file mode 100644
index 12d9f97..0000000
--- a/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/geo/GeoTests.java
+++ /dev/null
@@ -1,75 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * <p/>
- * http://www.apache.org/licenses/LICENSE-2.0
- * <p/>
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.bigtop.itest.datafu.geo;
-
-import static org.junit.Assert.*;
-
-import java.util.List;
-
-import org.apache.pig.data.Tuple;
-import org.apache.pig.pigunit.PigTest;
-import org.junit.Test;
-
-import org.apache.bigtop.itest.datafu.PigTests;
-
-public class GeoTests extends PigTests
-{
-  @Test
-  public void haversineTest() throws Exception
-  {    
-    PigTest test = createPigTest("datafu/geo/haversineTest.pig");
-    
-    // Approximate latitude and longitude for major cities from maps.google.com
-    double[] la = {34.040143,-118.243103};
-    double[] tokyo = {35.637209,139.65271};
-    double[] ny = {40.716038,-73.99498};
-    double[] paris = {48.857713,2.342491};
-    double[] sydney = {-33.872696,151.195221};
-        
-    this.writeLinesToFile("input", 
-                          coords(la,tokyo),
-                          coords(ny,tokyo),
-                          coords(ny,sydney),
-                          coords(ny,paris));
-    
-    test.runScript();
-    
-    List<Tuple> distances = this.getLinesForAlias(test, "data2");
-    
-    // ensure distance is within 20 miles of expected (distances found online)
-    assertWithin(5478.0, distances.get(0), 20.0); // la <-> tokyo
-    assertWithin(6760.0, distances.get(1), 20.0); // ny <-> tokyo
-    assertWithin(9935.0, distances.get(2), 20.0); // ny <-> sydney
-    assertWithin(3635.0, distances.get(3), 20.0); // ny <-> paris
-    
-  }
-  
-  private void assertWithin(double expected, Tuple actual, double maxDiff) throws Exception
-  {
-    Double actualVal = (Double)actual.get(0);
-    assertTrue(Math.abs(expected-actualVal) < maxDiff);
-  }
-  
-  private String coords(double[] coords1, double[] coords2)
-  {
-    assertTrue(coords1.length == 2);
-    assertTrue(coords2.length == 2);
-    return String.format("%f\t%f\t%f\t%f", coords1[0], coords1[1], coords2[0], coords2[1]);
-  }
-}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/hash/HashTests.java
----------------------------------------------------------------------
diff --git a/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/hash/HashTests.java b/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/hash/HashTests.java
deleted file mode 100644
index e900c15..0000000
--- a/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/hash/HashTests.java
+++ /dev/null
@@ -1,63 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * <p/>
- * http://www.apache.org/licenses/LICENSE-2.0
- * <p/>
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.bigtop.itest.datafu.hash;
-
-import org.apache.pig.pigunit.PigTest;
-import org.junit.Test;
-
-import org.apache.bigtop.itest.datafu.PigTests;
-
-public class HashTests  extends PigTests
-{
-  @Test
-  public void md5Test() throws Exception
-  {
-    PigTest test = createPigTest("datafu/hash/md5Test.pig");
-    
-    writeLinesToFile("input", 
-                     "ladsljkasdglk",
-                     "lkadsljasgjskdjks",
-                     "aladlasdgjks");
-            
-    test.runScript();
-        
-    assertOutput(test, "data_out",
-                 "(d9a82575758bb4978949dc0659205cc6)",
-                 "(9ec37f02fae0d8d6a7f4453a62272f1f)",
-                 "(cb94139a8b9f3243e68a898ec6bd9b3d)");
-  }
-  
-  @Test
-  public void md5Base64Test() throws Exception
-  {
-    PigTest test = createPigTest("datafu/hash/md5Base64Test.pig");
-    
-    writeLinesToFile("input", 
-                     "ladsljkasdglk",
-                     "lkadsljasgjskdjks",
-                     "aladlasdgjks");
-            
-    test.runScript();
-        
-    assertOutput(test, "data_out",
-                 "(2agldXWLtJeJSdwGWSBcxg==)",
-                 "(nsN/Avrg2Nan9EU6YicvHw==)",
-                 "(y5QTmoufMkPmiomOxr2bPQ==)");
-  }
-}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/linkanalysis/PageRankTest.java
----------------------------------------------------------------------
diff --git a/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/linkanalysis/PageRankTest.java b/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/linkanalysis/PageRankTest.java
deleted file mode 100644
index 407815c..0000000
--- a/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/linkanalysis/PageRankTest.java
+++ /dev/null
@@ -1,299 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * <p/>
- * http://www.apache.org/licenses/LICENSE-2.0
- * <p/>
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.bigtop.itest.datafu.linkanalysis;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.Map;
-
-import org.junit.Test;
-
-public class PageRankTest
-{
-  @Test
-  public void wikipediaGraphInMemoryTest() throws Exception {
-    System.out.println();
-    System.out.println("Starting wikipediaGraphInMemoryTest");
-    
-    datafu.linkanalysis.PageRank graph = new datafu.linkanalysis.PageRank();
-   
-    String[] edges = getWikiExampleEdges();
-    
-    Map<String,Integer> nodeIdsMap = loadGraphFromEdgeList(graph, edges);
-    
-    // Without dangling node handling we will not get the true page rank since the total rank will
-    // not add to 1.0.  Without dangling node handling some of the page rank drains out of the graph.
-    graph.enableDanglingNodeHandling();
-    
-    performIterations(graph, 150, 1e-18f);
-    
-    String[] expectedRanks = getWikiExampleExpectedRanks();
-    
-    Map<String,Float> expectedRanksMap = parseExpectedRanks(expectedRanks);
-    
-    validateExpectedRanks(graph, nodeIdsMap, expectedRanksMap);
-  }
-  
-  @Test
-  public void wikipediaGraphDiskCacheTest() throws Exception {
-    System.out.println();
-    System.out.println("Starting wikipediaGraphDiskCacheTest");
-    
-    datafu.linkanalysis.PageRank graph = new datafu.linkanalysis.PageRank();
-    
-    String[] edges = getWikiExampleEdges();
-    
-    graph.enableEdgeDiskCaching();
-    graph.setEdgeCachingThreshold(5);
-    
-    Map<String,Integer> nodeIdsMap = loadGraphFromEdgeList(graph, edges);
-    
-    assert graph.isUsingEdgeDiskCache() : "Expected disk cache to be used";
-    
-    // Without dangling node handling we will not get the true page rank since the total rank will
-    // not add to 1.0.  Without dangling node handling some of the page rank drains out of the graph.
-    graph.enableDanglingNodeHandling();
-    
-    performIterations(graph, 150, 1e-18f);
-    
-    String[] expectedRanks = getWikiExampleExpectedRanks();
-    
-    Map<String,Float> expectedRanksMap = parseExpectedRanks(expectedRanks);
-    
-    validateExpectedRanks(graph, nodeIdsMap, expectedRanksMap);
-  }
-  
-  @Test
-  public void hubAndSpokeInMemoryTest() throws Exception {
-    System.out.println();
-    System.out.println("Starting hubAndSpokeInMemoryTest");
-    
-    datafu.linkanalysis.PageRank graph = new datafu.linkanalysis.PageRank();
-   
-    String[] edges = getHubAndSpokeEdges();
-    
-    Map<String,Integer> nodeIdsMap = loadGraphFromEdgeList(graph, edges);
-    
-    graph.enableDanglingNodeHandling();
-    
-    performIterations(graph, 150, 1e-18f);
-    
-    // no need to validate, this is just a perf test for runtime comparison
-  }
-  
-  @Test
-  public void hubAndSpokeDiskCacheTest() throws Exception {
-    System.out.println();
-    System.out.println("Starting hubAndSpokeDiskCacheTest");
-    
-    datafu.linkanalysis.PageRank graph = new datafu.linkanalysis.PageRank();
-   
-    String[] edges = getHubAndSpokeEdges();
-    
-    graph.enableEdgeDiskCaching();
-    graph.setEdgeCachingThreshold(5);
-    
-    Map<String,Integer> nodeIdsMap = loadGraphFromEdgeList(graph, edges);
-    
-    graph.enableDanglingNodeHandling();
-    
-    performIterations(graph, 150, 1e-18f);
-    
-    // no need to validate, this is just a perf test for runtime comparison
-  }
-  
-  private String[] getHubAndSpokeEdges()
-  {
-    int count = 50000;
-    String[] edges = new String[count];
-    
-    for (int i=0; i<count; i++)
-    {
-      edges[i] = String.format("S%d H", i);
-    }
-    return edges;
-  }
-  
-  public static String[] getWikiExampleEdges()
-  {
-    // graph taken from:
-    // http://en.wikipedia.org/wiki/PageRank
-    String[] edges = {
-        "B C",
-        "C B",
-        "D A",
-        "D B",
-        "E D",
-        "E B",
-        "E F",
-        "F E",
-        "F B",
-        "P1 B",
-        "P1 E",
-        "P2 B",
-        "P2 E",
-        "P3 B",
-        "P3 E",
-        "P4 E",
-        "P5 E"
-      };
-    return edges;
-  }
-  
-  public static String[] getWikiExampleExpectedRanks()
-  {
-    // these ranks come from the Wikipedia page:
-    // http://en.wikipedia.org/wiki/PageRank
-    String[] expectedRanks = {
-        "A 3.3",
-        "B 38.4",
-        "C 34.3",
-        "D 3.9",
-        "E 8.1",
-        "F 3.9",
-        "P1 1.6",
-        "P2 1.6",
-        "P3 1.6",
-        "P4 1.6",
-        "P5 1.6"      
-      };
-    return expectedRanks;
-  }
-  
-  private Map<String,Integer> loadGraphFromEdgeList(datafu.linkanalysis.PageRank graph, String[] edges) throws IOException
-  {
-    Map<Integer,ArrayList<Map<String,Object>>> nodeEdgesMap = new HashMap<Integer,ArrayList<Map<String,Object>>>();
-    Map<String,Integer> nodeIdsMap = new HashMap<String,Integer>();
-    
-    for (String edge : edges)
-    {
-      String[] parts = edge.split(" ");
-      assert parts.length == 2 : "Expected two parts";
-      
-      int sourceId = getOrCreateId(parts[0], nodeIdsMap);
-      int destId = getOrCreateId(parts[1], nodeIdsMap);
-      
-      Map<String,Object> edgeMap = new HashMap<String,Object>();
-      edgeMap.put("weight", 1.0);
-      edgeMap.put("dest", destId);
-      
-      ArrayList<Map<String,Object>> nodeEdges = null;
-      
-      if (nodeEdgesMap.containsKey(sourceId))
-      {
-        nodeEdges = nodeEdgesMap.get(sourceId);
-      }
-      else
-      {
-        nodeEdges = new ArrayList<Map<String,Object>>();
-        nodeEdgesMap.put(sourceId, nodeEdges);
-      }
-      
-      nodeEdges.add(edgeMap);
-    }
-    
-    for (Map.Entry<Integer, ArrayList<Map<String,Object>>> e : nodeEdgesMap.entrySet())
-    {
-      graph.addEdges(e.getKey(), e.getValue());
-    }
-    
-    return nodeIdsMap;
-  }
-  
-  private void performIterations(datafu.linkanalysis.PageRank graph, int maxIters, float tolerance) throws IOException
-  {
-    System.out.println(String.format("Beginning iteration (maxIters = %d, tolerance=%e)", maxIters, tolerance));
-    
-    datafu.linkanalysis.PageRank.ProgressIndicator progressIndicator = getDummyProgressIndicator();
-    
-    System.out.println("Initializing graph");
-    long startTime = System.nanoTime();
-    graph.init(progressIndicator);
-    System.out.println(String.format("Done, took %f ms", (System.nanoTime() - startTime)/10.0e6));
-    
-    float totalDiff;
-    int iter = 0;
-    
-    System.out.println("Beginning iterations");
-    startTime = System.nanoTime();
-    do 
-    {
-      totalDiff = graph.nextIteration(progressIndicator);
-      iter++;      
-    } while(iter < maxIters && totalDiff > tolerance);
-    System.out.println(String.format("Done, took %f ms", (System.nanoTime() - startTime)/10.0e6));
-  }
-  
-  private datafu.linkanalysis.PageRank.ProgressIndicator getDummyProgressIndicator()
-  {
-    return new datafu.linkanalysis.PageRank.ProgressIndicator()
-    {
-      @Override
-      public void progress()
-      {
-        // do nothing
-      }     
-    };
-  }
-  
-  private void validateExpectedRanks(datafu.linkanalysis.PageRank graph, Map<String,Integer> nodeIds, Map<String,Float> expectedRanks)
-  {
-    System.out.println("Validating page rank results");
-    
-    for (Map.Entry<String,Integer> e : nodeIds.entrySet())
-    {
-      float rank = graph.getNodeRank(e.getValue());
-      
-      float expectedRank = expectedRanks.get(e.getKey());
-      // require 0.1% accuracy
-      assert (Math.abs(expectedRank - rank*100.0f) < 0.1) : String.format("Did not get expected rank for %s", e.getKey());      
-    }
-    
-    System.out.println("All ranks match expected");
-  }
-  
-  public static Map<String,Float> parseExpectedRanks(String[] expectedRanks)
-  {
-    Map<String,Float> expectedRanksMap = new HashMap<String,Float>();
-    for (String expectedRankString : expectedRanks)
-    {
-      String[] parts = expectedRankString.split(" ");
-      assert parts.length == 2 : "Expected two parts";
-      String name = parts[0];
-      Float expectedRank = Float.parseFloat(parts[1]);
-      expectedRanksMap.put(name, expectedRank);
-    }
-    return expectedRanksMap;
-  }
-
-  private Integer getOrCreateId(String name, Map<String,Integer> nodeIds)
-  {
-    if (nodeIds.containsKey(name))
-    {
-      return nodeIds.get(name);
-    }
-    else
-    {
-      Integer id = nodeIds.size();
-      nodeIds.put(name, id);
-      return id;
-    }
-  }
-}

Reply via email to