http://git-wip-us.apache.org/repos/asf/jena/blob/a6c0fefc/jena-hadoop-rdf/hadoop-rdf-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/trix/TriXOutputTest.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/trix/TriXOutputTest.java b/jena-hadoop-rdf/hadoop-rdf-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/trix/TriXOutputTest.java
deleted file mode 100644
index 9b6e307..0000000
--- a/jena-hadoop-rdf/hadoop-rdf-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/trix/TriXOutputTest.java
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *     http://www.apache.org/licenses/LICENSE-2.0
- *     
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.jena.hadoop.rdf.io.output.trix;
-
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.mapreduce.OutputFormat;
-import org.apache.jena.hadoop.rdf.io.output.AbstractQuadOutputFormatTests;
-import org.apache.jena.hadoop.rdf.types.QuadWritable;
-import org.apache.jena.riot.Lang;
-
-/**
- * Tests for TriX output format
- */
-public class TriXOutputTest extends AbstractQuadOutputFormatTests {
-
-    @Override
-    protected String getFileExtension() {
-        return ".trix";
-    }
-
-    @Override
-    protected Lang getRdfLanguage() {
-        return Lang.TRIX;
-    }
-
-    @Override
-    protected OutputFormat<NullWritable, QuadWritable> getOutputFormat() {
-        return new TriXOutputFormat<NullWritable>();
-    }
-
-}

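[For context: the deleted test above plugs TriXOutputFormat into the shared quad-output harness. A minimal sketch, not part of this commit, of how such an OutputFormat is typically wired into a Hadoop job, mirroring the JobFactory pattern later in this commit; the driver class and output path are hypothetical placeholders, imports as in JobFactory, inside a method declared to throw IOException:

    Job job = Job.getInstance(new Configuration());
    job.setJarByClass(StatsDriver.class);                        // hypothetical driver class
    job.setOutputFormatClass(TriXOutputFormat.class);            // serialise quads as TriX
    job.setOutputKeyClass(NullWritable.class);                   // keys are ignored on output
    job.setOutputValueClass(QuadWritable.class);
    FileOutputFormat.setOutputPath(job, new Path("/out/trix"));  // placeholder path
]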
http://git-wip-us.apache.org/repos/asf/jena/blob/a6c0fefc/jena-hadoop-rdf/hadoop-rdf-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/turtle/BatchedTurtleOutputTest.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/turtle/BatchedTurtleOutputTest.java b/jena-hadoop-rdf/hadoop-rdf-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/turtle/BatchedTurtleOutputTest.java
deleted file mode 100644
index a6c4d70..0000000
--- a/jena-hadoop-rdf/hadoop-rdf-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/turtle/BatchedTurtleOutputTest.java
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *     http://www.apache.org/licenses/LICENSE-2.0
- *     
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.jena.hadoop.rdf.io.output.turtle;
-
-import java.util.Arrays;
-import java.util.Collection;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.mapreduce.OutputFormat;
-import org.apache.jena.hadoop.rdf.io.RdfIOConstants;
-import org.apache.jena.hadoop.rdf.io.output.AbstractTripleOutputFormatTests;
-import org.apache.jena.hadoop.rdf.types.TripleWritable;
-import org.apache.jena.riot.Lang;
-import org.junit.runner.RunWith;
-import org.junit.runners.Parameterized;
-import org.junit.runners.Parameterized.Parameters;
-
-
-/**
- * Tests for Turtle output
- * 
- * 
- * 
- */
-@RunWith(Parameterized.class)
-public class BatchedTurtleOutputTest extends AbstractTripleOutputFormatTests {
-
-    static long $bs1 = RdfIOConstants.DEFAULT_OUTPUT_BATCH_SIZE;
-    static long $bs2 = 1000;
-    static long $bs3 = 100;
-    static long $bs4 = 1;
-
-    /**
-     * @return Test parameters
-     */
-    @Parameters
-    public static Collection<Object[]> data() {
-        return Arrays.asList(new Object[][] { { $bs1 }, { $bs2 }, { $bs3 }, { $bs4 } });
-    }
-
-    private final long batchSize;
-
-    /**
-     * Creates new tests
-     * 
-     * @param batchSize
-     *            Batch size
-     */
-    public BatchedTurtleOutputTest(long batchSize) {
-        this.batchSize = batchSize;
-    }
-
-    @Override
-    protected String getFileExtension() {
-        return ".ttl";
-    }
-
-    @Override
-    protected Lang getRdfLanguage() {
-        return Lang.TURTLE;
-    }
-    
-    @Override
-    protected Configuration prepareConfiguration() {
-        Configuration config = super.prepareConfiguration();
-        config.setLong(RdfIOConstants.OUTPUT_BATCH_SIZE, this.batchSize);
-        return config;
-    }
-
-    @Override
-    protected OutputFormat<NullWritable, TripleWritable> getOutputFormat() {
-        return new BatchedTurtleOutputFormat<NullWritable>();
-    }
-
-}

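[As background: the Parameterized runner above constructs one BatchedTurtleOutputTest per row of data(), so the suite repeats for each batch size (the default, 1000, 100 and 1). A small sketch, not from this commit and under the same assumptions as the test, of how the batch size is set via the configuration and how a consumer of that setting might read it back:

    // Mirrors prepareConfiguration(): the test writes the batch size...
    Configuration config = new Configuration(false);
    config.setLong(RdfIOConstants.OUTPUT_BATCH_SIZE, 1000L);
    // ...and a batched record writer could read it back, falling back to the default
    long batchSize = config.getLong(RdfIOConstants.OUTPUT_BATCH_SIZE,
            RdfIOConstants.DEFAULT_OUTPUT_BATCH_SIZE);
]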
http://git-wip-us.apache.org/repos/asf/jena/blob/a6c0fefc/jena-hadoop-rdf/hadoop-rdf-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/turtle/StreamedTurtleOutputTest.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/turtle/StreamedTurtleOutputTest.java b/jena-hadoop-rdf/hadoop-rdf-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/turtle/StreamedTurtleOutputTest.java
deleted file mode 100644
index d8843d3..0000000
--- a/jena-hadoop-rdf/hadoop-rdf-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/turtle/StreamedTurtleOutputTest.java
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *     http://www.apache.org/licenses/LICENSE-2.0
- *     
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.jena.hadoop.rdf.io.output.turtle;
-
-import java.util.Arrays;
-import java.util.Collection;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.mapreduce.OutputFormat;
-import org.apache.jena.hadoop.rdf.io.RdfIOConstants;
-import org.apache.jena.hadoop.rdf.io.output.AbstractTripleOutputFormatTests;
-import org.apache.jena.hadoop.rdf.types.TripleWritable;
-import org.apache.jena.riot.Lang;
-import org.junit.runner.RunWith;
-import org.junit.runners.Parameterized;
-import org.junit.runners.Parameterized.Parameters;
-
-
-/**
- * Tests for Turtle output
- * 
- * 
- * 
- */
-@RunWith(Parameterized.class)
-public class StreamedTurtleOutputTest extends AbstractTripleOutputFormatTests {
-
-    static long $bs1 = RdfIOConstants.DEFAULT_OUTPUT_BATCH_SIZE;
-    static long $bs2 = 1000;
-    static long $bs3 = 100;
-    static long $bs4 = 1;
-
-    /**
-     * @return Test parameters
-     */
-    @Parameters
-    public static Collection<Object[]> data() {
-        return Arrays.asList(new Object[][] { { $bs1 }, { $bs2 }, { $bs3 }, { $bs4 } });
-    }
-
-    private final long batchSize;
-
-    /**
-     * Creates new tests
-     * 
-     * @param batchSize
-     *            Batch size
-     */
-    public StreamedTurtleOutputTest(long batchSize) {
-        this.batchSize = batchSize;
-    }
-
-    @Override
-    protected String getFileExtension() {
-        return ".ttl";
-    }
-
-    @Override
-    protected Lang getRdfLanguage() {
-        return Lang.TURTLE;
-    }
-    
-    @Override
-    protected Configuration prepareConfiguration() {
-        Configuration config = super.prepareConfiguration();
-        config.setLong(RdfIOConstants.OUTPUT_BATCH_SIZE, this.batchSize);
-        return config;
-    }
-
-    @Override
-    protected OutputFormat<NullWritable, TripleWritable> getOutputFormat() {
-        return new TurtleOutputFormat<NullWritable>();
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/jena/blob/a6c0fefc/jena-hadoop-rdf/hadoop-rdf-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/turtle/TurtleBlankNodeOutputTests.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/turtle/TurtleBlankNodeOutputTests.java b/jena-hadoop-rdf/hadoop-rdf-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/turtle/TurtleBlankNodeOutputTests.java
deleted file mode 100644
index 8dcae4e..0000000
--- a/jena-hadoop-rdf/hadoop-rdf-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/turtle/TurtleBlankNodeOutputTests.java
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *     http://www.apache.org/licenses/LICENSE-2.0
- *     
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.jena.hadoop.rdf.io.output.turtle;
-
-import java.io.File;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Set;
-
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.mapreduce.OutputFormat;
-import org.apache.jena.hadoop.rdf.io.RdfIOConstants;
-import org.apache.jena.hadoop.rdf.types.TripleWritable;
-import org.apache.jena.riot.RDFDataMgr;
-import org.junit.Assert;
-import org.junit.runner.RunWith;
-import org.junit.runners.Parameterized;
-import org.junit.runners.Parameterized.Parameters;
-
-import com.hp.hpl.jena.datatypes.xsd.XSDDatatype;
-import com.hp.hpl.jena.graph.Node;
-import com.hp.hpl.jena.graph.NodeFactory;
-import com.hp.hpl.jena.graph.Triple;
-import com.hp.hpl.jena.rdf.model.Model;
-import com.hp.hpl.jena.rdf.model.ResIterator;
-import com.hp.hpl.jena.rdf.model.Resource;
-
-/**
- * Tests for Turtle output with blank nodes
- * 
- * 
- * 
- */
-@RunWith(Parameterized.class)
-public class TurtleBlankNodeOutputTests extends StreamedTurtleOutputTest {
-
-       static long $bs1 = RdfIOConstants.DEFAULT_OUTPUT_BATCH_SIZE;
-       static long $bs2 = 1000;
-       static long $bs3 = 100;
-       static long $bs4 = 1;
-
-       /**
-        * @return Test parameters
-        */
-       @Parameters
-       public static Collection<Object[]> data() {
-               return Arrays.asList(new Object[][] { { $bs1 }, { $bs2 }, { $bs3 },
-                               { $bs4 } });
-       }
-
-       /**
-        * Creates new tests
-        * 
-        * @param batchSize
-        *            Batch size
-        */
-       public TurtleBlankNodeOutputTests(long batchSize) {
-               super(batchSize);
-       }
-
-       @Override
-       protected Iterator<TripleWritable> generateTuples(int num) {
-               List<TripleWritable> ts = new ArrayList<TripleWritable>();
-               Node subject = NodeFactory.createAnon();
-               for (int i = 0; i < num; i++) {
-                       Triple t = new Triple(subject,
-                                       NodeFactory.createURI("http://example.org/predicate"),
-                                       NodeFactory.createLiteral(Integer.toString(i),
-                                                       XSDDatatype.XSDinteger));
-                       ts.add(new TripleWritable(t));
-               }
-               return ts.iterator();
-       }
-
-       @Override
-       protected void checkTuples(File f, long expected) {
-               super.checkTuples(f, expected);
-
-               Model m = RDFDataMgr.loadModel("file://" + f.getAbsolutePath(),
-                               this.getRdfLanguage());
-               ResIterator iter = m.listSubjects();
-               Set<Node> subjects = new HashSet<Node>();
-               while (iter.hasNext()) {
-                       Resource res = iter.next();
-                       Assert.assertTrue(res.isAnon());
-                       subjects.add(res.asNode());
-               }
-               // Should only be one subject unless the data was empty in which case
-               // there will be zero subjects
-               Assert.assertEquals(expected == 0 ? 0 : 1, subjects.size());
-       }
-
-       @Override
-       protected OutputFormat<NullWritable, TripleWritable> getOutputFormat() {
-               return new TurtleOutputFormat<NullWritable>();
-       }
-
-}

http://git-wip-us.apache.org/repos/asf/jena/blob/a6c0fefc/jena-hadoop-rdf/hadoop-rdf-io/src/test/java/org/apache/jena/hadoop/rdf/io/registry/TestHadoopRdfIORegistry.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-io/src/test/java/org/apache/jena/hadoop/rdf/io/registry/TestHadoopRdfIORegistry.java b/jena-hadoop-rdf/hadoop-rdf-io/src/test/java/org/apache/jena/hadoop/rdf/io/registry/TestHadoopRdfIORegistry.java
deleted file mode 100644
index 2eae232..0000000
--- a/jena-hadoop-rdf/hadoop-rdf-io/src/test/java/org/apache/jena/hadoop/rdf/io/registry/TestHadoopRdfIORegistry.java
+++ /dev/null
@@ -1,186 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *     http://www.apache.org/licenses/LICENSE-2.0
- *     
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.jena.hadoop.rdf.io.registry;
-
-import java.io.IOException;
-import java.io.StringWriter;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.mapreduce.RecordReader;
-import org.apache.hadoop.mapreduce.RecordWriter;
-import org.apache.jena.hadoop.rdf.types.QuadWritable;
-import org.apache.jena.hadoop.rdf.types.TripleWritable;
-import org.apache.jena.riot.Lang;
-import org.apache.jena.riot.RDFLanguages;
-import org.junit.Assert;
-import org.junit.Test;
-
-/**
- * Tests for the {@link HadoopRdfIORegistry}
- */
-public class TestHadoopRdfIORegistry {
-
-    private void testLang(Lang lang, boolean triples, boolean quads, boolean writesSupported) {
-        Assert.assertEquals(triples, HadoopRdfIORegistry.hasTriplesReader(lang));
-        Assert.assertEquals(quads, HadoopRdfIORegistry.hasQuadReader(lang));
-
-        // Some formats may be asymmetric
-        if (writesSupported) {
-            Assert.assertEquals(triples, HadoopRdfIORegistry.hasTriplesWriter(lang));
-            Assert.assertEquals(quads, HadoopRdfIORegistry.hasQuadWriter(lang));
-        } else {
-            Assert.assertFalse(HadoopRdfIORegistry.hasTriplesWriter(lang));
-            Assert.assertFalse(HadoopRdfIORegistry.hasQuadWriter(lang));
-        }
-
-        if (triples) {
-            // Check that triples are supported
-            RecordReader<LongWritable, TripleWritable> tripleReader;
-            try {
-                tripleReader = HadoopRdfIORegistry.createTripleReader(lang);
-                Assert.assertNotNull(tripleReader);
-            } catch (IOException e) {
-                Assert.fail("Registry indicates that " + lang.getName()
-                        + " can read triples but fails to produce a triple reader when asked: " + e.getMessage());
-            }
-
-            if (writesSupported) {
-                RecordWriter<NullWritable, TripleWritable> tripleWriter;
-                try {
-                    tripleWriter = HadoopRdfIORegistry.createTripleWriter(lang, new StringWriter(), new Configuration(
-                            false));
-                    Assert.assertNotNull(tripleWriter);
-                } catch (IOException e) {
-                    Assert.fail("Registry indicates that " + lang.getName()
-                            + " can write triples but fails to produce a triple writer when asked: " + e.getMessage());
-                }
-            }
-        } else {
-            // Check that triples are not supported
-            try {
-                HadoopRdfIORegistry.createTripleReader(lang);
-                Assert.fail("Registry indicates that " + lang.getName()
-                        + " cannot read triples but produced a triple reader when asked (error was expected)");
-            } catch (IOException e) {
-                // This is expected
-            }
-            try {
-                HadoopRdfIORegistry.createTripleWriter(lang, new StringWriter(), new Configuration(false));
-                Assert.fail("Registry indicates that " + lang.getName()
-                        + " cannot write triples but produced a triple writer when asked (error was expected)");
-            } catch (IOException e) {
-                // This is expected
-            }
-        }
-
-        if (quads) {
-            // Check that quads are supported
-            RecordReader<LongWritable, QuadWritable> quadReader;
-            try {
-                quadReader = HadoopRdfIORegistry.createQuadReader(lang);
-                Assert.assertNotNull(quadReader);
-            } catch (IOException e) {
-                Assert.fail("Registry indicates that " + lang.getName()
-                        + " can read quads but fails to produce a quad reader when asked: " + e.getMessage());
-            }
-
-            if (writesSupported) {
-                RecordWriter<NullWritable, QuadWritable> quadWriter;
-                try {
-                    quadWriter = HadoopRdfIORegistry.createQuadWriter(lang, new StringWriter(),
-                            new Configuration(false));
-                    Assert.assertNotNull(quadWriter);
-                } catch (IOException e) {
-                    Assert.fail("Registry indicates that " + lang.getName()
-                            + " can write quads but fails to produce a quad writer when asked: " + e.getMessage());
-                }
-            }
-        } else {
-            try {
-                HadoopRdfIORegistry.createQuadReader(lang);
-                Assert.fail("Registry indicates that " + lang.getName()
-                        + " cannot read quads but produced a quad reader when asked (error was expected)");
-            } catch (IOException e) {
-                // This is expected
-            }
-            try {
-                HadoopRdfIORegistry.createQuadWriter(lang, new StringWriter(), new Configuration(false));
-                Assert.fail("Registry indicates that " + lang.getName()
-                        + " cannot write quads but produced a quad writer when asked (error was expected)");
-            } catch (IOException e) {
-                // This is expected
-            }
-        }
-    }
-
-    @Test
-    public void json_ld_registered() {
-        testLang(Lang.JSONLD, true, true, true);
-    }
-
-    @Test
-    public void nquads_registered() {
-        testLang(Lang.NQUADS, false, true, true);
-        testLang(Lang.NQ, false, true, true);
-    }
-
-    @Test
-    public void ntriples_registered() {
-        testLang(Lang.NTRIPLES, true, false, true);
-        testLang(Lang.NT, true, false, true);
-    }
-
-    @Test
-    public void rdf_json_registered() {
-        testLang(Lang.RDFJSON, true, false, true);
-    }
-
-    @Test
-    public void rdf_xml_registered() {
-        testLang(Lang.RDFXML, true, false, true);
-    }
-
-    @Test
-    public void rdf_thrift_registered() {
-        testLang(RDFLanguages.THRIFT, true, true, true);
-    }
-
-    @Test
-    public void trig_registered() {
-        testLang(Lang.TRIG, false, true, true);
-    }
-
-    @Test
-    public void trix_registered() {
-        testLang(Lang.TRIX, false, true, true);
-    }
-
-    @Test
-    public void turtle_registered() {
-        testLang(Lang.TURTLE, true, false, true);
-        testLang(Lang.TTL, true, false, true);
-        testLang(Lang.N3, true, false, true);
-    }
-
-    @Test
-    public void unregistered() {
-        testLang(Lang.RDFNULL, false, false, true);
-    }
-}

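[For context: the test above probes the registry's per-language reader/writer availability. A minimal usage sketch, not from this commit (the filename is a placeholder; imports as in the deleted file above):

    static RecordReader<LongWritable, TripleWritable> readerFor(String filename) throws IOException {
        Lang lang = RDFLanguages.filenameToLang(filename);       // e.g. "data.ttl" resolves to Turtle
        if (!HadoopRdfIORegistry.hasTriplesReader(lang))
            throw new IOException("No triple reader registered for " + lang.getName());
        return HadoopRdfIORegistry.createTripleReader(lang);     // throws IOException if unregistered
    }
]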
http://git-wip-us.apache.org/repos/asf/jena/blob/a6c0fefc/jena-hadoop-rdf/hadoop-rdf-stats/hadoop-job.xml
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-stats/hadoop-job.xml b/jena-hadoop-rdf/hadoop-rdf-stats/hadoop-job.xml
deleted file mode 100644
index de72645..0000000
--- a/jena-hadoop-rdf/hadoop-rdf-stats/hadoop-job.xml
+++ /dev/null
@@ -1,46 +0,0 @@
-<!--
-   Licensed to the Apache Software Foundation (ASF) under one or more
-   contributor license agreements.  See the NOTICE file distributed with
-   this work for additional information regarding copyright ownership.
-   The ASF licenses this file to You under the Apache License, Version 2.0
-   (the "License"); you may not use this file except in compliance with
-   the License.  You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
--->
-
-<assembly>
-  <id>hadoop-job</id>
-  <formats>
-    <format>jar</format>
-  </formats>
-  <includeBaseDirectory>false</includeBaseDirectory>
-  <dependencySets>
-    <dependencySet>
-      <unpack>false</unpack>
-      <scope>runtime</scope>
-      <outputDirectory>lib</outputDirectory>
-      <excludes>
-        <exclude>${groupId}:${artifactId}</exclude>
-      </excludes>
-    </dependencySet>
-    <dependencySet>
-      <unpack>true</unpack>
-      <includes>
-        <include>${groupId}:${artifactId}</include>
-      </includes>
-    </dependencySet>
-  </dependencySets>
-  <fileSets>
-    <fileSet>
-      <directory>${basedir}/target/test-classes</directory>
-      <outputDirectory>/</outputDirectory>
-    </fileSet>
- </fileSets>
-</assembly>

http://git-wip-us.apache.org/repos/asf/jena/blob/a6c0fefc/jena-hadoop-rdf/hadoop-rdf-stats/pom.xml
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-stats/pom.xml b/jena-hadoop-rdf/hadoop-rdf-stats/pom.xml
deleted file mode 100644
index bf69fa6..0000000
--- a/jena-hadoop-rdf/hadoop-rdf-stats/pom.xml
+++ /dev/null
@@ -1,103 +0,0 @@
-<!--
-   Licensed to the Apache Software Foundation (ASF) under one or more
-   contributor license agreements.  See the NOTICE file distributed with
-   this work for additional information regarding copyright ownership.
-   The ASF licenses this file to You under the Apache License, Version 2.0
-   (the "License"); you may not use this file except in compliance with
-   the License.  You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
--->
-
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-       xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
-       <modelVersion>4.0.0</modelVersion>
-       <parent>
-               <groupId>org.apache.jena</groupId>
-               <artifactId>jena-hadoop-rdf</artifactId>
-               <version>0.9.0-SNAPSHOT</version>
-       </parent>
-       <artifactId>jena-hadoop-rdf-stats</artifactId>
-       <name>Apache Jena - RDF Tools for Hadoop - Statistics Demo App</name>
-       <description>A demo application that can be run on Hadoop to produce a statistical analysis on arbitrary RDF inputs</description>
-
-       <dependencies>
-               <!-- Internal Project Dependencies -->
-               <dependency>
-                       <groupId>org.apache.jena</groupId>
-                       <artifactId>jena-hadoop-rdf-io</artifactId>
-                       <version>${project.version}</version>
-               </dependency>
-               <dependency>
-                       <groupId>org.apache.jena</groupId>
-                       <artifactId>jena-hadoop-rdf-mapreduce</artifactId>
-                       <version>${project.version}</version>
-               </dependency>
-
-               <!-- CLI related Dependencies -->
-               <dependency>
-                       <groupId>io.airlift</groupId>
-                       <artifactId>airline</artifactId>
-                       <version>0.6</version>
-               </dependency>
-
-               <!-- Hadoop Dependencies -->
-               <!-- Note these will be provided on the Hadoop cluster hence the provided
-                       scope -->
-               <dependency>
-                       <groupId>org.apache.hadoop</groupId>
-                       <artifactId>hadoop-common</artifactId>
-                       <scope>provided</scope>
-               </dependency>
-               <dependency>
-                       <groupId>org.apache.hadoop</groupId>
-                       <artifactId>hadoop-mapreduce-client-common</artifactId>
-                       <scope>provided</scope>
-               </dependency>
-
-               <!-- Test Dependencies -->
-               <dependency>
-                       <groupId>org.apache.jena</groupId>
-                       <artifactId>jena-hadoop-rdf-mapreduce</artifactId>
-                       <version>${project.version}</version>
-                       <classifier>tests</classifier>
-                       <scope>test</scope>
-               </dependency>
-               <dependency>
-                       <groupId>org.apache.mrunit</groupId>
-                       <artifactId>mrunit</artifactId>
-                       <scope>test</scope>
-                       <classifier>hadoop2</classifier>
-               </dependency>
-       </dependencies>
-
-       <build>
-               <plugins>
-                       <!-- Assembly plugin is used to produce the runnable Hadoop JAR with all
-                               dependencies contained therein -->
-                       <plugin>
-                               <artifactId>maven-assembly-plugin</artifactId>
-                               <configuration>
-                                       <descriptors>
-                                               <descriptor>hadoop-job.xml</descriptor>
-                                       </descriptors>
-                               </configuration>
-                               <executions>
-                                       <execution>
-                                               <id>make-assembly</id>
-                                               <phase>package</phase>
-                                               <goals>
-                                                       <goal>single</goal>
-                                               </goals>
-                                       </execution>
-                               </executions>
-                       </plugin>
-               </plugins>
-       </build>
-</project>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/jena/blob/a6c0fefc/jena-hadoop-rdf/hadoop-rdf-stats/src/main/java/org/apache/jena/hadoop/rdf/stats/RdfStats.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-stats/src/main/java/org/apache/jena/hadoop/rdf/stats/RdfStats.java b/jena-hadoop-rdf/hadoop-rdf-stats/src/main/java/org/apache/jena/hadoop/rdf/stats/RdfStats.java
deleted file mode 100644
index 5f870ee..0000000
--- a/jena-hadoop-rdf/hadoop-rdf-stats/src/main/java/org/apache/jena/hadoop/rdf/stats/RdfStats.java
+++ /dev/null
@@ -1,405 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *     http://www.apache.org/licenses/LICENSE-2.0
- *     
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.jena.hadoop.rdf.stats;
-
-import io.airlift.command.Arguments;
-import io.airlift.command.Command;
-import io.airlift.command.Help;
-import io.airlift.command.HelpOption;
-import io.airlift.command.Option;
-import io.airlift.command.OptionType;
-import io.airlift.command.ParseArgumentsMissingException;
-import io.airlift.command.ParseArgumentsUnexpectedException;
-import io.airlift.command.ParseException;
-import io.airlift.command.ParseOptionMissingException;
-import io.airlift.command.ParseOptionMissingValueException;
-import io.airlift.command.SingleCommand;
-import io.airlift.command.model.CommandMetadata;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.concurrent.TimeUnit;
-
-import javax.inject.Inject;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-import org.apache.jena.hadoop.rdf.stats.jobs.JobFactory;
-
-
-/**
- * Entry point for the Hadoop job, handles launching all the relevant Hadoop
- * jobs
- */
-@Command(name = "bin/hadoop jar PATH_TO_JAR com.yarcdata.urika.hadoop.rdf.stats.RdfStats", description = "A command which computes statistics on RDF data using Hadoop")
-public class RdfStats implements Tool {
-
-    static final String ANSI_RED = "\u001B[31m";
-    static final String ANSI_RESET = "\u001B[0m";
-
-    private static final String DATA_TYPE_TRIPLES = "triples", DATA_TYPE_QUADS = "quads", DATA_TYPE_MIXED = "mixed";
-
-    /**
-     * Help option
-     */
-    @Inject
-    public HelpOption helpOption;
-
-    /**
-     * Gets/Sets whether all available statistics will be calculated
-     */
-    @Option(name = { "-a", "--all" }, description = "Requests that all 
available statistics be calculated", type = OptionType.COMMAND)
-    public boolean all = false;
-
-    /**
-     * Gets/Sets whether node usage counts will be calculated
-     */
-    @Option(name = { "-n", "--node-count" }, description = "Requests that node 
usage counts be calculated", type = OptionType.COMMAND)
-    public boolean nodeCount = false;
-
-    /**
-     * Gets/Sets whether characteristic sets will be calculated
-     */
-    @Option(name = { "-c", "--characteristic-sets" }, description = "Requests 
that characteristic sets be calculated", type = OptionType.COMMAND)
-    public boolean characteristicSets = false;
-
-    /**
-     * Gets/Sets whether type counts will be calculated
-     */
-    @Option(name = { "-t", "--type-counts" }, description = "Requests that 
rdf:type usage counts be calculated", type = OptionType.COMMAND)
-    public boolean typeCount = false;
-
-    /**
-     * Gets/Sets whether data type counts will be calculated
-     */
-    @Option(name = { "-d", "--data-types" }, description = "Requests that 
literal data type usage counts be calculated", type = OptionType.COMMAND)
-    public boolean dataTypeCount = false;
-
-    /**
-     * Gets/Sets whether namespace counts will be calculated
-     */
-    @Option(name = { "--namespaces" }, description = "Requests that namespace 
usage counts be calculated", type = OptionType.COMMAND)
-    public boolean namespaceCount = false;
-
-    /**
-     * Gets/Sets the input data type used
-     */
-    @Option(name = { "--input-type" }, allowedValues = { DATA_TYPE_MIXED, 
DATA_TYPE_QUADS, DATA_TYPE_TRIPLES }, description = "Specifies whether the 
input data is a mixture of quads and triples, just quads or just triples.  
Using the most specific data type will yield the most accurrate statistics")
-    public String inputType = DATA_TYPE_MIXED;
-
-    /**
-     * Gets/Sets the output path
-     */
-    @Option(name = { "-o", "--output" }, title = "OutputPath", description = 
"Sets the output path", arity = 1, required = true)
-    public String outputPath = null;
-
-    /**
-     * Gets/Sets the input path(s)
-     */
-    @Arguments(description = "Sets the input path(s)", title = "InputPath", required = true)
-    public List<String> inputPaths = new ArrayList<String>();
-
-    private Configuration config;
-
-    /**
-     * Entry point method
-     * 
-     * @param args
-     *            Arguments
-     * @throws Exception
-     */
-    public static void main(String[] args) throws Exception {
-        try {
-            // Run and exit with result code if no errors bubble up
-            // Note that the exit code may still be an error code
-            int res = ToolRunner.run(new Configuration(true), new RdfStats(), args);
-            System.exit(res);
-        } catch (Exception e) {
-            System.err.println(ANSI_RED + e.getMessage());
-            e.printStackTrace(System.err);
-        } finally {
-            System.err.print(ANSI_RESET);
-        }
-        // If any errors bubble up exit with non-zero code
-        System.exit(1);
-    }
-
-    private static void showUsage() {
-        CommandMetadata metadata = SingleCommand.singleCommand(RdfStats.class).getCommandMetadata();
-        StringBuilder builder = new StringBuilder();
-        Help.help(metadata, builder);
-        System.err.print(ANSI_RESET);
-        System.err.println(builder.toString());
-        System.exit(1);
-    }
-
-    @Override
-    public void setConf(Configuration conf) {
-        this.config = conf;
-    }
-
-    @Override
-    public Configuration getConf() {
-        return this.config;
-    }
-
-    @Override
-    public int run(String[] args) throws Exception {
-        try {
-            // Parse custom arguments
-            RdfStats cmd = SingleCommand.singleCommand(RdfStats.class).parse(args);
-
-            // Copy Hadoop configuration across
-            cmd.setConf(this.getConf());
-
-            // Show help if requested and exit with success
-            if (cmd.helpOption.showHelpIfRequested()) {
-                return 0;
-            }
-
-            // Run the command and exit with success
-            cmd.run();
-            return 0;
-
-        } catch (ParseOptionMissingException e) {
-            System.err.println(ANSI_RED + e.getMessage());
-            System.err.println();
-            showUsage();
-        } catch (ParseOptionMissingValueException e) {
-            System.err.println(ANSI_RED + e.getMessage());
-            System.err.println();
-            showUsage();
-        } catch (ParseArgumentsMissingException e) {
-            System.err.println(ANSI_RED + e.getMessage());
-            System.err.println();
-            showUsage();
-        } catch (ParseArgumentsUnexpectedException e) {
-            System.err.println(ANSI_RED + e.getMessage());
-            System.err.println();
-            showUsage();
-            // TODO Re-enable as and when we upgrade Airline
-            // } catch (ParseOptionIllegalValueException e) {
-            // System.err.println(ANSI_RED + e.getMessage());
-            // System.err.println();
-            // showUsage();
-        } catch (ParseException e) {
-            System.err.println(ANSI_RED + e.getMessage());
-            System.err.println();
-            showUsage();
-        } catch (UnsupportedOperationException e) {
-            System.err.println(ANSI_RED + e.getMessage());
-        } catch (Throwable e) {
-            System.err.println(ANSI_RED + e.getMessage());
-            e.printStackTrace(System.err);
-        } finally {
-            System.err.print(ANSI_RESET);
-        }
-        return 1;
-    }
-
-    private void run() throws Throwable {
-        if (!this.outputPath.endsWith("/")) {
-            this.outputPath += "/";
-        }
-
-        // If all statistics requested turn on all statistics
-        if (this.all) {
-            this.nodeCount = true;
-            this.characteristicSets = true;
-            this.typeCount = true;
-            this.dataTypeCount = true;
-            this.namespaceCount = true;
-        }
-
-        // How many statistics were requested?
-        int statsRequested = 0;
-        if (this.nodeCount)
-            statsRequested++;
-        if (this.characteristicSets)
-            statsRequested++;
-        if (this.typeCount)
-            statsRequested++;
-        if (this.dataTypeCount)
-            statsRequested++;
-        if (this.namespaceCount)
-            statsRequested++;
-
-        // Error if no statistics requested
-        if (statsRequested == 0) {
-            System.err
-                    .println("You did not request any statistics to be calculated, please use one/more of the relevant options to select the statistics to be computed");
-            return;
-        }
-        int statsComputed = 1;
-
-        // Compute statistics
-        if (this.nodeCount) {
-            Job job = this.selectNodeCountJob();
-            statsComputed = this.computeStatistic(job, statsComputed, statsRequested);
-        }
-        if (this.typeCount) {
-            Job[] jobs = this.selectTypeCountJobs();
-            statsComputed = this.computeStatistic(jobs, false, false, statsComputed, statsRequested);
-        }
-        if (this.dataTypeCount) {
-            Job job = this.selectDataTypeCountJob();
-            statsComputed = this.computeStatistic(job, statsComputed, statsRequested);
-        }
-        if (this.namespaceCount) {
-            Job job = this.selectNamespaceCountJob();
-            statsComputed = this.computeStatistic(job, statsComputed, statsRequested);
-        }
-        if (this.characteristicSets) {
-            Job[] jobs = this.selectCharacteristicSetJobs();
-            statsComputed = this.computeStatistic(jobs, false, false, statsComputed, statsRequested);
-        }
-    }
-
-    private int computeStatistic(Job job, int statsComputed, int statsRequested) throws Throwable {
-        System.out.println(String.format("Computing Statistic %d of %d requested", statsComputed, statsRequested));
-        this.runJob(job);
-        System.out.println(String.format("Computed Statistic %d of %d requested", statsComputed, statsRequested));
-        System.out.println();
-        return ++statsComputed;
-    }
-
-    private int computeStatistic(Job[] jobs, boolean continueOnFailure, boolean continueOnError, int statsComputed,
-            int statsRequested) {
-        System.out.println(String.format("Computing Statistic %d of %d requested", statsComputed, statsRequested));
-        this.runJobSequence(jobs, continueOnFailure, continueOnError);
-        System.out.println(String.format("Computed Statistic %d of %d requested", statsComputed, statsRequested));
-        System.out.println();
-        return ++statsComputed;
-    }
-
-    private boolean runJob(Job job) throws Throwable {
-        System.out.println("Submitting Job " + job.getJobName());
-        long start = System.nanoTime();
-        try {
-            job.submit();
-            if (job.monitorAndPrintJob()) {
-                System.out.println("Job " + job.getJobName() + " succeeded");
-                return true;
-            } else {
-                System.out.println("Job " + job.getJobName() + " failed");
-                return false;
-            }
-        } catch (Throwable e) {
-            System.out.println("Unexpected failure in Job " + job.getJobName());
-            throw e;
-        } finally {
-            long end = System.nanoTime();
-            System.out.println("Job " + job.getJobName() + " finished after "
-                    + String.format("%,d milliseconds", TimeUnit.NANOSECONDS.toMillis(end - start)));
-            System.out.println();
-        }
-    }
-
-    private void runJobSequence(Job[] jobs, boolean continueOnFailure, boolean continueOnError) {
-        for (int i = 0; i < jobs.length; i++) {
-            Job job = jobs[i];
-            try {
-                boolean success = this.runJob(job);
-                if (!success && !continueOnFailure)
-                    throw new IllegalStateException("Unable to complete job sequence because Job " + job.getJobName() + " failed");
-            } catch (IllegalStateException e) {
-                throw e;
-            } catch (Throwable e) {
-                if (!continueOnError)
-                    throw new IllegalStateException("Unable to complete job sequence because job " + job.getJobName()
-                            + " errored", e);
-            }
-        }
-    }
-
-    private Job selectNodeCountJob() throws IOException {
-        String realOutputPath = outputPath + "node-counts/";
-        String[] inputs = new String[this.inputPaths.size()];
-        this.inputPaths.toArray(inputs);
-
-        if (DATA_TYPE_QUADS.equals(this.inputType)) {
-            return JobFactory.getQuadNodeCountJob(this.config, inputs, realOutputPath);
-        } else if (DATA_TYPE_TRIPLES.equals(this.inputType)) {
-            return JobFactory.getTripleNodeCountJob(this.config, inputs, realOutputPath);
-        } else {
-            return JobFactory.getNodeCountJob(this.config, inputs, realOutputPath);
-        }
-    }
-
-    private Job selectDataTypeCountJob() throws IOException {
-        String realOutputPath = outputPath + "data-type-counts/";
-        String[] inputs = new String[this.inputPaths.size()];
-        this.inputPaths.toArray(inputs);
-
-        if (DATA_TYPE_QUADS.equals(this.inputType)) {
-            return JobFactory.getQuadDataTypeCountJob(this.config, inputs, realOutputPath);
-        } else if (DATA_TYPE_TRIPLES.equals(this.inputType)) {
-            return JobFactory.getTripleDataTypeCountJob(this.config, inputs, realOutputPath);
-        } else {
-            return JobFactory.getDataTypeCountJob(this.config, inputs, realOutputPath);
-        }
-    }
-
-    private Job selectNamespaceCountJob() throws IOException {
-        String realOutputPath = outputPath + "namespace-counts/";
-        String[] inputs = new String[this.inputPaths.size()];
-        this.inputPaths.toArray(inputs);
-
-        if (DATA_TYPE_QUADS.equals(this.inputType)) {
-            return JobFactory.getQuadNamespaceCountJob(this.config, inputs, realOutputPath);
-        } else if (DATA_TYPE_TRIPLES.equals(this.inputType)) {
-            return JobFactory.getTripleNamespaceCountJob(this.config, inputs, realOutputPath);
-        } else {
-            return JobFactory.getNamespaceCountJob(this.config, inputs, realOutputPath);
-        }
-    }
-
-    private Job[] selectCharacteristicSetJobs() throws IOException {
-        String intermediateOutputPath = outputPath + "characteristics/intermediate/";
-        String finalOutputPath = outputPath + "characteristics/final/";
-        String[] inputs = new String[this.inputPaths.size()];
-        this.inputPaths.toArray(inputs);
-
-        if (DATA_TYPE_QUADS.equals(this.inputType)) {
-            return JobFactory.getQuadCharacteristicSetJobs(this.config, inputs, intermediateOutputPath, finalOutputPath);
-        } else if (DATA_TYPE_TRIPLES.equals(this.inputType)) {
-            return JobFactory.getTripleCharacteristicSetJobs(this.config, inputs, intermediateOutputPath, finalOutputPath);
-        } else {
-            return JobFactory.getCharacteristicSetJobs(this.config, inputs, intermediateOutputPath, finalOutputPath);
-        }
-    }
-
-    private Job[] selectTypeCountJobs() throws IOException {
-        String intermediateOutputPath = outputPath + "type-declarations/";
-        String finalOutputPath = outputPath + "type-counts/";
-        String[] inputs = new String[this.inputPaths.size()];
-        this.inputPaths.toArray(inputs);
-
-        if (DATA_TYPE_QUADS.equals(this.inputType)) {
-            return JobFactory.getQuadTypeCountJobs(this.config, inputs, intermediateOutputPath, finalOutputPath);
-        } else if (DATA_TYPE_TRIPLES.equals(this.inputType)) {
-            return JobFactory.getTripleTypeCountJobs(this.config, inputs, intermediateOutputPath, finalOutputPath);
-        } else {
-            return JobFactory.getTypeCountJobs(this.config, inputs, intermediateOutputPath, finalOutputPath);
-        }
-    }
-}

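[For context: RdfStats.run() above parses its arguments with Airline's SingleCommand before launching jobs. A minimal sketch, not from this commit, of that parse step and the annotated fields it populates (the option values and paths are placeholders):

    RdfStats cmd = SingleCommand.singleCommand(RdfStats.class)
            .parse("-n", "--input-type", "triples", "-o", "/stats/", "/data/in.nt");
    // After parsing: cmd.nodeCount == true, cmd.inputType == "triples",
    // cmd.outputPath == "/stats/", cmd.inputPaths == ["/data/in.nt"]
]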
http://git-wip-us.apache.org/repos/asf/jena/blob/a6c0fefc/jena-hadoop-rdf/hadoop-rdf-stats/src/main/java/org/apache/jena/hadoop/rdf/stats/jobs/JobFactory.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-stats/src/main/java/org/apache/jena/hadoop/rdf/stats/jobs/JobFactory.java b/jena-hadoop-rdf/hadoop-rdf-stats/src/main/java/org/apache/jena/hadoop/rdf/stats/jobs/JobFactory.java
deleted file mode 100644
index 55bb8af..0000000
--- a/jena-hadoop-rdf/hadoop-rdf-stats/src/main/java/org/apache/jena/hadoop/rdf/stats/jobs/JobFactory.java
+++ /dev/null
@@ -1,757 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *     http://www.apache.org/licenses/LICENSE-2.0
- *     
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.jena.hadoop.rdf.stats.jobs;
-
-import java.io.IOException;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.SequenceFile.CompressionType;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.compress.BZip2Codec;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
-import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat;
-import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
-import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
-import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
-import org.apache.hadoop.util.StringUtils;
-import org.apache.jena.hadoop.rdf.io.input.QuadsInputFormat;
-import org.apache.jena.hadoop.rdf.io.input.TriplesInputFormat;
-import org.apache.jena.hadoop.rdf.io.input.TriplesOrQuadsInputFormat;
-import org.apache.jena.hadoop.rdf.io.input.nquads.NQuadsInputFormat;
-import org.apache.jena.hadoop.rdf.io.input.ntriples.NTriplesInputFormat;
-import org.apache.jena.hadoop.rdf.io.output.nquads.NQuadsOutputFormat;
-import org.apache.jena.hadoop.rdf.io.output.ntriples.NTriplesNodeOutputFormat;
-import org.apache.jena.hadoop.rdf.io.output.ntriples.NTriplesOutputFormat;
-import org.apache.jena.hadoop.rdf.mapreduce.KeyMapper;
-import org.apache.jena.hadoop.rdf.mapreduce.RdfMapReduceConstants;
-import org.apache.jena.hadoop.rdf.mapreduce.TextCountReducer;
-import org.apache.jena.hadoop.rdf.mapreduce.characteristics.CharacteristicSetReducer;
-import org.apache.jena.hadoop.rdf.mapreduce.characteristics.QuadCharacteristicSetGeneratingReducer;
-import org.apache.jena.hadoop.rdf.mapreduce.characteristics.TripleCharacteristicSetGeneratingReducer;
-import org.apache.jena.hadoop.rdf.mapreduce.count.NodeCountReducer;
-import org.apache.jena.hadoop.rdf.mapreduce.count.QuadNodeCountMapper;
-import org.apache.jena.hadoop.rdf.mapreduce.count.TripleNodeCountMapper;
-import org.apache.jena.hadoop.rdf.mapreduce.count.datatypes.QuadDataTypeCountMapper;
-import org.apache.jena.hadoop.rdf.mapreduce.count.datatypes.TripleDataTypeCountMapper;
-import org.apache.jena.hadoop.rdf.mapreduce.count.namespaces.QuadNamespaceCountMapper;
-import org.apache.jena.hadoop.rdf.mapreduce.count.namespaces.TripleNamespaceCountMapper;
-import org.apache.jena.hadoop.rdf.mapreduce.count.positional.QuadObjectCountMapper;
-import org.apache.jena.hadoop.rdf.mapreduce.count.positional.TripleObjectCountMapper;
-import org.apache.jena.hadoop.rdf.mapreduce.filter.positional.QuadFilterByPredicateMapper;
-import org.apache.jena.hadoop.rdf.mapreduce.filter.positional.TripleFilterByPredicateUriMapper;
-import org.apache.jena.hadoop.rdf.mapreduce.group.QuadGroupBySubjectMapper;
-import org.apache.jena.hadoop.rdf.mapreduce.group.TripleGroupBySubjectMapper;
-import org.apache.jena.hadoop.rdf.types.CharacteristicSetWritable;
-import org.apache.jena.hadoop.rdf.types.NodeWritable;
-import org.apache.jena.hadoop.rdf.types.QuadWritable;
-import org.apache.jena.hadoop.rdf.types.TripleWritable;
-
-import com.hp.hpl.jena.vocabulary.RDF;
-
-/**
- * Factory that can produce {@link Job} instances for computing various RDF
- * statistics
- * 
- * 
- * 
- */
-public class JobFactory {
-
-    /**
-     * Private constructor prevents instantiation
-     */
-    private JobFactory() {
-    }
-
-    /**
-     * Gets a job for computing node counts on RDF triple inputs
-     * 
-     * @param config
-     *            Configuration
-     * @param inputPaths
-     *            Input paths
-     * @param outputPath
-     *            Output path
-     * @return Job
-     * @throws IOException
-     */
-    public static Job getTripleNodeCountJob(Configuration config, String[] inputPaths, String outputPath) throws IOException {
-        Job job = Job.getInstance(config);
-        job.setJarByClass(JobFactory.class);
-        job.setJobName("RDF Triples Node Usage Count");
-
-        // Map/Reduce classes
-        job.setMapperClass(TripleNodeCountMapper.class);
-        job.setMapOutputKeyClass(NodeWritable.class);
-        job.setMapOutputValueClass(LongWritable.class);
-        job.setReducerClass(NodeCountReducer.class);
-
-        // Input and Output
-        job.setInputFormatClass(TriplesInputFormat.class);
-        job.setOutputFormatClass(NTriplesNodeOutputFormat.class);
-        FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths));
-        FileOutputFormat.setOutputPath(job, new Path(outputPath));
-
-        return job;
-    }
-
-    /**
-     * Gets a job for computing node counts on RDF quad inputs
-     * 
-     * @param config
-     *            Configuration
-     * @param inputPaths
-     *            Input paths
-     * @param outputPath
-     *            Output path
-     * @return Job
-     * @throws IOException
-     */
-    public static Job getQuadNodeCountJob(Configuration config, String[] inputPaths, String outputPath) throws IOException {
-        Job job = Job.getInstance(config);
-        job.setJarByClass(JobFactory.class);
-        job.setJobName("RDF Quads Node Usage Count");
-
-        // Map/Reduce classes
-        job.setMapperClass(QuadNodeCountMapper.class);
-        job.setMapOutputKeyClass(NodeWritable.class);
-        job.setMapOutputValueClass(LongWritable.class);
-        job.setReducerClass(NodeCountReducer.class);
-
-        // Input and Output
-        job.setInputFormatClass(QuadsInputFormat.class);
-        job.setOutputFormatClass(NTriplesNodeOutputFormat.class);
-        FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths));
-        FileOutputFormat.setOutputPath(job, new Path(outputPath));
-
-        return job;
-    }
-
-    /**
-     * Gets a job for computing node counts on RDF triple and/or quad inputs
-     * 
-     * @param config
-     *            Configuration
-     * @param inputPaths
-     *            Input paths
-     * @param outputPath
-     *            Output path
-     * @return Job
-     * @throws IOException
-     */
-    public static Job getNodeCountJob(Configuration config, String[] inputPaths, String outputPath) throws IOException {
-        Job job = Job.getInstance(config);
-        job.setJarByClass(JobFactory.class);
-        job.setJobName("RDF Node Usage Count");
-
-        // Map/Reduce classes
-        job.setMapperClass(QuadNodeCountMapper.class);
-        job.setMapOutputKeyClass(NodeWritable.class);
-        job.setMapOutputValueClass(LongWritable.class);
-        job.setReducerClass(NodeCountReducer.class);
-
-        // Input and Output
-        job.setInputFormatClass(TriplesOrQuadsInputFormat.class);
-        job.setOutputFormatClass(NTriplesNodeOutputFormat.class);
-        FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths));
-        FileOutputFormat.setOutputPath(job, new Path(outputPath));
-
-        return job;
-    }
-
-    /**
-     * Gets a sequence of jobs that can be used to compute characteristic sets
-     * for RDF triples
-     * 
-     * @param config
-     *            Configuration
-     * @param inputPaths
-     *            Input paths
-     * @param intermediateOutputPath
-     *            Intermediate output path
-     * @param outputPath
-     *            Final output path
-     * @return Sequence of jobs
-     * @throws IOException
-     */
-    public static Job[] getTripleCharacteristicSetJobs(Configuration config, String[] inputPaths, String intermediateOutputPath,
-            String outputPath) throws IOException {
-        Job[] jobs = new Job[2];
-
-        Job job = Job.getInstance(config);
-        job.setJarByClass(JobFactory.class);
-        job.setJobName("RDF Triples Characteristic Set (Generation)");
-
-        // Map/Reduce classes
-        job.setMapperClass(TripleGroupBySubjectMapper.class);
-        job.setMapOutputKeyClass(NodeWritable.class);
-        job.setMapOutputValueClass(TripleWritable.class);
-        job.setReducerClass(TripleCharacteristicSetGeneratingReducer.class);
-        job.setOutputKeyClass(CharacteristicSetWritable.class);
-        job.setOutputValueClass(NullWritable.class);
-
-        // Input and Output
-        job.setInputFormatClass(TriplesInputFormat.class);
-        job.setOutputFormatClass(SequenceFileOutputFormat.class);
-        FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths));
-        FileOutputFormat.setOutputPath(job, new Path(intermediateOutputPath));
-        SequenceFileOutputFormat.setCompressOutput(job, true);
-        FileOutputFormat.setOutputCompressorClass(job, BZip2Codec.class);
-        SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);
-
-        jobs[0] = job;
-
-        job = Job.getInstance(config);
-        job.setJarByClass(JobFactory.class);
-        job.setJobName("RDF Triples Characteristic Set (Reduction)");
-
-        // Map/Reduce classes
-        job.setMapperClass(KeyMapper.class);
-        job.setMapOutputKeyClass(CharacteristicSetWritable.class);
-        job.setMapOutputValueClass(CharacteristicSetWritable.class);
-        job.setReducerClass(CharacteristicSetReducer.class);
-        job.setOutputKeyClass(CharacteristicSetWritable.class);
-        job.setOutputValueClass(CharacteristicSetWritable.class);
-
-        // Input and Output
-        job.setInputFormatClass(SequenceFileInputFormat.class);
-        job.setOutputFormatClass(TextOutputFormat.class);
-        FileInputFormat.setInputPaths(job, intermediateOutputPath);
-        FileOutputFormat.setOutputPath(job, new Path(outputPath));
-
-        jobs[1] = job;
-        return jobs;
-    }
-
-    /**
-     * Gets a sequence of jobs that can be used to compute characteristic sets
-     * for RDF quads
-     * 
-     * @param config
-     *            Configuration
-     * @param inputPaths
-     *            Input paths
-     * @param intermediateOutputPath
-     *            Intermediate output path
-     * @param outputPath
-     *            Final output path
-     * @return Sequence of jobs
-     * @throws IOException
-     */
-    public static Job[] getQuadCharacteristicSetJobs(Configuration config, String[] inputPaths, String intermediateOutputPath,
-            String outputPath) throws IOException {
-        Job[] jobs = new Job[2];
-
-        Job job = Job.getInstance(config);
-        job.setJarByClass(JobFactory.class);
-        job.setJobName("RDF Quads Characteristic Set (Generation)");
-
-        // Map/Reduce classes
-        job.setMapperClass(QuadGroupBySubjectMapper.class);
-        job.setMapOutputKeyClass(NodeWritable.class);
-        job.setMapOutputValueClass(QuadWritable.class);
-        job.setReducerClass(QuadCharacteristicSetGeneratingReducer.class);
-        job.setOutputKeyClass(CharacteristicSetWritable.class);
-        job.setOutputValueClass(NullWritable.class);
-
-        // Input and Output
-        job.setInputFormatClass(QuadsInputFormat.class);
-        job.setOutputFormatClass(SequenceFileOutputFormat.class);
-        FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths));
-        FileOutputFormat.setOutputPath(job, new Path(intermediateOutputPath));
-        SequenceFileOutputFormat.setCompressOutput(job, true);
-        FileOutputFormat.setOutputCompressorClass(job, BZip2Codec.class);
-        SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);
-
-        jobs[0] = job;
-
-        job = Job.getInstance(config);
-        job.setJarByClass(JobFactory.class);
-        job.setJobName("RDF Quads Characteristic Set (Reduction)");
-
-        // Map/Reduce classes
-        job.setMapperClass(KeyMapper.class);
-        job.setMapOutputKeyClass(CharacteristicSetWritable.class);
-        job.setMapOutputValueClass(CharacteristicSetWritable.class);
-        job.setReducerClass(CharacteristicSetReducer.class);
-        job.setOutputKeyClass(CharacteristicSetWritable.class);
-        job.setOutputValueClass(CharacteristicSetWritable.class);
-
-        // Input and Output
-        job.setInputFormatClass(SequenceFileInputFormat.class);
-        job.setOutputFormatClass(TextOutputFormat.class);
-        FileInputFormat.setInputPaths(job, intermediateOutputPath);
-        FileOutputFormat.setOutputPath(job, new Path(outputPath));
-
-        jobs[1] = job;
-        return jobs;
-    }
-
-    /**
-     * Gets a sequence of jobs that can be used to compute characteristic sets
-     * for RDF triple and/or quad inputs
-     * 
-     * @param config
-     *            Configuration
-     * @param inputPaths
-     *            Input paths
-     * @param intermediateOutputPath
-     *            Intermediate output path
-     * @param outputPath
-     *            Final output path
-     * @return Sequence of jobs
-     * @throws IOException
-     */
-    public static Job[] getCharacteristicSetJobs(Configuration config, String[] inputPaths, String intermediateOutputPath,
-            String outputPath) throws IOException {
-        Job[] jobs = new Job[2];
-
-        Job job = Job.getInstance(config);
-        job.setJarByClass(JobFactory.class);
-        job.setJobName("RDF Characteristic Set (Generation)");
-
-        // Map/Reduce classes
-        job.setMapperClass(QuadGroupBySubjectMapper.class);
-        job.setMapOutputKeyClass(NodeWritable.class);
-        job.setMapOutputValueClass(QuadWritable.class);
-        job.setReducerClass(QuadCharacteristicSetGeneratingReducer.class);
-        job.setOutputKeyClass(CharacteristicSetWritable.class);
-        job.setOutputValueClass(NullWritable.class);
-
-        // Input and Output
-        job.setInputFormatClass(TriplesOrQuadsInputFormat.class);
-        job.setOutputFormatClass(SequenceFileOutputFormat.class);
-        FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths));
-        FileOutputFormat.setOutputPath(job, new Path(intermediateOutputPath));
-        SequenceFileOutputFormat.setCompressOutput(job, true);
-        FileOutputFormat.setOutputCompressorClass(job, BZip2Codec.class);
-        SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);
-
-        jobs[0] = job;
-
-        job = Job.getInstance(config);
-        job.setJarByClass(JobFactory.class);
-        job.setJobName("RDF Characteristic Set (Reduction)");
-
-        // Map/Reduce classes
-        job.setMapperClass(KeyMapper.class);
-        job.setMapOutputKeyClass(CharacteristicSetWritable.class);
-        job.setMapOutputValueClass(CharacteristicSetWritable.class);
-        job.setReducerClass(CharacteristicSetReducer.class);
-        job.setOutputKeyClass(CharacteristicSetWritable.class);
-        job.setOutputValueClass(CharacteristicSetWritable.class);
-
-        // Input and Output
-        job.setInputFormatClass(SequenceFileInputFormat.class);
-        job.setOutputFormatClass(TextOutputFormat.class);
-        FileInputFormat.setInputPaths(job, intermediateOutputPath);
-        FileOutputFormat.setOutputPath(job, new Path(outputPath));
-
-        jobs[1] = job;
-        return jobs;
-    }
-
-    /**
-     * Gets a sequence of jobs for computing type counts on RDF triple inputs
-     * 
-     * @param config
-     *            Configuration
-     * @param inputPaths
-     *            Input paths
-     * @param intermediateOutputPath
-     *            Path for intermediate output, which will contain all the
-     *            type declaration triples present in the inputs
-     * @param outputPath
-     *            Output path
-     * @return Sequence of jobs
-     * @throws IOException
-     */
-    public static Job[] getTripleTypeCountJobs(Configuration config, String[] inputPaths, String intermediateOutputPath,
-            String outputPath) throws IOException {
-        Job[] jobs = new Job[2];
-
-        Job job = Job.getInstance(config);
-        job.setJarByClass(JobFactory.class);
-        job.setJobName("RDF Type Triples Extraction");
-
-        // Map/Reduce classes
-        job.getConfiguration().setStrings(RdfMapReduceConstants.FILTER_PREDICATE_URIS, RDF.type.getURI());
-        job.setMapperClass(TripleFilterByPredicateUriMapper.class);
-        job.setMapOutputKeyClass(LongWritable.class);
-        job.setMapOutputValueClass(TripleWritable.class);
-
-        // Input and Output Format
-        job.setInputFormatClass(TriplesInputFormat.class);
-        job.setOutputFormatClass(NTriplesOutputFormat.class);
-        FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths));
-        FileOutputFormat.setOutputPath(job, new Path(intermediateOutputPath));
-
-        jobs[0] = job;
-
-        // Object Node Usage count job
-        job = Job.getInstance(config);
-        job.setJarByClass(JobFactory.class);
-        job.setJobName("RDF Triples Type Usage Count");
-
-        // Map/Reduce classes
-        job.setMapperClass(TripleObjectCountMapper.class);
-        job.setMapOutputKeyClass(NodeWritable.class);
-        job.setMapOutputValueClass(LongWritable.class);
-        job.setReducerClass(NodeCountReducer.class);
-
-        // Input and Output
-        job.setInputFormatClass(NTriplesInputFormat.class);
-        NLineInputFormat.setNumLinesPerSplit(job, 10000); // TODO Would be
-                                                          // better if this was
-                                                          // intelligently
-                                                          // configured
-        job.setOutputFormatClass(NTriplesNodeOutputFormat.class);
-        FileInputFormat.setInputPaths(job, intermediateOutputPath);
-        FileOutputFormat.setOutputPath(job, new Path(outputPath));
-
-        jobs[1] = job;
-
-        return jobs;
-    }
-
-    /**
-     * Gets a sequence of jobs for computing type counts on RDF quad inputs
-     * 
-     * @param config
-     *            Configuration
-     * @param inputPaths
-     *            Input paths
-     * @param intermediateOutputPath
-     *            Path for intermediate output, which will contain all the
-     *            type declaration quads present in the inputs
-     * @param outputPath
-     *            Output path
-     * @return Sequence of jobs
-     * @throws IOException
-     */
-    public static Job[] getQuadTypeCountJobs(Configuration config, String[] inputPaths, String intermediateOutputPath,
-            String outputPath) throws IOException {
-        Job[] jobs = new Job[2];
-
-        Job job = Job.getInstance(config);
-        job.setJarByClass(JobFactory.class);
-        job.setJobName("RDF Type Quads Extraction");
-
-        // Map/Reduce classes
-        job.getConfiguration().setStrings(RdfMapReduceConstants.FILTER_PREDICATE_URIS, RDF.type.getURI());
-        job.setMapperClass(QuadFilterByPredicateMapper.class);
-        job.setMapOutputKeyClass(LongWritable.class);
-        job.setMapOutputValueClass(QuadWritable.class);
-
-        // Input and Output Format
-        job.setInputFormatClass(QuadsInputFormat.class);
-        job.setOutputFormatClass(NQuadsOutputFormat.class);
-        FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths));
-        FileOutputFormat.setOutputPath(job, new Path(intermediateOutputPath));
-
-        jobs[0] = job;
-
-        // Object Node Usage count job
-        job = Job.getInstance(config);
-        job.setJarByClass(JobFactory.class);
-        job.setJobName("RDF Quads Type Usage Count");
-
-        // Map/Reduce classes
-        job.setMapperClass(QuadObjectCountMapper.class);
-        job.setMapOutputKeyClass(NodeWritable.class);
-        job.setMapOutputValueClass(LongWritable.class);
-        job.setReducerClass(NodeCountReducer.class);
-
-        // Input and Output
-        job.setInputFormatClass(NQuadsInputFormat.class);
-        NLineInputFormat.setNumLinesPerSplit(job, 10000); // TODO Would be
-                                                          // better if this was
-                                                          // intelligently
-                                                          // configured
-        job.setOutputFormatClass(NTriplesNodeOutputFormat.class);
-        FileInputFormat.setInputPaths(job, intermediateOutputPath);
-        FileOutputFormat.setOutputPath(job, new Path(outputPath));
-
-        jobs[1] = job;
-
-        return jobs;
-    }
-
-    /**
-     * Gets a sequence of jobs for computing type counts on RDF triple and/or quad inputs
-     * 
-     * @param config
-     *            Configuration
-     * @param inputPaths
-     *            Input paths
-     * @param intermediateOutputPath
-     *            Path for intermediate output, which will contain all the
-     *            type declaration quads present in the inputs
-     * @param outputPath
-     *            Output path
-     * @return Sequence of jobs
-     * @throws IOException
-     */
-    public static Job[] getTypeCountJobs(Configuration config, String[] inputPaths, String intermediateOutputPath,
-            String outputPath) throws IOException {
-        Job[] jobs = new Job[2];
-
-        Job job = Job.getInstance(config);
-        job.setJarByClass(JobFactory.class);
-        job.setJobName("RDF Type Extraction");
-
-        // Map/Reduce classes
-        job.getConfiguration().setStrings(RdfMapReduceConstants.FILTER_PREDICATE_URIS, RDF.type.getURI());
-        job.setMapperClass(QuadFilterByPredicateMapper.class);
-        job.setMapOutputKeyClass(LongWritable.class);
-        job.setMapOutputValueClass(QuadWritable.class);
-
-        // Input and Output Format
-        job.setInputFormatClass(TriplesOrQuadsInputFormat.class);
-        job.setOutputFormatClass(NQuadsOutputFormat.class);
-        FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths));
-        FileOutputFormat.setOutputPath(job, new Path(intermediateOutputPath));
-
-        jobs[0] = job;
-
-        // Object Node Usage count job
-        job = Job.getInstance(config);
-        job.setJarByClass(JobFactory.class);
-        job.setJobName("RDF Type Usage Count");
-
-        // Map/Reduce classes
-        job.setMapperClass(QuadObjectCountMapper.class);
-        job.setMapOutputKeyClass(NodeWritable.class);
-        job.setMapOutputValueClass(LongWritable.class);
-        job.setReducerClass(NodeCountReducer.class);
-
-        // Input and Output
-        job.setInputFormatClass(NQuadsInputFormat.class);
-        NLineInputFormat.setNumLinesPerSplit(job, 10000); // TODO Would be
-                                                          // better if this was
-                                                          // intelligently
-                                                          // configured
-        job.setOutputFormatClass(NTriplesNodeOutputFormat.class);
-        FileInputFormat.setInputPaths(job, intermediateOutputPath);
-        FileOutputFormat.setOutputPath(job, new Path(outputPath));
-
-        jobs[1] = job;
-
-        return jobs;
-    }
-
-    /**
-     * Gets a job for computing literal data type counts on RDF triple inputs
-     * 
-     * @param config
-     *            Configuration
-     * @param inputPaths
-     *            Input paths
-     * @param outputPath
-     *            Output path
-     * @return Job
-     * @throws IOException
-     */
-    public static Job getTripleDataTypeCountJob(Configuration config, String[] inputPaths, String outputPath) throws IOException {
-        Job job = Job.getInstance(config);
-        job.setJarByClass(JobFactory.class);
-        job.setJobName("RDF Triples Literal Data Type Usage Count");
-
-        // Map/Reduce classes
-        job.setMapperClass(TripleDataTypeCountMapper.class);
-        job.setMapOutputKeyClass(NodeWritable.class);
-        job.setMapOutputValueClass(LongWritable.class);
-        job.setReducerClass(NodeCountReducer.class);
-
-        // Input and Output
-        job.setInputFormatClass(TriplesInputFormat.class);
-        job.setOutputFormatClass(NTriplesNodeOutputFormat.class);
-        FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths));
-        FileOutputFormat.setOutputPath(job, new Path(outputPath));
-
-        return job;
-    }
-
-    /**
-     * Gets a job for computing literal data type counts on RDF quad inputs
-     * 
-     * @param config
-     *            Configuration
-     * @param inputPaths
-     *            Input paths
-     * @param outputPath
-     *            Output path
-     * @return Job
-     * @throws IOException
-     */
-    public static Job getQuadDataTypeCountJob(Configuration config, String[] inputPaths, String outputPath) throws IOException {
-        Job job = Job.getInstance(config);
-        job.setJarByClass(JobFactory.class);
-        job.setJobName("RDF Quads Literal Data Type Usage Count");
-
-        // Map/Reduce classes
-        job.setMapperClass(QuadDataTypeCountMapper.class);
-        job.setMapOutputKeyClass(NodeWritable.class);
-        job.setMapOutputValueClass(LongWritable.class);
-        job.setReducerClass(NodeCountReducer.class);
-
-        // Input and Output
-        job.setInputFormatClass(QuadsInputFormat.class);
-        job.setOutputFormatClass(NTriplesNodeOutputFormat.class);
-        FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths));
-        FileOutputFormat.setOutputPath(job, new Path(outputPath));
-
-        return job;
-    }
-
-    /**
-     * Gets a job for computing literal data type counts on RDF triple and/or
-     * quad inputs
-     * 
-     * @param config
-     *            Configuration
-     * @param inputPaths
-     *            Input paths
-     * @param outputPath
-     *            Output path
-     * @return Job
-     * @throws IOException
-     */
-    public static Job getDataTypeCountJob(Configuration config, String[] inputPaths, String outputPath) throws IOException {
-        Job job = Job.getInstance(config);
-        job.setJarByClass(JobFactory.class);
-        job.setJobName("RDF Literal Data Type Usage Count");
-
-        // Map/Reduce classes
-        job.setMapperClass(QuadDataTypeCountMapper.class);
-        job.setMapOutputKeyClass(NodeWritable.class);
-        job.setMapOutputValueClass(LongWritable.class);
-        job.setReducerClass(NodeCountReducer.class);
-
-        // Input and Output
-        job.setInputFormatClass(TriplesOrQuadsInputFormat.class);
-        job.setOutputFormatClass(NTriplesNodeOutputFormat.class);
-        FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths));
-        FileOutputFormat.setOutputPath(job, new Path(outputPath));
-
-        return job;
-    }
-
-    /**
-     * Gets a job for computing namespace counts on RDF triple inputs
-     * 
-     * @param config
-     *            Configuration
-     * @param inputPaths
-     *            Input paths
-     * @param outputPath
-     *            Output path
-     * @return Job
-     * @throws IOException
-     */
-    public static Job getTripleNamespaceCountJob(Configuration config, String[] inputPaths, String outputPath) throws IOException {
-        Job job = Job.getInstance(config);
-        job.setJarByClass(JobFactory.class);
-        job.setJobName("RDF Triples Namespace Usage Count");
-
-        // Map/Reduce classes
-        job.setMapperClass(TripleNamespaceCountMapper.class);
-        job.setMapOutputKeyClass(Text.class);
-        job.setMapOutputValueClass(LongWritable.class);
-        job.setReducerClass(TextCountReducer.class);
-
-        // Input and Output
-        job.setInputFormatClass(TriplesInputFormat.class);
-        job.setOutputFormatClass(TextOutputFormat.class);
-        FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths));
-        FileOutputFormat.setOutputPath(job, new Path(outputPath));
-
-        return job;
-    }
-
-    /**
-     * Gets a job for computing namespace counts on RDF quad inputs
-     * 
-     * @param config
-     *            Configuration
-     * @param inputPaths
-     *            Input paths
-     * @param outputPath
-     *            Output path
-     * @return Job
-     * @throws IOException
-     */
-    public static Job getQuadNamespaceCountJob(Configuration config, String[] inputPaths, String outputPath) throws IOException {
-        Job job = Job.getInstance(config);
-        job.setJarByClass(JobFactory.class);
-        job.setJobName("RDF Quads Namespace Usage Count");
-
-        // Map/Reduce classes
-        job.setMapperClass(QuadNamespaceCountMapper.class);
-        job.setMapOutputKeyClass(Text.class);
-        job.setMapOutputValueClass(LongWritable.class);
-        job.setReducerClass(TextCountReducer.class);
-
-        // Input and Output
-        job.setInputFormatClass(QuadsInputFormat.class);
-        job.setOutputFormatClass(TextOutputFormat.class);
-        FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths));
-        FileOutputFormat.setOutputPath(job, new Path(outputPath));
-
-        return job;
-    }
-
-    /**
-     * Gets a job for computing namespace counts on RDF triple and/or quad inputs
-     * 
-     * @param config
-     *            Configuration
-     * @param inputPaths
-     *            Input paths
-     * @param outputPath
-     *            Output path
-     * @return Job
-     * @throws IOException
-     */
-    public static Job getNamespaceCountJob(Configuration config, String[] inputPaths, String outputPath) throws IOException {
-        Job job = Job.getInstance(config);
-        job.setJarByClass(JobFactory.class);
-        job.setJobName("RDF Namespace Usage Count");
-
-        // Map/Reduce classes
-        job.setMapperClass(QuadNamespaceCountMapper.class);
-        job.setMapOutputKeyClass(Text.class);
-        job.setMapOutputValueClass(LongWritable.class);
-        job.setReducerClass(TextCountReducer.class);
-
-        // Input and Output
-        job.setInputFormatClass(TriplesOrQuadsInputFormat.class);
-        job.setOutputFormatClass(TextOutputFormat.class);
-        FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths));
-        FileOutputFormat.setOutputPath(job, new Path(outputPath));
-
-        return job;
-    }
-}
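
For context, every factory method in the removed JobFactory follows the same pattern: configure a Job, point it at the inputs, and hand it back for the caller to submit. A minimal driver for the single-job variants might look like the sketch below; the class name, example paths, and exit handling are illustrative assumptions, not part of this commit. For the methods returning Job[], the jobs form a two-stage pipeline and must be submitted in order, since the second stage reads the first stage's intermediate output.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.mapreduce.Job;

    // Hypothetical driver (sketch only); assumes JobFactory is on the classpath
    public class NodeCountDriver {
        public static void main(String[] args) throws Exception {
            Configuration config = new Configuration();
            // Assumed example paths; a real deployment would take these from args
            String[] inputs = { "/data/rdf/input" };
            String output = "/data/rdf/node-counts";

            Job job = JobFactory.getNodeCountJob(config, inputs, output);
            // waitForCompletion(true) submits the job and prints progress
            System.exit(job.waitForCompletion(true) ? 0 : 1);
        }
    }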

http://git-wip-us.apache.org/repos/asf/jena/blob/a6c0fefc/jena-hadoop-rdf/jena-elephas-common/pom.xml
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/jena-elephas-common/pom.xml b/jena-hadoop-rdf/jena-elephas-common/pom.xml
new file mode 100644
index 0000000..7dd68a0
--- /dev/null
+++ b/jena-hadoop-rdf/jena-elephas-common/pom.xml
@@ -0,0 +1,54 @@
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+       xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+       <modelVersion>4.0.0</modelVersion>
+       <parent>
+               <groupId>org.apache.jena</groupId>
+               <artifactId>jena-elephas</artifactId>
+               <version>0.9.0-SNAPSHOT</version>
+       </parent>
+       <artifactId>jena-elephas-common</artifactId>
+       <name>Apache Jena - Elephas - Common API</name>
+       <description>Common code for RDF on Hadoop such as writable types for RDF primitives</description>
+
+       <!-- Note that versions are managed by parent POMs -->
+       <dependencies>
+               <!-- Hadoop Dependencies -->
+               <!-- Note these will be provided on the Hadoop cluster hence the provided
+                       scope -->
+               <dependency>
+                       <groupId>org.apache.hadoop</groupId>
+                       <artifactId>hadoop-common</artifactId>
+                       <scope>provided</scope>
+               </dependency>
+
+               <!-- Jena dependencies -->
+               <dependency>
+                       <groupId>org.apache.jena</groupId>
+                       <artifactId>jena-arq</artifactId>
+               </dependency>
+
+               <!-- Test Dependencies -->
+               <dependency>
+                       <groupId>junit</groupId>
+                       <artifactId>junit</artifactId>
+                       <scope>test</scope>
+               </dependency>
+       </dependencies>
+</project>
\ No newline at end of file
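
A consumer of this new module would declare it like any other Maven dependency; the fragment below is an assumed downstream POM snippet, shown with the snapshot version from this commit:

    <dependency>
            <groupId>org.apache.jena</groupId>
            <artifactId>jena-elephas-common</artifactId>
            <version>0.9.0-SNAPSHOT</version>
    </dependency>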

http://git-wip-us.apache.org/repos/asf/jena/blob/a6c0fefc/jena-hadoop-rdf/jena-elephas-common/src/main/java/org/apache/jena/hadoop/rdf/types/AbstractNodeTupleWritable.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/jena-elephas-common/src/main/java/org/apache/jena/hadoop/rdf/types/AbstractNodeTupleWritable.java b/jena-hadoop-rdf/jena-elephas-common/src/main/java/org/apache/jena/hadoop/rdf/types/AbstractNodeTupleWritable.java
new file mode 100644
index 0000000..f0acc09
--- /dev/null
+++ b/jena-hadoop-rdf/jena-elephas-common/src/main/java/org/apache/jena/hadoop/rdf/types/AbstractNodeTupleWritable.java
@@ -0,0 +1,193 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *     
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.types;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.io.WritableUtils;
+import com.hp.hpl.jena.graph.Node;
+import com.hp.hpl.jena.sparql.util.NodeUtils;
+
+/**
+ * An abstract general-purpose writable where the actual class represented is
+ * composed of a number of {@link Node} instances
+ * <p>
+ * The binary encoding of this base implementation is just a variable integer
+ * indicating the number of nodes present followed by the binary encodings of
+ * the {@link NodeWritable} instances. Derived implementations may wish to
+ * override the {@link #readFields(DataInput)} and {@link #write(DataOutput)}
+ * methods in order to use more specialised encodings.
+ * </p>
+ * 
+ * @param <T>
+ *            Tuple type
+ */
+public abstract class AbstractNodeTupleWritable<T> implements WritableComparable<AbstractNodeTupleWritable<T>> {
+
+    private T tuple;
+
+    /**
+     * Creates a new empty instance
+     */
+    protected AbstractNodeTupleWritable() {
+        this(null);
+    }
+
+    /**
+     * Creates a new instance with the given value
+     * 
+     * @param tuple
+     *            Tuple value
+     */
+    protected AbstractNodeTupleWritable(T tuple) {
+        this.tuple = tuple;
+    }
+
+    /**
+     * Gets the tuple
+     * 
+     * @return Tuple
+     */
+    public T get() {
+        return this.tuple;
+    }
+
+    /**
+     * Sets the tuple
+     * 
+     * @param tuple
+     *            Tuple
+     */
+    public void set(T tuple) {
+        this.tuple = tuple;
+    }
+
+    @Override
+    public void readFields(DataInput input) throws IOException {
+        // Determine how many nodes
+        int size = WritableUtils.readVInt(input);
+        Node[] ns = new Node[size];
+
+        NodeWritable nw = new NodeWritable();
+        for (int i = 0; i < ns.length; i++) {
+            nw.readFields(input);
+            ns[i] = nw.get();
+        }
+
+        // Load the tuple
+        this.tuple = this.createTuple(ns);
+    }
+
+    /**
+     * Creates the actual tuple type from an array of nodes
+     * 
+     * @param ns
+     *            Nodes
+     * @return Tuple
+     */
+    protected abstract T createTuple(Node[] ns);
+
+    @Override
+    public void write(DataOutput output) throws IOException {
+        // Determine how many nodes
+        Node[] ns = this.createNodes(this.tuple);
+        WritableUtils.writeVInt(output, ns.length);
+
+        // Write out nodes
+        NodeWritable nw = new NodeWritable();
+        for (int i = 0; i < ns.length; i++) {
+            nw.set(ns[i]);
+            nw.write(output);
+        }
+    }
+
+    /**
+     * Sets the tuple value
+     * <p>
+     * Intended only for internal use, i.e. when a derived implementation
+     * overrides {@link #readFields(DataInput)} and needs to set the tuple
+     * value directly because it is using a custom encoding scheme
+     * </p>
+     * 
+     * @param tuple
+     *            Tuple
+     */
+    protected final void setInternal(T tuple) {
+        this.tuple = tuple;
+    }
+
+    /**
+     * Converts the actual tuple type into an array of nodes
+     * 
+     * @param tuple
+     *            Tuple
+     * @return Nodes
+     */
+    protected abstract Node[] createNodes(T tuple);
+
+    /**
+     * Compares instances node by node
+     * <p>
+     * Derived implementations may wish to override this and substitute native
+     * tuple based comparisons
+     * </p>
+     * 
+     * @param other
+     *            Instance to compare with
+     */
+    @Override
+    public int compareTo(AbstractNodeTupleWritable<T> other) {
+        Node[] ns = this.createNodes(this.tuple);
+        Node[] otherNs = this.createNodes(other.tuple);
+
+        if (ns.length < otherNs.length) {
+            return -1;
+        } else if (ns.length > otherNs.length) {
+            return 1;
+        }
+        // Compare node by node
+        for (int i = 0; i < ns.length; i++) {
+            int c = NodeUtils.compareRDFTerms(ns[i], otherNs[i]);
+            if (c != 0)
+                return c;
+        }
+        return 0;
+    }
+
+    @Override
+    public String toString() {
+        return this.get().toString();
+    }
+
+    @Override
+    public int hashCode() {
+        return this.get().hashCode();
+    }
+
+    @SuppressWarnings("unchecked")
+    @Override
+    public boolean equals(Object other) {
+        if (!(other instanceof AbstractNodeTupleWritable))
+            return false;
+        return this.compareTo((AbstractNodeTupleWritable<T>) other) == 0;
+    }
+}
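
To see how the two extension points fit together, consider a hypothetical minimal subclass whose tuple type is simply a pair of nodes; the NodePairWritable name and its constructors are invented for illustration and are not part of this commit:

    import com.hp.hpl.jena.graph.Node;
    import org.apache.jena.hadoop.rdf.types.AbstractNodeTupleWritable;

    // Hypothetical subclass (sketch only): a writable for a pair of nodes,
    // using Node[] directly as the tuple type
    public class NodePairWritable extends AbstractNodeTupleWritable<Node[]> {

        public NodePairWritable() {
            super(); // empty instance, required for Hadoop deserialisation
        }

        public NodePairWritable(Node first, Node second) {
            super(new Node[] { first, second });
        }

        @Override
        protected Node[] createTuple(Node[] ns) {
            // The deserialised node array is itself our tuple type
            return ns;
        }

        @Override
        protected Node[] createNodes(Node[] tuple) {
            return tuple;
        }
    }

With just those two methods implemented, the base class supplies serialisation, comparison, equality, and hashing.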
